tooluniverse.iedb_prediction_tool 源代码
"""
IEDB Prediction Tool - MHC-I and MHC-II Binding Prediction
Provides access to the IEDB Analysis Resource tools API for predicting
peptide binding to MHC class I and class II molecules.
API: https://tools-cluster-interface.iedb.org/tools_api/
No authentication required.
Methods: NetMHCpan EL (recommended), NetMHCpan BA, SMM, ANN
"""
import requests
import csv
import io
from typing import Dict, Any, List
from .base_tool import BaseTool
from .tool_registry import register_tool
IEDB_TOOLS_BASE = "https://tools-cluster-interface.iedb.org/tools_api"
[文档]
@register_tool("IEDBPredictionTool")
class IEDBPredictionTool(BaseTool):
"""
Tool for predicting peptide-MHC binding using IEDB Analysis Resource.
Supported operations:
- predict_mhci: Predict MHC class I binding (CD8+ T cell epitopes)
- predict_mhcii: Predict MHC class II binding (CD4+ T cell epitopes)
"""
[文档]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = 120 # predictions can be slow
self.endpoint_type = tool_config.get("fields", {}).get(
"endpoint_type", "predict_mhci"
)
[文档]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
try:
if self.endpoint_type == "predict_mhci":
return self._predict_mhci(arguments)
elif self.endpoint_type == "predict_mhcii":
return self._predict_mhcii(arguments)
return {
"status": "error",
"error": f"Unknown endpoint: {self.endpoint_type}",
}
except requests.exceptions.Timeout:
return {"status": "error", "error": "IEDB prediction timed out (max 120s)"}
except Exception as e:
return {"status": "error", "error": f"IEDB prediction error: {str(e)}"}
[文档]
def _parse_tsv(self, text: str) -> List[Dict[str, str]]:
reader = csv.DictReader(io.StringIO(text.strip()), delimiter="\t")
return [dict(row) for row in reader]
[文档]
def _predict_mhci(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
sequence = arguments.get("sequence", "")
allele = arguments.get("allele", "HLA-A*02:01")
method = arguments.get("method", "netmhcpan_el")
length = arguments.get("length", 9)
if not sequence:
return {"status": "error", "error": "sequence is required"}
data = {
"method": method,
"sequence_text": sequence,
"allele": allele,
"length": str(length),
}
resp = requests.post(
f"{IEDB_TOOLS_BASE}/mhci/",
data=data,
timeout=self.timeout,
)
resp.raise_for_status()
results = self._parse_tsv(resp.text)
# Sort by score (descending for EL, ascending for BA)
for r in results:
try:
r["score"] = float(r.get("score", 0))
r["percentile_rank"] = float(r.get("percentile_rank", 100))
except (ValueError, TypeError):
pass
results.sort(key=lambda x: x.get("percentile_rank", 100))
return {
"status": "success",
"data": results,
"metadata": {
"method": method,
"allele": allele,
"length": length,
"n_peptides": len(results),
"source": "IEDB Analysis Resource",
"interpretation": (
"percentile_rank < 0.5% = strong binder, "
"0.5-2% = moderate binder, >2% = weak/non-binder"
),
},
}
[文档]
def _predict_mhcii(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
sequence = arguments.get("sequence", "")
allele = arguments.get("allele", "HLA-DRB1*01:01")
method = arguments.get("method", "netmhciipan_el")
if not sequence:
return {"status": "error", "error": "sequence is required"}
data = {
"method": method,
"sequence_text": sequence,
"allele": allele,
}
resp = requests.post(
f"{IEDB_TOOLS_BASE}/mhcii/",
data=data,
timeout=self.timeout,
)
resp.raise_for_status()
results = self._parse_tsv(resp.text)
for r in results:
try:
r["percentile_rank"] = float(r.get("percentile_rank", 100))
except (ValueError, TypeError):
pass
results.sort(key=lambda x: x.get("percentile_rank", 100))
return {
"status": "success",
"data": results,
"metadata": {
"method": method,
"allele": allele,
"n_peptides": len(results),
"source": "IEDB Analysis Resource",
},
}