Source code for tooluniverse.mhcmotifatlas_tool
"""MHC Motif Atlas - naturally-presented MHC ligand peptides tool.
The MHC Motif Atlas (mhcmotifatlas.org) provides curated lists of
naturally-presented MHC ligand peptides per allele (MHC class I and class II)
across human, mouse, cattle and chicken, together with per-allele MHC protein
sequences. These data support binding-motif and specificity analysis.
This tool retrieves ligand peptides for a given allele and, optionally, the
MHC protein sequence for that allele. All endpoints are keyless plain-text
TSV files.
Endpoints:
- http://mhcmotifatlas.org/data/classI/all_peptides.txt (Allele, Peptide)
- http://mhcmotifatlas.org/data/classII/MS/Peptides/all_peptides.txt
(Allele, Peptide, Core)
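- http://mhcmotifatlas.org/data/classI/MHC_I_sequences.txt (Allele, Sequence)
- http://mhcmotifatlas.org/data/classII/MHC_II_sequences.txt
(Allele, Sequence; layout assumed to match the class I table)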
"""
from __future__ import annotations
from typing import Any, Dict, List
import requests
from .base_tool import BaseTool
from .http_utils import request_with_retry
from .tool_registry import register_tool
# Root of the atlas' static data tree; every table URL below is built from it.
_BASE_URL = "http://mhcmotifatlas.org/data"

# Passed as ``timeout=`` on each HTTP request (seconds, per the requests
# convention).
_REQUEST_TIMEOUT = 30

# Peptide tables keyed by normalized MHC class ("I" or "II").
_PEPTIDE_URLS = {
    "I": _BASE_URL + "/classI/all_peptides.txt",
    "II": _BASE_URL + "/classII/MS/Peptides/all_peptides.txt",
}

# MHC protein sequence tables keyed by normalized MHC class ("I" or "II").
_SEQUENCE_URLS = {
    "I": _BASE_URL + "/classI/MHC_I_sequences.txt",
    "II": _BASE_URL + "/classII/MHC_II_sequences.txt",
}
[docs]
@register_tool("MHCMotifAtlasTool")
class MHCMotifAtlasTool(BaseTool):
    """Retrieve naturally-presented MHC ligand peptides per allele.

    Each call downloads the peptide table for the requested MHC class, keeps
    the rows whose allele column equals the requested allele and, when asked,
    looks the allele up in the matching protein sequence table. Results and
    failures are both reported as dicts carrying a ``status`` of
    ``"success"`` or ``"error"``.
    """

    # String spellings that mean "false" when a boolean flag arrives as text.
    # Any other non-empty string stays truthy, as it was under plain bool().
    _FALSE_STRINGS = ("", "0", "false", "f", "no", "n", "off", "none", "null")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Validate ``arguments`` and fetch the ligands for one allele.

        Args:
            arguments: Tool arguments. Recognised keys are ``allele``
                (required), ``mhc_class`` (defaults to ``"I"``),
                ``include_sequence`` (defaults to ``False``) and ``limit``
                (defaults to 100 and is clamped to the range 1..1000).

        Returns:
            The dict produced by ``_fetch_ligands`` on valid input, otherwise
            ``{"status": "error", "error": <message>}``. Exceptions are
            converted into an error dict rather than propagated.
        """
        try:
            # Stringify before stripping so a non-string allele (e.g. a number)
            # goes through the normal lookup instead of failing on ``.strip()``.
            allele = str(arguments.get("allele") or "").strip()
            if not allele:
                return {
                    "status": "error",
                    "error": "allele is required (e.g. 'A0101' for class I, 'DRB1_01_01' for class II)",
                }

            # An explicit None means "not provided"; use the default, the same
            # way a None ``limit`` falls back to 100 below.
            raw_class = arguments.get("mhc_class")
            mhc_class = self._normalize_class("I" if raw_class is None else raw_class)
            if mhc_class not in ("I", "II"):
                return {
                    "status": "error",
                    "error": "mhc_class must be 'I' or 'II'",
                }

            include_sequence = self._coerce_bool(
                arguments.get("include_sequence", False)
            )

            try:
                limit = int(arguments.get("limit", 100))
            except (TypeError, ValueError, OverflowError):
                # OverflowError covers float infinity, which int() rejects.
                limit = 100
            limit = max(1, min(1000, limit))

            return self._fetch_ligands(allele, mhc_class, include_sequence, limit)
        except Exception as exc:  # never raise
            return {"status": "error", "error": f"Unexpected error: {exc}"}

    @classmethod
    def _coerce_bool(cls, value: Any) -> bool:
        """Interpret ``value`` as a flag, honouring textual false values.

        ``bool("false")`` is ``True`` in Python, so strings are compared
        against ``_FALSE_STRINGS`` (case-insensitively, ignoring surrounding
        whitespace). Non-string values use ordinary truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in cls._FALSE_STRINGS
        return bool(value)

    @staticmethod
    def _normalize_class(value: Any) -> str:
        """Map a user-supplied MHC class spelling onto ``"I"`` or ``"II"``.

        The value is upper-cased, the tokens ``MHC-``, ``CLASS`` and ``MHC``
        are removed, and leftover whitespace, ``_`` and ``-`` separators are
        trimmed, so ``1``, ``"class I"``, ``"MHC-II"`` and ``"class_2"`` are
        all accepted. Anything that does not reduce to ``1``/``I``/``2``/``II``
        is returned in its cleaned form so the caller can reject it.
        """
        s = str(value).strip().upper()
        s = s.replace("MHC-", "").replace("CLASS", "").replace("MHC", "")
        # Trim separators left behind by spellings such as "class_II".
        s = s.strip().strip("_-").strip()
        if s in ("1", "I"):
            return "I"
        if s in ("2", "II"):
            return "II"
        return s

    def _fetch_text(self, url: str) -> Dict[str, Any]:
        """GET ``url`` and return its body as text.

        Returns:
            ``{"status": "success", "text": <body>}`` for an HTTP 200
            response; otherwise ``{"status": "error", "error": <message>}``
            for a failed request or any other status code.
        """
        try:
            resp = request_with_retry(requests, "GET", url, timeout=_REQUEST_TIMEOUT)
        except Exception as exc:
            return {"status": "error", "error": f"Request failed: {exc}"}
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"HTTP {resp.status_code} fetching {url}",
            }
        return {"status": "success", "text": resp.text}

    def _fetch_ligands(
        self,
        allele: str,
        mhc_class: str,
        include_sequence: bool,
        limit: int,
    ) -> Dict[str, Any]:
        """Download the peptide table and collect the rows for ``allele``.

        Args:
            allele: Allele identifier, compared for exact equality with the
                first column of the table.
            mhc_class: ``"I"`` or ``"II"``; selects the table to download.
            include_sequence: When true, also look up the protein sequence.
            limit: Maximum number of peptide records to return.

        Returns:
            On success, ``{"status": "success", "data": {...}, "metadata":
            {...}}`` where ``data["peptides"]`` holds at most ``limit``
            records and ``metadata["total_matches"]`` counts every matching
            row. An error dict is returned when the download fails, the table
            is empty, or no row matches the allele.
        """
        result = self._fetch_text(_PEPTIDE_URLS[mhc_class])
        if result.get("status") == "error":
            return result

        lines = result["text"].splitlines()
        if not lines:
            return {"status": "error", "error": "Empty peptide table"}

        # A third header column named "core" marks tables that carry a
        # per-peptide core column.
        header = [cell.strip() for cell in lines[0].split("\t")]
        has_core = len(header) >= 3 and header[2].lower() == "core"

        peptides: List[Dict[str, str]] = []
        total_matches = 0
        for line in lines[1:]:
            cols = line.split("\t")
            # Blank or malformed rows split into fewer than two cells.
            if len(cols) < 2 or cols[0].strip() != allele:
                continue
            # Keep counting past ``limit`` so the metadata reports the true
            # number of matches and whether the result was truncated.
            total_matches += 1
            if len(peptides) < limit:
                rec = {"peptide": cols[1].strip()}
                if has_core and len(cols) >= 3:
                    rec["core"] = cols[2].strip()
                peptides.append(rec)

        if total_matches == 0:
            return {
                "status": "error",
                "error": (
                    f"No ligands found for allele '{allele}' in MHC class {mhc_class}. "
                    "Use the atlas allele format (e.g. 'A0101', 'B0702' for class I; "
                    "'DRB1_01_01' for class II)."
                ),
            }

        sequence = None
        if include_sequence:
            sequence = self._fetch_sequence(allele, mhc_class)

        return {
            "status": "success",
            "data": {
                "allele": allele,
                "mhc_class": mhc_class,
                "peptides": peptides,
                "sequence": sequence,
            },
            "metadata": {
                "source": "MHC Motif Atlas (mhcmotifatlas.org)",
                "total_matches": total_matches,
                "returned": len(peptides),
                "limit": limit,
                "truncated": total_matches > len(peptides),
                "has_core_column": has_core,
            },
        }

    def _fetch_sequence(self, allele: str, mhc_class: str) -> "str | None":
        """Return the protein sequence listed for ``allele``, if any.

        The lookup is best-effort: ``None`` is returned when the class has no
        sequence table, when the download fails, and when no row matches, so
        callers cannot tell these cases apart.
        """
        url = _SEQUENCE_URLS.get(mhc_class)
        if not url:
            return None
        result = self._fetch_text(url)
        if result.get("status") == "error":
            return None
        # Skip the header row, then take the first row whose allele matches.
        for line in result["text"].splitlines()[1:]:
            cols = line.split("\t")
            if len(cols) >= 2 and cols[0].strip() == allele:
                return cols[1].strip()
        return None