Source code for tooluniverse.hlaligandatlas_tool

"""HLA Ligand Atlas - benign-tissue immunopeptidome reference tool.

The HLA Ligand Atlas (hla-ligand-atlas.org) is a curated reference of
HLA-presented peptides eluted from non-malignant ("benign") human tissues.
It is widely used to filter self-peptides during cancer neoantigen discovery.

This tool retrieves benign HLA ligands from the aggregated release table and
the companion donor-allele table. All endpoints are keyless and return
gzip-compressed (aggregated) or plain (donors) TSV files.

Endpoints (release 2020.12):
- https://hla-ligand-atlas.org/rel/2020.12/aggregated.tsv.gz
- https://hla-ligand-atlas.org/rel/2020.12/donors.tsv.gz
"""

from __future__ import annotations

import gzip
import io
from typing import Any, Dict, List, Optional

import requests

from .base_tool import BaseTool
from .http_utils import request_with_retry
from .tool_registry import register_tool

_BASE_URL = "https://hla-ligand-atlas.org/rel/2020.12"
_REQUEST_TIMEOUT = 30


[docs] @register_tool("HLALigandAtlasTool") class HLALigandAtlasTool(BaseTool): """Retrieve benign-tissue HLA-presented peptides from the HLA Ligand Atlas."""
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: try: operation = ( (self.tool_config.get("fields") or {}).get("operation") or arguments.get("operation") or "get_benign_peptides" ) if operation == "get_benign_peptides": return self._get_benign_peptides(arguments) if operation == "get_donors": return self._get_donors(arguments) return { "status": "error", "error": f"Unknown operation: {operation}", } except Exception as exc: # never raise return {"status": "error", "error": f"Unexpected error: {exc}"}
# ------------------------------------------------------------------ # # benign peptides (aggregated.tsv.gz) # ------------------------------------------------------------------ #
[docs] def _get_benign_peptides(self, arguments: Dict[str, Any]) -> Dict[str, Any]: peptide = (arguments.get("peptide") or "").strip().upper() hla_class = (arguments.get("hla_class") or "").strip().upper() allele = (arguments.get("allele") or "").strip() tissue = (arguments.get("tissue") or "").strip().lower() try: limit = int(arguments.get("limit", 50)) except (TypeError, ValueError): limit = 50 limit = max(1, min(500, limit)) if hla_class and hla_class not in ("HLA-I", "HLA-II"): return { "status": "error", "error": "hla_class must be 'HLA-I' or 'HLA-II'", } url = f"{_BASE_URL}/aggregated.tsv.gz" try: resp = request_with_retry(requests, "GET", url, timeout=_REQUEST_TIMEOUT) except Exception as exc: return {"status": "error", "error": f"Request failed: {exc}"} if resp.status_code != 200: return { "status": "error", "error": f"HTTP {resp.status_code} fetching aggregated table", } try: raw = gzip.decompress(resp.content) text = raw.decode("utf-8", errors="replace") except Exception as exc: return { "status": "error", "error": f"Failed to decompress aggregated TSV: {exc}", } lines = text.splitlines() if not lines: return {"status": "error", "error": "Empty aggregated table"} header = lines[0].split("\t") rows: List[Dict[str, Any]] = [] scanned = 0 for line in lines[1:]: if not line: continue scanned += 1 cols = line.split("\t") if len(cols) < 5: continue pep = cols[1] cls = cols[2] donor_alleles = cols[3] tissues = cols[4] if peptide and pep.upper() != peptide: continue if hla_class and cls.upper() != hla_class: continue if allele and allele not in donor_alleles: continue if tissue and tissue not in tissues.lower(): continue rows.append( { "peptide_sequence_id": cols[0], "peptide_sequence": pep, "hla_class": cls, "donor_alleles": [a for a in donor_alleles.split(",") if a], "tissues": [t for t in tissues.split(",") if t], } ) if len(rows) >= limit: break return { "status": "success", "data": {"peptides": rows}, "metadata": { "source": "HLA Ligand Atlas (release 2020.12)", "columns": header, "returned": len(rows), "rows_scanned": scanned, "limit": limit, "truncated": len(rows) >= limit, "filters": { "peptide": peptide or None, "hla_class": hla_class or None, "allele": allele or None, "tissue": tissue or None, }, }, }
# ------------------------------------------------------------------ # # donors (donors.tsv.gz) # ------------------------------------------------------------------ #
[docs] def _get_donors(self, arguments: Dict[str, Any]) -> Dict[str, Any]: donor_filter = (arguments.get("donor") or "").strip() allele_filter = (arguments.get("allele") or "").strip() url = f"{_BASE_URL}/donors.tsv.gz" try: resp = request_with_retry(requests, "GET", url, timeout=_REQUEST_TIMEOUT) except Exception as exc: return {"status": "error", "error": f"Request failed: {exc}"} if resp.status_code != 200: return { "status": "error", "error": f"HTTP {resp.status_code} fetching donors table", } text = self._maybe_gunzip(resp.content) if text is None: return { "status": "error", "error": "Failed to read donors table", } lines = text.splitlines() if not lines: return {"status": "error", "error": "Empty donors table"} header = lines[0].split("\t") records: List[Dict[str, str]] = [] for line in lines[1:]: if not line: continue cols = line.split("\t") if len(cols) < 2: continue donor, hla_allele = cols[0], cols[1] if donor_filter and donor_filter not in donor: continue if allele_filter and allele_filter not in hla_allele: continue records.append({"donor": donor, "hla_allele": hla_allele}) return { "status": "success", "data": {"donors": records}, "metadata": { "source": "HLA Ligand Atlas (release 2020.12)", "columns": header, "returned": len(records), "filters": { "donor": donor_filter or None, "allele": allele_filter or None, }, }, }
[docs] @staticmethod def _maybe_gunzip(content: bytes) -> Optional[str]: """Decode content, decompressing if it is gzip-framed.""" try: if content[:2] == b"\x1f\x8b": with gzip.GzipFile(fileobj=io.BytesIO(content)) as gz: content = gz.read() return content.decode("utf-8", errors="replace") except Exception: return None