Source code for tooluniverse.alphafill_tool

"""
AlphaFill tool for ToolUniverse — ligands/cofactors transplanted into AlphaFold models.

AlphaFold predicts apo (ligand-free) protein structures. AlphaFill enriches them by
transplanting ligands, cofactors, and ions from homologous experimental PDB structures
into the predicted model. For a UniProt accession, this tool reports which small
molecules can be modeled into its AlphaFold structure, from which PDB entries, and at
what local fit quality — useful for hypothesizing cofactor/ligand/drug binding that the
bare AlphaFold model does not show.

API: https://alphafill.eu/v1/aff/{uniprot}/json  (public, no authentication, JSON)
"""

from collections import defaultdict
from typing import Any, Dict

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

ALPHAFILL_BASE = "https://alphafill.eu/v1/aff"


[docs] @register_tool("AlphaFillTransplantsTool") class AlphaFillTransplantsTool(BaseTool): """List ligands/cofactors AlphaFill transplants into a UniProt's AlphaFold model."""
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("fields", {}).get("timeout", 30)
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: uniprot = (arguments.get("uniprot") or "").strip().upper() if not uniprot: return { "status": "error", "error": "'uniprot' accession is required (e.g. 'P00520')", } try: resp = requests.get( f"{ALPHAFILL_BASE}/{uniprot}/json", headers={"Accept": "application/json"}, timeout=self.timeout, ) if resp.status_code == 404: return { "status": "success", "data": {"uniprot": uniprot, "transplants": []}, "metadata": { "uniprot": uniprot, "note": f"No AlphaFill model for '{uniprot}' (no AlphaFold model or no transplants).", }, } resp.raise_for_status() payload = resp.json() except requests.exceptions.Timeout: return { "status": "error", "error": f"AlphaFill request timed out after {self.timeout}s", } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"AlphaFill request failed: {e}"} except ValueError: return { "status": "error", "error": "AlphaFill returned a non-JSON response", } if not isinstance(payload, dict): payload = {} hits = payload.get("hits", []) # Aggregate transplanted molecules by compound, keeping the best (lowest # local RMSD) instance and the set of source PDB entries. agg: Dict[str, Dict[str, Any]] = defaultdict( lambda: {"count": 0, "best_local_rmsd": None, "source_pdb_ids": set()} ) for hit in hits: pdb_id = hit.get("pdb_id") for transplant in hit.get("transplants", []) or []: cid = transplant.get("compound_id") or transplant.get("analogue_id") if not cid: continue rec = agg[cid] rec["count"] += 1 if pdb_id: rec["source_pdb_ids"].add(pdb_id) rmsd = transplant.get("local_rmsd") if isinstance(rmsd, (int, float)) and ( rec["best_local_rmsd"] is None or rmsd < rec["best_local_rmsd"] ): rec["best_local_rmsd"] = rmsd transplants = sorted( ( { "compound_id": cid, "occurrences": rec["count"], "best_local_rmsd": rec["best_local_rmsd"], "source_pdb_ids": sorted(rec["source_pdb_ids"])[:10], } for cid, rec in agg.items() ), key=lambda x: -x["occurrences"], ) return { "status": "success", "data": { "uniprot": uniprot, "alphafill_version": payload.get("alphafill_version"), "n_homolog_hits": len(hits), "transplants": transplants, }, "metadata": { "uniprot": uniprot, "distinct_ligands": len(transplants), "note": "compound_id is the PDB chemical component (ligand/cofactor/ion) code; lower best_local_rmsd = better fit", "source": "AlphaFill", }, }