Source code for tooluniverse.pdbe_ligands_tool
# pdbe_ligands_tool.py
"""
PDBe Ligands and Residues tool for ToolUniverse.
The PDBe REST API provides information about ligands bound to PDB structures
and detailed per-residue information for protein chains. These complement the
existing PDBe compound tools (which look up compounds by ID) by providing
structure-centric queries.
API: https://www.ebi.ac.uk/pdbe/api/pdb/entry/
No authentication required.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root of the PDBe entry-level REST endpoints; the endpoint name and the PDB ID
# are appended as further path segments when a request URL is built.
PDBE_API_BASE_URL = "https://www.ebi.ac.uk/pdbe/api/pdb/entry"
[docs]
@register_tool("PDBeLigandsTool")
class PDBeLigandsTool(BaseTool):
    """
    Tool for querying PDBe structure-bound ligands and residue details.

    Supports:
    - Get all ligands bound in a PDB structure (drug-like, cofactors, ions)
    - Get per-residue listing with observed ratio for a PDB chain

    The endpoint is chosen once, at construction time, from
    ``tool_config["fields"]["endpoint"]`` (``"ligand_monomers"`` by default,
    or ``"residue_listing"``). ``run`` returns a plain dict: either
    ``{"data": ..., "metadata": ...}`` on success or ``{"error": "..."}``.

    No authentication required.
    """

    # Output caps that keep responses small. The ``total_ligands``,
    # ``total_molecules``, ``total_chains`` and ``total_residues`` output
    # fields report the untruncated counts; annotations are capped without
    # a total.
    MAX_LIGANDS = 50
    MAX_ANNOTATIONS_PER_LIGAND = 5
    MAX_MOLECULES = 10
    MAX_CHAINS_PER_MOLECULE = 5
    MAX_RESIDUES_PER_CHAIN = 30

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the endpoint selected by the config.

        Args:
            tool_config: Tool configuration. Reads the optional ``"timeout"``
                key (default 30, passed to ``requests.get``) and the optional
                ``"fields"`` mapping, whose ``"endpoint"`` entry (default
                ``"ligand_monomers"``) selects the query that ``run`` performs.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers a config that sets "fields" explicitly to None.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "ligand_monomers")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the PDBe API call.

        Args:
            arguments: Call arguments. ``pdb_id`` is required; ``chain_id`` is
                optional and only read by the ``residue_listing`` endpoint.

        Returns:
            The result dict of the configured endpoint, or ``{"error": ...}``.
            Any ``Exception`` raised while querying is converted into an
            error dict instead of being propagated.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"PDBe API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to PDBe API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            if status == 404:
                return {"error": self._not_found_message(arguments)}
            return {"error": f"PDBe API HTTP {status}"}
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}

    def _not_found_message(self, arguments: Dict[str, Any]) -> str:
        """Build the error text for an HTTP 404 response.

        When a chain was part of the requested path, the message names it as
        well, because the 404 may be caused by the chain rather than the
        entry.
        """
        pdb_id = arguments.get("pdb_id", "unknown")
        chain_id = arguments.get("chain_id")
        if self.endpoint == "residue_listing" and chain_id:
            return (
                f"PDB entry '{pdb_id}' or chain '{chain_id}' not found. "
                "Provide a valid 4-character PDB ID (e.g., '4hhb', '3ert') "
                "and a chain ID that exists in that entry."
            )
        return f"PDB entry '{pdb_id}' not found. Provide a valid 4-character PDB ID (e.g., '4hhb', '3ert')."

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate endpoint."""
        if self.endpoint == "ligand_monomers":
            return self._get_ligand_monomers(arguments)
        elif self.endpoint == "residue_listing":
            return self._get_residue_listing(arguments)
        else:
            return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _normalize_pdb_id(arguments: Dict[str, Any]) -> str:
        """Return the stripped, lower-cased ``pdb_id``, or ``""`` if unusable.

        Stripping happens before the caller's emptiness check so that a
        whitespace-only ID is rejected instead of producing a request for an
        empty path segment. Non-string values are treated as missing.
        """
        raw = arguments.get("pdb_id")
        if not isinstance(raw, str):
            return ""
        return raw.strip().lower()

    def _fetch_json(self, *path_segments: str) -> Any:
        """GET ``PDBE_API_BASE_URL/<segments...>`` and return the decoded JSON.

        Each segment is percent-encoded so that caller-supplied IDs cannot
        introduce extra path components or a query string into the URL.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a 4xx/5xx status (via ``raise_for_status``).
        """
        encoded = "/".join(quote(segment, safe="") for segment in path_segments)
        response = requests.get(f"{PDBE_API_BASE_URL}/{encoded}", timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _summarize_ligand(self, lig: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce one raw ligand record to the fields exposed by the tool."""
        annotation_info = []
        # ``or []`` / ``or {}`` tolerate JSON nulls, which ``.get(key, default)``
        # would pass through unchanged.
        annotations = lig.get("annotations") or []
        for ann in annotations[: self.MAX_ANNOTATIONS_PER_LIGAND]:
            interacting = ann.get("interacting_entity") or {}
            annotation_info.append(
                {
                    "type": ann.get("type"),
                    "interacting_entity_id": interacting.get("entity_id"),
                    "interacting_chain": interacting.get("auth_asym_id"),
                    "interacting_uniprot": interacting.get("best_unp_accession"),
                }
            )
        return {
            "chem_comp_id": lig.get("chem_comp_id"),
            "chem_comp_name": lig.get("chem_comp_name"),
            "weight": lig.get("weight"),
            "chain_id": lig.get("chain_id"),
            "entity_id": lig.get("entity_id"),
            "author_residue_number": lig.get("author_residue_number"),
            "carbohydrate_polymer": lig.get("carbohydrate_polymer", False),
            "annotations": annotation_info,
        }

    def _get_ligand_monomers(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get all ligand monomers bound in a PDB structure.

        Args:
            arguments: Must contain a non-empty string ``pdb_id``.

        Returns:
            ``{"data": {"pdb_id", "ligands", "total_ligands"}, "metadata"}``
            where ``ligands`` holds at most ``MAX_LIGANDS`` entries and
            ``total_ligands`` is the untruncated count, or ``{"error": ...}``
            when the ID is missing or the response has no entry for it.

        Raises:
            requests.exceptions.RequestException: Propagated from the HTTP
                call; ``run`` converts it into an error dict.
        """
        pdb_id = self._normalize_pdb_id(arguments)
        if not pdb_id:
            return {
                "error": "pdb_id is required (4-character PDB ID, e.g., '4hhb', '3ert', '1m17')."
            }
        data = self._fetch_json("ligand_monomers", pdb_id)
        if not isinstance(data, dict) or pdb_id not in data:
            return {"error": f"No ligand data for PDB '{pdb_id}'."}
        ligands_raw = data[pdb_id] or []
        ligands = [
            self._summarize_ligand(lig) for lig in ligands_raw[: self.MAX_LIGANDS]
        ]
        return {
            "data": {
                "pdb_id": pdb_id,
                "ligands": ligands,
                "total_ligands": len(ligands_raw),
            },
            "metadata": {
                "source": "PDBe REST API (ebi.ac.uk/pdbe)",
            },
        }

    def _summarize_chain(self, chain: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce one raw chain record to its IDs and a capped residue list."""
        residues = chain.get("residues") or []
        # Only the first MAX_RESIDUES_PER_CHAIN residues are returned;
        # "total_residues" carries the full count.
        residue_summary = [
            {
                "residue_number": res.get("residue_number"),
                "residue_name": res.get("residue_name"),
                "author_residue_number": res.get("author_residue_number"),
                "observed_ratio": res.get("observed_ratio"),
            }
            for res in residues[: self.MAX_RESIDUES_PER_CHAIN]
        ]
        return {
            "chain_id": chain.get("chain_id"),
            "struct_asym_id": chain.get("struct_asym_id"),
            "total_residues": len(residues),
            "residues": residue_summary,
        }

    def _get_residue_listing(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get per-residue listing for a PDB structure chain.

        Args:
            arguments: Must contain a non-empty string ``pdb_id``. An optional
                truthy ``chain_id`` restricts the request to that chain;
                otherwise the whole entry is requested.

        Returns:
            ``{"data": {"pdb_id", "molecules", "total_molecules"}, "metadata"}``
            with at most ``MAX_MOLECULES`` molecules, each holding at most
            ``MAX_CHAINS_PER_MOLECULE`` chains plus ``total_chains``, or
            ``{"error": ...}`` when the ID is missing or the response has no
            entry for it.

        Raises:
            requests.exceptions.RequestException: Propagated from the HTTP
                call; ``run`` converts it into an error dict.
        """
        pdb_id = self._normalize_pdb_id(arguments)
        if not pdb_id:
            return {
                "error": "pdb_id is required (4-character PDB ID, e.g., '4hhb', '3ert')."
            }
        chain_id = arguments.get("chain_id", None)
        # Chain IDs are case-sensitive, so they are stripped but not lower-cased.
        chain = str(chain_id).strip() if chain_id else ""
        if chain:
            data = self._fetch_json("residue_listing", pdb_id, "chain", chain)
        else:
            data = self._fetch_json("residue_listing", pdb_id)
        if not isinstance(data, dict) or pdb_id not in data:
            return {"error": f"No residue data for PDB '{pdb_id}'."}
        entry_data = data[pdb_id] or {}
        molecules = entry_data.get("molecules") or []
        result_molecules = []
        for mol in molecules[: self.MAX_MOLECULES]:
            chains = mol.get("chains") or []
            result_molecules.append(
                {
                    "entity_id": mol.get("entity_id"),
                    "total_chains": len(chains),
                    "chains": [
                        self._summarize_chain(chain_record)
                        for chain_record in chains[: self.MAX_CHAINS_PER_MOLECULE]
                    ],
                }
            )
        return {
            "data": {
                "pdb_id": pdb_id,
                "molecules": result_molecules,
                "total_molecules": len(molecules),
            },
            "metadata": {
                "source": "PDBe REST API (ebi.ac.uk/pdbe)",
            },
        }