# Source code for tooluniverse.pdbe_compound_tool
# pdbe_compound_tool.py
"""
PDBe Compound tool for ToolUniverse.
PDBe Graph API provides detailed chemical compound information from the Protein
Data Bank, including molecular formula, weight, SMILES, InChI identifiers,
systematic names, cross-references to PubChem/DrugBank/ClinicalTrials, and
the PDB structures containing each compound.
API: https://www.ebi.ac.uk/pdbe/graph-api/compound/
No authentication required.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL of the PDBe Graph API compound endpoints; request paths such as
# "/summary/{comp_id}" are appended to it by PDBECompoundTool.
PDBE_COMPOUND_BASE_URL = "https://www.ebi.ac.uk/pdbe/graph-api/compound"
@register_tool("PDBECompoundTool")
class PDBECompoundTool(BaseTool):
    """
    Tool for querying PDBe compound (ligand/small molecule) information.

    Supports:
    - Get compound summary (formula, weight, SMILES, InChI, cross-references)
    - Get PDB structures containing a specific compound (at most 50 listed)

    The operation is selected by ``fields.endpoint`` in the tool config
    ("get_summary" or "get_structures"). ``run`` always returns a dict:
    either ``{"data": ..., "metadata": ...}`` or ``{"error": "<message>"}``.

    No authentication required.
    """

    # Secondary (non-graph) PDBe endpoint used to list the PDB entries that
    # contain a compound.
    _PDB_COMPOUND_SUMMARY_URL = "https://www.ebi.ac.uk/pdbe/api/pdb/compound/summary"

    # Upper bound on the number of PDB entry IDs returned by get_structures.
    _MAX_PDB_ENTRIES = 50

    def __init__(self, tool_config: Dict[str, Any]):
        """Read the request timeout and the endpoint selector from *tool_config*.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                and ``fields.endpoint`` (default "get_summary") are read here.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # "fields" may be missing or explicitly null; both mean "use defaults".
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "get_summary")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the PDBe Compound API call.

        Args:
            arguments: Call arguments; ``comp_id`` is the compound identifier.

        Returns:
            The endpoint result, or ``{"error": ...}``. No exception escapes:
            request failures and unexpected errors are converted to error dicts.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            # Checked before ConnectionError so connect timeouts are reported
            # as timeouts.
            return {"error": f"PDBe Compound API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to PDBe Compound API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            if status == 404:
                return {
                    "error": "Compound not found in PDBe. Check the 3-letter compound code (e.g., ATP, HEM, NAG)."
                }
            return {"error": f"PDBe Compound API HTTP {status}"}
        except Exception as e:
            # Tool boundary: callers expect an error dict, never a raise.
            return {"error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured endpoint."""
        if self.endpoint == "get_summary":
            return self._get_summary(arguments)
        if self.endpoint == "get_structures":
            return self._get_structures(arguments)
        return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _normalize_comp_id(arguments: Dict[str, Any]) -> str:
        """Return the upper-cased, whitespace-stripped ``comp_id``.

        Returns "" when the argument is missing, empty, blank, or not a
        string, so callers can report a clear "comp_id is required" error
        instead of failing on ``.upper()``.
        """
        raw = (arguments or {}).get("comp_id", "")
        if not isinstance(raw, str):
            return ""
        return raw.strip().upper()

    def _fetch_compound(self, comp_id: str) -> tuple:
        """Fetch the first graph-API summary record for *comp_id*.

        Returns:
            ``(compound, None)`` on success, or ``(None, error_dict)`` when
            the response holds no record for the compound.

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (translated into an error dict by ``run``).
        """
        # Percent-encode the ID so it cannot alter the request path.
        url = f"{PDBE_COMPOUND_BASE_URL}/summary/{quote(comp_id, safe='')}"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, dict) or comp_id not in data:
            return None, {"error": f"Compound '{comp_id}' not found in PDBe."}
        compound_list = data[comp_id]
        if not compound_list:
            return None, {"error": f"No data for compound '{comp_id}'."}
        return compound_list[0], None

    def _fetch_pdb_entries(self, comp_id: str) -> list:
        """Best-effort lookup of the PDB entries containing *comp_id*.

        Queries the secondary PDBe endpoint and returns the ``pdb_entries``
        list of its first record. Returns ``[]`` on any request failure,
        invalid JSON, or unexpected response shape, so the caller can fall
        back to ``first_observed_in``.
        """
        pdb_url = f"{self._PDB_COMPOUND_SUMMARY_URL}/{quote(comp_id, safe='')}"
        try:
            pdb_response = requests.get(pdb_url, timeout=self.timeout)
            pdb_response.raise_for_status()
            pdb_data = pdb_response.json()
        except (requests.exceptions.RequestException, ValueError):
            # This lookup only enriches the result; a failure here must not
            # fail the whole call. ValueError covers an invalid JSON body.
            return []

        records = pdb_data.get(comp_id) if isinstance(pdb_data, dict) else None
        if not isinstance(records, list) or not records:
            return []
        first_record = records[0]
        if not isinstance(first_record, dict):
            return []
        pdb_ids = first_record.get("pdb_entries")
        return pdb_ids if isinstance(pdb_ids, list) else []

    @staticmethod
    def _select_fields(records: Any, field_map: Dict[str, str], limit: int) -> list:
        """Project up to *limit* records onto the keys named in *field_map*.

        Args:
            records: List of dicts from the API; a missing or null value is
                treated as an empty list.
            field_map: Mapping of output key -> key to read from each record.
            limit: Maximum number of records to return.
        """
        return [
            {out_key: record.get(src_key) for out_key, src_key in field_map.items()}
            for record in (records or [])[:limit]
        ]

    def _get_summary(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get detailed compound summary from PDBe.

        Returns identifiers, formula, weight, up to 3 SMILES strings, up to 3
        systematic names and up to 15 cross-references for ``comp_id``.
        """
        comp_id = self._normalize_comp_id(arguments)
        if not comp_id:
            return {
                "error": "comp_id is required (PDB chemical component ID, e.g., 'ATP', 'HEM', 'NAG', 'CFF')."
            }

        compound, error = self._fetch_compound(comp_id)
        if error is not None:
            return error

        return {
            "data": {
                "comp_id": comp_id,
                "name": compound.get("name"),
                "formula": compound.get("formula"),
                "weight": compound.get("weight"),
                "formal_charge": compound.get("formal_charge"),
                "compound_type": compound.get("compound_type"),
                "inchi": compound.get("inchi"),
                "inchi_key": compound.get("inchi_key"),
                # The API stores the SMILES string under "name"; it is
                # exposed here as "value".
                "smiles": self._select_fields(
                    compound.get("smiles"),
                    {"program": "program", "value": "name"},
                    3,
                ),
                "systematic_names": self._select_fields(
                    compound.get("systematic_names"),
                    {"program": "program", "name": "name"},
                    3,
                ),
                "cross_references": self._select_fields(
                    compound.get("cross_links"),
                    {"resource": "resource", "resource_id": "resource_id"},
                    15,
                ),
                "first_observed_in": compound.get("first_observed_in") or [],
                "release_status": compound.get("release_status"),
                "creation_date": compound.get("creation_date"),
            },
            "metadata": {
                "source": "PDBe Graph API (ebi.ac.uk/pdbe)",
            },
        }

    def _get_structures(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get PDB structures containing a specific compound via PDBe API.

        ``pdb_entries`` holds up to 50 IDs from the secondary PDBe endpoint;
        when that lookup yields nothing, the compound's ``first_observed_in``
        value is returned instead. ``metadata.total_structures`` is the
        untruncated count of whichever source was used.
        """
        comp_id = self._normalize_comp_id(arguments)
        if not comp_id:
            return {
                "error": "comp_id is required (PDB chemical component ID, e.g., 'ATP', 'HEM', 'NAG')."
            }

        # The graph-API summary supplies name/formula/weight and the fallback
        # "first_observed_in" list.
        compound, error = self._fetch_compound(comp_id)
        if error is not None:
            return error

        first_seen = compound.get("first_observed_in") or []
        pdb_ids = self._fetch_pdb_entries(comp_id)

        return {
            "data": {
                "comp_id": comp_id,
                "name": compound.get("name"),
                "formula": compound.get("formula"),
                "weight": compound.get("weight"),
                "first_observed_in": first_seen,
                "pdb_entries": pdb_ids[: self._MAX_PDB_ENTRIES] if pdb_ids else first_seen,
            },
            "metadata": {
                "source": "PDBe Graph API (ebi.ac.uk/pdbe)",
                "total_structures": len(pdb_ids) if pdb_ids else len(first_seen),
            },
        }