Source code for tooluniverse.pdbe_kb_tool
# pdbe_kb_tool.py
"""
PDBe-KB (PDBe Knowledge Base) Graph API tool for ToolUniverse.
PDBe-KB is an aggregated knowledge base that integrates structural data from
PDB with functional annotations from 30+ partner resources (UniProt, CATH,
SCOP, Pfam, etc.). The Graph API provides access to ligand binding sites,
protein-protein interaction interfaces, structural summaries, and more.
API: https://www.ebi.ac.uk/pdbe/graph-api/
No authentication required.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
PDBE_KB_BASE_URL = "https://www.ebi.ac.uk/pdbe/graph-api"
[docs]
@register_tool("PDBe_KB_Tool")
class PDBe_KB_Tool(BaseTool):
"""
Tool for querying PDBe-KB (Knowledge Base) Graph API.
PDBe-KB aggregates structural biology knowledge including:
- Ligand binding sites mapped to UniProt positions
- Protein-protein interaction interfaces
- Structural coverage statistics
- Superposition clusters and best chain coverage
Data is indexed by UniProt accession and provides residue-level
annotations using UniProt numbering.
No authentication required.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 30)
fields = tool_config.get("fields", {})
self.endpoint = fields.get("endpoint", "summary_stats")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the PDBe-KB Graph API call."""
try:
return self._query(arguments)
except requests.exceptions.Timeout:
return {"error": f"PDBe-KB API timed out after {self.timeout}s"}
except requests.exceptions.ConnectionError:
return {"error": "Failed to connect to PDBe-KB API"}
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
acc = arguments.get("uniprot_accession", "unknown")
return {"error": f"No PDBe-KB data found for {acc}"}
return {"error": f"PDBe-KB API HTTP error: {e.response.status_code}"}
except Exception as e:
return {"error": f"Unexpected error querying PDBe-KB: {str(e)}"}
[docs]
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Route to appropriate PDBe-KB endpoint."""
if self.endpoint == "summary_stats":
return self._get_summary_stats(arguments)
elif self.endpoint == "ligand_sites":
return self._get_ligand_sites(arguments)
elif self.endpoint == "interface_residues":
return self._get_interface_residues(arguments)
elif self.endpoint == "superposition":
return self._get_superposition(arguments)
else:
return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs]
def _get_summary_stats(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get aggregated structural summary statistics for a protein."""
acc = arguments.get("uniprot_accession", "")
if not acc:
return {"error": "uniprot_accession parameter is required"}
url = f"{PDBE_KB_BASE_URL}/uniprot/summary_stats/{acc}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
if acc not in data:
return {"error": f"No summary data for {acc}"}
stats = data[acc]
return {
"data": {
"uniprot_accession": acc,
"pdbs": stats.get("pdbs"),
"ligands": stats.get("ligands"),
"interaction_partners": stats.get("interaction_partners"),
"annotations": stats.get("annotations"),
"similar_proteins": stats.get("similar_proteins"),
},
"metadata": {
"source": "PDBe-KB (PDBe Knowledge Base) Graph API",
},
}
[docs]
def _get_ligand_sites(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get ligand binding site residues for a protein."""
acc = arguments.get("uniprot_accession", "")
if not acc:
return {"error": "uniprot_accession parameter is required"}
url = f"{PDBE_KB_BASE_URL}/uniprot/ligand_sites/{acc}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
if acc not in data:
return {"error": f"No ligand binding data for {acc}"}
protein_data = data[acc]
ligands = []
max_ligands = 50 # Limit output size
for entry in protein_data.get("data", [])[:max_ligands]:
binding_residues = []
for res in entry.get("residues", [])[:20]:
binding_residues.append(
{
"start": res.get("startIndex"),
"end": res.get("endIndex"),
"pdb_entries": res.get("allPDBEntries", [])[:10],
}
)
additional = entry.get("additionalData", {})
ligands.append(
{
"name": entry.get("name"),
"accession": entry.get("accession"),
"binding_residues": binding_residues,
"is_cofactor": bool(additional.get("coFactorId")),
"is_solvent": additional.get("isSolvent", False),
"chembl_id": additional.get("chemblId") or None,
"drugbank_id": additional.get("drugBankId") or None,
}
)
return {
"data": {
"uniprot_accession": acc,
"protein_length": protein_data.get("length"),
"ligands": ligands,
"total_ligands": len(protein_data.get("data", [])),
},
"metadata": {
"source": "PDBe-KB (PDBe Knowledge Base) Graph API",
},
}
[docs]
def _get_interface_residues(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get protein-protein interaction interface residues."""
acc = arguments.get("uniprot_accession", "")
if not acc:
return {"error": "uniprot_accession parameter is required"}
url = f"{PDBE_KB_BASE_URL}/uniprot/interface_residues/{acc}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
if acc not in data:
return {"error": f"No interface data for {acc}"}
protein_data = data[acc]
partners = []
max_partners = 30
for entry in protein_data.get("data", [])[:max_partners]:
interface_residues = []
for res in entry.get("residues", [])[:30]:
interface_residues.append(
{
"start": res.get("startIndex"),
"end": res.get("endIndex"),
"pdb_entries": res.get("allPDBEntries", [])[:10],
}
)
partners.append(
{
"partner_name": entry.get("name"),
"partner_accession": entry.get("accession"),
"interface_residues": interface_residues,
}
)
return {
"data": {
"uniprot_accession": acc,
"protein_length": protein_data.get("length"),
"interaction_partners": partners,
"total_partners": len(protein_data.get("data", [])),
},
"metadata": {
"source": "PDBe-KB (PDBe Knowledge Base) Graph API",
},
}
[docs]
def _get_superposition(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get structural superposition clusters for a protein.
Returns clusters of structurally superposed PDB chains grouped by
protein segments. Each cluster contains a representative structure
and aligned member structures.
"""
acc = arguments.get("uniprot_accession", "")
if not acc:
return {
"error": "uniprot_accession parameter is required (e.g., 'P04637' for TP53, 'P00533' for EGFR)."
}
url = f"{PDBE_KB_BASE_URL}/uniprot/superposition/{acc}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
if acc not in data:
return {"error": f"No superposition data for {acc}"}
segments_data = data[acc]
segments = []
for seg in segments_data[:10]:
clusters = seg.get("clusters", [])
cluster_results = []
for cluster in clusters[:10]:
members = []
representative = None
for member in cluster[:20]:
entry = {
"pdb_id": member.get("pdb_id"),
"auth_asym_id": member.get("auth_asym_id"),
"struct_asym_id": member.get("struct_asym_id"),
"entity_id": member.get("entity_id"),
"is_representative": member.get("is_representative", False),
}
if entry["is_representative"]:
representative = entry
members.append(entry)
cluster_results.append(
{
"representative": representative,
"total_members": len(cluster),
"members": members,
}
)
segments.append(
{
"segment_start": seg.get("segment_start"),
"segment_end": seg.get("segment_end"),
"num_clusters": len(clusters),
"clusters": cluster_results,
}
)
return {
"data": {
"uniprot_accession": acc,
"segments": segments,
"total_segments": len(segments_data),
},
"metadata": {
"source": "PDBe-KB (PDBe Knowledge Base) Graph API",
},
}