Source code for tooluniverse.pdbe_sifts_tool
# pdbe_sifts_tool.py
"""
PDBe SIFTS Mapping tool for ToolUniverse.
SIFTS (Structure Integration with Function, Taxonomy and Sequences) provides
cross-referencing between PDB structures and UniProt proteins, enabling
structure-based discovery of best available crystal/EM structures for a protein.
API: https://www.ebi.ac.uk/pdbe/api/
No authentication required. Free public access.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
PDBE_API_BASE_URL = "https://www.ebi.ac.uk/pdbe/api"
[docs]
@register_tool("PDBeSIFTSTool")
class PDBeSIFTSTool(BaseTool):
"""
PDBe SIFTS Mapping tool for UniProt-PDB cross-referencing.
Provides ranked best structures for a protein, PDB-to-UniProt chain
mapping, and comprehensive structure coverage analysis.
No authentication required.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 30)
fields = tool_config.get("fields", {})
self.endpoint = fields.get("endpoint", "best_structures")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the PDBe SIFTS API call."""
try:
return self._query(arguments)
except requests.exceptions.Timeout:
return {"error": f"PDBe SIFTS API timed out after {self.timeout}s"}
except requests.exceptions.ConnectionError:
return {"error": "Failed to connect to PDBe SIFTS API"}
except requests.exceptions.HTTPError as e:
return {"error": f"PDBe SIFTS API HTTP error: {e.response.status_code}"}
except Exception as e:
return {"error": f"Unexpected error querying PDBe SIFTS API: {str(e)}"}
[docs]
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Route to appropriate endpoint."""
if self.endpoint == "best_structures":
return self._get_best_structures(arguments)
elif self.endpoint == "pdb_to_uniprot":
return self._get_pdb_to_uniprot(arguments)
elif self.endpoint == "uniprot_to_pdb":
return self._get_uniprot_to_pdb(arguments)
else:
return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs]
def _get_best_structures(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get best PDB structures for a UniProt protein, ranked by coverage and resolution."""
accession = arguments.get("uniprot_accession", "")
if not accession:
return {"error": "uniprot_accession parameter is required (e.g., P04637)"}
url = f"{PDBE_API_BASE_URL}/mappings/best_structures/{accession}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
entries = data.get(accession, [])
structures = []
for e in entries[:50]:
structures.append(
{
"pdb_id": e.get("pdb_id"),
"chain_id": e.get("chain_id"),
"uniprot_start": e.get("start"),
"uniprot_end": e.get("end"),
"resolution": e.get("resolution"),
"experimental_method": e.get("experimental_method"),
"coverage": e.get("coverage"),
"tax_id": e.get("tax_id"),
}
)
return {
"data": {
"uniprot_accession": accession,
"structures": structures,
"total_structures": len(entries),
},
"metadata": {
"source": "PDBe SIFTS - Best Structures",
"accession": accession,
},
}
[docs]
def _get_pdb_to_uniprot(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Map PDB entry chains to UniProt accessions."""
pdb_id = arguments.get("pdb_id", "")
if not pdb_id:
return {"error": "pdb_id parameter is required (e.g., 1tup)"}
pdb_id = pdb_id.lower()
url = f"{PDBE_API_BASE_URL}/mappings/uniprot/{pdb_id}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
entry_data = data.get(pdb_id, {})
uniprot_data = entry_data.get("UniProt", {})
proteins = []
for acc, info in uniprot_data.items():
chain_mappings = []
for m in info.get("mappings", [])[:20]:
chain_mappings.append(
{
"chain_id": m.get("chain_id"),
"pdb_start": m.get("start", {}).get("residue_number"),
"pdb_end": m.get("end", {}).get("residue_number"),
"uniprot_start": m.get("unp_start"),
"uniprot_end": m.get("unp_end"),
}
)
proteins.append(
{
"uniprot_accession": acc,
"name": info.get("identifier"),
"chain_mappings": chain_mappings,
}
)
return {
"data": {
"pdb_id": pdb_id,
"proteins": proteins,
"total_proteins": len(proteins),
},
"metadata": {
"source": "PDBe SIFTS - PDB to UniProt Mapping",
"pdb_id": pdb_id,
},
}
[docs]
def _get_uniprot_to_pdb(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get all PDB entries covering a UniProt protein."""
accession = arguments.get("uniprot_accession", "")
if not accession:
return {"error": "uniprot_accession parameter is required (e.g., P04637)"}
# Use best_structures endpoint which returns all PDB structures
url = f"{PDBE_API_BASE_URL}/mappings/best_structures/{accession}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
entries = data.get(accession, [])
# Group by PDB ID to show unique structures
pdb_entries = {}
for e in entries:
pdb_id = e.get("pdb_id", "")
if pdb_id not in pdb_entries:
pdb_entries[pdb_id] = {
"pdb_id": pdb_id,
"resolution": e.get("resolution"),
"experimental_method": e.get("experimental_method"),
"chains": [],
}
pdb_entries[pdb_id]["chains"].append(
{
"chain_id": e.get("chain_id"),
"uniprot_start": e.get("start"),
"uniprot_end": e.get("end"),
"coverage": e.get("coverage"),
}
)
# Sort by resolution (best first)
sorted_entries = sorted(
pdb_entries.values(),
key=lambda x: x.get("resolution") or 999,
)
return {
"data": {
"uniprot_accession": accession,
"pdb_entries": sorted_entries[:50],
"total_pdb_entries": len(pdb_entries),
"total_chain_mappings": len(entries),
},
"metadata": {
"source": "PDBe SIFTS - UniProt to PDB Mapping",
"accession": accession,
},
}