Source code for tooluniverse.ebi_proteins_coordinates_tool
# ebi_proteins_coordinates_tool.py
"""
EBI Proteins Coordinates tool for ToolUniverse.
The EBI Proteins API Coordinates endpoint maps UniProt protein positions to
genomic coordinates at exon-level resolution. This enables translation between
protein residue numbering and chromosomal positions, essential for connecting
variant-level information across protein and genome databases.
API: https://www.ebi.ac.uk/proteins/api/coordinates/
No authentication required.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the EBI Proteins REST API; endpoint paths such as
# "/coordinates/<accession>" are appended to it when building request URLs.
EBI_PROTEINS_BASE_URL = "https://www.ebi.ac.uk/proteins/api"
[docs]
@register_tool("EBIProteinsCoordinatesTool")
class EBIProteinsCoordinatesTool(BaseTool):
    """
    Tool for querying EBI Proteins API coordinate mappings.

    Supports:
    - Map protein positions to genomic coordinates (exon-level)
    - Get protein-to-genome coordinate mappings for a UniProt accession

    No authentication required.
    """

    # Caps on how much of the API response is echoed back to the caller.
    # The untruncated counts are still reported ("total_transcript_mappings"
    # in the metadata and "num_exons" on each mapping).
    MAX_TRANSCRIPT_MAPPINGS = 10
    MAX_EXONS_PER_TRANSCRIPT = 20

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration dictionary.

        Args:
            tool_config: Tool configuration. Reads the optional ``timeout``
                key (request timeout in seconds, default 30) and the optional
                ``fields.endpoint`` key (default ``"get_coordinates"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers a config where "fields" is present but null.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "get_coordinates")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the EBI Proteins Coordinates API call.

        This is the error boundary of the tool: every failure is converted
        into an ``{"error": ...}`` dictionary instead of being raised.

        Args:
            arguments: Call arguments; see :meth:`_get_coordinates`.

        Returns:
            The result dictionary produced by the selected endpoint, or a
            dictionary with a single ``"error"`` key describing the failure.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"EBI Proteins API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to EBI Proteins API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            if status == 404:
                return {
                    "error": "Protein not found. Provide a valid UniProt accession (e.g., 'P04637' for TP53)."
                }
            if status == 400:
                return {"error": "Bad request. Check the UniProt accession format."}
            return {"error": f"EBI Proteins API HTTP {status}"}
        except Exception as e:
            # Last-resort boundary so the tool always returns a dictionary.
            return {"error": f"Unexpected error: {e}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured endpoint."""
        if self.endpoint == "get_coordinates":
            return self._get_coordinates(arguments)
        return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _position(location: Any, edge: str) -> Any:
        """Return ``location[edge]["position"]``, or None if any level is missing or null."""
        return ((location or {}).get(edge) or {}).get("position")

    def _build_exon(self, exon: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one exon entry into protein/genome start and end positions."""
        protein_loc = exon.get("proteinLocation")
        genome_loc = exon.get("genomeLocation")
        return {
            "exon_id": exon.get("id"),
            "protein_start": self._position(protein_loc, "begin"),
            "protein_end": self._position(protein_loc, "end"),
            "genome_start": self._position(genome_loc, "begin"),
            "genome_end": self._position(genome_loc, "end"),
        }

    def _build_mapping(self, coordinate: Dict[str, Any]) -> Dict[str, Any]:
        """Summarise one ``gnCoordinate`` entry (one transcript mapping)."""
        genomic_loc = coordinate.get("genomicLocation") or {}
        exons = genomic_loc.get("exon") or []
        return {
            "ensembl_gene_id": coordinate.get("ensemblGeneId"),
            "ensembl_transcript_id": coordinate.get("ensemblTranscriptId"),
            "ensembl_translation_id": coordinate.get("ensemblTranslationId"),
            "chromosome": genomic_loc.get("chromosome"),
            "start": genomic_loc.get("start"),
            "end": genomic_loc.get("end"),
            "reverse_strand": genomic_loc.get("reverseStrand"),
            # Full exon count, even when the "exons" list below is truncated.
            "num_exons": len(exons),
            "exons": [
                self._build_exon(exon)
                for exon in exons[: self.MAX_EXONS_PER_TRANSCRIPT]
            ],
        }

    def _get_coordinates(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get protein-to-genomic coordinate mappings.

        Args:
            arguments: Must contain ``"accession"``, a non-blank string
                holding a UniProt accession.

        Returns:
            ``{"data": ..., "metadata": ...}`` on success, or
            ``{"error": ...}`` when the accession is missing/blank or the
            response body is not a JSON object.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure or a non-2xx HTTP status (handled by :meth:`run`).
        """
        raw_accession = arguments.get("accession", "")
        # Strip BEFORE the emptiness check so a whitespace-only value is
        # rejected instead of producing a request to the bare /coordinates/
        # path; non-string values are rejected the same way.
        accession = raw_accession.strip() if isinstance(raw_accession, str) else ""
        if not accession:
            return {
                "error": "accession is required (UniProt accession, e.g., 'P04637' for TP53, 'P00533' for EGFR)."
            }

        # Percent-encode the caller-supplied value so characters such as
        # '/', '?' or '#' cannot change the path that is requested.
        encoded_accession = quote(accession, safe="")
        url = f"{EBI_PROTEINS_BASE_URL}/coordinates/{encoded_accession}"
        response = requests.get(
            url, timeout=self.timeout, headers={"Accept": "application/json"}
        )
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, dict):
            return {"error": "Unexpected response format from EBI Proteins API"}

        # ``or []`` guards against keys that are present but null.
        gene_info = [
            {"value": gene.get("value"), "type": gene.get("type")}
            for gene in data.get("gene") or []
        ]

        gn_coordinates = data.get("gnCoordinate") or []
        mappings = [
            self._build_mapping(coordinate)
            for coordinate in gn_coordinates[: self.MAX_TRANSCRIPT_MAPPINGS]
        ]

        return {
            "data": {
                "accession": data.get("accession"),
                "protein_name": data.get("name"),
                "taxid": data.get("taxid"),
                "gene": gene_info,
                "coordinate_mappings": mappings,
            },
            "metadata": {
                "source": "EBI Proteins API (ebi.ac.uk/proteins/api)",
                "total_transcript_mappings": len(gn_coordinates),
                "returned_mappings": len(mappings),
            },
        }