Source code for tooluniverse.ebi_proteins_ext_tool
# ebi_proteins_ext_tool.py
"""
EBI Proteins API Extended tool for ToolUniverse.
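Extended endpoints for the EBI Proteins API covering mutagenesis data,
post-translational modification (PTM) proteomics evidence, sequence variation,
sequence features, antigenic regions, genomic coordinates, and proteomics
peptide evidence.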
API: https://www.ebi.ac.uk/proteins/api/
No authentication required. Free public access.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
PROTEINS_API_BASE_URL = "https://www.ebi.ac.uk/proteins/api"
[docs]
@register_tool("EBIProteinsExtTool")
class EBIProteinsExtTool(BaseTool):
"""
Extended tool for EBI Proteins API covering mutagenesis and PTM data.
These endpoints provide detailed mutagenesis experiment results and
mass spectrometry-based post-translational modification evidence
mapped to UniProt protein sequences.
Supports: mutagenesis, proteomics PTM, variation, features, antigen,
coordinates, and proteomics peptide evidence (one endpoint per tool instance).
No authentication required.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """
    Configure the tool from its configuration mapping.

    Reads the optional ``timeout`` entry (default 30), which is later passed
    to ``requests.get``, and the ``endpoint`` entry nested under ``fields``
    (default ``"mutagenesis"``), which selects the handler used by ``_query``.
    """
    super().__init__(tool_config)
    self.timeout = tool_config.get("timeout", 30)
    self.endpoint = tool_config.get("fields", {}).get("endpoint", "mutagenesis")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execute the configured EBI Proteins API call.

    Delegates to ``_query`` and returns its result unchanged. Any exception
    derived from ``Exception`` that ``_query`` raises is translated into an
    ``{"error": <message>}`` dictionary instead of propagating.
    """
    try:
        result = self._query(arguments)
    except requests.exceptions.Timeout:
        message = f"EBI Proteins API timed out after {self.timeout}s"
    except requests.exceptions.ConnectionError:
        message = "Failed to connect to EBI Proteins API"
    except requests.exceptions.HTTPError as e:
        message = f"EBI Proteins API HTTP error: {e.response.status_code}"
    except Exception as e:
        message = f"Unexpected error querying EBI Proteins API: {str(e)}"
    else:
        # No failure: hand back whatever the endpoint handler produced.
        return result
    return {"error": message}
[docs]
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Route to appropriate endpoint."""
if self.endpoint == "mutagenesis":
return self._get_mutagenesis(arguments)
elif self.endpoint == "proteomics_ptm":
return self._get_proteomics_ptm(arguments)
elif self.endpoint == "variation":
return self._get_variation(arguments)
elif self.endpoint == "features":
return self._get_features(arguments)
elif self.endpoint == "antigen":
return self._get_antigen(arguments)
elif self.endpoint == "coordinates":
return self._get_coordinates(arguments)
elif self.endpoint == "proteomics":
return self._get_proteomics(arguments)
else:
return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs]
def _get_mutagenesis(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get mutagenesis experiment data for a protein."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
url = f"{PROTEINS_API_BASE_URL}/mutagenesis/{accession}"
headers = {"Accept": "application/json"}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
data = response.json()
features = []
for f in data.get("features", []):
# Extract evidence details
evidences = []
for ev in f.get("evidences", []):
src = ev.get("source", {})
evidences.append(
{
"code": ev.get("code"),
"source_name": src.get("name"),
"source_id": src.get("id"),
"source_url": src.get("url"),
}
)
features.append(
{
"type": f.get("type"),
"position_start": f.get("begin"),
"position_end": f.get("end"),
"original_sequence": f.get("alternativeSequence"),
"description": f.get("description"),
"evidences": evidences[:5],
}
)
return {
"data": {
"accession": data.get("accession"),
"entry_name": data.get("entryName"),
"gene_name": None,
"features": features[:100],
"total_features": len(data.get("features", [])),
},
"metadata": {
"source": "EBI Proteins API - Mutagenesis",
"accession": accession,
},
}
[docs]
def _get_proteomics_ptm(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get post-translational modification evidence from mass spec proteomics."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
url = f"{PROTEINS_API_BASE_URL}/proteomics-ptm/{accession}"
headers = {"Accept": "application/json"}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
data = response.json()
features = []
for f in data.get("features", []):
# Extract source databases
evidences = []
for ev in f.get("evidences", []):
src = ev.get("source", {})
props = src.get("properties", {})
evidences.append(
{
"source": src.get("name"),
"id": src.get("id"),
"url": src.get("url"),
"properties": props,
}
)
features.append(
{
"type": f.get("type"),
"position_start": f.get("begin"),
"position_end": f.get("end"),
"description": f.get("description"),
"evidences": evidences[:5],
}
)
return {
"data": {
"accession": data.get("accession"),
"entry_name": data.get("entryName"),
"features": features[:100],
"total_features": len(data.get("features", [])),
},
"metadata": {
"source": "EBI Proteins API - Proteomics PTM",
"accession": accession,
},
}
[docs]
def _get_variation(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get protein sequence variants from multiple sources."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
source_type = arguments.get("source_type")
disease_only = arguments.get("disease_only", False)
url = f"{PROTEINS_API_BASE_URL}/variation/{accession}"
headers = {"Accept": "application/json"}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
data = response.json()
raw_features = data.get("features", [])
# Filter by source type if specified
if source_type:
raw_features = [
f for f in raw_features if f.get("sourceType") == source_type
]
# Filter to disease-associated only
if disease_only:
raw_features = [
f
for f in raw_features
if any(a.get("disease") for a in f.get("association", []))
]
# Count by source
source_counts = {}
for f in data.get("features", []):
src = f.get("sourceType", "unknown")
source_counts[src] = source_counts.get(src, 0) + 1
variants = []
for f in raw_features[:100]:
# Extract clinical significance
clinical = []
for cs in f.get("clinicalSignificances", []):
clinical.append(
{
"type": cs.get("type"),
"sources": cs.get("sources", []),
}
)
# Extract disease associations
associations = []
for a in f.get("association", []):
associations.append(
{
"name": a.get("name"),
"description": a.get("description"),
"is_disease": a.get("disease", False),
}
)
# Extract cross-references (first 3)
xrefs = []
for x in f.get("xrefs", [])[:3]:
xrefs.append(
{
"database": x.get("name"),
"id": x.get("id"),
"url": x.get("url"),
}
)
variants.append(
{
"position_start": f.get("begin"),
"position_end": f.get("end"),
"wild_type": f.get("wildType"),
"alternative": f.get("alternativeSequence"),
"source_type": f.get("sourceType"),
"clinical_significances": clinical if clinical else None,
"associations": associations if associations else None,
"xrefs": xrefs if xrefs else None,
}
)
return {
"data": {
"accession": data.get("accession"),
"entry_name": data.get("entryName"),
"variants": variants,
"total_variants": len(raw_features),
"total_all_sources": len(data.get("features", [])),
"source_counts": source_counts,
},
"metadata": {
"source": "EBI Proteins API - Variation",
"accession": accession,
},
}
[docs]
def _get_features(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get protein features filtered by category."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
category = arguments.get("category", "DOMAINS_AND_SITES")
url = f"{PROTEINS_API_BASE_URL}/features/{accession}"
params = {"categories": category}
headers = {"Accept": "application/json"}
response = requests.get(
url, params=params, headers=headers, timeout=self.timeout
)
response.raise_for_status()
data = response.json()
features = []
for f in data.get("features", []):
evidences = []
for ev in f.get("evidences", [])[:3]:
src = ev.get("source", {})
evidences.append(
{
"code": ev.get("code"),
"source_name": src.get("name"),
"source_id": src.get("id"),
}
)
features.append(
{
"type": f.get("type"),
"category": f.get("category"),
"position_start": f.get("begin"),
"position_end": f.get("end"),
"description": f.get("description"),
"evidences": evidences if evidences else None,
}
)
return {
"data": {
"accession": data.get("accession"),
"entry_name": data.get("entryName"),
"sequence_length": data.get("sequence", None),
"category_queried": category,
"features": features[:100],
"total_features": len(data.get("features", [])),
},
"metadata": {
"source": "EBI Proteins API - Features",
"accession": accession,
},
}
[docs]
def _get_antigen(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get antigenic regions for a protein (useful for antibody design)."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
url = f"{PROTEINS_API_BASE_URL}/antigen/{accession}"
headers = {"Accept": "application/json"}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
data = response.json()
regions = []
for f in data.get("features", []):
evidences = []
for ev in f.get("evidences", [])[:3]:
src = ev.get("source", {})
evidences.append(
{
"code": ev.get("code"),
"source_name": src.get("name"),
"source_id": src.get("id"),
}
)
regions.append(
{
"type": f.get("type"),
"position_start": f.get("begin"),
"position_end": f.get("end"),
"match_score": f.get("matchScore"),
"antigen_sequence": f.get("antigenSequence"),
"evidences": evidences if evidences else None,
}
)
return {
"data": {
"accession": data.get("accession"),
"entry_name": data.get("entryName"),
"antigenic_regions": regions,
"total_regions": len(regions),
},
"metadata": {
"source": "EBI Proteins API - Antigen",
"accession": accession,
},
}
[docs]
def _get_coordinates(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get genomic coordinates for a protein."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
url = f"{PROTEINS_API_BASE_URL}/coordinates/{accession}"
headers = {"Accept": "application/json"}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
data = response.json()
mappings = []
for gc in data.get("gnCoordinate", []):
gen_loc = gc.get("genomicLocation", {})
exons = gen_loc.get("exon", [])
mappings.append(
{
"ensembl_gene_id": gc.get("ensemblGeneId"),
"ensembl_transcript_id": gc.get("ensemblTranscriptId"),
"chromosome": gen_loc.get("chromosome"),
"start": gen_loc.get("start"),
"end": gen_loc.get("end"),
"reverseStrand": gen_loc.get("reverseStrand"),
"num_exons": len(exons),
}
)
return {
"data": {
"accession": data.get("accession"),
"name": data.get("name"),
"sequence_length": len(data.get("sequence", "")),
"genomic_mappings": mappings,
"total_mappings": len(mappings),
},
"metadata": {
"source": "EBI Proteins API - Coordinates",
"accession": accession,
},
}
[docs]
def _get_proteomics(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get proteomics peptide evidence for a protein."""
accession = arguments.get("accession", "")
if not accession:
return {
"error": "accession parameter is required (UniProt accession, e.g., P04637)"
}
url = f"{PROTEINS_API_BASE_URL}/proteomics/{accession}"
headers = {"Accept": "application/json"}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
data = response.json()
peptides = []
for f in data.get("features", []):
evidences = []
for ev in f.get("evidences", [])[:3]:
src = ev.get("source", {})
evidences.append(
{
"source": src.get("name"),
"id": src.get("id"),
}
)
peptides.append(
{
"type": f.get("type"),
"position_start": f.get("begin"),
"position_end": f.get("end"),
"peptide": f.get("peptide"),
"unique": f.get("unique"),
"evidences": evidences if evidences else None,
}
)
return {
"data": {
"accession": data.get("accession"),
"entry_name": data.get("entryName"),
"peptides": peptides[:100],
"total_peptides": len(data.get("features", [])),
},
"metadata": {
"source": "EBI Proteins API - Proteomics",
"accession": accession,
},
}