# Source code for tooluniverse.ebi_proteins_features_tool
# ebi_proteins_features_tool.py
"""
EBI Proteins API Feature Categories tool for ToolUniverse.
Provides access to specific feature categories from the EBI Proteins API:
- DOMAINS_AND_SITES: binding sites, DNA binding regions, motifs, regions
- MOLECULE_PROCESSING: signal peptides, transit peptides, chains, propeptides
- STRUCTURAL: secondary structure assignments (helix, strand, turn)
API: https://www.ebi.ac.uk/proteins/api/
No authentication required. Free public access.
"""
from collections import Counter
from typing import Dict, Any, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool
# Root URL of the EBI Proteins REST API; endpoint paths such as
# "/features/{accession}" are appended to it when building request URLs.
PROTEINS_API_BASE_URL = "https://www.ebi.ac.uk/proteins/api"
[docs]
class EBIProteinsFeaturesTool(BaseTool):
    """
    Tool for retrieving category-specific protein features from EBI Proteins API.

    Different from EBIProteinsExtTool (mutagenesis, PTM) - this covers
    domain/site annotations, molecule processing info, and secondary structure.

    No authentication required.

    Attributes:
        timeout: Per-request timeout in seconds, read from the ``timeout``
            key of the tool config (default 30).
        category: Feature category sent as the ``categories`` query
            parameter, read from ``fields.category`` of the tool config
            (default ``"DOMAINS_AND_SITES"``).
    """

    # Caps that keep the returned payload compact: at most MAX_FEATURES
    # features are returned, each carrying at most MAX_EVIDENCES evidences.
    MAX_FEATURES = 100
    MAX_EVIDENCES = 3

    # Human-readable labels used in the "source" metadata string; a category
    # that is not listed here is reported verbatim.
    CATEGORY_LABELS = {
        "DOMAINS_AND_SITES": "Domain/Site Features",
        "MOLECULE_PROCESSING": "Molecule Processing",
        "STRUCTURAL": "Secondary Structure",
    }

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and feature category from *tool_config*."""
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # "fields" may be absent or explicitly null in the config.
        fields = tool_config.get("fields") or {}
        self.category = fields.get("category", "DOMAINS_AND_SITES")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the EBI Proteins API features call.

        Args:
            arguments: Call arguments; ``accession`` is required.

        Returns:
            The ``{"data": ..., "metadata": ...}`` dict produced by
            ``_get_features`` on success, otherwise ``{"error": message}``.
            Request and parsing failures are converted into error dicts
            instead of being raised.
        """
        try:
            return self._get_features(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"EBI Proteins API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to EBI Proteins API"}
        except requests.exceptions.HTTPError as e:
            code = e.response.status_code if e.response is not None else "unknown"
            if code == 404:
                return {"error": f"Protein not found: {arguments.get('accession', '')}"}
            return {"error": f"EBI Proteins API HTTP error: {code}"}
        except Exception as e:
            # Boundary handler: this method reports every failure as an
            # error dict rather than raising.
            return {"error": f"Unexpected error querying EBI Proteins API: {str(e)}"}

    def _get_features(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get features for a specific category.

        Args:
            arguments: Call arguments; ``accession`` is required.

        Returns:
            ``{"data": ..., "metadata": ...}`` with up to ``MAX_FEATURES``
            compact features, the total feature count and a per-type count,
            or ``{"error": message}`` when the accession is missing or the
            response body is not a JSON object.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure or a non-2xx status (handled by ``run``).
        """
        accession = str(arguments.get("accession") or "").strip()
        if not accession:
            return {
                "error": "accession parameter is required (UniProt accession, e.g., P04637)"
            }

        # Percent-encode the accession so that characters such as "/", "?"
        # or "#" cannot change the request path or inject query parameters.
        url = f"{PROTEINS_API_BASE_URL}/features/{quote(accession, safe='')}"
        response = requests.get(
            url,
            params={"categories": self.category},
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()

        data = response.json()
        if not isinstance(data, dict):
            return {"error": "Unexpected response format from EBI Proteins API"}

        # Treat a missing or null "features" field as an empty list.
        raw_features = data.get("features") or []

        # Only the features that are actually returned get compacted; the
        # type distribution and total still cover every feature.
        features = [
            self._compact_feature(raw) for raw in raw_features[: self.MAX_FEATURES]
        ]
        type_counts = Counter(raw.get("type", "UNKNOWN") for raw in raw_features)

        label = self.CATEGORY_LABELS.get(self.category, self.category)
        return {
            "data": {
                "accession": data.get("accession"),
                "entry_name": data.get("entryName"),
                "sequence_length": len(data.get("sequence") or ""),
                "features": features,
                "total_features": len(raw_features),
                "feature_type_counts": dict(type_counts),
            },
            "metadata": {
                "source": f"EBI Proteins API - {label}",
                "accession": accession,
                "category": self.category,
            },
        }

    def _compact_feature(self, raw: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce one raw API feature to its type, position, description and evidences."""
        feature = {
            "type": raw.get("type"),
            "position_start": raw.get("begin"),
            "position_end": raw.get("end"),
            "description": raw.get("description"),
        }
        evidences = []
        for ev in (raw.get("evidences") or [])[: self.MAX_EVIDENCES]:
            # An evidence may carry no source, or an explicit null.
            src = ev.get("source") or {}
            evidences.append(
                {
                    "code": ev.get("code"),
                    "source": src.get("name"),
                    "id": src.get("id"),
                }
            )
        # The "evidences" key is omitted entirely when there is nothing to report.
        if evidences:
            feature["evidences"] = evidences
        return feature