# Source code for tooluniverse.ebi_proteins_interactions_tool
# ebi_proteins_interactions_tool.py
"""
EBI Proteins Interactions tool for ToolUniverse.
Provides protein-protein interaction data from the EBI Proteins API,
sourced from IntAct. Returns experimentally validated binary interactions
with partner details and experiment counts.
API: https://www.ebi.ac.uk/proteins/api/proteins/interaction/
No authentication required.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL of the EBI Proteins REST API; endpoint paths such as
# "/proteins/interaction/<accession>" are appended to it when building requests.
EBI_PROTEINS_BASE_URL = "https://www.ebi.ac.uk/proteins/api"
# (Sphinx "[docs]" source-link marker from the HTML export; not part of the module.)
@register_tool("EBIProteinsInteractionsTool")
class EBIProteinsInteractionsTool(BaseTool):
    """
    Tool for querying EBI Proteins protein-protein interaction data.

    Supports two endpoints, selected through ``tool_config["fields"]["endpoint"]``:

    - ``"interactions"`` (default): interaction partners for a protein.
    - ``"interaction_details"``: interactions plus protein metadata,
      diseases and subcellular locations.

    Both endpoints call ``{EBI_PROTEINS_BASE_URL}/proteins/interaction/{accession}``.
    No authentication is sent with the request.

    Every public result is a dict: either ``{"data": ..., "metadata": ...}``
    on success or ``{"error": "<message>"}`` on failure.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the endpoint selected by *tool_config*.

        Args:
            tool_config: Tool configuration. ``"timeout"`` (seconds, default 30)
                and ``"fields" -> "endpoint"`` (default ``"interactions"``)
                are read here; the full dict is forwarded to ``BaseTool``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # Tolerate an explicit ``"fields": None`` in the configuration.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "interactions")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the EBI Proteins Interactions API call.

        Args:
            arguments: Call arguments; ``"accession"`` is the only key read.

        Returns:
            The endpoint result, or ``{"error": ...}`` describing the failure.
            Exceptions are converted to error dicts rather than propagated.
        """
        try:
            # ``arguments or {}`` lets a missing argument dict produce the
            # regular "accession is required" error instead of an AttributeError.
            return self._query(arguments or {})
        except requests.exceptions.Timeout:
            return {"error": f"EBI Proteins API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to EBI Proteins API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            if status == 400:
                return {
                    "error": "Invalid accession. Use a UniProt accession (e.g., P04637)."
                }
            return {"error": f"EBI Proteins API HTTP {status}"}
        except Exception as e:
            # Top-level boundary: report anything unexpected as an error dict.
            return {"error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured endpoint."""
        if self.endpoint == "interactions":
            return self._get_interactions(arguments)
        if self.endpoint == "interaction_details":
            return self._get_interaction_details(arguments)
        return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _clean_accession(arguments: Dict[str, Any]) -> str:
        """Return the whitespace-stripped accession, or ``""`` if missing or not a string."""
        raw = arguments.get("accession", "")
        return raw.strip() if isinstance(raw, str) else ""

    def _fetch_entries(self, accession: str) -> list:
        """Fetch the interaction payload for *accession* and return its dict entries.

        Returns an empty list when the decoded JSON is not a list; non-dict
        items inside the list are dropped.

        Raises:
            requests.exceptions.RequestException: on network or HTTP errors
                (translated to error dicts by ``run``).
        """
        # Percent-encode the accession so characters such as "/", "?" or "#"
        # cannot alter the request path or query string.
        encoded = quote(accession, safe="")
        url = f"{EBI_PROTEINS_BASE_URL}/proteins/interaction/{encoded}"
        response = requests.get(
            url,
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, list):
            return []
        return [entry for entry in payload if isinstance(entry, dict)]

    @staticmethod
    def _partner_records(entry: Dict[str, Any], accession: str, include_intact_ids: bool) -> list:
        """Build one record per interaction listed in *entry*.

        The partner is ``accession2`` (falling back to ``accession1`` when the
        key is absent); if that equals the query accession, ``accession1`` is
        reported instead. Self-interactions are therefore kept, with the query
        accession as partner.
        """
        records = []
        # ``or []`` treats a JSON null "interactions" value as an empty list.
        for interaction in entry.get("interactions") or []:
            partner_acc = interaction.get("accession2", interaction.get("accession1"))
            if partner_acc == accession:
                partner_acc = interaction.get("accession1")
            record = {
                "partner_accession": partner_acc,
                "gene_name": interaction.get("gene"),
                # A null count would break the ">" comparison and the sort below.
                "experiments": interaction.get("experiments") or 0,
                "organism_differ": interaction.get("organismDiffer", False),
            }
            if include_intact_ids:
                record["intact_id_a"] = interaction.get("interactor1")
                record["intact_id_b"] = interaction.get("interactor2")
            records.append(record)
        return records

    @staticmethod
    def _dedupe_by_partner(records: list) -> list:
        """Keep the highest-experiment record per partner, sorted by count descending."""
        best = {}
        for record in records:
            partner = record["partner_accession"]
            if partner not in best or record["experiments"] > best[partner]["experiments"]:
                best[partner] = record
        return sorted(best.values(), key=lambda rec: rec["experiments"], reverse=True)

    def _get_interactions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get protein-protein interaction partners.

        Returns:
            ``{"data": {"query_accession", "interactions"}, "metadata": {...}}``
            with one record per distinct partner accession, or an error dict
            when no accession is supplied.
        """
        accession = self._clean_accession(arguments)
        if not accession:
            return {"error": "accession is required (e.g., 'P04637')."}

        records = []
        for entry in self._fetch_entries(accession):
            records.extend(
                self._partner_records(entry, accession, include_intact_ids=True)
            )
        unique_interactions = self._dedupe_by_partner(records)

        return {
            "data": {
                "query_accession": accession,
                "interactions": unique_interactions,
            },
            "metadata": {
                "source": "EBI Proteins API / IntAct (ebi.ac.uk/proteins)",
                "total_interactions": len(unique_interactions),
            },
        }

    def _get_interaction_details(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get protein info with interactions, diseases, and locations.

        Protein-level fields are taken from the first returned entry;
        interactions, diseases and locations are aggregated over all entries.
        At most the 50 partners with the most experiments are returned, while
        ``metadata["total_interactions"]`` counts all distinct partners.
        """
        accession = self._clean_accession(arguments)
        if not accession:
            return {"error": "accession is required (e.g., 'P04637')."}

        entries = self._fetch_entries(accession)
        if not entries:
            return {"error": f"No interaction data found for {accession}"}

        # Extract protein metadata from the first entry.
        first_entry = entries[0]
        # NOTE(review): "protein_name" is filled from the entry's "accession"
        # field, not from a name field -- confirm this is intended.
        protein_name = first_entry.get("accession", accession)
        protein_existence = first_entry.get("proteinExistence")
        taxonomy = first_entry.get("taxonomy")
        organism = str(taxonomy) if taxonomy else None

        records = []
        diseases = set()
        locations = set()
        for entry in entries:
            records.extend(
                self._partner_records(entry, accession, include_intact_ids=False)
            )

            # Diseases: first non-empty of diseaseId / acronym / type.
            for disease in entry.get("diseases") or []:
                disease_name = (
                    disease.get("diseaseId")
                    or disease.get("acronym")
                    or disease.get("type")
                )
                if disease_name:
                    diseases.add(str(disease_name))

            # Subcellular locations: use the nested "locations" list when the
            # key is present, otherwise treat the item itself as the location.
            for loc in entry.get("subcellularLocations") or []:
                for subloc in loc.get("locations", [loc]) or []:
                    loc_name = (
                        subloc.get("value") if isinstance(subloc, dict) else str(subloc)
                    )
                    if loc_name:
                        locations.add(str(loc_name))

        ranked = self._dedupe_by_partner(records)

        return {
            "data": {
                "query_accession": accession,
                "protein_name": protein_name,
                "protein_existence": protein_existence,
                "organism": organism,
                "total_interaction_entries": len(entries),
                "top_interactions": ranked[:50],
                "diseases": sorted(diseases),
                "subcellular_locations": sorted(locations),
            },
            "metadata": {
                "source": "EBI Proteins API / IntAct (ebi.ac.uk/proteins)",
                "total_interactions": len(ranked),
            },
        }