Source code for tooluniverse.ena_portal_tool
# ena_portal_tool.py
"""
ENA Portal API tool for ToolUniverse.
The European Nucleotide Archive (ENA) Portal API provides programmatic
access to search studies, samples, and sequences across the world's
largest nucleotide sequence repository. Supports taxonomy-based queries,
text searches, and field selection.
API: https://www.ebi.ac.uk/ena/portal/api
No authentication required. Free for all use.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
ENA_PORTAL_BASE_URL = "https://www.ebi.ac.uk/ena/portal/api"
[docs]
@register_tool("ENAPortalTool")
class ENAPortalTool(BaseTool):
    """
    Tool for querying the European Nucleotide Archive (ENA) Portal API.

    Supports searching for studies and samples, and counting records, with
    filtering expressed in the ENA query syntax (for example
    ``tax_tree(9606)`` or ``description="cancer"``) and optional field
    selection. No authentication is required.

    The operation performed by an instance is fixed at construction time by
    ``tool_config["fields"]["endpoint_type"]``: one of ``"search_studies"``
    (the default), ``"search_samples"`` or ``"count"``.

    Every public call returns a dict. On success it has ``"data"`` and
    ``"metadata"`` keys; on failure it has a single ``"error"`` key with a
    human-readable message.
    """

    # Number of rows requested when the caller does not supply ``limit``.
    DEFAULT_LIMIT = 10
    # Hard ceiling on rows requested from, and returned by, a search.
    MAX_LIMIT = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the endpoint this instance serves.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default
                30) and ``fields.endpoint_type`` (default
                ``"search_studies"``) are read from it.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` tolerates configs that carry an explicit ``"fields": None``.
        fields_cfg = tool_config.get("fields") or {}
        self.endpoint_type = fields_cfg.get("endpoint_type", "search_studies")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the ENA Portal API call and never raise.

        Args:
            arguments: Call arguments forwarded to the configured endpoint
                handler (see ``_search_studies``, ``_search_samples`` and
                ``_count``).

        Returns:
            The handler's result dict, or ``{"error": ...}`` if the request
            timed out, could not connect, returned an HTTP error status, or
            failed in any other way.
        """
        try:
            return self._dispatch(arguments)
        except requests.exceptions.Timeout:
            return {
                "error": f"ENA Portal API request timed out after {self.timeout} seconds"
            }
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to ENA Portal API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # ``e.response`` can be None when the error was not raised by
            # ``raise_for_status``; getattr avoids a secondary AttributeError.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"ENA Portal API HTTP error: {status}"}
        except Exception as e:
            # Top-level boundary: callers expect an error dict, not a raise.
            return {"error": f"Unexpected error querying ENA Portal: {str(e)}"}

    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route the call to the handler selected by ``self.endpoint_type``."""
        handlers = {
            "search_studies": self._search_studies,
            "search_samples": self._search_samples,
            "count": self._count,
        }
        handler = handlers.get(self.endpoint_type)
        if handler is None:
            return {"error": f"Unknown endpoint_type: {self.endpoint_type}"}
        return handler(arguments)

    def _search_studies(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search ENA studies by text query or taxonomy.

        Args:
            arguments: ``query`` (required ENA query string), ``limit``
                (optional, clamped to 1..100, default 10) and ``fields``
                (optional comma-separated string or list of field names).

        Returns:
            ``{"data": [...], "metadata": {...}}`` or ``{"error": ...}``.
        """
        return self._search(
            arguments,
            result="study",
            default_fields="study_accession,study_title,center_name,first_public,description",
            query_hint="'description=\"cancer\"' or 'tax_tree(9606)'",
        )

    def _search_samples(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search ENA samples by text query or taxonomy.

        Args:
            arguments: ``query`` (required ENA query string), ``limit``
                (optional, clamped to 1..100, default 10) and ``fields``
                (optional comma-separated string or list of field names).

        Returns:
            ``{"data": [...], "metadata": {...}}`` or ``{"error": ...}``.
        """
        return self._search(
            arguments,
            result="sample",
            default_fields=(
                "sample_accession,sample_alias,description,tax_id,"
                "scientific_name,first_public"
            ),
            query_hint="'tax_tree(9606)' or 'description=\"liver\"'",
        )

    @classmethod
    def _normalize_limit(cls, value: Any) -> int:
        """
        Coerce a caller-supplied ``limit`` into the range 1..MAX_LIMIT.

        Args:
            value: Raw ``limit`` argument; ``None`` selects DEFAULT_LIMIT.

        Returns:
            An integer between 1 and MAX_LIMIT inclusive.

        Raises:
            ValueError: If ``value`` cannot be interpreted as an integer.
        """
        if value is None:
            return cls.DEFAULT_LIMIT
        try:
            limit = int(value)
        except (TypeError, ValueError, OverflowError):
            raise ValueError(
                f"limit must be an integer between 1 and {cls.MAX_LIMIT}"
            ) from None
        # The lower bound matters: the ENA API treats limit=0 as "no limit",
        # which would download the full result set only for it to be sliced
        # away, and a negative value would mis-slice the returned rows.
        return max(1, min(limit, cls.MAX_LIMIT))

    def _search(
        self,
        arguments: Dict[str, Any],
        result: str,
        default_fields: str,
        query_hint: str,
    ) -> Dict[str, Any]:
        """
        Shared implementation of the ``/search`` endpoint handlers.

        Args:
            arguments: Caller arguments (``query``, ``limit``, ``fields``).
            result: ENA result type to search (``"study"`` or ``"sample"``).
            default_fields: Comma-separated fields used when the caller
                supplies none.
            query_hint: Example queries embedded in the "query required"
                error message.

        Returns:
            ``{"data": [...], "metadata": {...}}`` or ``{"error": ...}``.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP
                failure; ``run`` converts these into error dicts.
        """
        query = arguments.get("query", "")
        if not query:
            return {"error": f"query parameter is required (e.g., {query_hint})"}

        try:
            limit = self._normalize_limit(arguments.get("limit"))
        except ValueError as exc:
            return {"error": str(exc)}

        fields = arguments.get("fields") or default_fields
        if isinstance(fields, (list, tuple)):
            # The API takes one comma-separated value; requests would
            # otherwise encode a list as repeated ``fields=`` parameters.
            fields = ",".join(str(name) for name in fields)

        params = {
            "result": result,
            "query": query,
            "limit": limit,
            "format": "json",
            "fields": fields,
        }
        response = requests.get(
            f"{ENA_PORTAL_BASE_URL}/search",
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()

        # An empty body (e.g. HTTP 204) carries no records; report zero hits
        # instead of letting the JSON decoder fail on it.
        if response.status_code == 204 or not response.text.strip():
            raw = []
        else:
            raw = response.json()

        if isinstance(raw, dict) and "message" in raw:
            return {"error": f"ENA Portal API error: {raw['message']}"}
        if not isinstance(raw, list):
            return {
                "error": f"Unexpected ENA Portal API response type: {type(raw).__name__}"
            }

        results = raw[:limit]
        return {
            "data": results,
            "metadata": {
                "source": "ENA Portal API",
                "query": query,
                "returned": len(results),
                "endpoint": f"search/{result}",
            },
        }

    def _count(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Count records matching a query in ENA.

        Args:
            arguments: ``query`` (required ENA query string) and
                ``result_type`` (optional ENA result type, default
                ``"study"``).

        Returns:
            ``{"data": {"count", "result_type", "query"}, "metadata": {...}}``
            or ``{"error": ...}``.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP
                failure; ``run`` converts these into error dicts.
        """
        query = arguments.get("query", "")
        result_type = arguments.get("result_type") or "study"
        if not query:
            return {"error": "query parameter is required"}

        params = {
            "result": result_type,
            "query": query,
        }
        response = requests.get(
            f"{ENA_PORTAL_BASE_URL}/count",
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()

        text = response.text.strip()
        # Response is "count\nNUMBER" format; the number is the last line
        # whether or not the header line is present.
        lines = text.splitlines()
        try:
            count_val = int(lines[-1])
        except (IndexError, ValueError):
            return {"error": f"Unexpected ENA Portal count response: {text[:200]!r}"}

        return {
            "data": {
                "count": count_val,
                "result_type": result_type,
                "query": query,
            },
            "metadata": {
                "source": "ENA Portal API",
                "endpoint": "count",
            },
        }