Source code for tooluniverse.ebi_search_tool
"""
EBI Search API Tool
This tool provides access to the EBI Search API, a unified search interface
across 160+ EBI data resources including Ensembl, UniProt, InterPro, and more.
"""
import requests
from typing import Any, Dict, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
[docs]
@register_tool("EBISearchRESTTool")
class EBISearchRESTTool(BaseTool):
"""
EBI Search API tool.
Generic wrapper for EBI Search API endpoints defined in ebi_search_tools.json.
"""
[docs]
def __init__(self, tool_config: Dict):
super().__init__(tool_config)
self.base_url = "https://www.ebi.ac.uk/ebisearch/ws/rest"
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
)
self.timeout = (
120 # Increased to 120s - facet queries can be very slow on EBI API
)
[docs]
def _build_url(self, args: Dict[str, Any]) -> str:
"""Build URL from endpoint template and arguments"""
endpoint_template = self.tool_config["fields"].get("endpoint", "")
# If endpoint template is provided, use it
if endpoint_template:
url = endpoint_template
# Replace placeholders in URL
for k, v in args.items():
url = url.replace(f"{{{k}}}", str(v))
return url
# Build URL based on tool name and arguments
tool_name = self.tool_config.get("name", "")
domain = args.get("domain", "")
entry_id = args.get("entry_id", "")
# For listing domains, use base URL
if tool_name == "ebi_list_domains":
return self.base_url
# For getting entry, use /entry/ path format
# EBI Search requires the entry ID format from search results (e.g., "P53_HUMAN" not "P04637")
# We'll use search to find the correct ID if direct lookup fails
if tool_name == "ebi_get_entry" and domain and entry_id:
return f"{self.base_url}/{domain}/entry/{entry_id}"
# For domain info or fields, use domain path
if domain:
if tool_name in ["ebi_get_domain_info", "ebi_get_domain_fields"]:
return f"{self.base_url}/{domain}"
# For search operations, domain is in path
return f"{self.base_url}/{domain}"
return self.base_url
[docs]
def _build_params(self, args: Dict[str, Any]) -> Dict[str, Any]:
"""Build query parameters for EBI Search API"""
params = {}
tool_name = self.tool_config.get("name", "")
# For entry retrieval, fields can be specified
if tool_name == "ebi_get_entry":
if "fields" in args:
params["fields"] = args["fields"]
params["format"] = args.get("format", "json")
return params
# For domain info/fields, no query params needed (or just format)
if tool_name in [
"ebi_get_domain_info",
"ebi_get_domain_fields",
"ebi_list_domains",
]:
params["format"] = args.get("format", "json")
return params
# For search operations, include query parameters
if "query" in args:
params["query"] = args["query"]
if "size" in args:
params["size"] = args["size"]
if "format" in args:
params["format"] = args.get("format", "json")
else:
params["format"] = "json"
# Facet parameters - EBI Search requires facets to be available for the domain
# Users should check available facets first using ebi_get_domain_info
if "facets" in args and args["facets"]:
# Facets should be comma-separated or space-separated
facets = args["facets"]
if isinstance(facets, list):
facets = ",".join(facets)
params["facets"] = facets
if "facetcount" in args:
params["facetcount"] = args["facetcount"]
# Field selection
if "fields" in args:
params["fields"] = args["fields"]
# Pagination
if "start" in args:
params["start"] = args["start"]
if "page" in args:
params["page"] = args["page"]
# Sorting
if "sort" in args:
params["sort"] = args["sort"]
return params
[docs]
def _find_entry_id_via_search(
self, domain: str, query: str
) -> Optional[Dict[str, Any]]:
"""Find entry ID via search and return entry data"""
try:
# Search for the entry
search_url = f"{self.base_url}/{domain}"
search_params = {"query": query, "size": 1, "format": "json"}
search_response = self.session.get(
search_url, params=search_params, timeout=self.timeout
)
search_response.raise_for_status()
search_data = search_response.json()
entries = search_data.get("entries", [])
if entries:
# Found entry via search - return it with note
entry = entries[0]
return {
"status": "success",
"data": entry,
"url": search_response.url,
"note": f"Entry found via search. EBI Search get_entry endpoint requires specific ID format. Use ebi_search_domain to find entries, then use the 'id' field from results for ebi_get_entry.",
"search_used": True,
"entry_id_from_search": entry.get("id", ""),
"suggestion": f"For direct entry access, use the 'id' field from search results (e.g., '{entry.get('id', '')}') instead of accession numbers.",
}
except Exception:
pass
return None
[docs]
def _extract_data(self, data: Dict, extract_path: Optional[str] = None) -> Any:
"""Extract specific data from API response"""
if not extract_path:
return data
# Handle different extraction paths
if extract_path == "entries":
return data.get("entries", [])
elif extract_path == "facets":
return data.get("facets", {})
elif extract_path == "hitCount":
return data.get("hitCount", 0)
elif extract_path == "domain":
# Domain info is typically at root level or in 'domain' key
if "domain" in data:
return data["domain"]
# Sometimes domain info is the root object itself
return data
elif extract_path == "fields":
# Fields can be in 'fields' key or 'domain.fields'
if "fields" in data:
return data["fields"]
if "domain" in data and "fields" in data["domain"]:
return data["domain"]["fields"]
return []
elif "." in extract_path:
# Extract nested path like "domain.name"
parts = extract_path.split(".")
result = data
for part in parts:
if isinstance(result, dict):
result = result.get(part, {})
else:
return None
return result
return data
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the EBI Search API call"""
tool_name = self.tool_config.get("name", "")
try:
# Build URL
url = self._build_url(arguments)
# Build parameters
params = self._build_params(arguments)
# Make API request
response = self.session.get(url, params=params, timeout=self.timeout)
# Handle get_entry endpoint - if 404, try to find correct ID via search
if tool_name == "ebi_get_entry" and response.status_code == 404:
domain = arguments.get("domain", "")
entry_id = arguments.get("entry_id", "")
if domain and entry_id:
# Try to find the correct entry ID format via search
search_result = self._find_entry_id_via_search(domain, entry_id)
if search_result:
return search_result
response.raise_for_status()
# Parse JSON response
data = response.json()
# Extract data if specified
extract_path = self.tool_config["fields"].get("extract_path")
if extract_path:
result = self._extract_data(data, extract_path)
else:
result = data
# Build response
response_data = {
"status": "success",
"data": result,
"url": response.url,
}
# Add metadata
if isinstance(data, dict):
if "hitCount" in data:
response_data["hitCount"] = data["hitCount"]
if "facets" in data:
facets = data["facets"]
response_data["facets"] = facets
# Add note about available facets
if isinstance(facets, dict) and facets:
available_facets = list(facets.keys())
response_data["available_facets"] = available_facets[
:10
] # First 10
elif isinstance(facets, list) and facets:
response_data["facets_note"] = (
f"Found {len(facets)} facet categories"
)
elif not facets:
response_data["facets_note"] = (
"No facets returned. Facets may need to be explicitly requested or may not be available for this domain/query."
)
if "domain" in data:
response_data["domain"] = data["domain"]
# Add count for list results
if isinstance(result, list):
response_data["count"] = len(result)
return response_data
except requests.exceptions.RequestException as e:
# For get_entry, try fallback via search
if tool_name == "ebi_get_entry" and "404" in str(e):
domain = arguments.get("domain", "")
entry_id = arguments.get("entry_id", "")
if domain and entry_id:
search_result = self._find_entry_id_via_search(domain, entry_id)
if search_result:
return search_result
return {
"status": "error",
"error": f"EBI Search API error: {str(e)}",
"url": url if "url" in locals() else None,
}
except Exception as e:
return {
"status": "error",
"error": f"Unexpected error: {str(e)}",
"url": url if "url" in locals() else None,
}