# Source code for tooluniverse.ensembl_xrefs_tool

# ensembl_xrefs_tool.py
"""
Ensembl Cross-references tool for ToolUniverse.

The Ensembl Xrefs API returns external database cross-references for any
Ensembl stable identifier (gene, transcript, translation, etc.). This
enables mapping between Ensembl, HGNC, EntrezGene, UniProt, Reactome,
MIM, GeneCards, and many other databases.

API: https://rest.ensembl.org/xrefs/
No authentication required.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Base URL of the Ensembl REST service; endpoint paths are appended to it.
ENSEMBL_REST_BASE = "https://rest.ensembl.org"
# Headers sent with every request: identify the client and ask for JSON.
ENSEMBL_HEADERS = {"User-Agent": "ToolUniverse/1.0", "Accept": "application/json"}


@register_tool("EnsemblXrefsTool")
class EnsemblXrefsTool(BaseTool):
    """
    Tool for querying the Ensembl cross-references (xrefs) REST API.

    Supports:
    - Get all external database cross-references for an Ensembl ID
      (endpoint ``xrefs_by_id``)
    - Look up Ensembl IDs for a gene symbol (endpoint ``xrefs_by_symbol``)
    - Filter either query by external database name

    The endpoint is selected once, from ``tool_config["fields"]["endpoint"]``.
    Every public call returns a dict: either ``{"data": ..., "metadata": ...}``
    on success or ``{"error": "<message>"}`` on failure.

    No authentication required.
    """

    # Upper bounds on the number of entries placed in a single response.
    # The metadata still reports the full count received from Ensembl.
    MAX_XREFS = 100
    MAX_SYMBOL_RESULTS = 50

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration.

        Args:
            tool_config: Tool configuration. Reads ``timeout`` (seconds,
                default 90) and ``fields.endpoint`` (default
                ``"xrefs_by_id"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 90)
        # Tolerate an explicit ``"fields": None`` in the configuration.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "xrefs_by_id")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the Ensembl Xrefs API call.

        Args:
            arguments: Query arguments for the configured endpoint.

        Returns:
            The endpoint result, or ``{"error": ...}`` describing a timeout,
            connection failure, HTTP error, or any other exception. This
            method does not raise.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"Ensembl API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to Ensembl REST API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            if status == 404:
                return {
                    "error": "Ensembl ID not found. Provide a valid Ensembl stable ID."
                }
            return {"error": f"Ensembl REST API HTTP {status}"}
        except Exception as e:
            # Boundary handler: callers expect an error dict, never an exception.
            return {"error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured endpoint."""
        if self.endpoint == "xrefs_by_id":
            return self._get_xrefs(arguments)
        elif self.endpoint == "xrefs_by_symbol":
            return self._get_xrefs_by_symbol(arguments)
        else:
            return {"error": f"Unknown endpoint: {self.endpoint}"}

    def _fetch_json(self, path_segments, external_db=None):
        """
        GET ``ENSEMBL_REST_BASE/<path_segments...>`` and return the decoded JSON.

        Args:
            path_segments: Iterable of path components. Each one is
                percent-encoded so caller-supplied values cannot introduce
                extra path separators or a query string.
            external_db: Optional external database name added as the
                ``external_db`` query parameter (also percent-encoded).

        Returns:
            Whatever ``response.json()`` yields.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-2xx status (via ``raise_for_status``).
        """
        path = "/".join(quote(str(segment), safe="") for segment in path_segments)
        # Same query layout as before: ';'-separated pairs with the JSON
        # content-type first; only the user-supplied value is escaped.
        query_parts = ["content-type=application/json"]
        if external_db:
            query_parts.append(f"external_db={quote(str(external_db), safe='')}")
        full_url = f"{ENSEMBL_REST_BASE}/{path}?{';'.join(query_parts)}"
        response = requests.get(
            full_url, headers=ENSEMBL_HEADERS, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def _get_xrefs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get external cross-references for an Ensembl ID.

        Args:
            arguments: Reads ``ensembl_id`` (required) and ``external_db``
                (optional filter).

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with at most ``MAX_XREFS``
            cross-references plus a per-database count over all of them, or
            ``{"error": ...}`` when the ID is missing or the response is not
            a list.
        """
        # Strip so that whitespace-only or padded IDs are rejected/cleaned
        # instead of being sent to the API verbatim.
        ensembl_id = str(arguments.get("ensembl_id") or "").strip()
        external_db = arguments.get("external_db", None)
        if not ensembl_id:
            return {
                "error": "ensembl_id is required (e.g., 'ENSG00000141510', 'ENST00000269305')."
            }

        data = self._fetch_json(("xrefs", "id", ensembl_id), external_db)
        if not isinstance(data, list):
            return {"error": "Unexpected response format."}

        # Count every cross-reference per database, including those that
        # are truncated from the returned list.
        by_db: Dict[str, int] = {}
        for item in data:
            db = item.get("dbname", "unknown")
            by_db[db] = by_db.get(db, 0) + 1

        xrefs = [
            {
                "dbname": item.get("dbname", "unknown"),
                "db_display_name": item.get("db_display_name"),
                "primary_id": item.get("primary_id"),
                "display_id": item.get("display_id"),
                "description": item.get("description"),
                "info_type": item.get("info_type"),
                "synonyms": item.get("synonyms", []),
            }
            for item in data[: self.MAX_XREFS]
        ]

        return {
            "data": {
                "ensembl_id": ensembl_id,
                "xrefs": xrefs,
                "database_summary": by_db,
            },
            "metadata": {
                "source": "Ensembl REST API (rest.ensembl.org)",
                "total_xrefs": len(data),
                "returned": len(xrefs),
                "databases_found": len(by_db),
            },
        }

    def _get_xrefs_by_symbol(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Look up Ensembl IDs for a gene symbol via external databases.

        Args:
            arguments: Reads ``symbol`` (required), ``species`` (default
                ``"human"``) and ``external_db`` (optional filter).

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with at most
            ``MAX_SYMBOL_RESULTS`` ``{"id", "type"}`` entries, or
            ``{"error": ...}`` when the symbol is missing or the response is
            not a list.
        """
        symbol = str(arguments.get("symbol") or "").strip()
        # Fall back to the default when species is absent, None or blank,
        # rather than sending a literal "None" path segment.
        species = str(arguments.get("species") or "").strip() or "human"
        external_db = arguments.get("external_db", None)
        if not symbol:
            return {"error": "symbol is required (gene symbol, e.g., 'TP53', 'BRCA1')."}

        data = self._fetch_json(("xrefs", "symbol", species, symbol), external_db)
        if not isinstance(data, list):
            return {"error": "Unexpected response format."}

        results = [
            {"id": item.get("id"), "type": item.get("type")}
            for item in data[: self.MAX_SYMBOL_RESULTS]
        ]

        return {
            "data": {
                "symbol": symbol,
                "species": species,
                "ensembl_ids": results,
            },
            "metadata": {
                "source": "Ensembl REST API (rest.ensembl.org)",
                "total_results": len(data),
                "returned": len(results),
            },
        }