Source code for tooluniverse.ensembl_variation_ext_tool

# ensembl_variation_ext_tool.py
"""
Ensembl Variation Extended tool for ToolUniverse.

Provides variant population frequency data and detailed variant records
from Ensembl, complementing the existing VEP and basic variation tools.

API: https://rest.ensembl.org
No authentication required.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

ENSEMBL_BASE_URL = "https://rest.ensembl.org"
ENSEMBL_HEADERS = {"User-Agent": "ToolUniverse/1.0", "Accept": "application/json"}


[docs] @register_tool("EnsemblVariationExtTool") class EnsemblVariationExtTool(BaseTool): """ Tool for querying Ensembl variant population frequencies and detailed variant info. Supports: - Allele frequency data across gnomAD and 1000 Genomes populations - Detailed variant records with consequences, synonyms, and evidence No authentication required. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 90) fields = tool_config.get("fields", {}) self.endpoint = fields.get("endpoint", "population_frequencies")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the Ensembl Variation API call.""" try: return self._query(arguments) except requests.exceptions.Timeout: return {"error": f"Ensembl API timed out after {self.timeout}s"} except requests.exceptions.ConnectionError: return {"error": "Failed to connect to Ensembl REST API"} except requests.exceptions.HTTPError as e: status = e.response.status_code if e.response is not None else "unknown" text = "" if e.response is not None: try: text = e.response.json().get("error", "") except Exception: text = e.response.text[:200] return {"error": f"Ensembl API HTTP {status}: {text}"} except Exception as e: return {"error": f"Unexpected error: {str(e)}"}
[docs] def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint.""" if self.endpoint == "population_frequencies": return self._get_population_frequencies(arguments) elif self.endpoint == "variant_detail": return self._get_variant_detail(arguments) else: return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs] def _get_population_frequencies(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get allele frequencies across global populations.""" variant_id = arguments.get("variant_id", "") species = arguments.get("species", "human") if not variant_id: return {"error": "variant_id is required (e.g., 'rs429358')."} url = f"{ENSEMBL_BASE_URL}/variation/{species}/{variant_id}" response = requests.get( url, params={"pops": "1", "content-type": "application/json"}, headers=ENSEMBL_HEADERS, timeout=self.timeout, ) response.raise_for_status() data = response.json() # Extract location info from mappings location = {} mappings = data.get("mappings", []) if mappings: m = mappings[0] location = { "chromosome": m.get("seq_region_name"), "start": m.get("start"), "allele_string": m.get("allele_string"), "assembly": m.get("assembly_name"), } # Process population data populations = [] for pop in data.get("populations", []): populations.append( { "population": pop.get("population"), "allele": pop.get("allele"), "frequency": pop.get("frequency"), "allele_count": pop.get("allele_count"), "count": pop.get("count"), } ) # Get unique population names unique_pops = set(p.get("population", "") for p in populations) return { "data": { "variant_id": data.get("name"), "most_severe_consequence": data.get("most_severe_consequence"), "source": data.get("source"), "location": location, "populations": populations, }, "metadata": { "source": "Ensembl REST API (rest.ensembl.org)", "total_population_entries": len(populations), "unique_populations": len(unique_pops), }, }
[docs] def _get_variant_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get detailed variant information.""" variant_id = arguments.get("variant_id", "") species = arguments.get("species", "human") if not variant_id: return {"error": "variant_id is required (e.g., 'rs429358')."} url = f"{ENSEMBL_BASE_URL}/variation/{species}/{variant_id}" response = requests.get( url, params={"content-type": "application/json"}, headers=ENSEMBL_HEADERS, timeout=self.timeout, ) response.raise_for_status() data = response.json() # Extract mappings mappings = [] for m in data.get("mappings", []): mappings.append( { "seq_region_name": m.get("seq_region_name"), "start": m.get("start"), "end": m.get("end"), "allele_string": m.get("allele_string"), "strand": m.get("strand"), "assembly_name": m.get("assembly_name"), } ) synonyms = data.get("synonyms", []) return { "data": { "name": data.get("name"), "source": data.get("source"), "most_severe_consequence": data.get("most_severe_consequence"), "ancestral_allele": data.get("ancestral_allele"), "minor_allele": data.get("minor_allele"), "MAF": data.get("MAF"), "synonyms": synonyms, "mappings": mappings, "evidence": data.get("evidence", []), "clinical_significance": data.get("clinical_significance", []), }, "metadata": { "source": "Ensembl REST API (rest.ensembl.org)", "total_synonyms": len(synonyms), "total_mappings": len(mappings), }, }