Source code for tooluniverse.genome_nexus_tool

# genome_nexus_tool.py
"""
Genome Nexus tool for ToolUniverse.

Genome Nexus (Memorial Sloan Kettering Cancer Center) is a cancer variant
annotation aggregator that integrates data from VEP, SIFT, PolyPhen-2,
AlphaMissense, cancer hotspots, mutation assessor, and more.

API: https://www.genomenexus.org/
No authentication required. Uses GRCh37/hg19 coordinates.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

GENOME_NEXUS_BASE_URL = "https://www.genomenexus.org"


@register_tool("GenomeNexusTool")
class GenomeNexusTool(BaseTool):
    """
    Annotate cancer variants through the Genome Nexus (MSK) web API.

    One instance serves exactly one endpoint, chosen by the tool
    configuration:

    - ``annotate_variant``: full variant annotation
      (VEP + SIFT + PolyPhen + AlphaMissense + hotspots)
    - ``get_cancer_hotspots``: cancer hotspot lookup
    - ``get_canonical_transcript``: canonical transcript retrieval
    - ``annotate_mutation``: coordinate-based mutation annotation

    No authentication required. All coordinates in GRCh37/hg19.
    """
def __init__(self, tool_config: Dict[str, Any]):
    """Set up the tool from its configuration mapping.

    Args:
        tool_config: Tool configuration. The optional ``"timeout"`` entry
            (default ``30``) is stored as ``self.timeout``. The optional
            ``"endpoint"`` entry of the nested ``"fields"`` mapping
            (default ``"annotate_variant"``) is stored as ``self.endpoint``.
    """
    super().__init__(tool_config)

    # Request timeout handed to every HTTP call made by this tool.
    self.timeout = tool_config.get("timeout", 30)

    # The endpoint name lives under the nested "fields" section.
    field_settings = tool_config.get("fields", {})
    self.endpoint = field_settings.get("endpoint", "annotate_variant")
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Execute the Genome Nexus API call for the configured endpoint.

    Delegates to ``self._query`` and converts every exception raised there
    into an ``{"error": ...}`` dictionary, so failures are reported as data
    rather than raised.

    Args:
        arguments: Endpoint arguments. ``None`` is treated as an empty
            mapping.

    Returns:
        The result of ``self._query`` on success, otherwise a dictionary
        with a single ``"error"`` message.
    """
    # Without this guard a missing mapping fails inside the handler with an
    # AttributeError and is reported as an opaque "Unexpected error"; with an
    # empty mapping the handler names the field that is actually required.
    if arguments is None:
        arguments = {}

    try:
        return self._query(arguments)
    except requests.exceptions.Timeout:
        return {"error": f"Genome Nexus API timed out after {self.timeout}s"}
    except requests.exceptions.ConnectionError:
        return {"error": "Failed to connect to Genome Nexus API"}
    except requests.exceptions.HTTPError as e:
        # An HTTPError may carry no response object at all.
        status = e.response.status_code if e.response is not None else "unknown"
        if status == 400:
            return {
                "error": "Invalid variant format. Use GRCh37/hg19 HGVS notation (e.g., '7:g.140453136A>T')."
            }
        if status == 404:
            return {"error": "Variant or gene not found in Genome Nexus."}
        return {"error": f"Genome Nexus API HTTP {status}"}
    except Exception as e:
        # Top-level boundary: callers receive an error dictionary, never a
        # raised exception.
        return {"error": f"Unexpected error: {str(e)}"}
[docs] def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint.""" if self.endpoint == "annotate_variant": return self._annotate_variant(arguments) elif self.endpoint == "get_cancer_hotspots": return self._get_cancer_hotspots(arguments) elif self.endpoint == "get_canonical_transcript": return self._get_canonical_transcript(arguments) elif self.endpoint == "annotate_mutation": return self._annotate_mutation(arguments) else: return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs] def _annotate_variant(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Annotate a variant by HGVS genomic notation.""" hgvsg = arguments.get("hgvsg", "") if not hgvsg: return {"error": "hgvsg is required (e.g., '7:g.140453136A>T')."} url = f"{GENOME_NEXUS_BASE_URL}/annotation/{hgvsg}" params = {"fields": "hotspots,annotation_summary,mutation_assessor"} response = requests.get(url, params=params, timeout=self.timeout) response.raise_for_status() data = response.json() if not data.get("successfully_annotated", True): return { "error": data.get( "errorMessage", f"Failed to annotate variant '{hgvsg}'" ) } return self._format_annotation(data)
[docs] def _annotate_mutation(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Annotate a mutation by genomic coordinates.""" chromosome = arguments.get("chromosome", "") start = arguments.get("start") end = arguments.get("end") ref = arguments.get("reference_allele", "") alt = arguments.get("variant_allele", "") if not all([chromosome, start, end, ref, alt]): return { "error": "chromosome, start, end, reference_allele, and variant_allele are all required." } # Use the genomic format endpoint query = f"{chromosome},{start},{end},{ref},{alt}" url = f"{GENOME_NEXUS_BASE_URL}/annotation/genomic/{query}" params = {"fields": "hotspots,annotation_summary,mutation_assessor"} response = requests.get(url, params=params, timeout=self.timeout) response.raise_for_status() data = response.json() if not data.get("successfully_annotated", True): return { "error": data.get( "errorMessage", f"Failed to annotate mutation at {chromosome}:{start}", ) } return self._format_annotation(data)
[docs] def _format_annotation(self, data: Dict[str, Any]) -> Dict[str, Any]: """Format a variant annotation response.""" # Extract transcript consequences with pathogenicity scores tc_list = [] for tc in data.get("transcript_consequences", []): tc_entry = { "gene_symbol": tc.get("gene_symbol"), "transcript_id": tc.get("transcript_id"), "consequence_terms": tc.get("consequence_terms", []), "hgvsp": tc.get("hgvsp"), "hgvsc": tc.get("hgvsc"), "amino_acids": tc.get("amino_acids"), "codons": tc.get("codons"), "polyphen_prediction": tc.get("polyphen_prediction"), "polyphen_score": tc.get("polyphen_score"), "sift_prediction": tc.get("sift_prediction"), "sift_score": tc.get("sift_score"), "alphaMissense": tc.get("alphaMissense"), "canonical": tc.get("canonical"), "exon": tc.get("exon"), } tc_list.append(tc_entry) # Extract colocated variants (dbSNP IDs) colocated = [] for cv in data.get("colocatedVariants", []): colocated.append({"dbSnpId": cv.get("dbSnpId")}) # Extract hotspots hotspots_data = data.get("hotspots") hotspots_formatted = None if hotspots_data and hotspots_data.get("annotation"): hotspots_formatted = { "annotation": hotspots_data.get("annotation", []), } return { "data": { "variant": data.get("variant"), "hgvsg": data.get("hgvsg"), "assembly_name": data.get("assembly_name"), "most_severe_consequence": data.get("most_severe_consequence"), "annotation_summary": data.get("annotation_summary"), "transcript_consequences": tc_list, "hotspots": hotspots_formatted, "colocated_variants": colocated, }, "metadata": { "source": "Genome Nexus (genomenexus.org) - Memorial Sloan Kettering", }, }
[docs] def _get_cancer_hotspots(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get cancer hotspot data for a variant.""" hgvsg = arguments.get("hgvsg", "") if not hgvsg: return {"error": "hgvsg is required (e.g., '7:g.140453136A>T')."} url = f"{GENOME_NEXUS_BASE_URL}/annotation/{hgvsg}" params = {"fields": "hotspots,annotation_summary"} response = requests.get(url, params=params, timeout=self.timeout) response.raise_for_status() data = response.json() if not data.get("successfully_annotated", True): return { "error": data.get( "errorMessage", f"Failed to annotate variant '{hgvsg}'" ) } # Extract gene symbol from annotation summary gene_symbol = None ann_summary = data.get("annotation_summary", {}) tc_summary = ann_summary.get("transcriptConsequences", []) if tc_summary: gene_symbol = tc_summary[0].get("hugoGeneSymbol") # Extract hotspots hotspot_data = data.get("hotspots", {}) hotspot_annotations = hotspot_data.get("annotation", []) flat_hotspots = [] for group in hotspot_annotations: if isinstance(group, list): for item in group: flat_hotspots.append( { "hugoSymbol": item.get("hugoSymbol"), "residue": item.get("residue"), "tumorCount": item.get("tumorCount"), "type": item.get("type"), } ) elif isinstance(group, dict): flat_hotspots.append( { "hugoSymbol": group.get("hugoSymbol"), "residue": group.get("residue"), "tumorCount": group.get("tumorCount"), "type": group.get("type"), } ) return { "data": { "variant": data.get("variant"), "gene_symbol": gene_symbol, "is_hotspot": len(flat_hotspots) > 0, "hotspots": flat_hotspots, }, "metadata": { "source": "Genome Nexus Cancer Hotspots (genomenexus.org)", }, }
[docs] def _get_canonical_transcript(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get canonical transcript for a gene.""" gene_symbol = arguments.get("gene_symbol", "") if not gene_symbol: return {"error": "gene_symbol is required (e.g., 'TP53')."} url = f"{GENOME_NEXUS_BASE_URL}/ensembl/canonical-transcript/hgnc/{gene_symbol}" response = requests.get(url, timeout=self.timeout) response.raise_for_status() data = response.json() # Format Pfam domains pfam_domains = [] for d_item in data.get("pfamDomains", []): pfam_domains.append( { "pfamDomainId": d_item.get("pfamDomainId"), "pfamDomainStart": d_item.get("pfamDomainStart"), "pfamDomainEnd": d_item.get("pfamDomainEnd"), "pfamDomainDescription": d_item.get("pfamDomainDescription"), } ) return { "data": { "transcriptId": data.get("transcriptId"), "geneId": data.get("geneId"), "proteinId": data.get("proteinId"), "proteinLength": data.get("proteinLength"), "hugoSymbols": data.get("hugoSymbols", []), "refseqMrnaId": data.get("refseqMrnaId"), "ccdsId": data.get("ccdsId"), "pfamDomains": pfam_domains, }, "metadata": { "source": "Genome Nexus (genomenexus.org) - Memorial Sloan Kettering", }, }