Source code for tooluniverse.favor_tool

"""
FAVOR tool for ToolUniverse — comprehensive functional annotation of a variant.

FAVOR (Functional Annotation of Variants Online Resource, Harvard/HSPH) provides
integrated whole-genome single-variant annotation: allele frequencies (BRAVO/TOPMed,
gnomAD, 1000 Genomes), gene/consequence, deleteriousness scores (CADD, SIFT, PolyPhen,
AlphaMissense, MetaSVM, ...), conservation (GERP, phyloP, phastCons), ClinVar clinical
significance, and regulatory/epigenomic context — in a single call.

API: https://api.genohub.org/v1/variants/{chr}-{pos}-{ref}-{alt}  (GRCh38/hg38,
public, no authentication). Returns a flat record of ~180 annotations; this tool
groups the high-value ones and passes the full record through under all_annotations.
"""

from typing import Any, Dict, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

FAVOR_BASE = "https://api.genohub.org/v1/variants"


def _normalize_variant(raw: str) -> Optional[str]:
    """Normalize a variant string to FAVOR's 'chr-pos-ref-alt' form (hg38).

    Accepts 'chr19:44908822:C:T', '19-44908822-C-T', 'chr19-44908822-C-T', etc.
    Returns None if it cannot be parsed into 4 fields.
    """
    if not raw:
        return None
    s = raw.strip()
    for sep in (":", "-", "_", "/", " "):
        s = s.replace(sep, "|")
    parts = [p for p in s.split("|") if p != ""]
    if len(parts) != 4:
        return None
    chrom, pos, ref, alt = parts
    if chrom.lower().startswith("chr"):
        chrom = chrom[3:]
    if not pos.isdigit():
        return None
    ref, alt = ref.upper(), alt.upper()
    if not ref or not alt:
        return None
    return f"{chrom}-{pos}-{ref}-{alt}"


@register_tool("FAVORVariantAnnotationTool")
class FAVORVariantAnnotationTool(BaseTool):
    """Comprehensive functional annotation for a single GRCh38 variant via FAVOR.

    ``run`` normalizes the requested variant, fetches its record from
    ``FAVOR_BASE`` and returns a curated, grouped view of the annotations with
    the untouched record attached under ``all_annotations``.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout taken from ``tool_config["fields"]["timeout"]``.

        Args:
            tool_config: Tool configuration; forwarded to ``BaseTool``. The
                timeout defaults to 30 when the key is absent.
        """
        super().__init__(tool_config)
        # "fields" may be missing or explicitly null in a config file.
        fields = tool_config.get("fields") or {}
        self.timeout = fields.get("timeout", 30)

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Fetch and curate FAVOR annotations for ``arguments["variant"]``.

        Args:
            arguments: Mapping with a ``"variant"`` string such as
                '19-44908822-C-T' or 'chr19:44908822:C:T'.

        Returns:
            A dict with ``"status"`` of ``"success"`` or ``"error"``. On
            success ``"data"`` holds either the curated record or
            ``{"variant": ..., "found": False}``, and ``"metadata"`` describes
            the outcome. On error ``"error"`` holds a message. Network and
            decoding failures are reported this way rather than raised.
        """
        raw = arguments.get("variant", "")
        # Non-string values (numbers, None) cannot be a variant identifier.
        variant = _normalize_variant(raw) if isinstance(raw, str) else None
        if variant is None:
            return {
                "status": "error",
                "error": (
                    "'variant' must be a GRCh38 SNV/indel as chr-pos-ref-alt "
                    "(e.g. '19-44908822-C-T' or 'chr19:44908822:C:T')."
                ),
            }

        # Percent-encode the path segment so characters such as '?' or '#'
        # in user input cannot change the request; well-formed variants
        # (letters, digits, '-') pass through unchanged.
        url = f"{FAVOR_BASE}/{quote(variant, safe='')}"
        try:
            resp = requests.get(
                url, headers={"Accept": "application/json"}, timeout=self.timeout
            )
            if resp.status_code == 404:
                return self._not_found(
                    variant,
                    f"Variant {variant} not found in FAVOR (GRCh38). "
                    "Check the genome build and allele orientation.",
                )
            resp.raise_for_status()
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"FAVOR request timed out after {self.timeout}s",
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"FAVOR request failed: {e}"}

        # Decode separately from the request: recent requests releases raise
        # requests.exceptions.JSONDecodeError, which is both a
        # RequestException and a ValueError, so handling it in the block above
        # would report a bad body as a generic request failure.
        try:
            rec = resp.json()
        except ValueError:
            return {"status": "error", "error": "FAVOR returned a non-JSON response"}

        if not isinstance(rec, dict) or not rec.get("variant_vcf"):
            return self._not_found(
                variant, f"No annotation record returned for {variant}."
            )

        return {
            "status": "success",
            "data": self._curate(rec),
            "metadata": {
                "found": True,
                "genome_build": "GRCh38",
                "source": "FAVOR (Functional Annotation of Variants Online Resource)",
            },
        }

    @staticmethod
    def _not_found(variant: str, note: str) -> Dict[str, Any]:
        """Build the success envelope used when no record exists for ``variant``."""
        return {
            "status": "success",
            "data": {"variant": variant, "found": False},
            "metadata": {
                "found": False,
                "note": note,
                "source": "FAVOR",
            },
        }

    @staticmethod
    def _curate(rec: Dict[str, Any]) -> Dict[str, Any]:
        """Group selected fields of a flat FAVOR record into themed sections.

        Args:
            rec: The decoded FAVOR record.

        Returns:
            A dict with the sections ``variant``, ``gene_consequence``,
            ``allele_frequency``, ``deleteriousness``, ``conservation``,
            ``clinical`` and ``regulatory``, plus ``all_annotations`` holding
            ``rec`` itself. Fields missing from ``rec`` appear as None, except
            in the two per-population frequency maps, which omit them.
        """
        g = rec.get  # shorthand

        def without_prefix(prefix: str, keys) -> Dict[str, Any]:
            # Keep only populations that carry a value; name them by suffix.
            return {k[len(prefix):]: g(k) for k in keys if g(k) is not None}

        ancestry_af = without_prefix(
            "af_",
            (
                "af_total",
                "af_afr",
                "af_amr",
                "af_eas",
                "af_nfe",
                "af_sas",
                "af_asj",
                "af_fin",
                "af_ami",
                "af_oth",
            ),
        )
        thousand_genomes = without_prefix(
            "tg_",
            ("tg_all", "tg_afr", "tg_amr", "tg_eas", "tg_eur", "tg_sas"),
        )

        return {
            "found": True,
            "variant": {
                "variant_vcf": g("variant_vcf"),
                "rsid": g("rsid"),
                "chromosome": g("chromosome"),
                "position": g("position"),
                "hgvs_genomic": g("hgvsg"),
            },
            "gene_consequence": {
                # Each "or" falls back to an alternate field when the first
                # one is missing or empty.
                "gene": g("genecode_comprehensive_info") or g("geneinfo"),
                "category": g("genecode_comprehensive_category"),
                "exonic_category": g("genecode_comprehensive_exonic_category"),
                "so_term": g("so_term"),
                "protein_variant": g("protein_variant") or g("aa"),
                "hgvs_c": g("hgvsc") or g("cds"),
                "hgvs_p": g("hgvsp"),
                "is_canonical": g("is_canonical"),
            },
            "allele_frequency": {
                "bravo_topmed_af": g("bravo_af"),
                "bravo_topmed_ac": g("bravo_ac"),
                "bravo_topmed_an": g("bravo_an"),
                "gnomad_af_by_ancestry": ancestry_af,
                "thousand_genomes_af": thousand_genomes,
            },
            "deleteriousness": {
                "cadd_phred": g("cadd_phred"),
                "sift": g("sift_cat"),
                "polyphen2": g("polyphen_cat"),
                "alphamissense_class": g("am_class"),
                "alphamissense_pathogenicity": g("am_pathogenicity"),
                "metasvm_pred": g("metasvm_pred"),
                "mutation_assessor_score": g("mutation_assessor_score"),
                "mutation_taster_score": g("mutation_taster_score"),
                "fathmm_xf": g("fathmm_xf"),
                "grantham": g("grantham"),
                "linsight": g("linsight"),
                "funseq": g("funseq_description"),
            },
            "conservation": {
                "gerp_s": g("gerp_s"),
                "phylop_mammalian": g("mamphylop"),
                "phylop_primate": g("priphylop"),
                "phylop_vertebrate": g("verphylop"),
                "phastcons_mammalian": g("mamphcons"),
                "apc_conservation": g("apc_conservation_v2"),
            },
            "clinical": {
                "clinvar_significance": g("clnsig"),
                "clinvar_disease": g("clndn"),
                "clinvar_review_status": g("clnrevstat"),
                "clinvar_disease_db": g("clndisdb"),
            },
            "regulatory": {
                "cage_promoter": g("cage_promoter"),
                "cage_enhancer": g("cage_enhancer"),
                "genehancer": g("genehancer"),
                "super_enhancer": g("super_enhancer"),
                "encode_dnase_sum": g("encode_dnase_sum"),
                "apc_epigenetics_active": g("apc_epigenetics_active"),
                "remap_overlap_tf": g("remap_overlap_tf"),
            },
            "all_annotations": rec,
        }