tooluniverse.finngen_tool 源代码

"""
FinnGen REST API tool for ToolUniverse.

FinnGen is a large-scale Finnish genomics initiative combining genome
data from Finnish biobanks with health registry data. It provides GWAS
summary statistics for >2,400 disease endpoints across ~500,000 Finns.

API: https://r12.finngen.fi/api/
No authentication required. Free for all use.
Release 12 (current): 486,367 participants.
"""

import requests
from typing import Dict, Any, List
from .base_tool import BaseTool
from .tool_registry import register_tool

FINNGEN_BASE_URL = "https://r12.finngen.fi/api"


[文档] @register_tool("FinnGenTool") class FinnGenTool(BaseTool): """ Tool for querying FinnGen, the Finnish population genomics study. Provides access to phenotype metadata, variant fine-mapping regions, and regional GWAS associations for 2,470 disease endpoints from the Finnish biobank. No authentication required. """
[文档] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 60) self.endpoint_type = tool_config.get("fields", {}).get( "endpoint_type", "list_phenotypes" )
[文档] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the FinnGen API call.""" try: return self._dispatch(arguments) except requests.exceptions.Timeout: return { "status": "error", "error": f"FinnGen API request timed out after {self.timeout}s", } except requests.exceptions.ConnectionError: return { "status": "error", "error": "Failed to connect to FinnGen API", } except Exception as e: return { "status": "error", "error": f"FinnGen API error: {str(e)}", }
[文档] def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint.""" dispatch_map = { "list_phenotypes": self._list_phenotypes, "get_phenotype": self._get_phenotype, "get_variant_finemapping": self._get_variant_finemapping, "get_region_associations": self._get_region_associations, } handler = dispatch_map.get(self.endpoint_type) if not handler: return { "status": "error", "error": f"Unknown endpoint_type: {self.endpoint_type}", } return handler(arguments)
[文档] def _list_phenotypes(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """List/search FinnGen phenotypes.""" query = (arguments.get("query") or "").lower() category = (arguments.get("category") or "").lower() min_cases = arguments.get("min_cases") limit = arguments.get("limit", 50) url = f"{FINNGEN_BASE_URL}/phenos" resp = requests.get(url, timeout=self.timeout) resp.raise_for_status() all_phenos = resp.json() results = [] for p in all_phenos: if query: searchable = f"{p.get('phenocode', '')} {p.get('phenostring', '')} {p.get('category', '')}".lower() if query not in searchable: continue if category: # Match against human-readable category name OR phenocode prefix # e.g. "C3_" matches phenocodes starting with "C3_"; "Neoplasms" matches category name phenocode = p.get("phenocode", "").lower() cat_name = p.get("category", "").lower() if category not in cat_name and not phenocode.startswith(category): continue if min_cases and p.get("num_cases", 0) < min_cases: continue results.append( { "phenocode": p.get("phenocode"), "phenostring": p.get("phenostring"), "category": p.get("category"), "num_cases": p.get("num_cases"), "num_controls": p.get("num_controls"), "num_gw_significant": p.get("num_gw_significant"), } ) results.sort(key=lambda x: x.get("num_cases", 0), reverse=True) total = len(results) results = results[:limit] return { "status": "success", "data": results, "metadata": { "source": "FinnGen r12", "total_matching": total, "returned": len(results), "total_phenotypes": len(all_phenos), }, }
[文档] def _get_phenotype(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get details for a specific FinnGen phenotype.""" phenocode = arguments.get("phenocode", "") if not phenocode: return {"status": "error", "error": "phenocode is required"} url = f"{FINNGEN_BASE_URL}/pheno/{phenocode}" resp = requests.get(url, timeout=self.timeout) if resp.status_code == 404: return { "status": "error", "error": f"Phenotype '{phenocode}' not found in FinnGen", } resp.raise_for_status() data = resp.json() result = { "phenocode": data.get("phenocode"), "phenostring": data.get("phenostring"), "category": data.get("category"), "num_cases": data.get("num_cases"), "num_controls": data.get("num_controls"), "num_gw_significant": data.get("num_gw_significant"), "num_cases_prev": data.get("num_cases_prev"), "num_controls_prev": data.get("num_controls_prev"), "gc_lambda": data.get("gc_lambda"), } return { "status": "success", "data": result, "metadata": { "source": "FinnGen r12", "release": "R12 (486,367 participants)", }, }
[文档] def _get_variant_finemapping(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get fine-mapping regions associated with a genomic variant.""" variant = arguments.get("variant", "") if not variant: return { "status": "error", "error": "variant is required (format: chr:pos:ref:alt, e.g. 19:44908684:T:C)", } # Normalize separators: accept chr-pos-ref-alt or chr:pos:ref:alt variant = variant.replace("-", ":") url = f"{FINNGEN_BASE_URL}/variant/{variant}" resp = requests.get(url, timeout=self.timeout) if resp.status_code == 404: return { "status": "error", "error": f"Variant '{variant}' not found in FinnGen", } resp.raise_for_status() data = resp.json() parsed_regions: List[Dict[str, Any]] = [ { "phenocode": r.get("phenocode"), "chromosome": r.get("chr"), "start": r.get("start"), "end": r.get("end"), "type": r.get("type"), } for r in data.get("regions", []) ] return { "status": "success", "data": parsed_regions, "metadata": { "source": "FinnGen r12", "variant": variant, "total_regions": len(parsed_regions), }, }
[文档] def _get_region_associations(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get regional association data for a phenotype in a genomic region.""" phenocode = arguments.get("phenocode", "") region = arguments.get("region", "") if not phenocode or not region: return { "status": "error", "error": "Both phenocode and region are required. region format: chr:start-end (e.g. 9:22000000-22200000)", } url = f"{FINNGEN_BASE_URL}/region/{phenocode}/{region}" resp = requests.get(url, timeout=self.timeout) if resp.status_code == 404: return { "status": "error", "error": f"No data for phenotype '{phenocode}' in region '{region}'", } resp.raise_for_status() data = resp.json() pheno_info = data.get("phenotype", {}) region_info = data.get("region", {}) summaries = data.get("region_summary", []) # Parse credible sets from the summaries credible_sets: List[Dict[str, Any]] = [] for summary in summaries: region_id = summary.get("region_id") for cs in summary.get("credible_sets", []): lead_variants = cs.get("lead_variants", []) parsed_leads = [] for lv in lead_variants: parsed_leads.append( { "variant_id": lv.get("id"), "rsid": lv.get("rsid"), "chromosome": lv.get("chr"), "position": lv.get("position"), "ref": lv.get("ref"), "alt": lv.get("alt"), "maf": lv.get("maf"), "posterior_probability": lv.get("prob"), "credible_set": lv.get("cs"), } ) credible_sets.append( { "region_id": region_id, "chromosome": cs.get("chr"), "start": cs.get("start"), "end": cs.get("end"), "lead_variants": parsed_leads, } ) result = { "phenotype": { "phenocode": pheno_info.get("phenocode"), "phenostring": pheno_info.get("phenostring"), "num_cases": pheno_info.get("num_cases"), "num_controls": pheno_info.get("num_controls"), }, "region": { "chromosome": region_info.get("chromosome"), "start": region_info.get("start"), "end": region_info.get("stop"), }, "credible_sets": credible_sets, } return { "status": "success", "data": result, "metadata": { "source": "FinnGen r12", "num_credible_sets": len(credible_sets), }, }