Source code for tooluniverse.epigraphdb_tool

"""
EpiGraphDB API tool for ToolUniverse.

EpiGraphDB is a database and analysis platform integrating epidemiological data
with knowledge graph approaches. It provides Mendelian Randomization analysis,
genetic correlations, drug repurposing insights, and GWAS-to-disease ontology mappings.

API: https://api.epigraphdb.org
No authentication required. Public access.

Documentation: https://epigraphdb.org
"""

import requests
from typing import Any

from .base_rest_tool import BaseRESTTool
from .tool_registry import register_tool

# Root URL of the public EpiGraphDB REST API; each handler below appends its
# endpoint path (e.g. "/mr", "/genetic-cor") to this value.
EPIGRAPHDB_BASE = "https://api.epigraphdb.org"


class _InvalidArgument(ValueError):
    """Signal a caller-supplied argument that cannot be used.

    Subclasses ``ValueError`` so that code already handling the ``ValueError``
    raised by ``float()`` / ``int()`` keeps working.
    """


def _text_arg(arguments: dict, key: str) -> str:
    """Return ``arguments[key]`` as a stripped string ('' if missing or None)."""
    value = arguments.get(key)
    return "" if value is None else str(value).strip()


def _number_arg(arguments: dict, key: str, default: Any, cast: Any = float) -> Any:
    """Return ``arguments[key]`` converted with ``cast``.

    A missing key or an explicit ``None`` yields ``default`` unchanged.

    Raises:
        _InvalidArgument: if the supplied value cannot be converted.
    """
    value = arguments.get(key)
    if value is None:
        return default
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError):
        expected = "an integer" if cast is int else "a number"
        raise _InvalidArgument(f"{key} must be {expected}, got {value!r}") from None


def _get_json(path: str, params: dict, timeout: float) -> Any:
    """GET ``EPIGRAPHDB_BASE + path`` and return the decoded JSON body.

    Exceptions raised by ``requests.get``, ``raise_for_status`` or ``json``
    are not handled here; they propagate to the caller.
    """
    resp = requests.get(f"{EPIGRAPHDB_BASE}{path}", params=params, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


def _result_rows(payload: Any) -> list:
    """Return the dict entries of ``payload["results"]``.

    Anything unexpected (payload not a dict, ``results`` missing, null or not
    a list, individual rows that are not dicts) is dropped rather than raised.
    """
    results = payload.get("results") if isinstance(payload, dict) else None
    if not isinstance(results, list):
        return []
    return [row for row in results if isinstance(row, dict)]


def _nested(row: dict, key: str) -> dict:
    """Return ``row[key]`` when it is a dict, otherwise an empty dict.

    ``row.get(key, {})`` is not enough: a JSON ``null`` makes the key present
    with value ``None``, and a following ``.get`` would then fail.
    """
    value = row.get(key)
    return value if isinstance(value, dict) else {}


@register_tool("EpiGraphDBTool")
class EpiGraphDBTool(BaseRESTTool):
    """
    Tool for querying the EpiGraphDB API.

    Provides access to:
    - Mendelian Randomization (MR) results between GWAS traits
    - Genetic correlations between traits
    - Drug repurposing via genetic evidence (drugs targeting risk factor genes)
    - GWAS trait to EFO/disease ontology mapping
    - Gene druggability information via PPI network
    - Gene-drug associations from pharmacogenomics databases
    - OpenGWAS GWAS study search

    Uses IEU OpenGWAS trait IDs (e.g., 'ieu-a-2' for BMI, 'ieu-a-7' for CHD).
    No authentication required.

    The operation performed by ``run`` is fixed per instance and is read from
    ``tool_config["fields"]["operation"]`` (default ``"mr"``).
    """

    def __init__(self, tool_config: dict):
        """Initialise the tool from its configuration.

        Args:
            tool_config: Tool configuration dict. ``fields.operation`` selects
                the endpoint handler; a missing or null ``fields`` entry
                falls back to the ``"mr"`` operation.
        """
        super().__init__(tool_config)
        self.timeout = 45  # MR queries can be slow
        # `or {}` also covers an explicit `"fields": None` in the config.
        self.operation = (tool_config.get("fields") or {}).get("operation", "mr")

    def run(self, arguments: dict) -> dict:
        """Execute the EpiGraphDB API call.

        Args:
            arguments: Operation-specific arguments (see the ``_get_*`` /
                ``_search_*`` handlers). ``None`` is treated as ``{}``.

        Returns:
            The handler's result dict, or ``{"error": message}`` when the
            arguments are invalid or the request fails. Exceptions derived
            from ``Exception`` are converted rather than raised.
        """
        try:
            return self._query(arguments or {})
        except _InvalidArgument as e:
            # Bad input, not a query failure: report the validation message as is.
            return {"error": str(e)}
        except requests.exceptions.Timeout:
            return {"error": f"EpiGraphDB request timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to EpiGraphDB. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            return {
                "error": f"EpiGraphDB HTTP error: {e.response.status_code} - {e.response.text[:200]}"
            }
        except Exception as e:
            return {"error": f"Unexpected error querying EpiGraphDB: {str(e)}"}

    def _query(self, arguments: dict) -> dict:
        """Route to the appropriate endpoint.

        Returns the selected handler's result, or an ``"error"`` dict when
        ``self.operation`` is not one of the known operation names.
        """
        op = self.operation
        handlers = {
            "mr": self._get_mr,
            "genetic_cor": self._get_genetic_cor,
            "drugs_risk_factors": self._get_drugs_risk_factors,
            "gwas_efo": self._get_gwas_efo,
            "disease_efo": self._get_disease_efo,
            "gene_search": self._search_gene,
            "gene_drugs": self._get_gene_drugs,
            "opengwas_search": self._search_opengwas,
        }
        # The isinstance guard keeps an unhashable `op` on the "unknown" path.
        handler = handlers.get(op) if isinstance(op, str) else None
        if handler is None:
            return {"error": f"Unknown operation: {op}"}
        return handler(arguments)

    def _get_mr(self, arguments: dict) -> dict:
        """Get Mendelian Randomization results between exposure and outcome traits.

        Args:
            arguments: ``exposure_trait`` and ``outcome_trait`` (both
                required), optional ``pval_threshold`` (default 1e-5).

        Returns:
            ``{"data": {"mr_results", "total_count"}, "metadata": {...}}``.
            ``mr_results`` holds at most the first 50 rows, while
            ``total_count`` counts every row returned. An ``"error"`` dict is
            returned when a required trait is missing.

        Raises:
            _InvalidArgument: if ``pval_threshold`` is not numeric.
        """
        exposure_trait = _text_arg(arguments, "exposure_trait")
        outcome_trait = _text_arg(arguments, "outcome_trait")
        if not exposure_trait or not outcome_trait:
            return {"error": "Both exposure_trait and outcome_trait are required"}

        pval_threshold = _number_arg(arguments, "pval_threshold", 1e-5)
        params = {
            "exposure_trait": exposure_trait,
            "outcome_trait": outcome_trait,
            "pval_threshold": pval_threshold,
            "mode": "table",
        }
        results = _result_rows(_get_json("/mr", params, self.timeout))

        mr_results = []
        for row in results[:50]:
            exposure = _nested(row, "exposure")
            outcome = _nested(row, "outcome")
            mr = _nested(row, "mr")
            mr_results.append(
                {
                    "exposure_id": exposure.get("id"),
                    "exposure_trait": exposure.get("trait"),
                    "outcome_id": outcome.get("id"),
                    "outcome_trait": outcome.get("trait"),
                    "beta": mr.get("b"),
                    "se": mr.get("se"),
                    "pval": mr.get("pval"),
                    "method": mr.get("method"),
                    "selection": mr.get("selection"),
                    "moescore": mr.get("moescore"),
                }
            )

        return {
            "data": {
                "mr_results": mr_results,
                "total_count": len(results),
            },
            "metadata": {
                "exposure_trait": exposure_trait,
                "outcome_trait": outcome_trait,
                "pval_threshold": pval_threshold,
                "source": "EpiGraphDB",
                "description": (
                    "Mendelian Randomization evidence. beta = causal effect estimate. "
                    "moescore > 0.9 suggests high-quality instruments."
                ),
            },
        }

    def _get_genetic_cor(self, arguments: dict) -> dict:
        """Get genetic correlations between a trait and other GWAS traits.

        Args:
            arguments: ``trait`` (required), optional ``pval_threshold``
                (default 0.05).

        Returns:
            ``{"data": {"correlations", "total_count"}, "metadata": {...}}``
            with at most the first 50 rows in ``correlations``, or an
            ``"error"`` dict when ``trait`` is missing.

        Raises:
            _InvalidArgument: if ``pval_threshold`` is not numeric.
        """
        trait = _text_arg(arguments, "trait")
        if not trait:
            return {"error": "trait parameter is required (e.g., 'Body mass index')"}

        pval_threshold = _number_arg(arguments, "pval_threshold", 0.05)
        params = {
            "trait": trait,
            "pval_threshold": pval_threshold,
            "mode": "table",
        }
        results = _result_rows(_get_json("/genetic-cor", params, self.timeout))

        cor_results = []
        for row in results[:50]:
            trait1 = _nested(row, "trait1")
            trait2 = _nested(row, "trait2")
            cor = _nested(row, "cor")
            cor_results.append(
                {
                    "trait1_id": trait1.get("id"),
                    "trait1_trait": trait1.get("trait"),
                    "trait2_id": trait2.get("id"),
                    "trait2_trait": trait2.get("trait"),
                    "rg": cor.get("rg"),
                    "rg_se": cor.get("rg_se"),
                    "rg_pval": cor.get("rg_pval"),
                    "h2": cor.get("h2"),
                    "h2_intercept": cor.get("h2_intercept"),
                }
            )

        return {
            "data": {
                "correlations": cor_results,
                "total_count": len(results),
            },
            "metadata": {
                "trait": trait,
                "pval_threshold": pval_threshold,
                "source": "EpiGraphDB",
                "description": "rg = genetic correlation coefficient (-1 to 1)",
            },
        }

    def _get_drugs_risk_factors(self, arguments: dict) -> dict:
        """Get drugs associated with a risk factor trait via genetic evidence.

        Rows are grouped by drug label. Each group keeps the gene and variant
        of its first row plus a count of rows; the 30 groups with the most
        rows are returned, ties staying in first-seen order.

        Args:
            arguments: ``trait`` (required), optional ``pval_threshold``
                (default 1e-4).

        Returns:
            ``{"data": {"drugs", "total_evidence_count", "unique_drug_count"},
            "metadata": {...}}``, or an ``"error"`` dict when ``trait`` is
            missing.

        Raises:
            _InvalidArgument: if ``pval_threshold`` is not numeric.
        """
        trait = _text_arg(arguments, "trait")
        if not trait:
            return {
                "error": "trait parameter is required (e.g., 'Body mass index', 'LDL cholesterol')"
            }

        pval_threshold = _number_arg(arguments, "pval_threshold", 1e-4)
        params = {
            "trait": trait,
            "pval_threshold": pval_threshold,
            "mode": "table",
        }
        results = _result_rows(_get_json("/drugs/risk-factors", params, self.timeout))

        # Deduplicate by drug label
        seen_drugs: dict[str, Any] = {}
        for row in results:
            drug_label = _nested(row, "drug").get("label", "")
            entry = seen_drugs.get(drug_label)
            if entry is None:
                seen_drugs[drug_label] = {
                    "drug": drug_label,
                    "gene": _nested(row, "gene").get("name"),
                    "variant": _nested(row, "variant").get("name"),
                    "evidence_count": 1,
                }
            else:
                entry["evidence_count"] += 1

        # sorted() is stable, also with reverse=True, so ties keep insertion order.
        drug_list = sorted(
            seen_drugs.values(),
            key=lambda entry: entry["evidence_count"],
            reverse=True,
        )[:30]

        return {
            "data": {
                "drugs": drug_list,
                "total_evidence_count": len(results),
                "unique_drug_count": len(seen_drugs),
            },
            "metadata": {
                "trait": trait,
                "pval_threshold": pval_threshold,
                "source": "EpiGraphDB",
                "description": (
                    "Drugs whose target genes are associated with the trait via GWAS. "
                    "Useful for drug repurposing and prioritization."
                ),
            },
        }

    def _get_gwas_efo(self, arguments: dict) -> dict:
        """Map GWAS traits to EFO (Experimental Factor Ontology) terms.

        Only the first row seen for each EFO id is kept.

        Args:
            arguments: ``trait`` (required).

        Returns:
            ``{"data": {"efo_mappings", "total_count"}, "metadata": {...}}``
            where ``total_count`` is the number of distinct EFO ids, or an
            ``"error"`` dict when ``trait`` is missing.
        """
        trait = _text_arg(arguments, "trait")
        if not trait:
            return {
                "error": "trait parameter is required (e.g., 'Body mass index', 'coronary artery disease')"
            }

        params = {
            "trait": trait,
            "mode": "table",
        }
        results = _result_rows(_get_json("/ontology/gwas-efo", params, self.timeout))

        # Deduplicate by EFO term
        seen_efo: dict = {}
        for row in results:
            efo = _nested(row, "efo")
            efo_id = efo.get("id", "")
            if efo_id in seen_efo:
                continue
            gwas = _nested(row, "gwas")
            seen_efo[efo_id] = {
                "efo_id": efo_id,
                "efo_label": efo.get("value"),
                "gwas_id": gwas.get("id"),
                "gwas_trait": gwas.get("trait"),
                "score": _nested(row, "r").get("score"),
            }

        efo_list = list(seen_efo.values())

        return {
            "data": {
                "efo_mappings": efo_list,
                "total_count": len(efo_list),
            },
            "metadata": {
                "trait": trait,
                "source": "EpiGraphDB",
                "description": "Maps GWAS trait names to EFO ontology terms for standardization",
            },
        }

    def _get_disease_efo(self, arguments: dict) -> dict:
        """Map a disease label to EFO terms and associated GWAS studies.

        Args:
            arguments: ``disease`` (required); sent to the API as
                ``disease_label``.

        Returns:
            ``{"data": {"disease_efo_mappings", "total_count"},
            "metadata": {...}}`` with at most the first 20 rows in
            ``disease_efo_mappings``, or an ``"error"`` dict when ``disease``
            is missing.
        """
        disease = _text_arg(arguments, "disease")
        if not disease:
            return {
                "error": "disease parameter is required (e.g., 'type 2 diabetes', 'breast cancer')"
            }

        params = {
            "disease_label": disease,
            "mode": "table",
        }
        results = _result_rows(
            _get_json("/ontology/disease-efo", params, self.timeout)
        )

        mappings = []
        for row in results[:20]:
            efo = _nested(row, "efo")
            gwas = _nested(row, "gwas")
            mappings.append(
                {
                    "efo_id": efo.get("id"),
                    "efo_label": efo.get("value"),
                    "gwas_id": gwas.get("id"),
                    "gwas_trait": gwas.get("trait"),
                    "score": _nested(row, "r").get("score"),
                }
            )

        return {
            "data": {
                "disease_efo_mappings": mappings,
                "total_count": len(results),
            },
            "metadata": {
                "disease": disease,
                "source": "EpiGraphDB",
                "description": "Maps disease labels to EFO terms and OpenGWAS studies",
            },
        }

    def _search_gene(self, arguments: dict) -> dict:
        """Search for genes in EpiGraphDB by name or Ensembl ID.

        Args:
            arguments: ``gene_name`` or ``gene_id`` (at least one required;
                ``gene_name`` wins when both are given), optional ``limit``
                (default 10, clamped to 1..50).

        Returns:
            ``{"data": {"genes", "total_found"}, "metadata": {...}}``, or an
            ``"error"`` dict when neither identifier is supplied.

        Raises:
            _InvalidArgument: if ``limit`` is not an integer.
        """
        # Stripping means a whitespace-only value counts as "not supplied".
        gene_name = _text_arg(arguments, "gene_name")
        gene_id = _text_arg(arguments, "gene_id")
        if not gene_name and not gene_id:
            return {"error": "Either gene_name or gene_id (Ensembl ID) is required"}

        limit = max(1, min(_number_arg(arguments, "limit", 10, int), 50))
        params: dict[str, Any] = {"limit": limit}
        if gene_name:
            params["name"] = gene_name
        else:
            params["id"] = gene_id

        results = _result_rows(
            _get_json("/meta/nodes/Gene/search", params, self.timeout)
        )

        genes = []
        for row in results:
            node = _nested(row, "node")
            genes.append(
                {
                    "ensembl_id": node.get("ensembl_id"),
                    "name": node.get("_name"),
                    "description": node.get("description"),
                    "chr": node.get("chr"),
                    "start": node.get("start"),
                    "end": node.get("end"),
                    "gene_type": node.get("type"),
                    "druggability_tier": node.get("druggability_tier"),
                    "bio_druggable": node.get("bio_druggable"),
                    "small_mol_druggable": node.get("small_mol_druggable"),
                }
            )

        return {
            "data": {
                "genes": genes,
                "total_found": len(genes),
            },
            "metadata": {
                # Report None (not "") for an identifier that was not supplied.
                "query_name": gene_name or None,
                "query_id": gene_id or None,
                "source": "EpiGraphDB",
                "description": "Gene info including druggability tiers from EpiGraphDB",
            },
        }

    def _get_gene_drugs(self, arguments: dict) -> dict:
        """Get drug-gene associations from pharmacogenomics databases.

        Args:
            arguments: ``gene_name`` (required), optional ``pval_threshold``
                (default 0.05).

        Returns:
            ``{"data": {"gene_drug_associations", "total_count"},
            "metadata": {...}}``, or an ``"error"`` dict when ``gene_name``
            is missing.

        Raises:
            _InvalidArgument: if ``pval_threshold`` is not numeric.
        """
        gene_name = _text_arg(arguments, "gene_name")
        if not gene_name:
            return {
                "error": "gene_name parameter is required (e.g., 'TP53', 'BRCA1', 'EGFR')"
            }

        params = {
            "gene_name": gene_name,
            "pval_threshold": _number_arg(arguments, "pval_threshold", 0.05),
        }
        results = _result_rows(_get_json("/gene/drugs", params, self.timeout))

        drug_associations = []
        for row in results:
            rel = _nested(row, "r")
            drug_associations.append(
                {
                    "gene": _nested(row, "gene").get("name"),
                    "drug": _nested(row, "drug").get("label"),
                    "source": row.get("r_source"),
                    "pharmgkb_evidence": rel.get("pharmgkb_level_of_evidence"),
                    "cpic_level": rel.get("cpic_level"),
                    "pgx_on_fda_label": rel.get("pgx_on_fda_label"),
                    "guideline": rel.get("guideline"),
                }
            )

        return {
            "data": {
                "gene_drug_associations": drug_associations,
                "total_count": len(drug_associations),
            },
            "metadata": {
                "gene_name": gene_name,
                "source": "EpiGraphDB (CPIC, PharmGKB)",
                "description": "Pharmacogenomics drug-gene associations with clinical evidence levels",
            },
        }

    def _search_opengwas(self, arguments: dict) -> dict:
        """Search OpenGWAS database for GWAS studies by trait name using NLP.

        Args:
            arguments: ``query`` (required), optional ``top_n`` (default 10,
                clamped to 1..50).

        Returns:
            ``{"data": {"gwas_studies", "total_found"}, "metadata": {...}}``.
            Hits are cut to ``top_n`` before the ``meta_node`` filter is
            applied, so fewer than ``top_n`` studies may come back. An
            ``"error"`` dict is returned when ``query`` is missing.

        Raises:
            _InvalidArgument: if ``top_n`` is not an integer.
        """
        query = _text_arg(arguments, "query")
        if not query:
            return {
                "error": "query parameter is required (e.g., 'body mass index', 'coronary heart disease')"
            }

        # Lower bound matters: a negative top_n would turn the slice below
        # into "drop the last N hits" instead of "keep the first N".
        top_n = max(1, min(_number_arg(arguments, "top_n", 10, int), 50))
        params = {
            "text": query,
            "entity_type": "Gwas",
            "top_n": top_n,
        }
        payload = _get_json("/nlp/query/text", params, self.timeout)
        results_data = payload.get("results") if isinstance(payload, dict) else None

        # NLP endpoint returns {"results": {"results": [...], "clean_text": ...}}
        if isinstance(results_data, dict):
            inner_results = results_data.get("results")
        else:
            inner_results = results_data
        if not isinstance(inner_results, list):
            inner_results = []

        studies = []
        for hit in inner_results[:top_n]:
            if not isinstance(hit, dict):
                continue
            meta_node = hit.get("meta_node")
            # Keep Gwas hits and hits that carry no meta_node at all.
            if meta_node and meta_node != "Gwas":
                continue
            studies.append(
                {
                    "id": hit.get("id"),
                    "trait": hit.get("name") or hit.get("text"),
                    "similarity_score": hit.get("score"),
                    "meta_node": meta_node,
                }
            )

        return {
            "data": {
                "gwas_studies": studies,
                "total_found": len(studies),
            },
            "metadata": {
                "query": query,
                "source": "EpiGraphDB / IEU OpenGWAS (NLP search)",
                "description": (
                    "Search OpenGWAS GWAS studies by trait name similarity. "
                    "Use returned 'id' values (e.g., 'ieu-a-2') in MR analyses."
                ),
            },
        }