Source code for tooluniverse.epigraphdb_tool
"""
EpiGraphDB API tool for ToolUniverse.
EpiGraphDB is a database and analysis platform integrating epidemiological data
with knowledge graph approaches. It provides Mendelian Randomization analysis,
genetic correlations, drug repurposing insights, and GWAS-to-disease ontology mappings.
API: https://api.epigraphdb.org
No authentication required. Public access.
Documentation: https://epigraphdb.org
"""
import requests
from typing import Any
from .base_rest_tool import BaseRESTTool
from .tool_registry import register_tool
EPIGRAPHDB_BASE = "https://api.epigraphdb.org"
[docs]
@register_tool("EpiGraphDBTool")
class EpiGraphDBTool(BaseRESTTool):
"""
Tool for querying the EpiGraphDB API.
Provides access to:
- Mendelian Randomization (MR) results between GWAS traits
- Genetic correlations between traits
- Drug repurposing via genetic evidence (drugs targeting risk factor genes)
- GWAS trait to EFO/disease ontology mapping
- Gene druggability information via PPI network
- Gene-drug associations from pharmacogenomics databases
- OpenGWAS GWAS study search
Uses IEU OpenGWAS trait IDs (e.g., 'ieu-a-2' for BMI, 'ieu-a-7' for CHD).
No authentication required.
"""
[docs]
def __init__(self, tool_config: dict):
super().__init__(tool_config)
self.timeout = 45 # MR queries can be slow
self.operation = tool_config.get("fields", {}).get("operation", "mr")
[docs]
def run(self, arguments: dict) -> dict:
"""Execute the EpiGraphDB API call."""
try:
return self._query(arguments)
except requests.exceptions.Timeout:
return {
"status": "error",
"error": f"EpiGraphDB request timed out after {self.timeout}s",
}
except requests.exceptions.ConnectionError:
return {
"status": "error",
"error": "Failed to connect to EpiGraphDB. Check network connectivity.",
}
except requests.exceptions.HTTPError as e:
return {
"status": "error",
"error": f"EpiGraphDB HTTP error: {e.response.status_code} - {e.response.text[:200]}",
}
except Exception as e:
return {
"status": "error",
"error": f"Unexpected error querying EpiGraphDB: {str(e)}",
}
[docs]
def _query(self, arguments: dict) -> dict:
"""Route to the appropriate endpoint."""
op = self.operation
if op == "mr":
return self._get_mr(arguments)
elif op == "genetic_cor":
return self._get_genetic_cor(arguments)
elif op == "drugs_risk_factors":
return self._get_drugs_risk_factors(arguments)
elif op == "gwas_efo":
return self._get_gwas_efo(arguments)
elif op == "disease_efo":
return self._get_disease_efo(arguments)
elif op == "gene_search":
return self._search_gene(arguments)
elif op == "gene_drugs":
return self._get_gene_drugs(arguments)
elif op == "opengwas_search":
return self._search_opengwas(arguments)
else:
return {"status": "error", "error": f"Unknown operation: {op}"}
[docs]
@staticmethod
def _casing_variants(trait: str) -> list:
"""Return trait label casing variants to try, original first.
EpiGraphDB's /mr endpoint matches GWAS trait labels exactly and
case-sensitively (e.g. 'coronary heart disease' returns nothing but
'Coronary heart disease' returns results). Try the sentence-case form
as a fallback so a lowercase input does not silently miss.
"""
variants = [trait]
for variant in (trait.capitalize(), trait.title()):
if variant not in variants:
variants.append(variant)
return variants
[docs]
def _query_mr(self, exposure: str, outcome: str, pval_threshold: float) -> list:
"""Call the EpiGraphDB /mr endpoint for one exposure/outcome label pair."""
params = {
"exposure_trait": exposure,
"outcome_trait": outcome,
"pval_threshold": pval_threshold,
"mode": "table",
}
resp = requests.get(
f"{EPIGRAPHDB_BASE}/mr", params=params, timeout=self.timeout
)
resp.raise_for_status()
return resp.json().get("results", [])
[docs]
def _get_mr(self, arguments: dict) -> dict:
"""Get Mendelian Randomization results between exposure and outcome traits."""
exposure_trait = arguments.get("exposure_trait", "").strip()
outcome_trait = arguments.get("outcome_trait", "").strip()
if not exposure_trait or not outcome_trait:
return {
"status": "error",
"error": "Both exposure_trait and outcome_trait are required",
}
pval_threshold = float(arguments.get("pval_threshold", 1e-5))
results = self._query_mr(exposure_trait, outcome_trait, pval_threshold)
used_exposure, used_outcome = exposure_trait, outcome_trait
# Fallback: EpiGraphDB trait labels are case-sensitive, so retry with
# sentence-case variants when the exact labels return nothing.
if not results:
for ex in self._casing_variants(exposure_trait):
for out in self._casing_variants(outcome_trait):
if (ex, out) == (exposure_trait, outcome_trait):
continue # already tried the exact labels above
retry = self._query_mr(ex, out, pval_threshold)
if retry:
results, used_exposure, used_outcome = retry, ex, out
break
if results:
break
mr_results = []
for r in results[:50]:
exposure = r.get("exposure", {})
outcome = r.get("outcome", {})
mr = r.get("mr", {})
mr_results.append(
{
"exposure_id": exposure.get("id"),
"exposure_trait": exposure.get("trait"),
"outcome_id": outcome.get("id"),
"outcome_trait": outcome.get("trait"),
"beta": mr.get("b"),
"se": mr.get("se"),
"pval": mr.get("pval"),
"method": mr.get("method"),
"selection": mr.get("selection"),
"moescore": mr.get("moescore"),
}
)
metadata = {
"exposure_trait": used_exposure,
"outcome_trait": used_outcome,
"pval_threshold": pval_threshold,
"source": "EpiGraphDB",
"description": (
"Mendelian Randomization evidence. beta = causal effect estimate. "
"moescore > 0.9 suggests high-quality instruments."
),
}
if not results:
# Don't let an exact-label miss read as 'no causal evidence'.
metadata["note"] = (
f"No MR results for exposure '{exposure_trait}' -> outcome "
f"'{outcome_trait}'. EpiGraphDB matches exact, case-sensitive GWAS "
"trait labels. Use EpiGraphDB_search_opengwas to find the exact "
"trait label, then retry with that label."
)
elif (used_exposure, used_outcome) != (exposure_trait, outcome_trait):
metadata["note"] = (
f"Input labels did not match exactly; matched on '{used_exposure}'"
f" -> '{used_outcome}' after case normalization."
)
return {
"status": "success",
"data": {
"mr_results": mr_results,
"total_count": len(results),
},
"metadata": metadata,
}
[docs]
def _get_genetic_cor(self, arguments: dict) -> dict:
"""Get genetic correlations between a trait and other GWAS traits."""
trait = arguments.get("trait", "").strip()
if not trait:
return {
"status": "error",
"error": "trait parameter is required (e.g., 'Body mass index')",
}
# NOTE: the /genetic-cor endpoint ignores pval_threshold (it returns a
# fixed |rg| > 0.8 set), so it is intentionally not forwarded.
results = self._query_genetic_cor(trait)
used_trait = trait
# The /genetic-cor graph matches exact, case-sensitive trait labels;
# retry sentence-case variants when the exact label returns nothing.
if not results:
for variant in self._casing_variants(trait):
if variant == trait:
continue
retry = self._query_genetic_cor(variant)
if retry:
results, used_trait = retry, variant
break
cor_results = []
for r in results[:50]:
trait1 = r.get("trait1", {})
trait2 = r.get("trait2", {})
cor = r.get("cor", {})
cor_results.append(
{
"trait1_id": trait1.get("id"),
"trait1_trait": trait1.get("trait"),
"trait2_id": trait2.get("id"),
"trait2_trait": trait2.get("trait"),
"rg": cor.get("rg"),
"rg_se": cor.get("rg_se"),
"rg_pval": cor.get("rg_pval"),
"h2": cor.get("h2"),
"h2_intercept": cor.get("h2_intercept"),
}
)
metadata = {
"trait": used_trait,
"source": "EpiGraphDB",
"description": (
"rg = genetic correlation coefficient (-1 to 1). The /genetic-cor "
"endpoint returns only strong correlations (|rg| > 0.8); the "
"pval_threshold argument is not applied by the upstream API."
),
}
if not results:
# Many common labels (e.g. 'Body mass index') have no node in the
# genetic-correlation graph even though they exist in OpenGWAS;
# don't let that read as 'no shared genetics'.
metadata["note"] = (
f"No genetic correlations found for '{trait}'. The /genetic-cor "
"graph matches exact, case-sensitive labels and only stores "
"|rg| > 0.8 edges, so many traits return empty. Try a related "
"label (e.g. 'Waist circumference' for adiposity) or rely on "
"bidirectional MR for triangulation instead."
)
elif used_trait != trait:
metadata["note"] = (
f"Matched on '{used_trait}' after case normalization of '{trait}'."
)
return {
"status": "success",
"data": {
"correlations": cor_results,
"total_count": len(results),
},
"metadata": metadata,
}
[docs]
def _query_genetic_cor(self, trait: str) -> list:
"""Call the EpiGraphDB /genetic-cor endpoint for one trait label."""
params = {"trait": trait, "mode": "table"}
resp = requests.get(
f"{EPIGRAPHDB_BASE}/genetic-cor", params=params, timeout=self.timeout
)
resp.raise_for_status()
return resp.json().get("results", [])
[docs]
def _get_drugs_risk_factors(self, arguments: dict) -> dict:
"""Get drugs associated with a risk factor trait via genetic evidence."""
trait = arguments.get("trait", "").strip()
if not trait:
return {
"status": "error",
"error": "trait parameter is required (e.g., 'Body mass index', 'LDL cholesterol')",
}
pval_threshold = float(arguments.get("pval_threshold", 1e-4))
params = {
"trait": trait,
"pval_threshold": pval_threshold,
"mode": "table",
}
url = f"{EPIGRAPHDB_BASE}/drugs/risk-factors"
resp = requests.get(url, params=params, timeout=self.timeout)
resp.raise_for_status()
data = resp.json()
results = data.get("results", [])
# Deduplicate by drug label
seen_drugs: dict[str, Any] = {}
for r in results:
drug = r.get("drug", {})
drug_label = drug.get("label", "")
gene = r.get("gene", {})
variant = r.get("variant", {})
if drug_label not in seen_drugs:
seen_drugs[drug_label] = {
"drug": drug_label,
"gene": gene.get("name"),
"variant": variant.get("name"),
"evidence_count": 1,
}
else:
seen_drugs[drug_label]["evidence_count"] += 1
drug_list = sorted(seen_drugs.values(), key=lambda x: -x["evidence_count"])[:30]
return {
"status": "success",
"data": {
"drugs": drug_list,
"total_evidence_count": len(results),
"unique_drug_count": len(seen_drugs),
},
"metadata": {
"trait": trait,
"pval_threshold": pval_threshold,
"source": "EpiGraphDB",
"description": (
"Drugs whose target genes are associated with the trait via GWAS. "
"Useful for drug repurposing and prioritization."
),
},
}
[docs]
def _get_gwas_efo(self, arguments: dict) -> dict:
"""Map GWAS traits to EFO (Experimental Factor Ontology) terms."""
trait = arguments.get("trait", "").strip()
if not trait:
return {
"status": "error",
"error": "trait parameter is required (e.g., 'Body mass index', 'coronary artery disease')",
}
params = {
"trait": trait,
"mode": "table",
}
url = f"{EPIGRAPHDB_BASE}/ontology/gwas-efo"
resp = requests.get(url, params=params, timeout=self.timeout)
resp.raise_for_status()
data = resp.json()
results = data.get("results", [])
# Deduplicate by EFO term
seen_efo: dict = {}
for r in results:
efo = r.get("efo", {})
gwas = r.get("gwas", {})
efo_id = efo.get("id", "")
if efo_id not in seen_efo:
seen_efo[efo_id] = {
"efo_id": efo_id,
"efo_label": efo.get("value"),
"gwas_id": gwas.get("id"),
"gwas_trait": gwas.get("trait"),
"score": r.get("r", {}).get("score"),
}
efo_list = list(seen_efo.values())
return {
"status": "success",
"data": {
"efo_mappings": efo_list,
"total_count": len(efo_list),
},
"metadata": {
"trait": trait,
"source": "EpiGraphDB",
"description": "Maps GWAS trait names to EFO ontology terms for standardization",
},
}
[docs]
def _get_disease_efo(self, arguments: dict) -> dict:
"""Map a disease label to EFO terms and associated GWAS studies."""
disease = arguments.get("disease", "").strip()
if not disease:
return {
"status": "error",
"error": "disease parameter is required (e.g., 'type 2 diabetes', 'breast cancer')",
}
params = {
"disease_label": disease,
"mode": "table",
}
url = f"{EPIGRAPHDB_BASE}/ontology/disease-efo"
resp = requests.get(url, params=params, timeout=self.timeout)
resp.raise_for_status()
data = resp.json()
results = data.get("results", [])
mappings = []
for r in results[:20]:
efo = r.get("efo", {})
gwas = r.get("gwas", {})
mappings.append(
{
"efo_id": efo.get("id"),
"efo_label": efo.get("value"),
"gwas_id": gwas.get("id"),
"gwas_trait": gwas.get("trait"),
"score": r.get("r", {}).get("score"),
}
)
return {
"status": "success",
"data": {
"disease_efo_mappings": mappings,
"total_count": len(results),
},
"metadata": {
"disease": disease,
"source": "EpiGraphDB",
"description": "Maps disease labels to EFO terms and OpenGWAS studies",
},
}
[docs]
def _search_gene(self, arguments: dict) -> dict:
"""Search for genes in EpiGraphDB by name or Ensembl ID."""
gene_name = arguments.get("gene_name")
gene_id = arguments.get("gene_id")
if not gene_name and not gene_id:
return {
"status": "error",
"error": "Either gene_name or gene_id (Ensembl ID) is required",
}
params: dict[str, Any] = {"limit": min(int(arguments.get("limit", 10)), 50)}
if gene_name:
params["name"] = gene_name
elif gene_id:
params["id"] = gene_id
url = f"{EPIGRAPHDB_BASE}/meta/nodes/Gene/search"
resp = requests.get(url, params=params, timeout=self.timeout)
resp.raise_for_status()
data = resp.json()
results = data.get("results", [])
genes = []
for r in results:
node = r.get("node", {})
genes.append(
{
"ensembl_id": node.get("ensembl_id"),
"name": node.get("_name"),
"description": node.get("description"),
"chr": node.get("chr"),
"start": node.get("start"),
"end": node.get("end"),
"gene_type": node.get("type"),
"druggability_tier": node.get("druggability_tier"),
"bio_druggable": node.get("bio_druggable"),
"small_mol_druggable": node.get("small_mol_druggable"),
}
)
return {
"status": "success",
"data": {
"genes": genes,
"total_found": len(genes),
},
"metadata": {
"query_name": gene_name,
"query_id": gene_id,
"source": "EpiGraphDB",
"description": "Gene info including druggability tiers from EpiGraphDB",
},
}
[docs]
def _get_gene_drugs(self, arguments: dict) -> dict:
"""Get drug-gene associations from pharmacogenomics databases."""
gene_name = (
arguments.get("gene_name") or arguments.get("gene_symbol") or ""
).strip()
if not gene_name:
return {
"status": "error",
"error": "gene_name parameter is required (e.g., 'TP53', 'BRCA1', 'EGFR')",
}
params = {
"gene_name": gene_name,
"pval_threshold": float(arguments.get("pval_threshold", 0.05)),
}
url = f"{EPIGRAPHDB_BASE}/gene/drugs"
resp = requests.get(url, params=params, timeout=self.timeout)
resp.raise_for_status()
data = resp.json()
results = data.get("results", [])
drug_associations = []
for r in results:
gene = r.get("gene", {})
drug = r.get("drug", {})
rel = r.get("r", {})
drug_associations.append(
{
"gene": gene.get("name"),
"drug": drug.get("label"),
"source": r.get("r_source"),
"pharmgkb_evidence": rel.get("pharmgkb_level_of_evidence"),
"cpic_level": rel.get("cpic_level"),
"pgx_on_fda_label": rel.get("pgx_on_fda_label"),
"guideline": rel.get("guideline"),
}
)
return {
"status": "success",
"data": {
"gene_drug_associations": drug_associations,
"total_count": len(drug_associations),
},
"metadata": {
"gene_name": gene_name,
"source": "EpiGraphDB (CPIC, PharmGKB)",
"description": "Pharmacogenomics drug-gene associations with clinical evidence levels",
},
}
[docs]
def _search_opengwas(self, arguments: dict) -> dict:
"""Search OpenGWAS database for GWAS studies by trait name using NLP."""
query = arguments.get("query", "").strip()
if not query:
return {
"status": "error",
"error": "query parameter is required (e.g., 'body mass index', 'coronary heart disease')",
}
top_n = min(int(arguments.get("top_n", 10)), 50)
params = {
"text": query,
"entity_type": "Gwas",
"top_n": top_n,
}
url = f"{EPIGRAPHDB_BASE}/nlp/query/text"
resp = requests.get(url, params=params, timeout=self.timeout)
resp.raise_for_status()
data = resp.json()
results_data = data.get("results", {})
# NLP endpoint returns {"results": {"results": [...], "clean_text": ...}}
if isinstance(results_data, dict):
inner_results = results_data.get("results", [])
else:
inner_results = results_data if isinstance(results_data, list) else []
studies = []
for r in inner_results[:top_n]:
if r.get("meta_node") == "Gwas" or not r.get("meta_node"):
studies.append(
{
"id": r.get("id"),
"trait": r.get("name") or r.get("text"),
"similarity_score": r.get("score"),
"meta_node": r.get("meta_node"),
}
)
return {
"status": "success",
"data": {
"gwas_studies": studies,
"total_found": len(studies),
},
"metadata": {
"query": query,
"source": "EpiGraphDB / IEU OpenGWAS (NLP search)",
"description": (
"Search OpenGWAS GWAS studies by trait name similarity. "
"Use returned 'id' values (e.g., 'ieu-a-2') in MR analyses."
),
},
}