# Source code for tooluniverse.disgenet_tool
"""
DisGeNET API tool for ToolUniverse.
DisGeNET is one of the largest public collections of genes and variants
associated with human diseases, aggregating data from multiple sources.
API Documentation: https://api.disgenet.com/
Requires API key: Register at https://www.disgenet.com/
Note: the DisGeNET v1 API exposes gene/variant-disease associations through the
`/gda/summary` and `/vda/summary` endpoints using QUERY parameters
(`gene_symbol`, `gene_ncbi_id`, `disease`, `variant`), authenticated with an
`Authorization: <api_key>` header (no "Bearer " prefix). Academic API keys can
only access CURATED sources (CLINGEN, CLINVAR, ORPHANET, GENCC, UNIPROT, ...).
"""
import os
import re
from typing import Any, Dict, List, Optional, Tuple

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the DisGeNET REST API (version 1). Endpoint paths such as
# "/gda/summary" and "/vda/summary" are appended to this value.
DISGENET_API_URL = "https://api.disgenet.com/api/v1"
# Matches a disease passed as a bare UMLS CUI such as "C0006142": an upper-case
# "C" followed by one or more digits, anchored at both ends. The number of
# digits is not checked.
_CUI_RE = re.compile(r"^C\d+$")
@register_tool("DisGeNETTool")
class DisGeNETTool(BaseTool):
    """
    Tool for querying the DisGeNET gene-disease association database.

    Supports:
    - Gene-disease associations (GDAs)
    - Variant-disease associations (VDAs)
    - Disease -> associated genes

    Every operation returns a dict whose ``status`` is ``"success"`` or
    ``"error"``; request failures and invalid inputs are reported through the
    ``error`` field instead of being raised.

    Requires API key via DISGENET_API_KEY environment variable.
    Register for free at https://www.disgenet.com/
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Read settings from ``tool_config`` and the API key from the environment.

        Args:
            tool_config: Tool configuration; the optional ``timeout`` (default
                30) and ``parameter`` (default ``{}``) entries are read here.
        """
        super().__init__(tool_config)
        # Passed as ``timeout=`` to ``requests.get``.
        self.timeout: int = tool_config.get("timeout", 30)
        self.parameter = tool_config.get("parameter", {})
        # Stripped so stray whitespace/newlines in the environment variable do
        # not end up inside the Authorization header value.
        self.api_key = os.environ.get("DISGENET_API_KEY", "").strip()

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with authentication.

        The DisGeNET v1 API expects the raw key in the Authorization header
        (NOT an OAuth "Bearer <key>" form).
        """
        headers = {
            "Accept": "application/json",
            "User-Agent": "ToolUniverse/DisGeNET",
        }
        if self.api_key:
            headers["Authorization"] = self.api_key
        return headers

    # ------------------------------------------------------------------ helpers
    @staticmethod
    def _arg_text(arguments: Dict[str, Any], key: str) -> str:
        """Return ``arguments[key]`` as a stripped string, or ``""`` if unusable.

        Strings are stripped; integers (e.g. an NCBI gene id passed as a JSON
        number) are converted to text. Anything else, including ``None`` and a
        missing key, is treated as "not provided".
        """
        value = arguments.get(key)
        if isinstance(value, str):
            return value.strip()
        # bool is a subclass of int but is never a meaningful identifier here.
        if isinstance(value, int) and not isinstance(value, bool):
            return str(value)
        return ""

    @staticmethod
    def _normalize_disease(disease: str) -> Optional[str]:
        """Return a DisGeNET-acceptable disease code, or None if the input is a
        free-text name (which the summary endpoint does not accept).

        Accepted forms:
        - ``UMLS_<cui>`` in any letter case -> returned fully upper-cased;
        - a bare CUI such as ``C0006142`` (any letter case) -> ``UMLS_C0006142``;
        - any other code whose prefix before the first ``_`` is upper-case
          (e.g. ``MONDO_...``, ``DO_...``, ``HPO_...``) -> returned unchanged.
        """
        d = (disease or "").strip()
        if not d:
            return None
        upper = d.upper()
        if upper.startswith("UMLS_"):
            cui = upper[5:]
            # A bare "UMLS_" prefix carries no identifier at all.
            return "UMLS_" + cui if cui else None
        if _CUI_RE.match(upper):
            return "UMLS_" + upper
        # Already a vocabulary-prefixed code (e.g. MONDO_..., DO_..., HPO_...)
        if "_" in d and d.split("_", 1)[0].isupper():
            return d
        return None

    @staticmethod
    def _invalid_disease(disease: str) -> Dict[str, Any]:
        """Build the error result for a disease value that is not a usable code."""
        return {
            "status": "error",
            "error": f"'{disease}' is not a UMLS CUI. Resolve the disease name to a CUI first (e.g. umls_search_concepts), then pass disease='C0152200'.",
        }

    @staticmethod
    def _parse_filters(
        arguments: Dict[str, Any], default_limit: int
    ) -> Tuple[Optional[float], Optional[int], Optional[str]]:
        """Validate the optional ``min_score`` and ``limit`` arguments.

        Returns:
            ``(min_score, limit, error)``. ``error`` is ``None`` on success and
            a human-readable message otherwise. ``min_score`` is a float or
            ``None`` (no score filter). ``limit`` is a non-negative int or
            ``None``; ``None`` and ``0`` both mean "no limit". When ``limit``
            is absent from ``arguments``, ``default_limit`` is used.
        """
        raw_score = arguments.get("min_score")
        min_score: Optional[float] = None
        if raw_score is not None and raw_score != "":
            try:
                min_score = float(raw_score)
            except (TypeError, ValueError):
                return None, None, f"Invalid 'min_score': {raw_score!r}. Expected a number."

        raw_limit = arguments.get("limit", default_limit)
        limit: Optional[int] = None
        if raw_limit is not None and raw_limit != "":
            try:
                limit = int(raw_limit)
            except (TypeError, ValueError):
                return None, None, f"Invalid 'limit': {raw_limit!r}. Expected a non-negative integer."
            if limit < 0:
                return None, None, f"Invalid 'limit': {raw_limit!r}. Expected a non-negative integer."
        return min_score, limit, None

    def _query_summary(
        self, kind: str, query: Dict[str, Any]
    ) -> Tuple[List[Dict[str, Any]], List[Any]]:
        """Call /{gda,vda}/summary with query params. Returns (payload, warnings).

        Only the first result page (``page_number=0``) is requested. Query
        entries whose value is ``None`` or ``""`` are omitted.

        Raises:
            requests.exceptions.RequestException: on network or HTTP errors.
            ValueError: if the response body does not have the expected shape.
            Callers convert both into an error result.
        """
        params: Dict[str, Any] = {"page_number": 0}
        params.update({k: v for k, v in query.items() if v not in (None, "")})
        response = requests.get(
            f"{DISGENET_API_URL}/{kind}/summary",
            params=params,
            headers=self._get_headers(),
            timeout=self.timeout,
        )
        response.raise_for_status()
        body = response.json()
        if not isinstance(body, dict):
            raise ValueError("Unexpected DisGeNET response: expected a JSON object.")
        payload = body.get("payload") or []
        if not isinstance(payload, list):
            raise ValueError("Unexpected DisGeNET response: 'payload' is not a list.")
        # Rows are formatted outside the callers' try blocks; drop anything
        # that is not a dict so formatting cannot raise.
        payload = [row for row in payload if isinstance(row, dict)]
        warnings = body.get("warnings") or []
        return payload, warnings

    @staticmethod
    def _fmt_gda(row: Dict[str, Any]) -> Dict[str, Any]:
        """Map one raw GDA row to the tool's output field names."""
        return {
            "gene_symbol": row.get("symbolOfGene"),
            "gene_ncbi_id": row.get("geneNcbiID"),
            "disease_name": row.get("diseaseName"),
            "disease_umls_cui": row.get("diseaseUMLSCUI"),
            "score": row.get("score"),
            "n_pmids": row.get("numPMIDs"),
            "ei": row.get("ei"),
        }

    @staticmethod
    def _fmt_vda(row: Dict[str, Any]) -> Dict[str, Any]:
        """Map one raw VDA row to the tool's output field names."""
        return {
            "variant": row.get("variantStrID"),
            "gene_symbol": row.get("symbolOfGene"),
            "disease_name": row.get("diseaseName"),
            "disease_umls_cui": row.get("diseaseUMLSCUI"),
            "score": row.get("score"),
            "n_pmids": row.get("numPMIDs"),
        }

    @staticmethod
    def _apply_filters(rows: List[Dict[str, Any]], min_score, limit):
        """Filter ``rows`` by minimum score, then truncate to ``limit`` rows.

        Args:
            rows: Dicts carrying an optional ``"score"`` entry.
            min_score: Keep only rows whose score is numeric and
                ``>= min_score``; ``None`` disables the filter.
            limit: Maximum number of rows to keep; ``None``, ``0`` or a
                negative value disables truncation.

        Raises:
            TypeError, ValueError: if ``min_score`` / ``limit`` cannot be
                converted to a number. The operations in this class validate
                both values with ``_parse_filters`` before calling this.
        """
        if min_score is not None:
            threshold = float(min_score)
            rows = [
                r for r in rows
                if isinstance(r.get("score"), (int, float)) and r["score"] >= threshold
            ]
        if limit:
            max_rows = int(limit)
            # A negative slice bound would silently drop rows from the end.
            if max_rows > 0:
                rows = rows[:max_rows]
        return rows

    def _err(self, e: Exception) -> Dict[str, Any]:
        """Convert an exception from a DisGeNET request into an error result."""
        if isinstance(e, requests.exceptions.HTTPError) and e.response is not None:
            code = e.response.status_code
            if code in (401, 403):
                return {"status": "error", "error": "DisGeNET API key invalid or unauthorized for this resource (academic keys only access CURATED sources)."}
            return {"status": "error", "error": f"DisGeNET HTTP error: {code}"}
        return {"status": "error", "error": f"DisGeNET request failed: {str(e)}"}

    # --------------------------------------------------------------- dispatch
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute DisGeNET API call based on operation type.

        The operation is taken from ``arguments["operation"]``; when it is
        missing or empty, ``self.get_schema_const_operation()`` supplies it
        (not defined in this class - presumably inherited from ``BaseTool``).
        """
        if not self.api_key:
            return {
                "status": "error",
                "error": "DisGeNET API key required. Set DISGENET_API_KEY environment variable. Register at https://www.disgenet.com/",
            }
        arguments = arguments or {}
        operation = arguments.get("operation", "")
        if not operation:
            operation = self.get_schema_const_operation()
        if operation in ("search_gene", "get_gda"):
            return self._gene_disease(arguments)
        elif operation in ("search_disease", "get_disease_genes"):
            return self._disease_genes(arguments)
        elif operation == "get_vda":
            return self._variant_disease(arguments)
        else:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Supported: search_gene, search_disease, get_gda, get_vda, get_disease_genes",
            }

    # ---------------------------------------------------------------- operations
    def _gene_disease(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Gene-disease associations by gene symbol (or NCBI gene id).

        Reads ``gene`` and/or ``disease`` (at least one is required), plus the
        optional ``source``, ``min_score`` and ``limit`` (default 25). An
        all-digit ``gene`` is sent as ``gene_ncbi_id``, otherwise as
        ``gene_symbol``.
        """
        gene = self._arg_text(arguments, "gene")
        disease = self._arg_text(arguments, "disease")
        if not gene and not disease:
            return {"status": "error", "error": "Provide 'gene' (symbol or NCBI id) or 'disease' (UMLS CUI)."}
        query: Dict[str, Any] = {}
        if gene:
            if gene.isdigit():
                query["gene_ncbi_id"] = gene
            else:
                query["gene_symbol"] = gene
        if disease:
            code = self._normalize_disease(disease)
            if code is None:
                return self._invalid_disease(disease)
            query["disease"] = code
        if arguments.get("source"):
            query["source"] = arguments["source"]
        # Validate filters before spending a network round-trip.
        min_score, limit, filter_error = self._parse_filters(arguments, 25)
        if filter_error:
            return {"status": "error", "error": filter_error}
        try:
            payload, warnings = self._query_summary("gda", query)
        except Exception as e:  # noqa: BLE001 - never raise out of run()
            return self._err(e)
        rows = self._apply_filters([self._fmt_gda(r) for r in payload], min_score, limit)
        return {
            "status": "success",
            "data": {"gene": gene or None, "disease": disease or None,
                     "associations": rows, "count": len(rows)},
            "metadata": {"source": "DisGeNET GDA", "warnings": warnings,
                         "note": "Academic keys return CURATED sources only."},
        }

    def _disease_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """All genes associated with a disease (UMLS CUI).

        Reads the required ``disease`` plus the optional ``source``,
        ``min_score`` and ``limit`` (default 50). Genes are de-duplicated by
        symbol, keeping the first row seen for each, before filtering.
        """
        disease = self._arg_text(arguments, "disease")
        if not disease:
            return {"status": "error", "error": "Missing required parameter: disease (UMLS CUI, e.g. C0152200)."}
        code = self._normalize_disease(disease)
        if code is None:
            return self._invalid_disease(disease)
        query: Dict[str, Any] = {"disease": code}
        if arguments.get("source"):
            query["source"] = arguments["source"]
        min_score, limit, filter_error = self._parse_filters(arguments, 50)
        if filter_error:
            return {"status": "error", "error": filter_error}
        try:
            payload, warnings = self._query_summary("gda", query)
        except Exception as e:  # noqa: BLE001
            return self._err(e)
        genes, seen = [], set()
        for row in payload:
            sym = row.get("symbolOfGene")
            if sym and sym not in seen:
                seen.add(sym)
                genes.append({"symbol": sym, "ncbi_id": row.get("geneNcbiID"),
                              "score": row.get("score"), "n_pmids": row.get("numPMIDs")})
        genes = self._apply_filters(genes, min_score, limit)
        return {
            "status": "success",
            "data": {"disease": disease, "disease_code": code,
                     "genes": genes, "gene_count": len(genes)},
            "metadata": {"source": "DisGeNET", "warnings": warnings,
                         "note": "Academic keys return CURATED sources only."},
        }

    def _variant_disease(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Variant-disease associations by variant rsID or gene symbol.

        Reads ``variant`` and/or ``gene`` (at least one is required), plus the
        optional ``min_score`` and ``limit`` (default 25). When both are
        given, only ``variant`` is sent to the API; ``gene`` is still echoed
        back in the result.
        """
        variant = self._arg_text(arguments, "variant")
        gene = self._arg_text(arguments, "gene")
        if not variant and not gene:
            return {"status": "error", "error": "Provide 'variant' (rsID) or 'gene' (symbol)."}
        query: Dict[str, Any] = {}
        if variant:
            query["variant"] = variant
        elif gene:
            query["gene_symbol"] = gene
        min_score, limit, filter_error = self._parse_filters(arguments, 25)
        if filter_error:
            return {"status": "error", "error": filter_error}
        try:
            payload, warnings = self._query_summary("vda", query)
        except Exception as e:  # noqa: BLE001
            return self._err(e)
        rows = self._apply_filters([self._fmt_vda(r) for r in payload], min_score, limit)
        return {
            "status": "success",
            "data": {"variant": variant or None, "gene": gene or None,
                     "associations": rows, "count": len(rows)},
            "metadata": {"source": "DisGeNET VDA", "warnings": warnings,
                         "note": "Academic keys return CURATED sources only."},
        }