Source code for tooluniverse.disgenet_tool
"""
DisGeNET API tool for ToolUniverse.
DisGeNET is one of the largest public collections of genes and variants
associated with human diseases, aggregating data from multiple sources.
API Documentation: https://www.disgenet.org/api/
Requires API key: Register at https://www.disgenet.org/
"""
import os
import requests
from typing import Dict, Any, Optional, List
from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL for DisGeNET API
DISGENET_API_URL = "https://www.disgenet.org/api"
[docs]
@register_tool("DisGeNETTool")
class DisGeNETTool(BaseTool):
"""
Tool for querying DisGeNET gene-disease association database.
DisGeNET provides:
- Gene-disease associations (GDAs)
- Variant-disease associations (VDAs)
- Disease-disease associations
- Aggregated evidence scores
Requires API key via DISGENET_API_KEY environment variable.
Register for free at https://www.disgenet.org/
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """
    Configure the tool from its configuration dictionary.

    Args:
        tool_config: Tool configuration. The optional "timeout" entry
            (default 30) is later passed as the timeout of each HTTP
            request; the optional "parameter" entry (default {}) is
            stored unchanged.
    """
    super().__init__(tool_config)
    lookup = tool_config.get
    self.timeout: int = lookup("timeout", 30)
    self.parameter = lookup("parameter", {})
    # The key comes from the environment only; an empty string means
    # "not configured" and makes run() refuse to issue requests.
    self.api_key = os.getenv("DISGENET_API_KEY", "")
[docs]
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with authentication."""
headers = {
"Accept": "application/json",
"User-Agent": "ToolUniverse/DisGeNET",
}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
return headers
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Dispatch a DisGeNET request to the handler for the requested operation.

    Args:
        arguments: Dict whose "operation" entry selects the handler
            (search_gene, search_disease, get_gda, get_vda or
            get_disease_genes); the whole dict is forwarded to it.

    Returns:
        The handler's result dict, or {"status": "error", "error": msg}
        when no API key is configured or the operation is unknown.
    """
    # Without credentials no request can succeed, so fail before dispatching.
    if not self.api_key:
        return {
            "status": "error",
            "error": "DisGeNET API key required. Set DISGENET_API_KEY environment variable. Register at https://www.disgenet.org/",
        }

    # Operation name -> name of the method implementing it. Handlers are
    # resolved lazily so only the selected one is looked up.
    routes = (
        ("search_gene", "_search_gene"),
        ("search_disease", "_search_disease"),
        ("get_gda", "_get_gene_disease_associations"),
        ("get_vda", "_get_variant_disease_associations"),
        ("get_disease_genes", "_get_disease_genes"),
    )
    operation = arguments.get("operation", "")
    for op_name, handler_name in routes:
        if operation == op_name:
            return getattr(self, handler_name)(arguments)

    return {
        "status": "error",
        "error": f"Unknown operation: {operation}. Supported: search_gene, search_disease, get_gda, get_vda, get_disease_genes",
    }
[docs]
def _search_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Search for gene-disease associations by gene symbol.
Args:
arguments: Dict containing:
- gene: Gene symbol (e.g., BRCA1, TP53)
- limit: Maximum results (default 10)
"""
gene = arguments.get("gene", "")
if not gene:
return {"status": "error", "error": "Missing required parameter: gene"}
limit = arguments.get("limit", 10)
try:
response = requests.get(
f"{DISGENET_API_URL}/gda/gene/{gene}",
params={"limit": limit},
headers=self._get_headers(),
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
return {
"status": "success",
"data": {
"gene": gene,
"associations": data
if isinstance(data, list)
else data.get("results", []),
"count": len(data)
if isinstance(data, list)
else data.get("count", 0),
},
"metadata": {
"source": "DisGeNET",
"gene": gene,
},
}
except requests.exceptions.HTTPError as e:
if e.response.status_code == 401:
return {"status": "error", "error": "Invalid or expired API key"}
if e.response.status_code == 404:
return {
"status": "success",
"data": {"gene": gene, "associations": [], "count": 0},
"metadata": {"note": "No associations found for gene"},
}
return {"status": "error", "error": f"HTTP error: {e.response.status_code}"}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _search_disease(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Search for disease information and associated genes.
Args:
arguments: Dict containing:
- disease: Disease name or ID (UMLS CUI, e.g., C0006142 for breast cancer)
- limit: Maximum results (default 10)
"""
disease = arguments.get("disease", "")
if not disease:
return {"status": "error", "error": "Missing required parameter: disease"}
limit = arguments.get("limit", 10)
try:
# Try as UMLS CUI first
if disease.startswith("C") and disease[1:].isdigit():
endpoint = f"/gda/disease/{disease}"
else:
endpoint = f"/gda/disease/{disease}"
response = requests.get(
f"{DISGENET_API_URL}{endpoint}",
params={"limit": limit},
headers=self._get_headers(),
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
return {
"status": "success",
"data": {
"disease": disease,
"associations": data
if isinstance(data, list)
else data.get("results", []),
"count": len(data)
if isinstance(data, list)
else data.get("count", 0),
},
"metadata": {
"source": "DisGeNET",
"disease": disease,
},
}
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
return {
"status": "success",
"data": {"disease": disease, "associations": [], "count": 0},
"metadata": {"note": "No associations found for disease"},
}
return {"status": "error", "error": f"HTTP error: {e.response.status_code}"}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _get_gene_disease_associations(
self, arguments: Dict[str, Any]
) -> Dict[str, Any]:
"""
Get gene-disease associations with filtering options.
Args:
arguments: Dict containing:
- gene: Gene symbol (optional if disease provided)
- disease: Disease ID (optional if gene provided)
- source: Data source filter (CURATED, ANIMAL_MODELS, LITERATURE, etc.)
- min_score: Minimum GDA score (0-1)
- limit: Maximum results
"""
gene = arguments.get("gene", "")
disease = arguments.get("disease", "")
if not gene and not disease:
return {"status": "error", "error": "Either gene or disease required"}
params = {"limit": arguments.get("limit", 25)}
if arguments.get("source"):
params["source"] = arguments["source"]
if arguments.get("min_score"):
params["min_score"] = arguments["min_score"]
try:
if gene:
endpoint = f"/gda/gene/{gene}"
else:
endpoint = f"/gda/disease/{disease}"
response = requests.get(
f"{DISGENET_API_URL}{endpoint}",
params=params,
headers=self._get_headers(),
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
associations = data if isinstance(data, list) else data.get("results", [])
return {
"status": "success",
"data": {
"gene": gene if gene else None,
"disease": disease if disease else None,
"associations": associations,
"count": len(associations),
},
"metadata": {
"source": "DisGeNET GDA",
"filters": params,
},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _get_variant_disease_associations(
self, arguments: Dict[str, Any]
) -> Dict[str, Any]:
"""
Get variant-disease associations.
Args:
arguments: Dict containing:
- variant: Variant ID (rsID, e.g., rs1234)
- gene: Gene symbol to get all variants
- limit: Maximum results
"""
variant = arguments.get("variant", "")
gene = arguments.get("gene", "")
if not variant and not gene:
return {"status": "error", "error": "Either variant or gene required"}
params = {"limit": arguments.get("limit", 25)}
try:
if variant:
endpoint = f"/vda/variant/{variant}"
else:
endpoint = f"/vda/gene/{gene}"
response = requests.get(
f"{DISGENET_API_URL}{endpoint}",
params=params,
headers=self._get_headers(),
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
associations = data if isinstance(data, list) else data.get("results", [])
return {
"status": "success",
"data": {
"variant": variant if variant else None,
"gene": gene if gene else None,
"associations": associations,
"count": len(associations),
},
"metadata": {
"source": "DisGeNET VDA",
},
}
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
return {
"status": "success",
"data": {"associations": [], "count": 0},
"metadata": {"note": "No variant associations found"},
}
return {"status": "error", "error": f"HTTP error: {e.response.status_code}"}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _get_disease_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get all genes associated with a disease.
Args:
arguments: Dict containing:
- disease: Disease ID (UMLS CUI) or disease name
- min_score: Minimum association score (0-1)
- limit: Maximum results
"""
disease = arguments.get("disease", "")
if not disease:
return {"status": "error", "error": "Missing required parameter: disease"}
params = {
"limit": arguments.get("limit", 50),
}
if arguments.get("min_score"):
params["min_score"] = arguments["min_score"]
try:
response = requests.get(
f"{DISGENET_API_URL}/gda/disease/{disease}",
params=params,
headers=self._get_headers(),
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
associations = data if isinstance(data, list) else data.get("results", [])
# Extract unique genes
genes = []
seen = set()
for assoc in associations:
gene_symbol = assoc.get("gene_symbol", assoc.get("geneSymbol"))
if gene_symbol and gene_symbol not in seen:
seen.add(gene_symbol)
genes.append(
{
"symbol": gene_symbol,
"score": assoc.get("score", assoc.get("gda_score")),
"evidence_count": assoc.get(
"evidence_count", assoc.get("nPublications")
),
}
)
return {
"status": "success",
"data": {
"disease": disease,
"genes": genes,
"gene_count": len(genes),
},
"metadata": {
"source": "DisGeNET",
"disease": disease,
},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}