# Source code for tooluniverse.wormbase_tool
# wormbase_tool.py
"""
WormBase REST API tool for ToolUniverse.
WormBase is the central repository for research using the model organism
Caenorhabditis elegans and related nematodes. It provides curated gene
information, phenotypes, expression data, and orthologs.
API: https://rest.wormbase.org
No authentication required. Free for academic/research use.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the WormBase REST API; the tool appends widget paths such as
# "/widget/gene/<gene_id>/overview" to it when building request URLs.
WORMBASE_BASE_URL = "https://rest.wormbase.org/rest"
@register_tool("WormBaseTool")
class WormBaseTool(BaseTool):
    """
    Tool for querying WormBase, the C. elegans genome database.

    Provides detailed gene information for C. elegans and other
    nematodes including phenotypes, expression data, orthologs,
    and functional annotations.

    No authentication required.

    Attributes:
        timeout: Per-request timeout in seconds (config key ``timeout``,
            default 30).
        endpoint_type: Which gene widget this tool instance queries
            (config key ``fields.endpoint_type``, default
            ``"gene_overview"``).
    """

    # Maps each supported endpoint_type to the name of its handler method.
    _ENDPOINT_HANDLERS = {
        "gene_overview": "_gene_overview",
        "gene_phenotypes": "_gene_phenotypes",
        "gene_expression": "_gene_expression",
    }

    # Shared error text returned by every handler when gene_id is missing.
    _GENE_ID_REQUIRED = (
        "gene_id parameter is required (e.g., 'WBGene00006763' for unc-26)"
    )

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and endpoint type from ``tool_config``."""
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` tolerates an explicit ``"fields": None`` in the config.
        fields_cfg = tool_config.get("fields") or {}
        self.endpoint_type = fields_cfg.get("endpoint_type", "gene_overview")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the WormBase API call.

        Args:
            arguments: Tool arguments; the handlers read ``gene_id`` from it.

        Returns:
            The handler's ``{"data": ..., "metadata": ...}`` dict, or an
            ``{"error": message}`` dict when validation or the request fails.
            Exceptions are converted to error dicts rather than propagated.
        """
        try:
            return self._dispatch(arguments)
        except requests.exceptions.Timeout:
            return {
                "error": f"WormBase API request timed out after {self.timeout} seconds"
            }
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to WormBase API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # Compare against None explicitly: a Response for a 4xx/5xx status
            # is falsy, so a plain truthiness test would discard it.
            status = e.response.status_code if e.response is not None else "unknown"
            return {"error": f"WormBase API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error querying WormBase: {str(e)}"}

    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate endpoint based on config."""
        handler_name = self._ENDPOINT_HANDLERS.get(self.endpoint_type)
        if handler_name is None:
            return {"error": f"Unknown endpoint_type: {self.endpoint_type}"}
        return getattr(self, handler_name)(arguments)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _fetch_widget(self, gene_id: Any, widget: str) -> Dict[str, Any]:
        """Fetch one gene widget and return its ``fields`` mapping.

        Args:
            gene_id: Gene identifier placed in the URL path.
            widget: Widget name appended to the path (e.g. ``"overview"``).

        Returns:
            The ``fields`` dict of the JSON response, or ``{}`` when the
            response has no dict-valued ``fields`` entry.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-success HTTP status.
            Exception: Whatever ``response.json()`` raises for a body that
                is not valid JSON.
        """
        # Percent-encode the identifier so characters such as "/", "?" or "#"
        # cannot alter the path or query of the request.
        safe_id = quote(str(gene_id), safe="")
        url = f"{WORMBASE_BASE_URL}/widget/gene/{safe_id}/{widget}"
        response = requests.get(
            url,
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        raw = response.json()
        fields = raw.get("fields") if isinstance(raw, dict) else None
        return fields if isinstance(fields, dict) else {}

    @staticmethod
    def _field_data(fields: Dict[str, Any], key: str, default: Any = None) -> Any:
        """Return ``fields[key]["data"]``, or ``default`` if it is unavailable.

        ``default`` is returned when ``key`` is absent, when its value is not
        a dict (e.g. ``null``), or when that dict has no ``"data"`` key. A
        ``"data"`` key that is present is returned as-is, even if ``None``.
        """
        entry = fields.get(key) if isinstance(fields, dict) else None
        if not isinstance(entry, dict):
            return default
        return entry.get("data", default)

    @staticmethod
    def _label_of(data: Any) -> str:
        """Return ``data["label"]`` when ``data`` is a dict, else ``""``."""
        return data.get("label", "") if isinstance(data, dict) else ""

    @staticmethod
    def _term_entry(term: Any, id_key: str, label_key: str) -> Dict[str, Any]:
        """Build ``{id_key: id, label_key: label}`` from one term object.

        A dict term contributes its ``"id"`` and ``"label"``. Any other
        non-null value is used, stringified, as the label with an empty id;
        a null term yields two empty strings.
        """
        if isinstance(term, dict):
            return {id_key: term.get("id", ""), label_key: term.get("label", "")}
        return {id_key: "", label_key: "" if term is None else str(term)}

    @classmethod
    def _collect_terms(
        cls, items: Any, nested_key: str, id_key: str, label_key: str, limit: int
    ) -> list:
        """Flatten the first ``limit`` entries of ``items`` into id/label dicts.

        Each dict entry's ``nested_key`` value is converted with
        ``_term_entry``; non-dict entries are skipped. Returns ``[]`` when
        ``items`` is not a list.
        """
        if not isinstance(items, list):
            return []
        return [
            cls._term_entry(item.get(nested_key, {}), id_key, label_key)
            for item in items[:limit]
            if isinstance(item, dict)
        ]

    @staticmethod
    def _count(items: Any) -> int:
        """Return ``len(items)`` for a list, otherwise 0."""
        return len(items) if isinstance(items, list) else 0

    @staticmethod
    def _envelope(data: Dict[str, Any], gene_id: Any, endpoint: str) -> Dict[str, Any]:
        """Wrap ``data`` in the standard ``data`` + ``metadata`` response."""
        return {
            "data": data,
            "metadata": {
                "source": "WormBase",
                "query": gene_id,
                "endpoint": endpoint,
            },
        }

    # ------------------------------------------------------------------
    # Endpoint handlers
    # ------------------------------------------------------------------

    def _gene_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get detailed gene overview from WormBase by WBGene ID.

        Args:
            arguments: Must contain a non-empty ``gene_id``.

        Returns:
            An error dict when ``gene_id`` is missing; otherwise a response
            whose ``data`` holds ``wormbase_id``, ``gene_name``,
            ``sequence_name``, ``species``, ``description``, ``gene_type``
            and ``status``.
        """
        gene_id = arguments.get("gene_id", "")
        if not gene_id:
            return {"error": self._GENE_ID_REQUIRED}

        fields = self._fetch_widget(gene_id, "overview")

        # Name: fall back to the queried ID when the response carries none.
        name_data = self._field_data(fields, "name", {})
        gene_name = self._label_of(name_data)
        wb_id = name_data.get("id", gene_id) if isinstance(name_data, dict) else gene_id

        # Taxonomy: "<genus> <species>", trimmed when either part is empty.
        taxonomy_data = self._field_data(fields, "taxonomy", {})
        species = ""
        if isinstance(taxonomy_data, dict):
            genus = taxonomy_data.get("genus", "")
            sp = taxonomy_data.get("species", "")
            species = f"{genus} {sp}".strip()

        # Concise description may arrive as {"text": ...} or as a bare string.
        desc_data = self._field_data(fields, "concise_description", {})
        description = ""
        if isinstance(desc_data, dict):
            description = desc_data.get("text", "")
        elif isinstance(desc_data, str):
            description = desc_data

        # Legacy description is only used when the concise one is empty.
        legacy_data = self._field_data(fields, "legacy_manual_description", {})
        legacy_desc = ""
        if isinstance(legacy_data, dict):
            legacy_desc = legacy_data.get("text", "")

        # Classification type may itself be a {"label": ...} object.
        classification = self._field_data(fields, "classification", {})
        gene_type = None
        if isinstance(classification, dict):
            gene_type = classification.get("type", None)
            if isinstance(gene_type, dict):
                gene_type = gene_type.get("label", None)

        result = {
            "wormbase_id": wb_id,
            "gene_name": gene_name,
            "sequence_name": self._field_data(fields, "sequence_name", ""),
            "species": species,
            "description": description or legacy_desc,
            "gene_type": gene_type,
            "status": self._field_data(fields, "status", ""),
        }
        return self._envelope(result, gene_id, "gene_overview")

    def _gene_phenotypes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get phenotype annotations for a C. elegans gene from WormBase.

        Args:
            arguments: Must contain a non-empty ``gene_id``.

        Returns:
            An error dict when ``gene_id`` is missing; otherwise a response
            whose ``data`` lists up to 50 observed and up to 20 not-observed
            phenotypes, plus the untruncated counts of each.
        """
        gene_id = arguments.get("gene_id", "")
        if not gene_id:
            return {"error": self._GENE_ID_REQUIRED}

        fields = self._fetch_widget(gene_id, "phenotype")
        gene_name = self._label_of(self._field_data(fields, "name", {}))

        # Observed phenotypes, each optionally tagged with its first evidence label.
        pheno_data = self._field_data(fields, "phenotype", [])
        phenotypes = []
        if isinstance(pheno_data, list):
            for item in pheno_data[:50]:
                if not isinstance(item, dict):
                    continue
                entry = self._term_entry(
                    item.get("phenotype", {}), "phenotype_id", "phenotype_name"
                )
                evidence = item.get("evidence", [])
                # evidence_type is only added when evidence is a non-empty list.
                if isinstance(evidence, list) and evidence:
                    first_ev = evidence[0] if isinstance(evidence[0], dict) else {}
                    entry["evidence_type"] = first_ev.get("label", "")
                phenotypes.append(entry)

        # Phenotypes that were tested for but not observed.
        not_pheno_data = self._field_data(fields, "phenotype_not_observed", [])
        not_observed = self._collect_terms(
            not_pheno_data, "phenotype", "phenotype_id", "phenotype_name", 20
        )

        result = {
            "wormbase_id": gene_id,
            "gene_name": gene_name,
            "phenotype_count": self._count(pheno_data),
            "phenotypes": phenotypes,
            "not_observed_count": self._count(not_pheno_data),
            "phenotypes_not_observed": not_observed,
        }
        return self._envelope(result, gene_id, "gene_phenotypes")

    def _gene_expression(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get expression data for a C. elegans gene from WormBase.

        Args:
            arguments: Must contain a non-empty ``gene_id``.

        Returns:
            An error dict when ``gene_id`` is missing; otherwise a response
            whose ``data`` lists up to 30 tissues, 20 developmental stages,
            10 subcellular localizations and 15 expression clusters, plus
            the untruncated tissue and cluster counts.
        """
        gene_id = arguments.get("gene_id", "")
        if not gene_id:
            return {"error": self._GENE_ID_REQUIRED}

        fields = self._fetch_widget(gene_id, "expression")
        gene_name = self._label_of(self._field_data(fields, "name", {}))

        tissue_data = self._field_data(fields, "expressed_in", [])
        stage_data = self._field_data(fields, "expressed_during", [])
        sub_data = self._field_data(fields, "subcellular_localization", [])
        cluster_data = self._field_data(fields, "expression_cluster", [])

        result = {
            "wormbase_id": gene_id,
            "gene_name": gene_name,
            "expressed_in_count": self._count(tissue_data),
            "expressed_in": self._collect_terms(
                tissue_data, "ontology_term", "term_id", "term_name", 30
            ),
            "expressed_during": self._collect_terms(
                stage_data, "ontology_term", "term_id", "term_name", 20
            ),
            "subcellular_localization": self._collect_terms(
                sub_data, "ontology_term", "term_id", "term_name", 10
            ),
            "expression_clusters_count": self._count(cluster_data),
            "expression_clusters": self._collect_terms(
                cluster_data, "expression_cluster", "cluster_id", "cluster_label", 15
            ),
        }
        return self._envelope(result, gene_id, "gene_expression")