Source code for tooluniverse.go_api_tool
# go_api_tool.py
"""
Gene Ontology (GO) REST API tool for ToolUniverse.
The Gene Ontology provides a standardized framework for describing gene
and gene product attributes across species. The GO API provides programmatic
access to ontology terms, gene annotations, and functional associations.
API: https://api.geneontology.org/api/
No authentication required. Free public access.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
GO_BASE_URL = "https://api.geneontology.org/api"
[docs]
@register_tool("GOAPITool")
class GOAPITool(BaseTool):
"""
Tool for querying the Gene Ontology (GO) REST API.
The GO API provides access to three major ontology domains:
- Biological Process (BP): cellular/organismal processes
- Molecular Function (MF): molecular-level activities
- Cellular Component (CC): locations of gene products
Supports: GO term lookup, gene GO annotations, gene function associations.
No authentication required.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 30)
fields = tool_config.get("fields", {})
self.endpoint = fields.get("endpoint", "term")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the GO API call."""
try:
return self._query(arguments)
except requests.exceptions.Timeout:
return {"error": f"GO API timed out after {self.timeout}s"}
except requests.exceptions.ConnectionError:
return {"error": "Failed to connect to GO API"}
except requests.exceptions.HTTPError as e:
return {"error": f"GO API HTTP error: {e.response.status_code}"}
except Exception as e:
return {"error": f"Unexpected error querying GO API: {str(e)}"}
[docs]
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Route to appropriate GO endpoint."""
if self.endpoint == "term":
return self._get_term(arguments)
elif self.endpoint == "gene_functions":
return self._get_gene_functions(arguments)
elif self.endpoint == "search_annotations":
return self._search_annotations(arguments)
else:
return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs]
def _get_term(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get GO term details by GO ID."""
go_id = arguments.get("go_id", "")
if not go_id:
return {"error": "go_id parameter is required (e.g., GO:0006915)"}
# Normalize GO ID
if not go_id.startswith("GO:"):
go_id = f"GO:{go_id}"
url = f"{GO_BASE_URL}/ontology/term/{go_id}"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
# Clean up synonyms (they have @ artifacts)
synonyms = [s.strip("@").strip() for s in data.get("synonyms", [])]
related_synonyms = [
s.strip("@").strip() for s in data.get("relatedSynonyms", [])
]
return {
"data": {
"go_id": data.get("goid"),
"label": data.get("label"),
"definition": data.get("definition"),
"synonyms": synonyms,
"related_synonyms": related_synonyms,
"xrefs": [x.strip("@").strip() for x in data.get("xrefs", [])],
"alternative_ids": [
a.strip("@").strip() for a in data.get("alternativeIds", [])
],
},
"metadata": {
"source": "Gene Ontology (GO)",
},
}
[docs]
def _get_gene_functions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get GO annotations (functions/processes/components) for a gene."""
gene_id = arguments.get("gene_id", "")
if not gene_id:
return {
"error": "gene_id parameter is required (e.g., HGNC:11998 or UniProtKB:P04637)"
}
rows = arguments.get("rows") or 20
aspect = arguments.get("aspect") # P=process, F=function, C=component
url = f"{GO_BASE_URL}/bioentity/gene/{gene_id}/function"
params = {"rows": min(rows, 100)}
if aspect:
params["fq"] = f'aspect:"{aspect}"'
response = requests.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
data = response.json()
associations = []
for a in data.get("associations", []):
obj = a.get("object", {})
categories = obj.get("category", [])
category = categories[0] if categories else None
associations.append(
{
"go_id": obj.get("id"),
"go_label": obj.get("label"),
"category": category,
"evidence_type": a.get("evidence_type"),
"evidence_label": a.get("evidence_label"),
"provided_by": a.get("provided_by", []),
"references": a.get("reference", []),
}
)
return {
"data": associations,
"metadata": {
"source": "Gene Ontology (GO)",
"gene_id": gene_id,
"total_results": data.get("numFound", len(associations)),
},
}
[docs]
def _search_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Search for genes annotated with a specific GO term."""
go_id = arguments.get("go_id", "")
if not go_id:
return {"error": "go_id parameter is required (e.g., GO:0006915)"}
if not go_id.startswith("GO:"):
go_id = f"GO:{go_id}"
rows = arguments.get("rows") or 20
taxon = arguments.get("taxon") # e.g., NCBITaxon:9606
url = f"{GO_BASE_URL}/bioentity/function/{go_id}/genes"
params = {"rows": min(rows, 100)}
if taxon:
params["taxon"] = taxon
response = requests.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
data = response.json()
genes = []
for a in data.get("associations", []):
subj = a.get("subject", {})
taxon_info = subj.get("taxon", {})
genes.append(
{
"gene_id": subj.get("id"),
"gene_label": subj.get("label"),
"taxon_id": taxon_info.get("id"),
"taxon_label": taxon_info.get("label"),
"evidence_type": a.get("evidence_type"),
"references": a.get("reference", []),
}
)
return {
"data": genes,
"metadata": {
"source": "Gene Ontology (GO)",
"go_id": go_id,
"total_results": data.get("numFound", len(genes)),
},
}