# Source code for tooluniverse.mygene_tool
# mygene_tool.py
"""
MyGene.info API tool for ToolUniverse.
MyGene.info is a high-performance gene annotation service providing
access to gene information from 30+ sources for 22M+ genes across 22K+ species.
API Documentation: https://mygene.info/doc
"""
from typing import Dict, Any, Optional, List
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL for MyGene.info API v3; MyGeneTool builds every request URL from it.
MYGENE_BASE_URL = "https://mygene.info/v3"
[docs]
@register_tool("MyGeneTool")
class MyGeneTool(BaseTool):
    """
    Tool for querying MyGene.info API.

    MyGene.info provides gene annotation data from 30+ sources including
    Entrez Gene, Ensembl, UniProt, HGNC, and more.

    No authentication required. Free for academic/research use.

    The operation performed by :meth:`run` is fixed per instance and is read
    from ``tool_config["fields"]["operation"]``: ``"query"`` (default),
    ``"get_gene"`` or ``"query_batch"``. Validation and request failures are
    reported as ``{"error": <message>}`` instead of being raised.
    """

    # Upper bound applied to the caller-supplied ``size`` argument.
    _MAX_SIZE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the configured operation.

        Args:
            tool_config: Tool configuration. ``timeout`` (default 30) is passed
                as the timeout of every HTTP request; ``fields.operation``
                (default ``"query"``) selects what :meth:`run` does.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # "fields" may be missing or explicitly null in the config.
        fields_cfg = tool_config.get("fields") or {}
        self.operation = fields_cfg.get("operation", "query")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the MyGene.info API call selected by ``self.operation``.

        Args:
            arguments: Call arguments; the accepted keys depend on the
                operation (see the individual ``_query_*`` / ``_get_*``
                methods). ``None`` is treated as an empty mapping.

        Returns:
            The operation's result, or ``{"error": ...}`` for an unknown
            operation.
        """
        arguments = arguments or {}
        operation = self.operation
        if operation == "query":
            return self._query_genes(arguments)
        if operation == "get_gene":
            return self._get_gene(arguments)
        if operation == "query_batch":
            return self._query_batch(arguments)
        return {"error": f"Unknown operation: {operation}"}

    @classmethod
    def _clamp_size(cls, size: Any, default: int = 10) -> Optional[int]:
        """
        Normalise the ``size`` argument to an int in ``0.._MAX_SIZE``.

        ``None`` falls back to *default*. Returns ``None`` when the value
        cannot be interpreted as an integer, so the caller can report it.
        """
        if size is None:
            size = default
        try:
            value = int(size)
        except (TypeError, ValueError, OverflowError):
            return None
        return max(0, min(value, cls._MAX_SIZE))

    def _send(
        self,
        http_call: Any,
        url: str,
        result_key: Optional[str] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """
        Perform one HTTP call and decode its JSON body.

        Args:
            http_call: ``requests.get`` or ``requests.post``.
            url: Absolute request URL.
            result_key: When given, the decoded payload is wrapped as
                ``{result_key: payload}``; otherwise it is returned as is.
            **kwargs: Forwarded to *http_call* (``params`` / ``data``).

        Returns:
            The decoded (optionally wrapped) payload, or ``{"error": ...}``
            when the request fails, the status is an HTTP error, or the body
            is not valid JSON.
        """
        try:
            response = http_call(url, timeout=self.timeout, **kwargs)
            response.raise_for_status()
            payload = response.json()
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        except (requests.RequestException, ValueError) as e:
            return {"error": f"MyGene.info API request failed: {str(e)}"}
        if result_key is None:
            return payload
        return {result_key: payload}

    def _query_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Query genes by keyword, symbol, or other identifiers.

        Endpoint: GET /query

        Args:
            arguments: ``query`` (required), ``species`` (default
                ``"human"``), ``fields`` (default
                ``"symbol,name,entrezgene,ensembl.gene"``) and ``size``
                (default 10, limited to 0..100).

        Returns:
            The decoded API response, or ``{"error": ...}``.
        """
        query = arguments.get("query", "")
        if not query:
            return {"error": "Query parameter is required"}

        size = self._clamp_size(arguments.get("size", 10))
        if size is None:
            return {"error": "size parameter must be an integer"}

        params = {
            "q": query,
            "species": arguments.get("species", "human"),
            "fields": arguments.get("fields", "symbol,name,entrezgene,ensembl.gene"),
            "size": size,  # Capped to avoid overwhelming responses
        }
        return self._send(requests.get, f"{MYGENE_BASE_URL}/query", params=params)

    def _get_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed gene annotation by gene ID.

        Endpoint: GET /gene/<geneid>

        Args:
            arguments: ``gene_id`` (required) and ``fields`` (default
                ``"symbol,name,entrezgene,ensembl,summary,generif,pathway"``).

        Returns:
            The decoded API response, or ``{"error": ...}``.
        """
        gene_id = arguments.get("gene_id", "")
        if not gene_id:
            return {"error": "gene_id parameter is required"}

        fields = arguments.get(
            "fields", "symbol,name,entrezgene,ensembl,summary,generif,pathway"
        )
        # Escape the id so characters such as "/", "?" or "#" cannot alter
        # the request path or leak into the query string.
        safe_id = quote(str(gene_id), safe=":")
        return self._send(
            requests.get, f"{MYGENE_BASE_URL}/gene/{safe_id}", params={"fields": fields}
        )

    def _query_batch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Query multiple genes at once using POST.

        Endpoint: POST /query

        Args:
            arguments: ``gene_ids`` (required; a list/tuple/set of ids or an
                already comma-separated string), ``fields`` (default
                ``"symbol,name,entrezgene"``) and ``species`` (default
                ``"human"``).

        Returns:
            ``{"results": <decoded API response>}``, or ``{"error": ...}``.
        """
        gene_ids = arguments.get("gene_ids", [])
        if not gene_ids:
            return {"error": "gene_ids parameter is required (list of gene IDs)"}

        # Collections become a comma-separated string; anything else is
        # assumed to already be one.
        if isinstance(gene_ids, (list, tuple, set, frozenset)):
            gene_ids_str = ",".join(str(g) for g in gene_ids)
        else:
            gene_ids_str = str(gene_ids)

        data = {
            "q": gene_ids_str,
            "scopes": "entrezgene,ensembl.gene,symbol",
            "species": arguments.get("species", "human"),
            "fields": arguments.get("fields", "symbol,name,entrezgene"),
        }
        return self._send(
            requests.post, f"{MYGENE_BASE_URL}/query", result_key="results", data=data
        )
[docs]
@register_tool("MyVariantTool")
class MyVariantTool(BaseTool):
    """
    Tool for querying MyVariant.info API.

    MyVariant.info provides variant annotation data from 19+ sources
    for 400M+ human variants.

    No authentication required. Free for academic/research use.

    The operation performed by :meth:`run` is fixed per instance and is read
    from ``tool_config["fields"]["operation"]``: ``"query"`` (default) or
    ``"get_variant"``. Validation and request failures are reported as
    ``{"error": <message>}`` instead of being raised.
    """

    MYVARIANT_BASE_URL = "https://myvariant.info/v1"

    # Upper bound applied to the caller-supplied ``size`` argument.
    _MAX_SIZE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the configured operation.

        Args:
            tool_config: Tool configuration. ``timeout`` (default 30) is passed
                as the timeout of every HTTP request; ``fields.operation``
                (default ``"query"``) selects what :meth:`run` does.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # "fields" may be missing or explicitly null in the config.
        fields_cfg = tool_config.get("fields") or {}
        self.operation = fields_cfg.get("operation", "query")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the MyVariant.info API call selected by ``self.operation``.

        Args:
            arguments: Call arguments; the accepted keys depend on the
                operation. ``None`` is treated as an empty mapping.

        Returns:
            The operation's result, or ``{"error": ...}`` for an unknown
            operation.
        """
        arguments = arguments or {}
        operation = self.operation
        if operation == "query":
            return self._query_variants(arguments)
        if operation == "get_variant":
            return self._get_variant(arguments)
        return {"error": f"Unknown operation: {operation}"}

    @classmethod
    def _clamp_size(cls, size: Any, default: int = 10) -> Optional[int]:
        """
        Normalise the ``size`` argument to an int in ``0.._MAX_SIZE``.

        ``None`` falls back to *default*. Returns ``None`` when the value
        cannot be interpreted as an integer, so the caller can report it.
        """
        if size is None:
            size = default
        try:
            value = int(size)
        except (TypeError, ValueError, OverflowError):
            return None
        return max(0, min(value, cls._MAX_SIZE))

    def _get_json(self, url: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Issue a GET request and decode its JSON body.

        Returns:
            The decoded payload, or ``{"error": ...}`` when the request
            fails, the status is an HTTP error, or the body is not valid JSON.
        """
        try:
            response = requests.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        except (requests.RequestException, ValueError) as e:
            return {"error": f"MyVariant.info API request failed: {str(e)}"}

    def _query_variants(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Query variants by various criteria.

        Endpoint: GET /query

        Args:
            arguments: ``query`` (required), ``fields`` (default
                ``"dbsnp,clinvar,cadd,gnomad_genome"``) and ``size``
                (default 10, limited to 0..100).

        Returns:
            The decoded API response, or ``{"error": ...}``.
        """
        query = arguments.get("query", "")
        if not query:
            return {"error": "Query parameter is required"}

        size = self._clamp_size(arguments.get("size", 10))
        if size is None:
            return {"error": "size parameter must be an integer"}

        params = {
            "q": query,
            "fields": arguments.get("fields", "dbsnp,clinvar,cadd,gnomad_genome"),
            "size": size,
        }
        return self._get_json(f"{self.MYVARIANT_BASE_URL}/query", params)

    def _get_variant(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed variant annotation by HGVS ID.

        Endpoint: GET /variant/<hgvsid>

        Args:
            arguments: ``variant_id`` (required) and ``fields`` (default
                ``"dbsnp,clinvar,cadd,gnomad_genome,dbnsfp"``).

        Returns:
            The decoded API response, or ``{"error": ...}``.
        """
        variant_id = arguments.get("variant_id", "")
        if not variant_id:
            return {"error": "variant_id parameter is required (HGVS format)"}

        fields = arguments.get("fields", "dbsnp,clinvar,cadd,gnomad_genome,dbnsfp")
        # Escape the id so characters such as "/", "?" or "#" cannot alter
        # the request path; ":" is kept literal.
        safe_id = quote(str(variant_id), safe=":")
        return self._get_json(
            f"{self.MYVARIANT_BASE_URL}/variant/{safe_id}", {"fields": fields}
        )
[docs]
@register_tool("MyChemTool")
class MyChemTool(BaseTool):
    """
    Tool for querying MyChem.info API.

    MyChem.info provides chemical/drug annotation data from 30+ sources
    for 90M+ chemicals and drugs.

    No authentication required. Free for academic/research use.

    The operation performed by :meth:`run` is fixed per instance and is read
    from ``tool_config["fields"]["operation"]``: ``"query"`` (default) or
    ``"get_chemical"``. Validation and request failures are reported as
    ``{"error": <message>}`` instead of being raised.
    """

    MYCHEM_BASE_URL = "https://mychem.info/v1"

    # Upper bound applied to the caller-supplied ``size`` argument.
    _MAX_SIZE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the configured operation.

        Args:
            tool_config: Tool configuration. ``timeout`` (default 30) is passed
                as the timeout of every HTTP request; ``fields.operation``
                (default ``"query"``) selects what :meth:`run` does.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # "fields" may be missing or explicitly null in the config.
        fields_cfg = tool_config.get("fields") or {}
        self.operation = fields_cfg.get("operation", "query")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the MyChem.info API call selected by ``self.operation``.

        Args:
            arguments: Call arguments; the accepted keys depend on the
                operation. ``None`` is treated as an empty mapping.

        Returns:
            The operation's result, or ``{"error": ...}`` for an unknown
            operation.
        """
        arguments = arguments or {}
        operation = self.operation
        if operation == "query":
            return self._query_chemicals(arguments)
        if operation == "get_chemical":
            return self._get_chemical(arguments)
        return {"error": f"Unknown operation: {operation}"}

    @classmethod
    def _clamp_size(cls, size: Any, default: int = 10) -> Optional[int]:
        """
        Normalise the ``size`` argument to an int in ``0.._MAX_SIZE``.

        ``None`` falls back to *default*. Returns ``None`` when the value
        cannot be interpreted as an integer, so the caller can report it.
        """
        if size is None:
            size = default
        try:
            value = int(size)
        except (TypeError, ValueError, OverflowError):
            return None
        return max(0, min(value, cls._MAX_SIZE))

    def _get_json(self, url: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Issue a GET request and decode its JSON body.

        Returns:
            The decoded payload, or ``{"error": ...}`` when the request
            fails, the status is an HTTP error, or the body is not valid JSON.
        """
        try:
            response = requests.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        except (requests.RequestException, ValueError) as e:
            return {"error": f"MyChem.info API request failed: {str(e)}"}

    def _query_chemicals(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Query chemicals/drugs by name, InChIKey, or other identifiers.

        Endpoint: GET /query

        Args:
            arguments: ``query`` (required), ``fields`` (default
                ``"drugbank,chebi,pubchem,chembl"``) and ``size``
                (default 10, limited to 0..100).

        Returns:
            The decoded API response, or ``{"error": ...}``.
        """
        query = arguments.get("query", "")
        if not query:
            return {"error": "Query parameter is required"}

        size = self._clamp_size(arguments.get("size", 10))
        if size is None:
            return {"error": "size parameter must be an integer"}

        params = {
            "q": query,
            "fields": arguments.get("fields", "drugbank,chebi,pubchem,chembl"),
            "size": size,
        }
        return self._get_json(f"{self.MYCHEM_BASE_URL}/query", params)

    def _get_chemical(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed chemical annotation by InChIKey or other ID.

        Endpoint: GET /chem/<chemid>

        Args:
            arguments: ``chem_id`` (required) and ``fields`` (default
                ``"drugbank,chebi,pubchem,chembl,drugcentral"``).

        Returns:
            The decoded API response, or ``{"error": ...}``.
        """
        chem_id = arguments.get("chem_id", "")
        if not chem_id:
            return {"error": "chem_id parameter is required (InChIKey recommended)"}

        fields = arguments.get("fields", "drugbank,chebi,pubchem,chembl,drugcentral")
        # Escape the id so characters such as "/", "?" or "#" cannot alter
        # the request path; ":" is kept literal.
        safe_id = quote(str(chem_id), safe=":")
        return self._get_json(
            f"{self.MYCHEM_BASE_URL}/chem/{safe_id}", {"fields": fields}
        )