Source code for tooluniverse.alliance_genome_tool
# alliance_genome_tool.py
"""
Alliance of Genome Resources REST API tool for ToolUniverse.
The Alliance of Genome Resources (AGR) integrates data from 7 model organism
databases (SGD, FlyBase, WormBase, ZFIN, RGD, MGI, Xenbase) plus human data.
It provides unified access to gene information, disease associations,
phenotypes, and cross-species search across all model organisms.
API: https://www.alliancegenome.org/api
No authentication required.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL of the Alliance REST API; every endpoint path requested by this
# module is appended to it.
ALLIANCE_BASE = "https://www.alliancegenome.org/api"
[docs]
@register_tool("AllianceGenomeTool")
class AllianceGenomeTool(BaseTool):
    """
    Tool for querying the Alliance of Genome Resources API.
    Provides cross-species gene information across 7 model organisms
    (yeast, fly, worm, zebrafish, rat, mouse, frog) plus human.
    Supports gene detail, disease associations, phenotypes, and search.
    No authentication required.

    Each instance serves exactly one endpoint, selected by the
    ``fields.endpoint_type`` entry of its tool configuration (default
    ``"gene_detail"``). Recognised values are ``gene_detail``,
    ``search_genes``, ``gene_phenotypes``, ``disease_genes``,
    ``disease_detail``, ``gene_orthologs``, ``gene_alleles``,
    ``gene_expression_summary``, ``gene_interactions``,
    ``gene_disease_models`` and ``allele_detail``.

    ``run`` returns either a ``{"data": ..., "metadata": ...}`` dictionary
    or an ``{"error": "<message>"}`` dictionary.
    """
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """Initialise the tool from its configuration dictionary.

    Reads the optional ``timeout`` entry (default 30, later passed to
    ``requests`` as the request timeout) and the ``endpoint_type`` entry
    of the optional ``fields`` mapping (default ``"gene_detail"``), which
    selects the endpoint this instance queries.
    """
    super().__init__(tool_config)
    self.timeout = tool_config.get("timeout", 30)
    field_settings = tool_config.get("fields", {})
    self.endpoint_type = field_settings.get("endpoint_type", "gene_detail")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run the configured Alliance query and return its result.

    Exceptions raised while querying are not propagated: timeouts,
    connection failures, HTTP error statuses and any other exception are
    each converted into a dictionary holding a single ``"error"`` message.
    """
    try:
        outcome = self._query(arguments)
    except requests.exceptions.Timeout:
        outcome = {
            "error": f"Alliance API request timed out after {self.timeout}s"
        }
    except requests.exceptions.ConnectionError:
        outcome = {
            "error": "Failed to connect to Alliance API. Check network connectivity."
        }
    except requests.exceptions.HTTPError as http_err:
        outcome = {
            "error": f"Alliance API HTTP error: {http_err.response.status_code}"
        }
    except Exception as exc:
        # Last-resort boundary so the caller always gets a dictionary back.
        outcome = {"error": f"Unexpected error querying Alliance API: {str(exc)}"}
    return outcome
[docs]
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch ``arguments`` to the handler for ``self.endpoint_type``.

    Returns the handler's result, or an ``"error"`` dictionary when the
    configured endpoint type matches none of the known routes.
    """
    # (endpoint type, name of the method serving it), compared in order.
    routes = (
        ("gene_detail", "_get_gene_detail"),
        ("search_genes", "_search_genes"),
        ("gene_phenotypes", "_get_gene_phenotypes"),
        ("disease_genes", "_get_disease_genes"),
        ("disease_detail", "_get_disease_detail"),
        ("gene_orthologs", "_get_gene_orthologs"),
        ("gene_alleles", "_get_gene_alleles"),
        ("gene_expression_summary", "_get_gene_expression_summary"),
        ("gene_interactions", "_get_gene_interactions"),
        ("gene_disease_models", "_get_gene_disease_models"),
        ("allele_detail", "_get_allele_detail"),
    )
    selected = self.endpoint_type
    for route_key, handler_name in routes:
        if selected == route_key:
            # The handler is looked up only after its route has matched.
            return getattr(self, handler_name)(arguments)
    return {"error": f"Unknown endpoint type: {selected}"}
[docs]
def _get_gene_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get detailed gene information from Alliance.

    Requests ``{ALLIANCE_BASE}/gene/{gene_id}`` and condenses the response.

    Args:
        arguments: Must contain a non-empty ``gene_id``
            (e.g. ``'HGNC:6081'``).

    Returns:
        ``{"data": ..., "metadata": ...}`` holding the gene's identity,
        species, synopses, synonyms, SO term name, the first genomic
        location and at most ten "other" cross-references; or
        ``{"error": ...}`` when ``gene_id`` is missing or empty.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {
            "error": "gene_id parameter is required (e.g., 'HGNC:6081', 'MGI:98834', 'FB:FBgn0003996')"
        }
    url = f"{ALLIANCE_BASE}/gene/{gene_id}"
    response = requests.get(
        url, headers={"Accept": "application/json"}, timeout=self.timeout
    )
    response.raise_for_status()
    data = response.json()

    # ``dict.get(key, default)`` falls back only when the key is absent; a
    # JSON ``null`` arrives as ``None``. ``or`` covers both cases so the
    # chained lookups below cannot fail on ``None``.
    species = data.get("species") or {}
    locations = data.get("genomeLocations") or []
    loc_info = (locations[0] if locations else None) or {}
    so_term = data.get("soTerm") or {}
    xrefs = data.get("crossReferenceMap") or {}

    # Extract cross-references (only the first ten "other" entries).
    other_xrefs = xrefs.get("other") or []
    xref_list = [
        {"name": x.get("name"), "url": x.get("crossRefCompleteUrl")}
        for x in other_xrefs[:10]
    ]
    return {
        "data": {
            "id": data.get("id"),
            "symbol": data.get("symbol"),
            "name": data.get("name"),
            "species": {
                "name": species.get("name"),
                "short_name": species.get("shortName"),
                "taxon_id": species.get("taxonId"),
                "data_provider": species.get("dataProviderShortName"),
            },
            "gene_synopsis": data.get("geneSynopsis"),
            "automated_gene_synopsis": data.get("automatedGeneSynopsis"),
            "synonyms": data.get("synonyms") or [],
            "so_term": so_term.get("name"),
            "genomic_location": {
                "chromosome": loc_info.get("chromosome"),
                "start": loc_info.get("start"),
                "end": loc_info.get("end"),
                "assembly": loc_info.get("assembly"),
                "strand": loc_info.get("strand"),
            },
            "cross_references": xref_list,
        },
        "metadata": {
            "query_gene_id": gene_id,
            "data_provider": data.get("dataProvider"),
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _search_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Look up genes by free text via the autocomplete search endpoint.

    ``arguments`` must carry a non-empty ``query``; the optional ``limit``
    (default 10) is capped at 50 before being sent. Returns the matching
    entries under ``"data"`` with a ``"metadata"`` summary, or an
    ``"error"`` dictionary when ``query`` is missing.
    """
    query = arguments.get("query", "")
    if not query:
        return {"error": "query parameter is required"}

    requested_limit = arguments.get("limit", 10)
    response = requests.get(
        f"{ALLIANCE_BASE}/search_autocomplete",
        params={
            "q": query,
            "category": "gene",
            "limit": min(int(requested_limit), 50),
        },
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    payload = response.json()

    genes = [
        {
            "symbol": hit.get("symbol"),
            "name": hit.get("name"),
            "primary_key": hit.get("primaryKey"),
            "name_key": hit.get("name_key"),
            "category": hit.get("category"),
        }
        for hit in payload.get("results", [])
    ]
    summary = {
        "total_results": len(genes),
        "query": query,
        "source": "Alliance of Genome Resources",
    }
    return {"data": genes, "metadata": summary}
[docs]
def _get_gene_phenotypes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Fetch one page of phenotype annotations for a gene.

    ``arguments`` must carry a non-empty ``gene_id``. Optional ``limit``
    (default 20, capped at 100) and ``page`` (default 1) control paging.
    Returns the annotations under ``"data"`` with a ``"metadata"``
    summary, or an ``"error"`` dictionary when ``gene_id`` is missing.
    """
    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {"error": "gene_id parameter is required"}

    page_size = min(int(arguments.get("limit", 20)), 100)
    page_number = int(arguments.get("page", 1))
    response = requests.get(
        f"{ALLIANCE_BASE}/gene/{gene_id}/phenotypes",
        params={"limit": page_size, "page": page_number},
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    payload = response.json()

    phenotypes = [
        {
            "gene_symbol": subject.get("symbol"),
            "gene_id": subject.get("primaryExternalId"),
            "phenotype_statement": record.get("phenotypeStatement"),
        }
        for record in payload.get("results", [])
        # One-element tuple: binds ``subject`` once per record.
        for subject in (record.get("subject", {}),)
    ]
    summary = {
        "total_results": payload.get("total", 0),
        "returned": len(phenotypes),
        "query_gene_id": gene_id,
        "page": page_number,
        "source": "Alliance of Genome Resources",
    }
    return {"data": phenotypes, "metadata": summary}
[docs]
def _get_disease_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get genes associated with a disease by Disease Ontology ID.

    Requests ``{ALLIANCE_BASE}/disease/{disease_id}/genes``.

    Args:
        arguments: Must contain a non-empty ``disease_id``
            (e.g. ``'DOID:162'``). Optional ``limit`` (default 20,
            capped at 100) and ``page`` (default 1).

    Returns:
        ``{"data": [...], "metadata": {...}}`` with one entry per
        association, or ``{"error": ...}`` when ``disease_id`` is missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    disease_id = arguments.get("disease_id", "")
    if not disease_id:
        return {
            "error": "disease_id parameter is required (e.g., 'DOID:162' for cancer)"
        }
    limit = arguments.get("limit", 20)
    page = arguments.get("page", 1)
    url = f"{ALLIANCE_BASE}/disease/{disease_id}/genes"
    params = {"limit": min(int(limit), 100), "page": int(page)}
    response = requests.get(
        url,
        params=params,
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    data = response.json()
    total = data.get("total", 0)

    genes = []
    # ``x.get(key) or {}`` (rather than ``x.get(key, {})``) also covers keys
    # that are present but JSON ``null``, which would otherwise make the
    # chained ``.get`` calls raise AttributeError.
    for r in data.get("results") or []:
        subject = r.get("subject") or {}
        species = subject.get("taxon") or {}
        disease_obj = r.get("object") or {}
        gene_symbol = subject.get("symbol") or (
            subject.get("geneSymbol") or {}
        ).get("displayText")
        genes.append(
            {
                "gene_symbol": gene_symbol,
                "gene_id": subject.get("primaryExternalId") or subject.get("curie"),
                "species": species.get("curie"),
                "disease_name": disease_obj.get("name"),
                "disease_id": disease_obj.get("curie"),
                "association_type": r.get("associationType"),
            }
        )
    return {
        "data": genes,
        "metadata": {
            "total_results": total,
            "returned": len(genes),
            "query_disease_id": disease_id,
            "page": int(page),
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _get_disease_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Fetch the summary record for one disease.

    ``arguments`` must carry a non-empty ``disease_id``. Returns the
    disease term's id, name, definition, synonym names and category under
    ``"data"``, or an ``"error"`` dictionary when ``disease_id`` is
    missing.
    """
    disease_id = arguments.get("disease_id", "")
    if not disease_id:
        return {
            "error": "disease_id parameter is required (e.g., 'DOID:162' for cancer)"
        }

    response = requests.get(
        f"{ALLIANCE_BASE}/disease/{disease_id}",
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    payload = response.json()

    term = payload.get("doTerm", {})
    # Keep only synonyms that actually have a non-empty name.
    candidate_names = (entry.get("name") for entry in term.get("synonyms", []))
    synonym_names = [name for name in candidate_names if name]

    details = {
        "disease_id": term.get("curie"),
        "name": term.get("name"),
        "definition": term.get("definition"),
        "synonyms": synonym_names,
        "category": payload.get("category"),
    }
    summary = {
        "query_disease_id": disease_id,
        "source": "Alliance of Genome Resources",
    }
    return {"data": details, "metadata": summary}
[docs]
def _get_gene_orthologs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get ortholog genes across species for a given gene.

    Requests ``{ALLIANCE_BASE}/gene/{gene_id}/orthologs``.

    Args:
        arguments: Must contain a non-empty ``gene_id``. Optional
            ``limit`` (default 20, capped at 100), ``page`` (default 1)
            and ``stringency`` (default ``"stringent"``, sent as the
            ``filter.stringency`` query parameter).

    Returns:
        ``{"data": [...], "metadata": {...}}`` with one entry per
        orthology record, or ``{"error": ...}`` when ``gene_id`` is
        missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """

    def _nested(container, outer_key, inner_key):
        # Read container[outer_key][inner_key], tolerating an outer value
        # that is absent or JSON null (``.get(key, {})`` handles only the
        # absent case).
        return (container.get(outer_key) or {}).get(inner_key)

    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {"error": "gene_id parameter is required"}
    limit = arguments.get("limit", 20)
    page = arguments.get("page", 1)
    stringency = arguments.get("stringency", "stringent")
    url = f"{ALLIANCE_BASE}/gene/{gene_id}/orthologs"
    params = {
        "limit": min(int(limit), 100),
        "page": int(page),
        "filter.stringency": stringency,
    }
    response = requests.get(
        url,
        params=params,
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    data = response.json()
    total = data.get("total", 0)

    orthologs = []
    for r in data.get("results") or []:
        orth = r.get("geneToGeneOrthologyGenerated") or {}
        subject = orth.get("subjectGene") or {}
        obj_gene = orth.get("objectGene") or {}
        methods = orth.get("predictionMethodsMatched") or []
        orthologs.append(
            {
                "subject_gene_id": subject.get("primaryExternalId"),
                "subject_symbol": _nested(subject, "geneSymbol", "displayText"),
                "subject_species": _nested(subject, "taxon", "name"),
                "ortholog_gene_id": obj_gene.get("primaryExternalId"),
                "ortholog_symbol": _nested(obj_gene, "geneSymbol", "displayText"),
                "ortholog_species": _nested(obj_gene, "taxon", "name"),
                "is_best_score": _nested(orth, "isBestScore", "name"),
                "is_best_score_reverse": _nested(
                    orth, "isBestScoreReverse", "name"
                ),
                "methods": [m.get("name") for m in methods],
                "method_count": len(methods),
            }
        )
    return {
        "data": orthologs,
        "metadata": {
            "total_results": total,
            "returned": len(orthologs),
            "query_gene_id": gene_id,
            "stringency": stringency,
            "page": int(page),
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _get_gene_alleles(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get alleles and variants for a gene.

    Requests ``{ALLIANCE_BASE}/gene/{gene_id}/alleles``.

    Args:
        arguments: Must contain a non-empty ``gene_id``. Optional
            ``limit`` (default 20, capped at 100) and ``page``
            (default 1).

    Returns:
        ``{"data": [...], "metadata": {...}}`` with one entry per allele,
        each listing at most its first three variants, or
        ``{"error": ...}`` when ``gene_id`` is missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {"error": "gene_id parameter is required"}
    limit = arguments.get("limit", 20)
    page = arguments.get("page", 1)
    url = f"{ALLIANCE_BASE}/gene/{gene_id}/alleles"
    params = {"limit": min(int(limit), 100), "page": int(page)}
    response = requests.get(
        url,
        params=params,
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    data = response.json()
    total = data.get("total", 0)

    alleles = []
    for r in data.get("results") or []:
        # ``or []`` / ``or {}`` also cover keys present with JSON null,
        # which the ``.get(key, default)`` form would pass through as
        # ``None`` and then fail on slicing or ``.get``.
        variants = r.get("variants") or []
        variant_info = [
            {
                "id": v.get("id"),
                "name": v.get("name"),
                "type": (v.get("variantType") or {}).get("name"),
                "location": v.get("location"),
            }
            for v in variants[:3]
        ]
        alleles.append(
            {
                "id": r.get("id"),
                "symbol": r.get("symbol"),
                "symbol_text": r.get("symbolText"),
                "category": r.get("category"),
                "has_disease": r.get("hasDisease"),
                "has_phenotype": r.get("hasPhenotype"),
                "variants": variant_info,
            }
        )
    return {
        "data": alleles,
        "metadata": {
            "total_results": total,
            "returned": len(alleles),
            "query_gene_id": gene_id,
            "page": int(page),
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _get_gene_expression_summary(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get expression summary (ribbon) for a gene.

    Requests ``{ALLIANCE_BASE}/gene/{gene_id}/expression-summary``.

    Args:
        arguments: Must contain a non-empty ``gene_id``.

    Returns:
        ``{"data": {...}, "metadata": {...}}`` where ``data`` holds the
        overall annotation count and one entry per group; within each
        group only terms with a positive annotation count are listed.
        ``{"error": ...}`` is returned when ``gene_id`` is missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {"error": "gene_id parameter is required"}
    url = f"{ALLIANCE_BASE}/gene/{gene_id}/expression-summary"
    response = requests.get(
        url, headers={"Accept": "application/json"}, timeout=self.timeout
    )
    response.raise_for_status()
    data = response.json()
    total_annotations = data.get("totalAnnotations", 0)

    expression_groups = []
    for g in data.get("groups") or []:
        terms = []
        for t in g.get("terms") or []:
            # A JSON null count would make ``None > 0`` raise TypeError, so
            # treat a missing or null count as zero for the filter.
            count = t.get("numberOfAnnotations") or 0
            if count > 0:
                terms.append(
                    {
                        "id": t.get("id"),
                        "name": t.get("name"),
                        "annotation_count": count,
                    }
                )
        expression_groups.append(
            {
                "group_name": g.get("name"),
                "total_annotations": g.get("totalAnnotations", 0),
                "terms": terms,
            }
        )
    return {
        "data": {
            "total_annotations": total_annotations,
            "expression_groups": expression_groups,
        },
        "metadata": {
            "query_gene_id": gene_id,
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _get_gene_interactions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get molecular or genetic interactions for a gene.

    Requests ``{ALLIANCE_BASE}/gene/{gene_id}/genetic-interactions`` when
    ``interaction_type`` is ``"genetic"``; any other value uses
    ``{ALLIANCE_BASE}/gene/{gene_id}/molecular-interactions``.

    Args:
        arguments: Must contain a non-empty ``gene_id``. Optional
            ``interaction_type`` (default ``"molecular"``), ``limit``
            (default 20, capped at 100) and ``page`` (default 1).

    Returns:
        ``{"data": [...], "metadata": {...}}`` with one entry per
        interaction; fields that the response omits or sets to null are
        reported as ``None``. ``{"error": ...}`` is returned when
        ``gene_id`` is missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {"error": "gene_id parameter is required"}
    interaction_type = arguments.get("interaction_type", "molecular")
    limit = arguments.get("limit", 20)
    page = arguments.get("page", 1)

    # The two flavours differ only in the URL suffix and in the key that
    # wraps each result record; everything else is parsed identically.
    if interaction_type == "genetic":
        path_suffix, record_key = "genetic-interactions", "geneGeneticInteraction"
    else:
        path_suffix, record_key = (
            "molecular-interactions",
            "geneMolecularInteraction",
        )
    url = f"{ALLIANCE_BASE}/gene/{gene_id}/{path_suffix}"
    params = {"limit": min(int(limit), 100), "page": int(page)}
    response = requests.get(
        url,
        params=params,
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    data = response.json()
    total = data.get("total", 0)

    interactions = []
    for r in data.get("results") or []:
        # ``or {}`` guards every nested object against both a missing key
        # and a JSON null, so both flavours tolerate sparse records alike.
        gi = r.get(record_key) or {}
        subject = gi.get("geneAssociationSubject") or {}
        obj = gi.get("geneGeneAssociationObject") or {}
        interactions.append(
            {
                "subject_gene_id": subject.get("primaryExternalId"),
                "subject_symbol": (subject.get("geneSymbol") or {}).get(
                    "displayText"
                ),
                "interactor_gene_id": obj.get("primaryExternalId"),
                "interactor_symbol": (obj.get("geneSymbol") or {}).get(
                    "displayText"
                ),
                "interactor_species": (obj.get("taxon") or {}).get("name"),
                "interaction_type": (gi.get("interactionType") or {}).get("name"),
            }
        )
    return {
        "data": interactions,
        "metadata": {
            "total_results": total,
            "returned": len(interactions),
            "query_gene_id": gene_id,
            "interaction_type": interaction_type,
            "page": int(page),
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _get_gene_disease_models(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get disease models involving a gene.

    Requests ``{ALLIANCE_BASE}/gene/{gene_id}/models``.

    Args:
        arguments: Must contain a non-empty ``gene_id``. Optional
            ``limit`` (default 20, capped at 100) and ``page``
            (default 1).

    Returns:
        ``{"data": [...], "metadata": {...}}`` with one entry per model,
        each carrying the list of diseases it models, or
        ``{"error": ...}`` when ``gene_id`` is missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    gene_id = arguments.get("gene_id", "")
    if not gene_id:
        return {"error": "gene_id parameter is required"}
    limit = arguments.get("limit", 20)
    page = arguments.get("page", 1)
    url = f"{ALLIANCE_BASE}/gene/{gene_id}/models"
    params = {"limit": min(int(limit), 100), "page": int(page)}
    response = requests.get(
        url,
        params=params,
        headers={"Accept": "application/json"},
        timeout=self.timeout,
    )
    response.raise_for_status()
    data = response.json()
    total = data.get("total", 0)

    models = []
    for r in data.get("results") or []:
        # ``or {}`` / ``or []`` also cover keys present with JSON null, so
        # the nested lookups below cannot fail on ``None``.
        model = r.get("model") or {}
        gene = r.get("gene") or {}
        diseases = []
        for dm in r.get("diseaseModels") or []:
            disease = dm.get("disease") or {}
            diseases.append(
                {
                    "disease_name": disease.get("name"),
                    "disease_id": disease.get("curie"),
                    "association_type": dm.get("associationType"),
                }
            )
        # Prefer the structured full name; fall back to the plain ``name``
        # field when ``agmFullName`` is not an object.
        full_name = model.get("agmFullName")
        if isinstance(full_name, dict):
            model_name = full_name.get("displayText")
        else:
            model_name = model.get("name")
        models.append(
            {
                "model_id": model.get("primaryExternalId"),
                "model_name": model_name,
                "gene_symbol": (gene.get("geneSymbol") or {}).get("displayText"),
                "gene_id": gene.get("primaryExternalId"),
                "data_provider": r.get("dataProvider"),
                "diseases": diseases,
            }
        )
    return {
        "data": models,
        "metadata": {
            "total_results": total,
            "returned": len(models),
            "query_gene_id": gene_id,
            "page": int(page),
            "source": "Alliance of Genome Resources",
        },
    }
[docs]
def _get_allele_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get detailed information about a specific allele.

    Requests ``{ALLIANCE_BASE}/allele/{allele_id}``.

    Args:
        arguments: Must contain a non-empty ``allele_id``.

    Returns:
        ``{"data": {...}, "metadata": {...}}`` with the allele's id,
        symbol, species, alteration type, parent gene and synonyms, or
        ``{"error": ...}`` when ``allele_id`` is missing.

    Raises:
        requests.exceptions.RequestException: On transport failure or an
            HTTP error status (raised by ``raise_for_status``).
    """
    allele_id = arguments.get("allele_id", "")
    if not allele_id:
        return {"error": "allele_id parameter is required"}
    url = f"{ALLIANCE_BASE}/allele/{allele_id}"
    response = requests.get(
        url, headers={"Accept": "application/json"}, timeout=self.timeout
    )
    response.raise_for_status()
    data = response.json()

    # ``x.get(key) or {}`` (rather than ``x.get(key, {})``) also covers keys
    # that are present but JSON ``null``, so chained lookups stay safe.
    allele = data.get("allele") or {}
    allele_of_gene = data.get("alleleOfGene") or {}
    synonyms = allele.get("alleleSynonyms") or []
    synonym_list = [s.get("displayText") for s in synonyms if s.get("displayText")]
    return {
        "data": {
            "id": allele.get("primaryExternalId"),
            "symbol": (allele.get("alleleSymbol") or {}).get("displayText"),
            "species": (allele.get("taxon") or {}).get("name"),
            "alteration_type": data.get("alterationType"),
            "gene_id": allele_of_gene.get("primaryExternalId"),
            "gene_symbol": (allele_of_gene.get("geneSymbol") or {}).get(
                "displayText"
            ),
            "synonyms": synonym_list,
        },
        "metadata": {
            "query_allele_id": allele_id,
            "source": "Alliance of Genome Resources",
        },
    }