# Source code for tooluniverse.rgd_tool
"""
RGD Tool - Rat Genome Database
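Provides access to rat gene records, ontology annotations (disease, phenotype,
pathway, ...), and cross-species orthologs via the RGD REST API. Gene search
is served by the Alliance of Genome Resources search API, filtered to RGD entries.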
API: https://rest.rgd.mcw.edu/rgdws/
No authentication required.
Reference: Smith et al., Nucleic Acids Res. 2023
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL of the RGD REST web services. The gene, annotation, and ortholog
# endpoint paths used by RGDTool are appended to it.
RGD_BASE = "https://rest.rgd.mcw.edu/rgdws"
# [docs]
@register_tool("RGDTool")
class RGDTool(BaseTool):
    """
    Tool for querying the Rat Genome Database (RGD).

    One instance serves a single operation, selected by
    ``tool_config["fields"]["endpoint_type"]`` (default ``"get_gene"``):

    - get_gene: Get rat gene details by RGD ID
    - search_genes: Search rat genes by symbol/keyword
    - get_annotations: Get disease/phenotype annotations for a gene
    - get_orthologs: Get orthologs across species

    Every operation returns a dict whose ``"status"`` is ``"success"``
    (with ``"data"`` and ``"metadata"``) or ``"error"`` (with an ``"error"``
    message). ``run`` converts request failures into error dicts, so callers
    do not need to catch exceptions.
    """

    # Gene search is delegated to the Alliance of Genome Resources, which
    # aggregates RGD data; RGD's own symbol search returns 400 for many queries.
    _ALLIANCE_SEARCH_URL = "https://www.alliancegenome.org/api/search"

    # Output caps that keep responses compact.
    _MAX_ANNOTATIONS = 50
    _MAX_NOTE_CHARS = 200
    _MAX_SYNONYMS = 5

    # Human-readable labels for the one-letter annotation aspect codes.
    # NOTE(review): mapping kept exactly as in the original code; confirm it
    # against RGD's published aspect codes before relying on the labels.
    _ASPECT_LABELS = {
        "D": "Disease",
        "E": "Expression",
        "P": "Pathway",
        "F": "Molecular Function",
        "C": "Cellular Component",
        "W": "Phenotype",
    }

    # Names for the ``speciesTypeKey`` values found in ortholog records.
    _SPECIES_BY_KEY = {
        1: "human",
        2: "mouse",
        3: "rat",
        4: "chinchilla",
        5: "bonobo",
        6: "dog",
        7: "squirrel",
        9: "pig",
        13: "green_monkey",
        14: "naked_mole_rat",
    }

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Set up the tool from its configuration.

        Args:
            tool_config: Tool configuration; ``fields.endpoint_type`` selects
                the operation this instance performs.
        """
        super().__init__(tool_config)
        self.timeout = 30
        # ``or {}`` also covers a config where "fields" is present but None.
        fields = tool_config.get("fields") or {}
        self.endpoint_type = fields.get("endpoint_type", "get_gene")
        # A single session is reused for all requests made by this instance.
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "ToolUniverse/1.0 (https://github.com/mims-harvard/ToolUniverse)"
            }
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the configured operation.

        Args:
            arguments: Operation arguments (see the individual handlers).

        Returns:
            The handler's result dict, or an error dict when the endpoint type
            is unknown or the handler raised (timeout, connection failure, or
            any other exception).
        """
        # Treat a missing argument dict as empty so the handlers can report
        # the missing field instead of failing on ``None.get``.
        arguments = arguments or {}
        try:
            handlers = {
                "get_gene": self._get_gene,
                "search_genes": self._search_genes,
                "get_annotations": self._get_annotations,
                "get_orthologs": self._get_orthologs,
            }
            handler = handlers.get(self.endpoint_type)
            if not handler:
                return {
                    "status": "error",
                    "error": f"Unknown endpoint: {self.endpoint_type}",
                }
            return handler(arguments)
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "RGD API request timed out"}
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to RGD API"}
        except Exception as e:
            # Boundary handler: HTTP status errors, JSON decoding errors and
            # unexpected payload shapes all surface as an error dict.
            return {"status": "error", "error": f"RGD API error: {str(e)}"}

    @staticmethod
    def _parse_rgd_id(arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract and validate the RGD ID from ``rgd_id`` (or ``gene_id``).

        Surrounding whitespace and an optional ``RGD:`` prefix (any case) are
        removed. The remainder must consist of ASCII digits only, which also
        keeps arbitrary text out of the request URL path.

        Returns:
            ``{"rgd_id": "<digits>"}`` on success, otherwise an error dict
            (recognisable by its ``"error"`` key).
        """
        raw = arguments.get("rgd_id") or arguments.get("gene_id", "")
        if not raw:
            return {"status": "error", "error": "rgd_id is required"}
        text = str(raw).strip()
        if text[:4].upper() == "RGD:":
            text = text[4:].strip()
        if not text or any(ch not in "0123456789" for ch in text):
            return {
                "status": "error",
                "error": (
                    f"Invalid rgd_id: {raw!r} (expected a numeric RGD ID, "
                    "optionally prefixed with 'RGD:')"
                ),
            }
        return {"rgd_id": text}

    def _request_json(self, url: str, params: Any = None) -> Any:
        """GET ``url`` with the shared session and return the decoded JSON body.

        Raises whatever ``requests`` raises for timeouts, connection failures,
        non-2xx statuses or undecodable bodies; ``run`` turns those into error
        dicts.
        """
        resp = self.session.get(url, params=params, timeout=self.timeout)
        resp.raise_for_status()
        return resp.json()

    @staticmethod
    def _as_records(payload: Any) -> list:
        """Normalise a JSON payload to a list of dict records.

        A single object is wrapped in a list, an empty/null payload becomes an
        empty list, and non-dict entries are dropped so callers can use
        ``.get`` on every record.
        """
        if isinstance(payload, list):
            records = payload
        elif payload:
            records = [payload]
        else:
            records = []
        return [rec for rec in records if isinstance(rec, dict)]

    def _get_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch one gene record by RGD ID.

        Args:
            arguments: Must contain ``rgd_id`` (or ``gene_id``).

        Returns:
            Success dict with the gene's core fields, or an error dict when
            the ID is missing/invalid or the response holds no gene object.
        """
        parsed = self._parse_rgd_id(arguments)
        if "error" in parsed:
            return parsed
        rgd_id = parsed["rgd_id"]

        data = self._request_json(f"{RGD_BASE}/genes/{rgd_id}")
        if not isinstance(data, dict) or not data:
            return {
                "status": "error",
                "error": f"No gene found for RGD ID {rgd_id}",
            }
        return {
            "status": "success",
            "data": {
                "rgd_id": data.get("rgdId"),
                "symbol": data.get("symbol"),
                "name": data.get("name"),
                # Prefer the "agrDescription" field; fall back to "description".
                "description": data.get("agrDescription") or data.get("description"),
                "type": data.get("type"),
                "ensembl_symbol": data.get("ensemblGeneSymbol"),
                "refseq_status": data.get("refSeqStatus"),
            },
            "metadata": {"source": "RGD", "query_id": rgd_id},
        }

    def _search_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search rat genes by symbol or keyword via the Alliance search API.

        Args:
            arguments: ``query`` (or ``gene_symbol``) is required; ``limit``
                (default 10) is forwarded to the Alliance API.

        Returns:
            Success dict listing the results whose primary key starts with
            ``RGD:``. The limit is sent with the request and the RGD filter is
            applied afterwards, so fewer than ``limit`` genes may be returned.
        """
        query = arguments.get("query") or arguments.get("gene_symbol", "")
        if not query:
            return {"status": "error", "error": "query is required"}

        limit = arguments.get("limit")
        if limit is None:
            limit = 10
        try:
            limit = int(limit)
        except (TypeError, ValueError):
            return {"status": "error", "error": "limit must be an integer"}
        if limit < 1:
            return {"status": "error", "error": "limit must be a positive integer"}

        params = {"q": query, "category": "gene", "limit": limit}
        data = self._request_json(self._ALLIANCE_SEARCH_URL, params=params)
        if not isinstance(data, dict):
            data = {}

        genes = []
        for hit in self._as_records(data.get("results")):
            # JSON nulls arrive as None, so fall back explicitly instead of
            # relying on ``dict.get`` defaults (which only cover missing keys).
            primary_key = str(hit.get("primaryKey") or "")
            # Filter to RGD entries only (rat genes).
            if not primary_key.startswith("RGD:"):
                continue
            genes.append(
                {
                    "rgd_id": primary_key.replace("RGD:", ""),
                    "symbol": hit.get("symbol"),
                    "name": hit.get("name"),
                    "species": hit.get("species") or "Rattus norvegicus",
                    "synonyms": (hit.get("synonyms") or [])[: self._MAX_SYNONYMS],
                }
            )
        return {
            "status": "success",
            "data": genes,
            "metadata": {
                "query": query,
                "returned": len(genes),
                "total_alliance_results": data.get("total", 0),
                "source": "RGD via Alliance of Genome Resources",
            },
        }

    def _get_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch ontology annotations for a gene, optionally filtered by aspect.

        Args:
            arguments: ``rgd_id`` (or ``gene_id``) is required; ``aspect`` is
                an optional one-letter aspect code, matched case-insensitively.

        Returns:
            Success dict with at most 50 annotations in ``data``.
            ``metadata.total_annotations`` is the number of annotations that
            matched (after the aspect filter, before the 50-item cap) and
            ``metadata.aspect_counts`` tallies those matches per aspect.
        """
        from collections import Counter

        parsed = self._parse_rgd_id(arguments)
        if "error" in parsed:
            return parsed
        rgd_id = parsed["rgd_id"]
        # D=disease, P=pathway, etc. Normalised so " d " and "D" are the same.
        aspect = str(arguments.get("aspect") or "").strip().upper()

        records = self._as_records(
            self._request_json(f"{RGD_BASE}/annotations/rgdId/{rgd_id}")
        )
        if aspect:
            records = [rec for rec in records if rec.get("aspect") == aspect]

        aspect_counts = Counter(rec.get("aspect", "?") for rec in records)
        annotations = [
            {
                "term": rec.get("term"),
                "term_acc": rec.get("termAcc"),
                "qualifier": rec.get("qualifier"),
                "aspect": rec.get("aspect"),
                "evidence": rec.get("evidence"),
                "data_src": rec.get("dataSrc"),
                "notes": (rec.get("notes") or "")[: self._MAX_NOTE_CHARS],
            }
            for rec in records[: self._MAX_ANNOTATIONS]
        ]
        return {
            "status": "success",
            "data": annotations,
            "metadata": {
                "rgd_id": rgd_id,
                # ``records`` is already filtered, so its length is the true
                # total whether or not an aspect was requested.
                "total_annotations": len(records),
                "returned": len(annotations),
                "aspect_counts": {
                    self._ASPECT_LABELS.get(code, code): count
                    for code, count in aspect_counts.items()
                },
                "source": "RGD",
            },
        }

    def _get_orthologs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch the orthologs of a gene across species.

        Args:
            arguments: Must contain ``rgd_id`` (or ``gene_id``).

        Returns:
            Success dict with one entry per ortholog. ``species`` is a
            readable name for known ``speciesTypeKey`` values, otherwise the
            key rendered as a string.
        """
        parsed = self._parse_rgd_id(arguments)
        if "error" in parsed:
            return parsed
        rgd_id = parsed["rgd_id"]

        records = self._as_records(
            self._request_json(f"{RGD_BASE}/genes/orthologs/{rgd_id}")
        )
        orthologs = []
        for rec in records:
            species_key = rec.get("speciesTypeKey")
            orthologs.append(
                {
                    "rgd_id": rec.get("rgdId"),
                    "symbol": rec.get("symbol"),
                    "name": rec.get("name"),
                    "species": self._SPECIES_BY_KEY.get(species_key, str(species_key)),
                }
            )
        return {
            "status": "success",
            "data": orthologs,
            "metadata": {
                "query_rgd_id": rgd_id,
                "ortholog_count": len(orthologs),
                "source": "RGD",
            },
        }