Source code for tooluniverse.geo_tool
"""
GEO Database REST API Tool
This tool provides access to gene expression data from the GEO database.
GEO is a public repository that archives and freely distributes microarray,
next-generation sequencing, and other forms of high-throughput functional
genomics data.
"""
from typing import Dict, Any, List
from .ncbi_eutils_tool import NCBIEUtilsTool
from .tool_registry import register_tool
[docs]
@register_tool("GEORESTTool")
class GEORESTTool(NCBIEUtilsTool):
"""
GEO Database REST API tool with rate limiting.
Generic wrapper for GEO API endpoints defined in expression_tools.json.
"""
[docs]
def __init__(self, tool_config):
super().__init__(tool_config)
fields = tool_config.get("fields", {})
parameter = tool_config.get("parameter", {})
self.endpoint_template: str = fields.get("endpoint", "/esearch.fcgi")
self.required: List[str] = parameter.get("required", [])
self.output_format: str = fields.get("return_format", "JSON")
def _build_url(self, arguments: Dict[str, Any]) -> str | Dict[str, Any]:
"""Build URL for GEO API request."""
url_path = self.endpoint_template
return self.base_url + url_path
def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Build parameters for GEO API request."""
params = {"db": "gds", "retmode": "json", "retmax": 50}
# Build search query
query_parts = []
if "query" in arguments:
query_parts.append(arguments["query"])
if "organism" in arguments:
organism = arguments["organism"]
if organism.lower() == "homo sapiens":
query_parts.append("Homo sapiens[organism]")
elif organism.lower() == "mus musculus":
query_parts.append("Mus musculus[organism]")
else:
query_parts.append(f'"{organism}"[organism]')
if "study_type" in arguments:
study_type = arguments["study_type"]
query_parts.append(f'"{study_type}"[study_type]')
if "platform" in arguments:
platform = arguments["platform"]
query_parts.append(f'"{platform}"[platform]')
if "date_range" in arguments:
date_range = arguments["date_range"]
if ":" in date_range:
start_year, end_year = date_range.split(":")
query_parts.append(f'"{start_year}"[PDAT] : "{end_year}"[PDAT]')
if query_parts:
params["term"] = " AND ".join(query_parts)
if "limit" in arguments:
params["retmax"] = min(arguments["limit"], 500)
if "sort" in arguments:
sort = arguments["sort"]
if sort == "date":
params["sort"] = "relevance"
elif sort == "title":
params["sort"] = "title"
else:
params["sort"] = "relevance"
return params
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the tool with given arguments."""
# Validate required parameters
for param in self.required:
if param not in arguments:
return {"error": f"Missing required parameter: {param}"}
# Set endpoint for the base class
self.endpoint = self.endpoint_template
params = self._build_params(arguments)
# Use the parent class's _make_request with rate limiting
return self._make_request(self.endpoint, params)
[docs]
@register_tool("GEOSearchDatasets")
class GEOSearchDatasets(GEORESTTool):
"""Search GEO datasets by various criteria."""
[docs]
def __init__(self, tool_config):
super().__init__(tool_config)
self.endpoint_template = "/esearch.fcgi"
def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Build parameters for GEO dataset search."""
params = {"db": "gds", "retmode": "json", "retmax": 50}
# Build search query
query_parts = []
if "query" in arguments:
query_parts.append(arguments["query"])
if "organism" in arguments:
organism = arguments["organism"]
query_parts.append(f'"{organism}"[organism]')
if "study_type" in arguments:
study_type = arguments["study_type"]
query_parts.append(f'"{study_type}"[study_type]')
if "platform" in arguments:
platform = arguments["platform"]
query_parts.append(f'"{platform}"[platform]')
if query_parts:
params["term"] = " AND ".join(query_parts)
if "limit" in arguments:
params["retmax"] = min(arguments["limit"], 500)
return params
[docs]
@register_tool("GEOGetDatasetInfo")
class GEOGetDatasetInfo(GEORESTTool):
"""Get detailed information about a specific GEO dataset."""
[docs]
def __init__(self, tool_config):
super().__init__(tool_config)
self.endpoint_template = "/esummary.fcgi"
def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Build parameters for GEO dataset info retrieval."""
dataset_id = arguments.get("dataset_id", "")
if not dataset_id:
return {"error": "dataset_id is required"}
return {"db": "gds", "id": dataset_id, "retmode": "json"}
[docs]
@register_tool("GEOGetSampleInfo")
class GEOGetSampleInfo(GEORESTTool):
"""Get sample information for a GEO dataset."""
[docs]
def __init__(self, tool_config):
super().__init__(tool_config)
self.endpoint_template = "/esummary.fcgi"
def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Build parameters for GEO sample info retrieval."""
dataset_id = arguments.get("dataset_id", "")
if not dataset_id:
return {"error": "dataset_id is required"}
return {"db": "gds", "id": dataset_id, "retmode": "json"}