"""
GEO Database REST API Tool
This tool provides access to gene expression data from the GEO database.
GEO is a public repository that archives and freely distributes microarray,
next-generation sequencing, and other forms of high-throughput functional
genomics data.
"""
from typing import Dict, Any, List
from .ncbi_eutils_tool import NCBIEUtilsTool
from .tool_registry import register_tool
@register_tool("GEOSearchDatasets")
class GEOSearchDatasets(GEORESTTool):
    """Search GEO datasets by free text and optional field filters.

    The tool targets the ``/esearch.fcgi`` endpoint, always queries the
    ``gds`` database and asks for JSON output.
    """

    # ``retmax`` sent when the caller supplies no usable ``limit``.
    _DEFAULT_RETMAX = 50
    # Upper bound applied to any caller-supplied ``limit``.
    _MAX_RETMAX = 500
    # Optional arguments turned into ``"value"[field]`` clauses, in this order.
    # NOTE(review): the field tags are used verbatim as written in the original
    # code; confirm against the GEO DataSets field list that each is recognised.
    _FILTER_FIELDS = ("organism", "study_type", "platform")

    def __init__(self, tool_config):
        """Initialise the base tool and select the esearch endpoint."""
        super().__init__(tool_config)
        self.endpoint_template = "/esearch.fcgi"

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return *value* as a stripped string, or ``""`` when it is ``None``."""
        return "" if value is None else str(value).strip()

    def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Build the query-string parameters for a GEO dataset search.

        Args:
            arguments: Tool arguments. Recognised keys are ``query`` (free
                text), ``organism``, ``study_type``, ``platform`` (each added
                as a quoted, field-tagged clause) and ``limit``.

        Returns:
            A dict with ``db``, ``retmode`` and ``retmax``, plus ``term`` when
            at least one non-empty search clause was supplied. Clauses are
            joined with ``" AND "``.
        """
        params: Dict[str, Any] = {
            "db": "gds",
            "retmode": "json",
            "retmax": self._DEFAULT_RETMAX,
        }

        query_parts: List[str] = []

        # Missing, ``None`` and blank values are skipped so that they neither
        # break the join below nor produce clauses such as ``"None"[organism]``.
        free_text = self._clean_text(arguments.get("query"))
        if free_text:
            query_parts.append(free_text)

        for field in self._FILTER_FIELDS:
            value = self._clean_text(arguments.get(field))
            if value:
                query_parts.append(f'"{value}"[{field}]')

        if query_parts:
            params["term"] = " AND ".join(query_parts)

        limit = arguments.get("limit")
        if limit is not None:
            try:
                requested = int(limit)
            except (TypeError, ValueError):
                # An unparsable limit falls back to the default page size
                # instead of aborting the whole search.
                requested = self._DEFAULT_RETMAX
            # Keep retmax inside [0, _MAX_RETMAX]; negative values make no
            # sense as a result count.
            params["retmax"] = max(0, min(requested, self._MAX_RETMAX))

        return params
@register_tool("GEOGetDatasetInfo")
class GEOGetDatasetInfo(GEORESTTool):
    """Get detailed information about a specific GEO dataset.

    The summary is fetched from the ``/esummary.fcgi`` endpoint. A purely
    numeric ``dataset_id`` is used as the UID directly; anything else is
    treated as an accession and first resolved to a UID through
    ``self._accession_to_uid``.
    """

    def __init__(self, tool_config):
        """Initialise the base tool and select the esummary endpoint."""
        super().__init__(tool_config)
        self.endpoint_template = "/esummary.fcgi"

    @staticmethod
    def _clean_dataset_id(arguments: Dict[str, Any]) -> str:
        """Return ``arguments["dataset_id"]`` as a stripped string.

        Missing or ``None`` values yield ``""``. Non-string values (for
        example a UID passed as an ``int``) are converted with ``str`` so the
        later ``.isdigit()`` check cannot raise ``AttributeError``.
        """
        raw_id = arguments.get("dataset_id")
        return "" if raw_id is None else str(raw_id).strip()

    def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Build parameters for GEO dataset info retrieval.

        Args:
            arguments: Tool arguments; ``dataset_id`` is required.

        Returns:
            ``{"db", "id", "retmode"}`` for the request, or
            ``{"error": ...}`` when ``dataset_id`` is missing or blank. The
            ``id`` is passed through unchanged here; converting an accession
            to a UID happens in :meth:`run`.
        """
        dataset_id = self._clean_dataset_id(arguments)
        if not dataset_id:
            return {"error": "dataset_id is required"}

        # The database is chosen by the inherited helper from the identifier.
        return {
            "db": self._detect_database(dataset_id),
            "id": dataset_id,
            "retmode": "json",
        }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the tool with the given arguments.

        Args:
            arguments: Tool arguments; every name in ``self.required`` must be
                present and ``dataset_id`` must be non-blank.

        Returns:
            The result of ``self._make_request`` for the esummary call, or an
            error dict when validation fails, the accession lookup does not
            report ``status == "success"``, or the lookup yields no UID.
        """
        for param in self.required:
            if param not in arguments:
                return {"error": f"Missing required parameter: {param}"}

        dataset_id = self._clean_dataset_id(arguments)
        if not dataset_id:
            return {"error": "dataset_id is required"}

        db = self._detect_database(dataset_id)

        if dataset_id.isdigit():
            # Already a numeric UID: no lookup needed.
            uid = dataset_id
        else:
            # Accession number: resolve it to a UID first.
            search_result = self._accession_to_uid(dataset_id, db)
            if search_result.get("status") != "success":
                return search_result

            # ``or`` guards against keys that are present but set to None.
            search_data = search_result.get("data") or {}
            esearch_result = search_data.get("esearchresult") or {}
            idlist = esearch_result.get("idlist") or []
            if not idlist:
                return {
                    "status": "error",
                    "error": f"No UID found for accession {dataset_id} in database {db}",
                    "data": search_data,
                }
            # Only the first match is used.
            uid = idlist[0]

        self.endpoint = self.endpoint_template
        params = {"db": db, "id": uid, "retmode": "json"}
        return self._make_request(self.endpoint, params)
@register_tool("GEOGetSampleInfo")
class GEOGetSampleInfo(GEORESTTool):
    """Get sample information for a GEO dataset.

    The summary is fetched from the ``/esummary.fcgi`` endpoint. A purely
    numeric ``dataset_id`` is used as the UID directly; anything else is
    treated as an accession and first resolved to a UID through
    ``self._accession_to_uid``.
    """

    def __init__(self, tool_config):
        """Initialise the base tool and select the esummary endpoint."""
        super().__init__(tool_config)
        self.endpoint_template = "/esummary.fcgi"

    @staticmethod
    def _clean_dataset_id(arguments: Dict[str, Any]) -> str:
        """Return ``arguments["dataset_id"]`` as a stripped string.

        Missing or ``None`` values yield ``""``. Non-string values (for
        example a UID passed as an ``int``) are converted with ``str`` so the
        later ``.isdigit()`` check cannot raise ``AttributeError``.
        """
        raw_id = arguments.get("dataset_id")
        return "" if raw_id is None else str(raw_id).strip()

    def _build_params(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Build parameters for GEO sample info retrieval.

        Args:
            arguments: Tool arguments; ``dataset_id`` is required.

        Returns:
            ``{"db", "id", "retmode"}`` for the request, or
            ``{"error": ...}`` when ``dataset_id`` is missing or blank. The
            ``id`` is passed through unchanged here; converting an accession
            to a UID happens in :meth:`run`.
        """
        dataset_id = self._clean_dataset_id(arguments)
        if not dataset_id:
            return {"error": "dataset_id is required"}

        # The database is chosen by the inherited helper from the identifier.
        return {
            "db": self._detect_database(dataset_id),
            "id": dataset_id,
            "retmode": "json",
        }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the tool with the given arguments.

        Args:
            arguments: Tool arguments; every name in ``self.required`` must be
                present and ``dataset_id`` must be non-blank.

        Returns:
            The result of ``self._make_request`` for the esummary call, or an
            error dict when validation fails, the accession lookup does not
            report ``status == "success"``, or the lookup yields no UID.
        """
        for param in self.required:
            if param not in arguments:
                return {"error": f"Missing required parameter: {param}"}

        dataset_id = self._clean_dataset_id(arguments)
        if not dataset_id:
            return {"error": "dataset_id is required"}

        db = self._detect_database(dataset_id)

        if dataset_id.isdigit():
            # Already a numeric UID: no lookup needed.
            uid = dataset_id
        else:
            # Accession number: resolve it to a UID first.
            search_result = self._accession_to_uid(dataset_id, db)
            if search_result.get("status") != "success":
                return search_result

            # ``or`` guards against keys that are present but set to None.
            search_data = search_result.get("data") or {}
            esearch_result = search_data.get("esearchresult") or {}
            idlist = esearch_result.get("idlist") or []
            if not idlist:
                return {
                    "status": "error",
                    "error": f"No UID found for accession {dataset_id} in database {db}",
                    "data": search_data,
                }
            # Only the first match is used.
            uid = idlist[0]

        self.endpoint = self.endpoint_template
        params = {"db": db, "id": uid, "retmode": "json"}
        return self._make_request(self.endpoint, params)