Source code for tooluniverse.gdc_tool
import json
from typing import Any, Dict
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from tooluniverse.tool_registry import register_tool
def _http_get(
url: str,
headers: Dict[str, str] | None = None,
timeout: int = 30,
) -> Dict[str, Any]:
req = Request(url, headers=headers or {})
with urlopen(req, timeout=timeout) as resp:
data = resp.read()
try:
return json.loads(data.decode("utf-8", errors="ignore"))
except Exception:
return {"raw": data.decode("utf-8", errors="ignore")}
[docs]
@register_tool(
"GDCCasesTool",
config={
"name": "GDC_search_cases",
"type": "GDCCasesTool",
"description": "Search NCI GDC cases via /cases",
"parameter": {
"type": "object",
"properties": {
"project_id": {
"type": "string",
"description": "GDC project identifier (e.g., 'TCGA-BRCA')",
},
"size": {
"type": "integer",
"default": 10,
"minimum": 1,
"maximum": 100,
"description": "Number of results (1–100)",
},
"offset": {
"type": "integer",
"default": 0,
"minimum": 0,
"description": "Offset for pagination (0-based)",
},
},
},
"settings": {"base_url": "https://api.gdc.cancer.gov", "timeout": 30},
},
)
class GDCCasesTool:
[docs]
def run(self, arguments: Dict[str, Any]):
base = self.tool_config.get("settings", {}).get(
"base_url", "https://api.gdc.cancer.gov"
)
timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
query: Dict[str, Any] = {}
if arguments.get("project_id"):
# Build filters JSON for project_id
filters = {
"op": "=",
"content": {
"field": "projects.project_id",
"value": [arguments["project_id"]],
},
}
query["filters"] = json.dumps(filters)
if arguments.get("size") is not None:
query["size"] = int(arguments["size"])
if arguments.get("offset") is not None:
query["from"] = int(arguments["offset"])
url = f"{base}/cases?{urlencode(query)}"
try:
data = _http_get(
url, headers={"Accept": "application/json"}, timeout=timeout
)
return {
"source": "GDC",
"endpoint": "cases",
"query": query,
"data": data,
"success": True,
}
except Exception as e:
return {
"error": str(e),
"source": "GDC",
"endpoint": "cases",
"success": False,
}
[docs]
@register_tool(
"GDCFilesTool",
config={
"name": "GDC_list_files",
"type": "GDCFilesTool",
"description": "List NCI GDC files via /files with optional data_type filter",
"parameter": {
"type": "object",
"properties": {
"data_type": {
"type": "string",
"description": "Data type filter (e.g., 'Gene Expression Quantification')",
},
"size": {
"type": "integer",
"default": 10,
"minimum": 1,
"maximum": 100,
"description": "Number of results (1–100)",
},
"offset": {
"type": "integer",
"default": 0,
"minimum": 0,
"description": "Offset for pagination (0-based)",
},
},
},
"settings": {"base_url": "https://api.gdc.cancer.gov", "timeout": 30},
},
)
class GDCFilesTool:
[docs]
def run(self, arguments: Dict[str, Any]):
base = self.tool_config.get("settings", {}).get(
"base_url", "https://api.gdc.cancer.gov"
)
timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
query: Dict[str, Any] = {}
if arguments.get("data_type"):
filters = {
"op": "=",
"content": {
"field": "files.data_type",
"value": [arguments["data_type"]],
},
}
query["filters"] = json.dumps(filters)
if arguments.get("size") is not None:
query["size"] = int(arguments["size"])
if arguments.get("offset") is not None:
query["from"] = int(arguments["offset"])
url = f"{base}/files?{urlencode(query)}"
try:
data = _http_get(
url, headers={"Accept": "application/json"}, timeout=timeout
)
return {
"source": "GDC",
"endpoint": "files",
"query": query,
"data": data,
"success": True,
}
except Exception as e:
return {
"error": str(e),
"source": "GDC",
"endpoint": "files",
"success": False,
}