Source code for tooluniverse.gtex_tool

import json
from typing import Any, Dict
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from tooluniverse.tool_registry import register_tool


def _http_get(
    url: str,
    headers: Dict[str, str] | None = None,
    timeout: int = 30,
) -> Dict[str, Any]:
    req = Request(url, headers=headers or {})
    with urlopen(req, timeout=timeout) as resp:
        data = resp.read()
        try:
            return json.loads(data.decode("utf-8", errors="ignore"))
        except Exception:
            return {"raw": data.decode("utf-8", errors="ignore")}


def _extract_data_list(api_response: Any) -> list:
    """Extract the data list from a GTEx API response.

    The API returns ``{"data": [...], "paging_info": {...}}``. A response that
    is already a list is returned as-is.

    Args:
        api_response: Parsed API response (normally a dict or a list).

    Returns:
        The list found under ``"data"`` (for dict responses) or the response
        itself (for list responses). Anything else -- including a dict whose
        ``"data"`` value is missing, ``None`` or not a list -- yields ``[]``,
        so callers can always rely on getting a list back.
    """
    if isinstance(api_response, dict):
        candidate = api_response.get("data")
    else:
        candidate = api_response
    # Guard the documented contract: never leak None or a non-list payload.
    return candidate if isinstance(candidate, list) else []


def _resolve_gene_id(gene_input: str, base_url: str, timeout: int) -> str:
    """Resolve a gene symbol or Ensembl ID to a versioned GENCODE ID.

    Queries ``{base_url}/reference/gene`` with ``gencodeVersion=v26``. For
    inputs starting with ``"ENS"`` any version suffix (the part after the
    first ``"."``) is stripped before the lookup, so a user-supplied version
    that does not match GENCODE v26 is re-resolved.

    Resolution is best-effort: on any request failure or unexpected response
    shape the original ``gene_input`` is returned unchanged.

    Args:
        gene_input: Gene symbol (e.g. ``"TP53"``) or Ensembl gene ID, with or
            without a version suffix.
        base_url: Base URL of the GTEx API.
        timeout: Request timeout in seconds.

    Returns:
        The ``gencodeId`` of the first match, or ``gene_input`` itself when
        nothing could be resolved. Empty or non-string input is returned
        as-is without issuing a request.
    """
    # Nothing to look up -- avoid a pointless request (and a crash on None).
    if not gene_input or not isinstance(gene_input, str):
        return gene_input

    query_id = gene_input.split(".")[0] if gene_input.startswith("ENS") else gene_input
    # urlencode escapes characters such as '&', '#' or spaces that would
    # otherwise corrupt the query string.
    query = urlencode({"geneId": query_id, "gencodeVersion": "v26"})
    url = f"{base_url}/reference/gene?{query}"

    try:
        data = _http_get(url, headers={"Accept": "application/json"}, timeout=timeout)
    except Exception:
        # Deliberate best-effort: callers fall back to the raw input.
        return gene_input

    genes = data.get("data") if isinstance(data, dict) else None
    if isinstance(genes, list) and genes and isinstance(genes[0], dict):
        gencode_id = genes[0].get("gencodeId")
        # A missing or null gencodeId must not replace the caller's input.
        if isinstance(gencode_id, str) and gencode_id:
            return gencode_id
    return gene_input


@register_tool(
    "GTExExpressionTool",
    config={
        "name": "GTEx_get_expression_summary",
        "type": "GTExExpressionTool",
        # NOTE(review): the description mentions /expression/geneExpression,
        # while run() calls /expression/clusteredMedianGeneExpression.
        "description": "Get GTEx expression summary for a gene via /expression/geneExpression",
        "parameter": {
            "type": "object",
            "properties": {
                "gene_symbol": {
                    "type": "string",
                    "description": "Gene symbol (e.g., TP53, BRCA1). Auto-resolved to GENCODE ID.",
                },
                "ensembl_gene_id": {
                    "type": "string",
                    "description": "Ensembl gene ID, e.g., ENSG00000141510",
                },
            },
            "required": [],
        },
        "settings": {"base_url": "https://gtexportal.org/api/v2", "timeout": 30},
    },
)
class GTExExpressionTool:
    """Tool returning the GTEx median expression summary for one gene.

    The gene is given as ``gene_symbol`` or ``ensembl_gene_id``; it is resolved
    to a GENCODE ID and looked up in the ``gtex_v8`` dataset.
    """

    def __init__(self, tool_config=None):
        """Store the tool configuration (an empty dict when none is given)."""
        self.tool_config = tool_config or {}

    def run(self, arguments: Dict[str, Any]):
        """Query clustered median gene expression for the requested gene.

        Args:
            arguments: Tool arguments; ``gene_symbol`` takes precedence over
                ``ensembl_gene_id``.

        Returns:
            A dict with ``success`` set to ``True`` and the expression records
            under ``data["geneExpression"]`` (plus a ``note`` when no records
            came back), or a dict with ``success`` set to ``False`` and an
            ``error`` message when no gene was given or the request failed.
        """
        endpoint = "expression/clusteredMedianGeneExpression"
        settings = self.tool_config.get("settings", {})
        base_url = settings.get("base_url", "https://gtexportal.org/api/v2")
        timeout = int(settings.get("timeout", 30))

        gene_input = arguments.get("gene_symbol") or arguments.get(
            "ensembl_gene_id", ""
        )
        if not gene_input:
            return {
                "error": "Provide gene_symbol (e.g., 'TP53') or ensembl_gene_id (e.g., 'ENSG00000141510').",
                "success": False,
            }

        # Turn the symbol / unversioned Ensembl ID into a versioned GENCODE ID.
        gencode_id = _resolve_gene_id(gene_input, base_url, timeout)

        # Feature-80A: /expression/medianGeneExpression now requires
        # tissueSiteDetailId, so the clustered endpoint (all tissues) is used.
        query = {"gencodeId": gencode_id, "datasetId": "gtex_v8"}
        url = f"{base_url}/{endpoint}?{urlencode(query)}"

        try:
            api_response = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )

            # The clustered endpoint keeps its records under
            # 'medianGeneExpression' rather than the usual 'data' key.
            if isinstance(api_response, dict):
                expression_data = api_response.get("medianGeneExpression", [])
            else:
                expression_data = _extract_data_list(api_response)

            result = {
                "source": "GTEx",
                "endpoint": endpoint,
                "query": query,
                "data": {"geneExpression": expression_data},
                "success": True,
            }

            if not expression_data:
                if gencode_id == gene_input:
                    # Resolution left the input untouched: likely unresolved.
                    result["note"] = (
                        f"No expression data found. Could not resolve '{gene_input}' to a "
                        "versioned GENCODE ID. Try providing an Ensembl gene ID "
                        "(e.g., 'ENSG00000141510') or use GTEx_get_median_gene_expression "
                        "with a versioned ID (e.g., 'ENSG00000141510.16')."
                    )
                else:
                    # Resolved, but still empty: hint at a version mismatch.
                    result["note"] = (
                        f"No expression data found for '{gencode_id}'. The GENCODE version "
                        "may not match gtex_v8 (GENCODE v26). Try GTEx_get_median_gene_expression "
                        "with a different version suffix."
                    )
            return result
        except Exception as exc:
            return {
                "error": str(exc),
                "source": "GTEx",
                "endpoint": endpoint,
                "success": False,
            }
@register_tool(
    "GTExEQTLTool",
    config={
        "name": "GTEx_query_eqtl",
        "type": "GTExEQTLTool",
        "description": "Query GTEx single-tissue eQTL via /association/singleTissueEqtl",
        "parameter": {
            "type": "object",
            "properties": {
                "gene_symbol": {
                    "type": "string",
                    "description": "Gene symbol (e.g., TP53, BRCA1). Auto-resolved to GENCODE ID.",
                },
                "ensembl_gene_id": {
                    "type": "string",
                    "description": "Ensembl gene ID, e.g., ENSG00000141510",
                },
                "page": {
                    "type": "integer",
                    "default": 1,
                    "minimum": 1,
                    "description": "Page number (1-based)",
                },
                "size": {
                    "type": "integer",
                    "default": 10,
                    "minimum": 1,
                    "maximum": 100,
                    "description": "Page size (1–100)",
                },
            },
            "required": [],
        },
        "settings": {"base_url": "https://gtexportal.org/api/v2", "timeout": 30},
    },
)
class GTExEQTLTool:
    """Tool querying GTEx single-tissue eQTL records for one gene.

    The gene is given as ``gene_symbol`` or ``ensembl_gene_id`` and is resolved
    to a GENCODE ID before the lookup.
    """

    def __init__(self, tool_config=None):
        """Store the tool configuration (an empty dict when none is given)."""
        self.tool_config = tool_config or {}

    def run(self, arguments: Dict[str, Any]):
        """Query ``/association/singleTissueEqtl`` for the requested gene.

        Args:
            arguments: Tool arguments. ``gene_symbol`` takes precedence over
                ``ensembl_gene_id``. ``page`` and ``size`` are forwarded as
                ``page`` / ``pageSize`` only when provided (``None`` counts as
                not provided). ``dataset_id`` is honoured when present and
                defaults to ``"gtex_v8"``; note it is not declared in the
                tool's parameter schema.

        Returns:
            A dict with ``success`` set to ``True`` and the records under
            ``data["singleTissueEqtl"]``, or a dict with ``success`` set to
            ``False`` and an ``error`` message when the gene is missing,
            ``page``/``size`` are not integers, or the request failed.
        """
        endpoint = "association/singleTissueEqtl"
        settings = self.tool_config.get("settings", {})
        base_url = settings.get("base_url", "https://gtexportal.org/api/v2")
        timeout = int(settings.get("timeout", 30))

        gene_input = arguments.get("gene_symbol") or arguments.get(
            "ensembl_gene_id", ""
        )
        # Same up-front validation as GTEx_get_expression_summary: without a
        # gene there is nothing meaningful to resolve or query.
        if not gene_input:
            return {
                "error": "Provide gene_symbol (e.g., 'TP53') or ensembl_gene_id (e.g., 'ENSG00000141510').",
                "success": False,
            }

        # Validate paging before any network call so bad input fails fast and
        # is reported as an error payload instead of an uncaught exception.
        paging: Dict[str, Any] = {}
        try:
            if arguments.get("page") is not None:
                paging["page"] = int(arguments["page"])
            if arguments.get("size") is not None:
                paging["pageSize"] = int(arguments["size"])
        except (TypeError, ValueError):
            return {
                "error": "Parameters 'page' and 'size' must be integers.",
                "success": False,
            }

        # Turn the symbol / unversioned Ensembl ID into a versioned GENCODE ID.
        gencode_id = _resolve_gene_id(gene_input, base_url, timeout)

        query: Dict[str, Any] = {
            "gencodeId": gencode_id,
            "datasetId": arguments.get("dataset_id", "gtex_v8"),
        }
        query.update(paging)
        url = f"{base_url}/{endpoint}?{urlencode(query)}"

        try:
            api_response = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )
            eqtl_data = _extract_data_list(api_response)
            return {
                "source": "GTEx",
                "endpoint": endpoint,
                "query": query,
                "data": {"singleTissueEqtl": eqtl_data},
                "success": True,
            }
        except Exception as exc:
            return {
                "error": str(exc),
                "source": "GTEx",
                "endpoint": endpoint,
                "success": False,
            }