Source code for tooluniverse.harmonizome_tool

# harmonizome_tool.py
"""
Harmonizome tool for ToolUniverse.

Harmonizome (Ma'ayan Lab, Mount Sinai) integrates data from 100+ genomics
datasets covering gene expression, protein interactions, pathways, diseases,
drug targets, and more into a unified gene-centric resource.

API: https://maayanlab.cloud/Harmonizome/api/1.0/
No authentication required.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

HARMONIZOME_BASE_URL = "https://maayanlab.cloud/Harmonizome/api/1.0"


[docs] @register_tool("HarmonizomeTool") class HarmonizomeTool(BaseTool): """ Tool for querying Harmonizome gene and dataset information. Supports: - Gene details (symbol, name, description, synonyms, proteins) - Dataset catalog (100+ integrated genomics datasets) No authentication required. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 30) fields = tool_config.get("fields", {}) self.endpoint = fields.get("endpoint", "get_gene")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the Harmonizome API call.""" try: return self._query(arguments) except requests.exceptions.Timeout: return {"error": f"Harmonizome API timed out after {self.timeout}s"} except requests.exceptions.ConnectionError: return {"error": "Failed to connect to Harmonizome API"} except requests.exceptions.HTTPError as e: status = e.response.status_code if e.response is not None else "unknown" if status == 404: return { "error": "Gene not found in Harmonizome. Check the gene symbol." } return {"error": f"Harmonizome API HTTP {status}"} except Exception as e: return {"error": f"Unexpected error: {str(e)}"}
[docs] def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint.""" if self.endpoint == "get_gene": return self._get_gene(arguments) elif self.endpoint == "list_datasets": return self._list_datasets(arguments) else: return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs] def _get_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get gene details from Harmonizome.""" gene_symbol = arguments.get("gene_symbol", "") if not gene_symbol: return {"error": "gene_symbol is required (e.g., 'TP53')."} url = f"{HARMONIZOME_BASE_URL}/gene/{gene_symbol}" response = requests.get(url, timeout=self.timeout) response.raise_for_status() data = response.json() # Check if we got an error response if data.get("status") == 404 or "message" in data: return { "error": f"Gene '{gene_symbol}' not found: {data.get('message', 'unknown')}" } proteins = [] for p in data.get("proteins", []): proteins.append( { "symbol": p.get("symbol"), "href": p.get("href"), } ) return { "data": { "symbol": data.get("symbol"), "name": data.get("name"), "ncbi_entrez_gene_id": data.get("ncbiEntrezGeneId"), "ncbi_entrez_gene_url": data.get("ncbiEntrezGeneUrl"), "description": data.get("description"), "synonyms": data.get("synonyms", []), "proteins": proteins, }, "metadata": { "source": "Harmonizome (maayanlab.cloud/Harmonizome)", }, }
[docs] def _list_datasets(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """List all available Harmonizome datasets.""" url = f"{HARMONIZOME_BASE_URL}/dataset" response = requests.get(url, timeout=self.timeout) response.raise_for_status() data = response.json() entities = data.get("entities", []) datasets = [] for e in entities: datasets.append( { "name": e.get("name"), "href": e.get("href"), } ) return { "data": datasets, "metadata": { "source": "Harmonizome (maayanlab.cloud/Harmonizome)", "total_datasets": len(datasets), }, }