tooluniverse.harmonizome_tool 源代码

# harmonizome_tool.py
"""
Harmonizome tool for ToolUniverse.

Harmonizome (Ma'ayan Lab, Mount Sinai) integrates data from 100+ genomics
datasets covering gene expression, protein interactions, pathways, diseases,
drug targets, and more into a unified gene-centric resource.

API: https://maayanlab.cloud/Harmonizome/api/1.0/
No authentication required.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

HARMONIZOME_BASE_URL = "https://maayanlab.cloud/Harmonizome/api/1.0"


@register_tool("HarmonizomeTool")
class HarmonizomeTool(BaseTool):
    """Query the Harmonizome REST API for gene and dataset information.

    The endpoint served by an instance is fixed at construction time from
    ``tool_config["fields"]["endpoint"]`` and defaults to ``"get_gene"``.

    Recognised endpoints:
        - ``get_gene``: details for one gene symbol
        - ``list_datasets``: the dataset catalog
        - ``get_dataset``: details for one dataset
        - ``search_genes``: keyword search over genes
        - ``search``: dispatched to ``_search`` when that handler exists

    Every public call returns a dict with ``"status"`` set to ``"success"``
    or ``"error"``; exceptions are converted to error dicts by ``run``.

    No authentication required.
    """

    # Value reported as ``metadata["source"]`` in every successful response.
    _SOURCE = "Harmonizome (maayanlab.cloud/Harmonizome)"

    # Endpoint name -> handler method name. Handlers are resolved by name at
    # call time so that one unavailable handler cannot break the others.
    _HANDLER_NAMES = {
        "get_gene": "_get_gene",
        "list_datasets": "_list_datasets",
        "get_dataset": "_get_dataset",
        "search": "_search",
        "search_genes": "_search_genes",
    }

    # Endpoint-specific text for HTTP 404; other endpoints use the default.
    _NOT_FOUND_MESSAGES = {
        "get_dataset": "Dataset not found in Harmonizome. Check the dataset name.",
    }
    _DEFAULT_NOT_FOUND_MESSAGE = (
        "Gene not found in Harmonizome. Check the gene symbol."
    )

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the endpoint this tool serves.

        Args:
            tool_config: Tool configuration. Reads ``timeout`` (default 30)
                and ``fields.endpoint`` (default ``"get_gene"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``fields`` may be present but null in a config; treat it as empty.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "get_gene")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the Harmonizome API call.

        Args:
            arguments: Endpoint-specific arguments; ``None`` is treated as
                an empty dict.

        Returns:
            The handler's result dict, or ``{"status": "error", ...}`` when
            the request times out, cannot connect, returns an HTTP error, or
            any other exception is raised.
        """
        try:
            return self._query(arguments or {})
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"Harmonizome API timed out after {self.timeout}s",
            }
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to Harmonizome API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            if status == 404:
                # Word the message for the resource that was actually
                # requested rather than always blaming the gene symbol.
                return {
                    "status": "error",
                    "error": self._NOT_FOUND_MESSAGES.get(
                        self.endpoint, self._DEFAULT_NOT_FOUND_MESSAGE
                    ),
                }
            return {"status": "error", "error": f"Harmonizome API HTTP {status}"}
        except Exception as e:
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for ``self.endpoint``.

        Returns:
            The handler's result, or an ``Unknown endpoint`` error dict when
            the endpoint is not recognised or its handler is not available.
        """
        method_name = self._HANDLER_NAMES.get(self.endpoint)
        handler = getattr(self, method_name, None) if method_name else None
        if callable(handler):
            return handler(arguments)
        return {"status": "error", "error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _coerce_limit(value: Any, default: int) -> int:
        """Return ``value`` as a positive int, or ``default`` if it is not one.

        Missing, zero, negative and non-numeric values all fall back to
        ``default`` so the result is always safe to use as a slice bound.
        """
        try:
            limit = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return limit if limit > 0 else default

    def _get_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get gene details from Harmonizome.

        Args:
            arguments: Must contain a non-empty ``gene_symbol``.

        Returns:
            On success, ``data`` holds symbol, name, NCBI Entrez id/url,
            description, synonyms and a list of protein symbol/href pairs.
        """
        gene_symbol = str(arguments.get("gene_symbol") or "").strip()
        if not gene_symbol:
            return {
                "status": "error",
                "error": "gene_symbol is required (e.g., 'TP53').",
            }

        # Percent-encode so characters such as '/', '?' or '#' in the input
        # cannot change which URL is requested.
        url = f"{HARMONIZOME_BASE_URL}/gene/{quote(gene_symbol, safe='')}"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        # The API may answer 200 with an error payload instead of a gene.
        if data.get("status") == 404 or "message" in data:
            return {
                "status": "error",
                "error": f"Gene '{gene_symbol}' not found: {data.get('message', 'unknown')}",
            }

        proteins = [
            {"symbol": p.get("symbol"), "href": p.get("href")}
            for p in data.get("proteins") or []
        ]

        return {
            "status": "success",
            "data": {
                "symbol": data.get("symbol"),
                "name": data.get("name"),
                "ncbi_entrez_gene_id": data.get("ncbiEntrezGeneId"),
                "ncbi_entrez_gene_url": data.get("ncbiEntrezGeneUrl"),
                "description": data.get("description"),
                "synonyms": data.get("synonyms", []),
                "proteins": proteins,
            },
            "metadata": {
                "source": self._SOURCE,
            },
        }

    def _list_datasets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List all available Harmonizome datasets.

        Args:
            arguments: Unused; accepted for a uniform handler signature.

        Returns:
            On success, ``data`` is a list of ``{"name", "href"}`` dicts and
            ``metadata["total_datasets"]`` is its length.
        """
        url = f"{HARMONIZOME_BASE_URL}/dataset"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        datasets = [
            {"name": e.get("name"), "href": e.get("href")}
            for e in data.get("entities") or []
        ]

        return {
            "status": "success",
            "data": datasets,
            "metadata": {
                "source": self._SOURCE,
                "total_datasets": len(datasets),
            },
        }

    def _get_dataset(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get detailed information about a specific Harmonizome dataset.

        Args:
            arguments: Must contain a non-empty ``dataset_name``. Optional
                ``gene_set_limit`` caps the number of gene sets returned
                (default 50; invalid or non-positive values use the default).

        Returns:
            On success, ``data`` holds the dataset's descriptive fields, the
            first ``gene_set_limit`` gene set names, and ``total_gene_sets``
            with the untruncated count.
        """
        dataset_name = str(arguments.get("dataset_name") or "").strip()
        if not dataset_name:
            return {
                "status": "error",
                "error": "dataset_name is required (e.g., 'CTD Gene-Disease Associations'). Use Harmonizome_list_datasets to find names.",
            }

        # Harmonizome expects spaces as '+' in the path; every other
        # URL-significant character is percent-encoded.
        encoded_name = quote(dataset_name.replace(" ", "+"), safe="+")
        url = f"{HARMONIZOME_BASE_URL}/dataset/{encoded_name}"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        if "message" in data:
            return {
                "status": "error",
                "error": f"Dataset not found: {data.get('message', dataset_name)}",
            }

        # Gene set lists can be large, so only a prefix is returned.
        gene_sets_raw = data.get("geneSets") or []
        gene_set_limit = self._coerce_limit(arguments.get("gene_set_limit"), 50)
        gene_sets = [
            # Keep only the part of the name before the first '/'.
            {"name": (gs.get("name") or "").split("/")[0]}
            for gs in gene_sets_raw[:gene_set_limit]
        ]

        return {
            "status": "success",
            "data": {
                "name": data.get("name"),
                "description": data.get("description"),
                "association": data.get("association"),
                "measurement": data.get("measurement"),
                "attribute_type": data.get("attributeType"),
                "attribute_group": data.get("attributeGroup"),
                "dataset_group": data.get("datasetGroup"),
                "pubmed_ids": data.get("pubMedIds", []),
                "gene_sets": gene_sets,
                "total_gene_sets": len(gene_sets_raw),
            },
            "metadata": {
                "source": self._SOURCE,
                "dataset_name": dataset_name,
            },
        }

    def _search_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search for genes by keyword in Harmonizome.

        Args:
            arguments: Must contain a non-empty ``query``. Optional ``limit``
                caps the number of genes returned (default 20; invalid or
                non-positive values use the default).

        Returns:
            On success, ``data`` is a list of ``{"symbol", "href"}`` dicts
            and ``metadata["total_results"]`` is the API's ``count`` field,
            or the number of entities received when ``count`` is absent.
        """
        query = arguments.get("query", "")
        if not query:
            return {
                "status": "error",
                "error": "query is required (e.g., 'kinase', 'tumor suppressor')",
            }

        limit = self._coerce_limit(arguments.get("limit"), 20)
        url = f"{HARMONIZOME_BASE_URL}/gene"
        params = {"search": query}
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        entities = data.get("entities") or []
        genes = [
            {"symbol": e.get("symbol"), "href": e.get("href")}
            for e in entities[:limit]
        ]

        return {
            "status": "success",
            "data": genes,
            "metadata": {
                "source": self._SOURCE,
                "query": query,
                # Fall back to the untruncated count, not the limited list.
                "total_results": data.get("count", len(entities)),
            },
        }