Source code for tooluniverse.hmdb_tool

"""
HMDB (Human Metabolome Database) tool for ToolUniverse.

HMDB is the most comprehensive database of human metabolites containing
chemical, clinical, and molecular biology data.

Website: https://hmdb.ca/

Note: HMDB does not provide an open API. This tool uses PubChem as a cross-reference
source for metabolite data, with HMDB IDs for reference.
"""

import requests
import xml.etree.ElementTree as ET
from typing import Dict, Any, Optional, List
from .base_tool import BaseTool
from .tool_registry import register_tool

# HMDB base URL
HMDB_BASE_URL = "https://hmdb.ca"
# PubChem API for cross-referencing
PUBCHEM_API_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"


[docs] @register_tool("HMDBTool") class HMDBTool(BaseTool): """ Tool for querying HMDB (Human Metabolome Database). HMDB provides: - Metabolite identification - Chemical properties - Biological functions - Disease associations - Spectral data (MS, NMR) Uses PubChem for data retrieval with HMDB cross-references. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout: int = tool_config.get("timeout", 30) self.parameter = tool_config.get("parameter", {})
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute HMDB query based on operation type.""" operation = arguments.get("operation", "") if operation == "get_metabolite": return self._get_metabolite(arguments) elif operation == "search": return self._search(arguments) elif operation == "get_diseases": return self._get_diseases(arguments) else: return { "status": "error", "error": f"Unknown operation: {operation}. Supported: get_metabolite, search, get_diseases", }
[docs] def _get_metabolite(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """ Get metabolite by HMDB ID. Uses PubChem to retrieve metabolite data via HMDB ID cross-reference. Args: arguments: Dict containing: - hmdb_id: HMDB ID (e.g., HMDB0000001) """ hmdb_id = arguments.get("hmdb_id", "") if not hmdb_id: return {"status": "error", "error": "Missing required parameter: hmdb_id"} # Normalize HMDB ID format if not hmdb_id.startswith("HMDB"): hmdb_id = f"HMDB{hmdb_id.zfill(7)}" try: # Use PubChem to find compound by HMDB ID response = requests.get( f"{PUBCHEM_API_URL}/compound/xref/RegistryID/{hmdb_id}/JSON", timeout=self.timeout, headers={"User-Agent": "ToolUniverse/HMDB"}, ) if response.status_code == 200: data = response.json() compounds = data.get("PC_Compounds", []) if compounds: cid = compounds[0].get("id", {}).get("id", {}).get("cid") # Get full compound properties props_response = requests.get( f"{PUBCHEM_API_URL}/compound/cid/{cid}/property/MolecularFormula,MolecularWeight,CanonicalSMILES,InChIKey,IUPACName/JSON", timeout=self.timeout, ) if props_response.status_code == 200: props = ( props_response.json() .get("PropertyTable", {}) .get("Properties", [{}])[0] ) return { "status": "success", "data": { "hmdb_id": hmdb_id, "pubchem_cid": cid, "name": props.get("IUPACName"), "chemical_formula": props.get("MolecularFormula"), "molecular_weight": props.get("MolecularWeight"), "smiles": props.get("CanonicalSMILES"), "inchikey": props.get("InChIKey"), }, "metadata": { "source": "PubChem (HMDB cross-reference)", "hmdb_id": hmdb_id, "hmdb_url": f"{HMDB_BASE_URL}/metabolites/{hmdb_id}", "pubchem_url": f"https://pubchem.ncbi.nlm.nih.gov/compound/{cid}", }, } # Return HMDB reference if PubChem doesn't have it return { "status": "success", "data": { "hmdb_id": hmdb_id, "note": "Metabolite data available at HMDB website", }, "metadata": { "source": "HMDB", "hmdb_url": f"{HMDB_BASE_URL}/metabolites/{hmdb_id}", "note": "HMDB does not provide open API access. Visit hmdb_url for full data.", }, } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"Request failed: {str(e)}"} except Exception as e: return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs] def _get_diseases(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """ Get disease associations for a metabolite. Note: Disease associations are best accessed directly from HMDB website. Args: arguments: Dict containing: - hmdb_id: HMDB ID """ hmdb_id = arguments.get("hmdb_id", "") if not hmdb_id: return {"status": "error", "error": "Missing required parameter: hmdb_id"} if not hmdb_id.startswith("HMDB"): hmdb_id = f"HMDB{hmdb_id.zfill(7)}" try: # Get basic compound info from PubChem response = requests.get( f"{PUBCHEM_API_URL}/compound/xref/RegistryID/{hmdb_id}/property/IUPACName/JSON", timeout=self.timeout, headers={"User-Agent": "ToolUniverse/HMDB"}, ) name = None if response.status_code == 200: props = response.json().get("PropertyTable", {}).get("Properties", [{}]) if props: name = props[0].get("IUPACName") return { "status": "success", "data": { "hmdb_id": hmdb_id, "metabolite_name": name, "note": "Disease associations are available at HMDB website", "hmdb_url": f"{HMDB_BASE_URL}/metabolites/{hmdb_id}", }, "metadata": { "source": "HMDB", "hmdb_id": hmdb_id, "note": "HMDB does not provide open API. Visit hmdb_url for disease associations.", }, } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"Request failed: {str(e)}"} except Exception as e: return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs] def _parse_xml(self, xml_text: str) -> Dict[str, Any]: """Parse HMDB XML response.""" try: root = ET.fromstring(xml_text) ns = {"hmdb": "http://www.hmdb.ca"} def get_text(elem, path): el = root.find(path, ns) return el.text if el is not None else None return { "hmdb_id": get_text(root, ".//hmdb:accession") or get_text(root, ".//accession"), "name": get_text(root, ".//hmdb:name") or get_text(root, ".//name"), "description": get_text(root, ".//hmdb:description") or get_text(root, ".//description"), "chemical_formula": get_text(root, ".//hmdb:chemical_formula") or get_text(root, ".//chemical_formula"), "smiles": get_text(root, ".//hmdb:smiles") or get_text(root, ".//smiles"), } except Exception: return {"raw_xml": xml_text[:500]}