Source code for tooluniverse.kegg_tool

"""
KEGG Database REST API Tool

This tool provides access to the KEGG (Kyoto Encyclopedia of Genes and Genomes)
database for pathway analysis, gene information, and organism data.
"""

import requests
from typing import Dict, Any, Optional
from urllib.parse import quote
from .base_tool import BaseTool
from .tool_registry import register_tool


class KEGGRESTTool(BaseTool):
    """Base class for KEGG REST API tools.

    Owns a ``requests.Session`` preconfigured with ``Accept`` and
    ``User-Agent`` headers and provides :meth:`_make_request`, which issues a
    GET against ``https://rest.kegg.jp`` and wraps the outcome in a status
    dictionary.

    Note: :meth:`run` reads ``self.endpoint``, which this class does not
    assign; the subclasses in this module set it in their constructors.
    """

    def __init__(self, tool_config):
        """Initialise the tool and its HTTP session.

        Args:
            tool_config: Configuration object forwarded unchanged to
                ``BaseTool.__init__``.
        """
        super().__init__(tool_config)
        self.base_url = "https://rest.kegg.jp"
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "text/plain, application/json", "User-Agent": "ToolUniverse/1.0"}
        )
        # Passed as the ``timeout`` argument of every request (seconds).
        self.timeout = 30

    def _make_request(
        self, endpoint: str, params: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make a GET request to the KEGG API.

        Args:
            endpoint: Path appended verbatim to ``self.base_url``; callers are
                responsible for any percent-encoding of path segments.
            params: Optional query-string parameters.

        Returns:
            On success, a dict with ``status == "success"``, ``data`` (the
            decoded JSON value when the body looks like JSON and parses,
            otherwise the stripped response text), ``url`` and
            ``content_type``. On a ``requests`` failure (including non-2xx
            responses), a dict with ``status == "error"``, ``error`` and
            ``url``.
        """
        url = f"{self.base_url}{endpoint}"

        try:
            response = self.session.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()

            # Default payload is the plain-text body.
            content = response.text.strip()
            data: Any = content

            # Only attempt JSON decoding when the body looks like an object
            # or array; anything that fails to decode stays as text.
            if content.startswith(("{", "[")):
                try:
                    data = response.json()
                except ValueError:
                    # Not valid JSON after all; keep the text payload.
                    data = content

            # Both payload kinds share one envelope so callers can always
            # rely on ``url`` and ``content_type`` being present.
            return {
                "status": "success",
                "data": data,
                "url": url,
                "content_type": response.headers.get("content-type", "text/plain"),
            }

        except requests.exceptions.RequestException as e:
            return {
                "status": "error",
                "error": f"KEGG API request failed: {str(e)}",
                "url": url,
            }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the tool, sending ``arguments`` as query parameters.

        Args:
            arguments: Mapping forwarded as the query string of the request
                to ``self.endpoint``.

        Returns:
            The status dictionary produced by :meth:`_make_request`.
        """
        return self._make_request(self.endpoint, arguments)
@register_tool("KEGGSearchPathway")
class KEGGSearchPathway(KEGGRESTTool):
    """Search KEGG pathways by keyword."""

    def __init__(self, tool_config):
        """Initialise the tool and point it at the ``/find/pathway`` endpoint.

        Args:
            tool_config: Configuration object forwarded to the parent class.
        """
        super().__init__(tool_config)
        self.endpoint = "/find/pathway"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search pathways with a keyword.

        Args:
            arguments: Mapping that must contain a non-empty ``"keyword"``.

        Returns:
            An error dict when ``keyword`` is missing or empty. Otherwise the
            request's status dict; when the response is text, ``data`` is
            replaced by a list of ``{"pathway_id", "description"}`` dicts
            (one per tab-separated line) and ``count`` holds its length.
        """
        keyword = arguments.get("keyword", "")
        if not keyword:
            return {"status": "error", "error": "keyword is required"}

        # The search term travels in the URL path, so percent-encode it:
        # a raw "?", "#" or "%" would otherwise be read as URL structure and
        # silently truncate or alter the query. "/", ":" and "+" stay literal
        # so existing callers that rely on them in the path keep working.
        encoded_keyword = quote(str(keyword), safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_keyword}")

        # Only text payloads are parsed; errors and JSON pass through as-is.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            pathways = []
            for line in result["data"].split("\n"):
                # Split on the first tab only; lines without a tab are skipped.
                pathway_id, tab, description = line.partition("\t")
                if tab:
                    pathways.append(
                        {"pathway_id": pathway_id, "description": description}
                    )

            result["data"] = pathways
            result["count"] = len(pathways)

        return result
@register_tool("KEGGGetPathwayInfo")
class KEGGGetPathwayInfo(KEGGRESTTool):
    """Get detailed pathway information by pathway ID."""

    def __init__(self, tool_config):
        """Initialise the tool and point it at the ``/get`` endpoint.

        Args:
            tool_config: Configuration object forwarded to the parent class.
        """
        super().__init__(tool_config)
        self.endpoint = "/get"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get pathway information.

        Args:
            arguments: Mapping that must contain a non-empty ``"pathway_id"``.
                A ``"path:"`` prefix is added when it is not already present.

        Returns:
            An error dict when ``pathway_id`` is missing or empty. Otherwise
            the request's status dict; when the response is text, ``data`` is
            replaced by a dict holding the (prefixed) ``pathway_id``, the
            unmodified text as ``raw_data`` and its line count as ``lines``.
        """
        pathway_id = arguments.get("pathway_id", "")
        if not pathway_id:
            return {"status": "error", "error": "pathway_id is required"}

        # Coerce so a non-string identifier cannot crash on ``startswith``.
        pathway_id = str(pathway_id)

        # Add pathway prefix if not present.
        if not pathway_id.startswith("path:"):
            pathway_id = f"path:{pathway_id}"

        # The ID travels in the URL path, so percent-encode it: a raw "?",
        # "#" or "%" would otherwise be read as URL structure. "/", ":" and
        # "+" stay literal so prefixed IDs and path suffixes are unaffected.
        encoded_id = quote(pathway_id, safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_id}")

        # Only text payloads are wrapped; errors and JSON pass through as-is.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            raw_text = result["data"]
            result["data"] = {
                "pathway_id": pathway_id,
                "raw_data": raw_text,
                "lines": len(raw_text.split("\n")),
            }

        return result
@register_tool("KEGGFindGenes")
class KEGGFindGenes(KEGGRESTTool):
    """Find genes by keyword in KEGG database."""

    def __init__(self, tool_config):
        """Initialise the tool and point it at the ``/find/genes`` endpoint.

        Args:
            tool_config: Configuration object forwarded to the parent class.
        """
        super().__init__(tool_config)
        self.endpoint = "/find/genes"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Find genes with a keyword.

        Args:
            arguments: Mapping that must contain a non-empty ``"keyword"``.

        Returns:
            An error dict when ``keyword`` is missing or empty. Otherwise the
            request's status dict; when the response is text, ``data`` is
            replaced by a list of ``{"gene_id", "description"}`` dicts (one
            per tab-separated line) and ``count`` holds its length.
        """
        keyword = arguments.get("keyword", "")
        if not keyword:
            return {"status": "error", "error": "keyword is required"}

        # The search term travels in the URL path (no organism prefix is
        # added), so percent-encode it: a raw "?", "#" or "%" would otherwise
        # be read as URL structure and silently truncate or alter the query.
        # "/", ":" and "+" stay literal so existing callers keep working.
        encoded_keyword = quote(str(keyword), safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_keyword}")

        # Only text payloads are parsed; errors and JSON pass through as-is.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            genes = []
            for line in result["data"].split("\n"):
                # Split on the first tab only; lines without a tab are skipped.
                gene_id, tab, description = line.partition("\t")
                if tab:
                    genes.append({"gene_id": gene_id, "description": description})

            result["data"] = genes
            result["count"] = len(genes)

        return result
@register_tool("KEGGGetGeneInfo")
class KEGGGetGeneInfo(KEGGRESTTool):
    """Get detailed gene information by gene ID."""

    def __init__(self, tool_config):
        """Initialise the tool and point it at the ``/get`` endpoint.

        Args:
            tool_config: Configuration object forwarded to the parent class.
        """
        super().__init__(tool_config)
        self.endpoint = "/get"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get gene information.

        Args:
            arguments: Mapping that must contain a non-empty ``"gene_id"``.

        Returns:
            An error dict when ``gene_id`` is missing or empty. Otherwise the
            request's status dict; when the response is text, ``data`` is
            replaced by a dict holding the ``gene_id`` as supplied, the
            unmodified text as ``raw_data`` and its line count as ``lines``.
        """
        gene_id = arguments.get("gene_id", "")
        if not gene_id:
            return {"status": "error", "error": "gene_id is required"}

        # The ID travels in the URL path, so percent-encode it: a raw "?",
        # "#" or "%" would otherwise be read as URL structure. "/", ":" and
        # "+" stay literal so "org:gene" style IDs and path suffixes are
        # unaffected.
        encoded_id = quote(str(gene_id), safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_id}")

        # Only text payloads are wrapped; errors and JSON pass through as-is.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            raw_text = result["data"]
            result["data"] = {
                "gene_id": gene_id,
                "raw_data": raw_text,
                "lines": len(raw_text.split("\n")),
            }

        return result
@register_tool("KEGGListOrganisms")
class KEGGListOrganisms(KEGGRESTTool):
    """List available organisms in KEGG database."""

    def __init__(self, tool_config):
        """Initialise the tool and point it at the ``/list/organism`` endpoint.

        Args:
            tool_config: Configuration object forwarded to the parent class.
        """
        super().__init__(tool_config)
        self.endpoint = "/list/organism"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List organisms.

        Args:
            arguments: Accepted for interface compatibility; not used.

        Returns:
            The request's status dict. When the response is text, ``data`` is
            replaced by a list of dicts with ``t_number``, ``organism_code``,
            ``organism_name`` and ``description`` (the lineage column, or
            ``""`` when absent), and ``count`` holds the list length.
        """
        result = self._make_request(self.endpoint)

        # Only text payloads are parsed; errors and JSON pass through as-is.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            organisms = []
            for line in result["data"].split("\n"):
                parts = line.split("\t")
                # Rows with fewer than three columns carry no usable record.
                if len(parts) < 3:
                    continue

                # Per the KEGG API manual, /list/organism rows are
                # "T-number <tab> org code <tab> name <tab> lineage"; the
                # code is therefore the SECOND column, not the first.
                organisms.append(
                    {
                        "t_number": parts[0],
                        "organism_code": parts[1],
                        "organism_name": parts[2],
                        "description": parts[3] if len(parts) > 3 else "",
                    }
                )

            result["data"] = organisms
            result["count"] = len(organisms)

        return result