"""
KEGG Database REST API Tool
This tool provides access to the KEGG (Kyoto Encyclopedia of Genes and Genomes)
database for pathway analysis, gene information, and organism data.
"""
import requests
from typing import Dict, Any, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
@register_tool("KEGGSearchPathway")
class KEGGSearchPathway(KEGGRESTTool):
    """Search KEGG pathways by keyword.

    Issues a ``/find/pathway/<keyword>`` request through the inherited
    ``_make_request`` helper and turns the tab-separated text reply into a
    list of ``{"pathway_id", "description"}`` dictionaries.
    """

    def __init__(self, tool_config):
        """Initialise the tool and select the pathway-search endpoint.

        Args:
            tool_config: Tool configuration, forwarded unchanged to the base
                class constructor.
        """
        super().__init__(tool_config)
        self.endpoint = "/find/pathway"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search pathways with keyword.

        Args:
            arguments: Mapping expected to contain a non-blank ``"keyword"``.

        Returns:
            ``{"status": "error", "error": "keyword is required"}`` when the
            keyword is missing, falsy or whitespace-only. Otherwise the
            dictionary returned by ``_make_request``; when it reports
            ``status == "success"`` with string ``data``, ``data`` is replaced
            by the parsed list of pathways and ``count`` holds its length.
        """
        raw_keyword = arguments.get("keyword", "")
        # Falsy values are rejected before coercion so None/0 never become the
        # literal search terms "None"/"0"; trimming afterwards also rejects
        # whitespace-only keywords instead of sending a blank search.
        keyword = str(raw_keyword).strip() if raw_keyword else ""
        if not keyword:
            return {"status": "error", "error": "keyword is required"}

        # KEGG API requires the search term in the URL path.
        # NOTE(review): the keyword is interpolated verbatim; characters such
        # as "/", "?" or "#" would change the request unless _make_request
        # escapes the endpoint -- confirm before adding quoting here.
        result = self._make_request(f"{self.endpoint}/{keyword}")

        payload = result.get("data")
        if result.get("status") == "success" and isinstance(payload, str):
            pathways = []
            for line in payload.split("\n"):
                # Only the first tab separates the ID from the description;
                # lines without a tab (e.g. a trailing empty line) are skipped.
                pathway_id, tab, description = line.partition("\t")
                if tab:
                    pathways.append(
                        {"pathway_id": pathway_id, "description": description}
                    )
            result["data"] = pathways
            result["count"] = len(pathways)
        return result
@register_tool("KEGGGetPathwayInfo")
class KEGGGetPathwayInfo(KEGGRESTTool):
    """Get detailed pathway information by pathway ID.

    Issues a ``/get/path:<id>`` request through the inherited
    ``_make_request`` helper and wraps the raw text reply in a small
    dictionary.
    """

    def __init__(self, tool_config):
        """Initialise the tool and select the ``/get`` endpoint.

        Args:
            tool_config: Tool configuration, forwarded unchanged to the base
                class constructor.
        """
        super().__init__(tool_config)
        self.endpoint = "/get"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get pathway information.

        Args:
            arguments: Mapping expected to contain a non-blank
                ``"pathway_id"``, with or without the ``path:`` prefix.

        Returns:
            ``{"status": "error", "error": "pathway_id is required"}`` when
            the ID is missing, falsy or whitespace-only. Otherwise the
            dictionary returned by ``_make_request``; when it reports
            ``status == "success"`` with string ``data``, ``data`` is replaced
            by ``{"pathway_id", "raw_data", "lines"}``.
        """
        raw_id = arguments.get("pathway_id", "")
        # Coerce to text before calling str methods so a non-string ID cannot
        # raise AttributeError; falsy values are rejected first so None/0 do
        # not become the literal IDs "None"/"0". Trimming keeps a padded ID
        # from producing "path: <id>".
        pathway_id = str(raw_id).strip() if raw_id else ""
        if not pathway_id:
            return {"status": "error", "error": "pathway_id is required"}

        # Add pathway prefix if not present.
        if not pathway_id.startswith("path:"):
            pathway_id = f"path:{pathway_id}"

        # KEGG API requires the ID in the URL path.
        result = self._make_request(f"{self.endpoint}/{pathway_id}")

        payload = result.get("data")
        if result.get("status") == "success" and isinstance(payload, str):
            result["data"] = {
                "pathway_id": pathway_id,
                "raw_data": payload,
                # Number of "\n"-separated segments; a trailing newline
                # contributes one final empty segment to this count.
                "lines": len(payload.split("\n")),
            }
        return result
@register_tool("KEGGFindGenes")
class KEGGFindGenes(KEGGRESTTool):
    """Find genes by keyword in KEGG database.

    Issues a ``/find/genes/<keyword>`` request through the inherited
    ``_make_request`` helper and turns the tab-separated text reply into a
    list of ``{"gene_id", "description"}`` dictionaries.
    """

    def __init__(self, tool_config):
        """Initialise the tool and select the gene-search endpoint.

        Args:
            tool_config: Tool configuration, forwarded unchanged to the base
                class constructor.
        """
        super().__init__(tool_config)
        self.endpoint = "/find/genes"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Find genes with keyword.

        Args:
            arguments: Mapping expected to contain a non-blank ``"keyword"``.

        Returns:
            ``{"status": "error", "error": "keyword is required"}`` when the
            keyword is missing, falsy or whitespace-only. Otherwise the
            dictionary returned by ``_make_request``; when it reports
            ``status == "success"`` with string ``data``, ``data`` is replaced
            by the parsed list of genes and ``count`` holds its length.
        """
        raw_keyword = arguments.get("keyword", "")
        # Falsy values are rejected before coercion so None/0 never become the
        # literal search terms "None"/"0"; trimming afterwards also rejects
        # whitespace-only keywords instead of sending a blank search.
        keyword = str(raw_keyword).strip() if raw_keyword else ""
        if not keyword:
            return {"status": "error", "error": "keyword is required"}

        # KEGG API requires the search term in the URL path.
        # For gene search, we don't need organism prefix in the URL.
        # NOTE(review): the keyword is interpolated verbatim; characters such
        # as "/", "?" or "#" would change the request unless _make_request
        # escapes the endpoint -- confirm before adding quoting here.
        result = self._make_request(f"{self.endpoint}/{keyword}")

        payload = result.get("data")
        if result.get("status") == "success" and isinstance(payload, str):
            genes = []
            for line in payload.split("\n"):
                # Only the first tab separates the ID from the description;
                # lines without a tab (e.g. a trailing empty line) are skipped.
                gene_id, tab, description = line.partition("\t")
                if tab:
                    genes.append({"gene_id": gene_id, "description": description})
            result["data"] = genes
            result["count"] = len(genes)
        return result
@register_tool("KEGGGetGeneInfo")
class KEGGGetGeneInfo(KEGGRESTTool):
    """Get detailed gene information by gene ID.

    Issues a ``/get/<gene_id>`` request through the inherited
    ``_make_request`` helper and wraps the raw text reply in a small
    dictionary.
    """

    def __init__(self, tool_config):
        """Initialise the tool and select the ``/get`` endpoint.

        Args:
            tool_config: Tool configuration, forwarded unchanged to the base
                class constructor.
        """
        super().__init__(tool_config)
        self.endpoint = "/get"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get gene information.

        Args:
            arguments: Mapping expected to contain a non-blank ``"gene_id"``.

        Returns:
            ``{"status": "error", "error": "gene_id is required"}`` when the
            ID is missing, falsy or whitespace-only. Otherwise the dictionary
            returned by ``_make_request``; when it reports
            ``status == "success"`` with string ``data``, ``data`` is replaced
            by ``{"gene_id", "raw_data", "lines"}``.
        """
        raw_id = arguments.get("gene_id", "")
        # Falsy values are rejected before coercion so None/0 never become the
        # literal IDs "None"/"0"; trimming afterwards rejects whitespace-only
        # IDs and keeps stray padding out of the request path.
        gene_id = str(raw_id).strip() if raw_id else ""
        if not gene_id:
            return {"status": "error", "error": "gene_id is required"}

        # KEGG API requires the ID in the URL path.
        result = self._make_request(f"{self.endpoint}/{gene_id}")

        payload = result.get("data")
        if result.get("status") == "success" and isinstance(payload, str):
            result["data"] = {
                "gene_id": gene_id,
                "raw_data": payload,
                # Number of "\n"-separated segments; a trailing newline
                # contributes one final empty segment to this count.
                "lines": len(payload.split("\n")),
            }
        return result
@register_tool("KEGGListOrganisms")
class KEGGListOrganisms(KEGGRESTTool):
    """List available organisms in KEGG database.

    Issues a ``/list/organism`` request through the inherited
    ``_make_request`` helper and turns the tab-separated text reply into a
    list of per-organism dictionaries.
    """

    def __init__(self, tool_config):
        """Initialise the tool and select the organism-list endpoint.

        Args:
            tool_config: Tool configuration, forwarded unchanged to the base
                class constructor.
        """
        super().__init__(tool_config)
        self.endpoint = "/list/organism"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List organisms.

        Args:
            arguments: Accepted for interface compatibility; not read.

        Returns:
            The dictionary returned by ``_make_request``. When it reports
            ``status == "success"`` with string ``data``, ``data`` is replaced
            by a list of ``{"t_number", "organism_code", "organism_name",
            "description"}`` dictionaries and ``count`` holds its length.
        """
        result = self._make_request(self.endpoint)

        payload = result.get("data")
        if result.get("status") == "success" and isinstance(payload, str):
            organisms = []
            for line in payload.split("\n"):
                columns = line.split("\t")
                # Rows with fewer than three columns (including blank lines)
                # cannot supply a code and a name, so they are skipped.
                if len(columns) < 3:
                    continue
                # Per the KEGG API manual, /list/organism rows are laid out as
                #   T number <TAB> organism code <TAB> name <TAB> lineage
                # so the code and name live in columns 1 and 2, not 0 and 1.
                organisms.append(
                    {
                        "t_number": columns[0],
                        "organism_code": columns[1],
                        "organism_name": columns[2],
                        # Lineage is the optional fourth column.
                        "description": columns[3] if len(columns) > 3 else "",
                    }
                )
            result["data"] = organisms
            result["count"] = len(organisms)
        return result