tooluniverse.metacyc_tool 源代码
"""
MetaCyc tool for ToolUniverse.
MetaCyc is a curated database of experimentally elucidated metabolic
pathways from all domains of life.
Website: https://metacyc.org/
BioCyc: https://biocyc.org/
"""
import html
import re
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Public BioCyc web site: used for the pathway search request and for the
# browser links returned in results.
BIOCYC_BASE_URL = "https://biocyc.org"
# BioCyc web-services host: used for the getxml object lookups.
BIOCYC_API_URL = "https://websvc.biocyc.org"

# Canned, non-retryable error result returned when a request runs into the
# BioCyc account wall instead of getting data back.
_AUTH_WALL_ERROR = dict(
    status="error",
    error=(
        "BioCyc now requires a free account for API access. MetaCyc tools are unavailable. "
        "Create an account at https://biocyc.org/signup.shtml or use KEGG/Reactome tools as alternatives."
    ),
    retryable=False,
)
[文档]
@register_tool("MetaCycTool")
class MetaCycTool(BaseTool):
"""
Tool for querying MetaCyc metabolic pathway database.
MetaCyc provides:
- Experimentally elucidated metabolic pathways
- Enzymes and reactions
- Metabolites and compounds
- Pathway diagrams
Uses BioCyc web services API.
BioCyc now requires a free account for API access; requests that hit the account wall return an error result.
"""
[文档]
def __init__(self, tool_config: Dict[str, Any]):
    """Initialise the tool from its configuration dict.

    Args:
        tool_config: Tool configuration. ``timeout`` (default 30) is used
            as the timeout for outgoing HTTP requests; ``parameter``
            (default ``{}``) is stored as-is on the instance.
    """
    super().__init__(tool_config)
    self.parameter = tool_config.get("parameter", {})
    self.timeout: int = tool_config.get("timeout", 30)
[文档]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch a MetaCyc request to the handler for its operation.

    Args:
        arguments: Request arguments. ``operation`` selects the handler;
            when it is missing or empty, the value returned by
            ``self.get_schema_const_operation()`` is used instead.

    Returns:
        The selected handler's result dict, or an error dict when the
        operation is not one of the supported names.
    """
    # (operation name, name of the method that handles it)
    handlers = (
        ("search_pathways", "_search_pathways"),
        ("get_pathway", "_get_pathway"),
        ("get_compound", "_get_compound"),
        ("get_reaction", "_get_reaction"),
    )

    # Fall back to the operation constant from the tool config when the
    # caller did not supply one.
    requested = arguments.get("operation", "") or self.get_schema_const_operation()

    for op_name, method_name in handlers:
        if requested == op_name:
            # Resolve the handler lazily so only the chosen one is looked up.
            return getattr(self, method_name)(arguments)

    return {
        "status": "error",
        "error": f"Unknown operation: {requested}. Supported: search_pathways, get_pathway, get_compound, get_reaction",
    }
[文档]
def _fetch_biocyc_xml(self, object_id: str) -> Optional[str]:
    """Fetch the full-detail BioCyc XML record for a MetaCyc object.

    Feature-84B-004/005: biocyc.org/getxml?META=ID returns HTML (wrong).
    websvc.biocyc.org/getxml?id=META:ID returns XML (correct).

    Args:
        object_id: MetaCyc frame ID; it is sent as ``META:<object_id>``.

    Returns:
        - ``"AUTH_REQUIRED"`` when the request ended on an
          ``account-required`` URL or was answered with HTTP 401/403.
        - ``None`` for any other non-200 status, or when the body does not
          start with an XML declaration (e.g. an HTML error page).
        - The response text otherwise.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get``; callers are expected to handle it.
    """
    resp = requests.get(
        f"{BIOCYC_API_URL}/getxml",
        params={"id": f"META:{object_id}", "detail": "full"},
        timeout=self.timeout,
        headers={"User-Agent": "ToolUniverse/MetaCyc"},
    )
    # Check for the account wall BEFORE the generic status check: if the wall
    # is served with a non-200 status, testing the status first would report
    # it as "not found" instead of "account required".
    if "account-required" in resp.url or resp.status_code in (401, 403):
        return "AUTH_REQUIRED"
    if resp.status_code != 200:
        return None
    content = resp.text
    # Verify it's actually XML (not an HTML error page).
    return content if content.lstrip().startswith("<?xml") else None
[文档]
def _parse_xml_field(self, xml: str, tag: str) -> Optional[str]:
"""Extract the text content of the first matching XML tag."""
m = re.search(rf"<{tag}[^>]*>([^<]+)</{tag}>", xml)
return m.group(1).strip() if m else None
[文档]
def _parse_xml_frameids(self, xml: str) -> List[str]:
"""Extract all frameid attribute values from an XML document."""
return re.findall(r'frameid=["\']([^"\']+)["\']', xml)
[文档]
def _search_pathways(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Search MetaCyc for pathways.
Args:
arguments: Dict containing:
- query: Search query (pathway name or keyword)
"""
query = arguments.get("query", "")
if not query:
return {"status": "error", "error": "Missing required parameter: query"}
try:
# Use BioCyc quick search API
response = requests.get(
f"{BIOCYC_BASE_URL}/META/search-query",
params={"type": "PATHWAY", "query": query},
timeout=self.timeout,
headers={
"User-Agent": "ToolUniverse/MetaCyc",
"Accept": "application/json",
},
)
# If JSON response works
if "json" in response.headers.get("Content-Type", ""):
data = response.json()
return {
"status": "success",
"data": {
"query": query,
"results": data
if isinstance(data, list)
else data.get("results", []),
},
"metadata": {"source": "MetaCyc"},
}
# Non-JSON response — likely auth wall
return _AUTH_WALL_ERROR
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[文档]
def _get_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get pathway details by MetaCyc pathway ID.
Args:
arguments: Dict containing:
- pathway_id: MetaCyc pathway ID (e.g., PWY-5177)
"""
pathway_id = arguments.get("pathway_id", "")
if not pathway_id:
return {
"status": "error",
"error": "Missing required parameter: pathway_id",
}
try:
xml = self._fetch_biocyc_xml(pathway_id)
if xml == "AUTH_REQUIRED":
return _AUTH_WALL_ERROR
if xml is None:
return {"status": "error", "error": f"Pathway not found: {pathway_id}"}
name = self._parse_xml_field(xml, "common-name")
reaction_ids = [
fid
for fid in self._parse_xml_frameids(xml)
if fid != pathway_id and not fid.endswith("-VARIANTS")
]
synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
return {
"status": "success",
"data": {
"pathway_id": pathway_id,
"name": name,
"synonyms": synonyms,
"reaction_ids": list(dict.fromkeys(reaction_ids)),
"url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}",
"diagram_url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}&detail-level=2",
},
"metadata": {"source": "MetaCyc", "pathway_id": pathway_id},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[文档]
def _get_compound(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get compound details from MetaCyc.
Args:
arguments: Dict containing:
- compound_id: MetaCyc compound ID (e.g., CPD-1)
"""
compound_id = arguments.get("compound_id", "")
if not compound_id:
return {
"status": "error",
"error": "Missing required parameter: compound_id",
}
try:
xml = self._fetch_biocyc_xml(compound_id)
if xml == "AUTH_REQUIRED":
return _AUTH_WALL_ERROR
if xml is None:
return {
"status": "error",
"error": f"Compound not found: {compound_id}",
}
name = self._parse_xml_field(xml, "common-name")
formula = self._parse_xml_field(xml, "molecular-weight-exp")
synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
return {
"status": "success",
"data": {
"compound_id": compound_id,
"name": name,
"synonyms": synonyms,
"molecular_weight": formula,
"url": f"{BIOCYC_BASE_URL}/compound?orgid=META&id={compound_id}",
},
"metadata": {"source": "MetaCyc", "compound_id": compound_id},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[文档]
def _get_reaction(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get reaction details from MetaCyc.
Args:
arguments: Dict containing:
- reaction_id: MetaCyc reaction ID (e.g., RXN-14500)
"""
reaction_id = arguments.get("reaction_id", "")
if not reaction_id:
return {
"status": "error",
"error": "Missing required parameter: reaction_id",
}
try:
xml = self._fetch_biocyc_xml(reaction_id)
if xml == "AUTH_REQUIRED":
return _AUTH_WALL_ERROR
if xml is None:
return {
"status": "error",
"error": f"Reaction not found: {reaction_id}",
}
name = self._parse_xml_field(xml, "common-name")
ec_numbers = re.findall(r"<ec-number[^>]*>([^<]+)</ec-number>", xml)
synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
return {
"status": "success",
"data": {
"reaction_id": reaction_id,
"name": name,
"ec_numbers": ec_numbers,
"synonyms": synonyms,
"url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=REACTION&object={reaction_id}",
},
"metadata": {"source": "MetaCyc", "reaction_id": reaction_id},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}