Source code for tooluniverse.metacyc_tool
"""
MetaCyc tool for ToolUniverse.
MetaCyc is a curated database of experimentally elucidated metabolic
pathways from all domains of life.
Website: https://metacyc.org/
BioCyc: https://biocyc.org/
"""
import os
import re
import requests
from typing import Any, Dict, List, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
# Public BioCyc website; used to build the human-facing links returned in results.
BIOCYC_BASE_URL = "https://biocyc.org"
# Host of the BioCyc web services called by this tool (login, getxml, xmlquery).
BIOCYC_API_URL = "https://websvc.biocyc.org"
# BioCyc gates its web services behind a free account: anonymous requests are
# allowed for ~1 call then redirected to a "Create Account" page. Logging in
# (POST email+password -> session cookie) lifts the wall. Verified 2026-06-03.
BIOCYC_LOGIN_URL = f"{BIOCYC_API_URL}/credentials/login/"
# Error payload returned when credentials are missing or a response looks like
# the account wall. This is one module-level dict shared by every code path
# that returns it, so it must be treated as read-only.
_AUTH_WALL_ERROR = {
"status": "error",
"error": (
"BioCyc requires a free account for API access. "
"Set BIOCYC_EMAIL and BIOCYC_PASSWORD environment variables. "
"Register for free at https://biocyc.org/signup.shtml "
"(or use the KEGG/Reactome tools, which need no account)."
),
"retryable": False,
}
[docs]
@register_tool("MetaCycTool")
class MetaCycTool(BaseTool):
"""
Tool for querying MetaCyc metabolic pathway database.
MetaCyc provides:
- Experimentally elucidated metabolic pathways
- Enzymes and reactions
- Metabolites and compounds
- Pathway diagrams
Uses BioCyc web services API.
Requires a free BioCyc account: set the BIOCYC_EMAIL and BIOCYC_PASSWORD
environment variables so the tool can log in before querying.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """Initialise the tool from its configuration and prepare an HTTP session.

    Args:
        tool_config: Tool configuration. The optional ``timeout`` entry
            (default 30) is passed as the timeout of every HTTP request;
            the optional ``parameter`` entry (default ``{}``) is stored
            unchanged.
    """
    super().__init__(tool_config)
    # Nothing is authenticated yet; _ensure_login flips this after a
    # successful login.
    self._logged_in = False
    self.parameter = tool_config.get("parameter", {})
    self.timeout: int = tool_config.get("timeout", 30)
    # A single session is kept for the lifetime of the instance so that the
    # cookie set by the BioCyc login is sent with every later request.
    http_session = requests.Session()
    http_session.headers["User-Agent"] = "ToolUniverse/MetaCyc"
    self.session = http_session
[docs]
def _ensure_login(self) -> Optional[Dict[str, Any]]:
    """Authenticate against BioCyc once per tool instance.

    Credentials are read from the ``BIOCYC_EMAIL`` and ``BIOCYC_PASSWORD``
    environment variables and POSTed to the login endpoint through
    ``self.session``, so the session keeps whatever cookie the login sets.

    Returns:
        None on success (or when already logged in); otherwise an error
        dict the caller should return as-is. The dict is always a fresh
        object, so callers may modify it freely.
    """
    if self._logged_in:
        return None
    email = os.environ.get("BIOCYC_EMAIL", "")
    password = os.environ.get("BIOCYC_PASSWORD", "")
    if not email or not password:
        # Hand out a copy: the module-level dict is shared, and a caller
        # mutating the result must not corrupt later error responses.
        return dict(_AUTH_WALL_ERROR)
    try:
        resp = self.session.post(
            BIOCYC_LOGIN_URL,
            data={"email": email, "password": password},
            timeout=self.timeout,
        )
    except requests.exceptions.RequestException as e:
        return {"status": "error", "error": f"BioCyc login failed: {str(e)}"}
    # A server-side failure says nothing about the credentials; report it as
    # such instead of sending the user off to re-check their password.
    if resp.status_code >= 500:
        return {
            "status": "error",
            "error": f"BioCyc login failed: HTTP {resp.status_code}",
        }
    # Wrong credentials -> HTTP 401 {"error": "no match for email and password"}.
    if resp.status_code != 200:
        return {
            "status": "error",
            "error": (
                "Invalid BioCyc credentials. Check BIOCYC_EMAIL and "
                "BIOCYC_PASSWORD (register at https://biocyc.org/signup.shtml)."
            ),
        }
    self._logged_in = True
    return None
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute MetaCyc query based on operation type."""
operation = arguments.get("operation", "")
# Auto-fill operation from tool config const if not provided by user
if not operation:
operation = self.get_schema_const_operation()
# All operations hit the account-gated BioCyc web services, so log in
# first and surface a clear credentials error before doing any work.
auth_error = self._ensure_login()
if auth_error is not None:
return auth_error
if operation == "search_pathways":
return self._search_pathways(arguments)
elif operation == "get_pathway":
return self._get_pathway(arguments)
elif operation == "get_compound":
return self._get_compound(arguments)
elif operation == "get_reaction":
return self._get_reaction(arguments)
else:
return {
"status": "error",
"error": f"Unknown operation: {operation}. Supported: search_pathways, get_pathway, get_compound, get_reaction",
}
[docs]
def _fetch_biocyc_xml(self, object_id: str) -> Optional[str]:
    """Fetch BioCyc XML for a MetaCyc object using the web services API.

    Feature-84B-004/005: biocyc.org/getxml?META=ID returns HTML (wrong).
    websvc.biocyc.org/getxml?id=META:ID returns XML (correct).

    The request goes through ``self.session`` with ``detail=full``.

    Args:
        object_id: MetaCyc frame id; it is sent as ``META:<object_id>``.

    Returns:
        The XML text when the response is HTTP 200 and starts with an XML
        declaration; the sentinel string ``"AUTH_REQUIRED"`` when the
        response is the account wall; None for any other response.

    Raises:
        requests.exceptions.RequestException: propagated from the HTTP call.
    """
    resp = self.session.get(
        f"{BIOCYC_API_URL}/getxml",
        params={"id": f"META:{object_id}", "detail": "full"},
        timeout=self.timeout,
    )
    # Look for the account wall before the generic status check: a
    # redirect to the account-required page or an explicit 401/403 must not
    # be reported to the user as "object not found".
    if "account-required" in resp.url or resp.status_code in (401, 403):
        return "AUTH_REQUIRED"
    if resp.status_code != 200:
        return None
    content = resp.text
    # Verify it's actually XML (not an HTML error page). Only leading
    # whitespace matters, so avoid copying the whole body with strip().
    return content if content.lstrip().startswith("<?xml") else None
[docs]
def _parse_xml_field(self, xml: str, tag: str) -> Optional[str]:
"""Extract the text content of the first matching XML tag."""
m = re.search(rf"<{tag}[^>]*>([^<]+)</{tag}>", xml)
return m.group(1).strip() if m else None
[docs]
def _parse_xml_frameids(self, xml: str) -> List[str]:
"""Extract all frameid attribute values from an XML document."""
return re.findall(r'frameid=["\']([^"\']+)["\']', xml)
[docs]
def _parse_pathway_hits(self, xml: str) -> List[Dict[str, str]]:
"""Extract (id, name) pairs from each <Pathway> element of a query result."""
hits = []
for block in re.findall(r"<Pathway\b[^>]*>.*?</Pathway>", xml, flags=re.DOTALL):
m_id = re.search(r'frameid=["\']([^"\']+)["\']', block)
if not m_id:
continue
name = self._parse_xml_field(block, "common-name")
hits.append({"pathway_id": m_id.group(1), "name": name or m_id.group(1)})
return hits
[docs]
def _search_pathways(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Search MetaCyc for pathways whose name matches the query.
Args:
arguments: Dict containing:
- query: Search query (pathway name or keyword)
Uses the authenticated BioVelo xmlquery web service, which returns
parseable XML (the public /META/search-query path serves an HTML page).
"""
query = arguments.get("query", "")
if not query:
return {"status": "error", "error": "Missing required parameter: query"}
# BioVelo: every MetaCyc pathway whose common-name contains the query.
escaped = query.replace('"', "")
biovelo = f'[x:x<-meta^^pathways,x^common-name~"{escaped}"]'
try:
response = self.session.get(
f"{BIOCYC_API_URL}/xmlquery",
params={"": biovelo, "detail": "low"},
timeout=self.timeout,
)
if response.status_code != 200 or "account-required" in response.url:
return _AUTH_WALL_ERROR
xml = response.text
if not xml.strip().startswith("<?xml"):
return _AUTH_WALL_ERROR
hits = self._parse_pathway_hits(xml)
return {
"status": "success",
"data": {"query": query, "results": hits},
"metadata": {"source": "MetaCyc", "count": len(hits)},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _get_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get pathway details by MetaCyc pathway ID.
Args:
arguments: Dict containing:
- pathway_id: MetaCyc pathway ID (e.g., PWY-5177)
"""
pathway_id = arguments.get("pathway_id", "")
if not pathway_id:
return {
"status": "error",
"error": "Missing required parameter: pathway_id",
}
try:
xml = self._fetch_biocyc_xml(pathway_id)
if xml == "AUTH_REQUIRED":
return _AUTH_WALL_ERROR
if xml is None:
return {"status": "error", "error": f"Pathway not found: {pathway_id}"}
name = self._parse_xml_field(xml, "common-name")
reaction_ids = [
fid
for fid in self._parse_xml_frameids(xml)
if fid != pathway_id and not fid.endswith("-VARIANTS")
]
synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
return {
"status": "success",
"data": {
"pathway_id": pathway_id,
"name": name,
"synonyms": synonyms,
"reaction_ids": list(dict.fromkeys(reaction_ids)),
"url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}",
"diagram_url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}&detail-level=2",
},
"metadata": {"source": "MetaCyc", "pathway_id": pathway_id},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _get_compound(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get compound details from MetaCyc.
Args:
arguments: Dict containing:
- compound_id: MetaCyc compound ID (e.g., CPD-1)
"""
compound_id = arguments.get("compound_id", "")
if not compound_id:
return {
"status": "error",
"error": "Missing required parameter: compound_id",
}
try:
xml = self._fetch_biocyc_xml(compound_id)
if xml == "AUTH_REQUIRED":
return _AUTH_WALL_ERROR
if xml is None:
return {
"status": "error",
"error": f"Compound not found: {compound_id}",
}
name = self._parse_xml_field(xml, "common-name")
formula = self._parse_xml_field(xml, "molecular-weight-exp")
synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
return {
"status": "success",
"data": {
"compound_id": compound_id,
"name": name,
"synonyms": synonyms,
"molecular_weight": formula,
"url": f"{BIOCYC_BASE_URL}/compound?orgid=META&id={compound_id}",
},
"metadata": {"source": "MetaCyc", "compound_id": compound_id},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs]
def _get_reaction(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get reaction details from MetaCyc.
Args:
arguments: Dict containing:
- reaction_id: MetaCyc reaction ID (e.g., RXN-14500)
"""
reaction_id = arguments.get("reaction_id", "")
if not reaction_id:
return {
"status": "error",
"error": "Missing required parameter: reaction_id",
}
try:
xml = self._fetch_biocyc_xml(reaction_id)
if xml == "AUTH_REQUIRED":
return _AUTH_WALL_ERROR
if xml is None:
return {
"status": "error",
"error": f"Reaction not found: {reaction_id}",
}
name = self._parse_xml_field(xml, "common-name")
ec_numbers = re.findall(r"<ec-number[^>]*>([^<]+)</ec-number>", xml)
synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
return {
"status": "success",
"data": {
"reaction_id": reaction_id,
"name": name,
"ec_numbers": ec_numbers,
"synonyms": synonyms,
"url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=REACTION&object={reaction_id}",
},
"metadata": {"source": "MetaCyc", "reaction_id": reaction_id},
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"Request failed: {str(e)}"}
except Exception as e:
return {"status": "error", "error": f"Unexpected error: {str(e)}"}