Source code for tooluniverse.metacyc_tool

"""
MetaCyc tool for ToolUniverse.

MetaCyc is a curated database of experimentally elucidated metabolic
pathways from all domains of life.

Website: https://metacyc.org/
BioCyc: https://biocyc.org/
"""

import os
import re
import requests
from typing import Any, Dict, List, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool

BIOCYC_BASE_URL = "https://biocyc.org"
BIOCYC_API_URL = "https://websvc.biocyc.org"
# BioCyc gates its web services behind a free account: anonymous requests are
# allowed for ~1 call then redirected to a "Create Account" page. Logging in
# (POST email+password -> session cookie) lifts the wall. Verified 2026-06-03.
BIOCYC_LOGIN_URL = f"{BIOCYC_API_URL}/credentials/login/"
_AUTH_WALL_ERROR = {
    "status": "error",
    "error": (
        "BioCyc requires a free account for API access. "
        "Set BIOCYC_EMAIL and BIOCYC_PASSWORD environment variables. "
        "Register for free at https://biocyc.org/signup.shtml "
        "(or use the KEGG/Reactome tools, which need no account)."
    ),
    "retryable": False,
}


[docs] @register_tool("MetaCycTool") class MetaCycTool(BaseTool): """ Tool for querying MetaCyc metabolic pathway database. MetaCyc provides: - Experimentally elucidated metabolic pathways - Enzymes and reactions - Metabolites and compounds - Pathway diagrams Uses BioCyc web services API. No authentication required for basic access. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout: int = tool_config.get("timeout", 30) self.parameter = tool_config.get("parameter", {}) # Reused across calls so the BioCyc session cookie obtained at login is # carried on every subsequent web-service request. self.session = requests.Session() self.session.headers.update({"User-Agent": "ToolUniverse/MetaCyc"}) self._logged_in = False
[docs] def _ensure_login(self) -> Optional[Dict[str, Any]]: """Authenticate against BioCyc once per tool instance. Returns None on success (the session now carries the auth cookie), or an error dict (no credentials / bad credentials) the caller returns. """ if self._logged_in: return None email = os.environ.get("BIOCYC_EMAIL", "") password = os.environ.get("BIOCYC_PASSWORD", "") if not email or not password: return _AUTH_WALL_ERROR try: resp = self.session.post( BIOCYC_LOGIN_URL, data={"email": email, "password": password}, timeout=self.timeout, ) except requests.exceptions.RequestException as e: return {"status": "error", "error": f"BioCyc login failed: {str(e)}"} # Wrong credentials -> HTTP 401 {"error": "no match for email and password"}. if resp.status_code != 200: return { "status": "error", "error": ( "Invalid BioCyc credentials. Check BIOCYC_EMAIL and " "BIOCYC_PASSWORD (register at https://biocyc.org/signup.shtml)." ), } self._logged_in = True return None
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute MetaCyc query based on operation type.""" operation = arguments.get("operation", "") # Auto-fill operation from tool config const if not provided by user if not operation: operation = self.get_schema_const_operation() # All operations hit the account-gated BioCyc web services, so log in # first and surface a clear credentials error before doing any work. auth_error = self._ensure_login() if auth_error is not None: return auth_error if operation == "search_pathways": return self._search_pathways(arguments) elif operation == "get_pathway": return self._get_pathway(arguments) elif operation == "get_compound": return self._get_compound(arguments) elif operation == "get_reaction": return self._get_reaction(arguments) else: return { "status": "error", "error": f"Unknown operation: {operation}. Supported: search_pathways, get_pathway, get_compound, get_reaction", }
[docs] def _fetch_biocyc_xml(self, object_id: str) -> Optional[str]: """Fetch BioCyc XML for a MetaCyc object using the web services API. Feature-84B-004/005: biocyc.org/getxml?META=ID returns HTML (wrong). websvc.biocyc.org/getxml?id=META:ID returns XML (correct). Uses the authenticated session (see _ensure_login). Returns "AUTH_REQUIRED" if BioCyc still redirects to an account-required page. """ resp = self.session.get( f"{BIOCYC_API_URL}/getxml", params={"id": f"META:{object_id}", "detail": "full"}, timeout=self.timeout, ) if resp.status_code != 200: return None # Detect BioCyc authentication wall (redirected to account-required page) if "account-required" in resp.url: return "AUTH_REQUIRED" content = resp.text # Verify it's actually XML (not an HTML error page) return content if content.strip().startswith("<?xml") else None
[docs] def _parse_xml_field(self, xml: str, tag: str) -> Optional[str]: """Extract the text content of the first matching XML tag.""" m = re.search(rf"<{tag}[^>]*>([^<]+)</{tag}>", xml) return m.group(1).strip() if m else None
[docs] def _parse_xml_frameids(self, xml: str) -> List[str]: """Extract all frameid attribute values from an XML document.""" return re.findall(r'frameid=["\']([^"\']+)["\']', xml)
[docs] def _parse_pathway_hits(self, xml: str) -> List[Dict[str, str]]: """Extract (id, name) pairs from each <Pathway> element of a query result.""" hits = [] for block in re.findall(r"<Pathway\b[^>]*>.*?</Pathway>", xml, flags=re.DOTALL): m_id = re.search(r'frameid=["\']([^"\']+)["\']', block) if not m_id: continue name = self._parse_xml_field(block, "common-name") hits.append({"pathway_id": m_id.group(1), "name": name or m_id.group(1)}) return hits
[docs] def _search_pathways(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """ Search MetaCyc for pathways whose name matches the query. Args: arguments: Dict containing: - query: Search query (pathway name or keyword) Uses the authenticated BioVelo xmlquery web service, which returns parseable XML (the public /META/search-query path serves an HTML page). """ query = arguments.get("query", "") if not query: return {"status": "error", "error": "Missing required parameter: query"} # BioVelo: every MetaCyc pathway whose common-name contains the query. escaped = query.replace('"', "") biovelo = f'[x:x<-meta^^pathways,x^common-name~"{escaped}"]' try: response = self.session.get( f"{BIOCYC_API_URL}/xmlquery", params={"": biovelo, "detail": "low"}, timeout=self.timeout, ) if response.status_code != 200 or "account-required" in response.url: return _AUTH_WALL_ERROR xml = response.text if not xml.strip().startswith("<?xml"): return _AUTH_WALL_ERROR hits = self._parse_pathway_hits(xml) return { "status": "success", "data": {"query": query, "results": hits}, "metadata": {"source": "MetaCyc", "count": len(hits)}, } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"Request failed: {str(e)}"} except Exception as e: return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs] def _get_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """ Get pathway details by MetaCyc pathway ID. Args: arguments: Dict containing: - pathway_id: MetaCyc pathway ID (e.g., PWY-5177) """ pathway_id = arguments.get("pathway_id", "") if not pathway_id: return { "status": "error", "error": "Missing required parameter: pathway_id", } try: xml = self._fetch_biocyc_xml(pathway_id) if xml == "AUTH_REQUIRED": return _AUTH_WALL_ERROR if xml is None: return {"status": "error", "error": f"Pathway not found: {pathway_id}"} name = self._parse_xml_field(xml, "common-name") reaction_ids = [ fid for fid in self._parse_xml_frameids(xml) if fid != pathway_id and not fid.endswith("-VARIANTS") ] synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml) return { "status": "success", "data": { "pathway_id": pathway_id, "name": name, "synonyms": synonyms, "reaction_ids": list(dict.fromkeys(reaction_ids)), "url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}", "diagram_url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}&detail-level=2", }, "metadata": {"source": "MetaCyc", "pathway_id": pathway_id}, } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"Request failed: {str(e)}"} except Exception as e: return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs] def _get_compound(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """ Get compound details from MetaCyc. Args: arguments: Dict containing: - compound_id: MetaCyc compound ID (e.g., CPD-1) """ compound_id = arguments.get("compound_id", "") if not compound_id: return { "status": "error", "error": "Missing required parameter: compound_id", } try: xml = self._fetch_biocyc_xml(compound_id) if xml == "AUTH_REQUIRED": return _AUTH_WALL_ERROR if xml is None: return { "status": "error", "error": f"Compound not found: {compound_id}", } name = self._parse_xml_field(xml, "common-name") formula = self._parse_xml_field(xml, "molecular-weight-exp") synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml) return { "status": "success", "data": { "compound_id": compound_id, "name": name, "synonyms": synonyms, "molecular_weight": formula, "url": f"{BIOCYC_BASE_URL}/compound?orgid=META&id={compound_id}", }, "metadata": {"source": "MetaCyc", "compound_id": compound_id}, } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"Request failed: {str(e)}"} except Exception as e: return {"status": "error", "error": f"Unexpected error: {str(e)}"}
[docs] def _get_reaction(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """ Get reaction details from MetaCyc. Args: arguments: Dict containing: - reaction_id: MetaCyc reaction ID (e.g., RXN-14500) """ reaction_id = arguments.get("reaction_id", "") if not reaction_id: return { "status": "error", "error": "Missing required parameter: reaction_id", } try: xml = self._fetch_biocyc_xml(reaction_id) if xml == "AUTH_REQUIRED": return _AUTH_WALL_ERROR if xml is None: return { "status": "error", "error": f"Reaction not found: {reaction_id}", } name = self._parse_xml_field(xml, "common-name") ec_numbers = re.findall(r"<ec-number[^>]*>([^<]+)</ec-number>", xml) synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml) return { "status": "success", "data": { "reaction_id": reaction_id, "name": name, "ec_numbers": ec_numbers, "synonyms": synonyms, "url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=REACTION&object={reaction_id}", }, "metadata": {"source": "MetaCyc", "reaction_id": reaction_id}, } except requests.exceptions.RequestException as e: return {"status": "error", "error": f"Request failed: {str(e)}"} except Exception as e: return {"status": "error", "error": f"Unexpected error: {str(e)}"}