# Source code for tooluniverse.metacyc_tool

"""
MetaCyc tool for ToolUniverse.

MetaCyc is a curated database of experimentally elucidated metabolic
pathways from all domains of life.

Website: https://metacyc.org/
BioCyc: https://biocyc.org/
"""

import re
import requests
from typing import Any, Dict, List, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool

# Base URL of the BioCyc website. Used for the pathway search endpoint and for
# the page/diagram links placed in results.
BIOCYC_BASE_URL = "https://biocyc.org"
# Base URL of the BioCyc web-services host. Used for the getxml endpoint.
BIOCYC_API_URL = "https://websvc.biocyc.org"
# Error result returned by the operations when BioCyc answers with its
# account-required wall instead of data. This single dict object is shared by
# all call sites, so it should be treated as read-only.
_AUTH_WALL_ERROR = {
    "status": "error",
    "error": (
        "BioCyc now requires a free account for API access. MetaCyc tools are unavailable. "
        "Create an account at https://biocyc.org/signup.shtml or use KEGG/Reactome tools as alternatives."
    ),
    # Tells callers that repeating the same request is not expected to help.
    "retryable": False,
}



[文档]
@register_tool("MetaCycTool")
class MetaCycTool(BaseTool):
    """
    Tool for querying the MetaCyc metabolic pathway database.

    MetaCyc provides:
    - Experimentally elucidated metabolic pathways
    - Enzymes and reactions
    - Metabolites and compounds
    - Pathway diagrams

    Supported operations (selected by the ``operation`` argument of ``run``):
    ``search_pathways``, ``get_pathway``, ``get_compound``, ``get_reaction``.

    Uses the BioCyc website and web services API. BioCyc may answer with an
    account-required page instead of data; in that case the operations return
    an error result stating that a free BioCyc account is needed.
    """


[文档]
    def __init__(self, tool_config: Dict[str, Any]):
        """Initialise the tool from its configuration mapping.

        Args:
            tool_config: Tool configuration. ``timeout`` (default ``30``) is
                later passed as the timeout of every HTTP request;
                ``parameter`` (default ``{}``) is stored unchanged.
        """
        super().__init__(tool_config)
        # Both settings are optional; absent keys fall back to the defaults.
        self.parameter = tool_config.get("parameter", {})
        self.timeout: int = tool_config.get("timeout", 30)



[文档]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute MetaCyc query based on operation type."""
        operation = arguments.get("operation", "")
        # Auto-fill operation from tool config const if not provided by user
        if not operation:
            operation = self.get_schema_const_operation()

        if operation == "search_pathways":
            return self._search_pathways(arguments)
        elif operation == "get_pathway":
            return self._get_pathway(arguments)
        elif operation == "get_compound":
            return self._get_compound(arguments)
        elif operation == "get_reaction":
            return self._get_reaction(arguments)
        else:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Supported: search_pathways, get_pathway, get_compound, get_reaction",
            }



[文档]
    def _fetch_biocyc_xml(self, object_id: str) -> Optional[str]:
        """Fetch BioCyc XML for a MetaCyc object using the web services API.

        Feature-84B-004/005: biocyc.org/getxml?META=ID returns HTML (wrong).
        websvc.biocyc.org/getxml?id=META:ID returns XML (correct).

        Args:
            object_id: MetaCyc object ID, sent as ``META:<object_id>``.

        Returns:
            The XML text on success; the sentinel string ``"AUTH_REQUIRED"``
            when the request ended on BioCyc's account-required page; ``None``
            for any other non-200 response or a body that is not XML.

        Raises:
            requests.exceptions.RequestException: Propagated from
                ``requests.get`` (connection failure, timeout, ...).
        """
        resp = requests.get(
            f"{BIOCYC_API_URL}/getxml",
            params={"id": f"META:{object_id}", "detail": "full"},
            timeout=self.timeout,
            headers={"User-Agent": "ToolUniverse/MetaCyc"},
        )
        # Detect the BioCyc authentication wall (redirect to the
        # account-required page) BEFORE looking at the status code; otherwise
        # a wall served with a non-200 status is misreported as "not found".
        if "account-required" in resp.url:
            return "AUTH_REQUIRED"
        if resp.status_code != 200:
            return None
        content = resp.text
        # Verify it's actually XML (not an HTML error page)
        return content if content.lstrip().startswith("<?xml") else None



[文档]
    def _parse_xml_field(self, xml: str, tag: str) -> Optional[str]:
        """Extract the text content of the first matching XML tag."""
        m = re.search(rf"<{tag}[^>]*>([^<]+)</{tag}>", xml)
        return m.group(1).strip() if m else None



[文档]
    def _parse_xml_frameids(self, xml: str) -> List[str]:
        """Extract all frameid attribute values from an XML document."""
        return re.findall(r'frameid=["\']([^"\']+)["\']', xml)



[文档]
    def _search_pathways(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search MetaCyc for pathways.

        Args:
            arguments: Dict containing:
                - query: Search query (pathway name or keyword)
        """
        query = arguments.get("query", "")
        if not query:
            return {"status": "error", "error": "Missing required parameter: query"}

        try:
            # Use BioCyc quick search API
            response = requests.get(
                f"{BIOCYC_BASE_URL}/META/search-query",
                params={"type": "PATHWAY", "query": query},
                timeout=self.timeout,
                headers={
                    "User-Agent": "ToolUniverse/MetaCyc",
                    "Accept": "application/json",
                },
            )

            # If JSON response works
            if "json" in response.headers.get("Content-Type", ""):
                data = response.json()
                return {
                    "status": "success",
                    "data": {
                        "query": query,
                        "results": data
                        if isinstance(data, list)
                        else data.get("results", []),
                    },
                    "metadata": {"source": "MetaCyc"},
                }

            # Non-JSON response — likely auth wall
            return _AUTH_WALL_ERROR

        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}



[文档]
    def _get_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get pathway details by MetaCyc pathway ID.

        Args:
            arguments: Dict containing:
                - pathway_id: MetaCyc pathway ID (e.g., PWY-5177)
        """
        pathway_id = arguments.get("pathway_id", "")
        if not pathway_id:
            return {
                "status": "error",
                "error": "Missing required parameter: pathway_id",
            }

        try:
            xml = self._fetch_biocyc_xml(pathway_id)
            if xml == "AUTH_REQUIRED":
                return _AUTH_WALL_ERROR
            if xml is None:
                return {"status": "error", "error": f"Pathway not found: {pathway_id}"}

            name = self._parse_xml_field(xml, "common-name")
            reaction_ids = [
                fid
                for fid in self._parse_xml_frameids(xml)
                if fid != pathway_id and not fid.endswith("-VARIANTS")
            ]
            synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
            return {
                "status": "success",
                "data": {
                    "pathway_id": pathway_id,
                    "name": name,
                    "synonyms": synonyms,
                    "reaction_ids": list(dict.fromkeys(reaction_ids)),
                    "url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}",
                    "diagram_url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=PATHWAY&object={pathway_id}&detail-level=2",
                },
                "metadata": {"source": "MetaCyc", "pathway_id": pathway_id},
            }

        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}



[文档]
    def _get_compound(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get compound details from MetaCyc.

        Args:
            arguments: Dict containing:
                - compound_id: MetaCyc compound ID (e.g., CPD-1)
        """
        compound_id = arguments.get("compound_id", "")
        if not compound_id:
            return {
                "status": "error",
                "error": "Missing required parameter: compound_id",
            }

        try:
            xml = self._fetch_biocyc_xml(compound_id)
            if xml == "AUTH_REQUIRED":
                return _AUTH_WALL_ERROR
            if xml is None:
                return {
                    "status": "error",
                    "error": f"Compound not found: {compound_id}",
                }

            name = self._parse_xml_field(xml, "common-name")
            formula = self._parse_xml_field(xml, "molecular-weight-exp")
            synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
            return {
                "status": "success",
                "data": {
                    "compound_id": compound_id,
                    "name": name,
                    "synonyms": synonyms,
                    "molecular_weight": formula,
                    "url": f"{BIOCYC_BASE_URL}/compound?orgid=META&id={compound_id}",
                },
                "metadata": {"source": "MetaCyc", "compound_id": compound_id},
            }

        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}



[文档]
    def _get_reaction(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get reaction details from MetaCyc.

        Args:
            arguments: Dict containing:
                - reaction_id: MetaCyc reaction ID (e.g., RXN-14500)
        """
        reaction_id = arguments.get("reaction_id", "")
        if not reaction_id:
            return {
                "status": "error",
                "error": "Missing required parameter: reaction_id",
            }

        try:
            xml = self._fetch_biocyc_xml(reaction_id)
            if xml == "AUTH_REQUIRED":
                return _AUTH_WALL_ERROR
            if xml is None:
                return {
                    "status": "error",
                    "error": f"Reaction not found: {reaction_id}",
                }

            name = self._parse_xml_field(xml, "common-name")
            ec_numbers = re.findall(r"<ec-number[^>]*>([^<]+)</ec-number>", xml)
            synonyms = re.findall(r"<synonym[^>]*>([^<]+)</synonym>", xml)
            return {
                "status": "success",
                "data": {
                    "reaction_id": reaction_id,
                    "name": name,
                    "ec_numbers": ec_numbers,
                    "synonyms": synonyms,
                    "url": f"{BIOCYC_BASE_URL}/META/NEW-IMAGE?type=REACTION&object={reaction_id}",
                },
                "metadata": {"source": "MetaCyc", "reaction_id": reaction_id},
            }

        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}