Source code for tooluniverse.reactome_content_tool

# reactome_content_tool.py
"""
Reactome Content Service tool for ToolUniverse.

Provides access to Reactome Content Service REST API for searching
pathways/reactions and retrieving detailed pathway event hierarchies.

API: https://reactome.org/ContentService/
No authentication required. Free public access.
"""

import requests
import re
from typing import Dict, Any, Optional
from .base_tool import BaseTool


REACTOME_CS_BASE_URL = "https://reactome.org/ContentService"



[docs]
class ReactomeContentTool(BaseTool):
    """
    Tool for Reactome Content Service providing pathway search,
    contained event retrieval, and enhanced pathway details.

    Complements existing Reactome tools by adding free-text search
    and hierarchical event decomposition.

    No authentication required.
    """


[docs]
    def __init__(self, tool_config: Dict[str, Any]):
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 60)
        fields = tool_config.get("fields", {})
        self.endpoint = fields.get("endpoint", "search")



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the Reactome Content Service API call."""
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"Reactome Content Service timed out after {self.timeout}s",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to Reactome Content Service",
            }
        except requests.exceptions.HTTPError as e:
            code = e.response.status_code if e.response is not None else "unknown"
            if code == 404:
                return {
                    "status": "error",
                    "error": f"Entity not found: {arguments.get('identifier', arguments.get('query', ''))}",
                }
            return {
                "status": "error",
                "error": f"Reactome Content Service HTTP error: {code}",
            }
        except Exception as e:
            return {
                "status": "error",
                "error": f"Unexpected error querying Reactome Content Service: {str(e)}",
            }



[docs]
    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate endpoint."""
        if self.endpoint == "search":
            return self._search(arguments)
        elif self.endpoint == "contained_events":
            return self._get_contained_events(arguments)
        elif self.endpoint == "enhanced_pathway":
            return self._get_enhanced_pathway(arguments)
        else:
            return {"status": "error", "error": f"Unknown endpoint: {self.endpoint}"}



[docs]
    @staticmethod
    def _strip_html(text: Optional[str]) -> Optional[str]:
        """Remove HTML tags from text."""
        if not text:
            return text
        return re.sub(r"<[^>]+>", "", text)



[docs]
    def _search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search Reactome for pathways, reactions, and other entities."""
        query = arguments.get("query", "")
        if not query:
            return {
                "status": "error",
                "error": "query parameter is required (e.g., 'apoptosis', 'TP53', 'cell cycle')",
            }

        species = arguments.get("species", "Homo sapiens")
        types = arguments.get("types", "Pathway")
        cluster = arguments.get("cluster", True)

        url = f"{REACTOME_CS_BASE_URL}/search/query"
        params = {
            "query": query,
            "species": species,
            "types": types,
            "cluster": str(cluster).lower(),
        }
        response = requests.get(
            url,
            params=params,
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        data = response.json()

        results_groups = data.get("results", [])
        all_entries = []
        for group in results_groups:
            type_name = group.get("typeName", "Unknown")
            entries = group.get("entries", [])
            for entry in entries:
                all_entries.append(
                    {
                        "type": type_name,
                        "stId": entry.get("stId"),
                        "name": self._strip_html(entry.get("name", "")),
                        "species": entry.get("species", []),
                        "compartments": entry.get("compartmentNames", []),
                        "is_disease": entry.get("isDisease", False),
                    }
                )

        return {
            "status": "success",
            "data": {
                "query": query,
                "species": species,
                "types_searched": types,
                "total_results": len(all_entries),
                "results": all_entries[:30],
            },
            "metadata": {
                "source": "Reactome Content Service - Search",
                "query": query,
            },
        }



[docs]
    def _get_contained_events(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get all events (sub-pathways and reactions) contained in a pathway."""
        identifier = arguments.get("identifier", "")
        if not identifier:
            return {
                "status": "error",
                "error": "identifier parameter is required (Reactome pathway stable ID, e.g., 'R-HSA-109581')",
            }

        url = f"{REACTOME_CS_BASE_URL}/data/pathway/{identifier}/containedEvents"
        response = requests.get(
            url, headers={"Accept": "application/json"}, timeout=self.timeout
        )
        response.raise_for_status()
        events = response.json()

        # Count event types (some elements may be plain integer DB IDs, skip those)
        pathways = []
        reactions = []
        for e in events:
            if not isinstance(e, dict):
                continue
            schema = e.get("schemaClass", "")
            entry = {
                "stId": e.get("stId"),
                "name": e.get("displayName"),
                "schemaClass": schema,
                "is_disease": e.get("isInDisease", False),
            }
            if schema == "Pathway":
                pathways.append(entry)
            else:
                reactions.append(entry)

        return {
            "status": "success",
            "data": {
                "identifier": identifier,
                "total_events": len(events),
                "pathway_count": len(pathways),
                "reaction_count": len(reactions),
                "pathways": pathways,
                "reactions": reactions[:50],
            },
            "metadata": {
                "source": "Reactome Content Service - Contained Events",
                "identifier": identifier,
            },
        }



[docs]
    def _get_enhanced_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get enhanced pathway details including literature, GO terms, and sub-events."""
        identifier = arguments.get("identifier", "")
        if not identifier:
            return {
                "status": "error",
                "error": "identifier parameter is required (Reactome pathway stable ID, e.g., 'R-HSA-109581')",
            }

        url = f"{REACTOME_CS_BASE_URL}/data/query/enhanced/{identifier}"
        response = requests.get(
            url, headers={"Accept": "application/json"}, timeout=self.timeout
        )
        response.raise_for_status()
        data = response.json()

        # Extract sub-events (defensive: Reactome occasionally returns ints
        # in hasEvent for terminal references — only descend into dicts).
        sub_events = [
            {
                "stId": e.get("stId"),
                "name": e.get("displayName"),
                "schemaClass": e.get("schemaClass"),
            }
            for e in data.get("hasEvent", [])
            if isinstance(e, dict)
        ]

        # Extract literature
        literature = [
            {
                "title": ref.get("title"),
                "pubMedIdentifier": ref.get("pubMedIdentifier"),
                "year": ref.get("year"),
                "journal": ref.get("journal", {}).get("title")
                if isinstance(ref.get("journal"), dict)
                else None,
            }
            for ref in data.get("literatureReference", [])
            if isinstance(ref, dict)
        ]

        # Extract GO terms
        go_terms = []
        go_bp = data.get("goBiologicalProcess")
        if isinstance(go_bp, list):
            go_terms.extend(
                {"accession": g.get("accession"), "name": g.get("displayName")}
                for g in go_bp
                if isinstance(g, dict)
            )
        elif isinstance(go_bp, dict):
            go_terms.append(
                {"accession": go_bp.get("accession"), "name": go_bp.get("displayName")}
            )

        # Extract summation (description)
        summation = ""
        summ_list = data.get("summation", [])
        if summ_list:
            texts = [
                self._strip_html(s.get("text", "")) for s in summ_list if s.get("text")
            ]
            summation = " ".join(texts)

        return {
            "status": "success",
            "data": {
                "identifier": data.get("stId"),
                "name": data.get("displayName"),
                "species": data.get("speciesName"),
                "schemaClass": data.get("schemaClass"),
                "is_disease": data.get("isInDisease", False),
                "summation": summation[:2000] if summation else None,
                "go_biological_process": go_terms if go_terms else None,
                "sub_events": sub_events,
                "literature": literature[:20],
            },
            "metadata": {
                "source": "Reactome Content Service - Enhanced Pathway",
                "identifier": identifier,
            },
        }