Source code for tooluniverse.reactome_analysis_tool

# reactome_analysis_tool.py
"""
Reactome Analysis Service tool for ToolUniverse.

The Reactome Analysis Service provides pathway overrepresentation analysis,
expression data analysis, and species comparison for gene/protein lists.
This is separate from the Reactome Content Service (already in ToolUniverse).

API: https://reactome.org/AnalysisService
No authentication required. Free public access.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

# Root URL of the Reactome Analysis Service; every endpoint path requested by
# ReactomeAnalysisTool is built by appending to this value.
ANALYSIS_BASE_URL = "https://reactome.org/AnalysisService"


@register_tool("ReactomeAnalysisTool")
class ReactomeAnalysisTool(BaseTool):
    """
    Tool for Reactome pathway analysis (enrichment/overrepresentation).

    Accepts gene/protein identifiers and performs overrepresentation analysis
    or species comparison against Reactome pathways. Returns enriched pathways
    with p-values, FDR, and entity counts.

    The concrete operation is selected by ``tool_config["fields"]["endpoint"]``
    (see ``_ENDPOINT_HANDLERS`` for the accepted values). Every public result
    is a dict with a ``"status"`` key of ``"success"`` or ``"error"``.

    No authentication required.
    """

    # Page-size policy shared by every paginated endpoint: callers may ask for
    # fewer rows, but never more than the cap.
    _DEFAULT_PAGE_SIZE = 20
    _MAX_PAGE_SIZE = 50

    # Maps the configured endpoint name to the method that implements it.
    # Method names (not bound methods) are stored so that subclasses or
    # instance-level overrides are still honoured at call time.
    _ENDPOINT_HANDLERS = {
        "pathway_enrichment": "_pathway_enrichment",
        "species_comparison": "_species_comparison",
        "token_result": "_token_result",
        "expression_analysis": "_expression_analysis",
        "species_comparison_v2": "_species_comparison_v2",
        "found_entities": "_found_entities",
        "not_found_identifiers": "_not_found_identifiers",
    }

    def __init__(self, tool_config: Dict[str, Any]):
        """Read the request timeout and target endpoint from ``tool_config``.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 60)
                and ``fields.endpoint`` (default ``"pathway_enrichment"``) are
                the keys read here; the rest is handled by ``BaseTool``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 60)
        fields = tool_config.get("fields", {})
        self.endpoint = fields.get("endpoint", "pathway_enrichment")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the Reactome Analysis API call.

        Network and HTTP failures are never raised to the caller; they are
        converted into ``{"status": "error", "error": <message>}``.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"Reactome Analysis request timed out after {self.timeout} seconds",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to Reactome Analysis Service.",
            }
        except requests.exceptions.HTTPError as e:
            # HTTPError can be constructed without a response attached; fall
            # back to a placeholder instead of failing inside the handler.
            status_code = getattr(e.response, "status_code", "unknown")
            return {
                "status": "error",
                "error": f"Reactome Analysis HTTP error: {status_code}",
            }
        except Exception as e:
            # Top-level boundary: report anything else (bad JSON, bad
            # argument types, ...) as an error result rather than crashing.
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate analysis endpoint."""
        handler_name = (
            self._ENDPOINT_HANDLERS.get(self.endpoint)
            if isinstance(self.endpoint, str)
            else None
        )
        if handler_name is None:
            return {"status": "error", "error": f"Unknown endpoint: {self.endpoint}"}
        return getattr(self, handler_name)(arguments)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @classmethod
    def _page_size(cls, arguments: Dict[str, Any]) -> int:
        """Return the requested ``page_size`` as an int clamped to 1..50.

        Missing, ``None`` or non-numeric values fall back to the default (20)
        instead of raising, so a sloppy argument never aborts the request.
        """
        raw = arguments.get("page_size", cls._DEFAULT_PAGE_SIZE)
        try:
            size = int(raw)
        except (TypeError, ValueError, OverflowError):
            size = cls._DEFAULT_PAGE_SIZE
        return max(1, min(size, cls._MAX_PAGE_SIZE))

    @staticmethod
    def _normalize_identifiers(identifiers: Any) -> Any:
        """Turn a list/tuple of identifiers into the newline-separated body.

        Items are stringified first so numeric IDs (e.g. Entrez gene IDs given
        as ints) do not break ``str.join``. Any other value is returned as-is.
        """
        if isinstance(identifiers, (list, tuple)):
            return "\n".join(str(item) for item in identifiers)
        return identifiers

    @staticmethod
    def _identifiers_url(projection: Any) -> str:
        """Return the identifiers endpoint, with or without projection."""
        if projection:
            return f"{ANALYSIS_BASE_URL}/identifiers/projection"
        return f"{ANALYSIS_BASE_URL}/identifiers/"

    def _post_identifiers(
        self, url: str, identifiers: Any, params: Dict[str, Any]
    ) -> Any:
        """POST a plain-text identifier payload and return the decoded JSON.

        Raises:
            requests.exceptions.HTTPError: on a non-2xx response.
        """
        response = requests.post(
            url,
            data=identifiers,
            headers={"Content-Type": "text/plain"},
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def _get_json(self, url: str, params: Dict[str, Any]) -> Any:
        """GET ``url`` with ``params`` and return the decoded JSON.

        Raises:
            requests.exceptions.HTTPError: on a non-2xx response.
        """
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    # ------------------------------------------------------------------
    # Endpoint implementations
    # ------------------------------------------------------------------

    def _pathway_enrichment(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Perform pathway overrepresentation analysis.

        Arguments read: ``identifiers`` (string or list, required),
        ``page_size`` (default 20, capped at 50), ``include_disease``
        (default True) and ``projection`` (default True).
        """
        # Normalise before validating so that e.g. [""] is rejected too.
        identifiers = self._normalize_identifiers(arguments.get("identifiers", ""))
        if not identifiers:
            return {
                "status": "error",
                "error": "identifiers parameter required (newline-separated gene/protein IDs)",
            }

        include_disease = arguments.get("include_disease", True)
        url = self._identifiers_url(arguments.get("projection", True))
        params = {
            "pageSize": self._page_size(arguments),
            "page": 1,
            # The value is sent as the lowercase text "true"/"false".
            "includeDisease": str(include_disease).lower(),
        }
        data = self._post_identifiers(url, identifiers, params)
        return self._format_analysis_result(data, identifiers)

    def _species_comparison(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Perform species comparison analysis.

        Posts ``identifiers`` to the projection endpoint. NOTE: a ``species``
        argument may be supplied by callers but is not forwarded to the
        service by this endpoint; only ``identifiers`` and ``page_size`` are
        used.
        """
        identifiers = self._normalize_identifiers(arguments.get("identifiers", ""))
        if not identifiers:
            return {
                "status": "error",
                "error": "identifiers parameter required (newline-separated gene/protein IDs)",
            }

        url = f"{ANALYSIS_BASE_URL}/identifiers/projection"
        params = {
            "pageSize": self._page_size(arguments),
            "page": 1,
        }
        data = self._post_identifiers(url, identifiers, params)
        return self._format_analysis_result(data, identifiers)

    def _token_result(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Retrieve analysis results by token."""
        token = arguments.get("token", "")
        if not token:
            return {"status": "error", "error": "token parameter is required"}

        url = f"{ANALYSIS_BASE_URL}/token/{token}"
        params = {
            "pageSize": self._page_size(arguments),
            "page": 1,
        }
        data = self._get_json(url, params)
        return self._format_analysis_result(data, "")

    def _expression_analysis(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Quantitative expression analysis (type=EXPRESSION).

        Maps numeric expression / fold-change values onto Reactome pathways.
        Submit tab-delimited 'GENE\\tVALUE' lines so Reactome treats the input
        as an expression matrix and overlays the values per pathway.

        Unlike ``_pathway_enrichment``, ``projection`` defaults to False here.
        """
        identifiers = self._normalize_identifiers(arguments.get("identifiers", ""))
        if not identifiers:
            return {
                "status": "error",
                "error": (
                    "identifiers parameter required: tab-delimited 'GENE\\tVALUE' "
                    "lines, one per row (e.g. 'PTEN\\t2.5\\nTP53\\t-1.8')."
                ),
            }

        include_disease = arguments.get("include_disease", True)
        url = self._identifiers_url(arguments.get("projection", False))
        params = {
            "pageSize": self._page_size(arguments),
            "page": 1,
            "includeDisease": str(include_disease).lower(),
        }
        data = self._post_identifiers(url, identifiers, params)
        return self._format_analysis_result(data, identifiers, include_expression=True)

    def _species_comparison_v2(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """True cross-species comparison (type=SPECIES_COMPARISON).

        Calls the /species/{source}/{target} endpoint, which compares a source
        species' pathways against a target species by orthology.

        Arguments read: ``species`` (required target), ``source_species``
        (default ``"homoSapiens"``) and ``page_size``.
        """
        species = arguments.get("species")
        # Explicit None/"" test: any other value (including 0) is forwarded.
        if species in (None, ""):
            return {
                "status": "error",
                "error": (
                    "species parameter required: Reactome dbId of the species to "
                    "compare against the source (e.g. 48892 for Mus musculus)."
                ),
            }

        source = arguments.get("source_species", "homoSapiens")
        url = f"{ANALYSIS_BASE_URL}/species/{source}/{species}"
        params = {
            "pageSize": self._page_size(arguments),
            "page": 1,
        }
        data = self._get_json(url, params)
        return self._format_analysis_result(data, "")

    def _found_entities(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Per-pathway found-entities drill-down.

        For an analysis token + a hit pathway, returns exactly which submitted
        identifiers matched and their Reactome cross-references (mapsTo).

        Arguments read: ``token`` and ``pathway`` (both required) and
        ``resource`` (default ``"TOTAL"``).
        """
        token = arguments.get("token", "")
        pathway = arguments.get("pathway", "")
        if not token:
            return {"status": "error", "error": "token parameter is required"}
        if not pathway:
            return {
                "status": "error",
                "error": "pathway parameter is required (e.g. 'R-HSA-3700989')",
            }

        resource = arguments.get("resource", "TOTAL")
        url = f"{ANALYSIS_BASE_URL}/token/{token}/found/entities/{pathway}"
        data = self._get_json(url, {"resource": resource})

        # Keep only the fields exposed by this tool for each matched entity.
        identifiers = [
            {
                "id": ent.get("id"),
                "exp": ent.get("exp", []),
                "mapsTo": [
                    {"resource": m.get("resource"), "ids": m.get("ids", [])}
                    for m in ent.get("mapsTo", [])
                ],
            }
            for ent in data.get("identifiers", [])
        ]

        return {
            "status": "success",
            "data": {
                "pathway": pathway,
                # Fall back to the number of rows received when the service
                # does not report a "found" count.
                "found": data.get("found", len(identifiers)),
                "total_entities_count": data.get("totalEntitiesCount"),
                "resources": data.get("resources", []),
                "identifiers": identifiers,
            },
            "metadata": {
                "source": "Reactome Analysis Service",
                "token": token,
                "returned": len(identifiers),
            },
        }

    def _not_found_identifiers(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List submitted identifiers that did NOT map to any Reactome entity."""
        token = arguments.get("token", "")
        if not token:
            return {"status": "error", "error": "token parameter is required"}

        url = f"{ANALYSIS_BASE_URL}/token/{token}/notFound"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        # Entries may be objects carrying an "id" or bare values; a non-list
        # payload is treated as "nothing reported".
        not_found = (
            [ent.get("id") if isinstance(ent, dict) else ent for ent in data]
            if isinstance(data, list)
            else []
        )

        return {
            "status": "success",
            "data": {
                "token": token,
                "not_found_count": len(not_found),
                "not_found": not_found,
            },
            "metadata": {
                "source": "Reactome Analysis Service",
                "token": token,
                "returned": len(not_found),
            },
        }

    def _format_analysis_result(
        self, data: Dict, identifiers: str, include_expression: bool = False
    ) -> Dict[str, Any]:
        """Format analysis result into standard output.

        Args:
            data: Decoded JSON body of an analysis response.
            identifiers: Submitted identifier payload. Currently unused; kept
                so existing call sites remain valid.
            include_expression: When True, add per-pathway ``entities_exp``
                and the top-level expression column names / min / max.

        Returns:
            ``{"status": "success", "data": {...}, "metadata": {...}}``.
        """
        # ``or {}`` / ``or []`` also covers keys that are present but null,
        # which a plain ``.get(key, {})`` default would not.
        summary = data.get("summary") or {}

        pathways = []
        for pw in data.get("pathways") or []:
            entities = pw.get("entities") or {}
            reactions = pw.get("reactions") or {}
            species = pw.get("species") or {}

            pathway_entry = {
                "pathway_id": pw.get("stId"),
                "name": pw.get("name"),
                "species": species.get("name"),
                "is_disease": pw.get("inDisease", False),
                "is_lowest_level": pw.get("llp", False),
                "entities_found": entities.get("found"),
                "entities_total": entities.get("total"),
                "entities_ratio": entities.get("ratio"),
                "p_value": entities.get("pValue"),
                "fdr": entities.get("fdr"),
                "reactions_found": reactions.get("found"),
                "reactions_total": reactions.get("total"),
            }
            if include_expression:
                pathway_entry["entities_exp"] = entities.get("exp")
            pathways.append(pathway_entry)

        result_data = {
            "token": summary.get("token"),
            "analysis_type": summary.get("type"),
            "projection": summary.get("projection"),
            "identifiers_not_found": data.get("identifiersNotFound", 0),
            "pathways_found": data.get("pathwaysFound", 0),
            "pathways": pathways,
        }
        if include_expression:
            expression = data.get("expression") or {}
            result_data["expression_column_names"] = expression.get("columnNames", [])
            result_data["expression_min"] = expression.get("min")
            result_data["expression_max"] = expression.get("max")

        return {
            "status": "success",
            "data": result_data,
            "metadata": {
                "source": "Reactome Analysis Service",
                "total_pathways": data.get("pathwaysFound", 0),
                "returned": len(pathways),
            },
        }