# Source code for tooluniverse.reactome_analysis_tool
# reactome_analysis_tool.py
"""
Reactome Analysis Service tool for ToolUniverse.
The Reactome Analysis Service provides pathway overrepresentation analysis,
expression data analysis, and species comparison for gene/protein lists.
This is separate from the Reactome Content Service (already in ToolUniverse).
API: https://reactome.org/AnalysisService
No authentication required. Free public access.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
ANALYSIS_BASE_URL = "https://reactome.org/AnalysisService"
[docs]
@register_tool("ReactomeAnalysisTool")
class ReactomeAnalysisTool(BaseTool):
"""
Tool for Reactome pathway analysis (enrichment/overrepresentation).
Accepts gene/protein identifiers and performs overrepresentation
analysis or species comparison against Reactome pathways. Returns
enriched pathways with p-values, FDR, and entity counts.
No authentication required.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """Set up the tool from its configuration mapping.

    Args:
        tool_config: Tool configuration. The optional ``timeout`` entry
            (default 60) is used as the HTTP request timeout, and the
            optional ``endpoint`` entry nested under ``fields`` (default
            ``"pathway_enrichment"``) selects which analysis ``_query``
            dispatches to.
    """
    super().__init__(tool_config)
    endpoint_settings = tool_config.get("fields", {})
    self.endpoint = endpoint_settings.get("endpoint", "pathway_enrichment")
    self.timeout = tool_config.get("timeout", 60)
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Execute the configured Reactome Analysis call.

    Delegates to ``_query`` and returns its result unchanged. Any exception
    raised on the way is converted into a ``{"status": "error", ...}``
    dictionary rather than propagated.

    Args:
        arguments: Tool arguments forwarded as-is to ``_query``.

    Returns:
        The dictionary produced by ``_query``, or an error dictionary
        describing the failure.
    """
    try:
        return self._query(arguments)
    except requests.exceptions.Timeout:
        message = f"Reactome Analysis request timed out after {self.timeout} seconds"
    except requests.exceptions.ConnectionError:
        message = "Failed to connect to Reactome Analysis Service."
    except requests.exceptions.HTTPError as http_err:
        message = f"Reactome Analysis HTTP error: {http_err.response.status_code}"
    except Exception as exc:
        # Top-level boundary: report anything else instead of raising.
        message = f"Unexpected error: {exc!s}"
    return {"status": "error", "error": message}
[docs]
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Route to appropriate analysis endpoint."""
if self.endpoint == "pathway_enrichment":
return self._pathway_enrichment(arguments)
elif self.endpoint == "species_comparison":
return self._species_comparison(arguments)
elif self.endpoint == "token_result":
return self._token_result(arguments)
elif self.endpoint == "expression_analysis":
return self._expression_analysis(arguments)
elif self.endpoint == "species_comparison_v2":
return self._species_comparison_v2(arguments)
elif self.endpoint == "found_entities":
return self._found_entities(arguments)
elif self.endpoint == "not_found_identifiers":
return self._not_found_identifiers(arguments)
else:
return {"status": "error", "error": f"Unknown endpoint: {self.endpoint}"}
[docs]
def _pathway_enrichment(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Perform pathway overrepresentation analysis."""
identifiers = arguments.get("identifiers", "")
if not identifiers:
return {
"status": "error",
"error": "identifiers parameter required (newline-separated gene/protein IDs)",
}
# Ensure identifiers is newline-separated
if isinstance(identifiers, list):
identifiers = "\n".join(identifiers)
page_size = arguments.get("page_size", 20)
include_disease = arguments.get("include_disease", True)
projection = arguments.get("projection", True)
url = (
f"{ANALYSIS_BASE_URL}/identifiers/projection"
if projection
else f"{ANALYSIS_BASE_URL}/identifiers/"
)
params = {
"pageSize": min(page_size, 50),
"page": 1,
"includeDisease": str(include_disease).lower(),
}
response = requests.post(
url,
data=identifiers,
headers={"Content-Type": "text/plain"},
params=params,
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
return self._format_analysis_result(data, identifiers)
[docs]
def _species_comparison(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Perform species comparison analysis."""
identifiers = arguments.get("identifiers", "")
if not identifiers:
return {
"status": "error",
"error": "identifiers parameter required (newline-separated gene/protein IDs)",
}
if isinstance(identifiers, list):
identifiers = "\n".join(identifiers)
arguments.get("species", 9606)
page_size = arguments.get("page_size", 20)
url = f"{ANALYSIS_BASE_URL}/identifiers/projection"
params = {
"pageSize": min(page_size, 50),
"page": 1,
}
response = requests.post(
url,
data=identifiers,
headers={"Content-Type": "text/plain"},
params=params,
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
return self._format_analysis_result(data, identifiers)
[docs]
def _token_result(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Retrieve analysis results by token."""
token = arguments.get("token", "")
if not token:
return {"status": "error", "error": "token parameter is required"}
page_size = arguments.get("page_size", 20)
url = f"{ANALYSIS_BASE_URL}/token/{token}"
params = {
"pageSize": min(page_size, 50),
"page": 1,
}
response = requests.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
data = response.json()
return self._format_analysis_result(data, "")
[docs]
def _expression_analysis(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Quantitative expression analysis (type=EXPRESSION).
Maps numeric expression / fold-change values onto Reactome pathways.
Submit tab-delimited 'GENE\\tVALUE' lines so Reactome treats the input
as an expression matrix and overlays the values per pathway.
"""
identifiers = arguments.get("identifiers", "")
if not identifiers:
return {
"status": "error",
"error": (
"identifiers parameter required: tab-delimited 'GENE\\tVALUE' "
"lines, one per row (e.g. 'PTEN\\t2.5\\nTP53\\t-1.8')."
),
}
if isinstance(identifiers, list):
identifiers = "\n".join(identifiers)
page_size = arguments.get("page_size", 20)
include_disease = arguments.get("include_disease", True)
projection = arguments.get("projection", False)
url = (
f"{ANALYSIS_BASE_URL}/identifiers/projection"
if projection
else f"{ANALYSIS_BASE_URL}/identifiers/"
)
params = {
"pageSize": min(page_size, 50),
"page": 1,
"includeDisease": str(include_disease).lower(),
}
response = requests.post(
url,
data=identifiers,
headers={"Content-Type": "text/plain"},
params=params,
timeout=self.timeout,
)
response.raise_for_status()
data = response.json()
return self._format_analysis_result(data, identifiers, include_expression=True)
[docs]
def _species_comparison_v2(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""True cross-species comparison (type=SPECIES_COMPARISON).
Calls the genuine /species/{source}/{target} endpoint, which compares
a source species' pathways against a target species by orthology.
"""
species = arguments.get("species")
if species in (None, ""):
return {
"status": "error",
"error": (
"species parameter required: Reactome dbId of the species to "
"compare against the source (e.g. 48892 for Mus musculus)."
),
}
source = arguments.get("source_species", "homoSapiens")
page_size = arguments.get("page_size", 20)
url = f"{ANALYSIS_BASE_URL}/species/{source}/{species}"
params = {
"pageSize": min(page_size, 50),
"page": 1,
}
response = requests.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
data = response.json()
return self._format_analysis_result(data, "")
[docs]
def _found_entities(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Per-pathway found-entities drill-down.
For an analysis token + a hit pathway, returns exactly which submitted
identifiers matched and their Reactome cross-references (mapsTo).
"""
token = arguments.get("token", "")
pathway = arguments.get("pathway", "")
if not token:
return {"status": "error", "error": "token parameter is required"}
if not pathway:
return {
"status": "error",
"error": "pathway parameter is required (e.g. 'R-HSA-3700989')",
}
resource = arguments.get("resource", "TOTAL")
url = f"{ANALYSIS_BASE_URL}/token/{token}/found/entities/{pathway}"
params = {"resource": resource}
response = requests.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
data = response.json()
identifiers = []
for ent in data.get("identifiers", []):
maps_to = []
for m in ent.get("mapsTo", []):
maps_to.append(
{
"resource": m.get("resource"),
"ids": m.get("ids", []),
}
)
identifiers.append(
{
"id": ent.get("id"),
"exp": ent.get("exp", []),
"mapsTo": maps_to,
}
)
return {
"status": "success",
"data": {
"pathway": pathway,
"found": data.get("found", len(identifiers)),
"total_entities_count": data.get("totalEntitiesCount"),
"resources": data.get("resources", []),
"identifiers": identifiers,
},
"metadata": {
"source": "Reactome Analysis Service",
"token": token,
"returned": len(identifiers),
},
}
[docs]
def _not_found_identifiers(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""List submitted identifiers that did NOT map to any Reactome entity."""
token = arguments.get("token", "")
if not token:
return {"status": "error", "error": "token parameter is required"}
url = f"{ANALYSIS_BASE_URL}/token/{token}/notFound"
response = requests.get(url, timeout=self.timeout)
response.raise_for_status()
data = response.json()
not_found = []
if isinstance(data, list):
for ent in data:
if isinstance(ent, dict):
not_found.append(ent.get("id"))
else:
not_found.append(ent)
return {
"status": "success",
"data": {
"token": token,
"not_found_count": len(not_found),
"not_found": not_found,
},
"metadata": {
"source": "Reactome Analysis Service",
"token": token,
"returned": len(not_found),
},
}
[docs]
def _format_analysis_result(
self, data: Dict, identifiers: str, include_expression: bool = False
) -> Dict[str, Any]:
"""Format analysis result into standard output."""
summary = data.get("summary", {})
pathways_raw = data.get("pathways", [])
pathways = []
for pw in pathways_raw:
entities = pw.get("entities", {})
reactions = pw.get("reactions", {})
species = pw.get("species", {})
pathway_entry = {
"pathway_id": pw.get("stId"),
"name": pw.get("name"),
"species": species.get("name"),
"is_disease": pw.get("inDisease", False),
"is_lowest_level": pw.get("llp", False),
"entities_found": entities.get("found"),
"entities_total": entities.get("total"),
"entities_ratio": entities.get("ratio"),
"p_value": entities.get("pValue"),
"fdr": entities.get("fdr"),
"reactions_found": reactions.get("found"),
"reactions_total": reactions.get("total"),
}
if include_expression:
pathway_entry["entities_exp"] = entities.get("exp")
pathways.append(pathway_entry)
result_data = {
"token": summary.get("token"),
"analysis_type": summary.get("type"),
"projection": summary.get("projection"),
"identifiers_not_found": data.get("identifiersNotFound", 0),
"pathways_found": data.get("pathwaysFound", 0),
"pathways": pathways,
}
if include_expression:
expression = data.get("expression") or {}
result_data["expression_column_names"] = expression.get("columnNames", [])
result_data["expression_min"] = expression.get("min")
result_data["expression_max"] = expression.get("max")
return {
"status": "success",
"data": result_data,
"metadata": {
"source": "Reactome Analysis Service",
"total_pathways": data.get("pathwaysFound", 0),
"returned": len(pathways),
},
}