# Source code for tooluniverse.enrichr_ext_tool
"""
Extended Enrichr API tools for ToolUniverse.
Enrichr is a comprehensive gene set enrichment analysis tool developed by
the Ma'ayan Lab. These extended tools provide direct access to enrichment
results, library listing, and gene set lookup.
API: https://maayanlab.cloud/Enrichr/
No authentication required.
"""
import requests
from typing import Any, Dict, List
from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the Enrichr REST API; the endpoint paths used in this module
# (/datasetStatistics, /addList, /enrich) are appended to it.
ENRICHR_BASE = "https://maayanlab.cloud/Enrichr"
# [docs]
@register_tool("EnrichrExtTool")
class EnrichrExtTool(BaseTool):
    """
    Extended Enrichr tools for gene set enrichment analysis.

    Operations:
    - list_libraries: List all available gene set libraries with statistics
    - enrich: Submit a gene list and get enrichment results for a library
    - get_top_enriched: Submit genes and return top enriched terms across libraries

    Every operation returns a dict whose ``"status"`` is ``"success"`` (payload
    under ``"data"``) or ``"error"`` (message under ``"error"``).
    """

    # Fallbacks used when the caller omits a parameter or passes None for it.
    _DEFAULT_LIBRARY = "GO_Biological_Process_2023"
    _DEFAULT_LIBRARIES = (
        "GO_Biological_Process_2023",
        "KEGG_2021_Human",
        "Reactome_2022",
        "WikiPathways_2024_Human",
    )
    # Highest positional index read from an enrichment row, plus one.
    _MIN_ROW_LENGTH = 7

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the HTTP timeout and parameter schema from ``tool_config``.

        Args:
            tool_config: Tool configuration; ``"timeout"`` (default 30) is
                forwarded to every ``requests`` call, ``"parameter"``
                (default ``{}``) is kept on ``self.parameter``.
        """
        super().__init__(tool_config)
        self.timeout: int = tool_config.get("timeout", 30)
        self.parameter = tool_config.get("parameter", {})

    def run(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the appropriate operation.

        Args:
            params: Call arguments. ``"operation"`` selects the handler; when
                it is missing or empty the value comes from
                ``self.get_schema_const_operation()`` (provided by the base
                class, not defined in this file).

        Returns:
            The handler's result dict, or an error dict naming the valid
            operations when the operation is unknown.
        """
        operation = params.get("operation", "")
        if not operation:
            operation = self.get_schema_const_operation()
        dispatch = {
            "list_libraries": self._list_libraries,
            "enrich": self._enrich,
            "get_top_enriched": self._get_top_enriched,
        }
        handler = dispatch.get(operation)
        if not handler:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Valid: {list(dispatch.keys())}",
            }
        return handler(params)

    # ------------------------------------------------------------------
    # Input normalisation helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _normalize_genes(gene_list: Any) -> List[str]:
        """Return the gene symbols as a list of non-empty, stripped strings.

        A single string is split on commas and whitespace; joining it
        directly would otherwise submit one "gene" per character. Values that
        are neither a string nor a list/tuple/set yield an empty list.
        """
        if isinstance(gene_list, str):
            candidates = gene_list.replace(",", " ").split()
        elif isinstance(gene_list, (list, tuple, set, frozenset)):
            candidates = gene_list
        else:
            candidates = []
        cleaned = (str(gene).strip() for gene in candidates if gene is not None)
        return [gene for gene in cleaned if gene]

    @classmethod
    def _normalize_libraries(cls, libraries: Any) -> List[str]:
        """Return library names as a list, using the defaults for ``None``.

        A single string is treated as a comma-separated list of names rather
        than being iterated character by character. An explicitly empty list
        is preserved (the caller then gets an empty result set).
        """
        if libraries is None:
            return list(cls._DEFAULT_LIBRARIES)
        if isinstance(libraries, str):
            libraries = libraries.split(",")
        elif not isinstance(libraries, (list, tuple, set, frozenset)):
            return []
        names = (str(name).strip() for name in libraries if name is not None)
        return [name for name in names if name]

    @staticmethod
    def _coerce_top_n(value: Any, default: int) -> int:
        """Return ``value`` as a non-negative int, or ``default`` if unusable.

        Negative numbers are clamped to 0 so they cannot act as a
        "drop the last N rows" slice bound.
        """
        try:
            top_n = int(value)
        except (TypeError, ValueError):
            return default
        return max(top_n, 0)

    # ------------------------------------------------------------------
    # HTTP helpers (all may raise requests.exceptions.RequestException)
    # ------------------------------------------------------------------
    def _submit_genes(self, gene_list: List[str]) -> Dict[str, Any]:
        """Submit a gene list to Enrichr and return the decoded JSON reply.

        The genes are sent newline-separated as multipart form fields to the
        ``/addList`` endpoint.

        Raises:
            requests.exceptions.RequestException: On network or HTTP errors.
            ValueError: If the response body is not valid JSON.
        """
        gene_str = "\n".join(gene_list)
        resp = requests.post(
            f"{ENRICHR_BASE}/addList",
            files={
                "list": (None, gene_str),
                "description": (None, "ToolUniverse enrichment query"),
            },
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return resp.json()

    def _submit_for_id(self, genes: List[str]) -> Any:
        """Submit ``genes`` and return the ``userListId``, or None if absent."""
        reply = self._submit_genes(genes)
        if not isinstance(reply, dict):
            return None
        return reply.get("userListId")

    def _request_enrichment(self, user_list_id: Any, library: str) -> Any:
        """GET the ``/enrich`` endpoint for one library; return the response.

        The HTTP status is deliberately not checked here because the two
        callers report HTTP failures differently.
        """
        return requests.get(
            f"{ENRICHR_BASE}/enrich",
            params={"userListId": user_list_id, "backgroundType": library},
            timeout=self.timeout,
        )

    @classmethod
    def _extract_rows(cls, resp: Any, library: str) -> List[Any]:
        """Decode ``resp`` and return the well-formed result rows for ``library``.

        Rows are expected under the key named after the library. Anything
        that is not a sequence with at least ``_MIN_ROW_LENGTH`` items is
        dropped so that positional access downstream cannot fail.

        Raises:
            ValueError: If the response body is not valid JSON.
        """
        data = resp.json()
        rows = data.get(library, []) if isinstance(data, dict) else []
        if not isinstance(rows, list):
            return []
        return [
            row
            for row in rows
            if isinstance(row, (list, tuple)) and len(row) >= cls._MIN_ROW_LENGTH
        ]

    @staticmethod
    def _format_term(row: Any, detailed: bool) -> Dict[str, Any]:
        """Map one positional result row onto named fields.

        Args:
            row: A result row with at least seven positional items.
            detailed: When true, also include ``rank``, ``z_score`` and
                ``overlap_count`` (the shape returned by ``enrich``).
        """
        genes = row[5]
        if not detailed:
            return {
                "term": row[1],
                "p_value": row[2],
                "combined_score": row[4],
                "overlapping_genes": genes,
                "adjusted_p_value": row[6],
            }
        return {
            "rank": row[0],
            "term": row[1],
            "p_value": row[2],
            "z_score": row[3],
            "combined_score": row[4],
            "overlapping_genes": genes,
            "adjusted_p_value": row[6],
            "overlap_count": len(genes) if hasattr(genes, "__len__") else 0,
        }

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------
    def _list_libraries(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """List all available Enrichr gene set libraries with statistics.

        Args:
            params: Optional ``"category"``: a case-insensitive substring
                matched against each library *name* (not its category id).

        Returns:
            ``{"status": "success", "data": [...]}`` with one dict per
            library, or an error dict if the request or decoding fails.
        """
        category = params.get("category")
        try:
            resp = requests.get(
                f"{ENRICHR_BASE}/datasetStatistics",
                timeout=self.timeout,
            )
            resp.raise_for_status()
            data = resp.json()
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except ValueError as e:
            # Some requests releases raise a plain ValueError for a non-JSON
            # body, which the clause above does not cover.
            return {"status": "error", "error": f"Invalid JSON response: {str(e)}"}

        stats = data.get("statistics", []) if isinstance(data, dict) else []
        if not isinstance(stats, list):
            stats = []
        libraries = [lib for lib in stats if isinstance(lib, dict)]
        if category:
            needle = str(category).lower()
            libraries = [
                lib
                for lib in libraries
                # "or ''" guards against a null libraryName in the payload.
                if needle in str(lib.get("libraryName") or "").lower()
            ]
        result = [
            {
                "library_name": lib.get("libraryName"),
                "num_terms": lib.get("numTerms"),
                "gene_coverage": lib.get("geneCoverage"),
                "genes_per_term": lib.get("genesPerTerm"),
                "category_id": lib.get("categoryId"),
            }
            for lib in libraries
        ]
        return {"status": "success", "data": result}

    def _enrich(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Submit gene list and get enrichment results for a specific library.

        Args:
            params: ``"gene_list"`` (required; list of symbols, or a comma /
                whitespace separated string), ``"library"`` (default
                ``GO_Biological_Process_2023``) and ``"top_n"`` (default 10).

        Returns:
            A success dict with the library name, submitted gene count, total
            number of returned terms and the first ``top_n`` terms, or an
            error dict.
        """
        genes = self._normalize_genes(params.get("gene_list"))
        library = params.get("library") or self._DEFAULT_LIBRARY
        top_n = self._coerce_top_n(params.get("top_n"), 10)
        if not genes:
            return {"status": "error", "error": "gene_list is required."}
        try:
            user_list_id = self._submit_for_id(genes)
            if not user_list_id:
                return {"status": "error", "error": "Failed to submit gene list."}
            resp = self._request_enrichment(user_list_id, library)
            resp.raise_for_status()
            raw_results = self._extract_rows(resp, library)
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except ValueError as e:
            return {"status": "error", "error": f"Invalid JSON response: {str(e)}"}

        enriched = [
            self._format_term(row, detailed=True) for row in raw_results[:top_n]
        ]
        return {
            "status": "success",
            "data": {
                "library": library,
                "gene_count": len(genes),
                "total_terms": len(raw_results),
                "enriched_terms": enriched,
            },
        }

    def _get_top_enriched(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get top enriched terms across multiple libraries.

        The gene list is submitted once and then queried per library. A
        failure for one library (non-200 status, network error or undecodable
        body) is recorded under that library's ``"error"`` key and does not
        discard the results of the other libraries.

        Args:
            params: ``"gene_list"`` (required), ``"libraries"`` (list of
                names or a comma-separated string; four defaults are used
                when omitted) and ``"top_n"`` (default 5).

        Returns:
            A success dict with the gene count and ``"results_by_library"``,
            or an error dict if the gene list is missing or cannot be
            submitted.
        """
        genes = self._normalize_genes(params.get("gene_list"))
        libraries = self._normalize_libraries(params.get("libraries"))
        top_n = self._coerce_top_n(params.get("top_n"), 5)
        if not genes:
            return {"status": "error", "error": "gene_list is required."}
        try:
            user_list_id = self._submit_for_id(genes)
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except ValueError as e:
            return {"status": "error", "error": f"Invalid JSON response: {str(e)}"}
        if not user_list_id:
            return {"status": "error", "error": "Failed to submit gene list."}

        all_results: Dict[str, Any] = {}
        for library in libraries:
            try:
                resp = self._request_enrichment(user_list_id, library)
                if resp.status_code != 200:
                    all_results[library] = {"error": f"HTTP {resp.status_code}"}
                    continue
                raw_results = self._extract_rows(resp, library)
            except (requests.exceptions.RequestException, ValueError) as e:
                # Keep going: results already collected stay usable.
                all_results[library] = {"error": f"Request failed: {str(e)}"}
                continue
            all_results[library] = {
                "total_terms": len(raw_results),
                "top_terms": [
                    self._format_term(row, detailed=False)
                    for row in raw_results[:top_n]
                ],
            }
        return {
            "status": "success",
            "data": {
                "gene_count": len(genes),
                "results_by_library": all_results,
            },
        }