# Source code for tooluniverse.encori_tool
"""ENCORI / starBase miRNA-target interaction tool for ToolUniverse.
ENCORI (the Encyclopedia of RNA Interactomes, formerly starBase) aggregates
CLIP-seq-supported and computationally predicted miRNA-target interactions and
exposes them through a public REST API. ToolUniverse previously had no miRNA
target-lookup tool (skills had to fall back to bulk TargetScan/miRTarBase
downloads); this fills that gap.
API: https://rnasysu.com/encori/api/ (public, no authentication)
"""
from typing import Any, Dict
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
ENCORI_URL = "https://rnasysu.com/encori/api/miRNATarget/"
# Columns flagged 1 when that prediction program supports the interaction.
_PROGRAMS = ["PITA", "RNA22", "miRmap", "microT", "miRanda", "PicTar", "TargetScan"]
[docs]
@register_tool("ENCORITool")
class ENCORITool(BaseTool):
    """Look up miRNA-target interactions (CLIP-supported + predicted) from ENCORI.

    Sends one GET request to ``ENCORI_URL``, parses the tab-separated reply and
    returns the interactions ranked by CLIP support, then by the number of
    prediction programs that agree.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the tool configuration and the HTTP timeout.

        Args:
            tool_config: Tool configuration passed on to ``BaseTool``. The
                optional ``timeout`` key is the request timeout in seconds
                (default 30).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)

    @staticmethod
    def _int_argument(arguments: Dict[str, Any], name: str, default: int) -> int:
        """Return ``arguments[name]`` as an int, or ``default`` if absent/invalid."""
        try:
            return int(arguments.get(name, default))
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _count(cell: Any) -> int:
        """Convert a count cell to int; empty or non-numeric cells count as 0."""
        try:
            return int(cell or 0)
        except (TypeError, ValueError):
            return 0

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Query ENCORI and return ranked miRNA-target interactions.

        Args:
            arguments: Query options.
                ``mirna``: miRNA name; when given, its targets are returned.
                ``gene`` (alias ``gene_symbol``): gene symbol; when given, the
                miRNAs targeting it are returned. At least one of ``mirna`` /
                ``gene`` is required.
                ``clip_min``: sent as ``clipExpNum`` (default 1).
                ``program_min``: sent as ``programNum`` (default 1).
                ``limit``: maximum rows returned, clamped to 1-500 (default 50).
                ``assembly``: genome assembly (default ``"hg38"``).

        Returns:
            A dict with ``status`` set to ``"success"`` (plus ``data`` and
            ``metadata``) or ``"error"`` (plus an ``error`` message). Network
            failures, non-200 responses and unparseable responses are reported
            as ``"error"`` results rather than raised.
        """
        # str() guards against non-string inputs (e.g. numbers) reaching .strip().
        mirna = str(arguments.get("mirna") or "").strip()
        gene = str(
            arguments.get("gene") or arguments.get("gene_symbol") or ""
        ).strip()
        if not mirna and not gene:
            return {
                "status": "error",
                "error": "Provide 'mirna' (e.g. 'hsa-miR-21-5p') to get its targets, "
                "or 'gene' (e.g. 'TP53') to get the miRNAs that target it.",
            }

        clip_min = self._int_argument(arguments, "clip_min", 1)
        program_min = self._int_argument(arguments, "program_min", 1)
        limit = max(1, min(self._int_argument(arguments, "limit", 50), 500))

        params = {
            # "or" so that an explicit None/empty assembly still gets the default.
            "assembly": arguments.get("assembly") or "hg38",
            "geneType": "mRNA",
            "miRNA": mirna or "all",
            "target": gene or "all",
            "clipExpNum": clip_min,
            "degraExpNum": 0,
            "pancancerNum": 0,
            "programNum": program_min,
            "program": "None",
            "cellType": "all",
        }

        # Only the network call lives in the try block so that parsing bugs are
        # not mistaken for request failures.
        try:
            resp = requests.get(ENCORI_URL, params=params, timeout=self.timeout)
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"ENCORI API timed out after {self.timeout}s",
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"ENCORI API request failed: {e}"}

        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"ENCORI API returned HTTP {resp.status_code}",
            }

        # Drop blank lines and '#'-prefixed comment lines; what remains is the
        # header row followed by data rows.
        lines = [ln for ln in resp.text.splitlines() if ln and not ln.startswith("#")]
        query = mirna or gene
        direction = "miRNA->targets" if mirna else "gene->miRNAs"

        if len(lines) < 2:
            return {
                "status": "success",
                "data": [],
                "metadata": {
                    "source": "ENCORI (starBase)",
                    "query": query,
                    "direction": direction,
                    "total": 0,
                    "returned": 0,
                    "note": "No interactions met the CLIP/prediction thresholds.",
                },
            }

        header = lines[0].split("\t")
        idx = {h: i for i, h in enumerate(header)}

        # A multi-line body without these columns is not the expected table
        # (e.g. an error page served with HTTP 200); report it instead of
        # failing with a KeyError below.
        required = ("miRNAname", "geneName", "geneID", "clipExpNum")
        missing = [col for col in required if col not in idx]
        if missing:
            return {
                "status": "error",
                "error": (
                    "ENCORI API returned an unexpected response "
                    f"(missing column(s): {', '.join(missing)}): {lines[0][:200]}"
                ),
            }

        mirna_col = idx["miRNAname"]
        gene_col = idx["geneName"]
        gene_id_col = idx["geneID"]
        clip_col = idx["clipExpNum"]
        pancancer_col = idx.get("pancancerNum")  # optional column
        program_cols = [(p, idx[p]) for p in _PROGRAMS if p in idx]

        rows = []
        for ln in lines[1:]:
            f = ln.split("\t")
            if len(f) < len(header):
                # Truncated row: skip rather than index past its end.
                continue
            programs = [name for name, col in program_cols if f[col] == "1"]
            rows.append(
                {
                    "mirna": f[mirna_col],
                    "gene": f[gene_col],
                    "gene_id": f[gene_id_col],
                    "clip_experiments": self._count(f[clip_col]),
                    "predicted_by": programs,
                    "n_programs": len(programs),
                    "pan_cancer_num": (
                        self._count(f[pancancer_col])
                        if pancancer_col is not None
                        else None
                    ),
                }
            )

        # Strongest experimental support first (CLIP), then prediction breadth.
        rows.sort(key=lambda r: (r["clip_experiments"], r["n_programs"]), reverse=True)
        return {
            "status": "success",
            "data": rows[:limit],
            "metadata": {
                "source": "ENCORI (starBase)",
                "query": query,
                "direction": direction,
                "total": len(rows),
                "returned": min(len(rows), limit),
                "interpretation": (
                    "clip_experiments = number of CLIP-seq experiments supporting the "
                    "site (experimental evidence; higher = stronger). predicted_by lists "
                    "the algorithms predicting it. CLIP-supported targets outrank "
                    "prediction-only ones."
                ),
            },
        }