Source code for tooluniverse.remap_tool
import re
from typing import Any, Dict
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL of the ReMap REST service used for region-based peak lookups.
REMAP_REST_BASE = "https://remap-rest.univ-amu.fr/api/V1"

# Accepts "chrom:start-end" strings such as "chr1:1000000-1100000"; the "chr"
# prefix is optional and matching ignores case.
_REGION_RE = re.compile(
    r"^(chr)?[\w]+:\d+-\d+$",
    flags=re.IGNORECASE,
)
[docs]
@register_tool("ReMapRESTTool")
class ReMapRESTTool(BaseTool):
    """Tool for transcription-factor binding lookups over HTTP.

    Two operations are supported, selected by ``operation`` (read from the
    call arguments, falling back to the ``fields.operation`` config value):

    * ``"get_peaks_in_region"`` -- query the ReMap REST API for the peaks
      overlapping a genomic region.
    * anything else -- the legacy ENCODE TF ChIP-seq experiment search.

    Every operation returns a dict whose ``"status"`` is ``"success"`` (with
    a ``"data"`` payload) or ``"error"`` (with an ``"error"`` message).
    Request and parsing failures are reported that way instead of raised.
    """

    # Default and upper bound for the ENCODE search ``limit`` argument.
    _DEFAULT_ENCODE_LIMIT = 10
    _MAX_ENCODE_LIMIT = 50

    def __init__(self, tool_config: Dict):
        """Create the HTTP session and read the optional ``fields`` settings.

        Args:
            tool_config: Tool configuration. Its optional ``"fields"``
                mapping may supply ``"endpoint"`` (a URL template formatted
                with ``gene_name``, ``cell_type`` and ``limit``) and
                ``"operation"`` (the default operation used by :meth:`run`).
        """
        super().__init__(tool_config)
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})
        # Passed as ``timeout`` (seconds) to every ``session.get`` call.
        self.timeout = 30
        # ``or {}`` also covers a config that sets "fields" to None.
        fields = tool_config.get("fields") or {}
        self.endpoint_template = fields.get(
            "endpoint",
            "https://www.encodeproject.org/search/?type=Experiment&assay_title=TF+ChIP-seq&target.label={gene_name}&biosample_ontology.term_name={cell_type}&format=json&limit={limit}",
        )
        # Optional operation hint from config (defaults to legacy ENCODE search).
        self.operation = fields.get("operation", "")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch a call to the operation named in ``arguments`` or config.

        Args:
            arguments: Call arguments. ``operation`` (optional) overrides the
                configured default; the remaining keys are forwarded to the
                selected operation unchanged.

        Returns:
            The status dict produced by the selected operation.
        """
        # Only the region-peak operation talks to ReMap; everything else keeps
        # the legacy ENCODE experiment search so existing tools are unchanged.
        operation = arguments.get("operation", self.operation)
        if operation == "get_peaks_in_region":
            return self._get_peaks_in_region(arguments)
        return self._encode_tf_binding(arguments)

    @staticmethod
    def _parse_peak(entry: Any) -> Dict[str, Any]:
        """Flatten one raw peak entry into the record returned to callers."""
        values = entry.get("peakValues", entry) if isinstance(entry, dict) else {}
        # A null / non-object "peakValues" must not abort the whole request.
        if not isinstance(values, dict):
            values = {}
        name = values.get("name", {})
        if not isinstance(name, dict):
            name = {}

        # "Treatments" is accepted either as {"data": [...]} or as a bare list.
        treatments = name.get("Treatments", {})
        if isinstance(treatments, dict):
            treat_list = treatments.get("data", [])
        elif isinstance(treatments, list):
            treat_list = treatments
        else:
            treat_list = []

        return {
            "chrom": values.get("chrom"),
            "chromStart": values.get("chromStart"),
            "chromEnd": values.get("chromEnd"),
            "experiment": name.get("Experiment"),
            "tf": name.get("TF"),
            "biotype": name.get("Biotype"),
            "treatments": treat_list,
        }

    def _get_peaks_in_region(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Retrieve all ReMap TR-binding peaks overlapping a genomic region.

        Args:
            arguments: Call arguments. ``region`` is required and must look
                like ``chrom:start-end`` (commas are stripped first).
                Optional keys: ``version`` (default ``"2022"``), ``assembly``
                (default ``"hg38"``), ``datatype`` (default ``"all"``) and
                ``limit`` (maximum number of peaks returned; at least one
                peak is kept and an unusable value is ignored).

        Returns:
            On success, ``{"status": "success", "data": {...}}`` where
            ``data`` holds the echoed ``region`` / ``assembly`` / ``version``
            / ``datatype`` / ``size``, ``peak_count`` (all peaks received),
            ``returned_count`` (after ``limit``), ``unique_tf_count`` and
            ``unique_tfs`` (computed over all peaks received), the ``peaks``
            list and the request ``url``. On failure,
            ``{"status": "error", "error": <message>}``.
        """
        try:
            # ``or ""`` keeps an explicit None from being stringified to "None".
            region = str(arguments.get("region") or "").strip().replace(",", "")
            if not region:
                return {
                    "status": "error",
                    "error": "region is required (e.g. chr1:1000000-1100000)",
                }
            if not _REGION_RE.match(region):
                return {
                    "status": "error",
                    "error": f"Invalid region format: '{region}'. Use chrom:start-end (e.g. chr1:1000000-1100000).",
                }

            version = str(arguments.get("version", "2022"))
            assembly = str(arguments.get("assembly", "hg38"))
            datatype = str(arguments.get("datatype", "all"))
            limit = arguments.get("limit")

            # Percent-encode the caller-supplied path segments so characters
            # such as "/", "?" or "#" cannot alter the request path or query.
            path = "/".join(
                quote(segment, safe="") for segment in (version, assembly, datatype)
            )
            url = f"{REMAP_REST_BASE}/get_peaks/{path}/{region}?format=json"

            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
            resp_data = response.json()
            if not isinstance(resp_data, dict):
                return {
                    "status": "error",
                    "error": "ReMap REST API error: unexpected response format (expected a JSON object)",
                }

            raw_peaks = resp_data.get("peaks", []) or []
            peaks = [self._parse_peak(entry) for entry in raw_peaks]
            # TF names are collected before truncation, so they describe the
            # whole region rather than just the returned slice.
            tfs = {peak["tf"] for peak in peaks if peak["tf"]}

            if limit is not None:
                try:
                    peaks = peaks[: max(1, int(limit))]
                except (TypeError, ValueError, OverflowError):
                    # Best effort: an unusable limit means "return everything".
                    pass

            return {
                "status": "success",
                "data": {
                    "region": resp_data.get("region", region),
                    "assembly": resp_data.get("assembly", assembly),
                    "version": resp_data.get("version", version),
                    "datatype": resp_data.get("datatype", datatype),
                    "size": resp_data.get("size"),
                    "peak_count": len(raw_peaks),
                    "returned_count": len(peaks),
                    "unique_tf_count": len(tfs),
                    "unique_tfs": sorted(tfs),
                    "peaks": peaks,
                    "url": url,
                },
            }
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": "ReMap REST request timed out (region may be too large). Try a smaller interval.",
            }
        except Exception as e:
            return {"status": "error", "error": f"ReMap REST API error: {str(e)}"}

    def _encode_tf_binding(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search ENCODE for TF ChIP-seq experiments on a gene / cell type.

        Args:
            arguments: Call arguments. ``gene_name`` is required. Optional
                keys: ``cell_type`` (default ``"HepG2"``) and ``limit``
                (default 10, clamped to the range 1..50).

        Returns:
            On success, ``{"status": "success", "data": {...}}`` where
            ``data`` holds ``experiments`` (one summary dict per result),
            ``count``, the echoed ``gene_name`` / ``cell_type`` and the
            request ``url``; an HTTP 404 yields an empty result with a
            ``note``. On failure, ``{"status": "error", "error": <message>}``.
        """
        try:
            gene_name = arguments.get("gene_name", "")
            if not gene_name:
                return {"status": "error", "error": "gene_name is required"}
            cell_type = arguments.get("cell_type", "HepG2")

            raw_limit = arguments.get("limit")
            try:
                limit = (
                    self._DEFAULT_ENCODE_LIMIT if raw_limit is None else int(raw_limit)
                )
            except (TypeError, ValueError, OverflowError):
                return {
                    "status": "error",
                    "error": f"limit must be an integer, got {raw_limit!r}",
                }
            # Never send a zero or negative page size upstream.
            limit = max(1, min(limit, self._MAX_ENCODE_LIMIT))

            # Percent-encode the values: a raw "&", "+" or "#" would otherwise
            # be read as query-string syntax and change the search.
            url = self.endpoint_template.format(
                gene_name=quote(str(gene_name), safe=""),
                cell_type=quote(str(cell_type), safe=""),
                limit=limit,
            )
            response = self.session.get(url, timeout=self.timeout)

            # ENCODE returns HTTP 404 when the search yields zero results;
            # treat that as an empty result set rather than a hard error.
            if response.status_code == 404:
                return {
                    "status": "success",
                    "data": {
                        "experiments": [],
                        "count": 0,
                        "gene_name": gene_name,
                        "cell_type": cell_type,
                        "url": url,
                        "note": "No experiments found for this gene/cell-type combination.",
                    },
                }

            response.raise_for_status()
            resp_data = response.json()
            raw_experiments = resp_data.get("@graph", []) or []
            experiments = [
                {
                    "accession": e.get("accession"),
                    "assay_title": e.get("assay_title"),
                    "target": e.get("target"),
                    "biosample_ontology": e.get("biosample_ontology"),
                    "description": e.get("description"),
                    "status": e.get("status"),
                }
                for e in raw_experiments
            ]
            return {
                "status": "success",
                "data": {
                    "experiments": experiments,
                    "count": len(experiments),
                    "gene_name": gene_name,
                    "cell_type": cell_type,
                    "url": url,
                },
            }
        except Exception as e:
            return {"status": "error", "error": f"ReMap API error: {str(e)}"}