# Source code for tooluniverse.screen_ccre_tool
"""
ENCODE SCREEN cCRE Tool - Registry of candidate cis-Regulatory Elements
Queries the ENCODE SCREEN (Search Candidate cis-Regulatory Elements by ENCODE)
registry, which annotates the human/mouse genome with candidate regulatory
elements (cCREs) and their epigenomic signal strengths. Each cCRE carries
DNase, H3K4me3 (promoter), H3K27ac (enhancer), and CTCF z-scores plus an
element classification (PLS = promoter-like, pELS/dELS = proximal/distal
enhancer-like, CTCF-only, DNase-H3K4me3).
This is a regulatory-genomics annotation service: given a genomic region you
get the regulatory elements overlapping it, and given a cCRE accession you get
its classification and coordinates. It is the curated, experimentally grounded
counterpart to sequence-to-track deep-learning predictors (Enformer/Borzoi),
which are not available as public REST endpoints.
API base: https://ga.staging.wenglab.org/graphql (GraphQL, no authentication)
Reference: ENCODE Project Consortium / Weng Lab, SCREEN. Moore et al.,
Nature 2020 (the ENCODE3 Registry of cCREs). https://screen.encodeproject.org
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
# GraphQL endpoint that every query issued by this module is POSTed to.
SCREEN_GRAPHQL = "https://ga.staging.wenglab.org/graphql"
# cCRESCREENSearch takes a start/end rank window for each assay (CTCF, DNase,
# enhancer, promoter). The search query always sends this same -10..10 window
# for all four assays, presumably so that no element is excluded by signal
# strength.
# NOTE(review): the tool exposes no argument that narrows this window, so the
# bounds below are effectively fixed for every search.
_RANK_FLOOR = -10
_RANK_CEIL = 10
@register_tool("ScreenCcreTool")
class ScreenCcreTool(BaseTool):
    """
    Tool for querying the ENCODE SCREEN registry of candidate cis-regulatory
    elements (cCREs).

    Supported operations (set via fields.operation):
    - search_region: list cCREs overlapping a genomic region
    - get_ccre: look up one or more cCREs by SCREEN accession

    Every operation returns an envelope dict, either
    ``{"status": "success", "data": [...], "metadata": {...}}`` or
    ``{"status": "error", "error": "<message>"}``.
    """

    # GraphQL document for the region search. The same rank window ($rf..$rc)
    # is applied to all four assays.
    _SEARCH_QUERY = """
    query R($a:String!,$c:String!,$s:Int!,$e:Int!,$rf:Int!,$rc:Int!,$et:String){
      cCRESCREENSearch(
        assembly:$a, coord_chrom:$c, coord_start:$s, coord_end:$e,
        element_type:$et,
        rank_ctcf_start:$rf, rank_ctcf_end:$rc,
        rank_dnase_start:$rf, rank_dnase_end:$rc,
        rank_enhancer_start:$rf, rank_enhancer_end:$rc,
        rank_promoter_start:$rf, rank_promoter_end:$rc
      ){
        chrom start len pct
        ctcf_zscore dnase_zscore enhancer_zscore promoter_zscore maxz
        info{ accession isproximal ctcfmax k4me3max k27acmax }
      }
    }
    """

    # GraphQL document for the accession lookup.
    _ACCESSION_QUERY = """
    query A($a:String!,$acc:[String!]){
      cCREQuery(assembly:$a, accession:$acc){
        accession group rDHS ctcf_bound
        coordinates{ chromosome start end }
      }
    }
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the operation this instance performs.

        Args:
            tool_config: Tool configuration; ``fields.operation`` selects the
                operation and defaults to ``"search_region"``.
        """
        super().__init__(tool_config)
        self.timeout = 30
        # "fields" may be present but null in a config; treat that as empty.
        fields = tool_config.get("fields") or {}
        self.operation = fields.get("operation", "search_region")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Dispatch to the configured operation and return its envelope.

        Args:
            arguments: Operation arguments; ``None`` is treated as ``{}``.

        Returns:
            A success or error envelope. Exceptions raised while handling the
            request are converted into error envelopes instead of propagating.
        """
        arguments = arguments or {}
        try:
            if self.operation == "search_region":
                return self._search_region(arguments)
            if self.operation == "get_ccre":
                return self._get_ccre(arguments)
            return {
                "status": "error",
                "error": f"Unknown operation: {self.operation}",
            }
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": "ENCODE SCREEN API request timed out",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to ENCODE SCREEN API",
            }
        except Exception as e:  # never raise out of run()
            return {"status": "error", "error": f"ENCODE SCREEN API error: {e}"}

    # ---------------------------------------------------------------- helpers

    def _post(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
        """
        POST a GraphQL query to SCREEN and return the decoded JSON object.

        Args:
            query: GraphQL document text.
            variables: Values for the variables declared in ``query``.

        Returns:
            The decoded response body. For a 4xx/5xx response whose body is a
            JSON object with a non-empty ``errors`` list, that body is
            returned so the caller can report the server's own messages.

        Raises:
            requests.exceptions.HTTPError: 4xx/5xx response without a usable
                GraphQL ``errors`` payload.
            ValueError: the body is not JSON, or is JSON but not an object.
        """
        resp = requests.post(
            SCREEN_GRAPHQL,
            json={"query": query, "variables": variables},
            timeout=self.timeout,
            headers={"Content-Type": "application/json"},
        )
        if not resp.ok:
            # GraphQL servers commonly answer invalid requests with HTTP 4xx
            # and a JSON body describing the problem; prefer that detail over
            # the bare status line raise_for_status() would produce.
            try:
                failure = resp.json()
            except ValueError:
                failure = None
            if isinstance(failure, dict) and failure.get("errors"):
                return failure
            resp.raise_for_status()
        payload = resp.json()
        if not isinstance(payload, dict):
            raise ValueError("unexpected non-object JSON response")
        return payload

    @staticmethod
    def _norm_chrom(chrom: str) -> str:
        """
        Return ``chrom`` with surrounding whitespace removed and a lowercase
        ``chr`` prefix, e.g. ``"1"``, ``"chr1"`` and ``"Chr1"`` all give
        ``"chr1"``. Only the prefix is case-folded; the rest is left as given.
        """
        chrom = str(chrom).strip()
        if chrom[:3].lower() == "chr":
            chrom = chrom[3:]
        return f"chr{chrom}"

    @staticmethod
    def _norm_accessions(accession: Any) -> list:
        """
        Turn the ``accession`` argument into a list of non-empty strings.

        Accepts a single value, a comma-separated string, or a list/tuple.
        Items are stringified and stripped; ``None`` and blank items are
        dropped.
        """
        if isinstance(accession, str):
            candidates = accession.split(",")
        elif isinstance(accession, (list, tuple)):
            candidates = list(accession)
        else:
            candidates = [accession]
        cleaned = []
        for item in candidates:
            if item is None:
                continue
            text = str(item).strip()
            if text:
                cleaned.append(text)
        return cleaned

    @staticmethod
    def _graphql_error(payload: Dict[str, Any]) -> str:
        """Join GraphQL error messages into one string for the error envelope."""
        errors = payload.get("errors") or []
        if not isinstance(errors, list):
            errors = [errors]
        messages = [
            str(err.get("message") or "") if isinstance(err, dict) else str(err)
            for err in errors
        ]
        return "SCREEN query error: " + "; ".join(messages)

    @staticmethod
    def _format_search_row(row: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one cCRESCREENSearch row into the tool's result record."""
        info = row.get("info") or {}
        start_pos = row.get("start")
        length = row.get("len")
        # The API reports start + length; derive the end coordinate from them.
        if start_pos is not None and length is not None:
            end_pos = start_pos + length
        else:
            end_pos = None
        return {
            "accession": info.get("accession"),
            "chrom": row.get("chrom"),
            "start": start_pos,
            "end": end_pos,
            "element_type": row.get("pct"),
            "is_proximal": info.get("isproximal"),
            "dnase_zscore": row.get("dnase_zscore"),
            "promoter_zscore": row.get("promoter_zscore"),
            "enhancer_zscore": row.get("enhancer_zscore"),
            "ctcf_zscore": row.get("ctcf_zscore"),
            "k4me3_max": info.get("k4me3max"),
            "k27ac_max": info.get("k27acmax"),
            "ctcf_max": info.get("ctcfmax"),
        }

    # ------------------------------------------------------------- operations

    def _search_region(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """
        List the cCREs returned by SCREEN for a genomic region.

        Args:
            args: ``chrom``, ``start`` and ``end`` are required; ``assembly``
                (default ``"GRCh38"``) and ``element_type`` are optional.

        Returns:
            A success envelope with one record per cCRE, or an error envelope
            for invalid arguments or a GraphQL error.
        """
        chrom = args.get("chrom")
        start = args.get("start")
        end = args.get("end")
        if chrom is None or start is None or end is None:
            return {
                "status": "error",
                "error": "Missing required parameters: chrom, start, end",
            }
        try:
            start = int(start)
            end = int(end)
        except (TypeError, ValueError):
            return {
                "status": "error",
                "error": "start and end must be integers (1-based genomic coordinates)",
            }
        if end <= start:
            return {"status": "error", "error": "end must be greater than start"}

        chrom_name = self._norm_chrom(chrom)
        if chrom_name == "chr":
            # Blank input would otherwise be sent as a bare "chr" prefix.
            return {"status": "error", "error": "chrom must not be empty"}

        # An explicit null/empty assembly falls back to the default, because
        # the query declares the variable as a non-null String.
        assembly = args.get("assembly") or "GRCh38"
        element_type = args.get("element_type") or None  # optional hint filter

        variables = {
            "a": assembly,
            "c": chrom_name,
            "s": start,
            "e": end,
            "rf": _RANK_FLOOR,
            "rc": _RANK_CEIL,
            "et": element_type,
        }
        payload = self._post(self._SEARCH_QUERY, variables)
        if payload.get("errors"):
            return {"status": "error", "error": self._graphql_error(payload)}

        rows = (payload.get("data") or {}).get("cCRESCREENSearch") or []
        results = [self._format_search_row(r) for r in rows]
        return {
            "status": "success",
            "data": results,
            "metadata": {
                "assembly": assembly,
                "region": f"{chrom_name}:{start}-{end}",
                "count": len(results),
                "element_type_filter": element_type,
                "source": "ENCODE SCREEN Registry of cCREs",
            },
        }

    def _get_ccre(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """
        Look up one or more cCREs by SCREEN accession.

        Args:
            args: ``accession`` is required (a single accession, a
                comma-separated string, or a list/tuple of accessions);
                ``assembly`` is optional and defaults to ``"GRCh38"``.

        Returns:
            A success envelope with one record per cCRE returned, or an error
            envelope when the argument is missing, the query fails, or no
            cCRE is returned.
        """
        raw = args.get("accession")
        accessions = self._norm_accessions(raw) if raw else []
        if not accessions:
            return {
                "status": "error",
                "error": "Missing required parameter: accession (e.g. 'EH38E2666166')",
            }
        assembly = args.get("assembly") or "GRCh38"

        payload = self._post(
            self._ACCESSION_QUERY, {"a": assembly, "acc": accessions}
        )
        if payload.get("errors"):
            return {"status": "error", "error": self._graphql_error(payload)}

        rows = (payload.get("data") or {}).get("cCREQuery") or []
        results = []
        for r in rows:
            coords = r.get("coordinates") or {}
            results.append(
                {
                    "accession": r.get("accession"),
                    "element_type": r.get("group"),
                    "rdhs": r.get("rDHS"),
                    "ctcf_bound": r.get("ctcf_bound"),
                    "chrom": coords.get("chromosome"),
                    "start": coords.get("start"),
                    "end": coords.get("end"),
                }
            )
        if not results:
            return {
                "status": "error",
                "error": f"No cCRE found for accession(s) {accessions} in assembly {assembly}",
            }
        return {
            "status": "success",
            "data": results,
            "metadata": {
                "assembly": assembly,
                "requested": accessions,
                "count": len(results),
                "source": "ENCODE SCREEN Registry of cCREs",
            },
        }