Source code for tooluniverse.ldlink_tool
"""LDlink (NIH) linkage-disequilibrium tool for ToolUniverse.
LDlink computes population-specific LD between variants. The LDproxy module
returns variants in LD with a query SNP (proxies) — essential for resolving
whether a GWAS lead SNP is the likely causal variant or just tags one.
API: https://ldlink.nih.gov/LDlinkRest/ (free; requires a registered token,
read from the LDLINK_TOKEN environment variable).
Register: https://ldlink.nih.gov/?tab=apiaccess
"""
import os
from typing import Any, Dict
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
LDLINK_URL = "https://ldlink.nih.gov/LDlinkRest"
[docs]
@register_tool("LDlinkTool")
class LDlinkTool(BaseTool):
"""Find LD proxy variants for a SNP via LDlink's LDproxy module."""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 30)
self.token = os.environ.get("LDLINK_TOKEN", "")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
if not self.token:
return {
"status": "error",
"error": (
"LDlink requires a free API token. Register at "
"https://ldlink.nih.gov/?tab=apiaccess and set the LDLINK_TOKEN "
"environment variable."
),
}
variant = (arguments.get("variant") or arguments.get("rsid") or "").strip()
if not variant:
return {
"status": "error",
"error": "variant (rsID, e.g. 'rs7903146') is required",
}
population = arguments.get("population", "CEU")
genome_build = arguments.get("genome_build", "grch38")
try:
r2_threshold = float(arguments.get("r2_threshold", 0.8))
except (TypeError, ValueError):
r2_threshold = 0.8
try:
limit = max(1, min(int(arguments.get("limit", 50)), 500))
except (TypeError, ValueError):
limit = 50
params = {
"var": variant,
"pop": population,
"r2_d": "r2",
"genome_build": genome_build,
"token": self.token,
}
try:
resp = requests.get(
f"{LDLINK_URL}/ldproxy", params=params, timeout=self.timeout
)
if resp.status_code != 200:
return {
"status": "error",
"error": f"LDlink returned HTTP {resp.status_code}",
}
text = resp.text
except requests.exceptions.Timeout:
return {
"status": "error",
"error": f"LDlink timed out after {self.timeout}s",
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"LDlink request failed: {e}"}
if text.lstrip().startswith("{") and '"error"' in text:
import json as _json
try:
return {
"status": "error",
"error": _json.loads(text).get("error", text[:200]),
}
except Exception:
return {"status": "error", "error": text[:200]}
lines = [ln for ln in text.splitlines() if ln.strip()]
if len(lines) < 2:
return {
"status": "success",
"data": [],
"metadata": {
"source": "LDlink (LDproxy)",
"query": variant,
"total": 0,
},
}
header = lines[0].split("\t")
idx = {h.strip(): i for i, h in enumerate(header)}
proxies = []
for ln in lines[1:]:
f = ln.split("\t")
if len(f) < len(header):
continue
try:
r2 = float(f[idx.get("R2", -1)]) if "R2" in idx else None
except (ValueError, TypeError):
r2 = None
if r2 is not None and r2 < r2_threshold:
continue
proxies.append(
{
"rsid": f[idx.get("RS_Number", 0)],
"coord": f[idx.get("Coord", 1)] if "Coord" in idx else None,
"alleles": f[idx.get("Alleles", 2)] if "Alleles" in idx else None,
"maf": f[idx.get("MAF", -1)] if "MAF" in idx else None,
"distance": f[idx.get("Distance", -1)]
if "Distance" in idx
else None,
"r2": r2,
}
)
proxies.sort(key=lambda p: (p["r2"] is not None, p["r2"] or 0), reverse=True)
return {
"status": "success",
"data": proxies[:limit],
"metadata": {
"source": "LDlink (LDproxy)",
"query": variant,
"population": population,
"genome_build": genome_build,
"r2_threshold": r2_threshold,
"total": len(proxies),
"returned": min(len(proxies), limit),
"interpretation": (
"Proxies with R2 close to 1 are in tight LD with the query SNP; "
"the causal variant may be any of them, so a high-R2 proxy in a "
"coding region or regulatory element is a better mechanistic "
"candidate than the lead SNP itself."
),
},
}