Source code for tooluniverse.finemap_abf_tool

"""Single-causal-variant fine-mapping by approximate Bayes factors (pure NumPy).

Given per-SNP association summary statistics (effect size + standard error) for
the SNPs in a genomic region, this tool computes each SNP's **posterior
inclusion probability (PIP)** and the **credible set** of SNPs under the
single-causal-variant assumption, using Wakefield approximate Bayes factors
(Wakefield 2009; the per-SNP model that underlies ``coloc`` and ``FINEMAP``'s
1-causal case).

  * **PIP_i** = ABF_i / sum_j ABF_j — the posterior probability SNP i is the
    causal variant (assuming exactly one causal variant in the region).
  * **credible set** — the smallest set of SNPs whose PIPs sum to >= the
    requested coverage (default 0.95); the causal variant is in this set with
    that probability.

This is the fast, LD-free fine-mapping baseline (no LD matrix needed because it
assumes a single causal signal). For regions with multiple independent signals,
a full method (SuSiE / FINEMAP with LD) is needed — noted in the output. Pure
deterministic math, so it runs and is testable directly.

``run()`` returns a ``{status, data, metadata}`` dict and never raises.

Reference
---------
Wakefield J. "Bayes factors for genome-wide association studies: comparison with
p-values." Genet Epidemiol 33, 79-86 (2009).
"""

import math
from typing import Any, Dict, Optional

import numpy as np

from .base_tool import BaseTool
from .tool_registry import register_tool


[docs] @register_tool("FinemapABFTool") class FinemapABFTool(BaseTool): """Single-causal-variant fine-mapping (PIPs + credible set) from summary stats."""
[docs] def __init__(self, tool_config: Optional[Dict[str, Any]] = None): super().__init__(tool_config) self.tool_config = tool_config or {}
# ------------------------------------------------------------------ run
[docs] def run(self, arguments: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: args = arguments or {} parsed = self._validate(args) if isinstance(parsed, dict): return parsed beta, se = parsed sd_prior = float(args.get("sd_prior") or 0.15) coverage = float(args.get("coverage") or 0.95) if not 0 < coverage <= 1: return self._err("coverage must be in (0, 1].") labf = self._log_abf(beta, se, sd_prior) pip = np.exp(labf - self._logsumexp(labf)) # normalize -> posterior incl. prob. n = len(beta) snps = args.get("snp") labels = ( [str(s) for s in snps] if isinstance(snps, (list, tuple)) and len(snps) == n else [str(i) for i in range(n)] ) order = np.argsort(pip)[::-1] # highest PIP first cset, cum = [], 0.0 for idx in order: cset.append({"snp": labels[idx], "pip": float(pip[idx])}) cum += float(pip[idx]) if cum >= coverage: break top = int(order[0]) return { "status": "success", "data": { "n_snps": n, "top_snp": labels[top], "top_pip": float(pip[top]), "credible_set": cset, "credible_set_size": len(cset), "credible_set_coverage": cum, "pip": {labels[i]: float(pip[i]) for i in range(n)}, }, "metadata": { "method": "Wakefield ABF fine-mapping (single causal variant)", "coverage_requested": coverage, "sd_prior": sd_prior, "note": ( "PIP assumes exactly one causal variant in the region (no LD " "matrix used). For loci with multiple independent signals use " "SuSiE/FINEMAP with an LD reference." ), }, }
# -------------------------------------------------------------- helpers
[docs] @staticmethod def _log_abf(beta: np.ndarray, se: np.ndarray, sd_prior: float) -> np.ndarray: """Per-SNP Wakefield log approximate Bayes factor (association vs null).""" v = se**2 w = sd_prior**2 r = w / (w + v) z2 = (beta / se) ** 2 return 0.5 * np.log(1.0 - r) + 0.5 * r * z2
[docs] @staticmethod def _logsumexp(a: np.ndarray) -> float: m = float(np.max(a)) return m + math.log(float(np.sum(np.exp(a - m))))
[docs] def _validate(self, args: Dict[str, Any]): beta_in, se_in = args.get("beta"), args.get("se") cols = [] for k, v in (("beta", beta_in), ("se", se_in)): if not isinstance(v, (list, tuple)) or not v: return self._err(f"{k} must be a non-empty list of numbers.") try: cols.append(np.asarray(v, dtype=float)) except (TypeError, ValueError): return self._err(f"{k} must contain only numbers.") beta, se = cols if len(beta) != len(se): return self._err("beta and se must have the same length.") if len(beta) < 2: return self._err("At least 2 SNPs are required to fine-map.") if np.any(se <= 0): return self._err("Standard errors (se) must be positive.") return beta, se
[docs] @staticmethod def _err(message: str) -> Dict[str, Any]: return {"status": "error", "error": message, "source": "FinemapABFTool"}