Source code for tooluniverse.finemap_abf_tool

"""Single-causal-variant fine-mapping by approximate Bayes factors (pure NumPy).

Given per-SNP association summary statistics (effect size + standard error) for
the SNPs in a genomic region, this tool computes each SNP's **posterior
inclusion probability (PIP)** and the **credible set** of SNPs under the
single-causal-variant assumption, using Wakefield approximate Bayes factors
(Wakefield 2009; the per-SNP model that underlies ``coloc`` and ``FINEMAP``'s
1-causal case).

  * **PIP_i** = ABF_i / sum_j ABF_j — the posterior probability SNP i is the
    causal variant (assuming exactly one causal variant in the region).
  * **credible set** — the smallest set of SNPs whose PIPs sum to >= the
    requested coverage (default 0.95); the causal variant is in this set with
    that probability.

This is the fast, LD-free fine-mapping baseline (no LD matrix needed because it
assumes a single causal signal). For regions with multiple independent signals,
a full method (SuSiE / FINEMAP with LD) is needed — noted in the output. Pure
deterministic math, so it runs and is testable directly.

``run()`` returns a ``{status, data, metadata}`` dict and never raises.

Reference
---------
Wakefield J. "Bayes factors for genome-wide association studies: comparison with
p-values." Genet Epidemiol 33, 79-86 (2009).
"""

import math
from typing import Any, Dict, Optional

import numpy as np

from .base_tool import BaseTool
from .tool_registry import register_tool



[docs]
@register_tool("FinemapABFTool")
class FinemapABFTool(BaseTool):
    """Single-causal-variant fine-mapping (PIPs + credible set) from summary stats."""


[docs]
    def __init__(self, tool_config: Optional[Dict[str, Any]] = None):
        super().__init__(tool_config)
        self.tool_config = tool_config or {}


    # ------------------------------------------------------------------ run

[docs]
    def run(self, arguments: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        args = arguments or {}
        parsed = self._validate(args)
        if isinstance(parsed, dict):
            return parsed
        beta, se = parsed

        sd_prior = float(args.get("sd_prior") or 0.15)
        coverage = float(args.get("coverage") or 0.95)
        if not 0 < coverage <= 1:
            return self._err("coverage must be in (0, 1].")

        labf = self._log_abf(beta, se, sd_prior)
        pip = np.exp(labf - self._logsumexp(labf))  # normalize -> posterior incl. prob.

        n = len(beta)
        snps = args.get("snp")
        labels = (
            [str(s) for s in snps]
            if isinstance(snps, (list, tuple)) and len(snps) == n
            else [str(i) for i in range(n)]
        )

        order = np.argsort(pip)[::-1]  # highest PIP first
        cset, cum = [], 0.0
        for idx in order:
            cset.append({"snp": labels[idx], "pip": float(pip[idx])})
            cum += float(pip[idx])
            if cum >= coverage:
                break

        top = int(order[0])
        return {
            "status": "success",
            "data": {
                "n_snps": n,
                "top_snp": labels[top],
                "top_pip": float(pip[top]),
                "credible_set": cset,
                "credible_set_size": len(cset),
                "credible_set_coverage": cum,
                "pip": {labels[i]: float(pip[i]) for i in range(n)},
            },
            "metadata": {
                "method": "Wakefield ABF fine-mapping (single causal variant)",
                "coverage_requested": coverage,
                "sd_prior": sd_prior,
                "note": (
                    "PIP assumes exactly one causal variant in the region (no LD "
                    "matrix used). For loci with multiple independent signals use "
                    "SuSiE/FINEMAP with an LD reference."
                ),
            },
        }


    # -------------------------------------------------------------- helpers

[docs]
    @staticmethod
    def _log_abf(beta: np.ndarray, se: np.ndarray, sd_prior: float) -> np.ndarray:
        """Per-SNP Wakefield log approximate Bayes factor (association vs null)."""
        v = se**2
        w = sd_prior**2
        r = w / (w + v)
        z2 = (beta / se) ** 2
        return 0.5 * np.log(1.0 - r) + 0.5 * r * z2



[docs]
    @staticmethod
    def _logsumexp(a: np.ndarray) -> float:
        m = float(np.max(a))
        return m + math.log(float(np.sum(np.exp(a - m))))



[docs]
    def _validate(self, args: Dict[str, Any]):
        beta_in, se_in = args.get("beta"), args.get("se")
        cols = []
        for k, v in (("beta", beta_in), ("se", se_in)):
            if not isinstance(v, (list, tuple)) or not v:
                return self._err(f"{k} must be a non-empty list of numbers.")
            try:
                cols.append(np.asarray(v, dtype=float))
            except (TypeError, ValueError):
                return self._err(f"{k} must contain only numbers.")
        beta, se = cols
        if len(beta) != len(se):
            return self._err("beta and se must have the same length.")
        if len(beta) < 2:
            return self._err("At least 2 SNPs are required to fine-map.")
        if np.any(se <= 0):
            return self._err("Standard errors (se) must be positive.")
        return beta, se



[docs]
    @staticmethod
    def _err(message: str) -> Dict[str, Any]:
        return {"status": "error", "error": message, "source": "FinemapABFTool"}