# Source code for tooluniverse.nca_tool
# nca_tool.py
"""
Non-Compartmental Analysis (NCA) Tool for Pharmacokinetics
Implements standard NCA methods per FDA/EMA regulatory guidelines:
- Full NCA parameter calculation from time-concentration data
(Cmax, Tmax, AUC0-t, AUC0-inf, t½, CL, Vd)
- One-compartment IV bolus model fitting: C(t) = C0 * exp(-k_el * t)
- Oral bioavailability (F) calculation
No external API calls. Uses numpy/scipy for computation.
References:
- FDA Guidance for Industry: Pharmacokinetics in Patients with Impaired Renal Function
- Gabrielsson & Weiner, Pharmacokinetic and Pharmacodynamic Data Analysis (2016)
- Rowland & Tozer, Clinical Pharmacokinetics and Pharmacodynamics (2011)
"""
import math
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
try:
import numpy as np
HAS_NUMPY = True
except ImportError:
HAS_NUMPY = False
try:
from scipy.optimize import curve_fit
from scipy.stats import linregress
HAS_SCIPY = True
except ImportError:
HAS_SCIPY = False
# Unit conversion tables for automatic PK unit standardization.
# Values are multiplicative factors into the tool's internal base units:
# dose mass -> nanograms (ng) and concentration -> ng/mL. Keys are lowercase;
# _try_pk_unit_conversion normalizes case/whitespace before lookup. Both the
# Unicode "μg" and the ASCII "ug"/"mcg" spellings of microgram are accepted.
_MASS_TO_NG: Dict[str, float] = {
    "pg": 1e-3,
    "ng": 1.0,
    "μg": 1e3,
    "ug": 1e3,
    "mcg": 1e3,
    "mg": 1e6,
    "g": 1e9,
    "kg": 1e12,
}
# Concentration units -> factor to ng/mL. Note the volume conversion is folded
# in (e.g. 1 μg/L == 1 ng/mL, hence factor 1.0).
_CONC_TO_NG_PER_ML: Dict[str, float] = {
    "pg/ml": 1e-3,
    "pg/l": 1e-6,
    "ng/ml": 1.0,
    "ng/l": 1e-3,
    "μg/ml": 1e3,
    "ug/ml": 1e3,
    "mcg/ml": 1e3,
    "μg/l": 1.0,
    "ug/l": 1.0,
    "mg/ml": 1e6,
    "mg/l": 1e3,
    "g/ml": 1e9,
    "g/l": 1e6,
}
[docs]
@register_tool("NCATool")
class NCATool(BaseTool):
"""
Non-Compartmental Analysis (NCA) for pharmacokinetics.
Endpoints:
- compute_parameters: Full NCA from time-concentration data
- fit_one_compartment: 1-compartment IV bolus model fitting
- calculate_bioavailability: Oral bioavailability F calculation
"""
def __init__(self, tool_config: Dict[str, Any]):
    """
    Initialize the NCA tool from its configuration dict.

    The endpoint name is read from tool_config["fields"]["endpoint"] and
    defaults to "compute_parameters" when the key (or "fields") is absent.
    """
    super().__init__(tool_config)
    field_config = tool_config.get("fields", {})
    self.endpoint = field_config.get("endpoint", "compute_parameters")
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Dispatch the configured endpoint with the given arguments.

    Returns a dict with "status" ("success"/"error") plus either "data" or
    "error". numpy is a hard requirement for every endpoint; any exception
    raised by an endpoint is caught and reported as an error payload.
    """
    if not HAS_NUMPY:
        return {
            "status": "error",
            "error": "numpy is required for NCA calculations. Install with: pip install numpy",
        }
    # Endpoint-name -> method-name dispatch table (method resolved lazily
    # inside the try so any failure is wrapped like the original handlers).
    endpoint_methods = {
        "compute_parameters": "_compute_parameters",
        "fit_one_compartment": "_fit_one_compartment",
        "calculate_bioavailability": "_calculate_bioavailability",
    }
    method_name = endpoint_methods.get(self.endpoint)
    if method_name is None:
        return {
            "status": "error",
            "error": f"Unknown endpoint: {self.endpoint}",
        }
    try:
        return getattr(self, method_name)(arguments)
    except Exception as e:
        return {"status": "error", "error": f"NCA calculation error: {str(e)}"}
[docs]
def _auc_trapezoid(self, times: "np.ndarray", concs: "np.ndarray") -> float:
"""
Linear-log trapezoidal AUC calculation.
Uses logarithmic trapezoid for declining phases (more accurate),
linear trapezoid for ascending phases and zero concentrations.
"""
auc = 0.0
for i in range(len(times) - 1):
dt = float(times[i + 1] - times[i])
c1, c2 = float(concs[i]), float(concs[i + 1])
if c1 <= 0 or c2 <= 0:
# Linear trapezoid when values near zero
auc += dt * (c1 + c2) / 2.0
elif c2 < c1:
# Logarithmic trapezoid (declining phase)
auc += dt * (c1 - c2) / np.log(c1 / c2)
else:
# Linear trapezoid (ascending phase)
auc += dt * (c1 + c2) / 2.0
return auc
[docs]
def _aumc_trapezoid(self, times: "np.ndarray", concs: "np.ndarray") -> float:
"""
Linear trapezoidal AUMC calculation (area under the first-moment curve).
AUMC = integral(t * C(t) dt). Used to compute model-independent MRT:
MRT_iv = AUMC_0-inf / AUC_0-inf.
Uses the linear trapezoid rule throughout: each interval contributes
dt * (t1*C1 + t2*C2) / 2. This is consistent with most NCA software
(e.g., Phoenix WinNonlin uses linear AUMC with log-linear AUC).
"""
aumc = 0.0
for i in range(len(times) - 1):
dt = float(times[i + 1] - times[i])
t1, t2 = float(times[i]), float(times[i + 1])
c1, c2 = float(concs[i]), float(concs[i + 1])
aumc += dt * (t1 * c1 + t2 * c2) / 2.0
return aumc
def _estimate_terminal_slope(
    self,
    times: "np.ndarray",
    concs: "np.ndarray",
    n_points: int = 3,
    tmax: float = None,
):
    """
    Estimate the terminal elimination rate constant (λz) by log-linear regression.

    Per FDA NCA guidance (2003) and Rowland & Tozer (2011), λz should be
    estimated from all time points in the log-linear terminal phase, which
    begins at Tmax.

    Args:
        times: sample times, paired with concs.
        concs: measured concentrations; only positive values are used
            (log of zero/negative is undefined).
        n_points: number of trailing positive-concentration points to
            regress when tmax is None.
        tmax: time of Cmax. When provided, all points with t >= tmax and
            C > 0 are used, and at least 3 such points are REQUIRED —
            otherwise (None, None, None) is returned (a 2-point regression
            is a perfect fit by construction, R²=1 always, and gives no
            quality signal). There is no fallback to n_points in this case.
            When tmax is None, the last n_points positive concentrations
            are used instead.

    Returns:
        (lambda_z, r_squared, intercept) on success, or (None, None, None)
        when scipy is unavailable, too few usable points exist, or the
        fitted slope is non-negative (no declining phase).
    """
    if not HAS_SCIPY:
        return None, None, None
    # Use positive concentrations only
    valid = concs > 0
    t_valid = times[valid]
    c_valid = concs[valid]
    if len(t_valid) < 2:
        return None, None, None
    # Select terminal phase points
    # FDA NCA guidance requires ≥3 points for reliable λz: a 2-point regression
    # is a perfect fit by construction (R²=1 always), giving no quality signal.
    if tmax is not None:
        # Use >= so the Tmax point itself (C_max) is the first point
        # of the terminal regression. FDA NCA guidance defines the terminal phase
        # as starting AT Tmax (the concentration-time curve begins declining from
        # Cmax). Using strict > excluded the Cmax point, leaving only the
        # declining portion. For plateau profiles (multiple tied Cmax values)
        # combined with the Tmax=last-Cmax fix, >= is required to include enough
        # points (≥3) for a valid regression.
        post_tmax = t_valid >= tmax
        if np.sum(post_tmax) >= 3:
            # FDA NCA: use all post-Tmax points for λz estimation
            t_term = t_valid[post_tmax]
            c_term = c_valid[post_tmax]
        else:
            # Insufficient terminal-phase points (< 3): cannot reliably estimate λz.
            # A 2-point regression is a perfect fit by construction (R²=1 always),
            # giving no quality signal. The caller will emit a terminal_phase_warning.
            return None, None, None
    else:
        n_use = min(n_points, len(t_valid))
        t_term = t_valid[-n_use:]
        c_term = c_valid[-n_use:]
    # If duplicate time points survive into the terminal phase
    # (e.g., re-measured samples at t=1 with different concentrations), the
    # log-linear regression treats them as independent observations. Two points at
    # the same time but different concentrations pull the slope toward zero, causing
    # lambda_z to be underestimated by as much as 11% in test cases. Average
    # concentrations at duplicate times before fitting, consistent with FDA NCA
    # guidance that each time point represents a single true concentration.
    unique_times = np.unique(t_term)
    if len(unique_times) < len(t_term):
        # One or more duplicate time points: average concentrations
        averaged_concs = np.array(
            [float(np.mean(c_term[t_term == ut])) for ut in unique_times]
        )
        t_term = unique_times
        c_term = averaged_concs
    if len(t_term) < 2:
        return None, None, None
    # log-linear fit: ln(C) = intercept - λz * t
    slope, intercept, r_value, _, _ = linregress(t_term, np.log(c_term))
    if slope >= 0:
        return None, None, None  # Must be negative (declining)
    lambda_z = -slope  # λz positive
    r_squared = r_value**2
    return float(lambda_z), float(r_squared), float(intercept)
def _try_pk_unit_conversion(self, dose_val: float, dose_unit: str, conc_unit: str):
    """
    Attempt to standardize a dose/concentration unit pair to ng and ng/mL.

    Both unit strings are looked up case-insensitively (surrounding
    whitespace ignored) in the module-level conversion tables. On success
    returns (dose_in_ng, factor_to_ng_per_ml, True), where the factor
    multiplies a raw concentration to obtain ng/mL. If either unit is
    unrecognized, returns (None, None, False) so the caller can fall back
    to raw-ratio units.
    """
    normalized_dose_unit = dose_unit.strip().lower()
    normalized_conc_unit = conc_unit.strip().lower()
    dose_scale = _MASS_TO_NG.get(normalized_dose_unit)
    conc_scale = _CONC_TO_NG_PER_ML.get(normalized_conc_unit)
    if dose_scale is None or conc_scale is None:
        return None, None, False
    return dose_val * dose_scale, conc_scale, True
def _compute_parameters(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Full NCA computation from time-concentration data.

    Args:
        arguments: request dict with keys:
            times (list, required): sample times.
            concentrations (list, required): measured concentrations,
                paired 1:1 with times. BLQ samples must be submitted as 0.
            dose (number, optional): administered dose; enables CL/Vd/MRT.
            route (str): "iv" (default) or "infusion" are treated as
                intravenous; any other value is extravascular, so
                clearance is reported as apparent clearance CL/F.
            dose_unit / conc_unit / time_unit (str): unit labels, defaults
                "mg", "ng/mL", "h"; used for automatic unit conversion and
                for labelling the output.

    Returns:
        {"status": "success", "data": {...}, "metadata": {...}} containing
        Cmax, Tmax, Clast, Tlast, AUC0-t and — when the terminal slope can
        be estimated — lambda_z, t_half, AUC0-inf, CL, Vd, MRT_iv, plus
        data-quality warnings; or {"status": "error", "error": ...} for
        invalid input.
    """
    times = arguments.get("times", [])
    concentrations = arguments.get("concentrations", [])
    dose = arguments.get("dose")
    route = arguments.get("route", "iv")
    dose_unit = arguments.get("dose_unit", "mg")
    conc_unit = arguments.get("conc_unit", "ng/mL")
    time_unit = arguments.get("time_unit", "h")
    if not times or not concentrations:
        return {
            "status": "error",
            "error": "times and concentrations are required. Provide paired lists of time points and measured concentrations.",
        }
    if len(times) != len(concentrations):
        return {
            "status": "error",
            "error": "times and concentrations must have the same length",
        }
    if len(times) < 3:
        return {
            "status": "error",
            "error": "At least 3 time-concentration points are required for NCA",
        }
    try:
        t = np.array([float(x) for x in times])
        c = np.array([float(x) for x in concentrations])
    except (ValueError, TypeError) as e:
        return {"status": "error", "error": f"Invalid numeric values: {e}"}
    # NaN/inf in times must be rejected: np.argsort on a NaN-containing array
    # produces undefined ordering, silently propagating NaN through all PK parameters.
    if np.any(~np.isfinite(t)):
        return {
            "status": "error",
            "error": (
                "Times contain non-finite values (NaN or inf). "
                "All time values must be finite real numbers. "
                "Remove or correct the affected time points."
            ),
        }
    # NaN/inf must be rejected before any comparison (np.any(c < 0) returns False
    # for NaN, silently bypassing the negative-value guard).
    if np.any(~np.isfinite(c)):
        return {
            "status": "error",
            "error": (
                "Concentrations contain non-finite values (NaN or inf). "
                "All concentration values must be finite real numbers. "
                "Replace NaN/inf with 0 (BLQ) or remove the affected time points."
            ),
        }
    if np.any(c < 0):
        return {
            "status": "error",
            "error": (
                "All concentrations must be non-negative. Negative values likely "
                "represent below-LOQ (BLQ) measurements. Per FDA/EMA NCA guidance, "
                "replace BLQ values with 0 before submitting."
            ),
        }
    # Sort by time
    sort_idx = np.argsort(t)
    t = t[sort_idx]
    c = c[sort_idx]
    # Reject all-negative-time profiles: if every time point is negative (pre-dose
    # only), there is no post-administration pharmacokinetic data to analyse.
    # Without a t≥0 anchor, AUC0-t would integrate over the pre-dose window —
    # a physically meaningless result that would be returned silently.
    if not np.any(t >= 0):
        return {
            "status": "error",
            "error": (
                "All time points are negative (pre-dose only). "
                "No post-dose data is available for NCA calculation. "
                "Provide at least one time point at t ≥ 0 (time of dosing or later)."
            ),
        }
    # The minimum-3 check at the top counts ALL time points including
    # pre-dose samples. Two pre-dose + one post-dose = 3 total passes the guard
    # but yields AUC=0 (single-point trapezoid) silently under status=success.
    # Enforce a minimum of 2 POST-DOSE points for a non-trivial AUC.
    _n_postdose = int(np.sum(t >= 0))
    if _n_postdose < 2:
        return {
            "status": "error",
            "error": (
                f"Only {_n_postdose} post-dose time point(s) (t ≥ 0) found "
                f"(total input points: {len(t)}, pre-dose points: {len(t) - _n_postdose}). "
                "At least 2 post-dose time points are required to compute a "
                "non-trivial AUC (trapezoidal integration requires ≥2 points)."
            ),
        }
    # Detect duplicate time points: the linear-log trapezoid computes
    # dt = t[i+1] - t[i] = 0 for duplicates, contributing zero area.
    # Duplicate times are often caused by data entry errors or sample labelling
    # issues. Warn, but continue — AUC may be underestimated.
    duplicate_times = []
    seen = {}
    for ti in t:
        key = float(ti)
        seen[key] = seen.get(key, 0) + 1
    duplicate_times = [k for k, cnt in seen.items() if cnt > 1]
    # Basic parameters
    # Cmax/Tmax must be computed from POST-DOSE samples only (t >= 0).
    # Pre-dose residual drug can exceed the new dose's Cmax, causing Tmax < 0 and
    # cascading into lambda_z (terminal phase selection uses t > Tmax).
    _postdose_mask = t >= 0
    c_post = c[_postdose_mask]
    t_post = t[_postdose_mask]
    # _postdose_mask guarantees len > 0 (all-pre-dose already caught above).
    # When multiple post-dose samples tie at Cmax (plateau profile),
    # np.argmax returns the FIRST index, so Tmax is set to the plateau start.
    # The terminal-slope filter then includes plateau points in the regression,
    # inflating t_half by up to 80% and underestimating lambda_z by up to 44%.
    # Per FDA NCA guidance, Tmax should be the LAST time at Cmax (end of plateau),
    # so that all t ≥ Tmax data genuinely represents the declining terminal phase.
    _cmax_val = np.max(c_post)
    cmax = float(_cmax_val)
    # When all post-dose concentrations are 0 (all BLQ),
    # _cmax_val=0 and np.where(c_post==0)[0][-1] points to the LAST sample,
    # reporting Tmax=last_sample_time (e.g., 24h) — physically meaningless.
    # When Cmax=0, Tmax should be the first post-dose time (t=0 or first sample).
    if cmax == 0.0:
        tmax = float(t_post[0])
    else:
        tmax = float(t_post[np.where(c_post == _cmax_val)[0][-1]])
    pos_mask = c > 0
    clast = float(c[pos_mask][-1]) if any(pos_mask) else 0.0
    tlast = float(t[pos_mask][-1]) if any(pos_mask) else 0.0
    # Detect mid-profile zeros: a zero concentration that appears between two
    # positive values is biologically implausible and may indicate assay failure,
    # sample swap, or re-absorption. Collect positions for a warning.
    mid_profile_zero_times = []
    first_pos = int(np.argmax(pos_mask)) if any(pos_mask) else len(c)
    last_pos = len(c) - 1 - int(np.argmax(pos_mask[::-1])) if any(pos_mask) else -1
    for idx in range(first_pos + 1, last_pos):
        if c[idx] == 0.0:
            mid_profile_zero_times.append(float(t[idx]))
    # AUC0-t (linear-log trapezoidal).
    # FDA NCA convention: AUC0-t is integrated only up to Tlast, the time of
    # the last *positive* (measurable) concentration. Trailing zeros beyond
    # Tlast represent BLQ samples and must not inflate the integral.
    # Pre-dose time points (t < 0) must also be excluded: including them inflates
    # the area by integrating over negative-time intervals that are not part of
    # the post-administration pharmacokinetic profile.
    if any(pos_mask):
        last_pos_idx = int(np.where(pos_mask)[0][-1])
        first_nonneg_idx = int(np.argmax(t >= 0)) if np.any(t >= 0) else 0
        t_auc = t[first_nonneg_idx : last_pos_idx + 1]
        c_auc = c[first_nonneg_idx : last_pos_idx + 1]
        # T_auc is empty when all positive concentrations are pre-dose
        # (last_pos_idx < first_nonneg_idx). The old code reached t_auc[0] below,
        # raising IndexError: "index 0 is out of bounds for axis 0 with size 0",
        # which was swallowed by the outer except and returned as an opaque message.
        if len(t_auc) == 0:
            return {
                "status": "error",
                "error": (
                    "All positive concentrations are pre-dose (t < 0). "
                    "No post-dose positive concentrations found for AUC computation. "
                    "Check that at least one sample with t ≥ 0 has a measurable "
                    "concentration > 0."
                ),
            }
        pre_dose_times = [float(x) for x in t[:first_nonneg_idx]]
    else:
        # All post-dose concentrations are zero (all BLQ/undetectable).
        # AUC will be 0; use the post-dose observation window for the key label.
        # When pre-dose samples exist, the old code set
        # t_auc = t (the full array including pre-dose times), giving a key like
        # "AUC-2-8" and omitting pre_dose_time_warning. Apply the same
        # first_nonneg_idx filtering as the positive-concentrations path.
        _first_nonneg_idx = int(np.argmax(t >= 0)) if np.any(t >= 0) else 0
        t_auc = t[_first_nonneg_idx:]
        c_auc = c[_first_nonneg_idx:]
        pre_dose_times = [float(x) for x in t[:_first_nonneg_idx]]
    auc0t = self._auc_trapezoid(t_auc, c_auc)
    # AUMC_0-t for later use in model-independent MRT computation.
    aumc0t = self._aumc_trapezoid(t_auc, c_auc)
    # np.float64 overflow — round(np.float64(huge), 4) multiplies by
    # 10^4 internally, overflowing to np.inf for values near the float max.
    # Guard with an isfinite check immediately after computing AUC.
    if not math.isfinite(float(auc0t)):
        return {
            "status": "error",
            "error": (
                f"AUC computation resulted in a non-finite value ({float(auc0t)}). "
                "This typically indicates extremely large concentration or time values. "
                "Check input data units — rescaling may be required (e.g., ng/mL → μg/mL, "
                "hours → days)."
            ),
        }
    # Terminal phase parameters — use all post-Tmax points per FDA NCA guidance
    lambda_z, r_sq_terminal, _ = self._estimate_terminal_slope(t, c, tmax=tmax)
    # round(..., 4) truncates to 0.0 for sub-nanosecond values
    # (e.g., round(5e-5, 4) = 0.0), producing contradictory output (Tlast=0
    # but non-zero AUC or extrapolation_pct). Use full float precision so that
    # extreme-scale profiles are self-consistent. This matches the lambda_z
    # treatment below: "Preserve full precision for lambda_z."
    # Additional protection: round(float(x), 4) overflows for x > ~1.79e304
    # because x * 10^4 exceeds the float maximum. Use full precision (no rounding)
    # for AUC — consistent with the lambda_z / t_half treatment below.
    _auc0t = float(auc0t)
    result = {
        "Cmax": float(cmax),
        "Tmax": float(tmax),
        "Clast": float(clast),
        "Tlast": float(tlast),
        # Key was always "AUC0-X" even when data starts after t=0.
        # If the first time point is > 0, the integral starts there (not at 0),
        # so the key should reflect the actual integration bounds to avoid a 24%+
        # overstatement when the user reads "AUC0-24" but actually gets AUC_2-24.
        # Use t_auc[-1] (actual upper integration bound) instead of
        # tlast (last POSITIVE concentration time). When all concentrations are
        # zero, tlast=0.0 but integration spans 0 to the last observation time.
        # For non-zero profiles t_auc[-1]==tlast, so this is backward compatible.
        f"AUC{float(t_auc[0]):.6g}-{float(t_auc[-1]):.6g}": _auc0t,
        # "AUC0_last" is misleading when data starts after t=0 (no t=0
        # sample), because "0" implies integration from time of dosing (t=0) but
        # the actual integral starts at the first observed time (e.g., t=2).
        # Keep for backward compatibility; add an explanatory note when t[0] > 0.
        "AUC0_last": _auc0t,
        "units": {
            "Cmax": conc_unit,
            "Tmax": time_unit,
            "AUC": f"{conc_unit}·{time_unit}",
        },
    }
    # Clarify that AUC0_last does NOT start at t=0 when first sample > 0.
    if float(t_auc[0]) > 0:
        result["auc_start_note"] = (
            f"AUC integration starts at the first observed post-dose time "
            f"(t={float(t_auc[0]):.6g}), not at t=0 (no sample at dosing time). "
            f"AUC0_last and {float(t_auc[0]):.6g}-{float(t_auc[-1]):.6g} both reflect "
            "AUC from the first sample to Tlast, which underestimates true AUC0-last "
            "by the missing pre-first-sample area."
        )
    # Warn when pre-dose (t < 0) time points were excluded from AUC integration.
    if pre_dose_times:
        result["pre_dose_time_warning"] = (
            f"Pre-dose time points (t < 0) at t={pre_dose_times} were excluded "
            "from AUC integration. AUC0-t is computed from t=0 per FDA/EMA NCA "
            "guidance. Pre-dose samples are retained for visual inspection only."
        )
    # Warn when duplicate time points are present: dt=0 for duplicates means
    # those intervals contribute zero area to the trapezoid, so AUC is
    # underestimated. This typically indicates a data entry or labelling error.
    if duplicate_times:
        result["duplicate_time_warning"] = (
            f"Duplicate time points detected at t={duplicate_times}. Each duplicate "
            "contributes a zero-width trapezoid interval (area = 0), which may "
            "underestimate AUC. If duplicates fall in the terminal phase, "
            "concentrations at the same time are averaged before lambda_z regression "
            "to prevent slope bias. Check for data entry errors or sample labelling issues."
        )
    # Check monotonicity of post-Tmax concentrations.
    # Non-monotonic profiles (rising concentrations after Tmax) indicate
    # re-absorption, enterohepatic circulation, or assay issues — making
    # terminal slope estimation unreliable.
    valid_mask = c > 0
    t_valid_all = t[valid_mask]
    c_valid_all = c[valid_mask]
    post_tmax_mask = t_valid_all >= tmax  # use >= to match terminal slope
    if np.sum(post_tmax_mask) >= 3:
        c_post = c_valid_all[post_tmax_mask]
        if np.any(np.diff(c_post) > 0):
            result["non_monotonic_terminal_warning"] = (
                "Post-Tmax concentrations are not monotonically decreasing. "
                "This may indicate re-absorption, enterohepatic circulation, a secondary "
                "Cmax, or assay variability. Terminal slope (lambda_z) may be unreliable."
            )
    # Validate dose unconditionally — before the lambda_z branch.
    # The original validation was nested inside `if lambda_z is not None:`,
    # so invalid doses (negative, zero, NaN, inf) were silently accepted when
    # lambda_z could not be estimated (sparse terminal phase data).
    if dose is not None:
        try:
            _dose_early = float(dose)
        except (ValueError, TypeError):
            return {
                "status": "error",
                "error": "dose must be a numeric value. Non-numeric dose is not valid.",
            }
        if not math.isfinite(_dose_early):
            return {
                "status": "error",
                "error": (
                    f"dose ({_dose_early}) must be a finite positive number. "
                    "NaN and inf are not valid dose values."
                ),
            }
        if _dose_early <= 0:
            return {
                "status": "error",
                "error": (
                    f"dose ({_dose_early}) must be positive (> 0). "
                    "Negative or zero dose is not physically meaningful. "
                    "CL and Vd cannot be computed without a positive dose."
                ),
            }
    if lambda_z is not None:
        t_half = np.log(2) / lambda_z
        # AUC0-inf = AUC0-t + Clast/λz
        auc0inf = auc0t + clast / lambda_z
        extrap_pct = float((clast / lambda_z) / auc0inf * 100)
        # Preserve full precision for lambda_z: round(1e-9, 9) = 0.0 for any
        # lambda_z < 5e-10 h⁻¹ (t½ > ~1.4 billion hours). Stored 0.0 contradicts
        # t_half and AUC0-inf (which both use the unrounded lambda_z) — a downstream
        # caller that re-derives t_half from stored lambda_z = 0 would get inf.
        # Use full float precision, consistent with t_half and AUC0-inf.
        result["lambda_z"] = float(lambda_z)
        result["t_half"] = float(t_half)
        result["r_squared_terminal_fit"] = round(float(r_sq_terminal), 4)
        # Emit a terminal_fit_quality_warning when the log-linear terminal
        # phase R² is poor. _fit_one_compartment already has a similar guard
        # (R² < 0.85), but _compute_parameters never checked its own terminal fit.
        # A poor terminal R² means lambda_z, t_half, AUC0-inf, CL, and Vd are all
        # unreliable — the user should be informed before acting on these values.
        if r_sq_terminal < 0.85:
            warn_level = "Very poor" if r_sq_terminal < 0.5 else "Poor"
            result["terminal_fit_quality_warning"] = (
                f"{warn_level} terminal-phase fit (R² = {round(r_sq_terminal, 4)} < 0.85). "
                "lambda_z, t½, AUC0-inf, clearance, and Vd estimates may be unreliable. "
                "Possible causes: non-log-linear terminal decline (multi-compartment "
                "kinetics, redistribution), insufficient time points in the terminal "
                "phase, or assay variability. Consider adding more late time points "
                "or using a non-compartmental model review."
            )
        result["AUC0-inf"] = float(auc0inf)
        result["AUC_extrapolation_pct"] = round(extrap_pct, 1)
        if extrap_pct > 20.0:
            result["AUC_extrapolation_warning"] = (
                f"AUC extrapolation is {extrap_pct:.1f}% (>20%). "
                "AUC0-inf may be unreliable. Add more late time points to "
                "better characterize the terminal elimination phase "
                "(FDA/EMA guideline: extrapolation should be <20%)."
            )
        if dose is not None:
            try:
                dose_val = float(dose)
                # NaN/inf bypass the <= 0 guard: NaN <= 0 is False, inf > 0.
                # Both produce invalid CL/Vd (NaN or inf) under status=success.
                if not math.isfinite(dose_val):
                    return {
                        "status": "error",
                        "error": (
                            f"dose ({dose_val}) must be a finite positive number. "
                            "NaN and inf are not valid dose values."
                        ),
                    }
                if dose_val <= 0:
                    return {
                        "status": "error",
                        "error": (
                            f"dose ({dose_val}) must be positive (> 0). "
                            "Negative or zero dose is not physically meaningful. "
                            "CL and Vd cannot be computed without a positive dose."
                        ),
                    }
                dose_ng, conc_factor, converted = self._try_pk_unit_conversion(
                    dose_val, dose_unit, conc_unit
                )
                if converted:
                    # AUC in (ng/mL)·time_unit after applying conc_factor
                    auc_ng_ml = auc0inf * conc_factor
                    cl_ml = dose_ng / auc_ng_ml  # mL/time_unit
                    cl_l = cl_ml / 1000.0  # L/time_unit
                    # round(cl_l, 6) silently truncates CL to 0.0 for
                    # any CL < 5e-7 L/time (e.g., rare biotech analytes with tiny
                    # doses in ng). Use full precision consistent with lambda_z/AUC.
                    result["clearance_CL"] = float(cl_l)
                    result["clearance_CL_unit"] = f"L/{time_unit}"
                    # For non-IV routes, Dose/AUC = CL/F (apparent
                    # clearance), NOT true systemic clearance (which requires IV
                    # data or a known bioavailability F). Label it clearly so
                    # users don't compare oral and IV CL values without adjustment.
                    # 'infusion' is also an intravenous route
                    # (F=1 by definition); the old `route != "iv"` check incorrectly
                    # labeled IV infusion clearance as CL/F (apparent clearance).
                    if route not in ("iv", "infusion"):
                        result["clearance_note"] = (
                            "clearance_CL is CL/F (apparent clearance) for non-IV "
                            f"route='{route}'. True systemic CL = CL/F × F, where "
                            "bioavailability F is unknown without an IV reference. "
                            "Do not compare directly with IV-derived clearance."
                        )
                    # 'infusion' is an IV route (F=1), so Vd
                    # and MRT_iv are just as valid for infusion as for iv bolus.
                    # The clearance_note guard already uses ("iv", "infusion");
                    # the Vd/MRT block was never updated to match.
                    if route in ("iv", "infusion"):
                        vd_l = cl_l / lambda_z  # L
                        # round(vd_l, 4) truncates Vd to 0.0 for any
                        # Vd < 5e-5 L. Use full precision (same rationale as CL).
                        result["volume_distribution_Vd"] = float(vd_l)
                        result["Vd_unit"] = "L"
                        # MRT_iv = AUMC_0-inf / AUC_0-inf (model-independent NCA).
                        # The previous formula 1/lambda_z is only valid for a
                        # mono-exponential model. For multi-compartment profiles the
                        # error can reach 10%+. AUMC_0-inf = AUMC_0-t + Clast*(tlast/λz + 1/λz²).
                        _aumc0inf = float(aumc0t) + clast * (
                            tlast / lambda_z + 1.0 / lambda_z**2
                        )
                        _mrt_iv = (
                            _aumc0inf / float(auc0inf)
                            if float(auc0inf) > 0
                            else None
                        )
                        if _mrt_iv is not None:
                            result["MRT_iv"] = round(_mrt_iv, 4)
                else:
                    cl = dose_val / auc0inf
                    # round(float(cl), 6) silently truncates
                    # CL to 0.0 for any CL < 5e-7. The identical fix was applied
                    # to the known-units path (float(cl_l) full precision)
                    # but was never ported here. Use full float precision.
                    result["clearance_CL"] = float(cl)
                    result["clearance_CL_unit"] = (
                        f"{dose_unit}/({conc_unit}·{time_unit})"
                    )
                    result["pk_unit_note"] = (
                        f"Automatic unit conversion not available for "
                        f"dose_unit='{dose_unit}' / conc_unit='{conc_unit}'. "
                        "CL and Vd are in raw ratio units; convert manually."
                    )
                    # Same infusion fix for unknown-units path.
                    if route in ("iv", "infusion"):
                        vd = cl / lambda_z
                        # Same full-precision fix for Vd.
                        result["volume_distribution_Vd"] = float(vd)
                        result["Vd_unit"] = f"{dose_unit}/{conc_unit}"
                        # MRT_iv = AUMC_0-inf / AUC_0-inf (model-independent NCA).
                        # The previous formula 1/lambda_z is only valid for a
                        # mono-exponential model. For multi-compartment profiles the
                        # error can reach 10%+. AUMC_0-inf = AUMC_0-t + Clast*(tlast/λz + 1/λz²).
                        _aumc0inf = float(aumc0t) + clast * (
                            tlast / lambda_z + 1.0 / lambda_z**2
                        )
                        _mrt_iv = (
                            _aumc0inf / float(auc0inf)
                            if float(auc0inf) > 0
                            else None
                        )
                        if _mrt_iv is not None:
                            result["MRT_iv"] = round(_mrt_iv, 4)
            except (ValueError, TypeError):
                pass
    else:
        result["terminal_phase_warning"] = (
            "Could not estimate terminal slope (λz). "
            "Add more time points in the terminal elimination phase."
        )
    # When the first sample is at t > 0, AUC0-inf is
    # underestimated (the pre-first-sample area is missing). CL = Dose/AUC0-inf,
    # Vd = CL/λz, and MRT = AUMC0-inf/AUC0-inf are all derived from AUC0-inf
    # and therefore carry the same bias. The existing auc_start_note documents
    # the AUC issue; this note explicitly flags the downstream PK parameters so
    # users who read only CL/Vd/MRT are not misled.
    if float(t_auc[0]) > 0 and any(
        k in result for k in ("clearance_CL", "volume_distribution_Vd", "MRT_iv")
    ):
        result["derived_pk_bias_note"] = (
            f"clearance_CL, volume_distribution_Vd, and MRT_iv are derived from "
            f"AUC0-inf, which is underestimated because the first observed sample "
            f"is at t={float(t_auc[0]):.6g} (not t=0). The missing pre-first-sample "
            "area causes CL and Vd to be overestimated and MRT to be biased. "
            "Include a sample at or near t=0, or apply compartmental modelling, "
            "to obtain accurate CL, Vd, and MRT."
        )
    if mid_profile_zero_times:
        result["mid_profile_zero_warning"] = (
            f"Zero concentration detected at t={mid_profile_zero_times} between "
            "positive values. This is biologically implausible and may indicate "
            "assay failure, sample swap, or re-absorption. "
            "AUC calculation proceeds via linear trapezoid at those intervals "
            "but the result may be unreliable."
        )
    result["method"] = "Linear-log trapezoidal NCA"
    result["note"] = (
        "AUC uses linear trapezoid for ascending phases and log-trapezoid "
        "for declining phases (FDA/EMA recommended method). "
        "Terminal t½ estimated from log-linear regression of last 3+ positive concentrations."
    )
    return {
        "status": "success",
        "data": result,
        "metadata": {
            "source": "Local NCA computation (numpy/scipy)",
            "n_timepoints": len(t),
            "route_of_administration": route,
            "reference": "FDA NCA Guidance; Gabrielsson & Weiner (2016)",
        },
    }
[docs]
def _fit_one_compartment(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Fit 1-compartment IV bolus model: C(t) = C0 * exp(-k_el * t)."""
if not HAS_SCIPY:
return {
"status": "error",
"error": "scipy is required for model fitting. Install with: pip install scipy",
}
times = arguments.get("times", [])
concentrations = arguments.get("concentrations", [])
dose = arguments.get("dose")
dose_unit = arguments.get("dose_unit", "mg")
conc_unit = arguments.get("conc_unit", "ng/mL")
time_unit = arguments.get("time_unit", "h")
if not times or not concentrations:
return {"status": "error", "error": "times and concentrations are required"}
if len(times) != len(concentrations):
return {
"status": "error",
"error": "times and concentrations must have the same length",
}
try:
t = np.array([float(x) for x in times])
c = np.array([float(x) for x in concentrations])
except (ValueError, TypeError) as e:
return {"status": "error", "error": f"Invalid numeric values: {e}"}
# Validate times array: NaN in times gives a misleading "mono-exponential
# decline" error from curve_fit instead of the true cause.
if np.any(~np.isfinite(t)):
return {
"status": "error",
"error": (
"Times contain non-finite values (NaN or inf). "
"All time values must be finite real numbers. "
"Remove or correct the affected time points."
),
}
# Validate concentration array (consistent with _compute_parameters).
if np.any(~np.isfinite(c)):
return {
"status": "error",
"error": (
"Concentrations contain non-finite values (NaN or inf). "
"Replace with valid measured values."
),
}
if np.any(c < 0):
return {
"status": "error",
"error": (
"All concentrations must be non-negative. Negative values likely "
"represent below-LOQ (BLQ) measurements. Per FDA/EMA NCA guidance, "
"replace BLQ values with 0 before submitting."
),
}
# Use positive concentrations at non-negative times only.
# Pre-dose (t < 0) data points represent baseline/endogenous levels
# and should not be included in a mono-exponential IV bolus fit. The original
# `valid = c > 0` silently included negative-time data, biasing the C0 and
# k_el estimates. FDA/EMA NCA guidance excludes pre-dose samples from PK fitting.
valid = (c > 0) & (t >= 0)
if sum(valid) < 3:
return {
"status": "error",
"error": "At least 3 positive concentration values required for model fitting",
}
t_fit = t[valid]
c_fit = c[valid]
# All-identical time values make exponential fitting
# degenerate — the optimizer receives five observations all at the same
# independent-variable value and returns an arbitrary k_el. Detect this
# and return a clear error rather than a misleading result.
if np.ptp(t_fit) == 0.0: # peak-to-peak range == 0 means all times equal
return {
"status": "error",
"error": (
"All time values are identical "
f"(t = {float(t_fit[0])} for all {int(sum(valid))} positive "
"concentration points). At least two distinct time points are "
"required to estimate an elimination rate constant."
),
}
def one_compartment(t_arr, c0, k_el):
return c0 * np.exp(-k_el * t_arr)
try:
c0_init = float(c_fit[0]) if c_fit[0] > 0 else float(np.max(c_fit))
k_init = np.log(2) / (float(t_fit[-1]) / 2 + 1e-9)
popt, pcov = curve_fit(
one_compartment,
t_fit,
c_fit,
p0=[c0_init, k_init],
bounds=([0.0, 1e-9], [np.inf, np.inf]),
maxfev=10000,
)
c0_fit, k_el_fit = popt
perr = np.sqrt(np.diag(pcov))
except Exception as e:
return {
"status": "error",
"error": f"Model fitting failed: {str(e)}. Ensure data follows mono-exponential decline.",
}
t_half = np.log(2) / k_el_fit
# R² on fitted points
c_pred = one_compartment(t_fit, c0_fit, k_el_fit)
ss_res = float(np.sum((c_fit - c_pred) ** 2))
ss_tot = float(np.sum((c_fit - np.mean(c_fit)) ** 2))
# `1.0 - ss_res/ss_tot` can produce -0.0 (IEEE-754
# negative zero) when ss_res ≈ ss_tot. Apply the same `+ 0.0` pattern
# used by DoseResponse to eliminate negative zero from JSON output.
r_squared = (float(1.0 - ss_res / ss_tot) + 0.0) if ss_tot > 0 else 0.0
if r_squared >= 0.95:
gof = "Good (R²≥0.95)"
elif r_squared >= 0.85:
gof = "Moderate (0.85≤R²<0.95)"
else:
gof = "Poor (R²<0.85) — consider multi-compartment model"
# Preserve full precision for k_el: round(1e-9, 6) = 0.0, which is wrong
# for drugs near the lower optimization bound (1e-9 h⁻¹ ≡ t½ ≈ 693M h).
# Same applies to t_half: round(float(t_half), 4) = 0.0 for sub-ns half-lives.
k_el_val = float(k_el_fit)
# SE of k_el needs same precision to avoid reporting 0.0.
k_el_se_val = float(perr[1])
result = {
"model": "1-compartment IV bolus",
"equation": "C(t) = C0 × exp(-k_el × t)",
"C0_initial_concentration": round(float(c0_fit), 4),
"k_el_elimination_rate": k_el_val,
"t_half": float(t_half),
# Apply + 0.0 AFTER round() because round() of a tiny
# negative value (e.g. -1e-17) produces -0.0 in IEEE-754 arithmetic.
# The + 0.0 at the computation site (line above) eliminates -0.0 from the
# variable, but round(-tiny, 4) can produce -0.0 afresh.
"r_squared": round(r_squared, 4) + 0.0,
"goodness_of_fit": gof,
"standard_errors": {
"C0": round(float(perr[0]), 4),
"k_el": k_el_se_val,
},
"units": {
"C0": conc_unit,
"k_el": f"1/{time_unit}",
"t_half": time_unit,
},
}
# Emit a structured fit_quality_warning for poor fits (R² < 0.85).
# The goodness_of_fit label transitions at 0.85/0.95, so R² < 0.85 is
# already labelled "Poor". Providing a machine-readable field allows
# programmatic callers to detect and flag poor fits without parsing strings.
# Threshold aligns with the existing "Poor (R²<0.85)" label.
if r_squared < 0.85:
if r_squared < 0.1:
fit_warn_msg = (
f"Very poor fit (R² = {round(r_squared, 4) + 0.0}): the 1-compartment "
"model explains < 10% of the variance in the concentration data. "
"This often indicates a flat profile (no measurable elimination "
"over the sampling window), highly noisy data, or a multi-phasic "
"decline. The t½ and k_el estimates are unreliable."
)
else:
fit_warn_msg = (
f"Poor fit (R² = {round(r_squared, 4) + 0.0} < 0.85): the 1-compartment "
"model does not describe the data well. Consider a 2-compartment "
"model or non-compartmental analysis. The t½ and k_el estimates "
"should be interpreted with caution."
)
result["fit_quality_warning"] = fit_warn_msg
# Warn when k_el is at or near the optimization lower bound (1e-9).
# At the bound, the optimizer could not find a smaller value — the half-life
# estimate (ln2/k_el) may be astronomically large and unreliable.
if k_el_val < 1e-8:
result["k_el_bound_warning"] = (
f"k_el ({k_el_val:.2e} 1/{time_unit}) is at or near the optimization "
"lower bound (1e-9). This may indicate an extremely long half-life drug "
"or that the data do not support reliable estimation of the elimination "
f"rate constant. Corresponding t½ = {float(t_half):.4g} {time_unit} "
"should be interpreted with great caution."
)
if dose is not None:
try:
dose_val = float(dose)
# NaN/inf bypass the <= 0 guard: NaN <= 0 is False, inf > 0.
if not math.isfinite(dose_val):
return {
"status": "error",
"error": (
f"dose ({dose_val}) must be a finite positive number. "
"NaN and inf are not valid dose values."
),
}
# Validate dose: zero or negative doses are not physically meaningful.
if dose_val <= 0:
return {
"status": "error",
"error": (
f"dose ({dose_val}) must be positive (> 0). "
"Zero or negative doses are not physically meaningful. "
"Vd and CL cannot be computed without a positive dose."
),
}
auc0inf = c0_fit / k_el_fit
dose_ng, conc_factor, converted = self._try_pk_unit_conversion(
dose_val, dose_unit, conc_unit
)
if converted:
c0_ng_per_ml = c0_fit * conc_factor
vd_ml = dose_ng / c0_ng_per_ml # mL
vd_l = vd_ml / 1000.0 # L
cl_l = k_el_fit * vd_l # L/time_unit
# Round(vd_l, 4) silently truncates Vd to
# 0.0 for any Vd < 5e-5 L (e.g., 1 ng dose ÷ 100 ng/mL = 1e-5 L).
# The _compute_parameters path already uses float(vd_l) (full
# precision); apply the identical fix here.
result["volume_distribution_Vd"] = float(vd_l)
result["Vd_unit"] = "L"
result["clearance_CL"] = float(cl_l)
result["CL_unit"] = f"L/{time_unit}"
else:
vd_raw = dose_val / c0_fit
cl_raw = k_el_fit * vd_raw
# Same full-precision fix for unknown-units path.
result["volume_distribution_Vd"] = float(vd_raw)
result["Vd_unit"] = f"{dose_unit}/{conc_unit}"
result["clearance_CL"] = float(cl_raw)
result["CL_unit"] = f"{dose_unit}/({conc_unit}·{time_unit})"
result["pk_unit_note"] = (
f"Automatic unit conversion not available for "
f"dose_unit='{dose_unit}' / conc_unit='{conc_unit}'. "
"Vd and CL are in raw ratio units; convert manually."
)
result["AUC0_inf_model"] = round(float(auc0inf), 4)
result["AUC_unit"] = f"{conc_unit}·{time_unit}"
except (ValueError, TypeError):
pass
return {
"status": "success",
"data": result,
"metadata": {
"source": "Local 1-compartment model fit (scipy.optimize.curve_fit)",
"n_timepoints_fitted": int(sum(valid)),
},
}
[docs]
def _calculate_bioavailability(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate oral bioavailability F from IV and PO AUC and dose."""
auc_po = arguments.get("auc_po")
dose_po = arguments.get("dose_po")
auc_iv = arguments.get("auc_iv")
dose_iv = arguments.get("dose_iv")
if auc_po is None or dose_po is None:
return {
"status": "error",
"error": "auc_po and dose_po are required (PO arm AUC and dose)",
}
if auc_iv is None or dose_iv is None:
return {
"status": "error",
"error": "auc_iv and dose_iv are required (IV arm AUC and dose)",
}
try:
auc_po = float(auc_po)
dose_po = float(dose_po)
auc_iv = float(auc_iv)
dose_iv = float(dose_iv)
except (ValueError, TypeError) as e:
return {"status": "error", "error": f"Invalid numeric values: {e}"}
# Validate all inputs are finite (guards against inf propagating to F=inf).
for name, val in [
("auc_po", auc_po),
("dose_po", dose_po),
("auc_iv", auc_iv),
("dose_iv", dose_iv),
]:
if not np.isfinite(val):
return {
"status": "error",
"error": f"{name} must be a finite number (received {val}).",
}
if auc_po < 0:
return {
"status": "error",
"error": (
f"auc_po ({auc_po}) must be non-negative. "
"A negative AUC is not physically meaningful."
),
}
if auc_iv <= 0 or dose_iv <= 0 or dose_po <= 0:
return {
"status": "error",
"error": "AUC_IV, dose_IV, and dose_PO must all be positive numbers",
}
# F = (AUC_PO × Dose_IV) / (AUC_IV × Dose_PO)
f = (auc_po * dose_iv) / (auc_iv * dose_po)
f_pct = f * 100.0
auc_po_norm = auc_po / dose_po
auc_iv_norm = auc_iv / dose_iv
if f_pct >= 80.0:
category = "High (≥80%)"
note = "Excellent oral bioavailability. Suitable for oral dosing."
elif f_pct >= 30.0:
category = "Moderate (30–80%)"
note = "Acceptable oral bioavailability for most indications."
elif f_pct >= 10.0:
category = "Low (10–30%)"
note = "Poor oral absorption. Consider prodrug strategy or formulation optimization."
else:
category = "Very low (<10%)"
note = "Insufficient oral bioavailability. Likely requires IV/SC/inhaled route."
# F > 1.0 (> 100%) is physically unusual for most drugs.
# While theoretically possible (e.g., saturable first-pass extraction reversal,
# IV formulation with poor tolerability requiring dose reduction), it more
# commonly indicates mismatched dose units or a data entry error.
anomalous_f_warning = None
if f > 1.0:
anomalous_f_warning = (
f"Bioavailability F = {f_pct:.2f}% (> 100%). This is physically unusual "
"for most small molecule drugs. Common causes include: (1) mismatched AUC "
"or dose units between the PO and IV arms, (2) different dose levels "
"triggering saturable first-pass extraction, or (3) a data entry error. "
"Verify that AUC_PO, AUC_IV, dose_PO, and dose_IV are expressed in "
"consistent units."
)
result_data: dict = {
"bioavailability_F": round(float(f), 4),
"bioavailability_pct": round(float(f_pct), 2),
"category": category,
"clinical_note": note,
"formula": "F = (AUC_PO × Dose_IV) / (AUC_IV × Dose_PO)",
"inputs": {
"AUC_PO": auc_po,
"Dose_PO": dose_po,
"AUC_IV": auc_iv,
"Dose_IV": dose_iv,
"AUC_PO_dose_normalized": round(float(auc_po_norm), 4),
"AUC_IV_dose_normalized": round(float(auc_iv_norm), 4),
},
"general_note": (
"F > 20% is typically considered acceptable for small molecule drugs. "
"F ≥ 80% is required for bioequivalence studies. "
"Low F may indicate poor absorption, first-pass metabolism, or low solubility."
),
}
if anomalous_f_warning:
result_data["anomalous_f_warning"] = anomalous_f_warning
return {
"status": "success",
"data": result_data,
"metadata": {
"source": "Local calculation",
"method": "Dose-normalized AUC ratio (Rowland & Tozer, 2011)",
},
}