# Source code for tooluniverse.nca_tool
# nca_tool.py
"""
Non-Compartmental Analysis (NCA) Tool for Pharmacokinetics
Implements standard NCA methods per FDA/EMA regulatory guidelines:
- Full NCA parameter calculation from time-concentration data
(Cmax, Tmax, AUC0-t, AUC0-inf, t½, CL, Vd)
- One-compartment IV bolus model fitting: C(t) = C0 * exp(-k_el * t)
- Oral bioavailability (F) calculation
No external API calls. Uses numpy/scipy for computation.
References:
- FDA Guidance for Industry: Pharmacokinetics in Patients with Impaired Renal Function
- Gabrielsson & Weiner, Pharmacokinetic and Pharmacodynamic Data Analysis (2016)
- Rowland & Tozer, Clinical Pharmacokinetics and Pharmacodynamics (2011)
"""
import math
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
try:
import numpy as np
HAS_NUMPY = True
except ImportError:
HAS_NUMPY = False
try:
from scipy.optimize import curve_fit
from scipy.stats import linregress
HAS_SCIPY = True
except ImportError:
HAS_SCIPY = False
# Unit conversion tables for automatic PK unit standardization.
# Values are multiplicative factors into the tool's internal base units:
# dose mass -> nanograms (ng) and concentration -> ng/mL. Keys are lowercase;
# _try_pk_unit_conversion normalizes case/whitespace before lookup. Both the
# Unicode "μg" and the ASCII "ug"/"mcg" spellings of microgram are accepted.
_MASS_TO_NG: Dict[str, float] = {
    "pg": 1e-3,
    "ng": 1.0,
    "μg": 1e3,
    "ug": 1e3,
    "mcg": 1e3,
    "mg": 1e6,
    "g": 1e9,
    "kg": 1e12,
}
# Concentration units -> factor to ng/mL. Note the volume conversion is folded
# in (e.g. 1 μg/L == 1 ng/mL, hence factor 1.0).
_CONC_TO_NG_PER_ML: Dict[str, float] = {
    "pg/ml": 1e-3,
    "pg/l": 1e-6,
    "ng/ml": 1.0,
    "ng/l": 1e-3,
    "μg/ml": 1e3,
    "ug/ml": 1e3,
    "mcg/ml": 1e3,
    "μg/l": 1.0,
    "ug/l": 1.0,
    "mg/ml": 1e6,
    "mg/l": 1e3,
    "g/ml": 1e9,
    "g/l": 1e6,
}
[docs]
@register_tool("NCATool")
class NCATool(BaseTool):
"""
Non-Compartmental Analysis (NCA) for pharmacokinetics.
Endpoints:
- compute_parameters: Full NCA from time-concentration data
- fit_one_compartment: 1-compartment IV bolus model fitting
- calculate_bioavailability: Oral bioavailability F calculation
"""
def __init__(self, tool_config: Dict[str, Any]):
    """
    Initialize the NCA tool from its configuration dict.

    The endpoint name is read from tool_config["fields"]["endpoint"] and
    defaults to "compute_parameters" when the key (or "fields") is absent.
    """
    super().__init__(tool_config)
    field_config = tool_config.get("fields", {})
    self.endpoint = field_config.get("endpoint", "compute_parameters")
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Dispatch the configured endpoint with the given arguments.

    Returns a dict with "status" ("success"/"error") plus either "data" or
    "error". numpy is a hard requirement for every endpoint; any exception
    raised by an endpoint is caught and reported as an error payload.
    """
    if not HAS_NUMPY:
        return {
            "status": "error",
            "error": "numpy is required for NCA calculations. Install with: pip install numpy",
        }
    # Endpoint-name -> method-name dispatch table (method resolved lazily
    # inside the try so any failure is wrapped like the original handlers).
    endpoint_methods = {
        "compute_parameters": "_compute_parameters",
        "fit_one_compartment": "_fit_one_compartment",
        "calculate_bioavailability": "_calculate_bioavailability",
    }
    method_name = endpoint_methods.get(self.endpoint)
    if method_name is None:
        return {
            "status": "error",
            "error": f"Unknown endpoint: {self.endpoint}",
        }
    try:
        return getattr(self, method_name)(arguments)
    except Exception as e:
        return {"status": "error", "error": f"NCA calculation error: {str(e)}"}
[docs]
def _auc_trapezoid(self, times: "np.ndarray", concs: "np.ndarray") -> float:
"""
Linear-log trapezoidal AUC calculation.
Uses logarithmic trapezoid for declining phases (more accurate),
linear trapezoid for ascending phases and zero concentrations.
"""
auc = 0.0
for i in range(len(times) - 1):
dt = float(times[i + 1] - times[i])
c1, c2 = float(concs[i]), float(concs[i + 1])
if c1 <= 0 or c2 <= 0:
# Linear trapezoid when values near zero
auc += dt * (c1 + c2) / 2.0
elif c2 < c1:
# Logarithmic trapezoid (declining phase)
auc += dt * (c1 - c2) / np.log(c1 / c2)
else:
# Linear trapezoid (ascending phase)
auc += dt * (c1 + c2) / 2.0
return auc
[docs]
def _aumc_trapezoid(self, times: "np.ndarray", concs: "np.ndarray") -> float:
"""
Linear trapezoidal AUMC calculation (area under the first-moment curve).
AUMC = integral(t * C(t) dt). Used to compute model-independent MRT:
MRT_iv = AUMC_0-inf / AUC_0-inf.
Uses the linear trapezoid rule throughout: each interval contributes
dt * (t1*C1 + t2*C2) / 2. This is consistent with most NCA software
(e.g., Phoenix WinNonlin uses linear AUMC with log-linear AUC).
"""
aumc = 0.0
for i in range(len(times) - 1):
dt = float(times[i + 1] - times[i])
t1, t2 = float(times[i]), float(times[i + 1])
c1, c2 = float(concs[i]), float(concs[i + 1])
aumc += dt * (t1 * c1 + t2 * c2) / 2.0
return aumc
def _estimate_terminal_slope(
    self,
    times: "np.ndarray",
    concs: "np.ndarray",
    n_points: int = 3,
    tmax: float = None,
):
    """
    Estimate the terminal elimination rate constant (λz) by log-linear regression.

    Per FDA NCA guidance (2003) and Rowland & Tozer (2011), λz should be
    estimated from all time points in the log-linear terminal phase, which
    begins at Tmax.

    Args:
        times: sample times, paired with concs.
        concs: measured concentrations; only positive values are used
            (log of zero/negative is undefined).
        n_points: number of trailing positive-concentration points to
            regress when tmax is None.
        tmax: time of Cmax. When provided, all points with t >= tmax and
            C > 0 are used, and at least 3 such points are REQUIRED —
            otherwise (None, None, None) is returned (a 2-point regression
            is a perfect fit by construction, R²=1 always, and gives no
            quality signal). There is no fallback to n_points in this case.
            When tmax is None, the last n_points positive concentrations
            are used instead.

    Returns:
        (lambda_z, r_squared, intercept) on success, or (None, None, None)
        when scipy is unavailable, too few usable points exist, or the
        fitted slope is non-negative (no declining phase).
    """
    if not HAS_SCIPY:
        return None, None, None
    # Use positive concentrations only
    valid = concs > 0
    t_valid = times[valid]
    c_valid = concs[valid]
    if len(t_valid) < 2:
        return None, None, None
    # Select terminal phase points
    # FDA NCA guidance requires ≥3 points for reliable λz: a 2-point regression
    # is a perfect fit by construction (R²=1 always), giving no quality signal.
    if tmax is not None:
        # Use >= so the Tmax point itself (C_max) is the first point
        # of the terminal regression. FDA NCA guidance defines the terminal phase
        # as starting AT Tmax (the concentration-time curve begins declining from
        # Cmax). Using strict > excluded the Cmax point, leaving only the
        # declining portion. For plateau profiles (multiple tied Cmax values)
        # combined with the Tmax=last-Cmax fix, >= is required to include enough
        # points (≥3) for a valid regression.
        post_tmax = t_valid >= tmax
        if np.sum(post_tmax) >= 3:
            # FDA NCA: use all post-Tmax points for λz estimation
            t_term = t_valid[post_tmax]
            c_term = c_valid[post_tmax]
        else:
            # Insufficient terminal-phase points (< 3): cannot reliably estimate λz.
            # A 2-point regression is a perfect fit by construction (R²=1 always),
            # giving no quality signal. The caller will emit a terminal_phase_warning.
            return None, None, None
    else:
        n_use = min(n_points, len(t_valid))
        t_term = t_valid[-n_use:]
        c_term = c_valid[-n_use:]
    # If duplicate time points survive into the terminal phase
    # (e.g., re-measured samples at t=1 with different concentrations), the
    # log-linear regression treats them as independent observations. Two points at
    # the same time but different concentrations pull the slope toward zero, causing
    # lambda_z to be underestimated by as much as 11% in test cases. Average
    # concentrations at duplicate times before fitting, consistent with FDA NCA
    # guidance that each time point represents a single true concentration.
    unique_times = np.unique(t_term)
    if len(unique_times) < len(t_term):
        # One or more duplicate time points: average concentrations
        averaged_concs = np.array(
            [float(np.mean(c_term[t_term == ut])) for ut in unique_times]
        )
        t_term = unique_times
        c_term = averaged_concs
    if len(t_term) < 2:
        return None, None, None
    # log-linear fit: ln(C) = intercept - λz * t
    slope, intercept, r_value, _, _ = linregress(t_term, np.log(c_term))
    if slope >= 0:
        return None, None, None  # Must be negative (declining)
    lambda_z = -slope  # λz positive
    r_squared = r_value**2
    return float(lambda_z), float(r_squared), float(intercept)
def _try_pk_unit_conversion(self, dose_val: float, dose_unit: str, conc_unit: str):
    """
    Attempt to standardize a dose/concentration unit pair to ng and ng/mL.

    Both unit strings are looked up case-insensitively (surrounding
    whitespace ignored) in the module-level conversion tables. On success
    returns (dose_in_ng, factor_to_ng_per_ml, True), where the factor
    multiplies a raw concentration to obtain ng/mL. If either unit is
    unrecognized, returns (None, None, False) so the caller can fall back
    to raw-ratio units.
    """
    normalized_dose_unit = dose_unit.strip().lower()
    normalized_conc_unit = conc_unit.strip().lower()
    dose_scale = _MASS_TO_NG.get(normalized_dose_unit)
    conc_scale = _CONC_TO_NG_PER_ML.get(normalized_conc_unit)
    if dose_scale is None or conc_scale is None:
        return None, None, False
    return dose_val * dose_scale, conc_scale, True
def _compute_parameters(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Full NCA computation from time-concentration data.

    Args:
        arguments: request dict with keys:
            times (list, required): sample times.
            concentrations (list, required): measured concentrations,
                paired 1:1 with times. BLQ samples must be submitted as 0.
            dose (number, optional): administered dose; enables CL/Vd/MRT.
            route (str): "iv" (default) or "infusion" are treated as
                intravenous; any other value is extravascular, so
                clearance is reported as apparent clearance CL/F.
            dose_unit / conc_unit / time_unit (str): unit labels, defaults
                "mg", "ng/mL", "h"; used for automatic unit conversion and
                for labelling the output.

    Returns:
        {"status": "success", "data": {...}, "metadata": {...}} containing
        Cmax, Tmax, Clast, Tlast, AUC0-t and — when the terminal slope can
        be estimated — lambda_z, t_half, AUC0-inf, CL, Vd, MRT_iv, plus
        data-quality warnings; or {"status": "error", "error": ...} for
        invalid input.
    """
    times = arguments.get("times", [])
    concentrations = arguments.get("concentrations", [])
    dose = arguments.get("dose")
    route = arguments.get("route", "iv")
    dose_unit = arguments.get("dose_unit", "mg")
    conc_unit = arguments.get("conc_unit", "ng/mL")
    time_unit = arguments.get("time_unit", "h")
    if not times or not concentrations:
        return {
            "status": "error",
            "error": "times and concentrations are required. Provide paired lists of time points and measured concentrations.",
        }
    if len(times) != len(concentrations):
        return {
            "status": "error",
            "error": "times and concentrations must have the same length",
        }
    if len(times) < 3:
        return {
            "status": "error",
            "error": "At least 3 time-concentration points are required for NCA",
        }
    try:
        t = np.array([float(x) for x in times])
        c = np.array([float(x) for x in concentrations])
    except (ValueError, TypeError) as e:
        return {"status": "error", "error": f"Invalid numeric values: {e}"}
    # NaN/inf in times must be rejected: np.argsort on a NaN-containing array
    # produces undefined ordering, silently propagating NaN through all PK parameters.
    if np.any(~np.isfinite(t)):
        return {
            "status": "error",
            "error": (
                "Times contain non-finite values (NaN or inf). "
                "All time values must be finite real numbers. "
                "Remove or correct the affected time points."
            ),
        }
    # NaN/inf must be rejected before any comparison (np.any(c < 0) returns False
    # for NaN, silently bypassing the negative-value guard).
    if np.any(~np.isfinite(c)):
        return {
            "status": "error",
            "error": (
                "Concentrations contain non-finite values (NaN or inf). "
                "All concentration values must be finite real numbers. "
                "Replace NaN/inf with 0 (BLQ) or remove the affected time points."
            ),
        }
    if np.any(c < 0):
        return {
            "status": "error",
            "error": (
                "All concentrations must be non-negative. Negative values likely "
                "represent below-LOQ (BLQ) measurements. Per FDA/EMA NCA guidance, "
                "replace BLQ values with 0 before submitting."
            ),
        }
    # Sort by time
    sort_idx = np.argsort(t)
    t = t[sort_idx]
    c = c[sort_idx]
    # Reject all-negative-time profiles: if every time point is negative (pre-dose
    # only), there is no post-administration pharmacokinetic data to analyse.
    # Without a t≥0 anchor, AUC0-t would integrate over the pre-dose window —
    # a physically meaningless result that would be returned silently.
    if not np.any(t >= 0):
        return {
            "status": "error",
            "error": (
                "All time points are negative (pre-dose only). "
                "No post-dose data is available for NCA calculation. "
                "Provide at least one time point at t ≥ 0 (time of dosing or later)."
            ),
        }
    # The minimum-3 check at the top counts ALL time points including
    # pre-dose samples. Two pre-dose + one post-dose = 3 total passes the guard
    # but yields AUC=0 (single-point trapezoid) silently under status=success.
    # Enforce a minimum of 2 POST-DOSE points for a non-trivial AUC.
    _n_postdose = int(np.sum(t >= 0))
    if _n_postdose < 2:
        return {
            "status": "error",
            "error": (
                f"Only {_n_postdose} post-dose time point(s) (t ≥ 0) found "
                f"(total input points: {len(t)}, pre-dose points: {len(t) - _n_postdose}). "
                "At least 2 post-dose time points are required to compute a "
                "non-trivial AUC (trapezoidal integration requires ≥2 points)."
            ),
        }
    # Detect duplicate time points: the linear-log trapezoid computes
    # dt = t[i+1] - t[i] = 0 for duplicates, contributing zero area.
    # Duplicate times are often caused by data entry errors or sample labelling
    # issues. Warn, but continue — AUC may be underestimated.
    duplicate_times = []
    seen = {}
    for ti in t:
        key = float(ti)
        seen[key] = seen.get(key, 0) + 1
    duplicate_times = [k for k, cnt in seen.items() if cnt > 1]
    # Basic parameters
    # Cmax/Tmax must be computed from POST-DOSE samples only (t >= 0).
    # Pre-dose residual drug can exceed the new dose's Cmax, causing Tmax < 0 and
    # cascading into lambda_z (terminal phase selection uses t > Tmax).
    _postdose_mask = t >= 0
    c_post = c[_postdose_mask]
    t_post = t[_postdose_mask]
    # _postdose_mask guarantees len > 0 (all-pre-dose already caught above).
    # When multiple post-dose samples tie at Cmax (plateau profile),
    # np.argmax returns the FIRST index, so Tmax is set to the plateau start.
    # The terminal-slope filter then includes plateau points in the regression,
    # inflating t_half by up to 80% and underestimating lambda_z by up to 44%.
    # Per FDA NCA guidance, Tmax should be the LAST time at Cmax (end of plateau),
    # so that all t ≥ Tmax data genuinely represents the declining terminal phase.
    _cmax_val = np.max(c_post)
    cmax = float(_cmax_val)
    # When all post-dose concentrations are 0 (all BLQ),
    # _cmax_val=0 and np.where(c_post==0)[0][-1] points to the LAST sample,
    # reporting Tmax=last_sample_time (e.g., 24h) — physically meaningless.
    # When Cmax=0, Tmax should be the first post-dose time (t=0 or first sample).
    if cmax == 0.0:
        tmax = float(t_post[0])
    else:
        tmax = float(t_post[np.where(c_post == _cmax_val)[0][-1]])
    pos_mask = c > 0
    clast = float(c[pos_mask][-1]) if any(pos_mask) else 0.0
    tlast = float(t[pos_mask][-1]) if any(pos_mask) else 0.0
    # Detect mid-profile zeros: a zero concentration that appears between two
    # positive values is biologically implausible and may indicate assay failure,
    # sample swap, or re-absorption. Collect positions for a warning.
    mid_profile_zero_times = []
    first_pos = int(np.argmax(pos_mask)) if any(pos_mask) else len(c)
    last_pos = len(c) - 1 - int(np.argmax(pos_mask[::-1])) if any(pos_mask) else -1
    for idx in range(first_pos + 1, last_pos):
        if c[idx] == 0.0:
            mid_profile_zero_times.append(float(t[idx]))
    # AUC0-t (linear-log trapezoidal).
    # FDA NCA convention: AUC0-t is integrated only up to Tlast, the time of
    # the last *positive* (measurable) concentration. Trailing zeros beyond
    # Tlast represent BLQ samples and must not inflate the integral.
    # Pre-dose time points (t < 0) must also be excluded: including them inflates
    # the area by integrating over negative-time intervals that are not part of
    # the post-administration pharmacokinetic profile.
    if any(pos_mask):
        last_pos_idx = int(np.where(pos_mask)[0][-1])
        first_nonneg_idx = int(np.argmax(t >= 0)) if np.any(t >= 0) else 0
        t_auc = t[first_nonneg_idx : last_pos_idx + 1]
        c_auc = c[first_nonneg_idx : last_pos_idx + 1]
        # T_auc is empty when all positive concentrations are pre-dose
        # (last_pos_idx < first_nonneg_idx). The old code reached t_auc[0] below,
        # raising IndexError: "index 0 is out of bounds for axis 0 with size 0",
        # which was swallowed by the outer except and returned as an opaque message.
        if len(t_auc) == 0:
            return {
                "status": "error",
                "error": (
                    "All positive concentrations are pre-dose (t < 0). "
                    "No post-dose positive concentrations found for AUC computation. "
                    "Check that at least one sample with t ≥ 0 has a measurable "
                    "concentration > 0."
                ),
            }
        pre_dose_times = [float(x) for x in t[:first_nonneg_idx]]
    else:
        # All post-dose concentrations are zero (all BLQ/undetectable).
        # AUC will be 0; use the post-dose observation window for the key label.
        # When pre-dose samples exist, the old code set
        # t_auc = t (the full array including pre-dose times), giving a key like
        # "AUC-2-8" and omitting pre_dose_time_warning. Apply the same
        # first_nonneg_idx filtering as the positive-concentrations path.
        _first_nonneg_idx = int(np.argmax(t >= 0)) if np.any(t >= 0) else 0
        t_auc = t[_first_nonneg_idx:]
        c_auc = c[_first_nonneg_idx:]
        pre_dose_times = [float(x) for x in t[:_first_nonneg_idx]]
    auc0t = self._auc_trapezoid(t_auc, c_auc)
    # AUMC_0-t for later use in model-independent MRT computation.
    aumc0t = self._aumc_trapezoid(t_auc, c_auc)
    # np.float64 overflow — round(np.float64(huge), 4) multiplies by
    # 10^4 internally, overflowing to np.inf for values near the float max.
    # Guard with an isfinite check immediately after computing AUC.
    if not math.isfinite(float(auc0t)):
        return {
            "status": "error",
            "error": (
                f"AUC computation resulted in a non-finite value ({float(auc0t)}). "
                "This typically indicates extremely large concentration or time values. "
                "Check input data units — rescaling may be required (e.g., ng/mL → μg/mL, "
                "hours → days)."
            ),
        }
    # Terminal phase parameters — use all post-Tmax points per FDA NCA guidance
    lambda_z, r_sq_terminal, _ = self._estimate_terminal_slope(t, c, tmax=tmax)
    # round(..., 4) truncates to 0.0 for sub-nanosecond values
    # (e.g., round(5e-5, 4) = 0.0), producing contradictory output (Tlast=0
    # but non-zero AUC or extrapolation_pct). Use full float precision so that
    # extreme-scale profiles are self-consistent. This matches the lambda_z
    # treatment below: "Preserve full precision for lambda_z."
    # Additional protection: round(float(x), 4) overflows for x > ~1.79e304
    # because x * 10^4 exceeds the float maximum. Use full precision (no rounding)
    # for AUC — consistent with the lambda_z / t_half treatment below.
    _auc0t = float(auc0t)
    result = {
        "Cmax": float(cmax),
        "Tmax": float(tmax),
        "Clast": float(clast),
        "Tlast": float(tlast),
        # Key was always "AUC0-X" even when data starts after t=0.
        # If the first time point is > 0, the integral starts there (not at 0),
        # so the key should reflect the actual integration bounds to avoid a 24%+
        # overstatement when the user reads "AUC0-24" but actually gets AUC_2-24.
        # Use t_auc[-1] (actual upper integration bound) instead of
        # tlast (last POSITIVE concentration time). When all concentrations are
        # zero, tlast=0.0 but integration spans 0 to the last observation time.
        # For non-zero profiles t_auc[-1]==tlast, so this is backward compatible.
        f"AUC{float(t_auc[0]):.6g}-{float(t_auc[-1]):.6g}": _auc0t,
        # "AUC0_last" is misleading when data starts after t=0 (no t=0
        # sample), because "0" implies integration from time of dosing (t=0) but
        # the actual integral starts at the first observed time (e.g., t=2).
        # Keep for backward compatibility; add an explanatory note when t[0] > 0.
        "AUC0_last": _auc0t,
        "units": {
            "Cmax": conc_unit,
            "Tmax": time_unit,
            "AUC": f"{conc_unit}·{time_unit}",
        },
    }
    # Clarify that AUC0_last does NOT start at t=0 when first sample > 0.
    if float(t_auc[0]) > 0:
        result["auc_start_note"] = (
            f"AUC integration starts at the first observed post-dose time "
            f"(t={float(t_auc[0]):.6g}), not at t=0 (no sample at dosing time). "
            f"AUC0_last and {float(t_auc[0]):.6g}-{float(t_auc[-1]):.6g} both reflect "
            "AUC from the first sample to Tlast, which underestimates true AUC0-last "
            "by the missing pre-first-sample area."
        )
    # Warn when pre-dose (t < 0) time points were excluded from AUC integration.
    if pre_dose_times:
        result["pre_dose_time_warning"] = (
            f"Pre-dose time points (t < 0) at t={pre_dose_times} were excluded "
            "from AUC integration. AUC0-t is computed from t=0 per FDA/EMA NCA "
            "guidance. Pre-dose samples are retained for visual inspection only."
        )
    # Warn when duplicate time points are present: dt=0 for duplicates means
    # those intervals contribute zero area to the trapezoid, so AUC is
    # underestimated. This typically indicates a data entry or labelling error.
    if duplicate_times:
        result["duplicate_time_warning"] = (
            f"Duplicate time points detected at t={duplicate_times}. Each duplicate "
            "contributes a zero-width trapezoid interval (area = 0), which may "
            "underestimate AUC. If duplicates fall in the terminal phase, "
            "concentrations at the same time are averaged before lambda_z regression "
            "to prevent slope bias. Check for data entry errors or sample labelling issues."
        )
    # Check monotonicity of post-Tmax concentrations.
    # Non-monotonic profiles (rising concentrations after Tmax) indicate
    # re-absorption, enterohepatic circulation, or assay issues — making
    # terminal slope estimation unreliable.
    valid_mask = c > 0
    t_valid_all = t[valid_mask]
    c_valid_all = c[valid_mask]
    post_tmax_mask = t_valid_all >= tmax  # use >= to match terminal slope
    if np.sum(post_tmax_mask) >= 3:
        c_post = c_valid_all[post_tmax_mask]
        if np.any(np.diff(c_post) > 0):
            result["non_monotonic_terminal_warning"] = (
                "Post-Tmax concentrations are not monotonically decreasing. "
                "This may indicate re-absorption, enterohepatic circulation, a secondary "
                "Cmax, or assay variability. Terminal slope (lambda_z) may be unreliable."
            )
    # Validate dose unconditionally — before the lambda_z branch.
    # The original validation was nested inside `if lambda_z is not None:`,
    # so invalid doses (negative, zero, NaN, inf) were silently accepted when
    # lambda_z could not be estimated (sparse terminal phase data).
    if dose is not None:
        try:
            _dose_early = float(dose)
        except (ValueError, TypeError):
            return {
                "status": "error",
                "error": "dose must be a numeric value. Non-numeric dose is not valid.",
            }
        if not math.isfinite(_dose_early):
            return {
                "status": "error",
                "error": (
                    f"dose ({_dose_early}) must be a finite positive number. "
                    "NaN and inf are not valid dose values."
                ),
            }
        if _dose_early <= 0:
            return {
                "status": "error",
                "error": (
                    f"dose ({_dose_early}) must be positive (> 0). "
                    "Negative or zero dose is not physically meaningful. "
                    "CL and Vd cannot be computed without a positive dose."
                ),
            }
    if lambda_z is not None:
        t_half = np.log(2) / lambda_z
        # AUC0-inf = AUC0-t + Clast/λz
        auc0inf = auc0t + clast / lambda_z
        extrap_pct = float((clast / lambda_z) / auc0inf * 100)
        # Preserve full precision for lambda_z: round(1e-9, 9) = 0.0 for any
        # lambda_z < 5e-10 h⁻¹ (t½ > ~1.4 billion hours). Stored 0.0 contradicts
        # t_half and AUC0-inf (which both use the unrounded lambda_z) — a downstream
        # caller that re-derives t_half from stored lambda_z = 0 would get inf.
        # Use full float precision, consistent with t_half and AUC0-inf.
        result["lambda_z"] = float(lambda_z)
        result["t_half"] = float(t_half)
        result["r_squared_terminal_fit"] = round(float(r_sq_terminal), 4)
        # Emit a terminal_fit_quality_warning when the log-linear terminal
        # phase R² is poor. _fit_one_compartment already has a similar guard
        # (R² < 0.85), but _compute_parameters never checked its own terminal fit.
        # A poor terminal R² means lambda_z, t_half, AUC0-inf, CL, and Vd are all
        # unreliable — the user should be informed before acting on these values.
        if r_sq_terminal < 0.85:
            warn_level = "Very poor" if r_sq_terminal < 0.5 else "Poor"
            result["terminal_fit_quality_warning"] = (
                f"{warn_level} terminal-phase fit (R² = {round(r_sq_terminal, 4)} < 0.85). "
                "lambda_z, t½, AUC0-inf, clearance, and Vd estimates may be unreliable. "
                "Possible causes: non-log-linear terminal decline (multi-compartment "
                "kinetics, redistribution), insufficient time points in the terminal "
                "phase, or assay variability. Consider adding more late time points "
                "or using a non-compartmental model review."
            )
        result["AUC0-inf"] = float(auc0inf)
        result["AUC_extrapolation_pct"] = round(extrap_pct, 1)
        if extrap_pct > 20.0:
            result["AUC_extrapolation_warning"] = (
                f"AUC extrapolation is {extrap_pct:.1f}% (>20%). "
                "AUC0-inf may be unreliable. Add more late time points to "
                "better characterize the terminal elimination phase "
                "(FDA/EMA guideline: extrapolation should be <20%)."
            )
        if dose is not None:
            try:
                dose_val = float(dose)
                # NaN/inf bypass the <= 0 guard: NaN <= 0 is False, inf > 0.
                # Both produce invalid CL/Vd (NaN or inf) under status=success.
                if not math.isfinite(dose_val):
                    return {
                        "status": "error",
                        "error": (
                            f"dose ({dose_val}) must be a finite positive number. "
                            "NaN and inf are not valid dose values."
                        ),
                    }
                if dose_val <= 0:
                    return {
                        "status": "error",
                        "error": (
                            f"dose ({dose_val}) must be positive (> 0). "
                            "Negative or zero dose is not physically meaningful. "
                            "CL and Vd cannot be computed without a positive dose."
                        ),
                    }
                dose_ng, conc_factor, converted = self._try_pk_unit_conversion(
                    dose_val, dose_unit, conc_unit
                )
                if converted:
                    # AUC in (ng/mL)·time_unit after applying conc_factor
                    auc_ng_ml = auc0inf * conc_factor
                    cl_ml = dose_ng / auc_ng_ml  # mL/time_unit
                    cl_l = cl_ml / 1000.0  # L/time_unit
                    # round(cl_l, 6) silently truncates CL to 0.0 for
                    # any CL < 5e-7 L/time (e.g., rare biotech analytes with tiny
                    # doses in ng). Use full precision consistent with lambda_z/AUC.
                    result["clearance_CL"] = float(cl_l)
                    result["clearance_CL_unit"] = f"L/{time_unit}"
                    # For non-IV routes, Dose/AUC = CL/F (apparent
                    # clearance), NOT true systemic clearance (which requires IV
                    # data or a known bioavailability F). Label it clearly so
                    # users don't compare oral and IV CL values without adjustment.
                    # 'infusion' is also an intravenous route
                    # (F=1 by definition); the old `route != "iv"` check incorrectly
                    # labeled IV infusion clearance as CL/F (apparent clearance).
                    if route not in ("iv", "infusion"):
                        result["clearance_note"] = (
                            "clearance_CL is CL/F (apparent clearance) for non-IV "
                            f"route='{route}'. True systemic CL = CL/F × F, where "
                            "bioavailability F is unknown without an IV reference. "
                            "Do not compare directly with IV-derived clearance."
                        )
                    # 'infusion' is an IV route (F=1), so Vd
                    # and MRT_iv are just as valid for infusion as for iv bolus.
                    # The clearance_note guard already uses ("iv", "infusion");
                    # the Vd/MRT block was never updated to match.
                    if route in ("iv", "infusion"):
                        vd_l = cl_l / lambda_z  # L
                        # round(vd_l, 4) truncates Vd to 0.0 for any
                        # Vd < 5e-5 L. Use full precision (same rationale as CL).
                        result["volume_distribution_Vd"] = float(vd_l)
                        result["Vd_unit"] = "L"
                        # MRT_iv = AUMC_0-inf / AUC_0-inf (model-independent NCA).
                        # The previous formula 1/lambda_z is only valid for a
                        # mono-exponential model. For multi-compartment profiles the
                        # error can reach 10%+. AUMC_0-inf = AUMC_0-t + Clast*(tlast/λz + 1/λz²).
                        _aumc0inf = float(aumc0t) + clast * (
                            tlast / lambda_z + 1.0 / lambda_z**2
                        )
                        _mrt_iv = (
                            _aumc0inf / float(auc0inf)
                            if float(auc0inf) > 0
                            else None
                        )
                        if _mrt_iv is not None:
                            result["MRT_iv"] = round(_mrt_iv, 4)
                else:
                    cl = dose_val / auc0inf
                    # round(float(cl), 6) silently truncates
                    # CL to 0.0 for any CL < 5e-7. The identical fix was applied
                    # to the known-units path (float(cl_l) full precision)
                    # but was never ported here. Use full float precision.
                    result["clearance_CL"] = float(cl)
                    result["clearance_CL_unit"] = (
                        f"{dose_unit}/({conc_unit}·{time_unit})"
                    )
                    result["pk_unit_note"] = (
                        f"Automatic unit conversion not available for "
                        f"dose_unit='{dose_unit}' / conc_unit='{conc_unit}'. "
                        "CL and Vd are in raw ratio units; convert manually."
                    )
                    # Same infusion fix for unknown-units path.
                    if route in ("iv", "infusion"):
                        vd = cl / lambda_z
                        # Same full-precision fix for Vd.
                        result["volume_distribution_Vd"] = float(vd)
                        result["Vd_unit"] = f"{dose_unit}/{conc_unit}"
                        # MRT_iv = AUMC_0-inf / AUC_0-inf (model-independent NCA).
                        # The previous formula 1/lambda_z is only valid for a
                        # mono-exponential model. For multi-compartment profiles the
                        # error can reach 10%+. AUMC_0-inf = AUMC_0-t + Clast*(tlast/λz + 1/λz²).
                        _aumc0inf = float(aumc0t) + clast * (
                            tlast / lambda_z + 1.0 / lambda_z**2
                        )
                        _mrt_iv = (
                            _aumc0inf / float(auc0inf)
                            if float(auc0inf) > 0
                            else None
                        )
                        if _mrt_iv is not None:
                            result["MRT_iv"] = round(_mrt_iv, 4)
            except (ValueError, TypeError):
                pass
    else:
        result["terminal_phase_warning"] = (
            "Could not estimate terminal slope (λz). "
            "Add more time points in the terminal elimination phase."
        )
    # When the first sample is at t > 0, AUC0-inf is
    # underestimated (the pre-first-sample area is missing). CL = Dose/AUC0-inf,
    # Vd = CL/λz, and MRT = AUMC0-inf/AUC0-inf are all derived from AUC0-inf
    # and therefore carry the same bias. The existing auc_start_note documents
    # the AUC issue; this note explicitly flags the downstream PK parameters so
    # users who read only CL/Vd/MRT are not misled.
    if float(t_auc[0]) > 0 and any(
        k in result for k in ("clearance_CL", "volume_distribution_Vd", "MRT_iv")
    ):
        result["derived_pk_bias_note"] = (
            f"clearance_CL, volume_distribution_Vd, and MRT_iv are derived from "
            f"AUC0-inf, which is underestimated because the first observed sample "
            f"is at t={float(t_auc[0]):.6g} (not t=0). The missing pre-first-sample "
            "area causes CL and Vd to be overestimated and MRT to be biased. "
            "Include a sample at or near t=0, or apply compartmental modelling, "
            "to obtain accurate CL, Vd, and MRT."
        )
    if mid_profile_zero_times:
        result["mid_profile_zero_warning"] = (
            f"Zero concentration detected at t={mid_profile_zero_times} between "
            "positive values. This is biologically implausible and may indicate "
            "assay failure, sample swap, or re-absorption. "
            "AUC calculation proceeds via linear trapezoid at those intervals "
            "but the result may be unreliable."
        )
    result["method"] = "Linear-log trapezoidal NCA"
    result["note"] = (
        "AUC uses linear trapezoid for ascending phases and log-trapezoid "
        "for declining phases (FDA/EMA recommended method). "
        "Terminal t½ estimated from log-linear regression of last 3+ positive concentrations."
    )
    return {
        "status": "success",
        "data": result,
        "metadata": {
            "source": "Local NCA computation (numpy/scipy)",
            "n_timepoints": len(t),
            "route_of_administration": route,
            "reference": "FDA NCA Guidance; Gabrielsson & Weiner (2016)",
        },
    }
[docs]
def _fit_one_compartment(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Fit 1-compartment IV bolus model: C(t) = C0 * exp(-k_el * t)."""
if not HAS_SCIPY:
return {
"status": "error",
"error": "scipy is required for model fitting. Install with: pip install scipy",
}
times = arguments.get("times", [])
concentrations = arguments.get("concentrations", [])
dose = arguments.get("dose")
dose_unit = arguments.get("dose_unit", "mg")
conc_unit = arguments.get("conc_unit", "ng/mL")
time_unit = arguments.get("time_unit", "h")
if not times or not concentrations:
return {"status": "error", "error": "times and concentrations are required"}
if len(times) != len(concentrations):
return {
"status": "error",
"error": "times and concentrations must have the same length",
}
try:
t = np.array([float(x) for x in times])
c = np.array([float(x) for x in concentrations])
except (ValueError, TypeError) as e:
return {"status": "error", "error": f"Invalid numeric values: {e}"}
# Validate times array: NaN in times gives a misleading "mono-exponential
# decline" error from curve_fit instead of the true cause.
if np.any(~np.isfinite(t)):
return {
"status": "error",
"error": (
"Times contain non-finite values (NaN or inf). "
"All time values must be finite real numbers. "
"Remove or correct the affected time points."
),
}
# Validate concentration array (consistent with _compute_parameters).
if np.any(~np.isfinite(c)):
return {
"status": "error",
"error": (
"Concentrations contain non-finite values (NaN or inf). "
"Replace with valid measured values."
),
}
if np.any(c < 0):
return {
"status": "error",
"error": (
"All concentrations must be non-negative. Negative values likely "
"represent below-LOQ (BLQ) measurements. Per FDA/EMA NCA guidance, "
"replace BLQ values with 0 before submitting."
),
}
# Use positive concentrations at non-negative times only.
# Pre-dose (t < 0) data points represent baseline/endogenous levels
# and should not be included in a mono-exponential IV bolus fit. The original
# `valid = c > 0` silently included negative-time data, biasing the C0 and
# k_el estimates. FDA/EMA NCA guidance excludes pre-dose samples from PK fitting.
valid = (c > 0) & (t >= 0)
if sum(valid) < 3:
return {
"status": "error",
"error": "At least 3 positive concentration values required for model fitting",
}
t_fit = t[valid]
c_fit = c[valid]
# All-identical time values make exponential fitting
# degenerate — the optimizer receives five observations all at the same
# independent-variable value and returns an arbitrary k_el. Detect this
# and return a clear error rather than a misleading result.
if np.ptp(t_fit) == 0.0: # peak-to-peak range == 0 means all times equal
return {
"status": "error",
"error": (
"All time values are identical "
f"(t = {float(t_fit[0])} for all {int(sum(valid))} positive "
"concentration points). At least two distinct time points are "
"required to estimate an elimination rate constant."
),
}
def one_compartment(t_arr, c0, k_el):
return c0 * np.exp(-k_el * t_arr)
try:
c0_init = float(c_fit[0]) if c_fit[0] > 0 else float(np.max(c_fit))
k_init = np.log(2) / (float(t_fit[-1]) / 2 + 1e-9)
popt, pcov = curve_fit(
one_compartment,
t_fit,
c_fit,
p0=[c0_init, k_init],
bounds=([0.0, 1e-9], [np.inf, np.inf]),
maxfev=10000,
)
c0_fit, k_el_fit = popt
perr = np.sqrt(np.diag(pcov))
except Exception as e:
return {
"status": "error",
"error": f"Model fitting failed: {str(e)}. Ensure data follows mono-exponential decline.",
}
t_half = np.log(2) / k_el_fit
# R² on fitted points
c_pred = one_compartment(t_fit, c0_fit, k_el_fit)
ss_res = float(np.sum((c_fit - c_pred) ** 2))
ss_tot = float(np.sum((c_fit - np.mean(c_fit)) ** 2))
# `1.0 - ss_res/ss_tot` can produce -0.0 (IEEE-754
# negative zero) when ss_res ≈ ss_tot. Apply the same `+ 0.0` pattern
# used by DoseResponse to eliminate negative zero from JSON output.
r_squared = (float(1.0 - ss_res / ss_tot) + 0.0) if ss_tot > 0 else 0.0
if r_squared >= 0.95:
gof = "Good (R²≥0.95)"
elif r_squared >= 0.85:
gof = "Moderate (0.85≤R²<0.95)"
else:
gof = "Poor (R²<0.85) — consider multi-compartment model"
# Preserve full precision for k_el: round(1e-9, 6) = 0.0, which is wrong
# for drugs near the lower optimization bound (1e-9 h⁻¹ ≡ t½ ≈ 693M h).
# Same applies to t_half: round(float(t_half), 4) = 0.0 for sub-ns half-lives.
k_el_val = float(k_el_fit)
# SE of k_el needs same precision to avoid reporting 0.0.
k_el_se_val = float(perr[1])
result = {
"model": "1-compartment IV bolus",
"equation": "C(t) = C0 × exp(-k_el × t)",
"C0_initial_concentration": round(float(c0_fit), 4),
"k_el_elimination_rate": k_el_val,
"t_half": float(t_half),
# Apply + 0.0 AFTER round() because round() of a tiny
# negative value (e.g. -1e-17) produces -0.0 in IEEE-754 arithmetic.
# The + 0.0 at the computation site (line above) eliminates -0.0 from the
# variable, but round(-tiny, 4) can produce -0.0 afresh.
"r_squared": round(r_squared, 4) + 0.0,
"goodness_of_fit": gof,
"standard_errors": {
"C0": round(float(perr[0]), 4),
"k_el": k_el_se_val,
},
"units": {
"C0": conc_unit,
"k_el": f"1/{time_unit}",
"t_half": time_unit,
},
}
# Emit a structured fit_quality_warning for poor fits (R² < 0.85).
# The goodness_of_fit label transitions at 0.85/0.95, so R² < 0.85 is
# already labelled "Poor". Providing a machine-readable field allows
# programmatic callers to detect and flag poor fits without parsing strings.
# Threshold aligns with the existing "Poor (R²<0.85)" label.
if r_squared < 0.85:
if r_squared < 0.1:
fit_warn_msg = (
f"Very poor fit (R² = {round(r_squared, 4) + 0.0}): the 1-compartment "
"model explains < 10% of the variance in the concentration data. "
"This often indicates a flat profile (no measurable elimination "
"over the sampling window), highly noisy data, or a multi-phasic "
"decline. The t½ and k_el estimates are unreliable."
)
else:
fit_warn_msg = (
f"Poor fit (R² = {round(r_squared, 4) + 0.0} < 0.85): the 1-compartment "
"model does not describe the data well. Consider a 2-compartment "
"model or non-compartmental analysis. The t½ and k_el estimates "
"should be interpreted with caution."
)
result["fit_quality_warning"] = fit_warn_msg
# Warn when k_el is at or near the optimization lower bound (1e-9).
# At the bound, the optimizer could not find a smaller value — the half-life
# estimate (ln2/k_el) may be astronomically large and unreliable.
if k_el_val < 1e-8:
result["k_el_bound_warning"] = (
f"k_el ({k_el_val:.2e} 1/{time_unit}) is at or near the optimization "
"lower bound (1e-9). This may indicate an extremely long half-life drug "
"or that the data do not support reliable estimation of the elimination "
f"rate constant. Corresponding t½ = {float(t_half):.4g} {time_unit} "
"should be interpreted with great caution."
)
if dose is not None:
try:
dose_val = float(dose)
# NaN/inf bypass the <= 0 guard: NaN <= 0 is False, inf > 0.
if not math.isfinite(dose_val):
return {
"status": "error",
"error": (
f"dose ({dose_val}) must be a finite positive number. "
"NaN and inf are not valid dose values."
),
}
# Validate dose: zero or negative doses are not physically meaningful.
if dose_val <= 0:
return {
"status": "error",
"error": (
f"dose ({dose_val}) must be positive (> 0). "
"Zero or negative doses are not physically meaningful. "
"Vd and CL cannot be computed without a positive dose."
),
}
auc0inf = c0_fit / k_el_fit
dose_ng, conc_factor, converted = self._try_pk_unit_conversion(
dose_val, dose_unit, conc_unit
)
if converted:
c0_ng_per_ml = c0_fit * conc_factor
vd_ml = dose_ng / c0_ng_per_ml # mL
vd_l = vd_ml / 1000.0 # L
cl_l = k_el_fit * vd_l # L/time_unit
# Round(vd_l, 4) silently truncates Vd to
# 0.0 for any Vd < 5e-5 L (e.g., 1 ng dose ÷ 100 ng/mL = 1e-5 L).
# The _compute_parameters path already uses float(vd_l) (full
# precision); apply the identical fix here.
result["volume_distribution_Vd"] = float(vd_l)
result["Vd_unit"] = "L"
result["clearance_CL"] = float(cl_l)
result["CL_unit"] = f"L/{time_unit}"
else:
vd_raw = dose_val / c0_fit
cl_raw = k_el_fit * vd_raw
# Same full-precision fix for unknown-units path.
result["volume_distribution_Vd"] = float(vd_raw)
result["Vd_unit"] = f"{dose_unit}/{conc_unit}"
result["clearance_CL"] = float(cl_raw)
result["CL_unit"] = f"{dose_unit}/({conc_unit}·{time_unit})"
result["pk_unit_note"] = (
f"Automatic unit conversion not available for "
f"dose_unit='{dose_unit}' / conc_unit='{conc_unit}'. "
"Vd and CL are in raw ratio units; convert manually."
)
result["AUC0_inf_model"] = round(float(auc0inf), 4)
result["AUC_unit"] = f"{conc_unit}·{time_unit}"
except (ValueError, TypeError):
pass
return {
"status": "success",
"data": result,
"metadata": {
"source": "Local 1-compartment model fit (scipy.optimize.curve_fit)",
"n_timepoints_fitted": int(sum(valid)),
},
}
[docs]
def _calculate_bioavailability(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate oral bioavailability F from IV and PO AUC and dose."""
auc_po = arguments.get("auc_po")
dose_po = arguments.get("dose_po")
auc_iv = arguments.get("auc_iv")
dose_iv = arguments.get("dose_iv")
if auc_po is None or dose_po is None:
return {
"status": "error",
"error": "auc_po and dose_po are required (PO arm AUC and dose)",
}
if auc_iv is None or dose_iv is None:
return {
"status": "error",
"error": "auc_iv and dose_iv are required (IV arm AUC and dose)",
}
try:
auc_po = float(auc_po)
dose_po = float(dose_po)
auc_iv = float(auc_iv)
dose_iv = float(dose_iv)
except (ValueError, TypeError) as e:
return {"status": "error", "error": f"Invalid numeric values: {e}"}
# Validate all inputs are finite (guards against inf propagating to F=inf).
for name, val in [
("auc_po", auc_po),
("dose_po", dose_po),
("auc_iv", auc_iv),
("dose_iv", dose_iv),
]:
if not np.isfinite(val):
return {
"status": "error",
"error": f"{name} must be a finite number (received {val}).",
}
if auc_po < 0:
return {
"status": "error",
"error": (
f"auc_po ({auc_po}) must be non-negative. "
"A negative AUC is not physically meaningful."
),
}
if auc_iv <= 0 or dose_iv <= 0 or dose_po <= 0:
return {
"status": "error",
"error": "AUC_IV, dose_IV, and dose_PO must all be positive numbers",
}
# F = (AUC_PO × Dose_IV) / (AUC_IV × Dose_PO)
f = (auc_po * dose_iv) / (auc_iv * dose_po)
f_pct = f * 100.0
auc_po_norm = auc_po / dose_po
auc_iv_norm = auc_iv / dose_iv
if f_pct >= 80.0:
category = "High (≥80%)"
note = "Excellent oral bioavailability. Suitable for oral dosing."
elif f_pct >= 30.0:
category = "Moderate (30–80%)"
note = "Acceptable oral bioavailability for most indications."
elif f_pct >= 10.0:
category = "Low (10–30%)"
note = "Poor oral absorption. Consider prodrug strategy or formulation optimization."
else:
category = "Very low (<10%)"
note = "Insufficient oral bioavailability. Likely requires IV/SC/inhaled route."
# F > 1.0 (> 100%) is physically unusual for most drugs.
# While theoretically possible (e.g., saturable first-pass extraction reversal,
# IV formulation with poor tolerability requiring dose reduction), it more
# commonly indicates mismatched dose units or a data entry error.
anomalous_f_warning = None
if f > 1.0:
anomalous_f_warning = (
f"Bioavailability F = {f_pct:.2f}% (> 100%). This is physically unusual "
"for most small molecule drugs. Common causes include: (1) mismatched AUC "
"or dose units between the PO and IV arms, (2) different dose levels "
"triggering saturable first-pass extraction, or (3) a data entry error. "
"Verify that AUC_PO, AUC_IV, dose_PO, and dose_IV are expressed in "
"consistent units."
)
result_data: dict = {
"bioavailability_F": round(float(f), 4),
"bioavailability_pct": round(float(f_pct), 2),
"category": category,
"clinical_note": note,
"formula": "F = (AUC_PO × Dose_IV) / (AUC_IV × Dose_PO)",
"inputs": {
"AUC_PO": auc_po,
"Dose_PO": dose_po,
"AUC_IV": auc_iv,
"Dose_IV": dose_iv,
"AUC_PO_dose_normalized": round(float(auc_po_norm), 4),
"AUC_IV_dose_normalized": round(float(auc_iv_norm), 4),
},
"general_note": (
"F > 20% is typically considered acceptable for small molecule drugs. "
"F ≥ 80% is required for bioequivalence studies. "
"Low F may indicate poor absorption, first-pass metabolism, or low solubility."
),
}
if anomalous_f_warning:
result_data["anomalous_f_warning"] = anomalous_f_warning
return {
"status": "success",
"data": result_data,
"metadata": {
"source": "Local calculation",
"method": "Dose-normalized AUC ratio (Rowland & Tozer, 2011)",
},
}