tooluniverse.scientific_calculator_tools 源代码
"""
Scientific Calculator Tools
General-purpose scientific calculators: DNA translation (reading frames),
molecular formula analysis, equilibrium solver, enzyme kinetics, and
statistical tests. All pure Python — no external dependencies.
"""
import math
import re
from functools import reduce
from math import gcd
from typing import Any, Dict, List, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
# ── Standard genetic codon table (NCBI Code 1) ──
CODON_TABLE = {
"TTT": "F",
"TTC": "F",
"TTA": "L",
"TTG": "L",
"CTT": "L",
"CTC": "L",
"CTA": "L",
"CTG": "L",
"ATT": "I",
"ATC": "I",
"ATA": "I",
"ATG": "M",
"GTT": "V",
"GTC": "V",
"GTA": "V",
"GTG": "V",
"TCT": "S",
"TCC": "S",
"TCA": "S",
"TCG": "S",
"CCT": "P",
"CCC": "P",
"CCA": "P",
"CCG": "P",
"ACT": "T",
"ACC": "T",
"ACA": "T",
"ACG": "T",
"GCT": "A",
"GCC": "A",
"GCA": "A",
"GCG": "A",
"TAT": "Y",
"TAC": "Y",
"TAA": "*",
"TAG": "*",
"CAT": "H",
"CAC": "H",
"CAA": "Q",
"CAG": "Q",
"AAT": "N",
"AAC": "N",
"AAA": "K",
"AAG": "K",
"GAT": "D",
"GAC": "D",
"GAA": "E",
"GAG": "E",
"TGT": "C",
"TGC": "C",
"TGA": "*",
"TGG": "W",
"CGT": "R",
"CGC": "R",
"CGA": "R",
"CGG": "R",
"AGT": "S",
"AGC": "S",
"AGA": "R",
"AGG": "R",
"GGT": "G",
"GGC": "G",
"GGA": "G",
"GGG": "G",
}
# ── Atomic masses ──
ATOMIC_MASS = {"C": 12.011, "H": 1.008, "O": 15.999, "N": 14.007}
ATOMIC_MASS_EXT = {
**ATOMIC_MASS,
"S": 32.06,
"P": 30.974,
"F": 18.998,
"Cl": 35.45,
"Br": 79.904,
"I": 126.904,
}
# ── Helper functions ──
def _translate_frame(dna: str, frame: int) -> str:
"""Translate DNA from given frame offset (0, 1, or 2)."""
protein = []
for i in range(frame, len(dna) - 2, 3):
codon = dna[i : i + 3]
protein.append(CODON_TABLE.get(codon, "?"))
return "".join(protein)
def _longest_orf(protein: str) -> str:
"""Find longest stretch without a stop codon."""
segments = protein.split("*")
return max(segments, key=len) if segments else protein
def _parse_formula(formula_str: str) -> Dict[str, int]:
"""Parse molecular formula string into element-count dict."""
tokens = re.findall(r"([A-Z][a-z]?)(\d*)", formula_str)
atoms: Dict[str, int] = {}
for element, count in tokens:
if not element:
continue
n = int(count) if count else 1
atoms[element] = atoms.get(element, 0) + n
return atoms
def _formula_str(atoms: Dict[str, int]) -> str:
"""Hill notation: C first, H second, then alphabetical."""
order = [e for e in ("C", "H") if e in atoms]
order += sorted(k for k in atoms if k not in ("C", "H"))
return "".join(
f"{e}{n if n > 1 else ''}" for e, n in ((e, atoms[e]) for e in order)
)
def _igcd(values: List[int]) -> int:
return reduce(gcd, values)
def _degrees_of_unsaturation(atoms: Dict[str, int]) -> float:
C = atoms.get("C", 0)
H = atoms.get("H", 0)
N = atoms.get("N", 0)
X = sum(atoms.get(x, 0) for x in ("F", "Cl", "Br", "I"))
return (2 * C + 2 + N - H - X) / 2
def _interpret_dou(dou: float) -> str:
if dou == 0:
return "saturated, open-chain (no rings or double bonds)"
hints = []
if dou >= 4:
hints.append("likely contains a benzene ring (4 DoU)")
if dou >= 2:
hints.append(
"could have a triple bond, or two double bonds, or a ring + double bond"
)
if dou == 1:
hints.append("one ring OR one double bond")
return "; ".join(hints) if hints else f"DoU = {dou}"
def _elemental_composition(atoms: Dict[str, int]) -> Dict[str, Any]:
total_mass = sum(n * ATOMIC_MASS_EXT.get(e, 0.0) for e, n in atoms.items())
pcts = {
e: round(n * ATOMIC_MASS_EXT.get(e, 0.0) / total_mass * 100, 2)
for e, n in atoms.items()
}
return {"molar_mass": round(total_mass, 3), "percentages": pcts}
def _linreg(xs: List[float], ys: List[float]):
"""Simple linear regression. Returns (slope, intercept, r2)."""
n = len(xs)
sx = sum(xs)
sy = sum(ys)
sxx = sum(x * x for x in xs)
sxy = sum(x * y for x, y in zip(xs, ys))
denom = n * sxx - sx * sx
if abs(denom) < 1e-30:
raise ValueError("Degenerate data: all x values identical.")
slope = (n * sxy - sx * sy) / denom
intercept = (sy * sxx - sx * sxy) / denom
y_mean = sy / n
ss_tot = sum((y - y_mean) ** 2 for y in ys)
ss_res = sum((y - (slope * x + intercept)) ** 2 for x, y in zip(xs, ys))
r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else 0.0
return slope, intercept, r2
# ── Chi-square p-value via gamma function ──
def _regularized_gamma_upper(
a: float, x: float, max_iter: int = 300, eps: float = 1e-12
) -> float:
"""Upper regularized incomplete gamma: Q(a, x) = 1 - P(a, x)."""
if x < 0 or a <= 0:
raise ValueError(f"Invalid arguments: a={a}, x={x}.")
if x == 0:
return 1.0
log_gamma_a = math.lgamma(a)
if x < a + 1.0:
ap = a
delta = 1.0 / a
total = delta
for _ in range(max_iter):
ap += 1.0
delta *= x / ap
total += delta
if abs(delta) < abs(total) * eps:
break
return 1.0 - math.exp(-x + a * math.log(x) - log_gamma_a) * total
else:
fpmin = 1e-300
b_cf = x + 1.0 - a
c = 1.0 / fpmin
d = 1.0 / b_cf
h = d
for i in range(1, max_iter + 1):
an = -i * (i - a)
b_cf += 2.0
d = an * d + b_cf
if abs(d) < fpmin:
d = fpmin
c = b_cf + an / c
if abs(c) < fpmin:
c = fpmin
d = 1.0 / d
delta = d * c
h *= delta
if abs(delta - 1.0) < eps:
break
return math.exp(-x + a * math.log(x) - log_gamma_a) * h
def _chi_square_p_value(chi2: float, df: int) -> float:
if df <= 0:
raise ValueError(f"Degrees of freedom must be > 0 (got {df}).")
if chi2 <= 0:
return 1.0
return _regularized_gamma_upper(df / 2.0, chi2 / 2.0)
# ── Fisher's exact test helpers ──
def _log_comb(n: int, k: int) -> float:
if k < 0 or k > n:
return float("-inf")
return math.lgamma(n + 1) - math.lgamma(k + 1) - math.lgamma(n - k + 1)
def _hypergeometric_pmf(k: int, n1: int, n2: int, n: int) -> float:
return math.exp(_log_comb(n1, k) + _log_comb(n2, n - k) - _log_comb(n1 + n2, n))
# ── Incomplete beta for t-distribution p-values ──
def _incomplete_beta_reg(
x: float, a: float, b: float, max_iter: int = 300, eps: float = 1e-12
) -> float:
"""Regularized incomplete beta I_x(a, b) via continued fraction."""
if x <= 0:
return 0.0
if x >= 1:
return 1.0
ln_beta = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
front = math.exp(a * math.log(x) + b * math.log(1 - x) - ln_beta)
if x > (a + 1.0) / (a + b + 2.0):
return 1.0 - _incomplete_beta_reg(1 - x, b, a, max_iter, eps)
fpmin = 1e-300
qab = a + b
qap = a + 1.0
qam = a - 1.0
c = 1.0
d = 1.0 - qab * x / qap
if abs(d) < fpmin:
d = fpmin
d = 1.0 / d
h = d
for m in range(1, max_iter + 1):
m2 = 2 * m
aa = m * (b - m) * x / ((qam + m2) * (a + m2))
d = 1.0 + aa * d
if abs(d) < fpmin:
d = fpmin
c = 1.0 + aa / c
if abs(c) < fpmin:
c = fpmin
d = 1.0 / d
h *= d * c
aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2))
d = 1.0 + aa * d
if abs(d) < fpmin:
d = fpmin
c = 1.0 + aa / c
if abs(c) < fpmin:
c = fpmin
d = 1.0 / d
delta = d * c
h *= delta
if abs(delta - 1.0) < eps:
break
return front * h / a
def _t_pvalue(t_stat: float, df: int) -> float:
"""Two-tailed p-value from t distribution."""
if math.isnan(t_stat) or df <= 0:
return float("nan")
t2 = t_stat**2
x_beta = df / (df + t2)
return _incomplete_beta_reg(x_beta, df / 2.0, 0.5)
# ══════════════════════════════════════════════════════════════
# Tool class
# ══════════════════════════════════════════════════════════════
[文档]
@register_tool("ScientificCalculatorTool")
class ScientificCalculatorTool(BaseTool):
"""
General-purpose scientific calculators.
No external API calls. Provides:
- DNA translation in all 3 reading frames
- Molecular formula analysis and combustion analysis
- Chemical equilibrium solver (Ksp, complex formation, common-ion)
- Enzyme kinetics (Km/Vmax, Hill, Ki)
- Statistical tests (chi-square, Fisher, regression, t-test)
"""
[文档]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.parameter = tool_config.get("parameter", {})
self.required = self.parameter.get("required", [])
[文档]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
operation = arguments.get("operation")
if not operation:
return {"status": "error", "error": "Missing required parameter: operation"}
handlers = {
"translate_reading_frames": self._translate_reading_frames,
"analyze_formula": self._analyze_formula,
"combustion_analysis": self._combustion_analysis,
"ksp_simple": self._ksp_simple,
"ksp_complex": self._ksp_complex,
"common_ion": self._common_ion,
"michaelis_menten": self._michaelis_menten,
"hill": self._hill,
"inhibition": self._inhibition,
"chi_square": self._chi_square,
"fisher_exact": self._fisher_exact,
"linear_regression": self._linear_regression,
"t_test": self._t_test,
}
handler = handlers.get(operation)
if not handler:
return {
"status": "error",
"error": f"Unknown operation: {operation}",
"available_operations": list(handlers.keys()),
}
try:
result = handler(arguments)
# Wrap successful results in data envelope for return_schema validation
if isinstance(result, dict) and result.get("status") == "success":
payload = {k: v for k, v in result.items() if k != "status"}
return {"status": "success", "data": payload}
return result
except Exception as e:
return {"status": "error", "error": f"Operation failed: {str(e)}"}
# ── Tool 1: DNA translation in reading frames ──
[文档]
def _translate_reading_frames(self, args: Dict[str, Any]) -> Dict[str, Any]:
sequence = args.get("sequence", "")
if not sequence or not sequence.strip():
return {"status": "error", "error": "sequence is required"}
dna = re.sub(r"[^ATGCatgc]", "", sequence).upper()
if not dna:
return {"status": "error", "error": "No valid DNA bases found in sequence."}
frame_arg = str(args.get("frame") or "all")
frames_to_do = [0, 1, 2] if frame_arg == "all" else [int(frame_arg) - 1]
results = {}
best_frame = 0
best_orf = ""
best_orf_len = 0
for f in frames_to_do:
protein = _translate_frame(dna, f)
orf = _longest_orf(protein)
results[f"frame_{f + 1}"] = {
"protein": protein,
"protein_length": len(protein),
"longest_orf": orf,
"longest_orf_length_aa": len(orf),
}
if len(orf) > best_orf_len:
best_orf_len = len(orf)
best_orf = orf
best_frame = f + 1
return {
"status": "success",
"sequence_length": len(dna),
"frames": results,
"best_frame": best_frame,
"best_orf": best_orf,
"best_orf_length_aa": best_orf_len,
}
# ── Tool 2: Molecular formula analysis ──
[文档]
def _analyze_formula(self, args: Dict[str, Any]) -> Dict[str, Any]:
formula = args.get("formula")
if not formula:
return {
"status": "error",
"error": "formula is required for analyze_formula mode.",
}
atoms = _parse_formula(formula)
if not atoms:
return {"status": "error", "error": f"Could not parse formula '{formula}'."}
comp = _elemental_composition(atoms)
dou = _degrees_of_unsaturation(atoms)
return {
"status": "success",
"formula": formula,
"atoms": atoms,
"molar_mass": comp["molar_mass"],
"degrees_of_unsaturation": dou,
"dou_interpretation": _interpret_dou(dou),
"elemental_composition": comp["percentages"],
}
[文档]
def _combustion_analysis(self, args: Dict[str, Any]) -> Dict[str, Any]:
sample_g = args.get("sample_g")
co2_g = args.get("CO2_g")
h2o_g = args.get("H2O_g")
n2_g = args.get("N2_g") or 0.0
molar_mass = args.get("molar_mass")
if sample_g is None or co2_g is None or h2o_g is None:
return {
"status": "error",
"error": "sample_g, CO2_g, and H2O_g are all required.",
}
if sample_g <= 0:
return {"status": "error", "error": "sample_g must be positive."}
M = ATOMIC_MASS
moles_C = co2_g / (M["C"] + 2 * M["O"])
moles_H = h2o_g / (2 * M["H"] + M["O"]) * 2
moles_N = (n2_g / (2 * M["N"])) * 2
mass_accounted = moles_C * M["C"] + moles_H * M["H"] + moles_N * M["N"]
mass_O = sample_g - mass_accounted
moles_O = max(mass_O / M["O"], 0.0)
present = {
k: v
for k, v in {"C": moles_C, "H": moles_H, "O": moles_O, "N": moles_N}.items()
if v > 1e-6
}
min_moles = min(present.values())
ratios = {k: v / min_moles for k, v in present.items()}
best_denom = 1
best_err = float("inf")
for denom in range(1, 7):
err = sum(abs(r * denom - round(r * denom)) for r in ratios.values())
if err < best_err:
best_err = err
best_denom = denom
scaled = {k: round(v * best_denom) for k, v in ratios.items()}
g = _igcd(list(scaled.values()))
empirical = {k: v // g for k, v in scaled.items()}
emp_formula = _formula_str(empirical)
emp_mass = sum(n * M.get(e, 0.0) for e, n in empirical.items())
result: Dict[str, Any] = {
"status": "success",
"moles": {
"C": round(moles_C, 6),
"H": round(moles_H, 6),
"O": round(moles_O, 6),
"N": round(moles_N, 6),
},
"empirical_formula": emp_formula,
"empirical_molar_mass": round(emp_mass, 3),
}
if molar_mass is not None:
factor = round(molar_mass / emp_mass)
mol_atoms = {k: v * factor for k, v in empirical.items()}
mol_formula = _formula_str(mol_atoms)
mol_mass = sum(n * M.get(e, 0.0) for e, n in mol_atoms.items())
dou = _degrees_of_unsaturation(mol_atoms)
comp = _elemental_composition(mol_atoms)
result.update(
{
"scale_factor": factor,
"molecular_formula": mol_formula,
"molecular_molar_mass": round(mol_mass, 3),
"degrees_of_unsaturation": dou,
"dou_interpretation": _interpret_dou(dou),
"elemental_composition": comp["percentages"],
}
)
else:
dou = _degrees_of_unsaturation(empirical)
comp = _elemental_composition(empirical)
result.update(
{
"formula": emp_formula,
"molar_mass": round(emp_mass, 3),
"degrees_of_unsaturation": dou,
"dou_interpretation": _interpret_dou(dou),
"elemental_composition": comp["percentages"],
}
)
return result
# ── Tool 3: Equilibrium solver ──
[文档]
def _ksp_simple(self, args: Dict[str, Any]) -> Dict[str, Any]:
ksp = args.get("Ksp")
if ksp is None:
return {"status": "error", "error": "Ksp is required."}
a = args.get("stoich_cation") or 1
b = args.get("stoich_anion") or 1
coeff = (a**a) * (b**b)
n = a + b
s = (ksp / coeff) ** (1.0 / n)
return {
"status": "success",
"problem_type": "ksp_simple",
"solubility_mol_per_L": s,
"cation_conc": a * s,
"anion_conc": b * s,
"expression": f"Ksp = ({a}s)^{a} * ({b}s)^{b} = {coeff} * s^{n}",
"Ksp": ksp,
}
[文档]
def _ksp_complex(self, args: Dict[str, Any]) -> Dict[str, Any]:
ksp = args.get("Ksp")
kf = args.get("Kf")
if ksp is None or kf is None:
return {
"status": "error",
"error": "Both Ksp and Kf are required for ksp_complex.",
}
args.get("stoich_cation") or 1
args.get("stoich_anion") or 3
k_overall = ksp * kf
kw = 1e-14
# Newton's method on charge balance equation
h = (3.0 * ksp / k_overall) ** 0.25 if k_overall > 0 else 1e-7
for _ in range(200):
h2, h3, h4 = h * h, h * h * h, h * h * h * h
f_val = 3.0 * ksp / h3 + kw / h - h - k_overall * h
f_prime = -9.0 * ksp / h4 - kw / h2 - 1.0 - k_overall
delta = f_val / f_prime
h_new = h - delta
if h_new <= 0:
h = h / 2.0
continue
if abs(delta) < 1e-20 * h:
h = h_new
break
h = h_new
x = ksp / (h**3) # free cation
y = k_overall * h # complex
s = x + y
return {
"status": "success",
"problem_type": "ksp_complex",
"solubility_mol_per_L": s,
"free_cation_conc": x,
"complex_conc": y,
"free_anion_conc": h,
"K_overall": k_overall,
"Ksp": ksp,
"Kf": kf,
"note": (
f"Overall K = Ksp * Kf = {k_overall:.4e}. "
f"Complex dominates: [complex]/[free cation] = {y / x:.2e}"
if x > 0
else "All dissolved as complex."
),
}
[文档]
def _common_ion(self, args: Dict[str, Any]) -> Dict[str, Any]:
ksp = args.get("Ksp")
if ksp is None:
return {"status": "error", "error": "Ksp is required."}
a = args.get("stoich_cation") or 1
b = args.get("stoich_anion") or 1
c = args.get("common_ion_conc")
if c is None:
return {
"status": "error",
"error": "common_ion_conc is required for common_ion mode.",
}
def ksp_expr(s):
return ((a * s) ** a) * ((b * s + c) ** b)
s_max = (ksp / ((a**a) * (b**b))) ** (1.0 / (a + b))
s_lo, s_hi = 0.0, s_max
for _ in range(200):
s_mid = (s_lo + s_hi) / 2.0
if ksp_expr(s_mid) < ksp:
s_lo = s_mid
else:
s_hi = s_mid
if (s_hi - s_lo) < 1e-20:
break
s = (s_lo + s_hi) / 2.0
s_approx = (ksp / ((a**a) * (c**b))) ** (1.0 / a)
return {
"status": "success",
"problem_type": "common_ion",
"solubility_mol_per_L": s,
"solubility_approx_mol_per_L": s_approx,
"cation_conc": a * s,
"anion_conc": b * s + c,
"common_ion_added": c,
"Ksp": ksp,
}
# ── Tool 4: Enzyme kinetics ──
[文档]
def _michaelis_menten(self, args: Dict[str, Any]) -> Dict[str, Any]:
substrate = args.get("substrate_concs")
velocity = args.get("velocities")
if not substrate or not velocity:
return {
"status": "error",
"error": "substrate_concs and velocities are required.",
}
if len(substrate) != len(velocity):
return {
"status": "error",
"error": "substrate_concs and velocities must have the same length.",
}
if len(substrate) < 3:
return {"status": "error", "error": "Need at least 3 data points."}
# Lineweaver-Burk
inv_s = [1.0 / s for s in substrate]
inv_v = [1.0 / v for v in velocity]
slope, intercept, r2 = _linreg(inv_s, inv_v)
lb_result = {}
if intercept > 0 and slope > 0:
lb_vmax = 1.0 / intercept
lb_km = slope * lb_vmax
lb_result = {
"Vmax": round(lb_vmax, 6),
"Km": round(lb_km, 6),
"R2": round(r2, 6),
}
else:
lb_result = {"note": "Non-physical parameters from Lineweaver-Burk."}
# Nonlinear grid search
best_sse = float("inf")
best_km, best_vmax = substrate[len(substrate) // 2], max(velocity) * 1.2
for vmax_mult in [x * 0.1 for x in range(5, 31)]:
for km_mult in [x * 0.1 for x in range(1, 51)]:
vm = max(velocity) * vmax_mult
km = max(substrate) * km_mult * 0.1
sse = sum(
(v - vm * s / (km + s)) ** 2 for s, v in zip(substrate, velocity)
)
if sse < best_sse:
best_sse = sse
best_km, best_vmax = km, vm
for _ in range(5):
step_v = best_vmax * 0.05
step_k = best_km * 0.05
improved = False
for dv in [-step_v, 0, step_v]:
for dk in [-step_k, 0, step_k]:
vm = best_vmax + dv
km = best_km + dk
if vm <= 0 or km <= 0:
continue
sse = sum(
(v - vm * s / (km + s)) ** 2
for s, v in zip(substrate, velocity)
)
if sse < best_sse:
best_sse = sse
best_km, best_vmax = km, vm
improved = True
if not improved:
break
v_mean = sum(velocity) / len(velocity)
ss_tot = sum((v - v_mean) ** 2 for v in velocity)
nl_r2 = 1.0 - best_sse / ss_tot if ss_tot > 0 else 0.0
predicted = [round(best_vmax * s / (best_km + s), 6) for s in substrate]
residuals = [round(v - p, 6) for v, p in zip(velocity, predicted)]
return {
"status": "success",
"calculation_type": "michaelis_menten",
"lineweaver_burk": lb_result,
"nonlinear_fit": {
"Vmax": round(best_vmax, 6),
"Km": round(best_km, 6),
"R2": round(nl_r2, 6),
"SSE": round(best_sse, 6),
},
"Vmax": round(best_vmax, 6),
"Km": round(best_km, 6),
"catalytic_efficiency": round(best_vmax / best_km, 6),
"predicted_velocities": predicted,
"residuals": residuals,
}
[文档]
def _hill(self, args: Dict[str, Any]) -> Dict[str, Any]:
substrate = args.get("substrate_concs")
velocity = args.get("velocities")
if not substrate or not velocity:
return {
"status": "error",
"error": "substrate_concs and velocities are required.",
}
if len(substrate) != len(velocity):
return {
"status": "error",
"error": "substrate_concs and velocities must have the same length.",
}
if len(substrate) < 3:
return {"status": "error", "error": "Need at least 3 data points."}
vmax_est = max(velocity) * 1.1
log_s, log_y = [], []
for s, v in zip(substrate, velocity):
if 0.1 * vmax_est < v < 0.9 * vmax_est:
y = v / (vmax_est - v)
if y > 0:
log_s.append(math.log10(s))
log_y.append(math.log10(y))
if len(log_s) < 2:
vmax_est = max(velocity) * 1.3
for s, v in zip(substrate, velocity):
if 0.05 * vmax_est < v < 0.95 * vmax_est:
y = v / (vmax_est - v)
if y > 0:
log_s.append(math.log10(s))
log_y.append(math.log10(y))
if len(log_s) < 2:
return {
"status": "error",
"error": "Insufficient data in the 10-90% saturation range for Hill analysis.",
}
slope, intercept, r2 = _linreg(log_s, log_y)
nH = slope
k05 = 10 ** (-intercept / nH) if abs(nH) > 1e-10 else float("inf")
if nH > 1.05:
interp = f"nH = {nH:.2f} > 1: POSITIVE cooperativity"
elif nH < 0.95:
interp = f"nH = {nH:.2f} < 1: NEGATIVE cooperativity"
else:
interp = f"nH = {nH:.2f} ~ 1: NO cooperativity (independent sites)"
predicted = [
round(vmax_est * (s**nH) / (k05**nH + s**nH), 6) for s in substrate
]
return {
"status": "success",
"calculation_type": "hill",
"hill_coefficient": round(nH, 4),
"K05": round(k05, 6),
"Vmax_estimated": round(vmax_est, 6),
"R2_hill_plot": round(r2, 6),
"interpretation": interp,
"data_points_used": len(log_s),
"predicted_velocities": predicted,
}
[文档]
def _inhibition(self, args: Dict[str, Any]) -> Dict[str, Any]:
substrate = args.get("substrate_concs")
v_no_inh = args.get("velocities_no_inhibitor")
v_inh = args.get("velocities_with_inhibitor")
inh_conc = args.get("inhibitor_conc")
inh_type = (args.get("inhibition_type") or "competitive").lower().strip()
if not substrate or not v_no_inh or not v_inh:
return {
"status": "error",
"error": "substrate_concs, velocities_no_inhibitor, and velocities_with_inhibitor required.",
}
if inh_conc is None:
return {"status": "error", "error": "inhibitor_conc is required."}
n = len(substrate)
if len(v_no_inh) != n or len(v_inh) != n:
return {"status": "error", "error": "All arrays must have the same length."}
if n < 3:
return {"status": "error", "error": "Need at least 3 data points."}
inv_s = [1.0 / s for s in substrate]
inv_v0 = [1.0 / v for v in v_no_inh]
slope0, intercept0, r2_0 = _linreg(inv_s, inv_v0)
if intercept0 <= 0 or slope0 <= 0:
return {
"status": "error",
"error": "Uninhibited data does not give valid Km/Vmax.",
}
vmax = 1.0 / intercept0
km = slope0 * vmax
inv_vi = [1.0 / v for v in v_inh]
slope_i, intercept_i, r2_i = _linreg(inv_s, inv_vi)
if intercept_i <= 0 or slope_i <= 0:
return {
"status": "error",
"error": "Inhibited data does not give valid apparent parameters.",
}
vmax_app = 1.0 / intercept_i
km_app = slope_i * vmax_app
ki = None
if inh_type == "competitive":
ratio = km_app / km
ki = inh_conc / (ratio - 1.0) if ratio > 1.0 else None
elif inh_type == "uncompetitive":
ratio = vmax / vmax_app
ki = inh_conc / (ratio - 1.0) if ratio > 1.0 else None
elif inh_type in ("noncompetitive", "non-competitive"):
ratio = vmax / vmax_app
ki = inh_conc / (ratio - 1.0) if ratio > 1.0 else None
else:
return {
"status": "error",
"error": f"Unknown inhibition type: {inh_type}. Use competitive, uncompetitive, or noncompetitive.",
}
result: Dict[str, Any] = {
"status": "success",
"calculation_type": "inhibition",
"inhibition_type": inh_type,
"Vmax": round(vmax, 6),
"Km": round(km, 6),
"Vmax_apparent": round(vmax_app, 6),
"Km_apparent": round(km_app, 6),
"inhibitor_conc": inh_conc,
}
if ki is not None:
result["Ki"] = round(ki, 6)
else:
result["Ki_note"] = (
"Ki could not be determined. Data inconsistent with the specified inhibition model."
)
return result
# ── Tool 5: Statistical tests ──
[文档]
def _chi_square(self, args: Dict[str, Any]) -> Dict[str, Any]:
observed = args.get("observed")
expected = args.get("expected")
if not observed or not expected:
return {"status": "error", "error": "observed and expected are required."}
if len(observed) != len(expected):
return {
"status": "error",
"error": "observed and expected must have the same length.",
}
if len(observed) < 2:
return {"status": "error", "error": "Need at least 2 categories."}
n_obs = sum(observed)
n_exp = sum(expected)
if n_obs <= 0 or n_exp <= 0:
return {"status": "error", "error": "Sums must be > 0."}
scale = n_obs / n_exp
exp_scaled = [e * scale for e in expected]
chi2 = sum((o - e) ** 2 / e for o, e in zip(observed, exp_scaled))
df = len(observed) - 1
p = _chi_square_p_value(chi2, df)
contributions = [
round((o - e) ** 2 / e, 6) for o, e in zip(observed, exp_scaled)
]
warning = None
if min(exp_scaled) < 5:
warning = f"Minimum expected count {min(exp_scaled):.2f} < 5. Chi-square approximation may be unreliable."
return {
"status": "success",
"test_type": "chi_square",
"chi2": round(chi2, 6),
"df": df,
"p_value": round(p, 8),
"significant_at_005": p < 0.05,
"contributions": contributions,
"expected_scaled": [round(e, 4) for e in exp_scaled],
"warning": warning,
}
[文档]
def _fisher_exact(self, args: Dict[str, Any]) -> Dict[str, Any]:
cell_a = args.get("a")
cell_b = args.get("b")
cell_c = args.get("c")
cell_d = args.get("d")
alternative = args.get("alternative") or "two-sided"
if any(v is None for v in [cell_a, cell_b, cell_c, cell_d]):
return {"status": "error", "error": "a, b, c, and d are all required."}
if any(v < 0 for v in [cell_a, cell_b, cell_c, cell_d]):
return {"status": "error", "error": "All cell counts must be >= 0."}
n1 = cell_a + cell_b
n2 = cell_c + cell_d
n = cell_a + cell_c
N = cell_a + cell_b + cell_c + cell_d
if N == 0:
return {"status": "error", "error": "All counts are zero."}
if cell_b * cell_c == 0:
OR = float("inf") if cell_a * cell_d != 0 else float("nan")
else:
OR = (cell_a * cell_d) / (cell_b * cell_c)
k_min = max(0, n - n2)
k_max = min(n1, n)
all_probs = {
k: _hypergeometric_pmf(k, n1, n2, n) for k in range(k_min, k_max + 1)
}
p_obs = all_probs.get(cell_a, 0.0)
if alternative == "two-sided":
p_value = sum(p for p in all_probs.values() if p <= p_obs + 1e-10)
elif alternative == "less":
p_value = sum(p for k, p in all_probs.items() if k <= cell_a)
elif alternative == "greater":
p_value = sum(p for k, p in all_probs.items() if k >= cell_a)
else:
return {"status": "error", "error": f"Invalid alternative: {alternative}"}
p_value = min(p_value, 1.0)
return {
"status": "success",
"test_type": "fisher_exact",
"odds_ratio": round(OR, 6)
if not (math.isinf(OR) or math.isnan(OR))
else str(OR),
"p_value": round(p_value, 8),
"alternative": alternative,
"significant_at_005": p_value < 0.05,
"table": {"a": cell_a, "b": cell_b, "c": cell_c, "d": cell_d},
"marginals": {
"row1": n1,
"row2": n2,
"col1": n,
"col2": cell_b + cell_d,
"N": N,
},
}
[文档]
def _linear_regression(self, args: Dict[str, Any]) -> Dict[str, Any]:
x = args.get("data_x")
y = args.get("data_y")
if not x or not y:
return {"status": "error", "error": "data_x and data_y are required."}
n = len(x)
if n != len(y):
return {
"status": "error",
"error": "data_x and data_y must have the same length.",
}
if n < 3:
return {"status": "error", "error": "Need at least 3 data points."}
x_mean = sum(x) / n
y_mean = sum(y) / n
Sxx = sum((xi - x_mean) ** 2 for xi in x)
Sxy = sum((xi - x_mean) * (yi - y_mean) for xi, yi in zip(x, y))
Syy = sum((yi - y_mean) ** 2 for yi in y)
if Sxx == 0:
return {"status": "error", "error": "All x values are identical."}
b1 = Sxy / Sxx
b0 = y_mean - b1 * x_mean
y_pred = [b0 + b1 * xi for xi in x]
residuals = [yi - yp for yi, yp in zip(y, y_pred)]
SSR = sum(r**2 for r in residuals)
SST = Syy
R2 = 1.0 - SSR / SST if SST > 0 else float("nan")
R2_adj = (
1.0 - (SSR / (n - 2)) / (SST / (n - 1))
if n > 2 and SST > 0
else float("nan")
)
s2 = SSR / (n - 2)
se_b1 = math.sqrt(s2 / Sxx)
t_b1 = b1 / se_b1 if se_b1 > 0 else float("nan")
p_b1 = _t_pvalue(t_b1, n - 2)
r = Sxy / math.sqrt(Sxx * Syy) if Sxx > 0 and Syy > 0 else float("nan")
return {
"status": "success",
"test_type": "linear_regression",
"n": n,
"slope": round(b1, 6),
"intercept": round(b0, 6),
"se_slope": round(se_b1, 6),
"t_slope": round(t_b1, 6),
"p_slope": round(p_b1, 8),
"R2": round(R2, 6),
"R2_adj": round(R2_adj, 6),
"pearson_r": round(r, 6),
"equation": f"y = {b0:.4f} + {b1:.4f} * x",
"predicted": [round(yp, 6) for yp in y_pred],
"residuals": [round(ri, 6) for ri in residuals],
}
[文档]
def _t_test(self, args: Dict[str, Any]) -> Dict[str, Any]:
x = args.get("data_x")
y = args.get("data_y")
if not x or not y:
return {"status": "error", "error": "data_x and data_y are required."}
n1, n2 = len(x), len(y)
if n1 < 2 or n2 < 2:
return {
"status": "error",
"error": "Each group needs at least 2 observations.",
}
m1 = sum(x) / n1
m2 = sum(y) / n2
s1_sq = sum((xi - m1) ** 2 for xi in x) / (n1 - 1)
s2_sq = sum((yi - m2) ** 2 for yi in y) / (n2 - 1)
se = math.sqrt(s1_sq / n1 + s2_sq / n2)
if se == 0:
return {"status": "error", "error": "Both groups have zero variance."}
t_stat = (m1 - m2) / se
# Welch-Satterthwaite degrees of freedom
num = (s1_sq / n1 + s2_sq / n2) ** 2
denom = (s1_sq / n1) ** 2 / (n1 - 1) + (s2_sq / n2) ** 2 / (n2 - 1)
df = num / denom if denom > 0 else min(n1, n2) - 1
p_val = _t_pvalue(t_stat, int(round(df)))
return {
"status": "success",
"test_type": "t_test",
"mean_x": round(m1, 6),
"mean_y": round(m2, 6),
"mean_difference": round(m1 - m2, 6),
"t_statistic": round(t_stat, 6),
"degrees_of_freedom": round(df, 2),
"p_value": round(p_val, 8),
"significant_at_005": p_val < 0.05,
"variance_x": round(s1_sq, 6),
"variance_y": round(s2_sq, 6),
}