Source code for tooluniverse.drug_properties_tool

# drug_properties_tool.py
"""
Drug Property Filters and Drug-likeness Assessment Tool

Implements standard computational drug-likeness filters:
- Lipinski's Rule of Five (Ro5): MW≤500, cLogP≤5, HBD≤5, HBA≤10
- Veber filter: RotBonds≤10, TPSA≤140
- QED (Quantitative Estimate of Drug-likeness): rdkit.Chem.QED
- PAINS (Pan-Assay Interference Compounds): rdkit.Chem.FilterCatalog
- Pfizer 3/75 rule: cLogP≤3, TPSA≥75
- Egan filter, Ghose filter

No external API calls. Requires RDKit.

References:
- Lipinski et al., Adv. Drug Deliv. Rev. (1997)
- Veber et al., J. Med. Chem. (2002)
- Bickerton et al., Nature Chemistry (2012) [QED]
- Baell & Holloway, J. Med. Chem. (2010) [PAINS]
"""

from typing import Dict, Any

from .base_tool import BaseTool
from .tool_registry import register_tool

try:
    from rdkit import Chem
    from rdkit.Chem import Descriptors, rdMolDescriptors, QED as RDKitQED
    from rdkit.Chem.FilterCatalog import FilterCatalog, FilterCatalogParams

    HAS_RDKIT = True
except Exception:
    HAS_RDKIT = False


[docs] @register_tool("DrugPropertiesTool") class DrugPropertiesTool(BaseTool): """ Drug-likeness property filters and assessments using RDKit. Endpoints: - lipinski_filter: Lipinski Ro5 + Veber + Pfizer 3/75 + Egan + Ghose - calculate_qed: QED score + full descriptor profile - pains_filter: PAINS + Brenk + NIH substructure filters """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) fields = tool_config.get("fields", {}) self.endpoint = fields.get("endpoint", "lipinski_filter")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: if not HAS_RDKIT: return { "status": "error", "error": "RDKit is required for drug property calculations. Install with: pip install rdkit", } try: if self.endpoint == "lipinski_filter": return self._lipinski_filter(arguments) elif self.endpoint == "calculate_qed": return self._calculate_qed(arguments) elif self.endpoint == "pains_filter": return self._pains_filter(arguments) else: return { "status": "error", "error": f"Unknown endpoint: {self.endpoint}", } except Exception as e: return { "status": "error", "error": f"Drug property calculation error: {str(e)}", }
[docs] def _parse_smiles(self, arguments: Dict[str, Any]): smiles = arguments.get("smiles", "").strip() if not smiles: return None, { "status": "error", "error": "smiles parameter is required (SMILES string of the molecule)", } mol = Chem.MolFromSmiles(smiles) if mol is None: return None, { "status": "error", "error": f"Invalid SMILES string: {smiles}", } return mol, None
[docs] def _lipinski_filter(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Check Lipinski Ro5, Veber, Pfizer 3/75, Egan, and Ghose filters.""" mol, err = self._parse_smiles(arguments) if err: return err # Calculate descriptors mw = Descriptors.ExactMolWt(mol) logp = Descriptors.MolLogP(mol) hbd = rdMolDescriptors.CalcNumHBD(mol) hba = rdMolDescriptors.CalcNumHBA(mol) rotb = rdMolDescriptors.CalcNumRotatableBonds(mol) tpsa = Descriptors.TPSA(mol) arom_rings = rdMolDescriptors.CalcNumAromaticRings(mol) heavy_atoms = mol.GetNumHeavyAtoms() mr = Descriptors.MolMR(mol) # Molar refractivity csp3 = rdMolDescriptors.CalcFractionCSP3(mol) # Lipinski Ro5 (Lipinski 1997): allow 1 violation ro5_violations = sum([mw > 500, logp > 5, hbd > 5, hba > 10]) lipinski_pass = ro5_violations <= 1 # Veber (2002): oral bioavailability veber_pass = rotb <= 10 and tpsa <= 140 # Pfizer 3/75 rule (reduces promiscuity / CNS penetration risk) pfizer_pass = logp <= 3 and tpsa >= 75 # Egan filter (Egan 2000): bioavailability egan_pass = logp <= 5.88 and tpsa <= 131.6 # Ghose filter (1999): oral drug-like ghose_pass = ( -0.4 <= logp <= 5.6 and 160 <= mw <= 480 and 40 <= mr <= 130 and 20 <= heavy_atoms <= 70 ) # Beyond Ro5 territory (macrocycles / PROTACs) beyond_ro5 = ro5_violations > 1 and mw <= 1000 if lipinski_pass and veber_pass: overall = "drug-like" elif lipinski_pass: overall = "borderline (Ro5 pass, Veber fail)" elif beyond_ro5: overall = "beyond-Ro5 (macrocycle/PROTAC territory)" else: overall = "non-drug-like" return { "status": "success", "data": { "smiles": arguments.get("smiles", ""), "descriptors": { "molecular_weight_Da": round(mw, 2), "cLogP": round(logp, 3), "HBD": hbd, "HBA": hba, "rotatable_bonds": rotb, "TPSA_A2": round(tpsa, 2), "aromatic_rings": arom_rings, "heavy_atoms": heavy_atoms, "molar_refractivity": round(mr, 2), "Csp3_fraction": round(csp3, 3), }, "filters": { "lipinski_ro5": { "pass": bool(lipinski_pass), "violations": ro5_violations, "rule": "MW≤500, cLogP≤5, HBD≤5, HBA≤10 (max 1 violation)", }, "veber": { "pass": bool(veber_pass), "rule": "RotBonds≤10, TPSA≤140 Ų", }, "pfizer_3_75": { "pass": bool(pfizer_pass), "rule": "cLogP≤3, TPSA≥75 Ų (reduces promiscuity/CNS risk)", }, "egan": { "pass": bool(egan_pass), "rule": "cLogP≤5.88, TPSA≤131.6 Ų (Egan et al. 2000)", }, "ghose": { "pass": bool(ghose_pass), "rule": "-0.4≤cLogP≤5.6, 160≤MW≤480, 40≤MR≤130, 20≤Atoms≤70", }, "beyond_ro5_territory": { "applicable": bool(beyond_ro5), "note": "Fails Lipinski but MW≤1000 — may be viable as macrocycle or PROTAC", }, }, "overall_drug_likeness": overall, }, "metadata": { "source": "RDKit local computation", "filters_applied": "Lipinski (1997), Veber (2002), Pfizer 3/75, Egan (2000), Ghose (1999)", }, }
[docs] def _calculate_qed(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Calculate QED (Quantitative Estimate of Drug-likeness) score.""" mol, err = self._parse_smiles(arguments) if err: return err mw = Descriptors.ExactMolWt(mol) logp = Descriptors.MolLogP(mol) hbd = rdMolDescriptors.CalcNumHBD(mol) hba = rdMolDescriptors.CalcNumHBA(mol) rotb = rdMolDescriptors.CalcNumRotatableBonds(mol) tpsa = Descriptors.TPSA(mol) arom_rings = rdMolDescriptors.CalcNumAromaticRings(mol) qed_score = RDKitQED.qed(mol) qed_props = RDKitQED.properties(mol) if qed_score >= 0.67: qed_category = "High (drug-like)" elif qed_score >= 0.34: qed_category = "Medium (borderline)" else: qed_category = "Low (non-drug-like)" return { "status": "success", "data": { "smiles": arguments.get("smiles", ""), "qed_score": round(qed_score, 4), "qed_category": qed_category, "qed_components": { "MW": round(qed_props.MW, 2), "ALOGP": round(qed_props.ALOGP, 3), "HBA": qed_props.HBA, "HBD": qed_props.HBD, "PSA": round(qed_props.PSA, 2), "ROTB": qed_props.ROTB, "AROM": qed_props.AROM, "ALERTS": qed_props.ALERTS, }, "additional_descriptors": { "molecular_weight_Da": round(mw, 2), "cLogP": round(logp, 3), "HBD": hbd, "HBA": hba, "rotatable_bonds": rotb, "TPSA_A2": round(tpsa, 2), "aromatic_rings": arom_rings, }, "interpretation": ( "QED ranges from 0 (least drug-like) to 1 (most drug-like). " "Median approved drugs: ~0.49. Optimized leads typically >0.5. " "ALERTS = number of structural alerts (Brenk-like). " "Based on Bickerton et al., Nature Chemistry (2012)." ), }, "metadata": { "source": "RDKit local computation", "reference": "Bickerton et al., Nature Chemistry, 2012", }, }
[docs] def _pains_filter(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Check for PAINS, Brenk, and NIH undesirable substructures.""" mol, err = self._parse_smiles(arguments) if err: return err # PAINS filter (Baell & Holloway 2010) params = FilterCatalogParams() params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS) catalog = FilterCatalog(params) pains_matches = [] for entry in catalog.GetMatches(mol): pains_matches.append( { "name": entry.GetDescription(), "family": "PAINS", } ) # Brenk filter (undesirable reactive/toxic substructures) brenk_params = FilterCatalogParams() brenk_params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK) brenk_catalog = FilterCatalog(brenk_params) brenk_matches = [ entry.GetDescription() for entry in brenk_catalog.GetMatches(mol) ] # NIH filter nih_params = FilterCatalogParams() nih_params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH) nih_catalog = FilterCatalog(nih_params) nih_matches = [entry.GetDescription() for entry in nih_catalog.GetMatches(mol)] is_clean = len(pains_matches) == 0 and len(brenk_matches) == 0 if is_clean: recommendation = "Clean — no PAINS or Brenk alerts. Suitable for screening." else: parts = [] if pains_matches: parts.append(f"{len(pains_matches)} PAINS alert(s)") if brenk_matches: parts.append(f"{len(brenk_matches)} Brenk alert(s)") recommendation = ( f"CAUTION: {', '.join(parts)}. May cause assay interference or reactivity. " "Consider structural optimization." ) return { "status": "success", "data": { "smiles": arguments.get("smiles", ""), "is_clean": is_clean, "pains": { "pass": len(pains_matches) == 0, "matches": pains_matches, "count": len(pains_matches), }, "brenk": { "pass": len(brenk_matches) == 0, "matches": brenk_matches[:10], "count": len(brenk_matches), }, "nih": { "pass": len(nih_matches) == 0, "matches": nih_matches[:10], "count": len(nih_matches), }, "recommendation": recommendation, }, "metadata": { "source": "RDKit local computation", "filters": "PAINS (Baell & Holloway 2010), Brenk (2008), NIH", }, }