Source code for tooluniverse.drug_properties_tool

# drug_properties_tool.py
"""
Drug Property Filters and Drug-likeness Assessment Tool

Implements standard computational drug-likeness filters:
- Lipinski's Rule of Five (Ro5): MW≤500, cLogP≤5, HBD≤5, HBA≤10
- Veber filter: RotBonds≤10, TPSA≤140
- QED (Quantitative Estimate of Drug-likeness): rdkit.Chem.QED
- PAINS (Pan-Assay Interference Compounds): rdkit.Chem.FilterCatalog
- Pfizer 3/75 rule: cLogP≤3, TPSA≥75
- Egan filter, Ghose filter

No external API calls. Requires RDKit.

References:
- Lipinski et al., Adv. Drug Deliv. Rev. (1997)
- Veber et al., J. Med. Chem. (2002)
- Bickerton et al., Nature Chemistry (2012) [QED]
- Baell & Holloway, J. Med. Chem. (2010) [PAINS]
"""

from typing import Dict, Any

from .base_tool import BaseTool
from .tool_registry import register_tool

try:
    from rdkit import Chem
    from rdkit.Chem import Descriptors, rdMolDescriptors, QED as RDKitQED
    from rdkit.Chem.FilterCatalog import FilterCatalog, FilterCatalogParams

    HAS_RDKIT = True
except Exception:
    HAS_RDKIT = False



[docs]
@register_tool("DrugPropertiesTool")
class DrugPropertiesTool(BaseTool):
    """
    Drug-likeness property filters and assessments using RDKit.

    Endpoints:
    - lipinski_filter: Lipinski Ro5 + Veber + Pfizer 3/75 + Egan + Ghose
    - calculate_qed: QED score + full descriptor profile
    - pains_filter: PAINS + Brenk + NIH substructure filters
    """


[docs]
    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the base tool and record which endpoint this instance serves.

        Args:
            tool_config: Tool configuration mapping. The endpoint name is read
                from ``tool_config["fields"]["endpoint"]``; when either key is
                absent the endpoint falls back to ``"lipinski_filter"``.
        """
        super().__init__(tool_config)
        # Chained lookup: a missing "fields" section behaves like an empty one.
        self.endpoint = tool_config.get("fields", {}).get(
            "endpoint", "lipinski_filter"
        )



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Dispatch a request to the handler for the configured endpoint.

        Args:
            arguments: Request arguments, passed through unchanged to the
                endpoint handler.

        Returns:
            The handler's result dict, or a ``{"status": "error", ...}`` dict
            when RDKit could not be imported, the configured endpoint is not
            one of the known names, or the handler raises.
        """
        if not HAS_RDKIT:
            return {
                "status": "error",
                "error": "RDKit is required for drug property calculations. Install with: pip install rdkit",
            }

        try:
            endpoint = self.endpoint
            # Scanned with == (not a dict lookup) so that an unhashable
            # endpoint value still ends up in the "Unknown endpoint" branch.
            handlers = (
                ("lipinski_filter", self._lipinski_filter),
                ("calculate_qed", self._calculate_qed),
                ("pains_filter", self._pains_filter),
            )
            for name, handler in handlers:
                if endpoint == name:
                    return handler(arguments)
            return {
                "status": "error",
                "error": f"Unknown endpoint: {endpoint}",
            }
        except Exception as exc:
            # Boundary of the tool: any failure is reported as an error dict
            # instead of propagating to the caller.
            return {
                "status": "error",
                "error": f"Drug property calculation error: {str(exc)}",
            }



[docs]
    def _parse_smiles(self, arguments: Dict[str, Any]):
        smiles = arguments.get("smiles", "").strip()
        if not smiles:
            return None, {
                "status": "error",
                "error": "smiles parameter is required (SMILES string of the molecule)",
            }
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None, {
                "status": "error",
                "error": f"Invalid SMILES string: {smiles}",
            }
        return mol, None



[docs]
    def _lipinski_filter(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Check Lipinski Ro5, Veber, Pfizer 3/75, Egan, and Ghose filters."""
        mol, err = self._parse_smiles(arguments)
        if err:
            return err

        # Calculate descriptors
        mw = Descriptors.ExactMolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = rdMolDescriptors.CalcNumHBD(mol)
        hba = rdMolDescriptors.CalcNumHBA(mol)
        rotb = rdMolDescriptors.CalcNumRotatableBonds(mol)
        tpsa = Descriptors.TPSA(mol)
        arom_rings = rdMolDescriptors.CalcNumAromaticRings(mol)
        heavy_atoms = mol.GetNumHeavyAtoms()
        mr = Descriptors.MolMR(mol)  # Molar refractivity
        csp3 = rdMolDescriptors.CalcFractionCSP3(mol)

        # Lipinski Ro5 (Lipinski 1997): allow 1 violation
        ro5_violations = sum([mw > 500, logp > 5, hbd > 5, hba > 10])
        lipinski_pass = ro5_violations <= 1

        # Veber (2002): oral bioavailability
        veber_pass = rotb <= 10 and tpsa <= 140

        # Pfizer 3/75 rule (reduces promiscuity / CNS penetration risk)
        pfizer_pass = logp <= 3 and tpsa >= 75

        # Egan filter (Egan 2000): bioavailability
        egan_pass = logp <= 5.88 and tpsa <= 131.6

        # Ghose filter (1999): oral drug-like
        ghose_pass = (
            -0.4 <= logp <= 5.6
            and 160 <= mw <= 480
            and 40 <= mr <= 130
            and 20 <= heavy_atoms <= 70
        )

        # Beyond Ro5 territory (macrocycles / PROTACs)
        beyond_ro5 = ro5_violations > 1 and mw <= 1000

        if lipinski_pass and veber_pass:
            overall = "drug-like"
        elif lipinski_pass:
            overall = "borderline (Ro5 pass, Veber fail)"
        elif beyond_ro5:
            overall = "beyond-Ro5 (macrocycle/PROTAC territory)"
        else:
            overall = "non-drug-like"

        return {
            "status": "success",
            "data": {
                "smiles": arguments.get("smiles", ""),
                "descriptors": {
                    "molecular_weight_Da": round(mw, 2),
                    "cLogP": round(logp, 3),
                    "HBD": hbd,
                    "HBA": hba,
                    "rotatable_bonds": rotb,
                    "TPSA_A2": round(tpsa, 2),
                    "aromatic_rings": arom_rings,
                    "heavy_atoms": heavy_atoms,
                    "molar_refractivity": round(mr, 2),
                    "Csp3_fraction": round(csp3, 3),
                },
                "filters": {
                    "lipinski_ro5": {
                        "pass": bool(lipinski_pass),
                        "violations": ro5_violations,
                        "rule": "MW≤500, cLogP≤5, HBD≤5, HBA≤10 (max 1 violation)",
                    },
                    "veber": {
                        "pass": bool(veber_pass),
                        "rule": "RotBonds≤10, TPSA≤140 Å²",
                    },
                    "pfizer_3_75": {
                        "pass": bool(pfizer_pass),
                        "rule": "cLogP≤3, TPSA≥75 Å² (reduces promiscuity/CNS risk)",
                    },
                    "egan": {
                        "pass": bool(egan_pass),
                        "rule": "cLogP≤5.88, TPSA≤131.6 Å² (Egan et al. 2000)",
                    },
                    "ghose": {
                        "pass": bool(ghose_pass),
                        "rule": "-0.4≤cLogP≤5.6, 160≤MW≤480, 40≤MR≤130, 20≤Atoms≤70",
                    },
                    "beyond_ro5_territory": {
                        "applicable": bool(beyond_ro5),
                        "note": "Fails Lipinski but MW≤1000 — may be viable as macrocycle or PROTAC",
                    },
                },
                "overall_drug_likeness": overall,
            },
            "metadata": {
                "source": "RDKit local computation",
                "filters_applied": "Lipinski (1997), Veber (2002), Pfizer 3/75, Egan (2000), Ghose (1999)",
            },
        }



[docs]
    def _calculate_qed(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Calculate QED (Quantitative Estimate of Drug-likeness) score."""
        mol, err = self._parse_smiles(arguments)
        if err:
            return err

        mw = Descriptors.ExactMolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = rdMolDescriptors.CalcNumHBD(mol)
        hba = rdMolDescriptors.CalcNumHBA(mol)
        rotb = rdMolDescriptors.CalcNumRotatableBonds(mol)
        tpsa = Descriptors.TPSA(mol)
        arom_rings = rdMolDescriptors.CalcNumAromaticRings(mol)

        qed_score = RDKitQED.qed(mol)
        qed_props = RDKitQED.properties(mol)

        if qed_score >= 0.67:
            qed_category = "High (drug-like)"
        elif qed_score >= 0.34:
            qed_category = "Medium (borderline)"
        else:
            qed_category = "Low (non-drug-like)"

        return {
            "status": "success",
            "data": {
                "smiles": arguments.get("smiles", ""),
                "qed_score": round(qed_score, 4),
                "qed_category": qed_category,
                "qed_components": {
                    "MW": round(qed_props.MW, 2),
                    "ALOGP": round(qed_props.ALOGP, 3),
                    "HBA": qed_props.HBA,
                    "HBD": qed_props.HBD,
                    "PSA": round(qed_props.PSA, 2),
                    "ROTB": qed_props.ROTB,
                    "AROM": qed_props.AROM,
                    "ALERTS": qed_props.ALERTS,
                },
                "additional_descriptors": {
                    "molecular_weight_Da": round(mw, 2),
                    "cLogP": round(logp, 3),
                    "HBD": hbd,
                    "HBA": hba,
                    "rotatable_bonds": rotb,
                    "TPSA_A2": round(tpsa, 2),
                    "aromatic_rings": arom_rings,
                },
                "interpretation": (
                    "QED ranges from 0 (least drug-like) to 1 (most drug-like). "
                    "Median approved drugs: ~0.49. Optimized leads typically >0.5. "
                    "ALERTS = number of structural alerts (Brenk-like). "
                    "Based on Bickerton et al., Nature Chemistry (2012)."
                ),
            },
            "metadata": {
                "source": "RDKit local computation",
                "reference": "Bickerton et al., Nature Chemistry, 2012",
            },
        }



[docs]
    def _pains_filter(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Check for PAINS, Brenk, and NIH undesirable substructures."""
        mol, err = self._parse_smiles(arguments)
        if err:
            return err

        # PAINS filter (Baell & Holloway 2010)
        params = FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
        catalog = FilterCatalog(params)
        pains_matches = []
        for entry in catalog.GetMatches(mol):
            pains_matches.append(
                {
                    "name": entry.GetDescription(),
                    "family": "PAINS",
                }
            )

        # Brenk filter (undesirable reactive/toxic substructures)
        brenk_params = FilterCatalogParams()
        brenk_params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
        brenk_catalog = FilterCatalog(brenk_params)
        brenk_matches = [
            entry.GetDescription() for entry in brenk_catalog.GetMatches(mol)
        ]

        # NIH filter
        nih_params = FilterCatalogParams()
        nih_params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
        nih_catalog = FilterCatalog(nih_params)
        nih_matches = [entry.GetDescription() for entry in nih_catalog.GetMatches(mol)]

        is_clean = len(pains_matches) == 0 and len(brenk_matches) == 0

        if is_clean:
            recommendation = "Clean — no PAINS or Brenk alerts. Suitable for screening."
        else:
            parts = []
            if pains_matches:
                parts.append(f"{len(pains_matches)} PAINS alert(s)")
            if brenk_matches:
                parts.append(f"{len(brenk_matches)} Brenk alert(s)")
            recommendation = (
                f"CAUTION: {', '.join(parts)}. May cause assay interference or reactivity. "
                "Consider structural optimization."
            )

        return {
            "status": "success",
            "data": {
                "smiles": arguments.get("smiles", ""),
                "is_clean": is_clean,
                "pains": {
                    "pass": len(pains_matches) == 0,
                    "matches": pains_matches,
                    "count": len(pains_matches),
                },
                "brenk": {
                    "pass": len(brenk_matches) == 0,
                    "matches": brenk_matches[:10],
                    "count": len(brenk_matches),
                },
                "nih": {
                    "pass": len(nih_matches) == 0,
                    "matches": nih_matches[:10],
                    "count": len(nih_matches),
                },
                "recommendation": recommendation,
            },
            "metadata": {
                "source": "RDKit local computation",
                "filters": "PAINS (Baell & Holloway 2010), Brenk (2008), NIH",
            },
        }