# Source code for tooluniverse.proteomicsdb_meltome_tool

"""ProteomicsDB meltome / thermal-proteome-profiling tool (live OData, keyless).

ProteomicsDB (https://www.proteomicsdb.org) exposes a keyless OData v2 API. This
module wraps its **meltome** (thermal proteome profiling / TPP) data: protein
melting curves whose inflection point (apparent melting temperature, Tm) shifts
when a ligand binds — the basis of CETSA/TPP target deconvolution. It is kept in
its own module, disjoint from the existing ``proteomicsdb_tool.py`` (expression /
search / peptides).

``ProteomicsDBGetProteinMeltomeTool`` (ProteomicsDB_get_protein_meltome): given a
gene symbol or UniProt accession, return the protein's melting curves with their
apparent Tm and fit quality.

COVERAGE NOTE: the meltome is a soluble-proteome resource. Membrane proteins —
including most GPCRs (e.g. GLP1R returns zero curves) — are typically absent.
"""

import statistics
from typing import Any, Dict, List, Optional

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Root URL of the ProteomicsDB OData service; entity paths are appended to it.
_BASE = "https://www.proteomicsdb.org/proteomicsdb/logic/api_v2/api.xsodata"

# Timeout passed to every ``requests.get`` call (seconds).
_TIMEOUT = 30

# ProteomicsDB curve type for protein melting curves.
_MELTING_CURVE_TYPE_ID = 2

# Plausible Tm range (deg C) for the inflection parameter; both bounds are
# used as exclusive limits when picking the apparent Tm.
_TM_MIN = 30.0
_TM_MAX = 90.0


def _err(message: str, **extra: Any) -> Dict[str, Any]:
    """Build the error payload returned by this module's tools.

    Args:
        message: Human-readable description stored under ``"error"``.
        **extra: Additional fields merged into the payload (e.g. ``url``).
            A key that collides with ``"status"`` or ``"error"`` replaces
            the default value.

    Returns:
        A dict with ``"status": "error"``, the message, and any extras.
    """
    return {"status": "error", "error": message, **extra}


def _num(value: Any) -> Optional[float]:
    """Coerce *value* to ``float``, or return ``None`` if it cannot be converted.

    Args:
        value: Anything ``float()`` might accept (number, numeric string, ...).

    Returns:
        The converted float, or ``None`` when ``float()`` rejects the value.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / containers; ValueError: non-numeric strings;
        # OverflowError: an int too large to be represented as a float.
        return None


def _odata_get(path: str) -> List[Dict[str, Any]]:
    """GET an OData path under the ProteomicsDB API; return the results list.

    Args:
        path: Path and query string appended verbatim to ``_BASE``; the
            caller is responsible for URL-encoding it.

    Returns:
        The list found under ``d.results`` in the JSON body (or ``d`` itself
        when it is already a list). A missing or null value yields ``[]``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx status (via ``raise_for_status``).
        ValueError: When the body is not JSON, or is JSON whose envelope is
            not the expected object shape.
    """
    resp = requests.get(f"{_BASE}{path}", timeout=_TIMEOUT)
    resp.raise_for_status()

    payload = resp.json()
    if not isinstance(payload, dict):
        # Raise ValueError rather than letting ``.get`` fail with an
        # AttributeError that callers of this helper do not handle.
        raise ValueError(
            f"unexpected OData payload type: {type(payload).__name__}"
        )

    envelope = payload.get("d") or {}
    if isinstance(envelope, list):
        # Some OData JSON responses put the collection directly under "d"
        # instead of wrapping it in {"results": [...]}.
        return envelope
    if not isinstance(envelope, dict):
        raise ValueError(
            f"unexpected OData 'd' envelope type: {type(envelope).__name__}"
        )

    # ``or []`` also covers an explicit ``"results": null``.
    return envelope.get("results") or []


def _apparent_tm(fitted: List[Dict[str, Any]]) -> Optional[float]:
    """Pick the apparent Tm out of a curve's fitted parameters.

    Each entry's ``"VALUE"`` is converted with ``_num``; values strictly
    between ``_TM_MIN`` and ``_TM_MAX`` are candidates. When several
    parameters qualify, the last one in list order is used.

    Args:
        fitted: Fitted-parameter dicts for one curve.

    Returns:
        The selected value rounded to two decimals, or ``None`` when no
        parameter falls inside the range.
    """
    last_in_range: Optional[float] = None
    for param in fitted:
        candidate = _num(param.get("VALUE"))
        if candidate is None:
            continue
        if _TM_MIN < candidate < _TM_MAX:
            last_in_range = candidate

    if last_in_range is None:
        return None
    return round(last_in_range, 2)


@register_tool(
    "ProteomicsDBGetProteinMeltomeTool",
    config={
        "name": "ProteomicsDB_get_protein_meltome",
        "type": "ProteomicsDBGetProteinMeltomeTool",
        "description": (
            "Get a protein's thermal proteome profiling (meltome / TPP) melting "
            "curves from ProteomicsDB by gene symbol or UniProt accession. Each "
            "curve yields an apparent melting temperature (Tm) and fit quality "
            "(R^2 / p-value); a ligand-induced Tm shift is the basis of CETSA/TPP "
            "target deconvolution. Keyless. NOTE: the meltome covers SOLUBLE "
            "proteins — most membrane GPCRs (e.g. GLP1R) have no data."
        ),
        "parameter": {
            "type": "object",
            "properties": {
                "gene_symbol": {
                    "type": ["string", "null"],
                    "description": "HGNC gene symbol, e.g. 'MAPK1'. Provide this OR uniprot_accession.",
                },
                "uniprot_accession": {
                    "type": ["string", "null"],
                    "description": "UniProt accession, e.g. 'P28482'. Provide this OR gene_symbol.",
                },
                "max_curves": {
                    "type": "integer",
                    "description": "Max melting curves to return (default 25, max 200).",
                },
            },
        },
        "return_schema": {
            "oneOf": [
                {
                    "type": "object",
                    "properties": {
                        "status": {"type": "string", "enum": ["success"]},
                        "data": {
                            "type": "object",
                            "properties": {
                                "gene_name": {"type": ["string", "null"]},
                                "uniprot": {"type": ["string", "null"]},
                                "protein_name": {"type": ["string", "null"]},
                                "protein_id": {"type": ["integer", "null"]},
                                "n_melting_curves": {"type": "integer"},
                                "median_apparent_tm_celsius": {
                                    "type": ["number", "null"]
                                },
                                "curves": {"type": "array"},
                            },
                        },
                        "metadata": {"type": "object"},
                    },
                    "required": ["status", "data"],
                },
                {
                    "type": "object",
                    "properties": {
                        "status": {"type": "string", "enum": ["error"]},
                        "error": {"type": "string"},
                        "url": {"type": "string"},
                    },
                    "required": ["status", "error"],
                },
            ]
        },
        "test_examples": [
            {"gene_symbol": "MAPK1"},
            {"gene_symbol": "CDK2", "max_curves": 5},
            {"uniprot_accession": "P28482"},
        ],
    },
)
class ProteomicsDBGetProteinMeltomeTool(BaseTool):
    """Fetch ProteomicsDB meltome (TPP) melting curves for a protein."""

    @staticmethod
    def _odata_string(value: str) -> str:
        """Render *value* as an OData string literal.

        Embedded single quotes are doubled so caller-supplied text cannot
        terminate the literal early and alter the ``$filter`` expression.
        """
        return "'" + value.replace("'", "''") + "'"

    @staticmethod
    def _curve_record(curve: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one ``Curve`` entity (with expanded parameters) into a record."""
        fitted = (curve.get("FittedParameters") or {}).get("results") or []
        return {
            "curve_id": curve.get("CURVE_ID"),
            "apparent_tm_celsius": _apparent_tm(fitted),
            # NOTE(review): COD is reported as R^2 here; presumably it is the
            # coefficient of determination — confirm against the API schema.
            "r_squared": _num(curve.get("COD")),
            "p_value": _num(curve.get("P_VALUE")),
            "bic": _num(curve.get("BIC")),
            "scope": curve.get("SCOPE"),
            "fitted_parameter_values": [p.get("VALUE") for p in fitted],
        }

    @staticmethod
    def _median_tm(records: List[Dict[str, Any]]) -> Optional[float]:
        """Median of the non-null apparent Tm values, or ``None`` if there are none.

        For an even number of values this is the mean of the two middle
        values, rounded to the same two decimals as the per-curve Tm.
        """
        tms = [
            r["apparent_tm_celsius"]
            for r in records
            if r["apparent_tm_celsius"] is not None
        ]
        return round(statistics.median(tms), 2) if tms else None

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Look up a protein and return its melting curves.

        Args:
            arguments: Tool arguments. ``gene_symbol`` and/or
                ``uniprot_accession`` select the protein (the accession is
                used when both are given). ``max_curves`` caps the number of
                curves requested; it defaults to 25 and is clamped to 1..200.

        Returns:
            On success, ``{"status": "success", "data": ..., "metadata": ...}``
            where ``data`` carries the protein identifiers, the per-curve
            records, and the median apparent Tm. Otherwise an error payload
            from ``_err``: no identifier given, a failed or non-JSON lookup,
            no matching protein, or a protein record without ``PROTEIN_ID``.
        """
        gene = (arguments.get("gene_symbol") or "").strip()
        acc = (arguments.get("uniprot_accession") or "").strip()
        if not gene and not acc:
            return _err("Provide gene_symbol or uniprot_accession")

        try:
            max_curves = int(arguments.get("max_curves", 25))
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")).
            max_curves = 25
        max_curves = max(1, min(200, max_curves))

        if acc:
            pfilter = f"UNIQUE_IDENTIFIER eq {self._odata_string(acc)}"
        else:
            pfilter = f"GENE_NAME eq {self._odata_string(gene)}"
        try:
            proteins = _odata_get(
                f"/Protein?$filter={requests.utils.quote(pfilter)}&$format=json&$top=1"
            )
        except requests.RequestException as exc:
            return _err(f"ProteomicsDB protein lookup failed: {exc}", url=_BASE)
        except ValueError as exc:
            return _err(f"ProteomicsDB returned non-JSON: {exc}", url=_BASE)
        if not proteins:
            return _err(f"No ProteomicsDB protein for {acc or gene!r}", url=_BASE)

        protein = proteins[0]
        pid = protein.get("PROTEIN_ID")
        if pid is None:
            # Without an id the curve filter would read "PROTEIN_ID eq None".
            return _err(
                f"ProteomicsDB protein record for {acc or gene!r} has no PROTEIN_ID",
                url=_BASE,
            )

        cfilter = f"PROTEIN_ID eq {pid} and CURVE_TYPE_ID eq {_MELTING_CURVE_TYPE_ID}"
        try:
            curves = _odata_get(
                f"/Curve?$filter={requests.utils.quote(cfilter)}"
                f"&$expand=FittedParameters&$format=json&$top={max_curves}"
            )
        except requests.RequestException as exc:
            return _err(f"ProteomicsDB curve lookup failed: {exc}", url=_BASE)
        except ValueError as exc:
            return _err(f"ProteomicsDB returned non-JSON: {exc}", url=_BASE)

        records = [self._curve_record(c) for c in curves]

        return {
            "status": "success",
            "data": {
                "gene_name": protein.get("GENE_NAME"),
                "uniprot": protein.get("UNIQUE_IDENTIFIER"),
                "protein_name": protein.get("PROTEIN_NAME"),
                "protein_id": pid,
                "n_melting_curves": len(records),
                "median_apparent_tm_celsius": self._median_tm(records),
                "curves": records,
            },
            "metadata": {
                "source": "ProteomicsDB meltome (thermal proteome profiling)",
                "url": f"{_BASE}/Protein({pid})",
                "curve_type": f"melting curve (CURVE_TYPE_ID={_MELTING_CURVE_TYPE_ID})",
                "note": (
                    "apparent_tm_celsius is the inflection-point parameter of each fitted "
                    "melting curve. Soluble-proteome resource; membrane GPCRs are typically absent."
                ),
            },
        }