# Source code for tooluniverse.protacdb_tool

"""
PROTAC-DB 3.0 Tool

Provides access to PROTAC-DB 3.0 (https://cadd.zju.edu.cn/protacdb/) for querying
PROTAC (Proteolysis Targeting Chimera) compound data, including structures, targets,
E3 ligases, DC50 values, and degradation data.

API: https://cadd.zju.edu.cn/protacdb/
No authentication required. Uses Tornado CSRF session cookies.
"""

import requests
from typing import Dict, Any, Optional, List
from .base_tool import BaseTool
from .tool_registry import register_tool

PROTACDB_BASE_URL = "https://cadd.zju.edu.cn/protacdb"

# Property filter fields required by the compound search endpoint
_PROPERTY_FILTERS = [
    "molwt_min",
    "molwt_max",
    "heavyatomcount_min",
    "heavyatomcount_max",
    "ringcount_min",
    "ringcount_max",
    "xlogp3_min",
    "xlogp3_max",
    "numhacceptors_min",
    "numhacceptors_max",
    "numhdonors_min",
    "numhdonors_max",
    "numrotatablebonds_min",
    "numrotatablebonds_max",
    "tpsa_min",
    "tpsa_max",
]


@register_tool("ProtacDBTool")
class ProtacDBTool(BaseTool):
    """
    Tool for querying PROTAC-DB 3.0 (Zhejiang University).

    Provides access to:
    - PROTAC compound search and retrieval by target or E3 ligase
    - Compound details (SMILES, DC50, Dmax, cell lines, references)
    - Target protein information

    Every operation returns a dict with a ``"status"`` key that is either
    ``"success"`` (payload under ``"data"``) or ``"error"`` (message under
    ``"error"``, optionally with extra context such as ``"detail"``).
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the tool configuration and cache its parameter schema.

        Args:
            tool_config: Tool configuration dict. Its optional ``"parameter"``
                entry is kept on ``self.parameter`` and that entry's
                ``"required"`` list on ``self.required``.
        """
        super().__init__(tool_config)
        self.parameter = tool_config.get("parameter", {})
        self.required = self.parameter.get("required", [])

    def _get_session_with_xsrf(self) -> requests.Session:
        """Return a new session primed by a GET of the PROTAC-DB landing page.

        The GET lets the server set its cookies on the session; callers read
        the ``_xsrf`` cookie from it afterwards. The caller owns the returned
        session and must close it (e.g. by using it as a context manager).

        Raises:
            requests.exceptions.RequestException: If the priming GET fails.
        """
        session = requests.Session()
        try:
            session.get(f"{PROTACDB_BASE_URL}/", timeout=30)
        except Exception:
            # Do not leak the connection pool when the priming request fails.
            session.close()
            raise
        return session

    def _build_files(
        self, session: requests.Session, extra: Optional[Dict] = None
    ) -> Dict:
        """Build the multipart form payload for a compound search request.

        Args:
            session: Session whose ``_xsrf`` cookie (empty string if absent)
                is echoed back in the ``_xsrf`` form field.
            extra: Optional additional form fields; values are stringified
                and override any same-named default field.

        Returns:
            Mapping of field name to ``(None, value)`` tuples, the shape
            ``requests`` expects for non-file multipart fields. Every name in
            ``_PROPERTY_FILTERS`` is present with the value ``"none"``.
        """
        xsrf = session.cookies.get("_xsrf", "")
        files: Dict = {"_xsrf": (None, xsrf)}
        for f in _PROPERTY_FILTERS:
            files[f] = (None, "none")
        if extra:
            files.update({k: (None, str(v)) for k, v in extra.items()})
        return files

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch to the handler named by ``arguments["operation"]``.

        Supported operations are ``search_protacs``, ``get_protac`` and
        ``search_targets``. Exceptions raised by a handler are converted into
        an error dict rather than propagated.

        Args:
            arguments: Operation name plus that operation's parameters.

        Returns:
            The handler's result dict, or an error dict when the operation is
            missing, unknown, or fails.
        """
        operation = arguments.get("operation")
        if not operation:
            return {"status": "error", "error": "Missing required parameter: operation"}

        operation_handlers = {
            "search_protacs": self._search_protacs,
            "get_protac": self._get_protac,
            "search_targets": self._search_targets,
        }

        handler = operation_handlers.get(operation)
        if not handler:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}",
                "available_operations": list(operation_handlers.keys()),
            }

        try:
            return handler(arguments)
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "PROTAC-DB API request timed out"}
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to PROTAC-DB API"}
        except Exception as e:
            # Tool boundary: report any other failure as data instead of raising.
            return {"status": "error", "error": f"Operation failed: {str(e)}"}

    def _search_protacs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search PROTAC compounds by target and/or E3 ligase.

        With a ``target`` the search endpoint is queried (the E3 ligase, if
        given, is added to the search path). With only ``e3_ligase`` the
        compound browse endpoint is queried and the returned rows are
        filtered client-side by a case-insensitive substring match on each
        row's ``e3_ligase`` field.

        Args:
            arguments: May contain ``target``, ``e3_ligase`` (at least one is
                required) and ``max_results`` (default 50; the requested row
                count is clamped to the range 1-100).

        Returns:
            On success: ``data`` (list of compound dicts), ``num_results``
            and ``total_in_db`` (from the response metadata, or ``None``).
        """
        target = arguments.get("target")
        e3_ligase = arguments.get("e3_ligase")

        if not target and not e3_ligase:
            return {
                "status": "error",
                "error": "At least one of 'target' or 'e3_ligase' is required",
            }

        # An explicit None means "use the default"; anything non-numeric is a
        # caller error that deserves a specific message.
        raw_max = arguments.get("max_results")
        try:
            max_results = 50 if raw_max is None else int(raw_max)
        except (TypeError, ValueError):
            return {"status": "error", "error": "max_results must be an integer"}
        rows = max(1, min(max_results, 100))

        if target:
            # Build search URL: path-params format used by PROTAC-DB
            path = (
                f"search/target={requests.utils.quote(str(target), safe='')}"
                "&dataset=protac"
            )
            if e3_ligase:
                path += f"&e3={requests.utils.quote(str(e3_ligase), safe='')}"
            url = f"{PROTACDB_BASE_URL}/{path}"
        else:
            # E3-only: browse all compounds, filter client-side
            url = f"{PROTACDB_BASE_URL}/browse/compound?dataset=protac"

        # The context manager closes the session's connections when done.
        with self._get_session_with_xsrf() as session:
            files = self._build_files(
                session,
                {
                    "page": "1",
                    "rows": str(rows),
                    "column_name": "none",
                    "sort_way": "none",
                },
            )
            resp = session.post(url, files=files, timeout=60)

        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"PROTAC-DB API returned status {resp.status_code}",
                "detail": resp.text[:300],
            }

        try:
            data = resp.json()
        except Exception:
            return {
                "status": "error",
                "error": "Failed to parse PROTAC-DB response as JSON",
            }

        if not isinstance(data, list) or not data:
            return {
                "status": "error",
                "error": "Unexpected response format from PROTAC-DB",
            }

        # The last element is expected to be a metadata dict carrying "total".
        # Strip it only when it really is metadata, so that a trailing
        # compound record is never silently discarded.
        last = data[-1]
        if isinstance(last, dict) and "total" in last:
            meta, records = last, data[:-1]
        else:
            meta, records = {}, data
        compounds: List[Dict] = [item for item in records if isinstance(item, dict)]

        # Client-side E3 filter for browse-all mode
        if not target and e3_ligase:
            needle = str(e3_ligase).upper()
            compounds = [
                c for c in compounds if needle in str(c.get("e3_ligase", "")).upper()
            ]

        return {
            "status": "success",
            "data": compounds,
            "num_results": len(compounds),
            "total_in_db": meta.get("total"),
        }

    def _get_protac(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Fetch a single PROTAC compound record by its identifier.

        Args:
            arguments: Must contain ``protac_id``.

        Returns:
            On success the parsed JSON response under ``data``; otherwise an
            error dict (a dedicated message is used for HTTP 404).
        """
        protac_id = arguments.get("protac_id")
        if not protac_id:
            return {"status": "error", "error": "protac_id is required"}

        # Percent-encode the id so characters such as "/", "&" or "?" cannot
        # alter the request path (same treatment as the search parameters).
        quoted_id = requests.utils.quote(str(protac_id), safe="")
        url = f"{PROTACDB_BASE_URL}/compound/dataset=protac&id={quoted_id}"

        with self._get_session_with_xsrf() as session:
            xsrf = session.cookies.get("_xsrf", "")
            resp = session.post(url, files={"_xsrf": (None, xsrf)}, timeout=30)

        if resp.status_code == 200:
            try:
                return {"status": "success", "data": resp.json()}
            except Exception:
                return {
                    "status": "error",
                    "error": "Failed to parse PROTAC-DB compound response",
                }
        elif resp.status_code == 404:
            return {
                "status": "error",
                "error": f"PROTAC compound ID {protac_id} not found",
            }
        else:
            return {
                "status": "error",
                "error": f"API request failed with status {resp.status_code}",
                "detail": resp.text[:300],
            }

    def _search_targets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List PROTAC-DB targets, optionally filtered by name or UniProt ID.

        The full target list is fetched and filtered client-side with
        case-insensitive substring matching. ``target_name`` takes precedence:
        ``uniprot_id`` is only consulted when ``target_name`` is not given.
        With neither, the full list is returned unfiltered.

        Args:
            arguments: May contain ``target_name`` and/or ``uniprot_id``.

        Returns:
            On success: ``data`` (list of target records) and ``num_results``.
        """
        target_name = arguments.get("target_name")
        uniprot_id = arguments.get("uniprot_id")

        url = f"{PROTACDB_BASE_URL}/browse/target"
        with self._get_session_with_xsrf() as session:
            xsrf = session.cookies.get("_xsrf", "")
            resp = session.post(
                url,
                files={"_xsrf": (None, xsrf), "dataset": (None, "protac")},
                timeout=30,
            )

        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"PROTAC-DB target API returned status {resp.status_code}",
                "detail": resp.text[:300],
            }

        try:
            all_targets = resp.json()
        except Exception:
            return {
                "status": "error",
                "error": "Failed to parse PROTAC-DB target response",
            }

        if not isinstance(all_targets, list):
            return {"status": "error", "error": "Unexpected format for target list"}

        # Client-side filtering. Only dict entries can be matched on fields;
        # anything else is skipped instead of raising AttributeError.
        name_fields = (
            "short_target_name",
            "long_target_name",
            "short_target_name_2",
            "short_target_name_3",
        )
        if target_name:
            needle = str(target_name).upper()
            filtered = [
                t
                for t in all_targets
                if isinstance(t, dict)
                and any(needle in str(t.get(field, "")).upper() for field in name_fields)
            ]
        elif uniprot_id:
            needle = str(uniprot_id).upper()
            filtered = [
                t
                for t in all_targets
                if isinstance(t, dict)
                and needle in str(t.get("uniprot_id", "")).upper()
            ]
        else:
            filtered = all_targets

        return {
            "status": "success",
            "data": filtered,
            "num_results": len(filtered),
        }