Source code for tooluniverse.gtopdb_tool

import re
import requests
from typing import Any, Dict
from .base_tool import BaseTool
from .http_utils import request_with_retry
from .tool_registry import register_tool



[docs]
def _strip_html(text: Any) -> Any:
    """Strip HTML tags and decode HTML entities from a string.

    Feature-49A-H3: GtoPdb returns raw HTML tags in some fields (e.g., <sup>, <i>).
    Feature-51B-002: GtoPdb also returns HTML entities (e.g., &ouml; → ö, &alpha; → α).
    """
    if not isinstance(text, str):
        return text
    # First strip tags, then decode entities
    stripped = re.sub(r"<[^>]+>", "", text).strip()
    # Decode common HTML entities
    import html

    return html.unescape(stripped)



# Feature-53A-005: HGNC gene symbols → GtoPdb pharmacological receptor/enzyme names.
# GtoPdb indexes nuclear receptors and GPCRs by pharmacological names (ERα, D2 receptor)
# not HGNC gene symbols (ESR1, DRD2). When the gene_symbol name lookup returns 404, the
# tool falls back to searching /targets?name=<value> with the name from this mapping.
#
# Keys are UPPER-CASE HGNC symbols (the lookup upper-cases the user's gene_symbol first);
# values are the strings sent verbatim as the `name` query parameter. Entries such as
# HDAC1/PARP1 map to themselves: the retry then uses the upper-cased symbol as the name.
_HGNC_TO_GTOPDB_NAME: dict = {
    "ESR1": "ERα",
    "ESR2": "ERβ",
    "AR": "androgen receptor",
    "PPARG": "PPARγ",
    "PPARA": "PPARα",
    "PPARD": "PPARδ",
    "NR3C1": "glucocorticoid receptor",
    "NR3C2": "mineralocorticoid receptor",
    "NR1I2": "pregnane X receptor",
    "VDR": "vitamin D receptor",
    "RXRA": "RXRα",
    "DRD1": "D1 receptor",
    "DRD2": "D2 receptor",
    "DRD3": "D3 receptor",
    "DRD4": "D4 receptor",
    "DRD5": "D5 receptor",
    "HTR1A": "5-HT1A receptor",
    "HTR2A": "5-HT2A receptor",
    "ADRB1": "β1-adrenoceptor",
    "ADRB2": "β2-adrenoceptor",
    "ADRA1A": "α1A-adrenoceptor",
    "CHRM1": "M1 receptor",
    "CHRM2": "M2 receptor",
    "CHRM3": "M3 receptor",
    "OPRD1": "δ receptor",
    "OPRM1": "μ receptor",
    "OPRK1": "κ receptor",
    "PTGER2": "EP2 receptor",
    "PTGER4": "EP4 receptor",
    "HDAC1": "HDAC1",
    "PARP1": "PARP1",
}



[docs]
@register_tool("GtoPdbRESTTool")
class GtoPdbRESTTool(BaseTool):

[docs]
    def __init__(self, tool_config: Dict):
        """Set up the GtoPdb REST client.

        Args:
            tool_config: Tool configuration, forwarded to ``BaseTool``. The
                URL builder later reads ``tool_config["fields"]["endpoint"]``.
        """
        super().__init__(tool_config)
        self.base_url = "https://www.guidetopharmacology.org/services"
        # One shared session; every request asks for a JSON response.
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})
        # Passed as `timeout=` on every request (presumably seconds, per the
        # requests convention — confirm against request_with_retry).
        self.timeout = 30
        # Feature-38B-02: ligand id to filter interaction results by, client-side.
        # Written by _build_url(), reset at the start of every run(). Initialised
        # here so the attribute always exists, even before the first call.
        self._pending_ligand_id_filter = None



[docs]
    def _build_url(self, args: Dict[str, Any]) -> str:
        """Build URL with path parameters and query parameters."""
        url = self.tool_config["fields"]["endpoint"]

        # Feature-29A-07 fix: interactions endpoint requires path params, not query params
        # /services/interactions?targetId=X is ignored; must use /targets/{id}/interactions
        if (
            url.endswith("/interactions")
            and "{targetId}" not in url
            and "{ligandId}" not in url
        ):
            # Accept both camelCase and snake_case aliases
            target_id = args.get("targetId") or args.get("target_id")
            ligand_id = args.get("ligandId") or args.get("ligand_id")
            if target_id is not None:
                url = f"{self.base_url}/targets/{target_id}/interactions"
                # Feature-53A-002: when BOTH targetId AND ligandId are provided, the original code
                # only used targetId (correct for URL construction) but also silently removed
                # ligandId from args without setting _pending_ligand_id_filter. This meant the
                # client-side ligandId filter (set up in the elif branch) never ran, so the
                # ligandId parameter was completely ignored. Fix: preserve ligandId for
                # client-side filtering when both are provided.
                if ligand_id is not None:
                    self._pending_ligand_id_filter = ligand_id
                args = {
                    k: v
                    for k, v in args.items()
                    if k not in ("targetId", "target_id", "ligandId", "ligand_id")
                }
            elif ligand_id is not None:
                # Feature-38B-02: /ligands/{id}/interactions always returns [] per GtoPdb REST API.
                # GtoPdb interactions are indexed by TARGET. Store ligand_id on self for use in run().
                self._pending_ligand_id_filter = ligand_id
                # Fall back to main interactions endpoint; run() will filter client-side
                url = f"{self.base_url}/interactions"
                args = {
                    k: v
                    for k, v in args.items()
                    if k not in ("targetId", "target_id", "ligandId", "ligand_id")
                }

        query_params = {}

        # Separate path params from query params
        path_params = {}
        for k, v in args.items():
            if f"{{{k}}}" in url:
                # This is a path parameter
                path_params[k] = v
            else:
                # This is a query parameter
                query_params[k] = v

        # Replace path parameters in URL
        for k, v in path_params.items():
            url = url.replace(f"{{{k}}}", str(v))

        # Build query string for remaining parameters
        if query_params:
            # Map parameter names to GtoPdb API parameter names
            param_mapping = {
                "target_type": "type",
                "ligand_type": "type",
                "action_type": "type",
                "affinity_parameter": "affinityParameter",
                "min_affinity": "affinity",
                "approved_only": "approved",
                "query": "name",  # alias: query → name (GtoPdb API uses ?name=)
            }

            api_params = {}
            for k, v in query_params.items():
                # Skip limit as it's handled separately
                if k == "limit":
                    continue
                # Map parameter name
                api_key = param_mapping.get(k, k)
                # Convert boolean to lowercase string for API
                if isinstance(v, bool):
                    v = str(v).lower()
                api_params[api_key] = v

            # Build query string
            if api_params:
                from urllib.parse import urlencode

                url = f"{url}?{urlencode(api_params)}"

        return url



[docs]
    def _search_targets_by_abbreviation_variants(self, query: str, limit: int) -> list:
        """Feature-44A-04: When name search returns results whose names don't contain the query
        (e.g., 'PARP' → tankyrase via PARP5 synonym), also search for numbered variants
        like PARP1, PARP2, PARP3 and merge results.

        GtoPdb stores PARPs under full names ('poly(ADP-ribose) polymerase 1') but
        the abbreviation field has 'PARP1'. The API name= parameter matches abbreviations,
        so name=PARP1 finds the right target.
        """
        results = []
        seen_ids: set = set()
        from urllib.parse import urlencode

        # Try numbered variants: PARP1, PARP2, ..., PARP9
        for suffix in ("1", "2", "3", "4", "5", "6", "7", "8", "9"):
            if len(results) >= limit:
                break
            candidate = f"{query}{suffix}"
            try:
                url = f"{self.base_url}/targets?{urlencode({'name': candidate})}"
                response = request_with_retry(
                    self.session, "GET", url, timeout=self.timeout, max_attempts=1
                )
                if response.status_code == 200:
                    for t in response.json():
                        tid = t.get("targetId")
                        if tid and tid not in seen_ids:
                            seen_ids.add(tid)
                            results.append(t)
            except Exception:
                pass
        return results[:limit]



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute one GtoPdb REST query and return a normalised result dict.

        Processing order:
          1. Normalise arguments on a private copy (species → Title Case;
             ``approved_only`` and ``gene_symbol`` are pulled out because the
             API does not understand them).
          2. Resolve ``gene_symbol`` to a ``targetId`` when no target id was given.
          3. Build the URL, perform the GET, map HTTP failures to result dicts.
          4. Post-process list responses: strip HTML, apply the client-side
             ligandId / approved_only / ligand_type filters, apply the limit,
             then attach explanatory notes and hints.

        Args:
            arguments: Tool arguments. Keys interpreted here: ``species``,
                ``approved_only``, ``ligand_type``, ``gene_symbol``,
                ``targetId``/``target_id``, ``limit``/``max_results`` (default 50),
                ``name``/``query``. The caller's dict is never mutated.

        Returns:
            On success ``{"status": "success", "data": ..., "url": ..., "count": ...}``
            plus optional informational keys (``message``, ``note``, ``warning``,
            ``hint``, ``coverage_note``, ``truncation_note``, ...).
            On failure ``{"status": "error", "error": ..., "url": ...}``;
            exceptions raised while querying are converted to this shape
            rather than propagated.
        """
        url = None
        self._pending_ligand_id_filter = None  # Feature-38B-02: reset per call
        # Private shallow copy so none of the normalisation below leaks to the caller.
        arguments = dict(arguments or {})

        # Feature-61B-001: GtoPdb species filter is case-sensitive ("Human" not "human").
        # Normalize species to Title Case so users can pass any case variant.
        if arguments.get("species"):
            arguments["species"] = str(arguments["species"]).strip().title()

        # Feature-62B-003: GtoPdb API does not honor approved=true/false on /targets or
        # /ligands endpoints, so approved_only is never forwarded. It is applied
        # client-side on interaction results instead (see below).
        _approved_only_requested = arguments.pop("approved_only", None)

        # Feature-63B-001: GtoPdb API silently ignores ligand_type= when combined with name=.
        # Capture it for client-side post-filtering (it is still forwarded as ?type=).
        _ligand_type_requested = arguments.get("ligand_type")

        # Feature-46A-04 / Feature-47A-05: gene_symbol is a convenience parameter of this
        # tool, not a GtoPdb API parameter. Always remove it so it can never reach the URL:
        # an unknown query parameter is ignored by the API, which would then return
        # interactions for ALL targets.
        gene_symbol = arguments.pop("gene_symbol", None)
        if (
            gene_symbol
            and not arguments.get("targetId")
            and not arguments.get("target_id")
        ):
            target_id, lookup_failed = self._resolve_gene_symbol_to_target_id(
                str(gene_symbol)
            )
            if target_id is not None:
                arguments["targetId"] = target_id
            elif lookup_failed:
                # The lookup request itself failed (network error / unexpected status).
                # Proceeding would silently return unfiltered data, so report it.
                return {
                    "status": "error",
                    "error": (
                        f"GtoPdb API error: could not resolve gene_symbol='{gene_symbol}' "
                        "to a GtoPdb target because the target lookup request failed. "
                        "Retry, or pass targetId directly."
                    ),
                    "url": url,
                }
            else:
                # Feature-52B-002: the API answered, but no target matches this symbol.
                return {
                    "status": "success",
                    "data": [],
                    "count": 0,
                    "message": (
                        f"No GtoPdb target found for gene_symbol='{gene_symbol}'. "
                        "GtoPdb targets are indexed by pharmacological receptor/enzyme "
                        "names and may not recognize all HGNC gene symbols. "
                        f"Try GtoPdb_search_targets with a descriptive name "
                        f"(e.g., query='MEK1' or 'MAP2K1' or 'MEK' for MAP2K1). "
                        "Nuclear receptors use Greek-letter names (ESR1→'ERα', "
                        "AR→'androgen receptor', PPARG→'PPARγ'). "
                        "Note: many kinases and signaling enzymes (MAP2K1/MEK1, "
                        "MAP2K2/MEK2, MAPK1/ERK2, MAPK3/ERK1, etc.) have limited "
                        "or no interaction data in GtoPdb — use "
                        "ChEMBL_get_drug_mechanisms or ChEMBL_search_compounds "
                        "for approved inhibitors of MAP kinase pathway proteins."
                    ),
                }

        try:
            url = self._build_url(arguments)
            response = request_with_retry(
                self.session, "GET", url, timeout=self.timeout, max_attempts=3
            )

            # Free-text search term, used for the multi-word hints below.
            name_q = arguments.get("name") or arguments.get("query")
            name_words = str(name_q).split() if name_q else []
            is_multi_word = bool(name_words) and " " in str(name_q)

            if response.status_code == 404 and "?" in url:
                # Feature-37A-02: on search endpoints (URL has query params), 404 means
                # "no results", not a real error. Provide helpful guidance.
                hint = ""
                if "/targets" in url:
                    hint = " If searching for a drug/ligand name, use GtoPdb_search_ligands instead."
                elif "/ligands" in url:
                    hint = " If searching for a target name, use GtoPdb_search_targets instead."
                # Feature-54B-002: multi-word name searches often fail silently
                if is_multi_word:
                    hint += (
                        f" GtoPdb text search may not match multi-word phrases. "
                        f"Try a single keyword instead, e.g., name='{name_words[0]}'."
                    )
                return {
                    "status": "success",
                    "data": [],
                    "count": 0,
                    "url": url,
                    "message": f"No results found matching the search criteria.{hint}",
                }

            if response.status_code != 200:
                raw_detail = (response.text or "")[:500]
                # Feature-35A-01: extract human-readable API error from JSON detail
                try:
                    import json as _json

                    detail_obj = _json.loads(raw_detail)
                    api_msg = detail_obj.get("error", raw_detail)
                except Exception:
                    api_msg = raw_detail
                return {
                    "status": "error",
                    "error": f"GtoPdb API error: {api_msg} (HTTP {response.status_code})",
                    "url": url,
                    "status_code": response.status_code,
                    "detail": raw_detail,
                }

            data = response.json()

            # Feature-49A-H3: strip raw HTML tags from GtoPdb API fields.
            # GtoPdb returns HTML-formatted display values in some fields
            # (e.g., originalAffinity="6.3x10<sup>-6</sup>"), so clean them up
            # for LLMs and downstream code.
            _HTML_FIELDS = ("originalAffinity", "ligandName", "authors", "name")
            if isinstance(data, list):
                for item in data:
                    if isinstance(item, dict):
                        for field in _HTML_FIELDS:
                            if field in item:
                                item[field] = _strip_html(item[field])

            # Feature-38B-02: client-side filter by ligandId when requested
            # (/ligands/{id}/interactions always returns [], so we fetch all and filter).
            # Compare textually as well, so ligandId="5893" matches the integer 5893
            # that the API returns.
            ligand_id_filter = self._pending_ligand_id_filter
            if ligand_id_filter is not None and isinstance(data, list):
                wanted_text = str(ligand_id_filter).strip()
                data = [
                    x
                    for x in data
                    if isinstance(x, dict)
                    and (
                        x.get("ligandId") == ligand_id_filter
                        or str(x.get("ligandId")) == wanted_text
                    )
                ]

            # Feature-63A-001: approved_only for interactions must cross-reference the
            # approved ligands endpoint. Interaction records do NOT have an 'approvedDrug'
            # field; approval status lives on /services/ligands objects ('approved').
            # Both client-side filters run BEFORE total_available/limit are computed.
            _pre_approved_filter_count = len(data) if isinstance(data, list) else 0
            approved_filter_applied = False
            if (
                _approved_only_requested
                and isinstance(data, list)
                and "/interactions" in url
            ):
                try:
                    approved_url = f"{self.base_url}/ligands?approved=true"
                    approved_resp = request_with_retry(
                        self.session,
                        "GET",
                        approved_url,
                        timeout=self.timeout,
                        max_attempts=2,
                    )
                    if approved_resp.status_code == 200:
                        approved_ids = {
                            lig.get("ligandId")
                            for lig in approved_resp.json()
                            if isinstance(lig, dict) and lig.get("approved")
                        }
                        data = [x for x in data if x.get("ligandId") in approved_ids]
                        approved_filter_applied = True
                except Exception:
                    pass  # on API failure, keep all data unfiltered

            # Feature-63B-001: ligand_type post-filter for ligand searches.
            if _ligand_type_requested and isinstance(data, list) and "/ligands" in url:
                lt_lower = str(_ligand_type_requested).lower()
                if lt_lower == "approved":
                    # 'approved' is a boolean field on GtoPdb ligand records
                    data = [x for x in data if x.get("approved") is True]
                else:
                    # Match structural type field (case-insensitive)
                    data = [
                        x for x in data if (x.get("type") or "").lower() == lt_lower
                    ]

            # Apply limit (max_results is an alias for limit).
            # Feature-47A-04: default is 50 — interaction-rich targets like EGFR
            # (90 interactions) would only show 22% of data at limit=20.
            limit = self._effective_limit(arguments)
            total_available = len(data) if isinstance(data, list) else None
            if isinstance(data, list) and len(data) > limit:
                data = data[:limit]

            result: Dict[str, Any] = {
                "status": "success",
                "data": data,
                "url": url,
                "count": len(data) if isinstance(data, list) else 1,
            }

            # Feature-62B-003 / Feature-63B-001: for non-interaction endpoints, approved_only
            # is not applicable. For ligand searches, use ligand_type='Approved' instead.
            if _approved_only_requested and "/interactions" not in url:
                result["approved_only_note"] = (
                    "Note: approved_only applies only to GtoPdb_get_interactions (filters by "
                    "cross-referencing the GtoPdb approved ligands registry). For ligand "
                    "searches, use ligand_type='Approved' instead to filter by approval status."
                )

            # Feature-60A-003: disclose truncation so users know data was cut off
            if total_available is not None and total_available > len(data):
                noun = "interactions" if "/interactions" in url else "results"
                result["total_available"] = total_available
                result["returned"] = len(data)
                result["truncation_note"] = (
                    f"Returned {len(data)} of {total_available} total {noun}."
                    f" Increase limit (e.g., limit={total_available}) to retrieve all."
                )

            # Feature-54B-002: multi-word name search hint when results empty
            if (
                result["count"] == 0
                and ("/targets" in url or "/ligands" in url)
                and "?" in url
                and is_multi_word
            ):
                result["multi_word_hint"] = (
                    f"GtoPdb text search may not match multi-word phrases like '{name_q}'. "
                    f"Try a single keyword: name='{name_words[0]}'."
                )

            # Feature-38B-02: if ligandId filter returned nothing, add informative hint
            if ligand_id_filter is not None and result["count"] == 0:
                result["message"] = (
                    f"No interactions found for ligandId={ligand_id_filter} in the GtoPdb "
                    "interactions database. Possible reasons: (1) The drug may be stored under "
                    "a related compound entry — some approved drugs (e.g., vemurafenib ID=5893) "
                    "have pharmacological data under their research compound record (e.g., ID=8548). "
                    "Check the ligand details from GtoPdb_search_ligands for 'activeDrugIds' or "
                    "'prodrugIds' fields and try those IDs. (2) The drug may not be in the GtoPdb "
                    "interactions database. GtoPdb covers GPCR, ion channel, enzyme, and transporter "
                    "interactions; some targets may be absent. Search by target_id instead if you "
                    "know the GtoPdb target ID."
                )

            # Feature-44A-04: for target name searches, detect when returned target names
            # don't contain the query string (meaning the match was via abbreviation/synonym,
            # e.g. name=PARP matches tankyrase via its PARP5 synonym). In that case,
            # also search for numbered variants (PARP1, PARP2, ...) and merge results.
            query = arguments.get("query")
            if (
                query
                and "/targets" in url
                and "/targets/" not in url
                and isinstance(data, list)
            ):
                q_lower = str(query).lower()
                # `name` may be present but None, hence `or ""` rather than a .get default.
                names_contain_query = any(
                    q_lower in str(t.get("name") or "").lower()
                    for t in data
                    if isinstance(t, dict)
                )
                if not names_contain_query and data:
                    # The match was via synonym/abbreviation; try numbered variants
                    extra = self._search_targets_by_abbreviation_variants(
                        str(query), limit
                    )
                    if extra:
                        existing_ids = {
                            t.get("targetId") for t in data if isinstance(t, dict)
                        }
                        new_targets = [
                            t for t in extra if t.get("targetId") not in existing_ids
                        ]
                        if new_targets:
                            data = new_targets + data  # put canonical matches first
                            result["data"] = data
                            result["count"] = len(data)
                            result["note"] = (
                                f"Searched for '{query}'. Results include targets with abbreviation "
                                f"matching '{query}' (e.g., {data[0].get('name', '')}) as well as "
                                f"targets matched via synonym. For kinase/enzyme families, try "
                                f"searching with full gene symbols like '{query}1', '{query}2'."
                            )

            # Feature-59B-002: when an explicit target_id was provided and interactions is
            # empty, warn the user — the API returns HTTP 200 with [] for non-existent
            # targets, indistinguishable from "target has no data".
            _tid_match = re.search(r"/targets/(\d+)/interactions", url)
            if _tid_match and isinstance(data, list) and len(data) == 0:
                # Feature-63A-001: if approved_only filter cleared all results, give a specific
                # explanation instead of the misleading "target may be invalid" warning.
                if _approved_only_requested and _pre_approved_filter_count > 0:
                    result["approved_only_info"] = (
                        f"approved_only=True filtered all {_pre_approved_filter_count} "
                        f"interaction(s) for this target. GtoPdb interaction data focuses on "
                        "pharmacological research compounds with measured affinity — approved "
                        "drugs are rarely listed here. For approved drug-target interactions, "
                        "use ChEMBL_get_drug_mechanisms or ChEMBL_search_compounds with the "
                        "target gene name."
                    )
                else:
                    result["warning"] = (
                        f"No interactions found for target_id={_tid_match.group(1)}. "
                        "This may mean (a) the target has no pharmacological data in GtoPdb, "
                        "OR (b) the target ID is invalid (GtoPdb returns an empty list for "
                        "non-existent target IDs without an error). "
                        "Verify the target exists using GtoPdb_search_targets(name='...') "
                        "and confirm the returned targetId before calling get_interactions."
                    )

            # Feature-35A-02: add top-level queried_target summary for interactions endpoint
            # so users can immediately verify they're getting the right target's data
            if isinstance(data, list) and data and "/interactions" in url:
                first = data[0]
                if isinstance(first, dict):
                    if "targetId" in first or "targetName" in first:
                        result["queried_target"] = {
                            "id": first.get("targetId"),
                            "name": first.get("targetName"),
                        }
                    elif "ligandId" in first or "ligandName" in first:
                        result["queried_ligand"] = {
                            "id": first.get("ligandId"),
                            "name": first.get("ligandName"),
                        }
                # Feature-49B-002 / Feature-55A-002 / Feature-55A-003 / Feature-55B-001:
                # interaction records carry no approval flag, so the note is emitted as
                # general guidance. It is skipped when the approved_only filter actually
                # ran: the remaining rows then ARE approved-ligand interactions and the
                # note would contradict them.
                if not approved_filter_applied:
                    _chembl_target = gene_symbol or (
                        result.get("queried_target", {}).get("name") or "the target"
                    )
                    result["coverage_note"] = (
                        "GtoPdb interactions list pharmacological research compounds — approved "
                        "drugs for this target are not represented in these results. For approved "
                        "drugs and clinical compounds, use ChEMBL_get_drug_mechanisms or "
                        f"ChEMBL_search_compounds with target_name='{_chembl_target}'."
                    )

            # Feature-49A-M5: for ligand search results, add a hint about getting interaction data.
            # Feature-51A-001: warn that ligandId-based lookups often fail for enzyme/kinase
            # inhibitors (PARP, HDAC, CDK, etc.) because GtoPdb indexes interactions by TARGET.
            if isinstance(data, list) and data and "/ligands" in url and "?" in url:
                result["hint"] = (
                    "To find pharmacological interactions for a specific ligand, try "
                    "GtoPdb_get_interactions with ligandId=<id>. IMPORTANT: For enzyme "
                    "and kinase inhibitors (e.g., PARP inhibitors, CDK inhibitors, HDAC "
                    "inhibitors, kinase inhibitors), GtoPdb indexes interactions by TARGET, "
                    "and ligandId-based queries often return empty results even for approved "
                    "drugs. In that case, query by gene_symbol (e.g., gene_symbol='PARP1') "
                    "or targetId from GtoPdb_search_targets for more complete results."
                )

            return result
        except Exception as e:
            return {
                "status": "error",
                "error": f"GtoPdb API error: {str(e)}",
                "url": url,
            }

    @staticmethod
    def _effective_limit(arguments: Dict[str, Any], default: int = 50) -> int:
        """Return the result cap from ``limit`` (or its alias ``max_results``).

        Values that are missing, ``None``, boolean, negative, or not
        convertible to ``int`` are ignored, falling through to the alias and
        finally to ``default``.
        """
        for key in ("limit", "max_results"):
            value = arguments.get(key)
            if value is None or isinstance(value, bool):
                continue
            try:
                parsed = int(value)
            except (TypeError, ValueError):
                continue
            if parsed >= 0:
                return parsed
        return default

    def _lookup_targets(self, params: Dict[str, Any]):
        """Query ``/targets`` with ``params`` on behalf of gene-symbol resolution.

        Returns:
            A list of target dicts that carry a ``targetId`` when the API gave a
            definitive answer — an empty list for HTTP 404 or a non-list body,
            both of which mean "no match". ``None`` when the lookup itself
            failed (exception, unparsable body, or any other HTTP status).
        """
        from urllib.parse import urlencode

        try:
            resp = request_with_retry(
                self.session,
                "GET",
                f"{self.base_url}/targets?{urlencode(params)}",
                timeout=self.timeout,
                max_attempts=2,
            )
            # Feature-52B-002: GtoPdb answers 404 (not 200 + []) when nothing matches.
            if resp.status_code == 404:
                return []
            if resp.status_code != 200:
                return None
            payload = resp.json()
        except Exception:
            return None
        if not isinstance(payload, list):
            return []
        return [
            t for t in payload if isinstance(t, dict) and t.get("targetId") is not None
        ]

    @staticmethod
    def _select_target_id(targets: list, gene_symbol: str, allow_prefix: bool) -> Any:
        """Pick the best ``targetId`` for ``gene_symbol`` from a non-empty target list.

        Preference order: exact (case-insensitive) abbreviation match; then,
        when ``allow_prefix`` is set, the longest abbreviation that is a prefix
        of the symbol with only digits left over; otherwise the first target.
        """
        symbol_upper = gene_symbol.upper()
        for t in targets:
            if str(t.get("abbreviation") or "").upper() == symbol_upper:
                return t["targetId"]

        if allow_prefix:
            # Feature-48A-05: e.g. gene_symbol="ABL1", abbreviation="Abl" → remainder "1"
            # is numeric, so ABL1 is chosen instead of silently returning ABL2 (abbr "Arg").
            symbol_lower = gene_symbol.lower()
            best_id, best_len = None, 0
            for t in targets:
                abbr = str(t.get("abbreviation") or "").lower()
                if abbr and symbol_lower.startswith(abbr) and len(abbr) > best_len:
                    rest = symbol_lower[len(abbr):]
                    if rest == "" or rest.isdigit():
                        best_id, best_len = t["targetId"], len(abbr)
            if best_id is not None:
                return best_id

        return targets[0]["targetId"]

    def _resolve_gene_symbol_to_target_id(self, gene_symbol: str):
        """Resolve an HGNC gene symbol to a GtoPdb target id.

        Lookup chain:
          1. ``?geneSymbol=`` — Feature-54B-001: unambiguous HGNC lookup; avoids the
             substring matches that ``?name=AR`` would produce.
          2. ``?name=`` with the symbol itself.
          3. ``?name=`` with the pharmacological name from ``_HGNC_TO_GTOPDB_NAME``
             (Feature-53A-005), tried only when step 2 found nothing.

        Returns:
            ``(target_id, lookup_failed)``. ``target_id`` is ``None`` when
            unresolved; ``lookup_failed`` is ``True`` only when the name lookup
            request itself failed, as opposed to "no such target".
        """
        by_gene = self._lookup_targets({"geneSymbol": gene_symbol})
        if by_gene:
            return self._select_target_id(by_gene, gene_symbol, allow_prefix=False), False

        by_name = self._lookup_targets({"name": gene_symbol})
        if by_name is None:
            return None, True
        if by_name:
            return self._select_target_id(by_name, gene_symbol, allow_prefix=True), False

        gtopdb_name = _HGNC_TO_GTOPDB_NAME.get(gene_symbol.upper())
        if gtopdb_name:
            by_pharm_name = self._lookup_targets({"name": gtopdb_name})
            if by_pharm_name:
                return by_pharm_name[0]["targetId"], False
        return None, False