Source code for tooluniverse.interpro_member_db_tool

# interpro_member_db_tool.py
"""
InterPro member-database browse tool for ToolUniverse.

The existing InterPro tools (InterProRESTTool / InterProEntryTool /
InterProDomainArchTool) only browse INTEGRATED InterPro source entries
(IPRxxxxxx accessions) and protein->domain mappings. They cannot look up a
member-database SIGNATURE directly, browse a member database's catalog, or
enumerate the experimentally-solved PDB structures that contain a domain.

This tool fills those gaps using the InterPro REST API:

  * get_member_entry  -> GET /entry/{db}/{accession}
      Detail for a single member-database signature
      (Pfam PF00069, SMART SM00002, PANTHER PTHR10000, CDD cd00001,
       NCBIfam NF000004, SUPERFAMILY/ssf SSF52540, CATH-Gene3D, PRINTS,
       HAMAP, PIRSF, PROSITE profiles and patterns, SFLD, AntiFam),
       including which integrated InterPro entry it maps to.

  * list_member_entries -> GET /entry/{db}/
      Browse / paginate the signatures within one member database.

  * get_structures_for_entry -> GET /structure/pdb/entry/interpro/{id}
      List PDB structures whose chains contain a given InterPro domain,
      with experiment type and resolution.

  * list_member_databases -> GET /entry/
      The member-database catalog with entry counts (integrated /
      unintegrated totals per database).

API: https://www.ebi.ac.uk/interpro/api/
No authentication required. Free public access.
"""

from typing import Any, Dict, Optional, Tuple
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool


# Root of the InterPro REST API; every endpoint path is appended to this.
INTERPRO_BASE_URL = "https://www.ebi.ac.uk/interpro/api"

# URL slugs of the member databases recognised by the InterPro REST API.
# Kept in alphabetical order; where the slug is not self-explanatory the
# trailing comment names the database it stands for.
VALID_MEMBER_DBS = {
    "antifam",
    "cathgene3d",
    "cdd",
    "hamap",
    "ncbifam",
    "panther",
    "pfam",
    "pirsf",
    "prints",
    "profile",  # PROSITE profiles
    "prosite",  # PROSITE patterns
    "sfld",
    "smart",
    "ssf",  # SUPERFAMILY
}


@register_tool("InterProMemberDBTool")
class InterProMemberDBTool(BaseTool):
    """
    Browse InterPro member-database signatures, the member-database catalog,
    and the PDB structures that contain an InterPro domain.

    Complements existing InterPro tools (which only handle integrated
    IPRxxxxxx entries) by exposing the member-database (signature) layer and
    structure cross-references.

    No authentication required.

    Every handler returns a dict of the form
    ``{"status": "success", "data": {...}}`` or
    ``{"status": "error", "error": "<message>"}``.
    """

    # Common user-facing spellings mapped to the slug used in InterPro URLs.
    _DB_ALIASES = {
        "superfamily": "ssf",
        "supfam": "ssf",
        "scop": "ssf",
        "gene3d": "cathgene3d",
        "cath": "cathgene3d",
        "cath-gene3d": "cathgene3d",
        "tigrfam": "ncbifam",
        "tigrfams": "ncbifam",
        "ncbifams": "ncbifam",
        "prosite_patterns": "prosite",
        "prosite_profiles": "profile",
        "prosite-profiles": "profile",
    }

    def __init__(self, tool_config: Dict[str, Any]):
        """Read ``timeout`` (default 30) and ``fields.endpoint`` from the config.

        ``fields.endpoint`` selects which handler ``run`` dispatches to and
        defaults to ``"get_member_entry"``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``fields`` may be present but null in a config; treat that as empty.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "get_member_entry")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch to the configured endpoint handler.

        Network failures and unexpected response shapes are converted into
        ``{"status": "error", ...}`` dicts instead of propagating.
        """
        handlers = {
            "get_member_entry": self._get_member_entry,
            "list_member_entries": self._list_member_entries,
            "get_structures_for_entry": self._get_structures_for_entry,
            "list_member_databases": self._list_member_databases,
        }
        # Guard the lookup so an unhashable endpoint value still yields the
        # "Unknown endpoint" error rather than a TypeError.
        handler = (
            handlers.get(self.endpoint) if isinstance(self.endpoint, str) else None
        )
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown endpoint: {self.endpoint}",
            }

        try:
            # A missing arguments object behaves like an empty one, so the
            # handlers report which required argument is absent.
            return handler(arguments or {})
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": "InterPro API request timed out",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to InterPro API",
            }
        except requests.exceptions.RequestException as exc:
            return {
                "status": "error",
                "error": f"InterPro API request failed: {exc}",
            }
        except (ValueError, KeyError, TypeError, AttributeError) as exc:
            # AttributeError covers payloads whose shape is not the expected
            # nesting of dicts (e.g. a list where an object was expected).
            return {
                "status": "error",
                "error": f"Failed to parse InterPro API response: {exc}",
            }

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #

    @staticmethod
    def _normalize_db(db: Any) -> str:
        """Map common aliases to the InterPro REST member-database slug.

        Non-string input yields ``""``; unknown names are returned
        lower-cased and stripped so the caller can validate them.
        """
        if not isinstance(db, str):
            return ""
        d = db.strip().lower()
        return InterProMemberDBTool._DB_ALIASES.get(d, d)

    @staticmethod
    def _string_arg(arguments: Dict[str, Any], key: str) -> str:
        """Return ``arguments[key]`` stripped, or ``""`` if absent or not a string.

        Mirrors ``_normalize_db``: a non-string value is treated as missing
        instead of raising ``AttributeError`` on ``.strip()``.
        """
        value = arguments.get(key)
        return value.strip() if isinstance(value, str) else ""

    @staticmethod
    def _entry_name(name_field: Any) -> Any:
        """InterPro entry 'name' may be a string or a {name, short} dict."""
        if isinstance(name_field, dict):
            return name_field.get("name")
        return name_field

    @staticmethod
    def _clamp_page_size(value: Any, default: int = 20) -> int:
        """Clamp a requested page_size into the InterPro-allowed 1..100 range.

        Values that cannot be converted to an int (including infinities,
        which raise OverflowError) fall back to ``default``.
        """
        try:
            return max(1, min(int(value), 100))
        except (ValueError, TypeError, OverflowError):
            return default

    def _resolve_member_db(
        self, arguments: Dict[str, Any]
    ) -> Tuple[Optional[str], Optional[Dict[str, Any]]]:
        """Normalize + validate member_database. Returns (db, error_dict)."""
        db = self._normalize_db(arguments.get("member_database", ""))
        if not db:
            return None, {"status": "error", "error": "member_database is required"}
        if db not in VALID_MEMBER_DBS:
            return None, {
                "status": "error",
                "error": f"Unsupported member_database '{db}'. "
                f"Valid: {', '.join(sorted(VALID_MEMBER_DBS))}",
            }
        return db, None

    def _get(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        *,
        empty_ok: bool = False,
    ) -> Tuple[Optional[Any], Optional[Dict[str, Any]]]:
        """GET with shared timeout; returns (json, error_dict). One of them is None.

        Args:
            url: Absolute URL to request.
            params: Optional query-string parameters.
            empty_ok: When True, an HTTP 204 (no content) answer is returned
                as an empty result page (``{"count": 0, "results": []}``)
                instead of an error. List-style endpoints use this so that
                "nothing matched" is a successful, empty result.

        Raises:
            requests.exceptions.RequestException: on transport failures or an
                undecodable body; ``run`` converts these to error dicts.
        """
        resp = requests.get(url, params=params or {}, timeout=self.timeout)
        status = resp.status_code
        if status == 404:
            return None, {
                "status": "error",
                "error": "Not found in InterPro (HTTP 404). "
                "Check the accession / database slug.",
            }
        if status == 204:
            # 204 carries no body, so resp.json() must not be attempted.
            if empty_ok:
                return {"count": 0, "results": []}, None
            return None, {
                "status": "error",
                "error": "No data found in InterPro (HTTP 204). "
                "Check the accession / database slug.",
            }
        if status != 200:
            return None, {
                "status": "error",
                "error": f"InterPro API HTTP error: {status}",
            }
        return resp.json(), None

    # ------------------------------------------------------------------ #
    # Endpoint: get_member_entry
    # ------------------------------------------------------------------ #

    def _get_member_entry(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Fetch one member-database signature via /entry/{db}/{accession}.

        Reads ``member_database`` and ``accession`` from ``arguments`` and
        returns a flattened summary of the entry's ``metadata`` object.
        """
        db, err = self._resolve_member_db(arguments)
        if err:
            return err
        accession = self._string_arg(arguments, "accession")
        if not accession:
            return {"status": "error", "error": "accession is required"}

        # Percent-encode so characters such as "/" or "?" cannot change the
        # request path; ":" stays literal because some accessions contain it.
        encoded = quote(accession, safe=":")
        url = f"{INTERPRO_BASE_URL}/entry/{db}/{encoded}"
        payload, err = self._get(url)
        if err:
            return err

        # ``or {}`` also covers keys that are present but null.
        meta = payload.get("metadata") or {}
        counters = meta.get("counters") or {}
        result = {
            "accession": meta.get("accession"),
            "name": self._entry_name(meta.get("name")),
            "source_database": meta.get("source_database"),
            "type": meta.get("type"),
            "integrated_interpro": meta.get("integrated"),
            "description": self._first_description(meta.get("description")),
            "go_terms": [
                {
                    "identifier": g.get("identifier"),
                    "name": g.get("name"),
                    "category": (g.get("category") or {}).get("name"),
                }
                for g in (meta.get("go_terms") or [])
            ],
            "counters": {
                "proteins": counters.get("proteins"),
                "structures": counters.get("structures"),
                "taxa": counters.get("taxa"),
            },
        }
        return {"status": "success", "data": result}

    @staticmethod
    def _first_description(desc: Any) -> Any:
        """Normalize the 'description' field to a single string.

        InterPro returns this in several shapes depending on the member
        database: a plain string, a list of HTML strings, or a dict with a
        'text' key (e.g. {"text": "<p>...</p>", "llm": false, ...}).
        """
        if isinstance(desc, list):
            desc = desc[0] if desc else None
        if isinstance(desc, dict):
            return desc.get("text")
        return desc

    # ------------------------------------------------------------------ #
    # Endpoint: list_member_entries
    # ------------------------------------------------------------------ #

    def _list_member_entries(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List signatures of one member database via /entry/{db}/.

        Reads ``member_database``, optional ``page_size`` (clamped to 1..100)
        and optional ``entry_type`` (sent as the ``type`` query parameter).
        """
        db, err = self._resolve_member_db(arguments)
        if err:
            return err
        page_size = self._clamp_page_size(arguments.get("page_size", 20))
        params = {"page_size": page_size}
        entry_type = arguments.get("entry_type")
        if entry_type:
            params["type"] = str(entry_type).strip().lower()

        url = f"{INTERPRO_BASE_URL}/entry/{db}/"
        # A filter that matches nothing is an empty page, not a failure.
        payload, err = self._get(url, params, empty_ok=True)
        if err:
            return err

        entries = []
        for item in payload.get("results") or []:
            meta = item.get("metadata") or {}
            entries.append(
                {
                    "accession": meta.get("accession"),
                    "name": self._entry_name(meta.get("name")),
                    "type": meta.get("type"),
                    "integrated_interpro": meta.get("integrated"),
                }
            )
        result = {
            "member_database": db,
            "total_count": payload.get("count"),
            "returned": len(entries),
            "entries": entries,
        }
        return {"status": "success", "data": result}

    # ------------------------------------------------------------------ #
    # Endpoint: get_structures_for_entry
    # ------------------------------------------------------------------ #

    def _get_structures_for_entry(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List PDB structures for an InterPro entry.

        Calls /structure/pdb/entry/interpro/{id}/ with ``interpro_id``
        (upper-cased, must start with "IPR") and optional ``page_size``
        (clamped to 1..100).
        """
        interpro_id = self._string_arg(arguments, "interpro_id").upper()
        if not interpro_id:
            return {"status": "error", "error": "interpro_id is required"}
        if not interpro_id.startswith("IPR"):
            return {
                "status": "error",
                "error": "interpro_id must be an InterPro accession like IPR000719",
            }
        page_size = self._clamp_page_size(arguments.get("page_size", 20))

        # Percent-encode so the identifier stays a single path segment.
        encoded = quote(interpro_id, safe="")
        url = f"{INTERPRO_BASE_URL}/structure/pdb/entry/interpro/{encoded}/"
        # No matching structures is reported as zero results, not an error.
        payload, err = self._get(url, {"page_size": page_size}, empty_ok=True)
        if err:
            return err

        structures = []
        for item in payload.get("results") or []:
            meta = item.get("metadata") or {}
            structures.append(
                {
                    "pdb_id": meta.get("accession"),
                    "title": meta.get("name"),
                    "experiment_type": meta.get("experiment_type"),
                    "resolution": meta.get("resolution"),
                }
            )
        result = {
            "interpro_id": interpro_id,
            "total_structures": payload.get("count"),
            "returned": len(structures),
            "structures": structures,
        }
        return {"status": "success", "data": result}

    # ------------------------------------------------------------------ #
    # Endpoint: list_member_databases
    # ------------------------------------------------------------------ #

    def _list_member_databases(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Return the member-database catalog with entry counts via /entry/.

        ``arguments`` is accepted for interface uniformity and is not read.
        """
        url = f"{INTERPRO_BASE_URL}/entry/"
        payload, err = self._get(url)
        if err:
            return err

        entries = payload.get("entries") or {}
        member_dbs = entries.get("member_databases") or {}
        databases = [
            {"member_database": db, "entry_count": count}
            for db, count in member_dbs.items()
        ]
        result = {
            "databases": databases,
            "integrated_total": entries.get("integrated"),
            "unintegrated_total": entries.get("unintegrated"),
            "interpro_total": entries.get("interpro"),
            "all_total": entries.get("all"),
        }
        return {"status": "success", "data": result}