# Source code for tooluniverse.cath_tool

# cath_tool.py
"""
CATH Protein Structure Classification Database API tool for ToolUniverse.

CATH is a hierarchical classification of protein domain structures that
clusters proteins at four major levels: Class (C), Architecture (A),
Topology (T), and Homologous superfamily (H). CATH classifies domains
from the PDB and AlphaFold Protein Structure Database.

API: https://www.cathdb.info/version/v4_3_0/api/rest/
No authentication required. Free public access.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Root of the CATH REST API; the path pins requests to the v4_3_0 release.
# Endpoint paths (e.g. /superfamily/<id>) are appended to this by CATHTool.
CATH_BASE_URL = "https://www.cathdb.info/version/v4_3_0/api/rest"


@register_tool("CATHTool")
class CATHTool(BaseTool):
    """
    Tool for querying the CATH protein structure classification database.

    CATH classifies protein domain structures into a hierarchy:
    Class -> Architecture -> Topology -> Homologous superfamily.
    Covers 500,000+ domains from PDB and AFDB structures.

    No authentication required.

    The endpoint to query is chosen once, at construction time, from
    ``tool_config["fields"]["endpoint"]`` ("superfamily" or
    "domain_summary"); ``run`` then expects the matching identifier in its
    ``arguments`` dict.
    """

    # Human-readable names for the leading (Class) component of a CATH ID.
    _CLASS_NAMES = {
        "1": "Mainly Alpha",
        "2": "Mainly Beta",
        "3": "Alpha Beta",
        "4": "Few Secondary Structures",
    }

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and target endpoint from ``tool_config``.

        Args:
            tool_config: Tool configuration. Reads the optional keys
                ``timeout`` (seconds, default 30) and ``fields.endpoint``
                (default "superfamily").
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers an explicit ``"fields": None`` in the config,
        # which ``.get("fields", {})`` would pass through as None.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "superfamily")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the CATH API call.

        Args:
            arguments: Query arguments; the key read depends on the configured
                endpoint (``superfamily_id`` or ``domain_id``).

        Returns:
            ``{"data": ..., "metadata": ...}`` on success, or
            ``{"error": <message>}`` on any failure. Exceptions raised while
            querying are converted to an error dict rather than propagated.
        """
        try:
            return self._query(arguments)
        # Timeout is listed before ConnectionError so that a connect timeout
        # (which derives from both) is reported as a timeout.
        except requests.exceptions.Timeout:
            return {"error": f"CATH API request timed out after {self.timeout} seconds"}
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to CATH API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # Compare against None explicitly: a Response object is falsy
            # whenever its status is an error, so a plain truthiness test
            # would discard exactly the responses we want to report.
            status = e.response.status_code if e.response is not None else "unknown"
            return {"error": f"CATH API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error querying CATH: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured CATH endpoint."""
        if self.endpoint == "superfamily":
            return self._get_superfamily(arguments)
        elif self.endpoint == "domain_summary":
            return self._get_domain_summary(arguments)
        else:
            return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _clean_id(value: Any) -> str:
        """Return ``value`` as a whitespace-stripped string ("" if missing)."""
        if not value:
            return ""
        return str(value).strip()

    def _fetch_json(self, resource: str, identifier: str) -> Dict[str, Any]:
        """
        GET ``<CATH_BASE_URL>/<resource>/<identifier>`` and decode the JSON body.

        Args:
            resource: Endpoint path segment (e.g. "superfamily").
            identifier: Caller-supplied ID; percent-encoded before use.

        Returns:
            The decoded JSON object.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP errors.
            ValueError: If the body is not JSON or is not a JSON object.
        """
        # The identifier comes from the caller. Encoding every reserved
        # character (safe="") keeps "/", "..", "?" and "#" from changing the
        # request path or adding a query string. Well-formed CATH IDs contain
        # only letters, digits and dots, so they pass through unchanged.
        encoded_id = quote(identifier, safe="")
        url = f"{CATH_BASE_URL}/{resource}/{encoded_id}"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, dict):
            raise ValueError("CATH API returned a non-object JSON payload")
        return payload

    def _get_superfamily(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get CATH superfamily information by CATH ID.

        Args:
            arguments: Must contain ``superfamily_id`` (e.g. "2.40.50.140").

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with selected fields of the
            API's ``data`` object, or ``{"error": ...}`` if the ID is missing
            or the API response does not carry a truthy ``success`` flag.
        """
        cath_id = self._clean_id(arguments.get("superfamily_id", ""))
        if not cath_id:
            return {
                "error": "superfamily_id parameter is required (e.g. 2.40.50.140 for Nucleic acid-binding proteins)"
            }

        resp_data = self._fetch_json("superfamily", cath_id)
        if not resp_data.get("success"):
            return {"error": f"CATH API returned unsuccessful response for {cath_id}"}

        # ``or {}`` guards against a JSON null in "data".
        data = resp_data.get("data") or {}
        result = {
            "cath_id": data.get("cath_id"),
            "superfamily_id": data.get("superfamily_id"),
            "classification_name": data.get("classification_name"),
            "classification_description": data.get("classification_description"),
            "example_domain_id": data.get("example_domain_id"),
            "num_s35_families": data.get("child_count_s35_code"),
            "num_s60_families": data.get("child_count_s60_code"),
            "num_s95_families": data.get("child_count_s95_code"),
            "num_s100_domains": data.get("child_count_s100_code"),
            "total_domain_count": data.get("child_count_s100_count"),
        }
        return {
            "data": result,
            "metadata": {
                "source": "CATH v4.3.0",
                "query": cath_id,
            },
        }

    def _get_domain_summary(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get domain summary for a CATH domain ID (PDB chain domain).

        Args:
            arguments: Must contain ``domain_id`` (e.g. "1cukA01").

        Returns:
            ``{"data": {...}, "metadata": {...}}`` where ``data`` holds the
            domain's CATH ID split into its class / architecture / topology /
            homologous-superfamily prefixes and a residue count, or
            ``{"error": ...}`` if the ID is missing. ``class_name`` is present
            only when the class digit is one of the known names.
        """
        domain_id = self._clean_id(arguments.get("domain_id", ""))
        if not domain_id:
            return {
                "error": "domain_id parameter is required (e.g. 1cukA01 for PDB 1CUK chain A domain 1)"
            }

        resp_data = self._fetch_json("domain_summary", domain_id)
        # ``or {}`` guards against a JSON null in "data".
        data = resp_data.get("data") or {}

        # Each hierarchy level is the dotted prefix of the full CATH ID,
        # e.g. "2.40.50.140" -> class "2", architecture "2.40", ...
        cath_id = data.get("cath_id", "")
        cath_parts = cath_id.split(".") if cath_id else []

        result = {
            "domain_id": domain_id,
            "cath_id": cath_id,
            "superfamily_id": data.get("superfamily_id"),
            "class": cath_parts[0] if len(cath_parts) > 0 else None,
            "architecture": ".".join(cath_parts[:2]) if len(cath_parts) > 1 else None,
            "topology": ".".join(cath_parts[:3]) if len(cath_parts) > 2 else None,
            "homologous_superfamily": ".".join(cath_parts[:4])
            if len(cath_parts) > 3
            else None,
            # A null "residues" value counts as zero instead of raising.
            "residue_count": len(data.get("residues") or []),
        }

        if result["class"] in self._CLASS_NAMES:
            result["class_name"] = self._CLASS_NAMES[result["class"]]

        return {
            "data": result,
            "metadata": {
                "source": "CATH v4.3.0",
                "query": domain_id,
            },
        }