Source code for tooluniverse.pdbe_validation_tool

# pdbe_validation_tool.py
"""
PDBe Validation tool for ToolUniverse.

Provides structure quality validation data from PDBe, including
global percentile scores and residue-level outlier information.

API: https://www.ebi.ac.uk/pdbe/api/validation/
No authentication required.
"""

import requests
from typing import Dict, Any
from urllib.parse import quote
from .base_tool import BaseTool
from .tool_registry import register_tool

# Root of the PDBe REST API; the validation endpoint paths used by this
# module are appended to this prefix.
PDBE_BASE_URL = "https://www.ebi.ac.uk/pdbe/api"


@register_tool("PDBeValidationTool")
class PDBeValidationTool(BaseTool):
    """
    Tool for querying PDBe structure validation data.

    Supports:
    - Global quality percentile scores (Ramachandran, rotamer, clashscore)
    - Residue-level validation outliers

    The endpoint is selected by ``tool_config["fields"]["endpoint"]``
    (``"quality_scores"`` by default, or ``"outlier_residues"``).

    No authentication required.
    """

    # (key in this tool's output, key in the PDBe response), in report order.
    _QUALITY_METRICS = (
        ("ramachandran_outliers", "percent-rama-outliers"),
        ("rotamer_outliers", "percent-rota-outliers"),
        ("clashscore", "clashscore"),
    )

    def __init__(self, tool_config: Dict[str, Any]):
        """Read the request timeout and target endpoint from ``tool_config``.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds) defaults
                to 30; ``fields.endpoint`` defaults to ``"quality_scores"``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers a config where "fields" is present but None.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "quality_scores")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the PDBe Validation API call.

        Args:
            arguments: Call arguments; ``pdb_id`` is required.

        Returns:
            The endpoint's result dict, or ``{"error": <message>}`` if the
            request fails or any other exception is raised. This method never
            raises: it is the boundary where all failures become error dicts.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"PDBe API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to PDBe API"}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            return {
                "error": f"PDBe API HTTP {status}: structure may not exist or have validation data"
            }
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured endpoint."""
        if self.endpoint == "quality_scores":
            return self._get_quality_scores(arguments)
        if self.endpoint == "outlier_residues":
            return self._get_outlier_residues(arguments)
        return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _normalize_pdb_id(arguments: Dict[str, Any]) -> str:
        """Return the ``pdb_id`` argument stripped and lower-cased.

        A missing or ``None`` value yields ``""`` so callers can report
        "pdb_id is required" instead of failing on ``None.lower()``.
        """
        raw = arguments.get("pdb_id")
        if raw is None:
            return ""
        return str(raw).strip().lower()

    def _fetch_entry(self, resource: str, pdb_id: str) -> Any:
        """GET ``/validation/<resource>/entry/<pdb_id>`` and return that entry.

        Args:
            resource: Path segment naming the validation resource.
            pdb_id: Normalised PDB identifier.

        Returns:
            The value stored under ``pdb_id`` in the JSON response, or ``{}``
            when the key is absent or the payload is not a JSON object.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-2xx status (via ``raise_for_status``).
        """
        # The id comes from the caller; escape it so characters such as
        # "/", "?" or "#" cannot change the path that is requested.
        safe_id = quote(pdb_id, safe="")
        url = f"{PDBE_BASE_URL}/validation/{resource}/entry/{safe_id}"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, dict):
            return {}
        return data.get(pdb_id, {})

    def _get_quality_scores(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get global quality validation percentile scores.

        Returns:
            ``{"data": {"pdb_id", "quality_metrics"}, "metadata": {...}}``;
            ``quality_metrics`` only contains the metrics present in the
            response. Returns ``{"error": ...}`` when ``pdb_id`` is missing or
            the response has no entry for it.
        """
        pdb_id = self._normalize_pdb_id(arguments)
        if not pdb_id:
            return {"error": "pdb_id is required (e.g., '4hhb')."}

        entry = self._fetch_entry("global-percentiles", pdb_id)
        if not entry:
            return {"error": f"No validation data found for PDB {pdb_id}"}

        quality_metrics = {}
        for output_key, api_key in self._QUALITY_METRICS:
            metric = entry.get(api_key, {})
            if metric:
                quality_metrics[output_key] = {
                    "raw_value": metric.get("rawvalue"),
                    "absolute_percentile": metric.get("absolute"),
                    "relative_percentile": metric.get("relative"),
                }

        return {
            "data": {
                "pdb_id": pdb_id,
                "quality_metrics": quality_metrics,
            },
            "metadata": {
                "source": "PDBe Validation API (ebi.ac.uk/pdbe)",
                "description": "Percentile scores: higher = better quality. Absolute = vs all structures. Relative = vs similar resolution.",
            },
        }

    @staticmethod
    def _collect_outliers(models: Any) -> list:
        """Flatten a chain's ``models`` value into a list of outlier records.

        ``models`` is normally a list of ``{model_id, residues}`` objects; a
        dict whose values are residue lists is accepted as a fallback. Any
        other type yields no records. Residues with an empty or missing
        ``outlier_types`` are skipped.
        """
        if isinstance(models, list):
            residue_lists = [model.get("residues") or [] for model in models]
        elif isinstance(models, dict):
            residue_lists = [residues or [] for residues in models.values()]
        else:
            residue_lists = []

        outliers = []
        for residues in residue_lists:
            for res in residues:
                outlier_types = list(res.get("outlier_types") or [])
                if not outlier_types:
                    continue
                outliers.append(
                    {
                        "residue_number": res.get("residue_number"),
                        # NOTE(review): despite its name, this field carries
                        # the author residue number (falling back to the
                        # residue number), not a residue name. The key is
                        # kept as-is for backward compatibility.
                        "residue_name": res.get(
                            "author_residue_number",
                            res.get("residue_number"),
                        ),
                        "outlier_types": outlier_types,
                    }
                )
        return outliers

    def _get_outlier_residues(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get residue-level validation outliers.

        Returns:
            ``{"data": {"pdb_id", "molecules"}, "metadata": {...}}`` where each
            molecule lists its chains and each chain its outlier residues.
            ``metadata["total_outlier_residues"]`` counts every reported
            record, so a residue flagged in several models is counted once
            per model. Returns ``{"error": ...}`` when ``pdb_id`` is missing
            or the response has no entry for it.
        """
        pdb_id = self._normalize_pdb_id(arguments)
        if not pdb_id:
            return {"error": "pdb_id is required (e.g., '4hhb')."}

        entry = self._fetch_entry("residuewise_outlier_summary", pdb_id)
        if not entry:
            return {"error": f"No validation outlier data found for PDB {pdb_id}"}

        molecules = []
        total_outliers = 0
        for mol in entry.get("molecules", []):
            chains = []
            for chain in mol.get("chains", []):
                outlier_residues = self._collect_outliers(chain.get("models", []))
                total_outliers += len(outlier_residues)
                chains.append(
                    {
                        "chain_id": chain.get("chain_id"),
                        "outlier_residues": outlier_residues,
                    }
                )
            molecules.append(
                {
                    "entity_id": mol.get("entity_id"),
                    "chains": chains,
                }
            )

        return {
            "data": {
                "pdb_id": pdb_id,
                "molecules": molecules,
            },
            "metadata": {
                "source": "PDBe Validation API (ebi.ac.uk/pdbe)",
                "total_outlier_residues": total_outliers,
            },
        }