# Source code for tooluniverse.cadd_tool

# cadd_tool.py
"""
CADD (Combined Annotation Dependent Depletion) API tool for ToolUniverse.

CADD scores the deleteriousness of single nucleotide variants, multi-nucleotide
substitutions, and insertion/deletion variants in the human genome.

PHRED-scaled scores interpretation:
- PHRED >= 10: Top 10% most deleterious
- PHRED >= 20: Top 1% most deleterious
- PHRED >= 30: Top 0.1% most deleterious

API Documentation: https://cadd.gs.washington.edu/api
Note: API is experimental, not for high-throughput use (thousands of variants).
"""

import requests
from typing import Dict, Any, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool

# Base URL for CADD API (BIH mirror - working instance).
# Every request URL in this module is built as
# "{CADD_BASE_URL}/{version}/{query}", where {query} is one of
# "chrom:pos", "chrom:pos_ref_alt" or "chrom:start-end".
CADD_BASE_URL = "https://cadd.bihealth.org/api/v1.0"


@register_tool("CADDTool")
class CADDTool(BaseTool):
    """
    Tool for querying the CADD API for variant deleteriousness scores.

    CADD integrates diverse annotations into a single metric (PHRED score)
    by contrasting variants that survived natural selection with simulated
    mutations.

    PHRED score interpretation:
    - >= 10: Top 10% most deleterious (likely damaging)
    - >= 20: Top 1% most deleterious (damaging)
    - >= 30: Top 0.1% most deleterious (highly damaging)
    - Common pathogenic threshold: 15-20

    Supported genome builds: GRCh37, GRCh38
    Current version: v1.7

    No authentication required. API is experimental.

    The operation performed by ``run`` is chosen once, at construction
    time, from ``tool_config["fields"]["operation"]``:

    - ``get_variant_score``   -- one specific substitution
    - ``get_position_scores`` -- every substitution at one position
    - ``get_range_scores``    -- every substitution in a short range

    Every operation returns a dict with a ``"status"`` key that is either
    ``"success"`` (with ``"data"``, which is ``None`` when nothing was
    found) or ``"error"`` (with an ``"error"`` message). No exception is
    raised for request failures.
    """

    # Default CADD version
    DEFAULT_VERSION = "GRCh38-v1.7"

    # Largest value of (end - start) accepted by a range query.
    MAX_RANGE_BP = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the operation this instance performs.

        Args:
            tool_config: Tool configuration. ``"timeout"`` (seconds,
                default 30) and ``"fields" -> "operation"`` (default
                ``"get_variant_score"``) are read here; the full dict is
                also passed to ``BaseTool.__init__``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        self.operation = tool_config.get("fields", {}).get(
            "operation", "get_variant_score"
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the configured CADD API call.

        Args:
            arguments: Operation-specific parameters (see the individual
                ``_get_*`` methods). ``None`` is treated as an empty dict.

        Returns:
            The result dict of the selected operation, or an error dict
            when ``self.operation`` is not a known operation name.
        """
        arguments = arguments or {}
        handlers = {
            "get_variant_score": self._get_variant_score,
            "get_position_scores": self._get_position_scores,
            "get_range_scores": self._get_range_scores,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown operation: {self.operation}",
            }
        return handler(arguments)

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_float(value) -> Optional[float]:
        """Return ``value`` as a float, or ``None`` if missing/non-numeric."""
        if value is None:
            return None
        try:
            return float(value)
        except (ValueError, TypeError):
            return None

    @staticmethod
    def _normalize_chrom(chrom) -> str:
        """
        Return the chromosome name without a leading ``chr`` prefix.

        The prefix is matched case-insensitively and only at the start of
        the name. ``None`` yields an empty string so callers can use a
        single truthiness check for "missing".
        """
        if chrom is None:
            return ""
        name = str(chrom).strip()
        if name[:3].lower() == "chr":
            name = name[3:]
        return name

    @staticmethod
    def _records(data) -> list:
        """
        Normalise a decoded CADD payload to a list of dicts.

        Two payload shapes are handled:

        - a list of lists whose first row is the header, e.g.
          ``[['Chrom', 'Pos', 'Ref', 'Alt', 'RawScore', 'PHRED'], [...]]``
        - a list of dicts keyed by column name

        Rows shorter than the header and items of any other type are
        skipped. Anything that is not a non-empty list yields ``[]``.
        """
        if not isinstance(data, list) or not data:
            return []
        header = data[0]
        if isinstance(header, list) and header:
            return [
                dict(zip(header, row))
                for row in data[1:]
                if isinstance(row, list) and len(row) >= len(header)
            ]
        return [item for item in data if isinstance(item, dict)]

    def _summarize(
        self, record: Dict[str, Any], with_pos: bool = False
    ) -> Dict[str, Any]:
        """
        Build the per-variant entry used by position and range results.

        Args:
            record: One CADD record keyed by column name.
            with_pos: Include the record's ``Pos`` value (range queries).
        """
        phred = self._to_float(record.get("PHRED"))
        entry = {"pos": record.get("Pos")} if with_pos else {}
        entry.update(
            {
                "ref": record.get("Ref"),
                "alt": record.get("Alt"),
                "phred_score": phred,
                "raw_score": self._to_float(record.get("RawScore")),
                # Compare against None: a PHRED of 0.0 is a real score.
                "interpretation": self._interpret_phred(phred)
                if phred is not None
                else None,
            }
        )
        return entry

    def _query(
        self, url: str, not_found_message: str, build_result
    ) -> Dict[str, Any]:
        """
        GET ``url`` and turn the decoded JSON into a result dict.

        Args:
            url: Full request URL.
            not_found_message: Message returned with ``data=None`` when
                the server answers 404.
            build_result: Callable taking the decoded JSON payload and
                returning the final result dict. It runs inside the
                ``try`` so parsing failures are reported, not raised.

        Returns:
            The dict produced by ``build_result``, the not-found dict, or
            an error dict describing the timeout / request failure /
            unexpected exception.
        """
        try:
            response = requests.get(url, timeout=self.timeout)
            if response.status_code == 404:
                return {
                    "status": "success",
                    "data": None,
                    "message": not_found_message,
                }
            response.raise_for_status()
            return build_result(response.json())
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"CADD API timeout after {self.timeout}s",
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"CADD API request failed: {str(e)}"}
        except Exception as e:
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}

    # ------------------------------------------------------------------
    # Score interpretation
    # ------------------------------------------------------------------

    def _interpret_phred(self, phred) -> str:
        """
        Interpret a PHRED score for user-friendly output.

        Args:
            phred: A number or numeric string; anything else (including
                ``None`` and NaN) yields ``"unknown"``.

        Returns:
            One of the fixed category strings below.
        """
        score = self._to_float(phred)
        # ``score != score`` is true only for NaN, for which every
        # threshold comparison below would be False.
        if score is None or score != score:
            return "unknown"

        if score >= 30:
            return "highly_deleterious (top 0.1%)"
        if score >= 20:
            return "deleterious (top 1%)"
        if score >= 15:
            return "likely_deleterious (commonly used pathogenic threshold)"
        if score >= 10:
            return "possibly_deleterious (top 10%)"
        return "likely_benign"

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _get_variant_score(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get CADD score for a specific SNV.

        Query format: chrom:pos_ref_alt (e.g., 7:140453136_A_T for BRAF V600E)

        Args:
            arguments: Reads ``chrom``/``chromosome``, ``pos``/``position``,
                ``ref``/``reference``, ``alt``/``alternate`` (all required),
                ``version`` (defaults to ``DEFAULT_VERSION``) and
                ``include_annotations`` (default ``False``; when true the
                ``_inclAnno`` variant of the version is queried and the
                full record is returned under ``"annotations"``).

        Returns:
            A success dict whose ``"data"`` holds the float scores, the
            interpretation and the threshold legend (or ``None`` with a
            ``"message"`` when no score exists), or an error dict.
        """
        chrom = self._normalize_chrom(
            arguments.get("chrom") or arguments.get("chromosome")
        )
        pos = arguments.get("pos") or arguments.get("position")
        ref = arguments.get("ref") or arguments.get("reference")
        alt = arguments.get("alt") or arguments.get("alternate")
        # ``or`` (not a .get default) so an explicit version=None still
        # falls back instead of putting "None" in the URL.
        version = arguments.get("version") or self.DEFAULT_VERSION
        include_annotations = arguments.get("include_annotations", False)

        # Validate required parameters
        if not chrom:
            return {
                "status": "error",
                "error": "chrom/chromosome parameter is required",
            }
        if not pos:
            return {"status": "error", "error": "pos/position parameter is required"}
        if not ref:
            return {"status": "error", "error": "ref/reference parameter is required"}
        if not alt:
            return {"status": "error", "error": "alt/alternate parameter is required"}

        version_suffix = "_inclAnno" if include_annotations else ""
        url = f"{CADD_BASE_URL}/{version}{version_suffix}/{chrom}:{pos}_{ref}_{alt}"
        not_found = f"No CADD score found for {chrom}:{pos} {ref}>{alt}"

        def build_result(data):
            records = self._records(data)
            if not records:
                return {
                    "status": "success",
                    "data": None,
                    "message": not_found,
                    "raw_response": data,
                }

            # Only the first record is reported.
            result = records[0]
            phred = self._to_float(result.get("PHRED"))
            raw_score = self._to_float(result.get("RawScore"))
            return {
                "status": "success",
                "data": {
                    "chrom": chrom,
                    "pos": pos,
                    "ref": ref,
                    "alt": alt,
                    "phred_score": phred,
                    "raw_score": raw_score,
                    # Compare against None: a PHRED of 0.0 is a real score.
                    "interpretation": self._interpret_phred(phred)
                    if phred is not None
                    else None,
                    "version": version,
                    "annotations": result if include_annotations else None,
                    "thresholds": {
                        "highly_deleterious": "PHRED >= 30 (top 0.1%)",
                        "deleterious": "PHRED >= 20 (top 1%)",
                        "likely_pathogenic": "PHRED >= 15 (common threshold)",
                        "possibly_deleterious": "PHRED >= 10 (top 10%)",
                    },
                },
            }

        return self._query(url, not_found, build_result)

    def _get_position_scores(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get CADD scores for all possible SNVs at a genomic position.

        Returns scores for A, C, G, T substitutions at the given position.

        Args:
            arguments: Reads ``chrom``/``chromosome`` and ``pos``/``position``
                (both required) and ``version`` (defaults to
                ``DEFAULT_VERSION``).

        Returns:
            A success dict whose ``"data"`` lists one entry per returned
            variant (``None`` with a ``"message"`` on 404), or an error
            dict.
        """
        chrom = self._normalize_chrom(
            arguments.get("chrom") or arguments.get("chromosome")
        )
        pos = arguments.get("pos") or arguments.get("position")
        version = arguments.get("version") or self.DEFAULT_VERSION

        if not chrom:
            return {
                "status": "error",
                "error": "chrom/chromosome parameter is required",
            }
        if not pos:
            return {"status": "error", "error": "pos/position parameter is required"}

        url = f"{CADD_BASE_URL}/{version}/{chrom}:{pos}"

        def build_result(data):
            variants = [self._summarize(record) for record in self._records(data)]
            return {
                "status": "success",
                "data": {
                    "chrom": chrom,
                    "pos": pos,
                    "variants": variants,
                    "version": version,
                },
            }

        return self._query(
            url, f"No CADD scores found for {chrom}:{pos}", build_result
        )

    def _get_range_scores(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get CADD scores for a genomic range (max 100bp).

        Returns all pre-computed scores in the specified range.

        Args:
            arguments: Reads ``chrom``/``chromosome``, ``start`` and ``end``
                (all required; ``start``/``end`` must convert to positive
                integers with ``start <= end`` and
                ``end - start <= MAX_RANGE_BP``) and ``version`` (defaults
                to ``DEFAULT_VERSION``).

        Returns:
            A success dict whose ``"data"`` holds the variants and their
            ``"count"`` (``None`` with a ``"message"`` on 404), or an
            error dict.
        """
        chrom = self._normalize_chrom(
            arguments.get("chrom") or arguments.get("chromosome")
        )
        start = arguments.get("start")
        end = arguments.get("end")
        version = arguments.get("version") or self.DEFAULT_VERSION

        if not chrom:
            return {
                "status": "error",
                "error": "chrom/chromosome parameter is required",
            }
        # Test for "missing" explicitly so that 0 is reported as an invalid
        # coordinate below rather than as an absent parameter.
        if start is None or start == "":
            return {"status": "error", "error": "start parameter is required"}
        if end is None or end == "":
            return {"status": "error", "error": "end parameter is required"}

        try:
            start = int(start)
            end = int(end)
        except (ValueError, TypeError):
            return {"status": "error", "error": "start and end must be integers"}

        if start < 1 or end < 1:
            return {
                "status": "error",
                "error": "start and end must be positive (1-based) coordinates",
            }
        if end < start:
            return {
                "status": "error",
                "error": f"Invalid range: end ({end}) is smaller than start ({start})",
            }

        # Validate range (API limit is 100bp)
        if end - start > self.MAX_RANGE_BP:
            return {
                "status": "error",
                "error": (
                    f"Range too large: {end - start}bp. "
                    f"CADD API allows max {self.MAX_RANGE_BP}bp range."
                ),
            }

        url = f"{CADD_BASE_URL}/{version}/{chrom}:{start}-{end}"

        def build_result(data):
            # The range endpoint returns a list of lists with the header as
            # the first row; _records also accepts the list-of-dicts form.
            variants = [
                self._summarize(record, with_pos=True)
                for record in self._records(data)
            ]
            return {
                "status": "success",
                "data": {
                    "chrom": chrom,
                    "start": start,
                    "end": end,
                    "variants": variants,
                    "count": len(variants),
                    "version": version,
                },
            }

        return self._query(
            url, f"No CADD scores found for {chrom}:{start}-{end}", build_result
        )