Source code for tooluniverse.cosmic_tool

"""
COSMIC (Catalogue of Somatic Mutations in Cancer) API tool for ToolUniverse.

COSMIC is a comprehensive database of somatic mutations in human cancer.
This tool uses the NLM Clinical Tables Search Service API for COSMIC data.

API Documentation: https://clinicaltables.nlm.nih.gov/apidoc/cosmic/v4/doc.html
"""

import requests
from typing import Dict, Any, Optional, List
from .base_tool import BaseTool
from .tool_registry import register_tool

# Base URL for COSMIC via NLM Clinical Tables API
COSMIC_API_URL = "https://clinicaltables.nlm.nih.gov/api/cosmic/v4/search"


@register_tool("COSMICTool")
class COSMICTool(BaseTool):
    """
    Tool for querying COSMIC (Catalogue of Somatic Mutations in Cancer).

    COSMIC provides:
    - Somatic mutation data in human cancers
    - Gene-level mutation information
    - Mutation coordinates and amino acid changes
    - Associated cancer types

    Uses NLM Clinical Tables API. No authentication required.

    Every public entry point returns a dict with a ``status`` key
    (``"success"`` or ``"error"``); request failures are reported through
    that dict rather than raised.
    """

    # Upper bound applied to any caller-supplied ``max_results``.
    _MAX_RESULTS_CAP = 500
    # Value reported under ``metadata["source"]`` in every successful reply.
    _SOURCE = "COSMIC via NLM Clinical Tables API"
    # Fields requested as display strings (the ``df`` query parameter).
    _DISPLAY_FIELDS = "MutationID,GeneName,MutationCDS,MutationAA"
    # Fields requested as extra data (the ``ef`` query parameter).
    _SEARCH_EXTRA_FIELDS = (
        "MutationID,GeneName,MutationCDS,MutationAA,PrimarySite,"
        "PrimaryHistology,MutationGenomePosition,MutationStrand"
    )
    _GENE_EXTRA_FIELDS = _SEARCH_EXTRA_FIELDS + ",Fathmm"

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the tool configuration.

        Args:
            tool_config: Tool configuration dict. ``timeout`` (seconds,
                default 30) and ``parameter`` (default ``{}``) are read
                from it; the full dict is forwarded to ``BaseTool``.
        """
        super().__init__(tool_config)
        self.timeout: int = tool_config.get("timeout", 30)
        self.parameter = tool_config.get("parameter", {})

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the COSMIC API call based on operation type.

        Args:
            arguments: Dict with an optional ``operation`` key
                (``"search"``, the default, or ``"get_by_gene"``) plus the
                parameters of the selected operation.

        Returns:
            The result dict of the selected operation, or an error dict
            when the operation name is not recognised.
        """
        operation = arguments.get("operation", "search")

        if operation == "search":
            return self._search_mutations(arguments)
        if operation == "get_by_gene":
            return self._get_mutations_by_gene(arguments)
        return {
            "status": "error",
            "error": f"Unknown operation: {operation}. Supported: search, get_by_gene",
        }

    def _search_mutations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search COSMIC for mutations by term.

        Args:
            arguments: Dict containing:
                - terms: Search query (gene name, mutation, etc.)
                - max_results: Maximum results to return (default 20, max 500)
                - genome_build: Genome build version (37 or 38, default 37)

        Returns:
            On success, ``data`` holds ``total_count``, ``results`` and
            ``genome_build``; on failure, an error dict.
        """
        terms = arguments.get("terms", "")
        if not terms:
            return {"status": "error", "error": "Missing required parameter: terms"}

        return self._run_query(
            arguments,
            terms=terms,
            default_max_results=20,
            extra_fields=self._SEARCH_EXTRA_FIELDS,
            metadata_extra={"query": terms},
        )

    def _get_mutations_by_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get all mutations for a specific gene.

        Args:
            arguments: Dict containing:
                - gene: Gene symbol (e.g., BRAF, TP53)
                - max_results: Maximum results (default 100, max 500)
                - genome_build: Genome build version (37 or 38, default 37)

        Returns:
            On success, ``data`` holds ``gene``, ``total_count``,
            ``results`` and ``genome_build``; on failure, an error dict.
        """
        gene = arguments.get("gene", "")
        if not gene:
            return {"status": "error", "error": "Missing required parameter: gene"}

        return self._run_query(
            arguments,
            terms=gene,
            default_max_results=100,
            extra_fields=self._GENE_EXTRA_FIELDS,
            # Restrict matches to records whose GeneName field is this gene.
            extra_params={"q": f"GeneName:{gene}"},
            data_prefix={"gene": gene},
            metadata_extra={"gene": gene},
        )

    def _run_query(
        self,
        arguments: Dict[str, Any],
        terms: str,
        default_max_results: int,
        extra_fields: str,
        extra_params: Optional[Dict[str, Any]] = None,
        data_prefix: Optional[Dict[str, Any]] = None,
        metadata_extra: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Send one search request and convert the reply into a result dict.

        Args:
            arguments: Caller arguments; ``max_results`` and
                ``genome_build`` are read from it.
            terms: Value sent as the ``terms`` query parameter.
            default_max_results: ``max_results`` used when none is given.
            extra_fields: Comma-separated field list sent as ``ef``.
            extra_params: Additional query parameters to send.
            data_prefix: Entries placed first in the returned ``data``.
            metadata_extra: Entries added to ``metadata`` on a parsed reply.

        Returns:
            A ``status: success`` dict, or a ``status: error`` dict
            describing the validation, network or parsing failure.
        """
        try:
            max_results = self._resolve_max_results(
                arguments.get("max_results"), default_max_results
            )
        except (TypeError, ValueError):
            return {
                "status": "error",
                "error": "Invalid parameter: max_results must be an integer",
            }

        # An explicit None would otherwise drop ``grchv`` from the request
        # and surface as "GRChNone" in the reply.
        genome_build = arguments.get("genome_build")
        if genome_build is None:
            genome_build = 37

        params = {
            "terms": terms,
            "maxList": max_results,
            "grchv": genome_build,
            "df": self._DISPLAY_FIELDS,
            "ef": extra_fields,
        }
        if extra_params:
            params.update(extra_params)

        data_prefix = dict(data_prefix or {})

        try:
            response = requests.get(
                COSMIC_API_URL,
                params=params,
                timeout=self.timeout,
                headers={"User-Agent": "ToolUniverse/COSMIC"},
            )
            response.raise_for_status()
            payload = response.json()

            # NLM API returns [total_count, code_list, extra_data, display_strings]
            if not (isinstance(payload, list) and len(payload) >= 4):
                return {
                    "status": "success",
                    "data": {**data_prefix, "total_count": 0, "results": []},
                    "metadata": {"source": self._SOURCE},
                }

            results = self._parse_results(
                payload[1] or [], payload[2] or {}, payload[3] or []
            )
            return {
                "status": "success",
                "data": {
                    **data_prefix,
                    "total_count": payload[0],
                    "results": results,
                    "genome_build": f"GRCh{genome_build}",
                },
                "metadata": {"source": self._SOURCE, **(metadata_extra or {})},
            }
        except requests.exceptions.Timeout:
            # Report the configured timeout, not a hard-coded value.
            return {
                "status": "error",
                "error": f"Request timeout after {self.timeout} seconds",
            }
        except requests.exceptions.HTTPError as e:
            status_code = e.response.status_code if e.response is not None else "unknown"
            return {"status": "error", "error": f"HTTP error: {status_code}"}
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            # Tool boundary: callers expect an error dict, never an exception.
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}

    @classmethod
    def _resolve_max_results(cls, value: Any, default: int) -> int:
        """
        Coerce ``max_results`` to an int clamped to ``1.._MAX_RESULTS_CAP``.

        Args:
            value: Caller-supplied value; ``None`` selects ``default``.
            default: Value used when ``value`` is ``None``.

        Raises:
            TypeError, ValueError: If ``value`` cannot be converted by ``int``.
        """
        if value is None:
            value = default
        return max(1, min(int(value), cls._MAX_RESULTS_CAP))

    @staticmethod
    def _parse_results(
        codes: List[Any], extra_data: Any, display_strings: List[Any]
    ) -> List[Dict[str, Any]]:
        """
        Build one result dict per returned code.

        The extra data is read column-wise: each key is a requested field
        name and each value is a list aligned with ``codes``. A dict keyed
        by code (the shape the previous implementation looked for) is also
        merged when present.

        Args:
            codes: Mutation IDs, in result order.
            extra_data: Extra-field data from the reply.
            display_strings: Per-code display entries, in result order.

        Returns:
            A list of dicts with ``mutation_id``, ``display`` and any
            extra fields found for that position.
        """
        if not isinstance(extra_data, dict):
            extra_data = {}

        results = []
        for i, code in enumerate(codes):
            result = {
                "mutation_id": code,
                "display": display_strings[i] if i < len(display_strings) else None,
            }
            for field, values in extra_data.items():
                if isinstance(values, (list, tuple)) and i < len(values):
                    result[field] = values[i]
            if isinstance(code, str):
                per_code = extra_data.get(code)
                if isinstance(per_code, dict):
                    result.update(per_code)
            results.append(result)
        return results