Source code for tooluniverse.dbsnp_tool

"""
dbSNP REST API Tool

This tool provides access to the dbSNP (Single Nucleotide Polymorphism) database
for variant information, allele frequencies, and genomic coordinates.
"""

from typing import Dict, Any
from .ncbi_eutils_tool import NCBIEUtilsTool
from .tool_registry import register_tool


[docs] class dbSNPRESTTool(NCBIEUtilsTool): """Base class for dbSNP REST API tools with rate limiting."""
[docs] @register_tool("dbSNPGetVariantByRsID") class dbSNPGetVariantByRsID(dbSNPRESTTool): """Get variant information by rsID."""
[docs] def __init__(self, tool_config): super().__init__(tool_config) self.endpoint = "/esummary.fcgi"
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get variant by rsID using E-utilities.""" rsid = arguments.get("rsid", "") if not rsid: return {"status": "error", "error": "rsid is required"} # Remove 'rs' prefix if present if rsid.startswith("rs"): rsid = rsid[2:] params = {"db": "snp", "id": rsid, "retmode": "json"} result = self._make_request(self.endpoint, params) # Parse and extract useful data from NCBI response if result.get("status") == "success": data = result.get("data", {}) if isinstance(data, dict) and "result" in data: result_data = data["result"] if rsid in result_data: variant_data = result_data[rsid] # Extract key information parsed_data = { "refsnp_id": f"rs{rsid}", "snp_id": variant_data.get("snp_id"), "chromosome": variant_data.get("chr"), "position": variant_data.get("chrpos"), "allele": variant_data.get("allele"), "snp_class": variant_data.get("snp_class"), "clinical_significance": variant_data.get( "clinical_significance", "" ).split(","), "genes": [ gene.get("name") for gene in variant_data.get("genes", []) ], "allele_frequencies": variant_data.get("global_mafs", []), "hgvs_notation": variant_data.get("docsum", ""), "spdi_notation": variant_data.get("spdi", ""), "function_class": variant_data.get("fxn_class", "").split(","), "validated": variant_data.get("validated", "").split(","), "created_date": variant_data.get("createdate"), "updated_date": variant_data.get("updatedate"), } result["data"] = parsed_data result["rsid"] = f"rs{rsid}" else: result["status"] = "error" result["error"] = f"Variant rs{rsid} not found in dbSNP" else: result["status"] = "error" result["error"] = "Invalid response format from NCBI E-utilities" return result
[docs] @register_tool("dbSNPSearchByGene") class dbSNPSearchByGene(dbSNPRESTTool): """Search variants by gene symbol."""
[docs] def __init__(self, tool_config): super().__init__(tool_config) self.endpoint = "/esearch.fcgi"
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Search variants by gene using E-utilities.""" gene_symbol = arguments.get("gene_symbol", "") if not gene_symbol: return {"status": "error", "error": "gene_symbol is required"} params = { "db": "snp", "term": f"{gene_symbol}[gene]", "retmode": "json", "retmax": arguments.get("limit", 20), } result = self._make_request(self.endpoint, params) # Parse and extract useful data from NCBI response if result.get("status") == "success": data = result.get("data", {}) if isinstance(data, dict) and "esearchresult" in data: esearch_data = data["esearchresult"] # Extract variant IDs variant_ids = esearch_data.get("idlist", []) count = int(esearch_data.get("count", 0)) # Create variant list with basic info variants = [] for variant_id in variant_ids: variants.append( {"refsnp_id": f"rs{variant_id}", "snp_id": int(variant_id)} ) parsed_data = { "variants": variants, "total_count": count, "returned_count": len(variants), } result["data"] = parsed_data result["gene_symbol"] = gene_symbol else: result["status"] = "error" result["error"] = "Invalid response format from NCBI E-utilities" return result
[docs] @register_tool("dbSNPGetFrequencies") class dbSNPGetFrequencies(dbSNPRESTTool): """Get allele frequencies for a variant."""
[docs] def __init__(self, tool_config): super().__init__(tool_config) self.endpoint = "/esummary.fcgi"
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get allele frequencies by rsID using E-utilities.""" rsid = arguments.get("rsid", "") if not rsid: return {"status": "error", "error": "rsid is required"} # Remove 'rs' prefix if present if rsid.startswith("rs"): rsid = rsid[2:] params = {"db": "snp", "id": rsid, "retmode": "json"} result = self._make_request(self.endpoint, params) # Parse and extract frequency data from NCBI response if result.get("status") == "success": data = result.get("data", {}) if isinstance(data, dict) and "result" in data: result_data = data["result"] if rsid in result_data: variant_data = result_data[rsid] # Extract allele frequency data frequencies = [] global_mafs = variant_data.get("global_mafs", []) for maf in global_mafs: study = maf.get("study", "Unknown") freq_str = maf.get("freq", "") # Parse frequency string (e.g., "C=0.1505591/754") if "=" in freq_str and "/" in freq_str: try: allele_part, count_part = freq_str.split("/") allele = allele_part.split("=")[0] frequency = float(allele_part.split("=")[1]) sample_count = int(count_part) frequencies.append( { "study": study, "allele": allele, "frequency": frequency, "sample_count": sample_count, } ) except (ValueError, IndexError): # Skip malformed frequency entries continue parsed_data = { "frequencies": frequencies, "total_studies": len(frequencies), } result["data"] = parsed_data result["rsid"] = f"rs{rsid}" else: result["status"] = "error" result["error"] = f"Variant rs{rsid} not found in dbSNP" else: result["status"] = "error" result["error"] = "Invalid response format from NCBI E-utilities" return result