Source code for tooluniverse.chipatlas_tool

"""
ChIP-Atlas API Tool

This tool provides access to ChIP-Atlas, a data-mining suite for exploring
epigenomic landscapes with 433,000+ ChIP-seq, ATAC-seq, and Bisulfite-seq experiments.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool


# Root of the ChIP-Atlas web site. Used below to build the links to the
# enrichment-analysis form and the web search page that the tool returns.
CHIPATLAS_BASE_URL = "https://chip-atlas.org"
# Root of the ChIP-Atlas data server. Used below to build URLs for the
# metadata tables (experimentList/antigenList/celltypeList) and for the
# per-experiment BigWig/BED/BigBed files.
CHIPATLAS_DATA_URL = "https://chip-atlas.dbcls.jp/data"


@register_tool("ChIPAtlasTool")
class ChIPAtlasTool(BaseTool):
    """
    ChIP-Atlas API tool for accessing chromatin data.

    Provides enrichment analysis, peak browsing, and dataset search.

    Every method returns a dict shaped like
    ``{"status": "success" | "error", "data": {...}}``. Failures are reported
    through that dict (``data["error"]``) instead of being raised.
    """

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the operation selected by ``arguments["operation"]``.

        Args:
            arguments: Tool arguments. ``operation`` selects the handler and
                defaults to ``"enrichment_analysis"``; the remaining keys are
                passed through to that handler unchanged.

        Returns:
            The handler's result dict, or an error dict when the operation is
            unknown or an unexpected exception occurs.
        """
        try:
            operation = arguments.get("operation", "enrichment_analysis")
            handlers = {
                "enrichment_analysis": self._enrichment_analysis,
                "get_experiment_list": self._get_experiment_list,
                "get_peak_data": self._get_peak_data,
                "search_datasets": self._search_datasets,
            }
            # Guard against unhashable values (e.g. a list) so they are
            # reported as an unknown operation rather than a TypeError.
            handler = handlers.get(operation) if isinstance(operation, str) else None
            if handler is None:
                return {
                    "status": "error",
                    "data": {"error": f"Unknown operation: {operation}"},
                }
            return handler(arguments)
        except Exception as e:
            return {"status": "error", "data": {"error": str(e)}}

    def _enrichment_analysis(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Describe how to submit an enrichment analysis to ChIP-Atlas.

        No network request is made: the method only echoes the supplied
        parameters together with the URL of the enrichment-analysis form.
        The first non-empty input among ``bed_data``, ``motif`` and
        ``gene_list`` (checked in that order) decides the response.

        Args:
            arguments: May contain ``bed_data``, ``motif``, ``gene_list``,
                ``genome`` (default ``"hg38"``), ``antigen_class`` and
                ``cell_type_class`` (default ``""``), ``threshold``
                (default ``"05"``) and ``distance`` (default ``"5000"``).

        Returns:
            A success dict with submission guidance, or an error dict when
            none of the three inputs is provided.
        """
        try:
            bed_data = arguments.get("bed_data")
            motif = arguments.get("motif")
            gene_list = arguments.get("gene_list")
            genome = arguments.get("genome", "hg38")
            antigen_class = arguments.get("antigen_class", "")
            cell_type_class = arguments.get("cell_type_class", "")
            threshold = arguments.get("threshold", "05")
            distance = arguments.get("distance", "5000")

            # Note: the actual API endpoint needs to be discovered from the
            # ChIP-Atlas documentation; for now only guidance is returned.
            enrichment_url = f"{CHIPATLAS_BASE_URL}/enrichment_analysis"

            if bed_data:
                result_data = {
                    "message": "ChIP-Atlas Enrichment Analysis requires web form submission",
                    "instruction": f"Submit BED data to: {enrichment_url}",
                    "parameters": {
                        "genome": genome,
                        "antigen_class": antigen_class,
                        "cell_type_class": cell_type_class,
                        "threshold": threshold,
                    },
                    "note": (
                        "ChIP-Atlas enrichment API requires form-based submission. "
                        "Use Python 'requests' library for programmatic access."
                    ),
                }
            elif motif:
                result_data = {
                    "message": "Submit motif for enrichment analysis",
                    "motif": motif,
                    "url": enrichment_url,
                    "note": "Motif should be in IUPAC nucleic acid notation (ATGCWSMKRYBDHVN)",
                }
            elif gene_list:
                result_data = {
                    "message": "Submit gene list for enrichment analysis",
                    # A single gene symbol is wrapped so "genes" is always a list.
                    "genes": gene_list if isinstance(gene_list, list) else [gene_list],
                    "distance_from_tss": distance,
                    "url": enrichment_url,
                    "note": "Use official gene symbols (HGNC, MGI, RGD, FlyBase, WormBase, SGD)",
                }
            else:
                return {
                    "status": "error",
                    "data": {
                        "error": "One of bed_data, motif, or gene_list must be provided"
                    },
                }

            return {"status": "success", "data": result_data}
        except Exception as e:
            return {"status": "error", "data": {"error": str(e)}}

    def _get_experiment_list(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Return guidance for accessing the ChIP-Atlas experiment metadata.

        Note: The experimentList.tab file is 344MB+ (433k+ experiments), so
        this method does not download it. It returns the file URL, the web
        search URL and an echo of the requested filters. Because no rows are
        fetched, a ``limit`` argument has no effect.

        Args:
            arguments: May contain ``genome``, ``antigen`` and ``cell_type``;
                truthy values are echoed back under ``filters_requested``.

        Returns:
            A success dict with the guidance, or an error dict if an
            unexpected exception occurs.
        """
        try:
            # Keep only the filters the caller actually supplied.
            filters = {
                key: arguments.get(key)
                for key in ("genome", "antigen", "cell_type")
                if arguments.get(key)
            }

            message = (
                "ChIP-Atlas experimentList.tab is 344MB+ with 433,000+ experiments. "
                "For efficient searching, use: (1) ChIPAtlas_search_datasets tool "
                "for antigen/cell-type search, or (2) Download the file directly for "
                "local analysis."
            )

            result_data = {
                "message": message,
                "metadata_file_url": f"{CHIPATLAS_DATA_URL}/metadata/experimentList.tab",
                "web_search_url": f"{CHIPATLAS_BASE_URL}/search",
                "file_size": "344MB",
                "total_experiments": "433,000+",
                "filters_requested": filters if filters else "none",
                "recommendation": (
                    "Use ChIPAtlas_search_datasets tool for filtered searches by "
                    "antigen or cell type, or download the metadata file for local analysis."
                ),
            }
            return {"status": "success", "data": result_data}
        except Exception as e:
            return {"status": "error", "data": {"error": str(e)}}

    def _get_peak_data(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get URL for peak-call data (BigWig, BED or BigBed format).

        Only the download URL is built; nothing is fetched.

        Args:
            arguments: Requires ``experiment_id``. Optional ``genome``
                (default ``"hg38"``), ``format`` (``"bigwig"``, ``"bed"`` or
                ``"bigbed"``, case-insensitive, default ``"bigwig"``) and
                ``threshold`` (default ``"05"``; used for BED/BigBed only).
                Optional values passed explicitly as ``None`` fall back to
                their defaults.

        Returns:
            A success dict with ``experiment_id``, ``genome``, ``format`` and
            ``url``, or an error dict when ``experiment_id`` is missing or
            the format is not recognised.
        """
        try:
            experiment_id = arguments.get("experiment_id")
            if not experiment_id:
                return {
                    "status": "error",
                    "data": {"error": "experiment_id is required"},
                }

            # Callers may pass explicit None for optional parameters; treat
            # that the same as "not provided" instead of building a broken URL.
            genome = arguments.get("genome") or "hg38"
            format_type = arguments.get("format") or "bigwig"

            # The path component is a two-character code such as "05"; pad
            # numeric input (5 -> "05") so it matches the default's format.
            threshold = arguments.get("threshold")
            if threshold is None or threshold == "":
                threshold = "05"
            else:
                threshold = str(threshold).zfill(2)

            data_root = f"{CHIPATLAS_DATA_URL}/{genome}/eachData"
            url_by_format = {
                "bigwig": f"{data_root}/bw/{experiment_id}.bw",
                "bed": f"{data_root}/bed{threshold}/{experiment_id}.{threshold}.bed",
                "bigbed": f"{data_root}/bb{threshold}/{experiment_id}.{threshold}.bb",
            }

            # str() keeps a non-string format from crashing on .lower(); it
            # simply fails the lookup and is reported as invalid.
            url = url_by_format.get(str(format_type).lower())
            if url is None:
                return {
                    "status": "error",
                    "data": {
                        "error": f"Invalid format: {format_type}. Use 'bigwig', 'bed', or 'bigbed'"
                    },
                }

            result_data = {
                "experiment_id": experiment_id,
                "genome": genome,
                "format": format_type,
                "url": url,
                "message": "Use this URL to download peak data",
            }
            return {"status": "success", "data": result_data}
        except Exception as e:
            return {"status": "error", "data": {"error": str(e)}}

    def _search_datasets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search for datasets by antigen or cell type.

        Downloads ``antigenList.tab`` (when ``antigen`` is given) or
        ``celltypeList.tab`` (otherwise) and keeps the rows whose genome
        equals ``genome`` and whose name column contains the search value
        (case-insensitive substring match). ``antigen`` takes precedence
        when both are supplied.

        Args:
            arguments: Requires ``antigen`` or ``cell_type``. Optional
                ``genome`` (default ``"hg38"``).

        Returns:
            A success dict with ``search_key``, ``search_value``, ``genome``,
            ``num_results`` and ``results`` (each result lists at most the
            first 10 experiment IDs), or an error dict on missing input,
            timeout or any other failure.
        """
        try:
            antigen = arguments.get("antigen")
            cell_type = arguments.get("cell_type")
            genome = arguments.get("genome") or "hg38"

            if not antigen and not cell_type:
                return {
                    "status": "error",
                    "data": {"error": "Either antigen or cell_type must be provided"},
                }

            # Both tables share the same column layout:
            #   Genome | <class> | <name> | Num_data | ID
            # so only the file name and the output key names differ.
            if antigen:
                url = f"{CHIPATLAS_DATA_URL}/metadata/antigenList.tab"
                search_key, search_value = "antigen", antigen
                class_key, name_key = "class", "name"
            else:
                url = f"{CHIPATLAS_DATA_URL}/metadata/celltypeList.tab"
                search_key, search_value = "cell_type", cell_type
                class_key, name_key = "cell_type_class", "cell_type"

            # These files are ~10MB each - reasonable to download fully.
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            # Hoisted out of the loop; str() tolerates non-string search values.
            needle = str(search_value).lower()
            max_ids = 10  # Show first 10 IDs

            results = []
            for line in response.text.strip().split("\n"):
                # rstrip("\r") keeps CRLF line endings out of the last ID.
                fields = line.rstrip("\r").split("\t")
                if len(fields) < 5:
                    continue
                if fields[0] != genome or needle not in fields[2].lower():
                    continue
                results.append(
                    {
                        "genome": fields[0],
                        class_key: fields[1],
                        name_key: fields[2],
                        "num_experiments": fields[3],
                        "experiment_ids": fields[4].split(",")[:max_ids],
                    }
                )

            result_data = {
                "search_key": search_key,
                "search_value": search_value,
                "genome": genome,
                "num_results": len(results),
                "results": results,
            }
            return {"status": "success", "data": result_data}
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "data": {
                    "error": "Request timeout - ChIP-Atlas server may be slow. Try again later."
                },
            }
        except Exception as e:
            return {"status": "error", "data": {"error": str(e)}}