Source code for tooluniverse.epigenomics_tool

# epigenomics_tool.py
"""
Epigenomics and methylation analysis tools for ToolUniverse.

Integrates data from:
- ENCODE Project (histone ChIP-seq, WGBS methylation, ATAC-seq, DNase-seq, annotations)
- UCSC Genome Browser (CpG islands, ENCODE4 cCREs, TF binding clusters)
- NCBI GEO (methylation array datasets, ChIP-seq datasets)
- Ensembl Regulatory Build (regulatory features, enhancers, promoters)

No authentication required for any of these APIs.
"""

import json
import requests
from typing import Dict, Any, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool

# Root of the ENCODE portal; the ENCODE search helper appends "/search/".
ENCODE_BASE_URL = "https://www.encodeproject.org"
# Root of the UCSC Genome Browser REST API; track lookups append "/getData/track".
UCSC_API_URL = "https://api.genome.ucsc.edu"
# Root of NCBI E-utilities; GEO helpers append "/esearch.fcgi" and "/esummary.fcgi".
NCBI_EUTILS_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# Root of the Ensembl REST API; the regulatory lookup appends "/overlap/region/...".
ENSEMBL_REST_URL = "https://rest.ensembl.org"



[docs]
@register_tool("EpigenomicsTool")
class EpigenomicsTool(BaseTool):
    """
    Tool for epigenomics and methylation analysis across multiple databases.

    Supports:
    - ENCODE histone ChIP-seq, methylation (WGBS/RRBS), chromatin accessibility
    - ENCODE annotations (cCREs, chromatin states)
    - GEO methylation and ChIP-seq dataset search
    - Ensembl regulatory features

    No authentication required.
    """


[docs]
    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration mapping.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                and ``fields["endpoint"]`` (default ``"histone_chipseq"``)
                are read from it; the whole mapping is also handed to the
                base class initialiser.
        """
        super().__init__(tool_config)
        endpoint_settings = tool_config.get("fields", {})
        self.timeout = tool_config.get("timeout", 30)
        # The endpoint name selects which private handler `_dispatch` calls.
        self.endpoint = endpoint_settings.get("endpoint", "histone_chipseq")



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the epigenomics API call."""
        try:
            return self._dispatch(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"API request timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to API. Check network connectivity."}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            return {"error": f"API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}



[docs]
    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate endpoint based on config."""
        if self.endpoint == "histone_chipseq":
            return self._encode_histone_search(arguments)
        elif self.endpoint == "methylation":
            return self._encode_methylation_search(arguments)
        elif self.endpoint == "chromatin_accessibility":
            return self._encode_chromatin_accessibility_search(arguments)
        elif self.endpoint == "annotations":
            return self._encode_annotations_search(arguments)
        elif self.endpoint == "chromatin_state":
            return self._encode_chromatin_state_search(arguments)
        elif self.endpoint == "geo_methylation_search":
            return self._geo_methylation_search(arguments)
        elif self.endpoint == "geo_chipseq_search":
            return self._geo_chipseq_search(arguments)
        elif self.endpoint == "geo_dataset_details":
            return self._geo_dataset_details(arguments)
        elif self.endpoint == "ensembl_regulatory":
            return self._ensembl_regulatory_features(arguments)
        else:
            return {"error": f"Unknown endpoint: {self.endpoint}"}


    # =========================================================================
    # ENCODE Search Tools
    # =========================================================================


[docs]
    def _encode_search(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run a search against the ENCODE ``/search/`` endpoint.

        Args:
            params: Query-string parameters for the search. The mapping is
                not modified; ``format=json`` is added to a copy.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-success HTTP status.
        """
        # Copy so the caller's dict is left untouched; "format" is always forced to json.
        query = {**params, "format": "json"}
        response = requests.get(
            f"{ENCODE_BASE_URL}/search/",
            params=query,
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()



[docs]
    def _encode_histone_search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search ENCODE histone ChIP-seq experiments."""
        params = {
            "type": "Experiment",
            "assay_title": "Histone ChIP-seq",
            "status": "released",
        }

        histone_mark = arguments.get("histone_mark")
        if histone_mark:
            params["target.label"] = histone_mark

        biosample = arguments.get("biosample_term_name")
        if biosample:
            params["biosample_ontology.term_name"] = biosample

        organism = arguments.get("organism", "Homo sapiens")
        if organism:
            params["replicates.library.biosample.organism.scientific_name"] = organism

        limit = arguments.get("limit", 25)
        params["limit"] = min(int(limit), 100)

        raw = self._encode_search(params)

        experiments = []
        for exp in raw.get("@graph", []):
            target = exp.get("target", {})
            mark = target.get("label", "") if isinstance(target, dict) else str(target)
            lab = exp.get("lab", {})
            lab_name = lab.get("title", "") if isinstance(lab, dict) else str(lab)

            experiments.append(
                {
                    "accession": exp.get("accession", ""),
                    "histone_mark": mark,
                    "biosample_summary": exp.get("biosample_summary", ""),
                    "status": exp.get("status", ""),
                    "lab": lab_name,
                    "date_released": exp.get("date_released"),
                }
            )

        return {
            "data": {
                "total": raw.get("total", 0),
                "experiments": experiments,
            },
            "metadata": {
                "source": "ENCODE Project (encodeproject.org)",
                "assay": "Histone ChIP-seq",
                "histone_mark_filter": histone_mark,
                "organism": organism,
            },
        }



[docs]
    def _encode_methylation_search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search ENCODE methylation experiments (WGBS/RRBS)."""
        assay_type = arguments.get("assay_type", "WGBS")
        params = {
            "type": "Experiment",
            "assay_title": assay_type,
            "status": "released",
        }

        biosample = arguments.get("biosample_term_name")
        if biosample:
            params["biosample_ontology.term_name"] = biosample

        organism = arguments.get("organism", "Homo sapiens")
        if organism:
            params["replicates.library.biosample.organism.scientific_name"] = organism

        limit = arguments.get("limit", 25)
        params["limit"] = min(int(limit), 100)

        raw = self._encode_search(params)

        experiments = []
        for exp in raw.get("@graph", []):
            lab = exp.get("lab", {})
            lab_name = lab.get("title", "") if isinstance(lab, dict) else str(lab)

            experiments.append(
                {
                    "accession": exp.get("accession", ""),
                    "assay_title": exp.get("assay_title", ""),
                    "biosample_summary": exp.get("biosample_summary", ""),
                    "status": exp.get("status", ""),
                    "lab": lab_name,
                }
            )

        return {
            "data": {
                "total": raw.get("total", 0),
                "experiments": experiments,
            },
            "metadata": {
                "source": "ENCODE Project (encodeproject.org)",
                "assay": assay_type,
                "organism": organism,
            },
        }



[docs]
    def _encode_chromatin_accessibility_search(
        self, arguments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Search ENCODE chromatin accessibility experiments (ATAC-seq / DNase-seq)."""
        assay_type = arguments.get("assay_type", "ATAC-seq")
        params = {
            "type": "Experiment",
            "assay_title": assay_type,
            "status": "released",
        }

        biosample = arguments.get("biosample_term_name")
        if biosample:
            params["biosample_ontology.term_name"] = biosample

        organism = arguments.get("organism", "Homo sapiens")
        if organism:
            params["replicates.library.biosample.organism.scientific_name"] = organism

        limit = arguments.get("limit", 25)
        params["limit"] = min(int(limit), 100)

        raw = self._encode_search(params)

        experiments = []
        for exp in raw.get("@graph", []):
            lab = exp.get("lab", {})
            lab_name = lab.get("title", "") if isinstance(lab, dict) else str(lab)

            experiments.append(
                {
                    "accession": exp.get("accession", ""),
                    "assay_title": exp.get("assay_title", ""),
                    "biosample_summary": exp.get("biosample_summary", ""),
                    "status": exp.get("status", ""),
                    "lab": lab_name,
                }
            )

        return {
            "data": {
                "total": raw.get("total", 0),
                "experiments": experiments,
            },
            "metadata": {
                "source": "ENCODE Project (encodeproject.org)",
                "assay": assay_type,
                "organism": organism,
            },
        }



[docs]
    def _encode_annotations_search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search ENCODE annotations (cCREs, chromatin states)."""
        annotation_type = arguments.get(
            "annotation_type", "candidate Cis-Regulatory Elements"
        )
        params = {
            "type": "Annotation",
            "annotation_type": annotation_type,
            "status": "released",
        }

        biosample = arguments.get("biosample_term_name")
        if biosample:
            params["biosample_ontology.term_name"] = biosample

        organism = arguments.get("organism", "Homo sapiens")
        if organism:
            params["organism.scientific_name"] = organism

        assembly = arguments.get("assembly", "GRCh38")
        if assembly:
            params["assembly"] = assembly

        limit = arguments.get("limit", 25)
        params["limit"] = min(int(limit), 100)

        raw = self._encode_search(params)

        annotations = []
        for ann in raw.get("@graph", []):
            annotations.append(
                {
                    "accession": ann.get("accession", ""),
                    "annotation_type": ann.get("annotation_type"),
                    "description": ann.get("description", ""),
                    "biosample_summary": ann.get("biosample_summary"),
                    "status": ann.get("status", ""),
                }
            )

        return {
            "data": {
                "total": raw.get("total", 0),
                "annotations": annotations,
            },
            "metadata": {
                "source": "ENCODE Project (encodeproject.org)",
                "annotation_type": annotation_type,
                "organism": organism,
                "assembly": assembly,
            },
        }



[docs]
    def _encode_chromatin_state_search(
        self, arguments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Search ENCODE chromatin state annotations (ChromHMM)."""
        params = {
            "type": "Annotation",
            "annotation_type": "chromatin state",
            "status": "released",
        }

        biosample = arguments.get("biosample_term_name")
        if biosample:
            params["biosample_ontology.term_name"] = biosample

        organism = arguments.get("organism", "Homo sapiens")
        if organism:
            params["organism.scientific_name"] = organism

        limit = arguments.get("limit", 25)
        params["limit"] = min(int(limit), 100)

        raw = self._encode_search(params)

        annotations = []
        for ann in raw.get("@graph", []):
            annotations.append(
                {
                    "accession": ann.get("accession", ""),
                    "annotation_type": ann.get("annotation_type"),
                    "description": ann.get("description", ""),
                    "biosample_summary": ann.get("biosample_summary"),
                    "status": ann.get("status", ""),
                }
            )

        return {
            "data": {
                "total": raw.get("total", 0),
                "annotations": annotations,
            },
            "metadata": {
                "source": "ENCODE Project (encodeproject.org)",
                "annotation_type": "chromatin state",
                "organism": organism,
            },
        }


    # =========================================================================
    # GEO Search Tools
    # =========================================================================


[docs]
    def _geo_esearch(self, term: str, limit: int = 20) -> Dict[str, Any]:
        """
        Search the ``gds`` database through NCBI E-utilities ``esearch``.

        Args:
            term: Entrez search term.
            limit: Maximum number of IDs to request; capped at 100.

        Returns:
            The decoded JSON body of the esearch response.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-success HTTP status.
        """
        endpoint = f"{NCBI_EUTILS_URL}/esearch.fcgi"
        max_ids = min(int(limit), 100)
        reply = requests.get(
            endpoint,
            params={"db": "gds", "term": term, "retmax": max_ids, "retmode": "json"},
            timeout=self.timeout,
        )
        reply.raise_for_status()
        return reply.json()



[docs]
    def _geo_esummary(self, ids: list) -> Dict[str, Any]:
        """Get summary for GEO dataset IDs via NCBI E-utilities."""
        if not ids:
            return {"result": {}}
        url = f"{NCBI_EUTILS_URL}/esummary.fcgi"
        params = {
            "db": "gds",
            "id": ",".join(str(i) for i in ids),
            "retmode": "json",
        }
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response.json()



[docs]
    def _geo_methylation_search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search GEO for methylation array datasets."""
        query = arguments.get("query", "")
        organism = arguments.get("organism", "Homo sapiens")
        limit = arguments.get("limit", 20)

        # Build search term with methylation context
        term_parts = [query, "methylation"]
        if organism:
            term_parts.append(f"{organism}[Organism]")
        term = " AND ".join(term_parts)

        search_result = self._geo_esearch(term, limit)
        esearch = search_result.get("esearchresult", {})
        total = int(esearch.get("count", 0))
        ids = esearch.get("idlist", [])

        datasets = []
        if ids:
            summary_result = self._geo_esummary(ids)
            result = summary_result.get("result", {})
            for uid in ids:
                uid_data = result.get(str(uid), {})
                if isinstance(uid_data, dict) and "accession" in uid_data:
                    datasets.append(
                        {
                            "accession": uid_data.get("accession", ""),
                            "title": uid_data.get("title", ""),
                            "summary": uid_data.get("summary", "")[:500],
                            "platform": uid_data.get("gpl"),
                            "organism": uid_data.get("taxon", ""),
                            "n_samples": uid_data.get("n_samples", 0),
                            "date_published": uid_data.get("pdat"),
                        }
                    )

        return {
            "data": {
                "total": total,
                "datasets": datasets,
            },
            "metadata": {
                "source": "NCBI GEO (ncbi.nlm.nih.gov/geo)",
                "query": query,
                "search_term": term,
                "organism": organism,
            },
        }



[docs]
    def _geo_chipseq_search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search GEO for ChIP-seq datasets."""
        query = arguments.get("query", "")
        organism = arguments.get("organism", "Homo sapiens")
        limit = arguments.get("limit", 20)

        # Build search term with ChIP-seq context
        term_parts = [query, "ChIP-seq"]
        if organism:
            term_parts.append(f"{organism}[Organism]")
        term = " AND ".join(term_parts)

        search_result = self._geo_esearch(term, limit)
        esearch = search_result.get("esearchresult", {})
        total = int(esearch.get("count", 0))
        ids = esearch.get("idlist", [])

        datasets = []
        if ids:
            summary_result = self._geo_esummary(ids)
            result = summary_result.get("result", {})
            for uid in ids:
                uid_data = result.get(str(uid), {})
                if isinstance(uid_data, dict) and "accession" in uid_data:
                    datasets.append(
                        {
                            "accession": uid_data.get("accession", ""),
                            "title": uid_data.get("title", ""),
                            "summary": uid_data.get("summary", "")[:500],
                            "organism": uid_data.get("taxon", ""),
                            "n_samples": uid_data.get("n_samples", 0),
                            "date_published": uid_data.get("pdat"),
                        }
                    )

        return {
            "data": {
                "total": total,
                "datasets": datasets,
            },
            "metadata": {
                "source": "NCBI GEO (ncbi.nlm.nih.gov/geo)",
                "query": query,
                "search_term": term,
                "organism": organism,
            },
        }



[docs]
    def _geo_dataset_details(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get detailed metadata for a GEO dataset."""
        geo_id = arguments.get("geo_id", "")
        if not geo_id:
            return {"error": "geo_id parameter is required (e.g., '200291249')"}

        summary_result = self._geo_esummary([geo_id])
        result = summary_result.get("result", {})
        uid_data = result.get(str(geo_id), {})

        if not isinstance(uid_data, dict) or "accession" not in uid_data:
            return {"error": f"Dataset with ID '{geo_id}' not found in GEO"}

        ftplink = uid_data.get("ftplink", "")
        suppfile = uid_data.get("suppfile", "")
        supp_data = []
        if ftplink:
            supp_data.append(ftplink)
        if suppfile:
            supp_data.append(suppfile)

        return {
            "data": {
                "accession": uid_data.get("accession", ""),
                "title": uid_data.get("title", ""),
                "summary": uid_data.get("summary", ""),
                "experiment_type": uid_data.get("gdstype"),
                "platform": uid_data.get("gpl"),
                "organism": uid_data.get("taxon", ""),
                "n_samples": uid_data.get("n_samples", 0),
                "date_published": uid_data.get("pdat"),
                "supplementary_data": supp_data if supp_data else None,
            },
            "metadata": {
                "source": "NCBI GEO (ncbi.nlm.nih.gov/geo)",
                "geo_id": geo_id,
            },
        }


    # =========================================================================
    # Ensembl Regulatory Features
    # =========================================================================


[docs]
    def _ensembl_regulatory_features(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get Ensembl regulatory features for a genomic region."""
        species = arguments.get("species", "homo_sapiens")
        chrom = arguments.get("chrom", "")
        start = arguments.get("start")
        end = arguments.get("end")

        if not chrom or start is None or end is None:
            return {"error": "chrom, start, and end parameters are required"}

        # Ensure region is not too large (max 5Mb)
        if end - start > 5000000:
            return {"error": "Region too large. Maximum region size is 5 Mb."}

        url = (
            f"{ENSEMBL_REST_URL}/overlap/region/{species}/{chrom}:{start}-{end}"
            f"?feature=regulatory;content-type=application/json"
        )
        # Ensembl REST API can be slow - use 90s timeout
        response = requests.get(url, timeout=max(self.timeout, 90))
        response.raise_for_status()
        raw = response.json()

        features = []
        for feat in raw:
            features.append(
                {
                    "id": feat.get("id", ""),
                    "description": feat.get("description", ""),
                    "feature_type": feat.get("feature_type", ""),
                    "start": feat.get("start"),
                    "end": feat.get("end"),
                    "strand": feat.get("strand", 0),
                    "seq_region_name": feat.get("seq_region_name", ""),
                }
            )

        return {
            "data": {
                "species": species,
                "region": f"{chrom}:{start}-{end}",
                "feature_count": len(features),
                "regulatory_features": features,
            },
            "metadata": {
                "source": "Ensembl Regulatory Build (rest.ensembl.org)",
                "species": species,
                "region": f"{chrom}:{start}-{end}",
            },
        }





[docs]
@register_tool("UCSCEpigenomicsTool")
class UCSCEpigenomicsTool(BaseTool):
    """
    UCSC Genome Browser epigenomics-specific tools.

    Provides access to:
    - CpG island annotations
    - ENCODE4 candidate cis-Regulatory Elements (cCREs)
    - Transcription Factor binding site clusters

    No authentication required.
    """


[docs]
    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration mapping.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                and ``fields["endpoint"]`` (default ``"cpg_islands"``) are
                read from it; the whole mapping is also handed to the base
                class initialiser.
        """
        super().__init__(tool_config)
        endpoint_settings = tool_config.get("fields", {})
        self.timeout = tool_config.get("timeout", 30)
        # The endpoint name selects which track handler `_dispatch` calls.
        self.endpoint = endpoint_settings.get("endpoint", "cpg_islands")



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the UCSC epigenomics API call."""
        try:
            return self._dispatch(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"UCSC API request timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to UCSC API."}
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else "unknown"
            return {"error": f"UCSC API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}



[docs]
    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate endpoint."""
        if self.endpoint == "cpg_islands":
            return self._get_cpg_islands(arguments)
        elif self.endpoint == "encode_ccres":
            return self._get_encode_ccres(arguments)
        elif self.endpoint == "tf_binding":
            return self._get_tf_binding_clusters(arguments)
        else:
            return {"error": f"Unknown endpoint: {self.endpoint}"}



[docs]
    def _ucsc_get_track(
        self, genome: str, track: str, chrom: str, start: int, end: int
    ) -> Dict[str, Any]:
        """
        Fetch data for one track over a region from the UCSC REST API.

        Args:
            genome: Genome assembly name sent as ``genome``.
            track: Track name sent as ``track``.
            chrom: Chromosome name sent as ``chrom``.
            start: Region start sent as ``start``.
            end: Region end sent as ``end``.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-success HTTP status.
        """
        # Pass values through `params` so requests percent-encodes them;
        # interpolating them into the URL lets characters such as "&" or "#"
        # in caller-supplied values alter the query string.
        response = requests.get(
            f"{UCSC_API_URL}/getData/track",
            params={
                "genome": genome,
                "track": track,
                "chrom": chrom,
                "start": start,
                "end": end,
            },
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()



[docs]
    def _get_cpg_islands(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get CpG island annotations for a genomic region."""
        genome = arguments.get("genome", "hg38")
        chrom = arguments.get("chrom", "")
        start = arguments.get("start")
        end = arguments.get("end")

        if not chrom or start is None or end is None:
            return {"error": "chrom, start, and end parameters are required"}

        raw = self._ucsc_get_track(genome, "cpgIslandExt", chrom, start, end)
        items = raw.get("cpgIslandExt", [])
        if not isinstance(items, list):
            items = []

        cpg_islands = []
        for item in items:
            cpg_islands.append(
                {
                    "chrom": item.get("chrom", ""),
                    "chromStart": item.get("chromStart"),
                    "chromEnd": item.get("chromEnd"),
                    "name": item.get("name", ""),
                    "length": item.get("length", 0),
                    "cpgNum": item.get("cpgNum", 0),
                    "gcNum": item.get("gcNum", 0),
                    "perCpg": item.get("perCpg", 0),
                    "perGc": item.get("perGc", 0),
                    "obsExp": item.get("obsExp", 0),
                }
            )

        return {
            "data": {
                "genome": genome,
                "region": f"{chrom}:{start}-{end}",
                "cpg_island_count": len(cpg_islands),
                "cpg_islands": cpg_islands,
            },
            "metadata": {
                "source": "UCSC Genome Browser (api.genome.ucsc.edu)",
                "track": "cpgIslandExt",
                "genome": genome,
            },
        }



[docs]
    def _get_encode_ccres(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get ENCODE4 candidate cis-Regulatory Elements for a genomic region."""
        genome = arguments.get("genome", "hg38")
        chrom = arguments.get("chrom", "")
        start = arguments.get("start")
        end = arguments.get("end")

        if not chrom or start is None or end is None:
            return {"error": "chrom, start, and end parameters are required"}

        raw = self._ucsc_get_track(genome, "cCREregistry", chrom, start, end)
        items = raw.get("cCREregistry", [])
        if not isinstance(items, list):
            items = []

        ccres = []
        for item in items:
            ccres.append(
                {
                    "name": item.get("name", ""),
                    "chrom": item.get("chrom", ""),
                    "chromStart": item.get("chromStart"),
                    "chromEnd": item.get("chromEnd"),
                    "cCRE_class": item.get("cCRE_class", ""),
                    "DNase_maxZ": item.get("DNase_maxZ", 0),
                    "H3K4me3_maxZ": item.get("H3K4me3_maxZ", 0),
                    "H3K27ac_maxZ": item.get("H3K27ac_maxZ", 0),
                    "CTCF_maxZ": item.get("CTCF_maxZ", 0),
                }
            )

        return {
            "data": {
                "genome": genome,
                "region": f"{chrom}:{start}-{end}",
                "ccre_count": len(ccres),
                "ccres": ccres,
            },
            "metadata": {
                "source": "UCSC Genome Browser / ENCODE4 (api.genome.ucsc.edu)",
                "track": "cCREregistry",
                "genome": genome,
            },
        }



[docs]
    def _get_tf_binding_clusters(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get TF binding site clusters from ENCODE3."""
        genome = arguments.get("genome", "hg38")
        chrom = arguments.get("chrom", "")
        start = arguments.get("start")
        end = arguments.get("end")

        if not chrom or start is None or end is None:
            return {"error": "chrom, start, and end parameters are required"}

        raw = self._ucsc_get_track(genome, "encRegTfbsClustered", chrom, start, end)
        items = raw.get("encRegTfbsClustered", [])
        if not isinstance(items, list):
            items = []

        tf_clusters = []
        for item in items:
            tf_clusters.append(
                {
                    "name": item.get("name", ""),
                    "chrom": item.get("chrom", ""),
                    "chromStart": item.get("chromStart"),
                    "chromEnd": item.get("chromEnd"),
                    "score": item.get("score", 0),
                    "sourceCount": item.get("sourceCount", 0),
                }
            )

        return {
            "data": {
                "genome": genome,
                "region": f"{chrom}:{start}-{end}",
                "tf_cluster_count": len(tf_clusters),
                "tf_clusters": tf_clusters,
            },
            "metadata": {
                "source": "UCSC Genome Browser / ENCODE3 (api.genome.ucsc.edu)",
                "track": "encRegTfbsClustered",
                "genome": genome,
                "description": "340 TFs across 129 cell types",
            },
        }