Source code for tooluniverse.cryoet_tool

"""
CryoET Data Portal Tool

Provides access to the CZ BioHub CryoET Data Portal via GraphQL API.
No authentication required. Endpoint: https://graphql.cryoetdataportal.czscience.com/graphql
"""

import json
from typing import Dict, Any

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

GRAPHQL_URL = "https://graphql.cryoetdataportal.czscience.com/graphql"


def _gql(query: str, variables: dict = None) -> Dict[str, Any]:
    """POST a GraphQL document to ``GRAPHQL_URL`` and return the decoded JSON body.

    ``variables`` is added to the request payload only when it is truthy.
    A non-2xx HTTP status raises via ``raise_for_status``; the decoded body is
    returned as-is, so any GraphQL-level ``errors`` key is left for the caller
    to inspect.
    """
    body = {"query": query}
    if variables:
        body["variables"] = variables

    response = requests.post(
        GRAPHQL_URL,
        headers={"Content-Type": "application/json"},
        json=body,
        timeout=30,  # seconds
    )
    response.raise_for_status()
    return response.json()



[docs]
@register_tool("CryoETTool")
class CryoETTool(BaseTool):
    """
    CryoET Data Portal tool for browsing cryo-electron tomography datasets,
    runs, tomograms, and annotations from the CZ BioHub portal.
    """


[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route a request to the handler named by ``arguments["operation"]``.

        The handler's result dict is returned unchanged. An unrecognised (or
        missing) operation, and any exception raised while dispatching, is
        reported as ``{"status": "error", "error": <message>}``.
        """
        # (operation value, handler attribute) pairs, tried in this order.
        # Handlers are looked up lazily so an unknown operation never touches self.
        routes = (
            ("list_datasets", "_list_datasets"),
            ("get_dataset", "_get_dataset"),
            ("list_runs", "_list_runs"),
            ("list_tomograms", "_list_tomograms"),
            ("list_annotations", "_list_annotations"),
        )
        try:
            requested = arguments.get("operation", "")
            for op_name, handler_name in routes:
                if requested == op_name:
                    return getattr(self, handler_name)(arguments)

            message = (
                f"Unknown operation: {requested!r}. Valid operations: "
                "list_datasets, get_dataset, list_runs, list_tomograms, list_annotations"
            )
            return {"status": "error", "error": message}
        except Exception as exc:
            return {"status": "error", "error": str(exc)}


    # ------------------------------------------------------------------ #
    # list_datasets
    # ------------------------------------------------------------------ #

[docs]
    def _list_datasets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List/search CryoET datasets with optional filters."""
        try:
            organism = arguments.get("organism_name")
            tissue = arguments.get("tissue_name")
            limit = int(arguments.get("limit", 10))
            offset = int(arguments.get("offset", 0))

            where_parts = []
            if organism:
                where_parts.append(f'organismName: {{_ilike: "%{organism}%"}}')
            if tissue:
                where_parts.append(f'tissueName: {{_ilike: "%{tissue}%"}}')

            where_clause = (
                "{" + ", ".join(where_parts) + "}" if where_parts else "null"
            )
            where_arg = f"where: {where_clause}, " if where_parts else ""

            query = f"""
            {{
              datasets(
                {where_arg}limitOffset: {{limit: {limit}, offset: {offset}}}
              ) {{
                id
                title
                description
                organismName
                tissueName
                cellName
                sampleType
                depositionDate
                releaseDate
                relatedDatabaseEntries
                datasetPublications
                s3Prefix
                httpsPrefix
              }}
            }}
            """
            result = _gql(query)
            if "errors" in result:
                return {
                    "status": "error",
                    "error": str(result["errors"]),
                }
            datasets = result.get("data", {}).get("datasets", [])
            return {
                "status": "success",
                "data": {
                    "count": len(datasets),
                    "datasets": datasets,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}


    # ------------------------------------------------------------------ #
    # get_dataset
    # ------------------------------------------------------------------ #

[docs]
    def _get_dataset(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get full details for a specific dataset by numeric ID."""
        try:
            dataset_id = arguments.get("dataset_id")
            if dataset_id is None:
                return {
                    "status": "error",
                    "error": "dataset_id is required for get_dataset operation",
                }
            dataset_id = int(dataset_id)

            query = f"""
            {{
              datasets(where: {{id: {{_eq: {dataset_id}}}}}) {{
                id
                title
                description
                organismName
                organismTaxid
                tissueName
                tissueId
                cellName
                cellTypeId
                cellStrainName
                sampleType
                samplePreparation
                gridPreparation
                otherSetup
                depositionDate
                releaseDate
                lastModifiedDate
                datasetPublications
                relatedDatabaseEntries
                keyPhotoUrl
                s3Prefix
                httpsPrefix
                fileSize
              }}
            }}
            """
            result = _gql(query)
            if "errors" in result:
                return {"status": "error", "error": str(result["errors"])}
            datasets = result.get("data", {}).get("datasets", [])
            if not datasets:
                return {
                    "status": "error",
                    "error": f"Dataset {dataset_id} not found",
                }
            return {"status": "success", "data": {"dataset": datasets[0]}}
        except Exception as e:
            return {"status": "error", "error": str(e)}


    # ------------------------------------------------------------------ #
    # list_runs
    # ------------------------------------------------------------------ #

[docs]
    def _list_runs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List experimental runs in a dataset."""
        try:
            dataset_id = arguments.get("dataset_id")
            if dataset_id is None:
                return {
                    "status": "error",
                    "error": "dataset_id is required for list_runs operation",
                }
            dataset_id = int(dataset_id)
            limit = int(arguments.get("limit", 20))
            offset = int(arguments.get("offset", 0))

            query = f"""
            {{
              runs(
                where: {{datasetId: {{_eq: {dataset_id}}}}},
                limitOffset: {{limit: {limit}, offset: {offset}}}
              ) {{
                id
                name
                datasetId
                s3Prefix
                httpsPrefix
              }}
            }}
            """
            result = _gql(query)
            if "errors" in result:
                return {"status": "error", "error": str(result["errors"])}
            runs = result.get("data", {}).get("runs", [])
            return {
                "status": "success",
                "data": {
                    "dataset_id": dataset_id,
                    "count": len(runs),
                    "runs": runs,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}


    # ------------------------------------------------------------------ #
    # list_tomograms
    # ------------------------------------------------------------------ #

[docs]
    def _list_tomograms(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List tomograms with voxel spacing and reconstruction info."""
        try:
            run_id = arguments.get("run_id")
            if run_id is None:
                return {
                    "status": "error",
                    "error": "run_id is required for list_tomograms operation",
                }
            run_id = int(run_id)
            limit = int(arguments.get("limit", 10))
            offset = int(arguments.get("offset", 0))

            query = f"""
            {{
              tomograms(
                where: {{runId: {{_eq: {run_id}}}}},
                limitOffset: {{limit: {limit}, offset: {offset}}}
              ) {{
                id
                name
                runId
                voxelSpacing
                sizeX
                sizeY
                sizeZ
                reconstructionMethod
                processing
                processingSoftware
                reconstructionSoftware
                isPortalStandard
                isAuthorSubmitted
                isVisualizationDefault
                ctfCorrected
                s3OmezarrDir
                httpsMrcFile
                s3MrcFile
                depositionDate
                releaseDate
              }}
            }}
            """
            result = _gql(query)
            if "errors" in result:
                return {"status": "error", "error": str(result["errors"])}
            tomograms = result.get("data", {}).get("tomograms", [])
            return {
                "status": "success",
                "data": {
                    "run_id": run_id,
                    "count": len(tomograms),
                    "tomograms": tomograms,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}


    # ------------------------------------------------------------------ #
    # list_annotations
    # ------------------------------------------------------------------ #

[docs]
    def _list_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List annotations (segmentations) for a run."""
        try:
            run_id = arguments.get("run_id")
            if run_id is None:
                return {
                    "status": "error",
                    "error": "run_id is required for list_annotations operation",
                }
            run_id = int(run_id)
            limit = int(arguments.get("limit", 20))
            offset = int(arguments.get("offset", 0))
            curator_only = arguments.get("curator_recommended_only", False)

            where_parts = [f"runId: {{_eq: {run_id}}}"]
            if curator_only:
                where_parts.append("isCuratorRecommended: {_eq: true}")

            where_clause = "{" + ", ".join(where_parts) + "}"

            query = f"""
            {{
              annotations(
                where: {where_clause},
                limitOffset: {{limit: {limit}, offset: {offset}}}
              ) {{
                id
                objectName
                objectId
                objectDescription
                objectState
                objectCount
                annotationMethod
                annotationSoftware
                groundTruthStatus
                isCuratorRecommended
                methodType
                depositionDate
                releaseDate
                annotationPublication
                s3MetadataPath
                httpsMetadataPath
              }}
            }}
            """
            result = _gql(query)
            if "errors" in result:
                return {"status": "error", "error": str(result["errors"])}
            annotations = result.get("data", {}).get("annotations", [])
            return {
                "status": "success",
                "data": {
                    "run_id": run_id,
                    "count": len(annotations),
                    "annotations": annotations,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}