Source code for tooluniverse.cryoet_tool
"""
CryoET Data Portal Tool
Provides access to the CZ BioHub CryoET Data Portal via GraphQL API.
No authentication required. Endpoint: https://graphql.cryoetdataportal.czscience.com/graphql
"""
import json
from typing import Dict, Any

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
GRAPHQL_URL = "https://graphql.cryoetdataportal.czscience.com/graphql"
def _gql(query: str, variables: dict = None) -> Dict[str, Any]:
    """
    POST a GraphQL document to ``GRAPHQL_URL`` and return the decoded JSON body.

    ``variables`` is attached to the request payload only when it is truthy.
    The request uses a 30 second timeout; an HTTP error status is surfaced
    through ``raise_for_status()``.
    """
    if variables:
        body = {"query": query, "variables": variables}
    else:
        body = {"query": query}

    response = requests.post(
        GRAPHQL_URL,
        json=body,
        headers={"Content-Type": "application/json"},
        timeout=30,
    )
    # Fail loudly on 4xx/5xx before attempting to decode the body.
    response.raise_for_status()
    return response.json()
[docs]
@register_tool("CryoETTool")
class CryoETTool(BaseTool):
    """
    CryoET Data Portal tool for browsing cryo-electron tomography datasets,
    runs, tomograms, and annotations from the CZ BioHub portal.

    Every operation returns a dict whose ``"status"`` is ``"success"`` (payload
    under ``"data"``) or ``"error"`` (message under ``"error"``). Exceptions
    raised while handling a request are converted into error responses.
    """

    # GraphQL selection sets requested by each operation.
    _DATASET_LIST_FIELDS = (
        "id",
        "title",
        "description",
        "organismName",
        "tissueName",
        "cellName",
        "sampleType",
        "depositionDate",
        "releaseDate",
        "relatedDatabaseEntries",
        "datasetPublications",
        "s3Prefix",
        "httpsPrefix",
    )
    _DATASET_DETAIL_FIELDS = (
        "id",
        "title",
        "description",
        "organismName",
        "organismTaxid",
        "tissueName",
        "tissueId",
        "cellName",
        "cellTypeId",
        "cellStrainName",
        "sampleType",
        "samplePreparation",
        "gridPreparation",
        "otherSetup",
        "depositionDate",
        "releaseDate",
        "lastModifiedDate",
        "datasetPublications",
        "relatedDatabaseEntries",
        "keyPhotoUrl",
        "s3Prefix",
        "httpsPrefix",
        "fileSize",
    )
    _RUN_FIELDS = (
        "id",
        "name",
        "datasetId",
        "s3Prefix",
        "httpsPrefix",
    )
    _TOMOGRAM_FIELDS = (
        "id",
        "name",
        "runId",
        "voxelSpacing",
        "sizeX",
        "sizeY",
        "sizeZ",
        "reconstructionMethod",
        "processing",
        "processingSoftware",
        "reconstructionSoftware",
        "isPortalStandard",
        "isAuthorSubmitted",
        "isVisualizationDefault",
        "ctfCorrected",
        "s3OmezarrDir",
        "httpsMrcFile",
        "s3MrcFile",
        "depositionDate",
        "releaseDate",
    )
    _ANNOTATION_FIELDS = (
        "id",
        "objectName",
        "objectId",
        "objectDescription",
        "objectState",
        "objectCount",
        "annotationMethod",
        "annotationSoftware",
        "groundTruthStatus",
        "isCuratorRecommended",
        "methodType",
        "depositionDate",
        "releaseDate",
        "annotationPublication",
        "s3MetadataPath",
        "httpsMetadataPath",
    )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the operation named by ``arguments["operation"]``.

        Valid operations are ``list_datasets``, ``get_dataset``, ``list_runs``,
        ``list_tomograms`` and ``list_annotations``; anything else yields an
        error response listing them.
        """
        try:
            operation = arguments.get("operation", "")
            handlers = {
                "list_datasets": self._list_datasets,
                "get_dataset": self._get_dataset,
                "list_runs": self._list_runs,
                "list_tomograms": self._list_tomograms,
                "list_annotations": self._list_annotations,
            }
            # Guard the lookup so an unhashable value still gets the
            # "Unknown operation" message instead of a TypeError.
            handler = handlers.get(operation) if isinstance(operation, str) else None
            if handler is None:
                return {
                    "status": "error",
                    "error": f"Unknown operation: {operation!r}. Valid operations: "
                    "list_datasets, get_dataset, list_runs, list_tomograms, list_annotations",
                }
            return handler(arguments)
        except Exception as e:
            return {"status": "error", "error": str(e)}

    # ------------------------------------------------------------------ #
    # shared helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _int_arg(arguments: Dict[str, Any], name: str, default=None):
        """Return ``arguments[name]`` as an int, or ``default`` if missing/None."""
        value = arguments.get(name)
        if value is None:
            return default
        try:
            return int(value)
        except (TypeError, ValueError):
            raise ValueError(f"{name} must be an integer, got {value!r}") from None

    @classmethod
    def _pagination(cls, arguments: Dict[str, Any], default_limit: int) -> str:
        """Build the ``limitOffset`` argument from ``limit`` and ``offset``."""
        limit = cls._int_arg(arguments, "limit", default_limit)
        offset = cls._int_arg(arguments, "offset", 0)
        if limit < 0 or offset < 0:
            raise ValueError("limit and offset must be non-negative")
        return f"limitOffset: {{limit: {limit}, offset: {offset}}}"

    @staticmethod
    def _ilike_literal(value: Any) -> str:
        """
        Render ``%value%`` as a quoted, escaped GraphQL string literal.

        JSON string escaping (quotes, backslashes, control characters) is
        used so that user-supplied text cannot terminate the literal and
        alter the surrounding query.
        """
        return json.dumps(f"%{value}%", ensure_ascii=False)

    @staticmethod
    def _build_query(collection: str, fields, where=None, page: str = None) -> str:
        """Assemble a single-collection GraphQL query document."""
        call_args = []
        if where:
            call_args.append("where: {" + ", ".join(where) + "}")
        if page:
            call_args.append(page)
        arg_text = "(" + ", ".join(call_args) + ")" if call_args else ""
        return "{ " + collection + arg_text + " { " + " ".join(fields) + " } }"

    @staticmethod
    def _fetch_rows(query: str, field: str) -> list:
        """
        Run ``query`` and return the list stored under ``data[field]``.

        Raises ``RuntimeError`` carrying ``str(errors)`` when the response
        contains an ``errors`` key; callers turn that into an error response.
        A null ``data`` object or null collection is treated as empty.
        """
        result = _gql(query)
        if "errors" in result:
            raise RuntimeError(str(result["errors"]))
        return (result.get("data") or {}).get(field) or []

    # ------------------------------------------------------------------ #
    # list_datasets
    # ------------------------------------------------------------------ #
    def _list_datasets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List/search CryoET datasets with optional filters.

        Reads ``organism_name`` and ``tissue_name`` (each applied through the
        API's ``_ilike`` operator with ``%`` wildcards on both sides), plus
        ``limit`` (default 10) and ``offset`` (default 0).
        """
        try:
            filters = []
            organism = arguments.get("organism_name")
            if organism:
                filters.append(
                    f"organismName: {{_ilike: {self._ilike_literal(organism)}}}"
                )
            tissue = arguments.get("tissue_name")
            if tissue:
                filters.append(
                    f"tissueName: {{_ilike: {self._ilike_literal(tissue)}}}"
                )
            query = self._build_query(
                "datasets",
                self._DATASET_LIST_FIELDS,
                where=filters,
                page=self._pagination(arguments, default_limit=10),
            )
            datasets = self._fetch_rows(query, "datasets")
            return {
                "status": "success",
                "data": {
                    "count": len(datasets),
                    "datasets": datasets,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}

    # ------------------------------------------------------------------ #
    # get_dataset
    # ------------------------------------------------------------------ #
    def _get_dataset(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get full details for a specific dataset by numeric ID.

        Requires ``dataset_id``; returns an error response when it is missing
        or when no dataset with that id is returned.
        """
        try:
            dataset_id = self._int_arg(arguments, "dataset_id")
            if dataset_id is None:
                return {
                    "status": "error",
                    "error": "dataset_id is required for get_dataset operation",
                }
            query = self._build_query(
                "datasets",
                self._DATASET_DETAIL_FIELDS,
                where=[f"id: {{_eq: {dataset_id}}}"],
            )
            datasets = self._fetch_rows(query, "datasets")
            if not datasets:
                return {
                    "status": "error",
                    "error": f"Dataset {dataset_id} not found",
                }
            return {"status": "success", "data": {"dataset": datasets[0]}}
        except Exception as e:
            return {"status": "error", "error": str(e)}

    # ------------------------------------------------------------------ #
    # list_runs
    # ------------------------------------------------------------------ #
    def _list_runs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List experimental runs in a dataset.

        Requires ``dataset_id``; honours ``limit`` (default 20) and
        ``offset`` (default 0).
        """
        try:
            dataset_id = self._int_arg(arguments, "dataset_id")
            if dataset_id is None:
                return {
                    "status": "error",
                    "error": "dataset_id is required for list_runs operation",
                }
            query = self._build_query(
                "runs",
                self._RUN_FIELDS,
                where=[f"datasetId: {{_eq: {dataset_id}}}"],
                page=self._pagination(arguments, default_limit=20),
            )
            runs = self._fetch_rows(query, "runs")
            return {
                "status": "success",
                "data": {
                    "dataset_id": dataset_id,
                    "count": len(runs),
                    "runs": runs,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}

    # ------------------------------------------------------------------ #
    # list_tomograms
    # ------------------------------------------------------------------ #
    def _list_tomograms(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List tomograms with voxel spacing and reconstruction info.

        Requires ``run_id``; honours ``limit`` (default 10) and ``offset``
        (default 0).
        """
        try:
            run_id = self._int_arg(arguments, "run_id")
            if run_id is None:
                return {
                    "status": "error",
                    "error": "run_id is required for list_tomograms operation",
                }
            query = self._build_query(
                "tomograms",
                self._TOMOGRAM_FIELDS,
                where=[f"runId: {{_eq: {run_id}}}"],
                page=self._pagination(arguments, default_limit=10),
            )
            tomograms = self._fetch_rows(query, "tomograms")
            return {
                "status": "success",
                "data": {
                    "run_id": run_id,
                    "count": len(tomograms),
                    "tomograms": tomograms,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}

    # ------------------------------------------------------------------ #
    # list_annotations
    # ------------------------------------------------------------------ #
    def _list_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List annotations (segmentations) for a run.

        Requires ``run_id``; honours ``limit`` (default 20) and ``offset``
        (default 0). When ``curator_recommended_only`` is truthy an
        ``isCuratorRecommended: {_eq: true}`` filter is added.
        """
        try:
            run_id = self._int_arg(arguments, "run_id")
            if run_id is None:
                return {
                    "status": "error",
                    "error": "run_id is required for list_annotations operation",
                }
            curator_only = arguments.get("curator_recommended_only", False)
            if isinstance(curator_only, str):
                # A non-empty string such as "false" is truthy in Python;
                # interpret textual flags by their meaning instead.
                curator_only = curator_only.strip().lower() in ("1", "true", "yes")

            filters = [f"runId: {{_eq: {run_id}}}"]
            if curator_only:
                filters.append("isCuratorRecommended: {_eq: true}")
            query = self._build_query(
                "annotations",
                self._ANNOTATION_FIELDS,
                where=filters,
                page=self._pagination(arguments, default_limit=20),
            )
            annotations = self._fetch_rows(query, "annotations")
            return {
                "status": "success",
                "data": {
                    "run_id": run_id,
                    "count": len(annotations),
                    "annotations": annotations,
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}