# Source code for tooluniverse.hca_tool
from typing import Dict, Any, List, Optional
import requests
import json
from .base_tool import BaseTool
from .tool_registry import register_tool
# [docs] -- appears to be a documentation-page link marker, not part of the module code
@register_tool("HCATool")
class HCATool(BaseTool):
    """
    Tool for interacting with the Human Cell Atlas (HCA) Data Coordination
    Platform (DCP) v2 API.

    Allows searching for projects and retrieving file manifests. The two
    query methods return plain dictionaries; any failure while querying the
    service or reading its response is reported as ``{"error": <message>}``
    rather than raised.
    """

    BASE_URL = "https://service.azul.data.humancellatlas.org"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Executes the HCA tool action.

        Args:
            arguments (Dict[str, Any]): Dictionary containing the action and
                its parameters. Expected keys:

                - action (str): "search_projects" or "get_file_manifest"
                - organ (str, optional): Organ to filter by (for search_projects)
                - disease (str, optional): Disease to filter by (for search_projects)
                - project_id (str, optional): Project ID (for get_file_manifest)
                - limit (int, optional): Max results to return (default 10)

        Returns:
            Dict[str, Any]: The results of the action (see
            ``search_projects`` and ``get_file_manifest``).

        Raises:
            ValueError: If ``action`` is not one of the supported actions, or
                if ``project_id`` is missing/empty for "get_file_manifest".
        """
        action = arguments.get("action")

        # A limit that is present but None is treated like an omitted one, so
        # the documented default of 10 applies in both cases.
        limit = arguments.get("limit")
        if limit is None:
            limit = 10

        if action == "search_projects":
            return self.search_projects(
                organ=arguments.get("organ"),
                disease=arguments.get("disease"),
                limit=limit,
            )

        if action == "get_file_manifest":
            project_id = arguments.get("project_id")
            if not project_id:
                raise ValueError("project_id is required for get_file_manifest")
            return self.get_file_manifest(project_id, limit=limit)

        raise ValueError(f"Unknown action: {action}")

    def _query_index(
        self, index: str, filters: Dict[str, Any], limit: int
    ) -> Dict[str, Any]:
        """
        Fetch one page of ``{BASE_URL}/index/<index>`` and return the decoded
        JSON body.

        ``filters`` is sent JSON-encoded in the ``filters`` query parameter and
        ``limit`` in the ``size`` query parameter. HTTP error statuses are
        raised by ``raise_for_status``; callers are expected to handle them.
        """
        url = f"{self.BASE_URL}/index/{index}"
        # json.dumps({}) already yields "{}", so an empty filter needs no
        # special case.
        params = {"size": limit, "filters": json.dumps(filters)}
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _summarize_project(hit: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce one project hit to the handful of fields this tool reports."""
        # ``or`` (rather than a .get default) also covers keys that are
        # present but hold None or an empty list, which would otherwise raise
        # and discard the whole result set.
        project_entries = hit.get("projects") or [{}]
        # NOTE(review): "modelOrgan" and "donorDisease" are read as hit-level
        # objects with a "terms" entry; confirm against the service's
        # response schema.
        return {
            "entryId": hit.get("entryId"),
            "projectTitle": project_entries[0].get("projectTitle"),
            "organ": (hit.get("modelOrgan") or {}).get("terms"),
            "donorDisease": (hit.get("donorDisease") or {}).get("terms"),
        }

    def search_projects(
        self,
        organ: Optional[str] = None,
        disease: Optional[str] = None,
        limit: int = 10,
    ) -> Dict[str, Any]:
        """
        Search for projects in the HCA DCP.

        Args:
            organ: If given (non-empty), sent as an ``organ`` "is" filter.
            disease: If given (non-empty), sent as a ``disease`` "is" filter.
            limit: Page size requested from the service.

        Returns:
            ``{"total_hits": int, "projects": [...]}`` where each project has
            the keys ``entryId``, ``projectTitle``, ``organ`` and
            ``donorDisease``; or ``{"error": str}`` if the request or the
            response handling failed.
        """
        # NOTE(review): the facet names "organ" and "disease" are passed
        # through as-is; confirm they match the facets the service accepts.
        filters: Dict[str, Any] = {}
        if organ:
            filters["organ"] = {"is": [organ]}
        if disease:
            filters["disease"] = {"is": [disease]}

        try:
            data = self._query_index("projects", filters, limit)
            projects = [
                self._summarize_project(hit) for hit in data.get("hits") or []
            ]
            return {
                "total_hits": (data.get("pagination") or {}).get("total", 0),
                "projects": projects,
            }
        except Exception as exc:
            # Deliberately broad: this is the tool boundary, and callers
            # receive failures as an error dictionary instead of an exception.
            return {"error": str(exc)}

    def get_file_manifest(self, project_id: str, limit: int = 10) -> Dict[str, Any]:
        """
        Get file download links for a project.

        Args:
            project_id: Project identifier, sent as a ``projectId`` "is" filter.
            limit: Page size requested from the service, and also the maximum
                number of file entries returned.

        Returns:
            ``{"total_files": int, "files": [...]}`` where each file has the
            keys ``name``, ``format``, ``size`` and ``url``; or
            ``{"error": str}`` if the request or the response handling failed.
        """
        filters = {"projectId": {"is": [project_id]}}

        try:
            data = self._query_index("files", filters, limit)
            files: List[Dict[str, Any]] = []
            for hit in data.get("hits") or []:
                for entry in hit.get("files") or []:
                    files.append(
                        {
                            "name": entry.get("name"),
                            "format": entry.get("format"),
                            "size": entry.get("size"),
                            "url": entry.get("azul_url"),
                        }
                    )
            return {
                "total_files": (data.get("pagination") or {}).get("total", 0),
                # The page size applies to hits, but each hit may carry
                # several files, so the flattened list is capped again.
                "files": files[:limit],
            }
        except Exception as exc:
            # Deliberately broad: this is the tool boundary, and callers
            # receive failures as an error dictionary instead of an exception.
            return {"error": str(exc)}