Source code for tooluniverse.hca_tool

from typing import Dict, Any, List, Optional
import requests
import json
from .base_tool import BaseTool
from .tool_registry import register_tool


@register_tool("HCATool")
class HCATool(BaseTool):
    """
    Tool for interacting with the Human Cell Atlas (HCA) Data Coordination
    Platform (DCP) v2 API.

    Allows searching for projects and retrieving file manifests. Network and
    parsing failures are reported as ``{"error": "<message>"}`` dictionaries
    rather than raised; only invalid ``run`` arguments raise ``ValueError``.
    """

    BASE_URL = "https://service.azul.data.humancellatlas.org"

    # Seconds to wait for the service before giving up on a request.
    _REQUEST_TIMEOUT = 30

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Executes the HCA tool action.

        Args:
            arguments (Dict[str, Any]): Dictionary containing the action and
                its parameters. Expected keys:

                - action (str): "search_projects" or "get_file_manifest"
                - organ (str, optional): Organ to filter by
                  (for search_projects)
                - disease (str, optional): Disease to filter by
                  (for search_projects)
                - project_id (str, optional): Project ID
                  (for get_file_manifest)
                - limit (int, optional): Max results to return (default 10)

        Returns:
            Dict[str, Any]: The results of the action.

        Raises:
            ValueError: If ``action`` is not recognised, or if ``project_id``
                is missing/empty for "get_file_manifest".
        """
        action = arguments.get("action")

        # Treat an explicit ``"limit": None`` the same as an absent key so the
        # documented default of 10 always applies.
        limit = arguments.get("limit")
        if limit is None:
            limit = 10

        if action == "search_projects":
            return self.search_projects(
                organ=arguments.get("organ"),
                disease=arguments.get("disease"),
                limit=limit,
            )

        if action == "get_file_manifest":
            project_id = arguments.get("project_id")
            if not project_id:
                raise ValueError("project_id is required for get_file_manifest")
            return self.get_file_manifest(project_id, limit=limit)

        raise ValueError(f"Unknown action: {action}")

    def search_projects(
        self,
        organ: Optional[str] = None,
        disease: Optional[str] = None,
        limit: int = 10,
    ) -> Dict[str, Any]:
        """
        Search for projects in the HCA DCP.

        Args:
            organ (str, optional): Organ to filter by; no organ filter is sent
                when empty or None.
            disease (str, optional): Disease to filter by; no disease filter
                is sent when empty or None.
            limit (int): Value sent as the ``size`` query parameter.

        Returns:
            Dict[str, Any]: ``{"total_hits": int, "projects": [...]}`` on
            success, where each project has the keys ``entryId``,
            ``projectTitle``, ``organ`` and ``donorDisease``; or
            ``{"error": str}`` if the request or parsing failed.
        """
        # NOTE(review): the facet names "organ" and "disease" are sent as-is;
        # confirm against the service's facet list that both are accepted.
        filters: Dict[str, Any] = {}
        if organ:
            filters["organ"] = {"is": [organ]}
        if disease:
            filters["disease"] = {"is": [disease]}

        try:
            data = self._get_json("index/projects", filters, limit)
            hits = data.get("hits") or []
            pagination = data.get("pagination") or {}
            return {
                "total_hits": pagination.get("total", 0),
                "projects": [self._summarize_project(hit) for hit in hits],
            }
        except Exception as e:
            # Tool boundary: callers expect an error dictionary, not a raise.
            return {"error": str(e)}

    def get_file_manifest(self, project_id: str, limit: int = 10) -> Dict[str, Any]:
        """
        Get file download links for a project.

        Args:
            project_id (str): Project ID used for the ``projectId`` filter.
            limit (int): Value sent as the ``size`` query parameter and also
                used to cap the number of file entries returned.

        Returns:
            Dict[str, Any]: ``{"total_files": int, "files": [...]}`` on
            success, where each file has the keys ``name``, ``format``,
            ``size`` and ``url``; or ``{"error": str}`` if the request or
            parsing failed.
        """
        filters = {"projectId": {"is": [project_id]}}

        try:
            data = self._get_json("index/files", filters, limit)
            hits = data.get("hits") or []
            pagination = data.get("pagination") or {}

            files: List[Dict[str, Any]] = []
            for hit in hits:
                for entry in hit.get("files") or []:
                    files.append(
                        {
                            "name": entry.get("name"),
                            "format": entry.get("format"),
                            "size": entry.get("size"),
                            "url": entry.get("azul_url"),
                        }
                    )

            return {
                "total_files": pagination.get("total", 0),
                # ``size`` limits hits, and one hit may carry several files,
                # so slice the flattened list again to honour ``limit``.
                "files": files[:limit],
            }
        except Exception as e:
            # Tool boundary: callers expect an error dictionary, not a raise.
            return {"error": str(e)}

    def _get_json(
        self, endpoint: str, filters: Dict[str, Any], limit: int
    ) -> Dict[str, Any]:
        """
        Issue a GET request to ``BASE_URL/endpoint`` and return the JSON body.

        Args:
            endpoint (str): Path below ``BASE_URL`` without a leading slash.
            filters (Dict[str, Any]): Filter object; serialised to JSON for
                the ``filters`` query parameter (an empty dict becomes "{}").
            limit (int): Value for the ``size`` query parameter.

        Returns:
            Dict[str, Any]: The decoded JSON response.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-success HTTP status (via ``raise_for_status``).
            ValueError: If the response body is not valid JSON.
        """
        url = f"{self.BASE_URL}/{endpoint}"
        params = {"size": limit, "filters": json.dumps(filters)}
        response = requests.get(url, params=params, timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _summarize_project(hit: Dict[str, Any]) -> Dict[str, Any]:
        """
        Reduce one project hit to the handful of fields the tool reports.

        Missing, empty, or ``None`` sub-structures yield ``None`` values
        instead of raising, so one sparse hit cannot fail the whole search.
        """
        # ``or`` (rather than a ``.get`` default) also covers keys that are
        # present but hold ``None`` or an empty list.
        project_entries = hit.get("projects") or [{}]
        first_project = project_entries[0] or {}

        # NOTE(review): reading "modelOrgan"/"donorDisease" with a "terms"
        # sub-key from the top level of a hit mirrors the original code;
        # confirm against a live response that hits expose these fields here.
        return {
            "entryId": hit.get("entryId"),
            "projectTitle": first_project.get("projectTitle"),
            "organ": (hit.get("modelOrgan") or {}).get("terms"),
            "donorDisease": (hit.get("donorDisease") or {}).get("terms"),
        }