Source code for tooluniverse.fourdn_tool

"""
4DN Data Portal API Tool

This tool provides access to the 4D Nucleome Data Portal, which hosts
350+ uniformly processed Hi-C contact files and chromatin conformation data.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool


FOURDN_BASE_URL = "https://data.4dnucleome.org"


[docs] @register_tool("FourDNTool") class FourDNTool(BaseTool): """ 4DN Data Portal API tool for accessing Hi-C and chromatin conformation data. """
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the tool with given arguments.""" try: operation = arguments.get("operation", "search") if operation == "search": return self._search(arguments) elif operation == "get_file_metadata": return self._get_file_metadata(arguments) elif operation == "get_experiment_metadata": return self._get_experiment_metadata(arguments) elif operation == "download_file_url": return self._download_file_url(arguments) else: return { "status": "error", "data": {"error": f"Unknown operation: {operation}"}, } except Exception as e: return {"status": "error", "data": {"error": str(e)}}
[docs] def _get_file_metadata(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get metadata for a specific file.""" try: file_accession = arguments.get("file_accession") include_full_metadata = arguments.get("include_full_metadata", False) if not file_accession: return { "status": "error", "data": {"error": "file_accession is required"}, } # Use frame=object for smaller response (84% reduction) url = f"{FOURDN_BASE_URL}/{file_accession}/?format=json&frame=object" response = requests.get(url, timeout=30) response.raise_for_status() api_data = response.json() # Handle file_format - can be dict or string with frame=object file_format = api_data.get("file_format") if isinstance(file_format, dict): file_format = file_format.get("display_title") elif isinstance(file_format, str): # Extract format from URL like /file-formats/bw/ file_format = ( file_format.split("/")[-2] if "/" in file_format else file_format ) result = { "status": "success", "accession": file_accession, "file_type": api_data.get("file_type"), "file_format": file_format, "file_size": api_data.get("file_size"), "description": api_data.get("description"), "data_status": api_data.get("status"), "biosource": api_data.get("biosource"), "experiment": api_data.get("experiment"), "download_url": (f"{FOURDN_BASE_URL}/{file_accession}/@@download"), } # Only include full metadata if explicitly requested # This reduces response size by ~97% in typical cases if include_full_metadata: result["metadata"] = api_data return {"status": "success", "data": result} except Exception as e: return {"status": "error", "data": {"error": str(e)}}
[docs] def _get_experiment_metadata(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get metadata for a specific experiment.""" try: experiment_accession = arguments.get("experiment_accession") include_full_metadata = arguments.get("include_full_metadata", False) if not experiment_accession: return { "status": "error", "data": {"error": "experiment_accession is required"}, } # Use frame=object for smaller response url = f"{FOURDN_BASE_URL}/{experiment_accession}/?format=json&frame=object" response = requests.get(url, timeout=30) response.raise_for_status() api_data = response.json() # Extract biosource from biosample # (biosource is nested inside biosample) biosample = api_data.get("biosample", {}) # Handle biosample - can be dict or string with frame=object if isinstance(biosample, str): biosample = {"@id": biosample} biosource_list = [] biosource_summary = "" else: biosource_list = biosample.get("biosource", []) biosource_summary = biosample.get("biosource_summary", "") # Combine files from different possible fields files = api_data.get("files", []) if not files: files = api_data.get("processed_files", []) # Handle experiment_type - can be dict or string exp_type = api_data.get("experiment_type") if isinstance(exp_type, dict): exp_type_name = exp_type.get("display_title") elif isinstance(exp_type, str): # Extract from URL like /experiment-types/in-situ-hi-c/ exp_type_name = ( exp_type.split("/")[-2].replace("-", " ") if "/" in exp_type else exp_type ) else: exp_type_name = None result = { "status": "success", "accession": experiment_accession, "experiment_type": exp_type_name, "biosample": biosample, "biosource": biosource_list, "biosource_summary": biosource_summary, "description": api_data.get("description"), "data_status": api_data.get("status"), "files": files, } # Only include full metadata if explicitly requested if include_full_metadata: result["metadata"] = api_data return {"status": "success", "data": result} except Exception as e: return {"status": "error", "data": {"error": str(e)}}
[docs] def _download_file_url(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get download URL for file (requires authentication).""" try: file_accession = arguments.get("file_accession") if not file_accession: return { "status": "error", "data": {"error": "file_accession is required"}, } # Get DRS API information drs_url = f"{FOURDN_BASE_URL}/ga4gh/drs/v1/objects/{file_accession}" result = { "status": "success", "accession": file_accession, "download_url": (f"{FOURDN_BASE_URL}/{file_accession}/@@download"), "drs_url": drs_url, "note": ( "File download requires authentication. " "Use curl with access key: " "curl -O -L --user <key>:<secret> <download-url>" ), "instruction": ("Create access key at https://data.4dnucleome.org/me"), } return {"status": "success", "data": result} except Exception as e: return {"status": "error", "data": {"error": str(e)}}