Source code for tooluniverse.bioimage_archive_tool

# bioimage_archive_tool.py
"""
BioImage Archive (EBI BioStudies) REST API tool for ToolUniverse.

The BioImage Archive is an open resource at EMBL-EBI for biological images from
life sciences research. It stores and distributes biological image datasets
spanning microscopy, imaging, and visualization experiments.

API: https://www.ebi.ac.uk/biostudies/api/v1/
No authentication required. Free for all use.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

BIOSTUDIES_BASE_URL = "https://www.ebi.ac.uk/biostudies/api/v1"


[docs] @register_tool("BioImageArchiveTool") class BioImageArchiveTool(BaseTool): """ Tool for querying the BioImage Archive at EBI. Provides access to biological imaging datasets including fluorescence microscopy, cryo-EM, confocal imaging, and other life sciences imaging modalities. Supports searching studies and retrieving detailed metadata. No authentication required. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 30) fields = tool_config.get("fields", {}) self.action = fields.get("action", "search")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the BioImage Archive API call.""" try: return self._query(arguments) except requests.exceptions.Timeout: return { "error": f"BioImage Archive API request timed out after {self.timeout} seconds" } except requests.exceptions.ConnectionError: return { "error": "Failed to connect to BioImage Archive API. Check network connectivity." } except requests.exceptions.HTTPError as e: return { "error": f"BioImage Archive API HTTP error: {e.response.status_code}" } except Exception as e: return {"error": f"Unexpected error querying BioImage Archive: {str(e)}"}
[docs] def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate query method.""" if self.action == "search": return self._search(arguments) elif self.action == "get_study": return self._get_study(arguments) elif self.action == "search_bioimages": return self._search_bioimages(arguments) else: return {"error": f"Unknown action: {self.action}"}
[docs] def _get_study(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get detailed information about a specific study.""" accession = arguments.get("accession", "") if not accession: return {"error": "accession parameter is required"} url = f"{BIOSTUDIES_BASE_URL}/studies/{accession}" response = requests.get(url, timeout=self.timeout) response.raise_for_status() data = response.json() # Parse attributes into a more usable format attributes = data.get("attributes", []) attr_dict = {} for attr in attributes: name = attr.get("name", "") value = attr.get("value", "") if name: attr_dict[name] = value # Extract section info section = data.get("section", {}) section_type = section.get("type") section_attrs = section.get("attributes", []) # Extract key metadata from section attributes title = None description = None organism = None imaging_method = None for attr in section_attrs: name = attr.get("name", "").lower() value = attr.get("value", "") if name == "title": title = value elif name in ("description", "abstract"): description = value elif name == "organism": organism = value elif name in ("imaging method", "imaging_method"): imaging_method = value return { "data": { "accession": data.get("accno", accession), "type": data.get("type"), "title": title or attr_dict.get("Title"), "release_date": attr_dict.get("ReleaseDate"), "description": description, "organism": organism, "imaging_method": imaging_method, "attributes": attributes, "section_type": section_type, }, "metadata": { "source": "BioImage Archive (EBI BioStudies)", "accession": accession, }, }
[docs] def _search_bioimages(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Search the BioImages-specific collection.""" query = arguments.get("query", "") if not query: return {"error": "query parameter is required"} page_size = min(arguments.get("page_size") or 10, 100) params = { "query": query, "pageSize": page_size, } url = f"{BIOSTUDIES_BASE_URL}/BioImages/search" response = requests.get(url, params=params, timeout=self.timeout) response.raise_for_status() data = response.json() hits = data.get("hits", []) results = [] for hit in hits: results.append( { "accession": hit.get("accession", ""), "title": hit.get("title"), "author": hit.get("author"), "release_date": hit.get("releaseDate") or hit.get("release_date"), "type": hit.get("type"), } ) return { "data": results, "metadata": { "source": "BioImage Archive (EBI BioStudies - BioImages collection)", "total_hits": data.get("totalHits", 0), "query": query, }, }