Source code for tooluniverse.bioimage_archive_tool
# bioimage_archive_tool.py
"""
BioImage Archive (EBI BioStudies) REST API tool for ToolUniverse.
The BioImage Archive is an open resource at EMBL-EBI for biological images from
life sciences research. It stores and distributes biological image datasets
spanning microscopy, imaging, and visualization experiments.
API: https://www.ebi.ac.uk/biostudies/api/v1/
No authentication required. Free for all use.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root of the EBI BioStudies REST API (v1); endpoint paths such as
# "/search" and "/studies/<accession>" are appended to this prefix.
BIOSTUDIES_BASE_URL = "https://www.ebi.ac.uk/biostudies/api/v1"
[docs]
@register_tool("BioImageArchiveTool")
class BioImageArchiveTool(BaseTool):
    """
    Tool for querying the BioImage Archive at EBI.

    Provides access to biological imaging datasets including fluorescence
    microscopy, cryo-EM, confocal imaging, and other life sciences imaging
    modalities. Supports searching studies and retrieving detailed metadata.
    No authentication required.

    The operation performed by :meth:`run` is fixed per instance by
    ``tool_config["fields"]["action"]`` and is one of ``"search"``,
    ``"get_study"`` or ``"search_bioimages"``. Every method returns a plain
    dict: ``{"data": ..., "metadata": ...}`` on success or
    ``{"error": message}`` on failure.
    """

    _SOURCE_ALL = "BioImage Archive (EBI BioStudies)"
    _SOURCE_BIOIMAGES = "BioImage Archive (EBI BioStudies - BioImages collection)"
    _DEFAULT_PAGE_SIZE = 10
    _MAX_PAGE_SIZE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the configured action.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                and ``fields.action`` (default ``"search"``) are read here;
                the whole mapping is also forwarded to ``BaseTool.__init__``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` tolerates an explicit ``"fields": null`` in the config.
        fields = tool_config.get("fields") or {}
        self.action = fields.get("action", "search")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the BioImage Archive API call.

        Args:
            arguments: Arguments for the configured action; see ``_search``,
                ``_get_study`` and ``_search_bioimages`` for the keys read.

        Returns:
            The action's result dict, or ``{"error": message}``. Exceptions
            raised while querying are converted into error dicts instead of
            being propagated to the caller.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {
                "error": f"BioImage Archive API request timed out after {self.timeout} seconds"
            }
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to BioImage Archive API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # An HTTPError may carry no response object; fall back to a
            # placeholder instead of raising from inside this handler.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"BioImage Archive API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error querying BioImage Archive: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the query method selected by ``self.action``.

        Returns:
            The selected method's result, or an error dict when the
            configured action is not recognised.
        """
        if self.action == "search":
            return self._search(arguments)
        elif self.action == "get_study":
            return self._get_study(arguments)
        elif self.action == "search_bioimages":
            return self._search_bioimages(arguments)
        else:
            return {"error": f"Unknown action: {self.action}"}

    @staticmethod
    def _positive_int(value: Any, default: int, name: str) -> int:
        """Coerce *value* to an integer >= 1, using *default* when it is falsy.

        Args:
            value: Raw argument value (may be ``None``, a number or a
                numeric string).
            default: Value returned when *value* is missing, ``None`` or 0.
            name: Argument name used in the error message.

        Raises:
            ValueError: If *value* cannot be converted to an integer.
        """
        if not value:
            return default
        try:
            number = int(value)
        except (TypeError, ValueError):
            raise ValueError(f"{name} must be an integer") from None
        # Clamp so zero/negative values are never forwarded to the API.
        return max(number, 1)

    @staticmethod
    def _format_hit(hit: Dict[str, Any], with_links_and_files: bool) -> Dict[str, Any]:
        """Project one raw search hit onto the fields exposed by this tool."""
        record = {
            "accession": hit.get("accession", ""),
            "title": hit.get("title"),
            "author": hit.get("author"),
            # Accept either spelling of the release-date key.
            "release_date": hit.get("releaseDate") or hit.get("release_date"),
            "type": hit.get("type"),
        }
        if with_links_and_files:
            record["links"] = hit.get("links")
            record["files"] = hit.get("files")
        return record

    def _run_search(
        self,
        arguments: Dict[str, Any],
        url: str,
        source: str,
        with_links_and_files: bool,
    ) -> Dict[str, Any]:
        """Shared implementation of the two search actions.

        Args:
            arguments: Caller arguments; reads ``query`` (required),
                ``page_size`` (default 10, clamped to 1..100) and ``page``
                (default 1, minimum 1).
            url: Search endpoint to call.
            source: Label stored under ``metadata["source"]``.
            with_links_and_files: Whether each result also carries the
                hit's ``links`` and ``files`` values.

        Returns:
            ``{"data": [...], "metadata": {...}}`` or an error dict when
            ``query`` is missing or a paging argument is not an integer.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP
                failure (handled by :meth:`run`).
        """
        query = arguments.get("query", "")
        if not query:
            return {"error": "query parameter is required"}

        try:
            page_size = min(
                self._positive_int(
                    arguments.get("page_size"), self._DEFAULT_PAGE_SIZE, "page_size"
                ),
                self._MAX_PAGE_SIZE,
            )
            page = self._positive_int(arguments.get("page"), 1, "page")
        except ValueError as exc:
            return {"error": str(exc)}

        params = {
            "query": query,
            "pageSize": page_size,
            "page": page,
        }
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        # ``or []`` also covers an explicit ``"hits": null`` in the payload.
        results = [
            self._format_hit(hit, with_links_and_files)
            for hit in data.get("hits") or []
        ]
        return {
            "data": results,
            "metadata": {
                "source": source,
                "total_hits": data.get("totalHits", 0),
                "page": page,
                "page_size": page_size,
                "query": query,
            },
        }

    def _search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search for biological imaging studies.

        Args:
            arguments: ``query`` (required), optional ``page_size`` and
                ``page``.

        Returns:
            Result dict whose ``data`` entries include ``links`` and
            ``files`` in addition to the common hit fields.
        """
        return self._run_search(
            arguments,
            url=f"{BIOSTUDIES_BASE_URL}/search",
            source=self._SOURCE_ALL,
            with_links_and_files=True,
        )

    def _get_study(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get detailed information about a specific study.

        Args:
            arguments: Must contain a non-empty ``accession``.

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with the title,
            description, organism and imaging method picked out of the
            study's section attributes, or an error dict when ``accession``
            is missing.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP
                failure (handled by :meth:`run`).
        """
        accession = str(arguments.get("accession") or "").strip()
        if not accession:
            return {"error": "accession parameter is required"}

        # Percent-encode the caller-supplied value so characters such as
        # "/", "?" or "#" cannot alter the path of the request.
        encoded_accession = quote(accession, safe="")
        url = f"{BIOSTUDIES_BASE_URL}/studies/{encoded_accession}"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        # Parse top-level attributes into a name -> value mapping
        # (a later duplicate name overwrites an earlier one).
        attributes = data.get("attributes") or []
        attr_dict = {}
        for attr in attributes:
            name = attr.get("name", "")
            if name:
                attr_dict[name] = attr.get("value", "")

        # ``or`` guards against JSON nulls, which ``.get(key, default)``
        # would pass through unchanged.
        section = data.get("section") or {}
        section_type = section.get("type")
        section_attrs = section.get("attributes") or []

        # Extract key metadata from section attributes; names are matched
        # case-insensitively and the last occurrence of a name wins.
        title = None
        description = None
        organism = None
        imaging_method = None
        for attr in section_attrs:
            name = str(attr.get("name") or "").lower()
            value = attr.get("value", "")
            if name == "title":
                title = value
            elif name in ("description", "abstract"):
                description = value
            elif name == "organism":
                organism = value
            elif name in ("imaging method", "imaging_method"):
                imaging_method = value

        return {
            "data": {
                "accession": data.get("accno", accession),
                "type": data.get("type"),
                "title": title or attr_dict.get("Title"),
                "release_date": attr_dict.get("ReleaseDate"),
                "description": description,
                "organism": organism,
                "imaging_method": imaging_method,
                "attributes": attributes,
                "section_type": section_type,
            },
            "metadata": {
                "source": self._SOURCE_ALL,
                "accession": accession,
            },
        }

    def _search_bioimages(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search the BioImages-specific collection.

        Args:
            arguments: ``query`` (required), optional ``page_size`` and
                ``page``.

        Returns:
            Result dict with the common hit fields and the same paging
            metadata as :meth:`_search`.
        """
        return self._run_search(
            arguments,
            url=f"{BIOSTUDIES_BASE_URL}/BioImages/search",
            source=self._SOURCE_BIOIMAGES,
            with_links_and_files=False,
        )