# Source code for tooluniverse.metabolights_tool

"""
MetaboLights Database Tool

This tool provides access to the MetaboLights database, the largest repository
of metabolomics experiments and raw data.
"""

import requests
from typing import Any, Dict
from .base_tool import BaseTool
from .tool_registry import register_tool


@register_tool("MetaboLightsRESTTool")
class MetaboLightsRESTTool(BaseTool):
    """
    MetaboLights REST API tool.

    Generic wrapper for MetaboLights API endpoints defined in
    metabolights_tools.json.

    The request URL is taken from the ``fields.endpoint`` template of the
    tool configuration when one is present; otherwise it is derived from the
    configured tool name. Failures that happen while building or sending the
    request are reported in the dictionary returned by ``run`` (``status`` is
    ``"error"``) instead of being raised.
    """

    # Tools that address the study collection itself.
    _COLLECTION_TOOLS = (
        "metabolights_list_studies",
        "metabolights_search_studies",
    )

    # Path suffix appended to ``/studies/{study_id}`` for single-study tools.
    _STUDY_SUFFIXES = {
        "metabolights_get_study": "",
        "metabolights_get_study_assays": "/assays",
        "metabolights_get_study_samples": "/samples",
        "metabolights_get_study_files": "/files",
    }

    # Arguments forwarded unchanged as query parameters, per tool.
    _PASSTHROUGH_PARAMS = {
        "metabolights_search_studies": ("query", "size", "page"),
        "metabolights_list_studies": ("size", "page"),
        "metabolights_get_study_data_files": ("search_pattern",),
    }

    # Arguments sent as lower-cased strings (Python ``True`` -> ``"true"``).
    _LOWERCASED_PARAMS = {
        "metabolights_get_study_data_files": ("file_match", "folder_match"),
    }

    # Keys under which the API wraps a list payload.
    _TOP_LEVEL_LIST_KEYS = ("protocols", "factors", "files", "assays", "samples")
    _NESTED_LIST_KEYS = ("assays", "protocols", "factors", "files")

    def __init__(self, tool_config: Dict):
        """Store the tool configuration and prepare a reusable HTTP session."""
        super().__init__(tool_config)
        self.base_url = "https://www.ebi.ac.uk/metabolights/ws"
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
        )
        self.timeout = 30

    def _build_url(self, args: Dict[str, Any]) -> str:
        """Build URL from endpoint template and arguments.

        Args:
            args: Tool arguments. With an endpoint template, every
                ``{key}`` placeholder is replaced by ``str(value)``.

        Returns:
            The request URL. When no template is configured and the tool
            name is unknown, or a required ``study_id`` is missing/empty,
            the bare base URL is returned.
        """
        # A configuration without a "fields" section must not abort URL
        # construction; fall through to the name-based rules instead.
        fields = self.tool_config.get("fields") or {}
        endpoint_template = fields.get("endpoint", "")
        if endpoint_template:
            url = endpoint_template
            for key, value in args.items():
                url = url.replace(f"{{{key}}}", str(value))
            return url

        # Build URL based on tool name
        tool_name = self.tool_config.get("name", "")
        if tool_name in self._COLLECTION_TOOLS:
            return f"{self.base_url}/studies"

        suffix = self._STUDY_SUFFIXES.get(tool_name)
        study_id = args.get("study_id", "")
        if suffix is not None and study_id:
            return f"{self.base_url}/studies/{study_id}{suffix}"

        return self.base_url

    def _build_params(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Build query parameters for MetaboLights API.

        Only arguments that are both known for the configured tool and
        present in ``args`` are included.

        Args:
            args: Tool arguments.

        Returns:
            Mapping of query parameter names to values (possibly empty).
        """
        tool_name = self.tool_config.get("name", "")
        params: Dict[str, Any] = {}

        for name in self._PASSTHROUGH_PARAMS.get(tool_name, ()):
            if name in args:
                params[name] = args[name]

        for name in self._LOWERCASED_PARAMS.get(tool_name, ()):
            if name in args:
                params[name] = str(args[name]).lower()

        return params

    def _fetch_study(self, study_id: str) -> Any:
        """GET ``/studies/{study_id}`` and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: On transport failure or a
                4xx/5xx status.
            ValueError: If the body is not valid JSON.
        """
        study_url = f"{self.base_url}/studies/{study_id}"
        response = self.session.get(study_url, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _extract_samples_from_study(self, study_id: str) -> Dict[str, Any]:
        """Extract sample information from study endpoint as fallback.

        Args:
            study_id: Study identifier inserted into the study URL.

        Returns:
            ``{"samples": [...], "note": str, "count": int}`` on success, or
            ``{"error": str}`` if the study could not be fetched or parsed.
        """
        try:
            study_data = self._fetch_study(study_id)

            samples_info = {
                "samples": [],
                "note": "Samples extracted from study endpoint (samples API endpoint unavailable)",
            }

            # Extract samples from ISA investigation structure
            if "isaInvestigation" in study_data:
                isa = study_data["isaInvestigation"]
                if "studies" in isa and isinstance(isa["studies"], list):
                    for study_item in isa["studies"]:
                        if not isinstance(study_item, dict):
                            continue
                        # "materials" first, then "samples"; only dict
                        # entries of list values are collected.
                        for key in ("materials", "samples"):
                            entries = study_item.get(key)
                            if isinstance(entries, list):
                                samples_info["samples"].extend(
                                    entry
                                    for entry in entries
                                    if isinstance(entry, dict)
                                )

            samples_info["count"] = len(samples_info["samples"])
            return samples_info

        except Exception as e:
            # Best-effort fallback: any failure is reported, never raised.
            return {"error": f"Failed to extract samples from study endpoint: {str(e)}"}

    def _extract_files_from_study(self, study_id: str) -> Dict[str, Any]:
        """Extract file information from study endpoint as fallback.

        Args:
            study_id: Study identifier inserted into the study URL.

        Returns:
            ``{"files": [...], "file_urls": {...}, "note": str, "count": int,
            "has_urls": bool}`` on success, or ``{"error": str}`` if the
            study could not be fetched or parsed.
        """
        try:
            study_data = self._fetch_study(study_id)

            files_info = {
                "files": [],
                "file_urls": {},
                "note": "Files extracted from study endpoint (files API endpoint unavailable)",
            }

            # Extract file URLs from study data
            if "mtblsStudy" in study_data:
                study = study_data["mtblsStudy"]
                for label, field in (
                    ("ftp", "studyFtpUrl"),
                    ("http", "studyHttpUrl"),
                    ("globus", "studyGlobusUrl"),
                ):
                    if field in study and study[field]:
                        files_info["file_urls"][label] = study[field]

            # Extract ISA investigation file references
            if "isaInvestigation" in study_data:
                isa = study_data["isaInvestigation"]

                if "filename" in isa and isa["filename"]:
                    files_info["files"].append(
                        {
                            "name": isa["filename"],
                            "type": "investigation",
                            "source": "isaInvestigation",
                        }
                    )

                if "studies" in isa and isinstance(isa["studies"], list):
                    for study_item in isa["studies"]:
                        if isinstance(study_item, dict) and "filename" in study_item:
                            files_info["files"].append(
                                {
                                    "name": study_item["filename"],
                                    "type": "study",
                                    "source": "isaInvestigation.studies",
                                }
                            )

            files_info["count"] = len(files_info["files"])
            files_info["has_urls"] = bool(files_info["file_urls"])
            return files_info

        except Exception as e:
            # Best-effort fallback: any failure is reported, never raised.
            return {"error": f"Failed to extract files from study endpoint: {str(e)}"}

    def _known_failure_result(
        self,
        tool_name: str,
        status_code: int,
        arguments: Dict[str, Any],
        url: str,
    ) -> Any:
        """Return a substitute result for the two known-bad endpoints.

        Handles a 400 from the samples endpoint and a 500 from the files
        endpoint by reading the data from the study endpoint instead.

        Returns:
            A complete ``run`` result dictionary, or ``None`` when the
            response is not one of the handled cases and should be processed
            normally.
        """
        if tool_name == "metabolights_get_study_samples" and status_code == 400:
            study_id = arguments.get("study_id", "")
            if not study_id:
                # Nothing to fall back to; the caller reports the HTTP error.
                return None
            fallback_data = self._extract_samples_from_study(study_id)
            if "error" in fallback_data:
                return {
                    "status": "error",
                    "error": f"Samples endpoint returned 400 error for study {study_id}. Fallback to study endpoint also failed.",
                    "url": url,
                    "suggestion": f"Access samples via MetaboLights website: https://www.ebi.ac.uk/metabolights/studies/{study_id}",
                }
            return {
                "status": "success",
                "data": fallback_data.get("samples", []),
                "url": url,
                "count": fallback_data.get("count", 0),
                "note": fallback_data.get("note", ""),
                "fallback_used": True,
                "original_error": "Samples endpoint returned 400 error, used study endpoint fallback",
            }

        if tool_name == "metabolights_get_study_files" and status_code == 500:
            study_id = arguments.get("study_id", "")
            if not study_id:
                return {
                    "status": "error",
                    "error": "Files endpoint returned server error (500). This is a known MetaboLights API issue.",
                    "url": url,
                    "suggestion": "The files endpoint is currently unavailable. Try accessing files via the MetaboLights website directly.",
                }
            fallback_data = self._extract_files_from_study(study_id)
            if "error" in fallback_data:
                return {
                    "status": "error",
                    "error": f"Files endpoint returned server error (500) for study {study_id}. Fallback to study endpoint also failed.",
                    "url": url,
                    "suggestion": f"Access files via MetaboLights website: https://www.ebi.ac.uk/metabolights/studies/{study_id} or use FTP/HTTP URLs if available in study data.",
                }
            return {
                "status": "success",
                "data": fallback_data.get("files", []),
                "file_urls": fallback_data.get("file_urls", {}),
                "url": url,
                "count": fallback_data.get("count", 0),
                "note": fallback_data.get("note", ""),
                "fallback_used": True,
                "original_error": "Files endpoint returned 500 error, used study endpoint fallback",
            }

        return None

    @classmethod
    def _unwrap_payload(cls, data: Any):
        """Pull the list out of a wrapper object returned by the API.

        Some endpoints return ``{'protocols': [...]}``, ``{'factors': [...]}``,
        ``{'files': [...]}`` or a nested ``{'data': {'assays': [...]}}``.

        Returns:
            ``(extracted, count)``. ``extracted`` is the unwrapped list, or
            ``data`` itself when no wrapper is recognised, in which case
            ``count`` is ``None``.
        """
        extracted, count = data, None

        if isinstance(data, dict):
            for key in cls._TOP_LEVEL_LIST_KEYS:
                if key in data and isinstance(data[key], list):
                    extracted, count = data[key], len(data[key])
                    break

            # A list nested under "data" takes precedence over a top-level
            # match when both are present.
            nested = data.get("data")
            if isinstance(nested, dict):
                for key in cls._NESTED_LIST_KEYS:
                    if key in nested and isinstance(nested[key], list):
                        extracted, count = nested[key], len(nested[key])
                        break

            # Fallback: check for other common patterns
            if count is None:
                for key in ("content", "studies"):
                    if key in data and isinstance(data[key], list):
                        extracted, count = data[key], len(data[key])
                        break
        elif isinstance(data, list):
            count = len(data)

        return extracted, count

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the MetaboLights API call.

        Args:
            arguments: Tool arguments used for the URL and query parameters.

        Returns:
            On success: ``{"status": "success", "data": ..., "url": ...}``
            plus ``"count"`` when the payload is (or wraps) a list. Results
            produced through the study-endpoint fallback also carry
            ``"fallback_used": True``. On failure: ``{"status": "error",
            "error": str, "url": ...}``, where ``url`` is ``None`` if the URL
            could not be built.
        """
        tool_name = self.tool_config.get("name", "")
        url = None  # stays None if URL construction itself fails

        try:
            url = self._build_url(arguments)
            params = self._build_params(arguments)
            response = self.session.get(url, params=params, timeout=self.timeout)

            # Known endpoint failures are answered here, before
            # raise_for_status(), so the except clauses below never need to
            # repeat the fallback.
            substitute = self._known_failure_result(
                tool_name, response.status_code, arguments, url
            )
            if substitute is not None:
                return substitute

            response.raise_for_status()
            extracted_data, count = self._unwrap_payload(response.json())

            response_data = {
                "status": "success",
                "data": extracted_data,
                "url": response.url,
            }
            if count is not None:
                response_data["count"] = count
            return response_data

        except requests.exceptions.RequestException as e:
            # Covers HTTPError from raise_for_status() as well as transport
            # failures (connection errors, timeouts).
            return {
                "status": "error",
                "error": f"MetaboLights API error: {str(e)}",
                "url": url,
            }
        except Exception as e:
            return {
                "status": "error",
                "error": f"Unexpected error: {str(e)}",
                "url": url,
            }