# Source code for tooluniverse.mgnify_expanded_tool

# mgnify_expanded_tool.py
"""
MGnify Expanded REST API tool for ToolUniverse.

MGnify (formerly EBI Metagenomics) provides analysis and archiving of
metagenomics data. This expanded tool covers genomes, taxonomy, biomes,
and samples - complementing the existing study/analysis search tools.

API: https://www.ebi.ac.uk/metagenomics/api/v1
No authentication required. Free for academic/research use.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Root URL of the MGnify REST API (v1). The endpoint handlers in this module
# append their resource path (e.g. "/genomes/<id>", "/biomes", "/studies/<id>").
MGNIFY_BASE_URL = "https://www.ebi.ac.uk/metagenomics/api/v1"


@register_tool("MGnifyExpandedTool")
class MGnifyExpandedTool(BaseTool):
    """
    Expanded tool for querying the MGnify metagenomics database.

    One instance serves exactly one endpoint, selected by the ``fields``
    section of its tool configuration:

    * ``endpoint_type="genome"``, ``query_mode="detail"`` - genome record
    * ``endpoint_type="genome"``, ``query_mode="search"`` - genome search
      (only available when a ``_genome_search`` handler is defined)
    * ``endpoint_type="biome"``, ``query_mode="list"`` - biome listing
    * ``endpoint_type="study"``, ``query_mode="detail"`` - study record

    No authentication required.
    """

    # (endpoint_type, query_mode) -> name of the method that handles it.
    # Handlers are resolved by name at call time so that a missing handler
    # produces a clear error instead of an AttributeError.
    _HANDLERS = {
        ("genome", "detail"): "_genome_detail",
        ("genome", "search"): "_genome_search",
        ("biome", "list"): "_biome_list",
        ("study", "detail"): "_study_detail",
    }

    # (output key, JSON:API attribute name) pairs copied into a genome result.
    _GENOME_ATTRIBUTES = (
        ("accession", "accession"),
        ("type", "type"),
        ("taxonomy", "taxon-lineage"),
        ("length", "length"),
        ("num_contigs", "num-contigs"),
        ("n50", "n-50"),
        ("gc_content", "gc-content"),
        ("completeness", "completeness"),
        ("contamination", "contamination"),
        ("num_proteins", "num-proteins"),
        ("rna_16s", "rna-16s"),
        ("rna_23s", "rna-23s"),
        ("trnas", "trnas"),
        ("geographic_origin", "geographic-origin"),
        ("geographic_range", "geographic-range"),
        ("ena_genome_accession", "ena-genome-accession"),
        ("ena_sample_accession", "ena-sample-accession"),
        ("pangenome_size", "pangenome-size"),
        ("pangenome_core_size", "pangenome-core-size"),
        ("pangenome_accessory_size", "pangenome-accessory-size"),
        ("eggnog_coverage", "eggnog-coverage"),
        ("ipr_coverage", "ipr-coverage"),
    )

    # (output key, JSON:API attribute name) pairs copied into a study result.
    _STUDY_ATTRIBUTES = (
        ("study_name", "study-name"),
        ("study_abstract", "study-abstract"),
        ("bioproject", "bioproject"),
        ("centre_name", "centre-name"),
        ("is_public", "is-public"),
        ("last_update", "last-update"),
    )

    _DEFAULT_PAGE_SIZE = 25
    _MAX_PAGE_SIZE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Read the request timeout and the endpoint selection from the config.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 60)
                and ``fields.endpoint_type`` / ``fields.query_mode``
                (defaults ``"genome"`` / ``"detail"``) are consumed here.
        """
        super().__init__(tool_config)
        # ``fields`` may be present but null in a config file.
        fields = tool_config.get("fields") or {}
        self.timeout = tool_config.get("timeout", 60)
        self.endpoint_type = fields.get("endpoint_type", "genome")
        self.query_mode = fields.get("query_mode", "detail")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the MGnify API call.

        Args:
            arguments: Call arguments for the configured endpoint; ``None``
                is treated as an empty mapping.

        Returns:
            ``{"data": ..., "metadata": ...}`` on success, or ``{"error": ...}``
            when the request fails. Exceptions are never propagated.
        """
        try:
            return self._dispatch(arguments or {})
        except requests.exceptions.Timeout:
            return {
                "error": f"MGnify API request timed out after {self.timeout} seconds"
            }
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to MGnify API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # HTTPError can be raised without an attached response object.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"MGnify API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error querying MGnify: {str(e)}"}

    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured endpoint_type/query_mode."""
        route = f"{self.endpoint_type}/{self.query_mode}"
        handler_name = self._HANDLERS.get((self.endpoint_type, self.query_mode))
        if handler_name is None:
            return {"error": f"Unknown endpoint_type/query_mode: {route}"}

        handler = getattr(self, handler_name, None)
        if handler is None:
            return {
                "error": f"No handler implemented for endpoint_type/query_mode: {route}"
            }
        return handler(arguments)

    @staticmethod
    def _dig(mapping: Any, *keys: str, default: Any = None) -> Any:
        """
        Walk nested dictionaries, tolerating missing keys and null values.

        Returns ``default`` when any level is not a dict, a key is absent,
        or the final value is ``None``.
        """
        current = mapping
        for key in keys:
            if not isinstance(current, dict):
                return default
            current = current.get(key)
        return default if current is None else current

    @staticmethod
    def _clean_identifier(value: Any) -> str:
        """Return ``value`` as a stripped string, or ``""`` when it is None."""
        if value is None:
            return ""
        return str(value).strip()

    def _get_json(self, path: str, params: Dict[str, Any] = None) -> Any:
        """
        GET ``MGNIFY_BASE_URL/<path>`` and return the decoded JSON body.

        ``format=json`` is always sent; ``params`` are added after it.

        Raises:
            requests.exceptions.RequestException: on transport failure or a
                non-2xx status (via ``raise_for_status``).
        """
        query: Dict[str, Any] = {"format": "json"}
        if params:
            query.update(params)
        response = requests.get(
            f"{MGNIFY_BASE_URL}/{path}", params=query, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def _page_size(self, value: Any) -> int:
        """
        Coerce a requested page size into the range 1..100.

        A missing, null or non-numeric value falls back to the default of 25.
        """
        if value is None:
            return self._DEFAULT_PAGE_SIZE
        try:
            size = int(value)
        except (TypeError, ValueError):
            return self._DEFAULT_PAGE_SIZE
        return max(1, min(size, self._MAX_PAGE_SIZE))

    def _genome_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed information about a MGnify genome.

        Args:
            arguments: Must contain ``genome_id`` (e.g. ``MGYG000000001``).

        Returns:
            The flattened genome attributes under ``data`` plus request
            ``metadata``, or an ``error`` dict when ``genome_id`` is missing.
        """
        genome_id = self._clean_identifier(arguments.get("genome_id", ""))
        if not genome_id:
            return {"error": "genome_id parameter is required (e.g., MGYG000000001)"}

        # Percent-encode the id so it cannot alter the request path or query.
        raw = self._get_json(f"genomes/{quote(genome_id, safe='')}")
        data = self._dig(raw, "data", default={})
        attrs = self._dig(data, "attributes", default={})

        result = {"genome_id": self._dig(data, "id")}
        for out_key, attr_name in self._GENOME_ATTRIBUTES:
            result[out_key] = self._dig(attrs, attr_name)

        return {
            "data": result,
            "metadata": {
                "source": "MGnify",
                "query": genome_id,
                "endpoint": "genomes/detail",
            },
        }

    def _biome_list(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Browse/search MGnify biome hierarchy.

        Args:
            arguments: Optional ``page_size`` (clamped to 1..100, default 25),
                ``page`` and ``depth``; the latter two are forwarded as given.

        Returns:
            A list of biome summaries under ``data`` and pagination details
            under ``metadata``.
        """
        params: Dict[str, Any] = {
            "page_size": self._page_size(arguments.get("page_size"))
        }
        for optional in ("page", "depth"):
            if optional in arguments:
                params[optional] = arguments[optional]

        raw = self._get_json("biomes", params)

        results = []
        for item in self._dig(raw, "data", default=[]):
            attrs = self._dig(item, "attributes", default={})
            results.append(
                {
                    "biome_id": self._dig(item, "id"),
                    "biome_name": self._dig(attrs, "biome-name"),
                    "samples_count": self._dig(attrs, "samples-count"),
                }
            )

        pagination = self._dig(raw, "meta", "pagination", default={})
        return {
            "data": results,
            "metadata": {
                "total_results": self._dig(pagination, "count", default=len(results)),
                "page": self._dig(pagination, "page", default=1),
                "pages": self._dig(pagination, "pages"),
                "source": "MGnify",
                "endpoint": "biomes",
            },
        }

    def _study_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed information about a specific MGnify study.

        Args:
            arguments: Must contain ``study_accession`` (e.g. ``MGYS00002008``).

        Returns:
            The flattened study attributes and relationship counts under
            ``data`` plus request ``metadata``, or an ``error`` dict when
            ``study_accession`` is missing.
        """
        study_accession = self._clean_identifier(arguments.get("study_accession", ""))
        if not study_accession:
            return {
                "error": "study_accession parameter is required (e.g., MGYS00002008)"
            }

        # Percent-encode the accession so it cannot alter the request path.
        raw = self._get_json(f"studies/{quote(study_accession, safe='')}")
        data = self._dig(raw, "data", default={})
        attrs = self._dig(data, "attributes", default={})
        rels = self._dig(data, "relationships", default={})

        result = {"study_id": self._dig(data, "id")}
        for out_key, attr_name in self._STUDY_ATTRIBUTES:
            result[out_key] = self._dig(attrs, attr_name)
        result["analyses_count"] = self._dig(rels, "analyses", "meta", "count")
        result["downloads_count"] = self._dig(rels, "downloads", "meta", "count")
        result["biomes"] = [
            self._dig(biome, "id")
            for biome in self._dig(rels, "biomes", "data", default=[])
        ]

        return {
            "data": result,
            "metadata": {
                "source": "MGnify",
                "query": study_accession,
                "endpoint": "studies/detail",
            },
        }