# Source code for tooluniverse.gmrepo_tool

"""
GMrepo Tool - Curated Human Gut Microbiome Repository

Provides access to the GMrepo database for querying gut microbiome species
and their associations with health phenotypes/diseases. GMrepo aggregates
curated 16S/metagenomic data from published human gut microbiome studies.

API base: https://gmrepo.humangut.info/api
No authentication required.

Note: The GMrepo API is a Django application. Some endpoints require POST with
a JSON body and a trailing slash. This tool currently uses only the bulk
listing endpoints (get_all_gut_microbes, get_all_phenotypes) and filters their
results client-side.

Reference: Wu et al., Nucl. Acids Res. 2020
"""

import requests
from typing import Dict, Any, List
from .base_tool import BaseTool
from .tool_registry import register_tool


# Root URL of the GMrepo REST API; endpoint paths are appended to it when
# building request URLs.
GMREPO_BASE = "https://gmrepo.humangut.info/api"


@register_tool("GmrepoTool")
class GmrepoTool(BaseTool):
    """
    Tool for querying the GMrepo gut microbiome database.

    The operation is selected by ``fields.endpoint_type`` in the tool config:
      - search_species: Search gut microbiome species by name (case-insensitive
        substring match against the full species catalog). Each hit reports
        its sample count and number of associated phenotypes.
      - get_phenotypes: List all phenotypes/conditions with microbiome data,
        optionally filtered by keyword.

    Both operations download a bulk listing and filter/sort/truncate it
    locally. Every call returns a dict with ``"status"`` set to ``"success"``
    (plus ``"data"`` and ``"metadata"``) or ``"error"`` (plus ``"error"``).
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the operation chosen in ``tool_config``.

        Args:
            tool_config: Tool configuration. ``tool_config["fields"]["endpoint_type"]``
                selects the operation; it defaults to ``"search_species"``.
        """
        super().__init__(tool_config)
        self.timeout = 60  # bulk endpoints can be slow
        # ``or {}`` tolerates an explicit ``"fields": None`` in the config.
        fields = tool_config.get("fields") or {}
        self.endpoint_type = fields.get("endpoint_type", "search_species")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the GMrepo API call.

        Args:
            arguments: Operation arguments (``query``/``species_name``, ``limit``).

        Returns:
            The operation's result dict. Any exception raised while handling the
            request is converted into ``{"status": "error", "error": ...}``
            instead of propagating.
        """
        handlers = {
            "search_species": self._search_species,
            "get_phenotypes": self._get_phenotypes,
        }
        try:
            handler = handlers.get(self.endpoint_type)
            if handler is None:
                return {
                    "status": "error",
                    "error": f"Unknown endpoint type: {self.endpoint_type}",
                }
            return handler(arguments or {})
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "GMrepo API request timed out"}
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to GMrepo API"}
        except Exception as e:
            # Boundary handler: HTTP errors, bad JSON and bad arguments all end
            # up here so the caller always receives a result dict.
            return {"status": "error", "error": f"GMrepo API error: {str(e)}"}

    def _fetch_listing(self, endpoint: str) -> Dict[str, Any]:
        """POST an empty JSON body to ``endpoint`` and return the decoded JSON.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP errors
                (``raise_for_status`` is called on the response).
        """
        resp = requests.post(
            f"{GMREPO_BASE}/{endpoint}/",
            json={},
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return resp.json()

    @staticmethod
    def _api_error(payload: Dict[str, Any]) -> Dict[str, Any]:
        """Return an error result if ``payload["code"]`` is not 200, else ``{}``.

        The code is compared as text so both ``"200"`` and ``200`` are accepted.
        """
        code = payload.get("code")
        if str(code) == "200":
            return {}
        return {"status": "error", "error": f"GMrepo returned error code: {code}"}

    @staticmethod
    def _as_int(value: Any) -> int:
        """Convert a count field to ``int``; missing or malformed values become 0."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _apply_limit(items: List[Dict[str, Any]], limit: Any) -> List[Dict[str, Any]]:
        """Return at most ``limit`` leading items.

        ``None`` keeps every item (slicing with ``None`` already behaved that
        way). Other values are coerced with ``int()``; negative values are
        clamped to zero rather than trimming items from the end of the list.
        """
        if limit is None:
            return list(items)
        return items[: max(int(limit), 0)]

    def _search_species(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search for gut microbiome species by name.

        Args:
            arguments: ``query`` (or ``species_name`` as a fallback) is required;
                ``limit`` caps the number of returned species (default 20).

        Returns:
            Matching species sorted by ``presented_samples`` (descending), with
            match counts in ``metadata``; or an error result.
        """
        query = arguments.get("query") or arguments.get("species_name", "")
        if not query:
            return {"status": "error", "error": "Missing required parameter: query"}
        limit = arguments.get("limit", 20)

        payload = self._fetch_listing("get_all_gut_microbes")
        failure = self._api_error(payload)
        if failure:
            return failure

        all_species = payload.get("all_species") or []
        needle = query.lower()
        # ``or ""`` guards against entries whose name is present but null.
        matches = [
            sp for sp in all_species if needle in (sp.get("name") or "").lower()
        ]
        total_matches = len(matches)  # counted before truncation

        # Sort by number of presented samples (most abundant first).
        matches.sort(
            key=lambda sp: self._as_int(sp.get("presented_samples")), reverse=True
        )
        results = [
            {
                "ncbi_taxon_id": sp.get("ncbi_taxon_id"),
                "name": sp.get("name"),
                "presented_samples": sp.get("presented_samples"),
                "nr_phenotypes": sp.get("nr_phenotypes"),
                "pct_of_all_samples": sp.get("pct_of_all_samples"),
            }
            for sp in self._apply_limit(matches, limit)
        ]
        return {
            "status": "success",
            "data": results,
            "metadata": {
                "query": query,
                "total_matches": total_matches,
                "returned": len(results),
                "total_species_in_db": len(all_species),
            },
        }

    def _get_phenotypes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List phenotypes/conditions with gut microbiome data.

        Args:
            arguments: Optional ``query`` keeps only phenotypes whose ``term``
                contains it (case-insensitive); ``limit`` caps the number of
                returned phenotypes (default 20).

        Returns:
            Phenotypes sorted by ``valid_runs`` (descending), with counts in
            ``metadata``; or an error result.
        """
        query = arguments.get("query", "")
        limit = arguments.get("limit", 20)

        payload = self._fetch_listing("get_all_phenotypes")
        failure = self._api_error(payload)
        if failure:
            return failure

        all_phenotypes = payload.get("phenotypes") or []
        selected = all_phenotypes
        if query:
            needle = query.lower()
            selected = [
                p for p in all_phenotypes if needle in (p.get("term") or "").lower()
            ]
        total_matches = len(selected)

        # Sort by number of valid runs (most data first). Counts are compared
        # numerically so string-valued counts do not sort lexicographically;
        # ``sorted`` leaves the response list itself untouched.
        ranked = sorted(
            selected, key=lambda p: self._as_int(p.get("valid_runs")), reverse=True
        )
        results = [
            {
                "phenotype_id": p.get("id"),
                "mesh_id": p.get("disease"),
                "term": p.get("term"),
                "all_samples": p.get("all_samples"),
                "valid_runs": p.get("valid_runs"),
                "nr_species": p.get("nr_species"),
                "nr_genus": p.get("nr_genus"),
            }
            for p in self._apply_limit(ranked, limit)
        ]
        return {
            "status": "success",
            "data": results,
            "metadata": {
                "query": query or "(all phenotypes)",
                "total_matches": total_matches,
                "returned": len(results),
                "total_phenotypes_in_db": len(all_phenotypes),
            },
        }