# Source code for tooluniverse.bioregistry_tool

"""
Bioregistry API tool for ToolUniverse.

Bioregistry is a community-curated meta-registry of biological databases,
ontologies, and other resources. It provides a unified way to resolve
identifiers across 2600+ databases.

API: https://bioregistry.io/apidocs/
No authentication required.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

BIOREGISTRY_API_URL = "https://bioregistry.io/api"


@register_tool("BioregistryTool")
class BioregistryTool(BaseTool):
    """
    Tool for querying the Bioregistry meta-registry.

    Provides:
    - Identifier resolution across 2600+ databases
    - Registry metadata (prefix, name, pattern, providers)
    - Search across all registered resources

    Every operation returns a dict with a ``"status"`` key. On
    ``"success"`` the payload is under ``"data"``; on ``"error"`` a
    human-readable message is under ``"error"``. The operation methods
    catch exceptions and report them as error dicts.
    """

    # Number of search hits returned when the caller gives no usable limit.
    _DEFAULT_SEARCH_LIMIT = 10
    # Search-hit descriptions are cut to this many characters.
    _DESCRIPTION_MAX_CHARS = 200

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the settings read from ``tool_config``.

        Args:
            tool_config: Tool configuration. ``"timeout"`` (default 30) is
                passed as the ``timeout`` of every HTTP request;
                ``"parameter"`` (default ``{}``) is kept on the instance.
        """
        super().__init__(tool_config)
        self.timeout: int = tool_config.get("timeout", 30)
        # Not read by any method in this class; kept for callers/base class.
        self.parameter = tool_config.get("parameter", {})

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute Bioregistry API call based on operation type.

        Args:
            arguments: Call arguments. ``"operation"`` selects the handler:
                ``resolve_reference``, ``get_registry`` or
                ``search_registries``. When it is missing or empty, the
                value of ``self.get_schema_const_operation()`` is used
                (presumably supplied by ``BaseTool`` - not defined here).

        Returns:
            The handler's result dict, or an error dict for an unknown
            operation.
        """
        operation = arguments.get("operation", "")
        if not operation:
            operation = self.get_schema_const_operation()

        handlers = {
            "resolve_reference": self._resolve_reference,
            "get_registry": self._get_registry,
            "search_registries": self._search_registries,
        }
        # Only strings can name an operation; the isinstance check also
        # keeps unhashable values away from the dict lookup.
        handler = handlers.get(operation) if isinstance(operation, str) else None
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Supported: resolve_reference, get_registry, search_registries",
            }
        return handler(arguments)

    def _resolve_reference(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Resolve a compact identifier (prefix:id) to provider URLs.

        Args:
            arguments: Must contain non-empty ``"prefix"`` and
                ``"identifier"``.

        Returns:
            On success, ``data`` holds ``prefix``, ``identifier``,
            ``providers`` (taken from the response) and ``provider_count``.
            Otherwise an error dict.
        """
        prefix = arguments.get("prefix", "")
        identifier = arguments.get("identifier", "")
        if not prefix or not identifier:
            return {
                "status": "error",
                "error": "Both 'prefix' and 'identifier' are required (e.g., prefix='uniprot', identifier='P04637')",
            }

        try:
            # Percent-encode the path components so characters such as
            # spaces, '?', '#' or '%' cannot alter or truncate the request
            # path. ':' and '/' stay literal in the identifier so ordinary
            # identifiers produce the same URL as before.
            safe_prefix = quote(str(prefix), safe="")
            safe_identifier = quote(str(identifier), safe="/:")
            url = f"{BIOREGISTRY_API_URL}/reference/{safe_prefix}:{safe_identifier}"
            resp = requests.get(url, timeout=self.timeout)
            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"HTTP {resp.status_code}: Could not resolve {prefix}:{identifier}",
                }

            data = resp.json()
            if not isinstance(data, dict):
                return {
                    "status": "error",
                    "error": f"Unexpected response format while resolving {prefix}:{identifier}",
                }
            # A JSON null for "providers" would otherwise break len().
            providers = data.get("providers") or {}
            return {
                "status": "success",
                "data": {
                    "prefix": prefix,
                    "identifier": identifier,
                    "providers": providers,
                    "provider_count": len(providers),
                },
            }
        except Exception as e:
            # Boundary catch: network and JSON decoding failures are
            # reported to the caller as an error dict instead of raising.
            return {"status": "error", "error": str(e)}

    def _get_registry(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get metadata for a specific registry/database by prefix.

        Args:
            arguments: Must contain a non-empty ``"prefix"``.

        Returns:
            On success, ``data`` holds ``prefix``, ``name``,
            ``description``, ``homepage``, ``pattern``, ``uri_format``,
            ``example`` and ``keywords``, plus ``synonyms`` and up to five
            ``publications`` when the response has them. Otherwise an
            error dict (with a dedicated message for HTTP 404).
        """
        prefix = arguments.get("prefix", "")
        if not prefix:
            return {
                "status": "error",
                "error": "Parameter 'prefix' is required (e.g., 'uniprot', 'chebi', 'go')",
            }

        try:
            # Percent-encode so the prefix cannot inject extra path
            # segments or a query string into the URL.
            url = f"{BIOREGISTRY_API_URL}/registry/{quote(str(prefix), safe='')}"
            resp = requests.get(url, timeout=self.timeout)
            if resp.status_code == 404:
                return {
                    "status": "error",
                    "error": f"Registry prefix '{prefix}' not found. Try search_registries to find the correct prefix.",
                }
            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"HTTP {resp.status_code} from Bioregistry",
                }

            data = resp.json()
            if not isinstance(data, dict):
                return {
                    "status": "error",
                    "error": f"Unexpected response format for registry prefix '{prefix}'",
                }

            result = {
                "prefix": data.get("prefix", prefix),
                "name": data.get("name", ""),
                "description": data.get("description", ""),
                "homepage": data.get("homepage", ""),
                "pattern": data.get("pattern", ""),
                "uri_format": data.get("uri_format", ""),
                "example": data.get("example", ""),
                "keywords": data.get("keywords", []),
            }
            synonyms = data.get("synonyms")
            if synonyms:
                result["synonyms"] = synonyms
            publications = data.get("publications")
            if publications:
                # Cap at five entries; only a list can be sliced safely.
                if isinstance(publications, list):
                    publications = publications[:5]
                result["publications"] = publications
            return {"status": "success", "data": result}
        except Exception as e:
            # Boundary catch: report failures as an error dict.
            return {"status": "error", "error": str(e)}

    def _search_registries(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search across all registered resources.

        Args:
            arguments: Must contain a non-empty ``"query"``. Optional
                ``"limit"`` caps the number of hits examined (default 10;
                missing, ``None`` or non-numeric values use the default,
                negative values are treated as 0).

        Returns:
            On success, ``data`` holds ``query``, ``results`` (a list of
            ``{"prefix", "name", "description"}`` dicts) and ``total``
            (the number of hits in the response before limiting).
            Otherwise an error dict.
        """
        query = arguments.get("query", "")
        if not query:
            return {
                "status": "error",
                "error": "Parameter 'query' is required (e.g., 'protein', 'gene ontology')",
            }

        limit = self._normalize_limit(
            arguments.get("limit"), self._DEFAULT_SEARCH_LIMIT
        )

        try:
            url = f"{BIOREGISTRY_API_URL}/search"
            resp = requests.get(url, params={"q": query}, timeout=self.timeout)
            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"HTTP {resp.status_code} from Bioregistry search",
                }

            results = resp.json()
            if not isinstance(results, list):
                results = []

            # Hits of an unrecognized shape are skipped rather than failing
            # the whole search.
            formatted = (self._format_search_hit(item) for item in results[:limit])
            trimmed = [hit for hit in formatted if hit is not None]

            return {
                "status": "success",
                "data": {
                    "query": query,
                    "results": trimmed,
                    "total": len(results),
                },
            }
        except Exception as e:
            # Boundary catch: report failures as an error dict.
            return {"status": "error", "error": str(e)}

    @staticmethod
    def _normalize_limit(value: Any, default: int) -> int:
        """Return ``value`` as a non-negative int, or ``default`` if unusable."""
        if value is None:
            return default
        try:
            number = int(value)
        except (TypeError, ValueError):
            return default
        # A negative slice bound would drop hits from the tail of the list.
        return max(number, 0)

    @classmethod
    def _format_search_hit(cls, item: Any) -> Any:
        """Map one raw search hit to a prefix/name/description dict, or None."""
        if isinstance(item, list) and len(item) >= 1:
            return {
                "prefix": item[0],
                "name": item[1] if len(item) > 1 else "",
                "description": "",
            }
        if isinstance(item, dict):
            description = item.get("description")
            if not isinstance(description, str):
                # Missing or non-text descriptions are reported as empty.
                description = ""
            return {
                "prefix": item.get("prefix", ""),
                "name": item.get("name", ""),
                "description": description[: cls._DESCRIPTION_MAX_CHARS],
            }
        if isinstance(item, str):
            return {"prefix": item, "name": "", "description": ""}
        return None