# Source code for tooluniverse.bioregistry_tool

"""
Bioregistry API tool for ToolUniverse.

Bioregistry is a community-curated meta-registry of biological databases,
ontologies, and other resources. It provides a unified way to resolve
identifiers across 2600+ databases.

API: https://bioregistry.io/apidocs/
No authentication required.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

BIOREGISTRY_API_URL = "https://bioregistry.io/api"



[文档]
@register_tool("BioregistryTool")
class BioregistryTool(BaseTool):
    """
    Tool for querying the Bioregistry meta-registry.

    Provides:
    - Identifier resolution across 2600+ databases
    - Registry metadata (prefix, name, pattern, providers)
    - Search across all registered resources
    """


[文档]
    def __init__(self, tool_config: Dict[str, Any]):
        """Initialise the tool from its configuration mapping.

        Args:
            tool_config: Tool configuration. It is forwarded to the base
                class, and two optional keys are read here: ``"timeout"``
                (default ``30``), used as the ``timeout`` argument of every
                HTTP request this class issues, and ``"parameter"``
                (default ``{}``).
        """
        super().__init__(tool_config)
        configured_timeout = tool_config.get("timeout", 30)
        configured_parameter = tool_config.get("parameter", {})
        self.timeout: int = configured_timeout
        self.parameter = configured_parameter



[文档]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute Bioregistry API call based on operation type."""
        operation = arguments.get("operation", "")
        if not operation:
            operation = self.get_schema_const_operation()

        if operation == "resolve_reference":
            return self._resolve_reference(arguments)
        elif operation == "get_registry":
            return self._get_registry(arguments)
        elif operation == "search_registries":
            return self._search_registries(arguments)
        else:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Supported: resolve_reference, get_registry, search_registries",
            }



[文档]
    def _resolve_reference(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Resolve a compact identifier (prefix:id) to provider URLs."""
        prefix = arguments.get("prefix", "")
        identifier = arguments.get("identifier", "")
        if not prefix or not identifier:
            return {
                "status": "error",
                "error": "Both 'prefix' and 'identifier' are required (e.g., prefix='uniprot', identifier='P04637')",
            }
        try:
            url = f"{BIOREGISTRY_API_URL}/reference/{prefix}:{identifier}"
            resp = requests.get(url, timeout=self.timeout)
            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"HTTP {resp.status_code}: Could not resolve {prefix}:{identifier}",
                }
            data = resp.json()
            providers = data.get("providers", {})
            return {
                "status": "success",
                "data": {
                    "prefix": prefix,
                    "identifier": identifier,
                    "providers": providers,
                    "provider_count": len(providers),
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}



[文档]
    def _get_registry(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get metadata for a specific registry/database by prefix."""
        prefix = arguments.get("prefix", "")
        if not prefix:
            return {
                "status": "error",
                "error": "Parameter 'prefix' is required (e.g., 'uniprot', 'chebi', 'go')",
            }
        try:
            url = f"{BIOREGISTRY_API_URL}/registry/{prefix}"
            resp = requests.get(url, timeout=self.timeout)
            if resp.status_code == 404:
                return {
                    "status": "error",
                    "error": f"Registry prefix '{prefix}' not found. Try search_registries to find the correct prefix.",
                }
            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"HTTP {resp.status_code} from Bioregistry",
                }
            data = resp.json()
            result = {
                "prefix": data.get("prefix", prefix),
                "name": data.get("name", ""),
                "description": data.get("description", ""),
                "homepage": data.get("homepage", ""),
                "pattern": data.get("pattern", ""),
                "uri_format": data.get("uri_format", ""),
                "example": data.get("example", ""),
                "keywords": data.get("keywords", []),
            }
            if data.get("synonyms"):
                result["synonyms"] = data["synonyms"]
            if data.get("publications"):
                result["publications"] = data["publications"][:5]
            return {"status": "success", "data": result}
        except Exception as e:
            return {"status": "error", "error": str(e)}



[文档]
    def _search_registries(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search across all registered resources."""
        query = arguments.get("query", "")
        if not query:
            return {
                "status": "error",
                "error": "Parameter 'query' is required (e.g., 'protein', 'gene ontology')",
            }
        limit = arguments.get("limit", 10)
        try:
            url = f"{BIOREGISTRY_API_URL}/search"
            resp = requests.get(url, params={"q": query}, timeout=self.timeout)
            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"HTTP {resp.status_code} from Bioregistry search",
                }
            results = resp.json()
            if not isinstance(results, list):
                results = []
            trimmed = []
            for item in results[:limit]:
                if isinstance(item, list) and len(item) >= 1:
                    trimmed.append(
                        {
                            "prefix": item[0],
                            "name": item[1] if len(item) > 1 else "",
                            "description": "",
                        }
                    )
                elif isinstance(item, dict):
                    trimmed.append(
                        {
                            "prefix": item.get("prefix", ""),
                            "name": item.get("name", ""),
                            "description": item.get("description", "")[:200]
                            if item.get("description")
                            else "",
                        }
                    )
                elif isinstance(item, str):
                    trimmed.append({"prefix": item, "name": "", "description": ""})
            return {
                "status": "success",
                "data": {
                    "query": query,
                    "results": trimmed,
                    "total": len(results),
                },
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}