"""Source code for tooluniverse.encode_tool."""

import json
import requests
from typing import Any, Dict, Optional
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from tooluniverse.tool_registry import register_tool
from tooluniverse.base_rest_tool import BaseRESTTool
from tooluniverse.exceptions import (
    ToolError,
    ToolAuthError,
    ToolRateLimitError,
    ToolUnavailableError,
    ToolValidationError,
    ToolConfigError,
    ToolDependencyError,
    ToolServerError,
)


def _http_get(
    url: str,
    headers: Dict[str, str] | None = None,
    timeout: int = 30,
) -> Dict[str, Any]:
    req = Request(url, headers=headers or {})
    try:
        with urlopen(req, timeout=timeout) as resp:
            data = resp.read()
            try:
                return json.loads(data.decode("utf-8", errors="ignore"))
            except Exception:
                return {"raw": data.decode("utf-8", errors="ignore")}
    except HTTPError as e:
        # ENCODE API may return 404 even with valid JSON data
        # Read the response body from the error
        try:
            data = e.read()
            parsed = json.loads(data.decode("utf-8", errors="ignore"))
            # If we got valid JSON, return it even though status was 404
            return parsed
        except Exception:
            # If we can't parse, re-raise the original error
            raise


def _handle_encode_error(exception: Exception) -> ToolError:
    """Map an exception onto a ``ToolError`` subclass shared by all ENCODE tools.

    Classification is a case-insensitive substring search over
    ``str(exception)``. Rules are evaluated top to bottom and the first rule
    with a matching keyword decides the result; when nothing matches a
    ``ToolServerError`` is returned.

    Args:
        exception: The exception to classify.

    Returns:
        A new ``ToolError`` instance (not raised) whose message embeds the
        original exception text.
    """
    message = str(exception).lower()

    # (keywords, error class, message label) -- order defines precedence.
    rules = (
        (
            ("auth", "unauthorized", "401", "403", "api key", "token"),
            ToolAuthError,
            "Authentication failed",
        ),
        (
            ("rate limit", "429", "quota", "limit exceeded"),
            ToolRateLimitError,
            "Rate limit exceeded",
        ),
        (
            ("unavailable", "timeout", "connection", "network", "not found", "404"),
            ToolUnavailableError,
            "Tool unavailable",
        ),
        (
            ("validation", "invalid", "schema", "parameter"),
            ToolValidationError,
            "Validation error",
        ),
        (
            ("config", "configuration", "setup"),
            ToolConfigError,
            "Configuration error",
        ),
        (
            ("import", "module", "dependency", "package"),
            ToolDependencyError,
            "Dependency error",
        ),
    )

    for keywords, error_cls, label in rules:
        for keyword in keywords:
            if keyword in message:
                return error_cls(f"{label}: {exception}")

    return ToolServerError(f"Unexpected error: {exception}")


@register_tool(
    "ENCODESearchTool",
    config={
        "name": "ENCODE_search_experiments",
        "type": "ENCODESearchTool",
        "description": "Search ENCODE experiments",
        "parameter": {
            "type": "object",
            "properties": {
                "assay_title": {"type": "string"},
                "target": {"type": "string"},
                "organism": {"type": "string"},
                "status": {"type": "string", "default": "released"},
                "limit": {"type": "integer", "default": 10},
            },
        },
        "settings": {"base_url": "https://www.encodeproject.org", "timeout": 30},
    },
)
class ENCODESearchTool:
    """
    Generic search tool for ENCODE database.

    Searches experiments, files, or biosamples depending on search_type in
    config. Consolidates ENCODESearchTool, ENCODEFilesTool, and
    ENCODESearchBiosamplesTool.
    """

    def __init__(self, tool_config=None):
        """Store the tool configuration (an empty dict when none is given)."""
        self.tool_config = tool_config or {}

    def handle_error(self, exception: Exception) -> ToolError:
        """Classify exceptions into structured ToolError."""
        return _handle_encode_error(exception)

    def run(self, arguments: Dict[str, Any]):
        """Run a search against the ENCODE ``/search/`` endpoint.

        The base URL is taken from ``fields.endpoint`` in the tool config,
        falling back to ``settings.base_url`` and finally to the public
        ENCODE search URL. The object type searched comes from
        ``fields.search_type`` (default ``"Experiment"``).

        Args:
            arguments: Query parameters. Entries whose value is ``None`` are
                skipped; all others are added to the query string after the
                ``type`` and ``format`` defaults, so they can override them.

        Returns:
            On success, a dict with ``source``, ``endpoint``, ``query``,
            ``data`` and ``success=True``. On any exception raised by the
            request, a dict with ``error``, ``source``, ``endpoint`` and
            ``success=False``.
        """
        # Read from fields.endpoint or settings.base_url
        fields = self.tool_config.get("fields", {})
        settings = self.tool_config.get("settings", {})
        endpoint = fields.get(
            "endpoint",
            settings.get("base_url", "https://www.encodeproject.org/search/"),
        )

        # Reduce the endpoint to a bare base URL with no trailing slash and
        # no "/search" suffix. Slicing a fixed number of characters is easy
        # to get wrong ("/search/" is 8 characters long) and leaves a stray
        # "/" that produces ".../​/search/" style URLs below.
        base = endpoint.rstrip("/").removesuffix("/search")

        timeout = int(settings.get("timeout", 30))

        # Get search_type from config fields (default: Experiment)
        # This allows one tool class to handle Experiment, File, and
        # Biosample searches
        search_type = fields.get("search_type", "Experiment")

        query: Dict[str, Any] = {"type": search_type, "format": "json"}
        # Add all provided arguments to query; tolerate a missing mapping.
        query.update(
            (key, value)
            for key, value in (arguments or {}).items()
            if value is not None
        )

        # doseq=True expands list values into repeated query parameters.
        url = f"{base}/search/?{urlencode(query, doseq=True)}"
        try:
            data = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )
            return {
                "source": "ENCODE",
                "endpoint": "search",
                "query": query,
                "data": data,
                "success": True,
            }
        except Exception as e:
            return {
                "error": str(e),
                "source": "ENCODE",
                "endpoint": "search",
                "success": False,
            }
# Backward-compatible names: every ENCODE search tool is now served by
# ENCODESearchTool, so each legacy class name is bound to it and registered
# under its original tool-type name.
ENCODEFilesTool = ENCODESearchTool
register_tool("ENCODEFilesTool")(ENCODEFilesTool)

ENCODESearchBiosamplesTool = ENCODESearchTool
register_tool("ENCODESearchBiosamplesTool")(ENCODESearchBiosamplesTool)
@register_tool("ENCODERESTTool")
class ENCODERESTTool(BaseRESTTool):
    """Generic REST tool for ENCODE detail endpoints."""

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        ENCODE uses custom _http_get helper, so we override the full run method.

        Args:
            arguments: Values handed to ``self._build_url`` to produce the
                request URL (presumably path parameters for the configured
                endpoint -- confirm against ``BaseRESTTool``).

        Returns:
            ``{"status": "success", "data": ..., "url": ...}`` on success, or
            ``{"status": "error", "error": ..., "url": ...}`` when any
            exception is raised. ``url`` is the URL built from the arguments
            without the ENCODE format parameters, or ``None`` if building it
            failed.
        """
        url = None
        try:
            url = self._build_url(arguments)
            # Add format=json parameter for ENCODE. Use "&" when the built URL
            # already carries a query string so the result stays well-formed.
            separator = "&" if "?" in url else "?"
            url_with_format = f"{url}{separator}format=json&frame=object"
            data = _http_get(
                url_with_format,
                headers={"Accept": "application/json"},
                timeout=self.timeout,
            )
            return {
                "status": "success",
                "data": data,
                "url": url,
            }
        except Exception as e:
            return {
                "status": "error",
                "error": f"ENCODE API error: {str(e)}",
                "url": url,
            }