Source code for tooluniverse.build_optimizer

"""Build optimization utilities for ToolUniverse tools."""

import json
import hashlib
from pathlib import Path
from typing import Dict, Any, Set, Tuple


def _normalize_value(value: Any) -> Any:
    """Recursively normalize values for consistent hashing."""
    if isinstance(value, dict):
        # Sort dictionary keys and normalize values
        return {k: _normalize_value(v) for k, v in sorted(value.items())}
    elif isinstance(value, list):
        # Normalize list elements
        return [_normalize_value(item) for item in value]
    elif isinstance(value, (str, int, float, bool)) or value is None:
        return value
    else:
        # Convert other types to string representation for hashing
        return str(value)
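
# Illustrative behavior of _normalize_value (a sketch, not part of the
# original module): dict keys are sorted, and unsupported types such as
# tuples fall through to their string representation.
#
#     >>> _normalize_value({"b": (1, 2), "a": None})
#     {'a': None, 'b': '(1, 2)'}
#     >>> _normalize_value([{"y": 1, "x": 2}])
#     [{'x': 2, 'y': 1}]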


def calculate_tool_hash(tool_config: Dict[str, Any], verbose: bool = False) -> str:
    """Calculate a hash for a tool configuration to detect changes.

    Args:
        tool_config: Tool configuration dictionary
        verbose: If True, print excluded fields (for debugging)

    Returns:
        MD5 hash string of the normalized configuration
    """
    # Fields to exclude from hash calculation (metadata/timestamp fields)
    excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}

    # Create a normalized version of the config for hashing
    normalized_config = {}
    excluded_values = []
    for key, value in sorted(tool_config.items()):
        if key not in excluded_fields:
            # Recursively normalize nested structures
            normalized_config[key] = _normalize_value(value)
        elif verbose:
            excluded_values.append(key)

    if verbose and excluded_values:
        print(f" Excluded fields from hash: {', '.join(excluded_values)}")

    # Use consistent JSON serialization with sorted keys
    config_str = json.dumps(
        normalized_config, sort_keys=True, separators=(",", ":"), ensure_ascii=False
    )
    return hashlib.md5(config_str.encode("utf-8")).hexdigest()


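# A minimal usage sketch (illustrative; "demo_tool" is a hypothetical
# config). Because "timestamp" is in excluded_fields, two configs that
# differ only in that field hash identically:
#
#     >>> a = calculate_tool_hash({"name": "demo_tool", "timestamp": 111})
#     >>> b = calculate_tool_hash({"name": "demo_tool", "timestamp": 222})
#     >>> a == b
#     True

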
def load_metadata(metadata_file: Path) -> Dict[str, str]:
    """Load tool metadata from file."""
    if not metadata_file.exists():
        return {}
    try:
        with open(metadata_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, IOError):
        return {}


def save_metadata(metadata: Dict[str, str], metadata_file: Path) -> None:
    """Save tool metadata to file."""
    metadata_file.parent.mkdir(parents=True, exist_ok=True)
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, sort_keys=True)


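# Round-trip sketch for the two helpers above (illustrative; uses a
# throwaway temporary directory, and save_metadata creates the missing
# parent directory):
#
#     >>> import tempfile
#     >>> path = Path(tempfile.mkdtemp()) / "build" / "tool_hashes.json"
#     >>> save_metadata({"demo_tool": "abc123"}, path)
#     >>> load_metadata(path)
#     {'demo_tool': 'abc123'}

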
def cleanup_orphaned_files(tools_dir: Path, current_tool_names: Set[str]) -> int:
    """Remove files for tools that no longer exist."""
    if not tools_dir.exists():
        return 0

    cleaned_count = 0
    keep_files = {"__init__", "_shared_client", "__pycache__"}

    for file_path in tools_dir.iterdir():
        if (
            file_path.is_file()
            and file_path.suffix == ".py"
            and file_path.stem not in keep_files
            and file_path.stem not in current_tool_names
        ):
            print(f"🗑️ Removing orphaned tool file: {file_path.name}")
            file_path.unlink()
            cleaned_count += 1

    return cleaned_count


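# Cleanup sketch (illustrative, hypothetical file names): bar.py has no
# matching tool and is removed, while foo.py and the keep_files entries
# survive.
#
#     >>> import tempfile
#     >>> d = Path(tempfile.mkdtemp())
#     >>> for name in ("foo.py", "bar.py", "__init__.py"):
#     ...     (d / name).touch()
#     >>> cleanup_orphaned_files(d, {"foo"})
#     🗑️ Removing orphaned tool file: bar.py
#     1

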
def _compare_configs(old_config: Dict[str, Any], new_config: Dict[str, Any]) -> list:
    """Compare two configs and return a list of changed field paths."""
    changes = []
    all_keys = set(old_config.keys()) | set(new_config.keys())
    excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}

    for key in all_keys:
        if key in excluded_fields:
            continue
        old_val = old_config.get(key)
        new_val = new_config.get(key)
        if old_val != new_val:
            changes.append(key)

    return changes


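# Illustrative comparison (hypothetical configs): "timestamp" is excluded,
# so only the genuinely differing field is reported.
#
#     >>> _compare_configs({"name": "t", "url": "a", "timestamp": 1},
#     ...                  {"name": "t", "url": "b", "timestamp": 2})
#     ['url']

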
def get_changed_tools(
    current_tools: Dict[str, Any],
    metadata_file: Path,
    force_regenerate: bool = False,
    verbose: bool = False,
) -> Tuple[list, list, list, Dict[str, list]]:
    """Get lists of new, changed, and unchanged tools.

    Args:
        current_tools: Dictionary of current tool configurations
        metadata_file: Path to metadata file storing previous hashes
        force_regenerate: If True, mark all tools as changed
        verbose: If True, provide detailed change information

    Returns:
        Tuple of (new_tools, changed_tools, unchanged_tools, change_details)
        where change_details maps tool_name -> list of changed field names
    """
    old_metadata = load_metadata(metadata_file)
    new_metadata = {}

    new_tools = []
    changed_tools = []
    unchanged_tools = []
    change_details: Dict[str, list] = {}

    if force_regenerate:
        print("🔄 Force regeneration enabled - all tools will be regenerated")
        for tool_name, tool_config in current_tools.items():
            current_hash = calculate_tool_hash(tool_config, verbose=verbose)
            new_metadata[tool_name] = current_hash
            if tool_name in old_metadata:
                changed_tools.append(tool_name)
                change_details[tool_name] = ["force_regenerate"]
            else:
                new_tools.append(tool_name)
    else:
        for tool_name, tool_config in current_tools.items():
            current_hash = calculate_tool_hash(tool_config, verbose=verbose)
            new_metadata[tool_name] = current_hash

            old_hash = old_metadata.get(tool_name)
            if old_hash is None:
                new_tools.append(tool_name)
                if verbose:
                    print(f" ✨ New tool detected: {tool_name}")
            elif old_hash != current_hash:
                changed_tools.append(tool_name)
                # Try to identify which fields changed (if we have the old config)
                # Note: We only have hashes, so we can't do detailed field comparison
                # This would require storing full configs, which we avoid for size reasons
                change_details[tool_name] = ["hash_mismatch"]
                if verbose:
                    print(
                        f" 🔄 Tool changed: {tool_name} (hash: {old_hash[:8]}... -> {current_hash[:8]}...)"
                    )
            else:
                unchanged_tools.append(tool_name)

    # Save updated metadata
    save_metadata(new_metadata, metadata_file)

    return new_tools, changed_tools, unchanged_tools, change_details


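# A minimal end-to-end sketch (illustrative, not part of the original
# module; tool names and configs are hypothetical): run two "builds"
# against the same metadata file and observe which tools are reported
# as new, changed, or unchanged.
if __name__ == "__main__":
    import tempfile

    meta = Path(tempfile.mkdtemp()) / "tool_hashes.json"

    v1 = {"alpha": {"desc": "first"}, "beta": {"desc": "second"}}
    # First run: no prior metadata, so both tools are reported as new.
    print(get_changed_tools(v1, meta))

    v2 = {"alpha": {"desc": "first"}, "beta": {"desc": "updated"}}
    # Second run: alpha's hash is unchanged, beta's hash differs.
    print(get_changed_tools(v2, meta))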