Source code for tooluniverse.build_optimizer
"""Build optimization utilities for ToolUniverse tools."""
import json
import hashlib
from pathlib import Path
from typing import Dict, Any, Set, Tuple
def _normalize_value(value: Any) -> Any:
"""Recursively normalize values for consistent hashing."""
if isinstance(value, dict):
# Sort dictionary keys and normalize values
return {k: _normalize_value(v) for k, v in sorted(value.items())}
elif isinstance(value, list):
# Normalize list elements
return [_normalize_value(item) for item in value]
elif isinstance(value, (str, int, float, bool)) or value is None:
return value
else:
# Convert other types to string representation for hashing
return str(value)
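# Illustrative sketch (not part of the module): normalization sorts dict keys
# recursively and stringifies non-JSON values, so logically equal configs
# always serialize the same way:
#
#     _normalize_value({"b": 1, "a": {"d": 2, "c": (3, 4)}})
#     # -> {"a": {"c": "(3, 4)", "d": 2}, "b": 1}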
def calculate_tool_hash(tool_config: Dict[str, Any], verbose: bool = False) -> str:
"""Calculate a hash for tool configuration to detect changes.
Args:
tool_config: Tool configuration dictionary
verbose: If True, print excluded fields (for debugging)
Returns:
MD5 hash string of the normalized configuration
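
    Example (illustrative config; metadata fields are excluded from the hash):
        >>> base = {"name": "demo_tool", "type": "Example"}
        >>> calculate_tool_hash(base) == calculate_tool_hash(
        ...     dict(base, last_updated="2024-01-01")
        ... )
        True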
"""
# Fields to exclude from hash calculation (metadata/timestamp fields)
excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
# Create a normalized version of the config for hashing
normalized_config = {}
excluded_values = []
for key, value in sorted(tool_config.items()):
if key not in excluded_fields:
# Recursively normalize nested structures
normalized_config[key] = _normalize_value(value)
elif verbose:
excluded_values.append(key)
if verbose and excluded_values:
print(f" Excluded fields from hash: {', '.join(excluded_values)}")
# Use consistent JSON serialization with sorted keys
config_str = json.dumps(
normalized_config, sort_keys=True, separators=(",", ":"), ensure_ascii=False
)
return hashlib.md5(config_str.encode("utf-8")).hexdigest()
def load_metadata(metadata_file: Path) -> Dict[str, str]:
"""Load tool metadata from file."""
if not metadata_file.exists():
return {}
try:
with open(metadata_file, "r", encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, IOError):
return {}
def save_metadata(metadata: Dict[str, str], metadata_file: Path) -> None:
"""Save tool metadata to file."""
metadata_file.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_file, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=2, sort_keys=True)
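# Round-trip sketch (hypothetical path and hash value; not part of the module):
#
#     meta_path = Path("build/.tool_hashes.json")
#     save_metadata({"demo_tool": "abc123"}, meta_path)
#     assert load_metadata(meta_path) == {"demo_tool": "abc123"}
#
# load_metadata deliberately swallows a missing or corrupt file and returns {},
# so a cold or damaged cache degrades to a full rebuild rather than an error.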
def cleanup_orphaned_files(tools_dir: Path, current_tool_names: Set[str]) -> int:
"""Remove files for tools that no longer exist."""
if not tools_dir.exists():
return 0
cleaned_count = 0
keep_files = {"__init__", "_shared_client", "__pycache__"}
for file_path in tools_dir.iterdir():
if (
file_path.is_file()
and file_path.suffix == ".py"
and file_path.stem not in keep_files
and file_path.stem not in current_tool_names
):
print(f"🗑️ Removing orphaned tool file: {file_path.name}")
file_path.unlink()
cleaned_count += 1
return cleaned_count
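# Usage sketch (hypothetical directory; not part of the module): any .py file
# in tools_dir whose stem is neither an infrastructure name nor a current tool
# name is deleted.
#
#     removed = cleanup_orphaned_files(
#         Path("src/tooluniverse/generated_tools"), {"tool_a", "tool_b"}
#     )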
def _compare_configs(old_config: Dict[str, Any], new_config: Dict[str, Any]) -> list:
"""Compare two configs and return list of changed field paths."""
changes = []
all_keys = set(old_config.keys()) | set(new_config.keys())
excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
for key in all_keys:
if key in excluded_fields:
continue
old_val = old_config.get(key)
new_val = new_config.get(key)
if old_val != new_val:
changes.append(key)
return changes
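# Sketch of the helper above (illustrative configs): it reports the top-level
# keys whose values differ, skipping the same metadata fields that hashing
# excludes.
#
#     _compare_configs(
#         {"name": "a", "fields": [1]},
#         {"name": "a", "fields": [2], "last_updated": "x"},
#     )
#     # -> ["fields"]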
def get_changed_tools(
current_tools: Dict[str, Any],
metadata_file: Path,
force_regenerate: bool = False,
verbose: bool = False,
) -> Tuple[list, list, list, Dict[str, list]]:
"""Get lists of new, changed, and unchanged tools.
Args:
current_tools: Dictionary of current tool configurations
metadata_file: Path to metadata file storing previous hashes
force_regenerate: If True, mark all tools as changed
verbose: If True, provide detailed change information
Returns:
Tuple of (new_tools, changed_tools, unchanged_tools, change_details)
where change_details maps tool_name -> list of changed field names
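
    Example (a sketch; the tool config and metadata path are hypothetical):
        >>> tools = {"demo_tool": {"name": "demo_tool", "type": "Example"}}
        >>> new, changed, same, details = get_changed_tools(
        ...     tools, Path("build/.tool_hashes.json")
        ... )

        On a first run "demo_tool" is reported as new; rerunning with the
        same config reports it unchanged; editing the config reports it
        changed.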
"""
old_metadata = load_metadata(metadata_file)
new_metadata = {}
new_tools = []
changed_tools = []
unchanged_tools = []
change_details: Dict[str, list] = {}
if force_regenerate:
print("🔄 Force regeneration enabled - all tools will be regenerated")
for tool_name, tool_config in current_tools.items():
current_hash = calculate_tool_hash(tool_config, verbose=verbose)
new_metadata[tool_name] = current_hash
if tool_name in old_metadata:
changed_tools.append(tool_name)
change_details[tool_name] = ["force_regenerate"]
else:
new_tools.append(tool_name)
else:
for tool_name, tool_config in current_tools.items():
current_hash = calculate_tool_hash(tool_config, verbose=verbose)
new_metadata[tool_name] = current_hash
old_hash = old_metadata.get(tool_name)
if old_hash is None:
new_tools.append(tool_name)
if verbose:
print(f" ✨ New tool detected: {tool_name}")
elif old_hash != current_hash:
changed_tools.append(tool_name)
                # Only hashes are persisted (storing full configs would bloat
                # the metadata file), so a field-level diff via _compare_configs
                # is not possible here; record a generic marker instead.
                change_details[tool_name] = ["hash_mismatch"]
if verbose:
print(
f" 🔄 Tool changed: {tool_name} (hash: {old_hash[:8]}... -> {current_hash[:8]}...)"
)
else:
unchanged_tools.append(tool_name)
# Save updated metadata
save_metadata(new_metadata, metadata_file)
return new_tools, changed_tools, unchanged_tools, change_details
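# End-to-end sketch of the intended incremental flow (helper names such as
# load_all_tool_configs and regenerate_tool_module are hypothetical):
#
#     tools = load_all_tool_configs()                  # {tool_name: config}
#     new, changed, same, _ = get_changed_tools(tools, metadata_file)
#     for name in new + changed:
#         regenerate_tool_module(name, tools[name])
#     cleanup_orphaned_files(tools_dir, set(tools))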