# Source code for tooluniverse.tool_registry
"""Simplified tool registry for automatic tool discovery and registration."""
import importlib
import pkgutil
import os
import json
import glob
import logging
from typing import Dict
# Initialize logger for this module
logger = logging.getLogger("ToolRegistry")
# Global registries
# Tool type name -> tool class; written by register_tool and
# register_external_tool, read by the lookup helpers below.
_tool_registry = {}
# Tool type name -> config object; written by register_tool (when a truthy
# config is supplied) and by register_config.
_config_registry = {}
_lazy_registry: Dict[str, str] = {} # Maps tool names to module names
# Flipped to True once auto_discover_tools(lazy=False) has finished its
# import pass, so later non-lazy calls return the cached registry.
_discovery_completed = False
# Fully qualified module name -> module object already imported by
# lazy_import_tool; prevents importing the same module twice.
_lazy_cache = {}
[docs]
def register_tool(tool_type_name=None, config=None):
    """
    Build a class decorator that records a tool class in the tool registry.

    The class is stored under ``tool_type_name`` when one is given, otherwise
    under the class's own ``__name__``. When ``config`` is truthy it is stored
    in the config registry under the same key.

    Usage:
        @register_tool('CustomToolName', config={...})
        class MyTool:
            pass

    Returns:
        The decorator; applying it returns the decorated class unchanged.
    """

    def decorator(cls):
        name = tool_type_name if tool_type_name else cls.__name__
        _tool_registry[name] = cls

        # Without a config only the class mapping is recorded.
        if not config:
            logger.debug(f"Registered tool: {name} -> {cls.__name__}")
            return cls

        _config_registry[name] = config
        logger.info(f"Registered tool with config: {name}")
        return cls

    return decorator
[docs]
def register_external_tool(tool_name, tool_class):
    """
    Store ``tool_class`` in the tool registry under ``tool_name``.

    An existing entry with the same name is overwritten.
    """
    _tool_registry.update({tool_name: tool_class})
    logger.info(f"Externally registered tool: {tool_name}")
[docs]
def register_config(tool_type_name, config):
    """
    Store ``config`` in the config registry under ``tool_type_name``.

    An existing config for the same tool type is overwritten.
    """
    _config_registry.update({tool_type_name: config})
    logger.info(f"Registered config for: {tool_type_name}")
[docs]
def get_tool_registry():
    """Return a shallow copy of the tool registry (name -> tool class)."""
    snapshot = dict(_tool_registry)
    return snapshot
[docs]
def get_config_registry():
    """Return a shallow copy of the config registry (name -> config)."""
    snapshot = dict(_config_registry)
    return snapshot
[docs]
def lazy_import_tool(tool_name):
    """
    Resolve a tool class by name, importing at most one module to do so.

    Lookup order:
      1. The tool registry (already-registered classes).
      2. The lazy registry: the mapped module is imported once, and the tool
         registry is checked again afterwards.

    A mapping whose module fails with ImportError is removed from the lazy
    registry so it is not retried.

    Returns:
        The registered tool class, or None when the tool cannot be resolved.
        Full discovery is deliberately not attempted here.
    """
    # Fast path: the tool has already been registered.
    if tool_name in _tool_registry:
        return _tool_registry[tool_name]

    if tool_name in _lazy_registry:
        module_name = _lazy_registry[tool_name]

        # Mappings may hold bare module names; qualify them with the package.
        full_module_name = (
            module_name
            if module_name.startswith("tooluniverse.")
            else f"tooluniverse.{module_name}"
        )

        if full_module_name in _lazy_cache:
            # The module was imported by an earlier call; just re-check.
            if tool_name in _tool_registry:
                return _tool_registry[tool_name]
            logger.warning(
                f"Tool {tool_name} not found in already imported module {full_module_name}"
            )
        else:
            try:
                logger.debug(
                    f"Lazy importing module: {full_module_name} for tool: {tool_name}"
                )
                module = importlib.import_module(full_module_name)
                _lazy_cache[full_module_name] = module
                logger.debug(f"Successfully imported module: {full_module_name}")

                # Importing the module is what populates the tool registry.
                if tool_name in _tool_registry:
                    logger.debug(f"Successfully lazy-loaded tool: {tool_name}")
                    return _tool_registry[tool_name]
                logger.warning(
                    f"Tool {tool_name} not found in module {full_module_name} after import"
                )
            except ImportError as e:
                logger.warning(f"Failed to lazy import {full_module_name}: {e}")
                # Drop the bad mapping so the import is not attempted again.
                del _lazy_registry[tool_name]

    # No fallback to full discovery here: that would defeat lazy loading.
    logger.debug(f"Tool {tool_name} not found in lazy registry")
    return None
[docs]
def build_lazy_registry(package_name=None):
    """
    Build a mapping of tool names to module names using config files and naming patterns.

    Only the package itself is imported (to locate its path); no tool module
    is imported. Existing entries in the lazy registry are never overwritten,
    so config-derived mappings take precedence over pattern-derived guesses.

    Args:
        package_name: Package to scan; defaults to "tooluniverse".

    Returns:
        A copy of the lazy registry (tool name -> module name), or an empty
        dict when the package cannot be imported or has no ``__path__``.
    """
    if package_name is None:
        package_name = "tooluniverse"

    try:
        package = importlib.import_module(package_name)
        package_path = package.__path__
    except (ImportError, AttributeError):
        logger.warning(f"Could not import package {package_name}")
        return {}

    logger.debug(f"Building lazy registry for package: {package_name}")

    # Strategy 1: Parse config files for accurate mappings WITHOUT importing modules
    config_mappings = _discover_from_configs()
    config_count = 0
    for module_name, tool_classes in config_mappings.items():
        # Don't verify module exists by importing - just trust the mapping.
        # The actual import will happen when the tool is first requested.
        for tool_class in tool_classes:
            if tool_class not in _lazy_registry:
                _lazy_registry[tool_class] = module_name
                # Count only mappings actually added so the summary is accurate.
                config_count += 1

    # Strategy 2: Pattern-based fallback for modules without configs
    pattern_count = 0
    for _importer, modname, _ispkg in pkgutil.iter_modules(package_path):
        if "_tool" not in modname or modname in config_mappings:
            continue

        # Simple pattern: module_tool -> ModuleTool, ModuleRESTTool
        base_name = modname.replace("_tool", "").replace("_", "")
        potential_names = [
            f"{base_name.title()}Tool",
            f"{base_name.title()}RESTTool",
            f"{base_name.upper()}Tool",
        ]
        for tool_name in potential_names:
            if tool_name not in _lazy_registry:
                _lazy_registry[tool_name] = modname
                pattern_count += 1

    logger.info(
        f"Built lazy registry: {config_count} from configs, {pattern_count} from patterns (no modules imported)"
    )
    return _lazy_registry.copy()
def _discover_from_configs():
    """
    Fully dynamic config file discovery - no hardcoded mappings.

    Automatically discovers config-to-module mappings by:
    1. Finding all JSON config files in the package's ``data`` directory
    2. Finding all Python tool modules
    3. Smart matching between config names and module names

    Config files are processed in sorted order and tool types are emitted in
    sorted order, so the result does not depend on filesystem or hash
    ordering. Each tool type appears at most once per module.

    Returns:
        Dict mapping module name -> list of tool type names found in the
        configs matched to that module. Empty when the data directory is
        missing. Files that cannot be read or parsed are skipped.
    """
    # Get the data directory path relative to tooluniverse module
    try:
        import tooluniverse

        package_dir = os.path.dirname(tooluniverse.__file__)
        data_dir = os.path.join(package_dir, "data")
    except ImportError:
        # Fallback: assume we're in the right directory structure
        current_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(current_dir, "data")

    if not os.path.exists(data_dir):
        logger.warning(f"Data directory not found: {data_dir}")
        return {}

    # Step 1: Get all available tool modules
    available_modules = _get_available_tool_modules()
    logger.debug(f"Found {len(available_modules)} tool modules: {available_modules}")

    tool_mapping = {}
    try:
        for json_file in sorted(glob.glob(os.path.join(data_dir, "*.json"))):
            try:
                # Strip only the extension; ".json" elsewhere in the name stays.
                config_name = os.path.splitext(os.path.basename(json_file))[0]

                # Step 2: Smart matching to find the best module for this config
                module_name = _smart_match_config_to_module(
                    config_name, available_modules
                )
                if not module_name:
                    logger.debug(f"No module match found for config: {config_name}")
                    continue

                # Step 3: Extract tool types from config
                with open(json_file, "r", encoding="utf-8") as f:
                    config_data = json.load(f)

                tool_types = set()
                if isinstance(config_data, list):
                    for tool_config in config_data:
                        if isinstance(tool_config, dict) and "type" in tool_config:
                            tool_types.add(tool_config["type"])
                elif isinstance(config_data, dict) and "type" in config_data:
                    tool_types.add(config_data["type"])

                if tool_types:
                    known_types = tool_mapping.setdefault(module_name, [])
                    # Several configs can resolve to one module; keep each
                    # tool type once. key=str keeps sorting safe for any
                    # non-string values.
                    new_types = [
                        tool_type
                        for tool_type in sorted(tool_types, key=str)
                        if tool_type not in known_types
                    ]
                    known_types.extend(new_types)
                    logger.debug(
                        f"Dynamic mapping: {config_name} -> {module_name} -> {tool_types}"
                    )
            except Exception as e:
                # Best effort: one unreadable or malformed config must not
                # stop discovery of the others.
                logger.debug(f"Skipped config file {json_file}: {e}")
                continue
    except Exception as e:
        logger.warning(f"Error reading config files: {e}")

    logger.debug(f"Dynamically discovered {len(tool_mapping)} modules from configs")
    return tool_mapping
def _get_available_tool_modules():
    """
    List the tool modules found directly inside the tooluniverse package.

    A module qualifies when its name contains "_tool" or is one of the
    explicitly named modules (compose_tool, agentic_tool). Nothing is
    imported apart from the tooluniverse package itself.

    Returns:
        List of bare module names, or an empty list when tooluniverse cannot
        be imported.
    """
    try:
        import tooluniverse

        package_path = tooluniverse.__path__
    except ImportError:
        logger.warning("Cannot import tooluniverse package")
        return []

    # Include compose_tool and agentic_tool
    explicitly_included = ["compose_tool", "agentic_tool"]
    return [
        modname
        for _importer, modname, _ispkg in pkgutil.iter_modules(package_path)
        if "_tool" in modname or modname in explicitly_included
    ]
def _smart_match_config_to_module(config_name, available_modules):
"""
Smart matching algorithm to find the best module for a config file.
Uses multiple strategies in order of preference.
"""
# Strategy 1: Direct name matching
# "chembl_tools" -> "chem_tool"
if config_name.endswith("_tools"):
candidate = config_name.replace("_tools", "_tool")
if candidate in available_modules:
return candidate
# Strategy 2: Exact match
# "chem_tool" -> "chem_tool"
if config_name in available_modules:
return config_name
# Strategy 3: Fuzzy matching based on keywords
# Extract key parts from config name and match with modules
config_parts = set(config_name.replace("_", " ").split())
best_match = None
best_score = 0
for module in available_modules:
module_parts = set(module.replace("_", " ").split())
# Calculate similarity score
common_parts = config_parts & module_parts
if common_parts:
score = len(common_parts) / max(len(config_parts), len(module_parts))
if score > best_score:
best_score = score
best_match = module
# Only return match if score is reasonably high
if best_score > 0.3: # At least 30% similarity
return best_match
# Strategy 4: Pattern-based matching for known patterns
patterns = [
# FDA patterns
("fda", "openfda_tool"),
("clinicaltrials", "ctg_tool"),
("clinical_trials", "ctg_tool"),
("opentargets", "graphql_tool"),
("monarch", "restful_tool"),
("url_fetch", "url_tool"),
("europe_pmc", "europe_pmc_tool"),
("semantic_scholar", "semantic_scholar_tool"),
# ChEMBL pattern
("chembl", "chem_tool"),
]
for pattern, module in patterns:
if pattern in config_name and module in available_modules:
return module
return None
[docs]
def auto_discover_tools(package_name=None, lazy=True):
    """
    Discover tool modules in a package, lazily or eagerly.

    With ``lazy=True`` only the name -> module mapping is built (and only if
    the lazy registry is still empty); no tool module is imported.
    With ``lazy=False`` every module whose name contains "_tool" is imported,
    once; later non-lazy calls return the cached registry.

    Args:
        package_name: Package to scan; defaults to "tooluniverse".
        lazy: Whether to skip importing the tool modules.

    Returns:
        A copy of the tool registry as it stands after discovery.
    """
    global _discovery_completed

    package_name = "tooluniverse" if package_name is None else package_name

    # In lazy mode, just build the registry without importing anything
    if lazy:
        if not _lazy_registry:
            build_lazy_registry(package_name)
        logger.debug(
            f"Lazy discovery complete. Registry contains {len(_lazy_registry)} tool mappings (no modules imported)"
        )
        return _tool_registry.copy()

    # Full discovery is performed at most once.
    if _discovery_completed:
        return _tool_registry.copy()

    try:
        package = importlib.import_module(package_name)
        package_path = package.__path__
    except (ImportError, AttributeError):
        logger.warning(f"Could not import package {package_name}")
        return _tool_registry.copy()

    logger.info(
        f"Auto-discovering tools in package: {package_name} (lazy={lazy}) - importing ALL modules"
    )

    # Import all tool modules (non-lazy mode)
    imported_count = 0
    explicitly_included = ["compose_tool", "agentic_tool"]
    for _importer, modname, _ispkg in pkgutil.iter_modules(package_path):
        if not ("_tool" in modname or modname in explicitly_included):
            continue
        try:
            importlib.import_module(f"{package_name}.{modname}")
            logger.debug(f"Imported tool module: {modname}")
            imported_count += 1
        except ImportError as e:
            # A module that fails to import is skipped.
            logger.warning(f"Could not import {modname}: {e}")

    _discovery_completed = True
    logger.info(
        f"Full discovery complete. Imported {imported_count} modules, registered {len(_tool_registry)} tools"
    )
    return _tool_registry.copy()
[docs]
def get_tool_class_lazy(tool_name):
    """
    Look up a tool class by name, preferring lazy loading.

    The lazy importer is tried first, then the tool registry. Only when both
    miss and full discovery has not yet run is every tool module imported as
    a last resort.

    Returns:
        The tool class, or None when it cannot be found.
    """
    # First try lazy import
    tool_class = lazy_import_tool(tool_name)
    if tool_class:
        return tool_class

    # The tool may still be present in the current registry.
    if tool_name in _tool_registry:
        return _tool_registry[tool_name]

    # Full discovery already ran, so there is nothing left to try.
    if _discovery_completed:
        return None

    # Last resort; should be rare with a properly configured lazy registry.
    logger.warning(
        f"Tool {tool_name} not found in lazy registry, falling back to full discovery"
    )
    auto_discover_tools(lazy=False)
    return _tool_registry.get(tool_name)