Source code for tooluniverse.tool_finder_llm

"""
LLM-based Tool Finder - A tool that uses LLM to find relevant tools based on descriptions.

This tool leverages AgenticTool's LLM functionality to create an intelligent tool finder
that puts only essential tool information (name and description) in the prompt to minimize
context window cost while letting the LLM decide which tools to return based on the query.

Key optimizations:
- Only sends tool name and description to LLM (no parameters, configs, etc.)
- Uses compact formatting to reduce token count
- Caches tool descriptions to avoid repeated processing
- Excludes irrelevant tools from prompt
"""

import json
from datetime import datetime

from .base_tool import BaseTool
from .tool_registry import register_tool
from .agentic_tool import AgenticTool


@register_tool("ToolFinderLLM")
class ToolFinderLLM(BaseTool):
    """
    LLM-based tool finder that uses natural language processing to select relevant tools.

    This class leverages AgenticTool's LLM capabilities to analyze tool descriptions
    and match them with user queries. It is optimized for minimal context window cost
    by only sending essential information (tool name and description) to the LLM,
    providing an intelligent alternative to embedding-based similarity search.

    Cost optimizations:
    - Only includes tool name and description in the LLM prompt
    - Uses compact formatting to minimize token usage
    - Excludes unnecessary tool metadata and parameters
    - Caches the tool list to avoid repeated tool processing
    """
def __init__(self, tool_config, tooluniverse=None):
    """
    Set up the finder from its configuration and build the backing LLM tool.

    Args:
        tool_config (dict): Tool configuration. Top-level keys read here are
            ``name``, ``description``, ``exclude_tools``, ``include_categories``
            and ``exclude_categories``. The nested ``configs`` dict supplies the
            LLM settings, ``return_list_only`` and a fallback ``exclude_tools``.
        tooluniverse: ToolUniverse instance whose tools are searched. May be
            ``None``, in which case no tools can be listed.
    """
    super().__init__(tool_config)
    self.tooluniverse = tooluniverse

    # Identity of this tool.
    self.name = tool_config.get("name", "ToolFinderLLM")
    self.description = tool_config.get("description", "LLM-based tool finder")

    # LLM settings live in the nested "configs" dict.
    llm_settings = tool_config.get("configs", {})
    self.api_type = llm_settings.get("api_type", "CHATGPT")
    self.model_id = llm_settings.get("model_id", "gpt-4o-1120")
    self.temperature = llm_settings.get("temperature", 0.1)
    self.max_new_tokens = llm_settings.get("max_new_tokens", 4096)
    self.return_json = llm_settings.get("return_json", True)

    # Tools never offered to the LLM. Precedence: top-level "exclude_tools",
    # then configs["exclude_tools"], then the built-in list of special tools.
    builtin_excluded = [
        "Tool_RAG",
        "Tool_Finder",
        "Finish",
        "CallAgent",
        "ToolFinderLLM",
    ]
    nested_excluded = tool_config.get("configs", {}).get(
        "exclude_tools", builtin_excluded
    )
    self.exclude_tools = tool_config.get("exclude_tools", nested_excluded)

    # Optional category filters, passed through when listing tools.
    self.include_categories = tool_config.get("include_categories", None)
    self.exclude_categories = tool_config.get("exclude_categories", None)

    # Default return shape; False unless configs says otherwise.
    self.return_list_only = tool_config.get("configs", {}).get(
        "return_list_only", False
    )

    # Build the AgenticTool that performs the actual LLM call.
    self._init_agentic_tool()

    # Tool-description cache, filled lazily by _get_available_tools().
    self._tool_cache = None
    self._cache_timestamp = None
def _init_agentic_tool(self):
    """
    Create the internal AgenticTool used for the LLM call and store it on
    ``self.agentic_tool``.

    The tool takes three inputs (``query``, ``tools_descriptions``, ``limit``)
    and is configured with this finder's LLM settings.

    Raises:
        Exception: Whatever ``AgenticTool(...)`` raises is logged and re-raised.
    """
    argument_names = ("query", "tools_descriptions", "limit")

    # Schema for the three inputs substituted into the prompt.
    argument_schema = {
        "query": {
            "type": "string",
            "description": "The user query describing what tools are needed",
            "required": True,
        },
        "tools_descriptions": {
            "type": "string",
            "description": "JSON string containing all available tool descriptions",
            "required": True,
        },
        "limit": {
            "type": "integer",
            "description": "Maximum number of tools to return",
            "required": True,
        },
    }

    # LLM settings forwarded unchanged; metadata is always switched off.
    llm_settings = {
        "api_type": self.api_type,
        "model_id": self.model_id,
        "temperature": self.temperature,
        "max_new_tokens": self.max_new_tokens,
        "return_json": self.return_json,
        "return_metadata": False,
    }

    agentic_config = {
        "name": f"{self.name}_agentic",
        "description": "Internal agentic tool for LLM-based tool selection",
        "type": "AgenticTool",
        "prompt": self._get_tool_selection_prompt(),
        "input_arguments": list(argument_names),
        "parameter": {
            "type": "object",
            "properties": argument_schema,
            "required": list(argument_names),
        },
        "configs": llm_settings,
    }

    try:
        self.agentic_tool = AgenticTool(agentic_config)
        print(
            f"✅ Successfully initialized {self.name} with LLM model: {self.model_id}"
        )
    except Exception as e:
        print(f"❌ Failed to initialize AgenticTool for {self.name}: {str(e)}")
        raise
def _get_tool_selection_prompt(self):
    """
    Return the prompt template used for LLM tool selection.

    The template is kept short to minimize token usage. It contains three
    placeholders, ``{query}``, ``{tools_descriptions}`` and ``{limit}``, and
    asks for a JSON object with ``selected_tools``, ``total_selected`` and
    ``selection_reasoning``. Literal JSON braces are written doubled
    (``{{`` / ``}}``).

    Returns:
        str: The prompt template.
    """
    # NOTE(review): placeholders are presumably filled in by AgenticTool from
    # its "input_arguments" - confirm against AgenticTool.
    # NOTE(review): the literal is shown on a single line here; confirm its
    # line breaks against the upstream file before reformatting it.
    return """You are a tool selection assistant. Select the most relevant tools for the user query. Query: {query} Tools: {tools_descriptions} Select the {limit} most relevant tools. Return JSON: {{ "selected_tools": [ {{ "name": "tool_name", "relevance_score": 0.95, "reasoning": "Why relevant" }} ], "total_selected": 1, "selection_reasoning": "Overall strategy" }} Requirements: - Only select existing tools from the list - Rank by relevance (0.0-1.0) - Prioritize domain-specific tools for specialized queries - Return requested number or fewer if insufficient relevant tools"""
[docs] def _get_available_tools(self, force_refresh=False): """ Get available tools with their descriptions, with caching. Args: force_refresh (bool): Whether to force refresh the cache Returns: list: List of tool dictionaries with names and descriptions """ current_time = datetime.now() # Use cache if available and not expired (cache for 5 minutes) if ( not force_refresh and self._tool_cache is not None and self._cache_timestamp is not None and (current_time - self._cache_timestamp).seconds < 300 ): return self._tool_cache if not self.tooluniverse: print("⚠️ ToolUniverse reference not available") return [] try: # Get tool names and descriptions tool_names, tool_descriptions = self.tooluniverse.refresh_tool_name_desc( enable_full_desc=True, exclude_names=self.exclude_tools, include_categories=self.include_categories, exclude_categories=self.exclude_categories, ) # Format tools for LLM available_tools = [] for name, desc in zip(tool_names, tool_descriptions): if name not in self.exclude_tools: available_tools.append({"name": name, "description": desc}) # Update cache self._tool_cache = available_tools self._cache_timestamp = current_time print(f"📋 Loaded {len(available_tools)} tools for LLM-based selection") return available_tools except Exception as e: print(f"❌ Error getting available tools: {str(e)}") return []
[docs] def _prefilter_tools_by_keywords(self, available_tools, query, max_tools=100): """ Pre-filter tools using keyword matching to reduce context size before LLM processing. Args: available_tools (list): All available tools query (str): User query max_tools (int): Maximum number of tools to send to LLM Returns: list: Filtered list of tools """ if len(available_tools) <= max_tools: return available_tools query_lower = query.lower() query_words = set(query_lower.split()) # Score tools based on keyword matches scored_tools = [] for tool in available_tools: name_lower = tool.get("name", "").lower() desc_lower = tool.get("description", "").lower() # Calculate basic relevance score score = 0 # Exact name matches get high priority if query_lower in name_lower: score += 10 # Word matches in name and description for word in query_words: if len(word) > 2: # Skip very short words if word in name_lower: score += 3 if word in desc_lower: score += 1 scored_tools.append((score, tool)) # Sort by score and take top tools scored_tools.sort(key=lambda x: x[0], reverse=True) filtered_tools = [tool for score, tool in scored_tools[:max_tools]] print( f"🔍 Pre-filtered from {len(available_tools)} to {len(filtered_tools)} tools using keywords" ) return filtered_tools
[docs] def _format_tools_for_prompt(self, tools): """ Format tools for inclusion in the LLM prompt with minimal information to reduce context cost. Only includes name and description to minimize token usage. Args: tools (list): List of tool dictionaries Returns: str: Compact formatted tool descriptions for the prompt """ formatted_tools = [] for i, tool in enumerate(tools, 1): name = tool.get("name", "Unknown") description = tool.get("description", "No description available") # Truncate very long descriptions to save tokens if len(description) > 150: description = description[:150] + "..." # Use more compact formatting to save tokens formatted_tools.append(f"{i}. {name}: {description}") return "\n".join(formatted_tools)
def find_tools_llm(self, query, limit=5, include_reasoning=False, categories=None):
    """
    Find relevant tools using LLM-based selection.

    Steps: load the candidate tools, optionally keep only the requested
    categories, pre-filter by keywords to at most 50 tools, send the compact
    list to the LLM, then parse its JSON answer and resolve the selected
    names to tool objects and prompts through ToolUniverse.

    Args:
        query (str): User query describing the needed functionality.
        limit (int): Number of tools the LLM is asked to select.
        include_reasoning (bool): If True, add ``selection_details``,
            ``selection_reasoning`` and ``llm_response`` to the result.
        categories (list, optional): Keep only tools whose category is listed.

    Returns:
        dict: On success, ``success=True`` plus ``selected_tools`` (names),
        ``tool_objects``, ``tool_prompts``, ``total_selected``,
        ``total_available``, ``query`` and ``limit_requested``. On any
        failure, ``success=False`` with an ``error`` message and an empty
        ``selected_tools`` list; exceptions are caught, printed and
        reported this way rather than raised.
    """
    try:
        available_tools = self._get_available_tools()

        if not available_tools:
            return {
                "success": False,
                "error": "No tools available for selection",
                "selected_tools": [],
                "total_available": 0,
            }

        if categories:
            # Index name -> category in one pass instead of rescanning every
            # loaded tool for each candidate. setdefault keeps the first
            # definition of a name.
            category_by_name = {}
            for full_tool in self.tooluniverse.return_all_loaded_tools():
                category_by_name.setdefault(
                    full_tool.get("name"), full_tool.get("category", "unknown")
                )

            # Candidates unknown to ToolUniverse are dropped, as are those
            # whose category was not requested.
            available_tools = [
                tool_info
                for tool_info in available_tools
                if tool_info["name"] in category_by_name
                and category_by_name[tool_info["name"]] in categories
            ]

            if not available_tools:
                return {
                    "success": False,
                    "error": f"No tools available in categories: {categories}",
                    "selected_tools": [],
                    "total_available": 0,
                }

        # Keep the prompt small: keyword pre-filter, then compact formatting.
        available_tools = self._prefilter_tools_by_keywords(
            available_tools, query, max_tools=50
        )
        tools_formatted = self._format_tools_for_prompt(available_tools)

        agentic_args = {
            "query": query,
            "tools_descriptions": tools_formatted,
            "limit": limit,
        }

        print(f"🤖 Querying LLM to select tools for: '{query[:100]}...'")

        result = self.agentic_tool.run(agentic_args)

        # The agentic tool may wrap its answer in {"result": ...}.
        if isinstance(result, dict) and "result" in result:
            llm_response = result["result"]
        else:
            llm_response = result

        if isinstance(llm_response, str):
            payload = llm_response.strip()
            if payload.startswith("```"):
                # Models often wrap JSON in a Markdown code fence: drop the
                # opening fence line (``` or ```json) and the closing fence.
                _, _, payload = payload.partition("\n")
                payload = payload.rstrip()
                if payload.endswith("```"):
                    payload = payload[:-3]
            try:
                parsed_response = json.loads(payload)
            except json.JSONDecodeError as e:
                print(f"❌ Failed to parse LLM response as JSON: {e}")
                print(f"Raw response: {llm_response[:500]}...")
                return {
                    "success": False,
                    "error": f"Invalid JSON response from LLM: {str(e)}",
                    "raw_response": llm_response,
                    "selected_tools": [],
                }
        else:
            parsed_response = llm_response

        if not isinstance(parsed_response, dict):
            # Valid JSON but not the requested object (e.g. a bare list):
            # report it clearly instead of failing on `.get` below.
            response_type = type(parsed_response).__name__
            print(f"❌ Unexpected LLM response type: {response_type}")
            return {
                "success": False,
                "error": f"Unexpected LLM response type: {response_type}",
                "raw_response": llm_response,
                "selected_tools": [],
                "query": query,
            }

        # Entries are expected to be {"name": ...} objects; a plain string
        # entry is accepted as a bare tool name. Anything else is skipped.
        selected_tools = parsed_response.get("selected_tools") or []
        tool_names = []
        for entry in selected_tools:
            name = entry.get("name") if isinstance(entry, dict) else entry
            if isinstance(name, str) and name:
                tool_names.append(name)

        # Resolve names to tool objects and their prompt specifications.
        if tool_names:
            selected_tool_objects = self.tooluniverse.get_tool_by_name(tool_names)
            tool_prompts = self.tooluniverse.prepare_tool_prompts(
                selected_tool_objects
            )
        else:
            selected_tool_objects = []
            tool_prompts = []

        result_dict = {
            "success": True,
            "selected_tools": tool_names,
            "tool_objects": selected_tool_objects,
            "tool_prompts": tool_prompts,
            "total_selected": len(tool_names),
            "total_available": len(available_tools),
            "query": query,
            "limit_requested": limit,
        }

        if include_reasoning:
            result_dict.update(
                {
                    "selection_details": selected_tools,
                    "selection_reasoning": parsed_response.get(
                        "selection_reasoning", ""
                    ),
                    "llm_response": parsed_response,
                }
            )

        print(f"✅ Selected {len(tool_names)} tools: {', '.join(tool_names)}")
        return result_dict

    except Exception as e:
        # Boundary handler: callers get a failure dict, never an exception.
        print(f"❌ Error in LLM-based tool selection: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "selected_tools": [],
            "query": query,
        }
def find_tools(
    self,
    message=None,
    picked_tool_names=None,
    rag_num=5,
    return_call_result=False,
    categories=None,
    return_list_only=None,
):
    """
    Find relevant tools based on a message or pre-selected tool names.

    The signature mirrors the embedding-based finder so this class can
    replace it; selection is done by the LLM instead of embedding similarity.

    Args:
        message (str, optional): Query to find tools for. Required when
            ``picked_tool_names`` is None.
        picked_tool_names (list, optional): Pre-selected tool names. When
            given, the LLM is not called.
        rag_num (int, optional): Maximum number of tools returned after
            filtering. Defaults to 5.
        return_call_result (bool, optional): If True, also return the tool
            names. Defaults to False.
        categories (list, optional): Tool categories to restrict the LLM
            selection to.
        return_list_only (bool, optional): If True, return only the list of
            tool specifications. ``None`` means use the instance default.
            Takes precedence over ``return_call_result``.

    Returns:
        The prepared tool prompts (as produced by
        ``tooluniverse.prepare_tool_prompts``), or the tuple
        ``(tool_prompts, tool_names)`` when ``return_call_result`` is True
        and list-only output is off. If LLM selection fails: ``[]`` for
        list-only output, ``("", [])`` for ``return_call_result``,
        otherwise ``""``.

    Raises:
        AssertionError: If both ``message`` and ``picked_tool_names`` are None.
    """
    # Fall back to the instance-level setting when the caller did not choose.
    if return_list_only is None:
        return_list_only = self.return_list_only

    if picked_tool_names is None:
        if message is None:
            # Raised explicitly rather than with `assert`, which is removed
            # under `python -O` and would let a None query reach the LLM.
            # AssertionError is kept because it is the documented exception.
            raise AssertionError(
                "Either message or picked_tool_names must be provided"
            )

        result = self.find_tools_llm(
            query=message,
            limit=rag_num,
            include_reasoning=False,
            categories=categories,
        )

        if not result["success"]:
            # Empty result in the shape the caller asked for.
            if return_list_only:
                return []
            if return_call_result:
                return "", []
            return ""

        picked_tool_names = result["selected_tools"]

    # Drop special/excluded tools, then cap at the requested count.
    picked_tool_names = [
        tool for tool in picked_tool_names if tool not in self.exclude_tools
    ][:rag_num]

    # Tool objects and prompts are needed for every return shape.
    picked_tools = self.tooluniverse.get_tool_by_name(picked_tool_names)
    picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)

    if return_list_only:
        return picked_tools_prompt

    if return_call_result:
        return picked_tools_prompt, picked_tool_names

    return picked_tools_prompt
def get_tool_stats(self):
    """
    Report statistics about the tools available for selection.

    The tool list is always reloaded first (``force_refresh=True``).

    Returns:
        dict: ``total_tools`` (tools loaded), ``excluded_tools`` (length of
        the exclude list), ``cache_status`` (``"cached"`` or ``"no_cache"``)
        and ``last_updated`` (ISO timestamp of the cache, or None).
    """
    refreshed = self._get_available_tools(force_refresh=True)

    if self._tool_cache is None:
        status = "no_cache"
    else:
        status = "cached"

    stamp = self._cache_timestamp
    updated_at = stamp.isoformat() if stamp else None

    return {
        "total_tools": len(refreshed),
        "excluded_tools": len(self.exclude_tools),
        "cache_status": status,
        "last_updated": updated_at,
    }
[docs] def _format_as_json(self, result, query, limit, categories, return_call_result): """ Format the find_tools result as a standardized JSON string. Args: result: Result from find_tools method (either string, list, or tuple) query: Original search query limit: Requested number of tools categories: Requested categories filter return_call_result: Whether return_call_result was True Returns: str: JSON formatted search results """ import json try: if return_call_result and isinstance(result, tuple) and len(result) == 2: # Result is (tool_prompts, tool_names) tuple tool_prompts, tool_names = result # Convert tool prompts to clean tool info format tools = [] for i, tool_name in enumerate(tool_names): if i < len(tool_prompts): tool_prompt = tool_prompts[i] tool_info = { "name": tool_name, "description": tool_prompt.get("description", ""), "type": tool_prompt.get("type", ""), "parameters": tool_prompt.get("parameter", {}), "required": tool_prompt.get("required", []), } tools.append(tool_info) return json.dumps( { "query": query, "search_method": "AI-powered (ToolFinderLLM)", "total_matches": len(tools), "categories_filtered": categories, "tools": tools, }, indent=2, ) elif isinstance(result, list): # Result is already a list of tool prompts tools = [] for tool_prompt in result: if isinstance(tool_prompt, dict): tool_info = { "name": tool_prompt.get("name", ""), "description": tool_prompt.get("description", ""), "type": tool_prompt.get("type", ""), "parameters": tool_prompt.get("parameter", {}), "required": tool_prompt.get("required", []), } tools.append(tool_info) return json.dumps( { "query": query, "search_method": "AI-powered (ToolFinderLLM)", "total_matches": len(tools), "categories_filtered": categories, "tools": tools, }, indent=2, ) else: # Fallback for unexpected result format return json.dumps( { "query": query, "search_method": "AI-powered (ToolFinderLLM)", "total_matches": 0, "categories_filtered": categories, "tools": [], "error": f"Unexpected result format: 
{type(result)}", }, indent=2, ) except Exception as e: # Error handling return json.dumps( { "query": query, "search_method": "AI-powered (ToolFinderLLM)", "total_matches": 0, "categories_filtered": categories, "tools": [], "error": f"Formatting error: {str(e)}", }, indent=2, )
def clear_cache(self):
    """
    Discard the cached tool list and its timestamp.

    The next call to ``_get_available_tools`` will reload the list.
    """
    self._cache_timestamp = None
    self._tool_cache = None
    print("🔄 Tool cache cleared")
def run(self, arguments):
    """
    Standard tool entry point: find tools for the given arguments.

    Returns the standardized JSON report by default. The raw ``find_tools``
    result is returned instead when list-only output is active or when
    ``return_format`` is anything other than ``"json"``.

    Args:
        arguments (dict): May contain:
            - description (str, optional): Query text (passed as ``message``).
            - limit (int, optional): Number of tools (passed as ``rag_num``).
              Defaults to 5.
            - picked_tool_names (list, optional): Pre-selected tool names.
            - return_call_result (bool, optional): Also return tool names.
              Defaults to False.
            - return_format (str, optional): ``"json"`` (default) or
              ``"legacy"`` for the raw result.
            - return_list_only (bool, optional): Return only the list of
              tool specifications; the instance default applies if omitted.
            - categories (list, optional): Tool categories to filter by.

    Returns:
        str | list | tuple: JSON string, or the raw ``find_tools`` result.
    """
    import copy

    # Work on a private copy so the caller's dict is never touched.
    params = copy.deepcopy(arguments)

    query_text = params.get("description", None)
    count = params.get("limit", 5)
    preselected = params.get("picked_tool_names", None)
    with_names = params.get("return_call_result", False)
    output_format = params.get("return_format", "json")
    list_only = params.get("return_list_only", None)
    category_filter = params.get("categories", None)

    found = self.find_tools(
        message=query_text,
        picked_tool_names=preselected,
        rag_num=count,
        return_call_result=with_names,
        categories=category_filter,
        return_list_only=list_only,
    )

    # An explicit argument wins; otherwise the instance default decides.
    wants_list = self.return_list_only if list_only is None else list_only
    if wants_list:
        return found

    if output_format == "json":
        return self._format_as_json(
            found, query_text, count, category_filter, with_names
        )

    # Legacy format: hand back whatever find_tools produced.
    return found
# Legacy methods for backward compatibility
def find_tools_legacy(
    self, query, limit=5, include_reasoning=False, return_format="prompts"
):
    """
    Backward-compatible wrapper around ``run`` for callers that pass
    ``query`` instead of ``description``.

    Args:
        query (str): Query text, forwarded as ``description``.
        limit (int): Number of tools to return. Defaults to 5.
        include_reasoning (bool): Accepted for compatibility; not used.
        return_format (str): Only ``"full"`` has an effect: it requests the
            tool names as well (``return_call_result=True``).

    Returns:
        Whatever ``run`` returns for the translated arguments.
    """
    wants_names = return_format == "full"
    request = {
        "description": query,
        "limit": limit,
        "return_call_result": wants_names,
    }
    return self.run(request)