Source code for tooluniverse.tool_finder_llm

"""
LLM-based Tool Finder - A tool that uses LLM to find relevant tools based on descriptions.

This tool leverages AgenticTool's LLM functionality to create an intelligent tool finder
that puts only essential tool information (name and description) in the prompt to minimize
context window cost while letting the LLM decide which tools to return based on the query.

Key optimizations:
- Only sends tool name and description to LLM (no parameters, configs, etc.)
- Uses compact formatting to reduce token count
- Caches tool descriptions to avoid repeated processing
- Excludes irrelevant tools from prompt
"""

import json
from datetime import datetime

from .base_tool import BaseTool
from .tool_registry import register_tool
from .agentic_tool import AgenticTool



[docs]
@register_tool("ToolFinderLLM")
class ToolFinderLLM(BaseTool):
    """
    LLM-based tool finder that uses natural language processing to select relevant tools.

    This class leverages AgenticTool's LLM capabilities to analyze tool descriptions
    and match them with user queries. It's optimized for minimal context window cost
    by only sending essential information (tool name and description) to the LLM,
    providing an intelligent alternative to embedding-based similarity search.

    The class is registered with the tool registry under the name ``ToolFinderLLM``.
    ``run`` is the standard tool entry point; ``find_tools`` offers the same
    call signature as the embedding-based finder it is meant to replace.

    Cost optimizations:
    - Only includes tool name and description in LLM prompt
    - Uses compact formatting to minimize token usage
    - Excludes unnecessary tool metadata and parameters
    - Implements caching to avoid repeated tool processing
    """


[docs]
    def __init__(self, tool_config, tooluniverse=None):
        """
        Configure the finder and build the AgenticTool that performs the LLM calls.

        Args:
            tool_config (dict): Tool configuration. ``name``, ``description``,
                ``exclude_tools``, ``include_categories`` and ``exclude_categories``
                are read from the top level; LLM settings, ``return_list_only``
                and a fallback ``exclude_tools`` are read from ``configs``.
            tooluniverse: ToolUniverse instance whose tools are searched.
                Defaults to None.
        """
        super().__init__(tool_config)
        self.tooluniverse = tooluniverse

        # Identity of this tool
        self.name = tool_config.get("name", "ToolFinderLLM")
        self.description = tool_config.get("description", "LLM-based tool finder")

        # LLM settings all live under the "configs" section
        llm_settings = tool_config.get("configs", {})
        self.api_type = llm_settings.get("api_type", "CHATGPT")
        self.model_id = llm_settings.get("model_id", "gpt-4o-1120")
        self.temperature = llm_settings.get("temperature", 0.1)
        self.max_new_tokens = llm_settings.get("max_new_tokens", 4096)
        self.return_json = llm_settings.get("return_json", True)

        # Exclusion list: top-level key wins, then "configs", then the built-in default
        builtin_excluded = [
            "Tool_RAG",
            "Tool_Finder",
            "Finish",
            "CallAgent",
            "ToolFinderLLM",
        ]
        configs_excluded = llm_settings.get("exclude_tools", builtin_excluded)
        self.exclude_tools = tool_config.get("exclude_tools", configs_excluded)
        self.include_categories = tool_config.get("include_categories", None)
        self.exclude_categories = tool_config.get("exclude_categories", None)

        # Output shape: False unless the config asks for list-only results
        self.return_list_only = llm_settings.get("return_list_only", False)

        # Build the AgenticTool wrapper used for every LLM request
        self._init_agentic_tool()

        # Tool description cache starts out empty
        self._tool_cache = None
        self._cache_timestamp = None



[docs]
    def _init_agentic_tool(self):
        """
        Build the internal AgenticTool that performs the LLM-backed selection.

        The tool is configured with this finder's prompt template, three
        required input arguments (``query``, ``tools_descriptions``, ``limit``)
        and the LLM settings stored on the instance. Any construction error is
        reported on stdout and re-raised.
        """
        argument_names = ["query", "tools_descriptions", "limit"]

        # Schema of the three values substituted into the prompt template
        argument_schema = {
            "query": {
                "type": "string",
                "description": "The user query describing what tools are needed",
                "required": True,
            },
            "tools_descriptions": {
                "type": "string",
                "description": "JSON string containing all available tool descriptions",
                "required": True,
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of tools to return",
                "required": True,
            },
        }

        # LLM settings forwarded unchanged from this finder's own configuration
        llm_settings = {
            "api_type": self.api_type,
            "model_id": self.model_id,
            "temperature": self.temperature,
            "max_new_tokens": self.max_new_tokens,
            "return_json": self.return_json,
            "return_metadata": False,
        }

        agentic_config = {
            "name": f"{self.name}_agentic",
            "description": "Internal agentic tool for LLM-based tool selection",
            "type": "AgenticTool",
            "prompt": self._get_tool_selection_prompt(),
            # Separate list objects so neither entry aliases the other
            "input_arguments": list(argument_names),
            "parameter": {
                "type": "object",
                "properties": argument_schema,
                "required": list(argument_names),
            },
            "configs": llm_settings,
        }

        try:
            self.agentic_tool = AgenticTool(agentic_config)
            print(
                f"✅ Successfully initialized {self.name} with LLM model: {self.model_id}"
            )
        except Exception as e:
            print(f"❌ Failed to initialize AgenticTool for {self.name}: {str(e)}")
            raise



[docs]
    def _get_tool_selection_prompt(self):
        """Get the prompt template for tool selection. Optimized for minimal token usage."""
        return """You are a tool selection assistant. Select the most relevant tools for the user query.

Query: {query}

Tools:
{tools_descriptions}

Select the {limit} most relevant tools. Return JSON:
{{
    "selected_tools": [
        {{
            "name": "tool_name",
            "relevance_score": 0.95,
            "reasoning": "Why relevant"
        }}
    ],
    "total_selected": 1,
    "selection_reasoning": "Overall strategy"
}}

Requirements:
- Only select existing tools from the list
- Rank by relevance (0.0-1.0)
- Prioritize domain-specific tools for specialized queries
- Return requested number or fewer if insufficient relevant tools"""



[docs]
    def _get_available_tools(self, force_refresh=False):
        """
        Get available tools with their descriptions, with caching.

        Args:
            force_refresh (bool): Whether to force refresh the cache

        Returns
            list: List of tool dictionaries with names and descriptions
        """
        current_time = datetime.now()

        # Use cache if available and not expired (cache for 5 minutes)
        if (
            not force_refresh
            and self._tool_cache is not None
            and self._cache_timestamp is not None
            and (current_time - self._cache_timestamp).seconds < 300
        ):
            return self._tool_cache

        if not self.tooluniverse:
            print("⚠️ ToolUniverse reference not available")
            return []

        try:
            # Get tool names and descriptions
            tool_names, tool_descriptions = self.tooluniverse.refresh_tool_name_desc(
                enable_full_desc=True,
                exclude_names=self.exclude_tools,
                include_categories=self.include_categories,
                exclude_categories=self.exclude_categories,
            )

            # Format tools for LLM
            available_tools = []
            for name, desc in zip(tool_names, tool_descriptions):
                if name not in self.exclude_tools:
                    available_tools.append({"name": name, "description": desc})

            # Update cache
            self._tool_cache = available_tools
            self._cache_timestamp = current_time

            print(f"📋 Loaded {len(available_tools)} tools for LLM-based selection")
            return available_tools

        except Exception as e:
            print(f"❌ Error getting available tools: {str(e)}")
            return []



[docs]
    def _prefilter_tools_by_keywords(self, available_tools, query, max_tools=100):
        """
        Pre-filter tools using keyword matching to reduce context size before LLM processing.

        Args:
            available_tools (list): All available tools
            query (str): User query
            max_tools (int): Maximum number of tools to send to LLM

        Returns
            list: Filtered list of tools
        """
        if len(available_tools) <= max_tools:
            return available_tools

        query_lower = query.lower()
        query_words = set(query_lower.split())

        # Score tools based on keyword matches
        scored_tools = []
        for tool in available_tools:
            name_lower = tool.get("name", "").lower()
            desc_lower = tool.get("description", "").lower()

            # Calculate basic relevance score
            score = 0

            # Exact name matches get high priority
            if query_lower in name_lower:
                score += 10

            # Word matches in name and description
            for word in query_words:
                if len(word) > 2:  # Skip very short words
                    if word in name_lower:
                        score += 3
                    if word in desc_lower:
                        score += 1

            scored_tools.append((score, tool))

        # Sort by score and take top tools
        scored_tools.sort(key=lambda x: x[0], reverse=True)
        filtered_tools = [tool for score, tool in scored_tools[:max_tools]]

        print(
            f"🔍 Pre-filtered from {len(available_tools)} to {len(filtered_tools)} tools using keywords"
        )
        return filtered_tools



[docs]
    def _format_tools_for_prompt(self, tools):
        """
        Format tools for inclusion in the LLM prompt with minimal information to reduce context cost.
        Only includes name and description to minimize token usage.

        Args:
            tools (list): List of tool dictionaries

        Returns
            str: Compact formatted tool descriptions for the prompt
        """
        formatted_tools = []
        for i, tool in enumerate(tools, 1):
            name = tool.get("name", "Unknown")
            description = tool.get("description", "No description available")

            # Truncate very long descriptions to save tokens
            if len(description) > 150:
                description = description[:150] + "..."

            # Use more compact formatting to save tokens
            formatted_tools.append(f"{i}. {name}: {description}")

        return "\n".join(formatted_tools)



[docs]
    def find_tools_llm(self, query, limit=5, include_reasoning=False, categories=None):
        """
        Find relevant tools using LLM-based selection.

        The available tools are optionally narrowed by category, pre-filtered by
        keyword to at most 50 candidates, rendered into the prompt and sent to
        the AgenticTool. The reply is parsed as JSON and the selected names are
        resolved to tool specifications through the ToolUniverse reference.

        Args:
            query (str): User query describing needed functionality
            limit (int): Maximum number of tools to return
            include_reasoning (bool): Whether to include selection reasoning
            categories (list, optional): List of tool categories to filter by

        Returns:
            dict: On success, ``success`` is True and the dict carries
            ``selected_tools`` (names), ``tool_objects``, ``tool_prompts``,
            ``total_selected``, ``total_available``, ``query`` and
            ``limit_requested`` (plus reasoning fields when requested). On any
            failure, ``success`` is False, ``error`` describes the problem and
            ``selected_tools`` is empty. Exceptions are caught and reported in
            the returned dict rather than raised.
        """
        try:
            # Get available tools
            available_tools = self._get_available_tools()

            if not available_tools:
                return {
                    "success": False,
                    "error": "No tools available for selection",
                    "selected_tools": [],
                    "total_available": 0,
                }

            # Filter by categories if specified
            if categories:
                # Index name -> category once instead of rescanning all loaded
                # tools per candidate; setdefault keeps the first definition of
                # a name, matching a first-match linear search.
                category_by_name = {}
                for full_tool in self.tooluniverse.return_all_loaded_tools():
                    category_by_name.setdefault(
                        full_tool.get("name"), full_tool.get("category", "unknown")
                    )

                available_tools = [
                    tool_info
                    for tool_info in available_tools
                    if tool_info["name"] in category_by_name
                    and category_by_name[tool_info["name"]] in categories
                ]

                if not available_tools:
                    return {
                        "success": False,
                        "error": f"No tools available in categories: {categories}",
                        "selected_tools": [],
                        "total_available": 0,
                    }

            # Pre-filter tools to reduce context size for LLM
            available_tools = self._prefilter_tools_by_keywords(
                available_tools, query, max_tools=50
            )

            # Format tools for LLM prompt with minimal information to reduce context cost
            tools_formatted = self._format_tools_for_prompt(available_tools)

            # Prepare arguments for the agentic tool
            agentic_args = {
                "query": query,
                "tools_descriptions": tools_formatted,
                "limit": limit,
            }

            print(f"🤖 Querying LLM to select tools for: '{query[:100]}...'")

            # Call the LLM through AgenticTool
            result = self.agentic_tool.run(agentic_args)

            # The reply may be wrapped as {"result": ...} or returned directly
            if isinstance(result, dict) and "result" in result:
                llm_response = result["result"]
            else:
                llm_response = result

            # Parse JSON response from LLM
            if isinstance(llm_response, str):
                try:
                    parsed_response = json.loads(llm_response)
                except json.JSONDecodeError as e:
                    print(f"❌ Failed to parse LLM response as JSON: {e}")
                    print(f"Raw response: {llm_response[:500]}...")
                    return {
                        "success": False,
                        "error": f"Invalid JSON response from LLM: {str(e)}",
                        "raw_response": llm_response,
                        "selected_tools": [],
                    }
            else:
                parsed_response = llm_response

            # Anything but a JSON object cannot carry "selected_tools"; report
            # it explicitly instead of failing later with an attribute error
            if not isinstance(parsed_response, dict):
                kind = type(parsed_response).__name__
                print(f"❌ Unexpected LLM response type: {kind}")
                return {
                    "success": False,
                    "error": f"Unexpected LLM response type: {kind}",
                    "raw_response": llm_response,
                    "selected_tools": [],
                    "query": query,
                }

            # Extract selected tools, tolerating a null list, bare-string
            # entries and malformed entries (which are skipped)
            selected_tools = parsed_response.get("selected_tools") or []
            tool_names = []
            for entry in selected_tools:
                if isinstance(entry, dict):
                    entry_name = entry.get("name")
                elif isinstance(entry, str):
                    entry_name = entry
                else:
                    entry_name = None
                if entry_name:
                    tool_names.append(entry_name)

            # The LLM is only asked for `limit` tools; enforce it here as well
            if isinstance(limit, int) and limit >= 0:
                tool_names = tool_names[:limit]

            # Get actual tool objects
            if tool_names:
                selected_tool_objects = (
                    self.tooluniverse.get_tool_specification_by_names(tool_names)
                )
                tool_prompts = self.tooluniverse.prepare_tool_prompts(
                    selected_tool_objects
                )
            else:
                selected_tool_objects = []
                tool_prompts = []

            result_dict = {
                "success": True,
                "selected_tools": tool_names,
                "tool_objects": selected_tool_objects,
                "tool_prompts": tool_prompts,
                "total_selected": len(tool_names),
                "total_available": len(available_tools),
                "query": query,
                "limit_requested": limit,
            }

            if include_reasoning:
                result_dict.update(
                    {
                        # Raw per-tool entries exactly as the LLM returned them
                        "selection_details": selected_tools,
                        "selection_reasoning": parsed_response.get(
                            "selection_reasoning", ""
                        ),
                        "llm_response": parsed_response,
                    }
                )

            print(f"✅ Selected {len(tool_names)} tools: {', '.join(tool_names)}")
            return result_dict

        except Exception as e:
            # Boundary handler: callers receive a failure dict, never an exception
            print(f"❌ Error in LLM-based tool selection: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "selected_tools": [],
                "query": query,
            }



[docs]
    def find_tools(
        self,
        message=None,
        picked_tool_names=None,
        rag_num=5,
        return_call_result=False,
        categories=None,
        return_list_only=None,
    ):
        """
        Find relevant tools based on a message or pre-selected tool names.

        This method matches the interface of the original ToolFinderEmbedding to ensure
        seamless replacement. It uses LLM-based selection instead of embedding similarity.

        Args:
            message (str, optional): Query message to find tools for. Required if picked_tool_names is None.
            picked_tool_names (list, optional): Pre-selected tool names to process. Required if message is None.
            rag_num (int, optional): Number of tools to return after filtering. Defaults to 5.
            return_call_result (bool, optional): If True, returns both prompts and tool names. Defaults to False.
            categories (list, optional): List of tool categories to filter by. Applied before LLM selection.
            return_list_only (bool, optional): If True, returns only a list of tool specifications.
                Overrides other return options. When None, the instance-level
                ``return_list_only`` setting is used.

        Returns:
            str, tuple, or list:
                - If return_list_only is True: List of tool specifications
                - If return_call_result is False: Tool prompts as produced by
                  the ToolUniverse ``prepare_tool_prompts`` call
                - If return_call_result is True: Tuple of (tool_prompts, tool_names)
                When the LLM selection fails, the empty counterpart is returned
                instead: ``[]``, ``""`` or ``("", [])`` respectively.

        Raises:
            AssertionError: If both message and picked_tool_names are None
        """
        # Use class-level configuration if parameter not specified
        if return_list_only is None:
            return_list_only = self.return_list_only

        if picked_tool_names is None:
            if message is None:
                # Raised explicitly: a bare ``assert`` is removed under
                # ``python -O`` and would let a None query reach the LLM.
                # AssertionError is kept so existing handlers still match.
                raise AssertionError(
                    "Either 'message' or 'picked_tool_names' must be provided"
                )

            # Use LLM-based tool selection with category filtering
            result = self.find_tools_llm(
                query=message,
                limit=rag_num,
                include_reasoning=False,
                categories=categories,
            )

            if not result["success"]:
                # Return empty results on failure
                if return_list_only:
                    return []  # Return empty list for tool specifications
                elif return_call_result:
                    return "", []
                return ""

            picked_tool_names = result["selected_tools"]

        # Filter out special tools (matching original behavior), then cap at rag_num
        picked_tool_names = [
            tool for tool in picked_tool_names if tool not in self.exclude_tools
        ][:rag_num]

        # Get tool objects and prepare prompts (needed for both list and other formats)
        picked_tools = self.tooluniverse.get_tool_specification_by_names(
            picked_tool_names
        )
        picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)

        # If only list format is requested, return the tool specifications as a list
        if return_list_only:
            return picked_tools_prompt  # Return list of tool specifications instead of just names

        if return_call_result:
            return picked_tools_prompt, picked_tool_names
        return picked_tools_prompt



[docs]
    def get_tool_stats(self):
        """
        Report counts and cache state for the tools visible to this finder.

        The tool list is reloaded (cache bypassed) before the numbers are taken.

        Returns:
            dict: ``total_tools``, ``excluded_tools``, ``cache_status``
            (``"cached"`` or ``"no_cache"``) and ``last_updated`` (ISO timestamp
            of the cache, or None when there is none)
        """
        refreshed = self._get_available_tools(force_refresh=True)

        if self._tool_cache is None:
            status = "no_cache"
        else:
            status = "cached"

        stamp = self._cache_timestamp
        return {
            "total_tools": len(refreshed),
            "excluded_tools": len(self.exclude_tools),
            "cache_status": status,
            "last_updated": stamp.isoformat() if stamp else None,
        }



[docs]
    def _format_as_json(self, result, query, limit, categories, return_call_result):
        """
        Format the find_tools result as a standardized JSON string.

        Args:
            result: Result from find_tools method (either string, list, or tuple)
            query: Original search query
            limit: Requested number of tools
            categories: Requested categories filter
            return_call_result: Whether return_call_result was True

        Returns
            str: JSON formatted search results
        """
        import json

        try:
            if return_call_result and isinstance(result, tuple) and len(result) == 2:
                # Result is (tool_prompts, tool_names) tuple
                tool_prompts, tool_names = result

                # Convert tool prompts to clean tool info format
                tools = []
                for i, tool_name in enumerate(tool_names):
                    if i < len(tool_prompts):
                        tool_prompt = tool_prompts[i]
                        tool_info = {
                            "name": tool_name,
                            "description": tool_prompt.get("description", ""),
                            "type": tool_prompt.get("type", ""),
                            "parameters": tool_prompt.get("parameter", {}),
                            "required": tool_prompt.get("required", []),
                        }
                        tools.append(tool_info)

                return json.dumps(
                    {
                        "query": query,
                        "search_method": "AI-powered (ToolFinderLLM)",
                        "total_matches": len(tools),
                        "categories_filtered": categories,
                        "tools": tools,
                    },
                    indent=2,
                )

            elif isinstance(result, list):
                # Result is already a list of tool prompts
                tools = []
                for tool_prompt in result:
                    if isinstance(tool_prompt, dict):
                        tool_info = {
                            "name": tool_prompt.get("name", ""),
                            "description": tool_prompt.get("description", ""),
                            "type": tool_prompt.get("type", ""),
                            "parameters": tool_prompt.get("parameter", {}),
                            "required": tool_prompt.get("required", []),
                        }
                        tools.append(tool_info)

                return json.dumps(
                    {
                        "query": query,
                        "search_method": "AI-powered (ToolFinderLLM)",
                        "total_matches": len(tools),
                        "categories_filtered": categories,
                        "tools": tools,
                    },
                    indent=2,
                )

            else:
                # Fallback for unexpected result format
                return json.dumps(
                    {
                        "query": query,
                        "search_method": "AI-powered (ToolFinderLLM)",
                        "total_matches": 0,
                        "categories_filtered": categories,
                        "tools": [],
                        "error": f"Unexpected result format: {type(result)}",
                    },
                    indent=2,
                )

        except Exception as e:
            # Error handling
            return json.dumps(
                {
                    "query": query,
                    "search_method": "AI-powered (ToolFinderLLM)",
                    "total_matches": 0,
                    "categories_filtered": categories,
                    "tools": [],
                    "error": f"Formatting error: {str(e)}",
                },
                indent=2,
            )



[docs]
    def clear_cache(self):
        """Reset the cached tool list and its timestamp, then report it on stdout."""
        # Both fields are reset together so the cache never looks half-populated
        self._tool_cache = self._cache_timestamp = None
        print("🔄 Tool cache cleared")



[docs]
    def run(self, arguments):
        """
        Standard tool entry point: search for tools and format the outcome.

        The arguments are deep-copied before use, forwarded to ``find_tools``,
        and the result is returned as a JSON string unless list-only or legacy
        output was requested.

        Args:
            arguments (dict): Dictionary containing:
                - description (str, optional): Query message to find tools for (maps to 'message')
                - limit (int, optional): Number of tools to return (maps to 'rag_num'). Defaults to 5.
                - picked_tool_names (list, optional): Pre-selected tool names to process
                - return_call_result (bool, optional): Whether to return both prompts and names. Defaults to False.
                - return_format (str, optional): 'json' (default) or 'legacy' for old format
                - return_list_only (bool, optional): Whether to return only tool specifications as a list
                - categories (list, optional): List of tool categories to filter by

        Returns:
            The raw ``find_tools`` result when list-only output applies or
            ``return_format`` is not ``'json'``; otherwise the JSON string
            produced by ``_format_as_json``.
        """
        import copy

        # Work on a private copy so the caller's dictionary is never touched
        params = copy.deepcopy(arguments)

        message = params.get("description", None)
        rag_num = params.get("limit", 5)
        picked_tool_names = params.get("picked_tool_names", None)
        return_call_result = params.get("return_call_result", False)
        return_format = params.get("return_format", "json")
        # None means "fall back to the instance-level setting"
        return_list_only = params.get("return_list_only", None)
        categories = params.get("categories", None)

        found = self.find_tools(
            message=message,
            picked_tool_names=picked_tool_names,
            rag_num=rag_num,
            return_call_result=return_call_result,
            categories=categories,
            return_list_only=return_list_only,
        )

        # Resolve the effective list-only flag the same way find_tools does
        if return_list_only is None:
            list_only = self.return_list_only
        else:
            list_only = return_list_only

        # List-only and legacy output both pass the result through untouched
        if list_only or return_format != "json":
            return found

        return self._format_as_json(
            found, message, rag_num, categories, return_call_result
        )


    # Legacy methods for backward compatibility

[docs]
    def find_tools_legacy(
        self, query, limit=5, include_reasoning=False, return_format="prompts"
    ):
        """
        Backward-compatible wrapper around ``run`` that takes 'query' instead of 'description'.

        Args:
            query: Search text, forwarded as ``description``
            limit: Number of tools to return. Defaults to 5.
            include_reasoning: Accepted for compatibility; not forwarded
            return_format: ``"full"`` sets ``return_call_result`` to True in the
                forwarded request; any other value sets it to False

        Returns:
            Whatever ``run`` returns for the translated arguments
        """
        wants_names = return_format == "full"
        request = {
            "description": query,
            "limit": limit,
            "return_call_result": wants_names,
        }
        return self.run(request)