Source code for tooluniverse.compose_scripts.output_summarizer

"""
Output Summarization Composer Script

This script handles the intelligent summarization of tool outputs by:
1. Chunking large outputs into manageable pieces
2. Processing each chunk with AI-powered summarization
3. Merging the summarized chunks into a coherent final summary

The script leverages ToolUniverse's AgenticTool infrastructure to provide
intelligent, context-aware summarization that focuses on information
relevant to the original query.
"""

import logging
from typing import Dict, Any, List

# Set up logger for this module
logger = logging.getLogger("tooluniverse.output_summarizer")


def compose(arguments: Dict[str, Any], tooluniverse, call_tool) -> Dict[str, Any]:
    """
    Main composition function for output summarization.

    This function orchestrates the complete summarization workflow:
    - Chunks the input text into manageable pieces
    - Summarizes each chunk using AI
    - Merges the summaries into a final coherent result

    Args:
        arguments (Dict[str, Any]): Dictionary containing:
            - tool_output (str): The original tool output to be summarized
            - query_context (str): Context about the original query
            - tool_name (str): Name of the tool that generated the output
            - chunk_size (int, optional): Size of each chunk for processing
            - focus_areas (str, optional): Areas to focus on in summarization
            - max_summary_length (int, optional): Maximum length of final summary
        tooluniverse: ToolUniverse instance for tool execution
        call_tool: Function to call other tools within the composition

    Returns:
        Dict[str, Any]: Dictionary containing:
            - success (bool): Whether summarization was successful
            - original_length (int): Length of original output
            - summary_length (int): Length of final summary
            - chunks_processed (int): Number of chunks processed
            - summary (str): The summarized output
            - tool_name (str): Name of the original tool
            - error (str, optional): Error message if summarization failed
    """
    try:
        # Extract and validate arguments
        tool_output = arguments.get("tool_output", "")
        query_context = arguments.get("query_context", "")
        tool_name = arguments.get("tool_name", "")
        chunk_size = arguments.get("chunk_size", 32000)
        focus_areas = arguments.get("focus_areas", "key_findings_and_results")
        max_summary_length = arguments.get("max_summary_length", 3000)

        # Validate required arguments
        if not tool_output:
            return {
                "success": False,
                "error": "tool_output is required",
                "original_output": "",
            }

        logger.info(f"🔍 Starting output summarization for {tool_name}")
        logger.info(f"📊 Original output length: {len(tool_output)} characters")

        # Check if the text is long enough to warrant summarization
        if len(tool_output) < chunk_size:
            logger.info(
                f"📝 Text is shorter than chunk_size ({chunk_size}), "
                f"no summarization needed"
            )
            return {
                "success": True,
                "original_length": len(tool_output),
                "summary_length": len(tool_output),
                "chunks_processed": 0,
                "summary": tool_output,
                "tool_name": tool_name,
            }

        # Step 1: Chunk the output
        chunks = _chunk_output(tool_output, chunk_size)
        logger.info(f"📝 Split into {len(chunks)} chunks")

        # Step 2: Summarize each chunk
        chunk_summaries = []
        for i, chunk in enumerate(chunks):
            logger.info(f"🤖 Processing chunk {i + 1}/{len(chunks)}")
            summary = _summarize_chunk(
                chunk, query_context, tool_name, focus_areas, call_tool
            )
            if summary:
                chunk_summaries.append(summary)
                logger.info(f"✅ Chunk {i + 1} summarized successfully")
            else:
                logger.warning(f"❌ Chunk {i + 1} summarization failed")

        # Step 3: Merge summaries (or gracefully fall back)
        if chunk_summaries:
            final_summary = _merge_summaries(
                chunk_summaries,
                query_context,
                tool_name,
                max_summary_length,
                call_tool,
            )
            logger.info(
                f"✅ Summarization completed. Final length: "
                f"{len(final_summary)} characters"
            )
            return {
                "success": True,
                "original_length": len(tool_output),
                "summary_length": len(final_summary),
                "chunks_processed": len(chunks),
                "summary": final_summary,
                "tool_name": tool_name,
            }
        else:
            # Treat as a non-fatal failure so upstream falls back to the
            # original output
            logger.warning(
                "❌ No chunk summaries were generated. This usually indicates:"
            )
            logger.warning("   1. ToolOutputSummarizer tool is not available")
            logger.warning("   2. The output_summarization tools are not loaded")
            logger.warning("   3. There was an error in the summarization process")
            logger.warning(
                "   Please check that the SMCP server is started with hooks "
                "enabled."
            )
            return {
                "success": False,
                "error": "No chunk summaries generated",
                "original_length": len(tool_output),
                "chunks_processed": len(chunks),
                "original_output": tool_output,
                "tool_name": tool_name,
            }

    except Exception as e:
        error_msg = f"Error in output summarization: {str(e)}"
        logger.error(f"❌ {error_msg}")
        return {
            "success": False,
            "error": error_msg,
            # Re-read from arguments so this is safe even if the failure
            # happened before tool_output was assigned above
            "original_output": arguments.get("tool_output", ""),
        }
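
# A minimal usage sketch (hypothetical values; ``call_tool`` is normally
# injected by the compose-tool framework). Outputs shorter than
# ``chunk_size`` short-circuit and come back unchanged, so the stub below
# is never invoked:
#
#     result = compose(
#         {"tool_output": "short text", "tool_name": "demo"},
#         tooluniverse=None,
#         call_tool=lambda name, args: None,
#     )
#     # -> {"success": True, "original_length": 10, "summary_length": 10,
#     #     "chunks_processed": 0, "summary": "short text",
#     #     "tool_name": "demo"}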

def _chunk_output(text: str, chunk_size: int) -> List[str]:
    """
    Split text into chunks of specified size with intelligent boundary
    detection.

    This function attempts to break text at natural boundaries (sentences)
    to maintain coherence within chunks while respecting the size limit.

    Args:
        text (str): The text to be chunked
        chunk_size (int): Maximum size of each chunk

    Returns:
        List[str]: List of text chunks
    """
    if len(text) <= chunk_size:
        return [text]

    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size

        # Try to break at a sentence boundary
        if end < len(text):
            # Look for sentence endings within the last 100 characters
            search_start = max(start + chunk_size - 100, start)
            for i in range(end, search_start, -1):
                if text[i] in ".!?":
                    end = i + 1
                    break

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)

        start = end

    return chunks


def _summarize_chunk(
    chunk: str, query_context: str, tool_name: str, focus_areas: str, call_tool
) -> str:
    """
    Summarize a single chunk using the AgenticTool summarizer.

    Args:
        chunk (str): The text chunk to summarize
        query_context (str): Context about the original query
        tool_name (str): Name of the tool that generated the output
        focus_areas (str): Areas to focus on during summarization
        call_tool: Function to call the summarizer tool

    Returns:
        str: Summarized chunk text, or empty string if summarization fails
    """
    try:
        logger.debug(
            f"🔍 Attempting to call ToolOutputSummarizer with chunk length: "
            f"{len(chunk)}"
        )
        result = call_tool(
            "ToolOutputSummarizer",
            {
                "tool_output": chunk,
                "query_context": query_context,
                "tool_name": tool_name,
                "focus_areas": focus_areas,
                "max_length": 500,  # Shorter for individual chunks
            },
        )
        logger.debug(
            f"🔍 ToolOutputSummarizer returned: {type(result)} - "
            f"{str(result)[:100]}..."
        )

        # Handle different result formats
        if isinstance(result, dict):
            if result.get("success"):
                return result.get("result", "")
            elif "result" in result and isinstance(result["result"], str):
                # ComposeTool._call_tool returns {'result': 'content'} format
                return result["result"]
            elif "error" in result and isinstance(result["error"], str):
                # Backward compatibility: ComposeTool._call_tool used to put
                # string results in the error field. This workaround handles
                # both old and new behavior
                return result["error"]
            else:
                logger.warning(f"⚠️ ToolOutputSummarizer returned error: {result}")
                return ""
        elif isinstance(result, str):
            # When return_metadata=False and successful, AgenticTool returns
            # the string directly
            return result
        else:
            logger.warning(
                f"⚠️ ToolOutputSummarizer returned unexpected result format: "
                f"{type(result)}"
            )
            return ""

    except Exception as e:
        error_msg = str(e)
        logger.warning(f"⚠️ Error summarizing chunk: {error_msg}")

        # Check if the error is due to a missing tool
        if "not found" in error_msg.lower() or "ToolOutputSummarizer" in error_msg:
            logger.warning(
                "❌ ToolOutputSummarizer tool is not available. This indicates "
                "the output_summarization tools are not loaded."
            )
            logger.warning(
                "   Please ensure the SMCP server is started with hooks "
                "enabled and the output_summarization category is loaded."
            )
        return ""


def _merge_summaries(
    chunk_summaries: List[str],
    query_context: str,
    tool_name: str,
    max_length: int,
    call_tool,
) -> str:
    """
    Merge chunk summaries into a final coherent summary.

    If the combined summaries exceed the maximum length, they are further
    summarized to create a concise final result.

    Args:
        chunk_summaries (List[str]): List of summarized chunks
        query_context (str): Context about the original query
        tool_name (str): Name of the tool that generated the output
        max_length (int): Maximum length of final summary
        call_tool: Function to call the summarizer tool

    Returns:
        str: Final merged summary
    """
    if not chunk_summaries:
        return ""

    # If only one chunk, return it directly
    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    # Combine all chunk summaries
    combined_summaries = "\n\n".join(chunk_summaries)

    # If the combined length is within the limit, return it as is
    if len(combined_summaries) <= max_length:
        return combined_summaries

    # Otherwise, summarize the combined summaries
    try:
        result = call_tool(
            "ToolOutputSummarizer",
            {
                "tool_output": combined_summaries,
                "query_context": query_context,
                "tool_name": tool_name,
                "focus_areas": "consolidate_and_prioritize",
                "max_length": max_length,
            },
        )

        # Handle different result formats
        if isinstance(result, dict) and result.get("success"):
            return result.get("result", combined_summaries)
        elif isinstance(result, str):
            return result
        else:
            return combined_summaries

    except Exception as e:
        logger.warning(f"⚠️ Error merging summaries: {str(e)}")
        return combined_summaries
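

if __name__ == "__main__":
    # Lightweight smoke test (illustrative sketch only): the stub below is a
    # hypothetical stand-in for the real ToolOutputSummarizer, which is an
    # AgenticTool resolved through call_tool at runtime.
    logging.basicConfig(level=logging.INFO)

    sample = "First sentence. Second sentence. Third sentence."
    print(_chunk_output(sample, chunk_size=20))
    # -> ['First sentence.', 'Second sentence.', 'Third sentence.']

    def _fake_call_tool(name, args):
        # Echo a truncated slice of the chunk instead of an AI summary
        return {"result": args["tool_output"][:40] + "..."}

    outcome = compose(
        {
            "tool_output": sample * 10,  # long enough to trigger chunking
            "query_context": "demo query",
            "tool_name": "demo_tool",
            "chunk_size": 100,
        },
        tooluniverse=None,
        call_tool=_fake_call_tool,
    )
    print(outcome["success"], outcome["chunks_processed"])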