import copy
import json
import os
import re
def compose(arguments, tooluniverse, call_tool):
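    """Iteratively optimize a tool's description and parameter descriptions.

    Workflow: generate test cases for the tool described by
    ``arguments["tool_config"]``, execute them via ``tooluniverse.run_one_function``,
    then run up to ``max_iterations`` optimization rounds in which helper tools
    (``TestCaseGenerator``, ``DescriptionAnalyzer``, ``ArgumentDescriptionOptimizer``,
    ``DescriptionQualityEvaluator``) refine the descriptions until the quality score
    reaches ``satisfaction_threshold``. An optimization report is written to
    ``output_file`` (or a default path), and a summary dict with the optimized
    configuration, optimization history, and test results is returned.
    """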
tool_config = arguments["tool_config"]
tool_name = tool_config.get("name", "unnamed_tool")
arguments.get("save_to_file", False)
output_file = arguments.get("output_file")
max_iterations = arguments.get("max_iterations", 3) # Maximum optimization rounds
satisfaction_threshold = arguments.get(
"satisfaction_threshold", 8
) # Quality score threshold (1-10)
# 1. Generate test cases
tc_result = call_tool("TestCaseGenerator", {"tool_config": tool_config})
print("TestCaseGenerator result:", json.dumps(tc_result, indent=2))
# Handle the result - it should be a list of test cases or a dict containing test cases
test_cases = []
if isinstance(tc_result, list):
test_cases = tc_result
elif isinstance(tc_result, dict):
# Check if it has a 'result' key (from agentic tool)
if "result" in tc_result:
result_data = tc_result["result"]
if isinstance(result_data, list):
test_cases = result_data
elif isinstance(result_data, str):
# Try to parse JSON string with robust whitespace handling
try:
# Multiple parsing strategies for robust handling
strategies = [
result_data.strip(), # Simple strip
re.sub(r"\s+", " ", result_data.strip()), # Collapse whitespace
re.sub(r"\s", "", result_data), # Remove all whitespace
]
for strategy in strategies:
try:
parsed_result = json.loads(strategy)
if isinstance(parsed_result, list):
test_cases = parsed_result
break
elif isinstance(parsed_result, dict):
test_cases = parsed_result.get("test_cases", [])
break
except json.JSONDecodeError:
continue
# If direct parsing fails, try pattern matching
if not test_cases:
json_patterns = [
r"\[.*?\]", # Array pattern
r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", # Single object
]
for strategy in strategies:
for pattern in json_patterns:
matches = re.findall(pattern, strategy, re.DOTALL)
for match in matches:
try:
parsed_result = json.loads(match)
if isinstance(parsed_result, list):
test_cases = parsed_result
break
elif isinstance(parsed_result, dict):
test_cases = [parsed_result]
break
except json.JSONDecodeError:
continue
if test_cases:
break
if test_cases:
break
except Exception as e:
print(f"Failed to parse test cases from result: {e}")
test_cases = []
else:
test_cases = (
result_data.get("test_cases", [])
if isinstance(result_data, dict)
else []
)
else:
test_cases = tc_result.get("test_cases", [])
# If we still don't have test cases, generate some basic ones from the tool config
if not test_cases:
print("No valid test cases found, generating basic test cases from tool config")
tool_params = tool_config.get("parameter", {}).get("properties", {})
required_params = []
# Extract required parameters correctly
if "parameter" in tool_config and "properties" in tool_config["parameter"]:
properties = tool_config["parameter"]["properties"]
for param_name, param_info in properties.items():
if param_info.get("required", False):
required_params.append(param_name)
# If no explicitly required params found, check if there's a 'required' field at the parameter level
if not required_params and "required" in tool_config["parameter"]:
required_params = tool_config["parameter"]["required"]
# Generate a basic test case with required parameters
if required_params and tool_params:
basic_case = {}
for param in required_params:
if param in tool_params:
param_type = tool_params[param].get("type", "string")
if param_type == "string":
basic_case[param] = f"test_{param}_value"
elif param_type == "integer":
basic_case[param] = 10
elif param_type == "boolean":
basic_case[param] = True
else:
basic_case[param] = "test_value"
if basic_case:
test_cases = [basic_case]
# If still no test cases, create a minimal one with available params
if not test_cases and tool_params:
basic_case = {}
for param_name, param_info in list(tool_params.items())[
:1
]: # Take first param
param_type = param_info.get("type", "string")
if param_type == "string":
basic_case[param_name] = f"test_{param_name}_value"
elif param_type == "integer":
basic_case[param_name] = 10
elif param_type == "boolean":
basic_case[param_name] = True
else:
basic_case[param_name] = "test_value"
if basic_case:
test_cases = [basic_case]
if not test_cases:
return {
"error": "No test cases generated and could not create basic test cases.",
"raw_result": tc_result,
}
# 2. Run tool on each test case
results = []
for case in test_cases:
try:
            # If case is a full tool-call dict with 'name' and 'arguments', use its
            # arguments; otherwise treat the case itself as the arguments
            if isinstance(case, dict) and "arguments" in case:
                call_args = case["arguments"]
            else:
                call_args = case
            result = tooluniverse.run_one_function(
                {"name": tool_name, "arguments": call_args}
            )
        except Exception as e:
            result = {"error": str(e)}
        results.append({"input": call_args, "output": result})
# 3. Multi-round optimization until satisfactory
    # Deep copy so in-place parameter description updates don't mutate the original config
    current_tool_config = copy.deepcopy(tool_config)
original_description = tool_config.get("description", "")
optimization_history = []
previous_feedback = "" # Track previous round feedback
all_test_results = results.copy() # Accumulate test results from all rounds
for iteration in range(max_iterations):
print(f"\n=== Optimization Round {iteration + 1}/{max_iterations} ===")
current_description = current_tool_config.get("description", "")
# 3a. Generate additional test cases based on previous feedback (after first round)
current_round_results = []
if iteration > 0 and previous_feedback:
print("🧪 Generating additional test cases based on previous feedback...")
try:
# Create an enhanced TestCaseGenerator prompt that includes previous feedback
enhanced_tool_config = current_tool_config.copy()
enhanced_tool_config["_optimization_feedback"] = previous_feedback
enhanced_tool_config["_iteration"] = iteration + 1
new_tc_result = call_tool(
"TestCaseGenerator", {"tool_config": enhanced_tool_config}
)
print(
f"Additional TestCaseGenerator result: {json.dumps(new_tc_result, indent=2)}"
)
# Parse new test cases with robust whitespace handling
new_test_cases = []
if isinstance(new_tc_result, dict) and "result" in new_tc_result:
result_data = new_tc_result["result"]
if isinstance(result_data, str):
# Aggressive cleaning of whitespace and newlines
cleaned_result = re.sub(r"\s+", " ", result_data.strip())
# Remove all whitespace and newlines completely for pure JSON detection
minimal_result = re.sub(r"\s", "", result_data)
# Try multiple parsing strategies
parsing_strategies = [
cleaned_result, # Whitespace-collapsed version
minimal_result, # All whitespace removed
result_data.strip(), # Simple strip
]
# Look for JSON array patterns
json_patterns = [
r"\[.*?\]", # Array pattern
r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", # Single object
]
for strategy in parsing_strategies:
# Try direct parsing first
try:
parsed_result = json.loads(strategy)
if isinstance(parsed_result, list):
new_test_cases = parsed_result
break
elif isinstance(parsed_result, dict):
new_test_cases = [parsed_result]
break
except json.JSONDecodeError:
pass
# Try pattern matching if direct parsing fails
if not new_test_cases:
for pattern in json_patterns:
matches = re.findall(pattern, strategy, re.DOTALL)
for match in matches:
try:
parsed_result = json.loads(match)
if isinstance(parsed_result, list):
new_test_cases = parsed_result
break
elif isinstance(parsed_result, dict):
new_test_cases = [parsed_result]
break
except json.JSONDecodeError:
continue
if new_test_cases:
break
if new_test_cases:
break
if not new_test_cases:
print(
f"Failed to parse new test cases from: {result_data[:200]}..."
)
elif isinstance(result_data, list):
new_test_cases = result_data
# Run new test cases
if new_test_cases:
print(f"📊 Running {len(new_test_cases)} additional test cases...")
for case in new_test_cases:
try:
if isinstance(case, dict) and "arguments" in case:
arguments = case["arguments"]
elif isinstance(case, dict):
arguments = case
else:
arguments = case
result = tooluniverse.run_one_function(
{"name": tool_name, "arguments": arguments}
)
except Exception as e:
result = {"error": str(e)}
current_round_results.append(
{"input": arguments, "output": result}
)
# Add new results to accumulated results
all_test_results.extend(current_round_results)
print(f"✅ Added {len(current_round_results)} new test results")
else:
print("⚠️ No additional test cases generated")
except Exception as e:
print(f"❌ Failed to generate additional test cases: {str(e)}")
# 3b. Analyze results and suggest optimized description using ALL accumulated test results
# Include previous feedback for iterative improvement
analysis_input = {
"original_description": current_description,
"test_results": json.dumps(
all_test_results
), # Use ALL accumulated test results
}
# Add previous feedback to help guide the next optimization
if previous_feedback and iteration > 0:
enhanced_description = f"{current_description}\n\nPrevious optimization feedback: {previous_feedback}"
analysis_input["original_description"] = enhanced_description
analysis = call_tool("DescriptionAnalyzer", analysis_input)
# Handle the analysis result
optimized_description = None
rationale = None
if isinstance(analysis, dict):
if "result" in analysis:
# If it's wrapped in a result key
result_data = analysis["result"]
if isinstance(result_data, str):
try:
parsed_analysis = json.loads(result_data)
optimized_description = parsed_analysis.get(
"optimized_description"
)
rationale = parsed_analysis.get("rationale")
except json.JSONDecodeError:
optimized_description = result_data
rationale = "Parsed from raw text result"
elif isinstance(result_data, dict):
optimized_description = result_data.get("optimized_description")
rationale = result_data.get("rationale")
else:
# Direct dict result
optimized_description = analysis.get("optimized_description")
rationale = analysis.get("rationale")
elif isinstance(analysis, str):
optimized_description = analysis
rationale = "Generated from string result"
# Fallback if we still don't have an optimized description
if not optimized_description:
optimized_description = f"Enhanced description: {current_description} (Based on test results analysis)"
rationale = (
"Generated fallback description based on original and test results"
)
# 3c. Optimize argument descriptions using ALL accumulated test results
optimized_parameters = {}
argument_rationale = ""
if (
"parameter" in current_tool_config
and "properties" in current_tool_config["parameter"]
):
try:
# Include previous feedback for parameter optimization too
arg_analysis_input = {
"parameter_schema": json.dumps(current_tool_config["parameter"]),
"test_results": json.dumps(
all_test_results
), # Use ALL accumulated test results
}
# Add previous feedback to parameter optimization
if previous_feedback and iteration > 0:
# Extract parameter-specific feedback from previous round
param_feedback = (
f"Previous feedback for improvement: {previous_feedback}"
)
enhanced_schema = current_tool_config["parameter"].copy()
enhanced_schema["_previous_feedback"] = param_feedback
arg_analysis_input["parameter_schema"] = json.dumps(enhanced_schema)
arg_analysis = call_tool(
"ArgumentDescriptionOptimizer", arg_analysis_input
)
# Parse argument optimization results
if isinstance(arg_analysis, dict):
if "result" in arg_analysis:
result_data = arg_analysis["result"]
if isinstance(result_data, str):
try:
parsed_arg_analysis = json.loads(result_data)
raw_params = parsed_arg_analysis.get(
"optimized_parameters", {}
)
# Extract description strings from the result structure
optimized_parameters = {}
for param_name, param_data in raw_params.items():
if (
isinstance(param_data, dict)
and "description" in param_data
):
optimized_parameters[param_name] = param_data[
"description"
]
elif isinstance(param_data, str):
optimized_parameters[param_name] = param_data
else:
optimized_parameters[param_name] = str(
param_data
)
argument_rationale = parsed_arg_analysis.get(
"rationale", ""
)
except json.JSONDecodeError:
print("Failed to parse argument optimization result")
elif isinstance(result_data, dict):
raw_params = result_data.get("optimized_parameters", {})
# Extract description strings from the result structure
optimized_parameters = {}
for param_name, param_data in raw_params.items():
if (
isinstance(param_data, dict)
and "description" in param_data
):
optimized_parameters[param_name] = param_data[
"description"
]
elif isinstance(param_data, str):
optimized_parameters[param_name] = param_data
else:
optimized_parameters[param_name] = str(param_data)
argument_rationale = result_data.get("rationale", "")
else:
raw_params = arg_analysis.get("optimized_parameters", {})
# Extract description strings from the result structure
optimized_parameters = {}
for param_name, param_data in raw_params.items():
if (
isinstance(param_data, dict)
and "description" in param_data
):
optimized_parameters[param_name] = param_data[
"description"
]
elif isinstance(param_data, str):
optimized_parameters[param_name] = param_data
else:
optimized_parameters[param_name] = str(param_data)
argument_rationale = arg_analysis.get("rationale", "")
except Exception as e:
print(f"Failed to optimize argument descriptions: {str(e)}")
argument_rationale = (
f"Failed to optimize argument descriptions: {str(e)}"
)
# 3d. Update current tool config with optimizations
current_tool_config["description"] = optimized_description
if (
optimized_parameters
and "parameter" in current_tool_config
and "properties" in current_tool_config["parameter"]
):
for param_name, new_description in optimized_parameters.items():
if param_name in current_tool_config["parameter"]["properties"]:
current_tool_config["parameter"]["properties"][param_name][
"description"
] = new_description
# 3e. Evaluate quality of current optimization using ALL accumulated test results
try:
quality_evaluation = call_tool(
"DescriptionQualityEvaluator",
{
"tool_description": optimized_description,
"parameter_descriptions": json.dumps(optimized_parameters),
"test_results": json.dumps(
all_test_results
), # Use ALL accumulated test results
},
)
# Parse quality evaluation result
quality_score = 0
is_satisfactory = False
feedback = ""
criteria_scores = {}
if isinstance(quality_evaluation, dict):
if "result" in quality_evaluation:
result_data = quality_evaluation["result"]
if isinstance(result_data, str):
try:
parsed_eval = json.loads(result_data)
quality_score = parsed_eval.get("overall_score", 0)
is_satisfactory = parsed_eval.get("is_satisfactory", False)
feedback = parsed_eval.get("feedback", "")
criteria_scores = parsed_eval.get("criteria_scores", {})
except json.JSONDecodeError:
quality_score = 5 # Default middle score
feedback = "Failed to parse evaluation result"
elif isinstance(result_data, dict):
quality_score = result_data.get("overall_score", 0)
is_satisfactory = result_data.get("is_satisfactory", False)
feedback = result_data.get("feedback", "")
criteria_scores = result_data.get("criteria_scores", {})
else:
quality_score = quality_evaluation.get("overall_score", 0)
is_satisfactory = quality_evaluation.get("is_satisfactory", False)
feedback = quality_evaluation.get("feedback", "")
criteria_scores = quality_evaluation.get("criteria_scores", {})
except Exception as e:
print(f"Failed to evaluate quality: {str(e)}")
quality_score = 5 # Default middle score
is_satisfactory = quality_score >= satisfaction_threshold
feedback = f"Quality evaluation failed: {str(e)}"
criteria_scores = {}
# Record this iteration
iteration_record = {
"iteration": iteration + 1,
"description": optimized_description,
"parameters": optimized_parameters.copy(),
"description_rationale": rationale,
"argument_rationale": argument_rationale,
"quality_score": quality_score,
"criteria_scores": criteria_scores,
"feedback": feedback,
"is_satisfactory": is_satisfactory,
}
optimization_history.append(iteration_record)
print(f"Quality Score: {quality_score}/10")
print(f"Satisfactory: {is_satisfactory}")
print(f"Feedback: {feedback}")
# Store current feedback for next iteration
previous_feedback = str(
feedback
) # Convert to string to ensure it's serializable
# Check if we've reached satisfactory quality
if is_satisfactory or quality_score >= satisfaction_threshold:
print(f"✅ Reached satisfactory quality in round {iteration + 1}")
break
elif iteration < max_iterations - 1:
print(f"🔄 Quality not satisfactory, continuing to round {iteration + 2}")
feedback_preview = (
previous_feedback[:100] + "..."
if len(previous_feedback) > 100
else previous_feedback
)
print(f"📝 Using feedback for next round: {feedback_preview}")
else:
print("⚠️ Reached maximum iterations without achieving satisfactory quality")
# Use the final optimized configuration
final_optimized_tool_config = current_tool_config
final_description = current_tool_config.get("description", "")
final_parameters = {}
final_rationale = (
optimization_history[-1]["description_rationale"]
if optimization_history
else "No optimization performed"
)
final_argument_rationale = (
optimization_history[-1]["argument_rationale"] if optimization_history else ""
)
# Extract final parameter descriptions
if (
"parameter" in final_optimized_tool_config
and "properties" in final_optimized_tool_config["parameter"]
):
for param_name, param_info in final_optimized_tool_config["parameter"][
"properties"
].items():
final_parameters[param_name] = param_info.get("description", "")
# Print final optimization results
print("\n" + "=" * 80)
print("🎉 OPTIMIZATION COMPLETED!")
print("=" * 80)
print("\n📊 Final Results Summary:")
print(f" • Total optimization rounds: {len(optimization_history)}")
print(
f" • Final quality score: {optimization_history[-1]['quality_score'] if optimization_history else 0}/10"
)
print(
f" • Achieved satisfaction: {optimization_history[-1]['is_satisfactory'] if optimization_history else False}"
)
print("\n✨ Final Optimized Tool Configuration:")
print(json.dumps(final_optimized_tool_config, indent=2, ensure_ascii=False))
# 4. Save the optimized description to a file (always save, regardless of save_to_file flag)
file_path = None
if final_description:
if not output_file:
file_path = f"{tool_name}_optimized_description.txt"
else:
file_path = output_file
# Create directory if it doesn't exist (only if there's a directory part)
dir_path = os.path.dirname(file_path)
if dir_path:
os.makedirs(dir_path, exist_ok=True)
# Do not overwrite if file exists
if os.path.exists(file_path):
base, ext = os.path.splitext(file_path)
file_path = f"{base}_new{ext}"
print(f"\n💾 Saving optimization report to: {file_path}")
# Save comprehensive optimization report
optimization_report = {
"original_tool_config": tool_config,
"final_optimized_tool_config": final_optimized_tool_config,
"optimization_history": optimization_history,
"optimization_summary": {
"total_iterations": len(optimization_history),
"final_description_changed": final_description != original_description,
"final_parameters_optimized": (
list(final_parameters.keys()) if final_parameters else []
),
"final_description_rationale": final_rationale,
"final_argument_rationale": final_argument_rationale,
"final_quality_score": (
optimization_history[-1]["quality_score"]
if optimization_history
else 0
),
"achieved_satisfaction": (
optimization_history[-1]["is_satisfactory"]
if optimization_history
else False
),
},
"test_results": results,
}
with open(file_path, "w", encoding="utf-8") as f:
f.write("# Multi-Round Tool Description Optimization Report\n\n")
f.write(f"## Final Optimized Tool Description\n{final_description}\n\n")
if final_parameters:
f.write("## Final Optimized Parameter Descriptions\n")
for param_name, new_desc in final_parameters.items():
f.write(f"- **{param_name}**: {new_desc}\n")
f.write("\n")
f.write(f"## Final Description Rationale\n{final_rationale}\n\n")
if final_argument_rationale:
f.write(
f"## Final Argument Optimization Rationale\n{final_argument_rationale}\n\n"
)
# Write optimization history
f.write("## Optimization History\n")
        for record in optimization_history:
f.write(f"### Round {record['iteration']}\n")
f.write(f"- **Quality Score**: {record['quality_score']}/10\n")
f.write(f"- **Satisfactory**: {record['is_satisfactory']}\n")
f.write(f"- **Description**: {record['description']}\n")
f.write(f"- **Feedback**: {record['feedback']}\n\n")
f.write("## Complete Optimization Report\n")
f.write("```json\n")
f.write(json.dumps(optimization_report, indent=2))
f.write("\n```\n")
print(f"✅ Optimization report saved successfully to: {file_path}")
else:
print("⚠️ No optimized description to save")
return {
"optimized_description": final_description,
"optimized_parameters": final_parameters,
"optimized_tool_config": final_optimized_tool_config,
"rationale": final_rationale,
"argument_rationale": final_argument_rationale,
"optimization_history": optimization_history,
"total_iterations": len(optimization_history),
"final_quality_score": (
optimization_history[-1]["quality_score"] if optimization_history else 0
),
"achieved_satisfaction": (
optimization_history[-1]["is_satisfactory"]
if optimization_history
else False
),
"test_results": results,
"saved_to": file_path if final_description else None,
}