Add files via upload

2026-05-18 14:04:52 +02:00 · 2026-04-08 18:14:22 +08:00
4 changed files with 131 additions and 26 deletions
@@ -108,7 +108,13 @@ func runMCPToolInvocation(
 	var args map[string]interface{}
 	if argumentsInJSON != "" && argumentsInJSON != "null" {
 		if err := json.Unmarshal([]byte(argumentsInJSON), &args); err != nil {
-			return "", fmt.Errorf("invalid tool arguments JSON: %w", err)
+			// Return soft error (nil error) so the eino graph continues and the LLM can self-correct,
+			// instead of a hard error that terminates the iteration loop.
+			return ToolErrorPrefix + fmt.Sprintf(
+				"Invalid tool arguments JSON: %s\n\nPlease ensure the arguments are a valid JSON object "+
+					"(double-quoted keys, matched braces, no trailing commas) and retry.\n\n"+
+					"（工具参数 JSON 解析失败：%s。请确保 arguments 是合法的 JSON 对象并重试。）",
+				err.Error(), err.Error()), nil
 		}
 	}
 	if args == nil {
@@ -302,34 +302,20 @@ func RunDeepAgent(
 	var lastRunMsgs []adk.Message
 	var lastAssistant string

+	// retryHints accumulates one corrective hint per failed attempt, in failure order.
+	// Every later attempt rebuilds msgs as baseMsgs followed by all hints collected so far,
+	// so each hint's kind (JSON-argument vs. tool-execution) matches the error that caused it.
+
 attemptLoop:
-	for attempt := 0; attempt < maxToolCallArgumentsJSONAttempts; attempt++ {
-		msgs := make([]adk.Message, 0, len(baseMsgs)+attempt)
+	for attempt := 0; attempt < maxToolCallRecoveryAttempts; attempt++ {
+		msgs := make([]adk.Message, 0, len(baseMsgs)+len(retryHints))
 		msgs = append(msgs, baseMsgs...)
-		for i := 0; i < attempt; i++ {
-			msgs = append(msgs, toolCallArgumentsJSONRetryHint())
-		}
+		msgs = append(msgs, retryHints...)

 		if attempt > 0 {
 			mcpIDsMu.Lock()
 			mcpIDs = mcpIDs[:0]
 			mcpIDsMu.Unlock()
-			if logger != nil {
-				logger.Warn("eino DeepAgent: 工具参数 JSON 被接口拒绝，追加提示后重试",
-					zap.Int("attempt", attempt),
-					zap.Int("maxAttempts", maxToolCallArgumentsJSONAttempts))
-			}
-			if progress != nil {
-				// 使用专用事件类型 eino_recovery，便于前端时间线展示（progress 仅改标题，不进时间线）
-				progress("eino_recovery", toolCallArgumentsJSONRecoveryTimelineMessage(attempt), map[string]interface{}{
-					"conversationId": conversationID,
-					"source":           "eino",
-					"einoRetry":        attempt,
-					"runIndex":         attempt + 1, // 第几轮完整运行（1 为首次，重试后递增）
-					"maxRuns":          maxToolCallArgumentsJSONAttempts,
-					"reason":           "invalid_tool_arguments_json",
-				})
-			}
 		}

 		// 仅保留主代理最后一次 assistant 输出；每轮重试重置，避免拼接失败轮次的片段。
@@ -357,12 +343,48 @@ attemptLoop:
 				continue
 			}
 			if ev.Err != nil {
-				if isRecoverableToolCallArgumentsJSONError(ev.Err) && attempt+1 < maxToolCallArgumentsJSONAttempts {
+				canRetry := attempt+1 < maxToolCallRecoveryAttempts
+
+				// Recoverable: API-level JSON argument validation error.
+				if canRetry && isRecoverableToolCallArgumentsJSONError(ev.Err) {
 					if logger != nil {
 						logger.Warn("eino: recoverable tool-call JSON error from model/API", zap.Error(ev.Err), zap.Int("attempt", attempt))
 					}
+					retryHints = append(retryHints, toolCallArgumentsJSONRetryHint())
+					if progress != nil {
+						progress("eino_recovery", toolCallArgumentsJSONRecoveryTimelineMessage(attempt), map[string]interface{}{
+							"conversationId": conversationID,
+							"source":         "eino",
+							"einoRetry":      attempt,
+							"runIndex":       attempt + 1,
+							"maxRuns":        maxToolCallRecoveryAttempts,
+							"reason":         "invalid_tool_arguments_json",
+						})
+					}
 					continue attemptLoop
 				}
+
+				// Recoverable: tool execution error (unknown sub-agent, tool not found, bad JSON in args, etc.).
+				if canRetry && isRecoverableToolExecutionError(ev.Err) {
+					if logger != nil {
+						logger.Warn("eino: recoverable tool execution error, will retry with corrective hint",
+							zap.Error(ev.Err), zap.Int("attempt", attempt))
+					}
+					retryHints = append(retryHints, toolExecutionRetryHint())
+					if progress != nil {
+						progress("eino_recovery", toolExecutionRecoveryTimelineMessage(attempt), map[string]interface{}{
+							"conversationId": conversationID,
+							"source":         "eino",
+							"einoRetry":      attempt,
+							"runIndex":       attempt + 1,
+							"maxRuns":        maxToolCallRecoveryAttempts,
+							"reason":         "tool_execution_error",
+						})
+					}
+					continue attemptLoop
+				}
+
+				// Non-recoverable error.
 				if progress != nil {
 					progress("error", ev.Err.Error(), map[string]interface{}{
 						"conversationId": conversationID,
@@ -7,9 +7,10 @@ import (
 	"github.com/cloudwego/eino/schema"
 )

-// maxToolCallArgumentsJSONAttempts 含首次运行：首次 + 自动重试次数。
+// maxToolCallRecoveryAttempts 含首次运行：首次 + 自动重试次数。
 // 例如为 3 表示最多共 3 次完整 DeepAgent 运行（2 次失败后各追加一条纠错提示）。
-const maxToolCallArgumentsJSONAttempts = 3
+// 该常量同时用于 JSON 参数错误和工具执行错误（如子代理名称不存在）的恢复重试。
+const maxToolCallRecoveryAttempts = 3

 // toolCallArgumentsJSONRetryHint 追加在用户消息后，提示模型输出合法 JSON 工具参数（部分云厂商会在流式阶段校验 arguments）。
 func toolCallArgumentsJSONRetryHint() *schema.Message {
@@ -24,7 +25,7 @@ func toolCallArgumentsJSONRecoveryTimelineMessage(attempt int) string {
 		"接口拒绝了无效的工具参数 JSON。已向对话追加系统提示并要求模型重新生成合法的 function.arguments。"+
 			"当前为第 %d/%d 轮完整运行。\n\n"+
 			"The API rejected invalid JSON in tool arguments. A system hint was appended. This is full run %d of %d.",
-		attempt+1, maxToolCallArgumentsJSONAttempts, attempt+1, maxToolCallArgumentsJSONAttempts,
+		attempt+1, maxToolCallRecoveryAttempts, attempt+1, maxToolCallRecoveryAttempts,
 	)
 }

@@ -0,0 +1,76 @@
+package multiagent
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/cloudwego/eino/schema"
+)
+
+// isRecoverableToolExecutionError reports whether err looks like a tool-call
+// failure that is worth retrying with a corrective hint: an unknown sub-agent
+// type, an unknown tool name, or tool arguments/input that failed to parse as
+// JSON. A nil error is never recoverable.
+//
+// Classification is a case-insensitive substring match on err.Error(), so it
+// depends on the exact wording of the wrapped messages.
+// NOTE(review): the "tool" + "not found" rule is broad and also matches unrelated
+// text (e.g. a tool reporting a missing file) — confirm this is intended.
+func isRecoverableToolExecutionError(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	msg := strings.ToLower(err.Error())
+	has := func(fragment string) bool { return strings.Contains(msg, fragment) }
+	missing := has("not found") // shared by several rules below
+
+	switch {
+	case missing && has("subagent type"):
+		// Requested sub-agent type does not exist.
+		return true
+	case missing && has("tool"):
+		// Tool lookup failed (any message mentioning both fragments).
+		return true
+	case has("invalid tool arguments json"):
+		// Malformed tool-argument JSON surfaced as an error.
+		return true
+	case has("failed to unmarshal") && has("json"):
+		// Tool input JSON could not be decoded.
+		return true
+	case has("failed to stream tool call") || has("failed to invoke tool"):
+		// Generic stream/invoke wrapper: recoverable only when the wrapped text
+		// points at a lookup or JSON decoding problem.
+		return missing || has("json") || has("unmarshal")
+	}
+
+	return false
+}
+
+// toolExecutionRetryHint builds the bilingual (English, then Chinese) corrective user message appended before a retry.
+func toolExecutionRetryHint() *schema.Message {
+	const hint = `[System] Your previous tool call failed because:
+- The tool or sub-agent name you used does not exist, OR
+- The tool call arguments were not valid JSON.
+
+Please carefully review the available tools and sub-agents listed in your context, use only exact registered names (case-sensitive), and ensure all arguments are well-formed JSON objects. Then retry your action.
+
+[系统提示] 上一次工具调用失败，可能原因：
+- 你使用的工具名或子代理名称不存在；
+- 工具调用参数不是合法 JSON。
+
+请仔细检查上下文中列出的可用工具和子代理名称（须完全匹配、区分大小写），确保所有参数均为合法的 JSON 对象，然后重新执行。`
+	return schema.UserMessage(hint)
+}
+
+// toolExecutionRecoveryTimelineMessage builds the bilingual (Chinese, then English)
+// eino_recovery timeline text; attempt is zero-based, so the run shown is attempt+1.
+func toolExecutionRecoveryTimelineMessage(attempt int) string {
+	run, total := attempt+1, maxToolCallRecoveryAttempts
+	const layout = "工具调用执行失败（工具/子代理名称不存在或参数 JSON 无效）。已向对话追加纠错提示并要求模型重新生成。" +
+		"当前为第 %d/%d 轮完整运行。\n\n" +
+		"Tool call execution failed (unknown tool/sub-agent name or invalid JSON arguments). " +
+		"A corrective hint was appended. This is full run %d of %d."
+	// Both language halves print the same run/total pair.
+	return fmt.Sprintf(layout, run, total, run, total)
+}