Add files via upload

2026-07-15 16:37:33 +02:00 · 2026-04-19 01:25:30 +08:00
parent 362e12898f
commit 2de0bd4d31
37 changed files with 12693 additions and 0 deletions
@@ -0,0 +1,85 @@
+package multiagent
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"cyberstrike-ai/internal/config"
+
+	localbk "github.com/cloudwego/eino-ext/adk/backend/local"
+	"github.com/cloudwego/eino/adk"
+	"github.com/cloudwego/eino/adk/middlewares/filesystem"
+	"github.com/cloudwego/eino/adk/middlewares/skill"
+	"go.uber.org/zap"
+)
+
+// prepareEinoSkills wires up the Eino skill support for the multi-agent runtime.
+//
+// It creates a local disk backend, a filesystem-based skill backend rooted at skillsDir, and the
+// skill middleware built on top of it. The returned fsTools flag is whatever
+// ma.EinoSkills.EinoSkillFilesystemToolsEffective() reports.
+//
+// All results are nil/false with a nil error when ma is nil, skills are disabled, skillsDir is
+// blank, or the resolved path cannot be stat'ed or is not a directory (the last two cases are
+// logged as warnings when a logger is supplied). Construction failures are returned wrapped.
+func prepareEinoSkills(
+	ctx context.Context,
+	skillsDir string,
+	ma *config.MultiAgentConfig,
+	logger *zap.Logger,
+) (loc *localbk.Local, skillMW adk.ChatModelAgentMiddleware, fsTools bool, err error) {
+	if ma == nil || ma.EinoSkills.Disable {
+		return nil, nil, false, nil
+	}
+
+	dir := strings.TrimSpace(skillsDir)
+	if dir == "" {
+		if logger != nil {
+			logger.Warn("eino skills: skills_dir empty, skip")
+		}
+		return nil, nil, false, nil
+	}
+
+	absDir, absErr := filepath.Abs(dir)
+	if absErr != nil {
+		return nil, nil, false, fmt.Errorf("skills_dir abs: %w", absErr)
+	}
+
+	// A path that exists but is not a directory is treated the same as a missing one.
+	info, statErr := os.Stat(absDir)
+	if statErr != nil || !info.IsDir() {
+		if logger != nil {
+			logger.Warn("eino skills: directory missing, skip", zap.String("dir", absDir), zap.Error(statErr))
+		}
+		return nil, nil, false, nil
+	}
+
+	backend, backendErr := localbk.NewBackend(ctx, &localbk.Config{})
+	if backendErr != nil {
+		return nil, nil, false, fmt.Errorf("eino local backend: %w", backendErr)
+	}
+
+	skillBackend, skillBackendErr := skill.NewBackendFromFilesystem(ctx, &skill.BackendFromFilesystemConfig{
+		Backend: backend,
+		BaseDir: absDir,
+	})
+	if skillBackendErr != nil {
+		return nil, nil, false, fmt.Errorf("eino skill filesystem backend: %w", skillBackendErr)
+	}
+
+	// Only override the skill tool name when the configuration provides a non-blank one.
+	mwCfg := &skill.Config{Backend: skillBackend}
+	if toolName := strings.TrimSpace(ma.EinoSkills.SkillToolName); toolName != "" {
+		mwCfg.SkillToolName = &toolName
+	}
+
+	middleware, mwErr := skill.NewMiddleware(ctx, mwCfg)
+	if mwErr != nil {
+		return nil, nil, false, fmt.Errorf("eino skill middleware: %w", mwErr)
+	}
+
+	return backend, middleware, ma.EinoSkills.EinoSkillFilesystemToolsEffective(), nil
+}
+
+// subAgentFilesystemMiddleware builds a filesystem middleware for a sub-agent on top of the
+// shared local backend, which serves both as the file Backend and as the StreamingShell.
+// It returns (nil, nil) when loc is nil.
+//
+// NOTE(review): the original comment indicates that built-in filesystem tools live only on the
+// outer (orchestrator) agent, so sub-agents need this explicit middleware for parity — confirm
+// against the call sites.
+func subAgentFilesystemMiddleware(ctx context.Context, loc *localbk.Local) (adk.ChatModelAgentMiddleware, error) {
+	if loc != nil {
+		cfg := &filesystem.MiddlewareConfig{
+			Backend:        loc,
+			StreamingShell: loc,
+		}
+		return filesystem.New(ctx, cfg)
+	}
+	return nil, nil
+}
@@ -0,0 +1,140 @@
+package multiagent
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"cyberstrike-ai/internal/agent"
+	"cyberstrike-ai/internal/config"
+
+	"github.com/bytedance/sonic"
+	"github.com/cloudwego/eino/adk"
+	"github.com/cloudwego/eino/adk/middlewares/summarization"
+	"github.com/cloudwego/eino/components/model"
+	"github.com/cloudwego/eino/schema"
+	"go.uber.org/zap"
+)
+
+// einoSummarizeUserInstruction is the instruction text handed to the summarization middleware.
+// It shares the goal of the single-agent MemoryCompressor: keep the information that is critical
+// for penetration testing when the conversation history is compressed.
+const einoSummarizeUserInstruction = `在保持所有关键安全测试信息完整的前提下压缩对话历史。
+
+必须保留：已确认漏洞与攻击路径、工具输出中的核心发现、凭证与认证细节、架构与薄弱点、当前进度、失败尝试与死路、策略决策。
+保留精确技术细节（URL、路径、参数、Payload、版本号、报错原文可摘要但要点不丢）。
+将冗长扫描输出概括为结论；重复发现合并表述。
+
+输出须使后续代理能无缝继续同一授权测试任务。`
+
+// newEinoSummarizationMiddleware 使用 Eino ADK Summarization 中间件（见 https://www.cloudwego.io/zh/docs/eino/core_modules/eino_adk/eino_adk_chatmodelagentmiddleware/middleware_summarization/）。
+// 触发阈值与单 Agent MemoryCompressor 一致：当估算 token 超过 openai.max_total_tokens 的 90% 时摘要。
+func newEinoSummarizationMiddleware(
+	ctx context.Context,
+	summaryModel model.BaseChatModel,
+	appCfg *config.Config,
+	logger *zap.Logger,
+) (adk.ChatModelAgentMiddleware, error) {
+	if summaryModel == nil || appCfg == nil {
+		return nil, fmt.Errorf("multiagent: summarization 需要 model 与配置")
+	}
+	maxTotal := appCfg.OpenAI.MaxTotalTokens
+	if maxTotal <= 0 {
+		maxTotal = 120000
+	}
+	trigger := int(float64(maxTotal) * 0.9)
+	if trigger < 4096 {
+		trigger = maxTotal
+		if trigger < 4096 {
+			trigger = 4096
+		}
+	}
+	preserveMax := trigger / 3
+	if preserveMax < 2048 {
+		preserveMax = 2048
+	}
+
+	modelName := strings.TrimSpace(appCfg.OpenAI.Model)
+	if modelName == "" {
+		modelName = "gpt-4o"
+	}
+
+	mw, err := summarization.New(ctx, &summarization.Config{
+		Model: summaryModel,
+		Trigger: &summarization.TriggerCondition{
+			ContextTokens: trigger,
+		},
+		TokenCounter:       einoSummarizationTokenCounter(modelName),
+		UserInstruction:    einoSummarizeUserInstruction,
+		EmitInternalEvents: false,
+		PreserveUserMessages: &summarization.PreserveUserMessages{
+			Enabled:   true,
+			MaxTokens: preserveMax,
+		},
+		Callback: func(ctx context.Context, before, after adk.ChatModelAgentState) error {
+			if logger == nil {
+				return nil
+			}
+			logger.Info("eino summarization 已压缩上下文",
+				zap.Int("messages_before", len(before.Messages)),
+				zap.Int("messages_after", len(after.Messages)),
+				zap.Int("max_total_tokens", maxTotal),
+				zap.Int("trigger_context_tokens", trigger),
+			)
+			return nil
+		},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("summarization.New: %w", err)
+	}
+	return mw, nil
+}
+
+// einoSummarizationTokenCounter returns a token counter for the summarization middleware.
+//
+// The counter flattens the input into one text: for every non-nil message its role, content,
+// reasoning content, JSON-encoded tool calls and the text parts of UserInputMultiContent, then
+// every non-nil tool definition encoded as JSON with its Extra field cleared. Each piece is
+// followed by a newline. The text is counted with the tiktoken counter for openAIModel; if that
+// fails, the estimate falls back to one token per four bytes (rounded up). Pieces that fail to
+// marshal are skipped. The returned function never reports an error.
+func einoSummarizationTokenCounter(openAIModel string) summarization.TokenCounterFunc {
+	counter := agent.NewTikTokenCounter()
+	return func(ctx context.Context, input *summarization.TokenCounterInput) (int, error) {
+		var buf strings.Builder
+		appendLine := func(s string) {
+			buf.WriteString(s)
+			buf.WriteByte('\n')
+		}
+		appendIfSet := func(s string) {
+			if s != "" {
+				appendLine(s)
+			}
+		}
+
+		for _, m := range input.Messages {
+			if m == nil {
+				continue
+			}
+			// The role line is written even when it is empty.
+			appendLine(string(m.Role))
+			appendIfSet(m.Content)
+			appendIfSet(m.ReasoningContent)
+			if len(m.ToolCalls) > 0 {
+				if raw, err := sonic.Marshal(m.ToolCalls); err == nil {
+					buf.Write(raw)
+					buf.WriteByte('\n')
+				}
+			}
+			for _, p := range m.UserInputMultiContent {
+				if p.Type != schema.ChatMessagePartTypeText {
+					continue
+				}
+				appendIfSet(p.Text)
+			}
+		}
+
+		for _, info := range input.Tools {
+			if info == nil {
+				continue
+			}
+			// Work on a copy so clearing Extra does not touch the caller's tool definition.
+			stripped := *info
+			stripped.Extra = nil
+			if encoded, err := sonic.MarshalString(stripped); err == nil {
+				appendLine(encoded)
+			}
+		}
+
+		joined := buf.String()
+		tokens, err := counter.Count(openAIModel, joined)
+		if err != nil {
+			return (len(joined) + 3) / 4, nil
+		}
+		return tokens, nil
+	}
+}
@@ -0,0 +1,62 @@
+package multiagent
+
+import (
+	"context"
+	"strings"
+
+	"github.com/cloudwego/eino/adk"
+	"github.com/cloudwego/eino/components/tool"
+)
+
+// noNestedTaskMiddleware forbids calling task again while already inside a task (sub-agent)
+// execution chain, so that a sub-agent cannot delegate to yet another sub-agent and cause
+// unbounded delegation/recursion.
+//
+// Nesting is detected through a temporary marker stored in ctx: the outer task call marks ctx
+// first, and a task call made inside the sub-agent finds that marker and is refused.
+type noNestedTaskMiddleware struct {
+	adk.BaseChatModelAgentMiddleware
+}
+
+// nestedTaskCtxKey is the context key type under which the "inside a task call" marker is stored.
+type nestedTaskCtxKey struct{}
+
+func newNoNestedTaskMiddleware() adk.ChatModelAgentMiddleware {
+	return &noNestedTaskMiddleware{}
+}
+
+// WrapInvokableToolCall wraps the "task" tool endpoint to prevent nested delegation.
+//
+// Tools other than "task" (compared case-insensitively after trimming whitespace) and calls
+// without a tool context are returned unwrapped. If ctx already carries the nesting marker, the
+// returned endpoint does not call the real tool and answers with an explanatory text instead.
+// Otherwise the returned endpoint sets the marker on the call context before invoking the real
+// endpoint, so task calls made further down the chain can detect the nesting.
+func (m *noNestedTaskMiddleware) WrapInvokableToolCall(
+	ctx context.Context,
+	endpoint adk.InvokableToolCallEndpoint,
+	tCtx *adk.ToolContext,
+) (adk.InvokableToolCallEndpoint, error) {
+	if tCtx == nil {
+		return endpoint, nil
+	}
+	// Deep's built-in task tool is always named "task"; the comparison tolerates case and
+	// surrounding whitespace. A blank name can never match and is passed through as well.
+	if toolName := strings.TrimSpace(tCtx.Name); !strings.EqualFold(toolName, "task") {
+		return endpoint, nil
+	}
+
+	insideTask := false
+	if ctx != nil {
+		marked, _ := ctx.Value(nestedTaskCtxKey{}).(bool)
+		insideTask = marked
+	}
+
+	if insideTask {
+		// Already inside a task chain: refuse further delegation. A tool result text (not an
+		// error) is returned so the whole multi-agent run is not hard-stopped, while the nested
+		// task is still prevented from spawning another sub-agent.
+		refuse := func(context.Context, string, ...tool.Option) (string, error) {
+			return "Nested task delegation is forbidden (already inside a sub-agent delegation chain) to avoid infinite delegation. Please continue the work using the current agent's tools.", nil
+		}
+		return refuse, nil
+	}
+
+	// Mark this task call chain so a task call inside the sub-agent can detect the nesting.
+	markAndCall := func(callCtx context.Context, argumentsInJSON string, opts ...tool.Option) (string, error) {
+		if callCtx == nil {
+			callCtx = context.Background()
+		}
+		return endpoint(context.WithValue(callCtx, nestedTaskCtxKey{}, true), argumentsInJSON, opts...)
+	}
+	return markAndCall, nil
+}
+
@@ -0,0 +1,51 @@
+package multiagent
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/cloudwego/eino/schema"
+)
+
+// maxToolCallRecoveryAttempts includes the first run: first run + automatic retries.
+// For example, a value of 3 means at most 3 full DeepAgent runs in total (a corrective hint is
+// appended after each of the 2 failures).
+// The constant is used for recovery retries of both JSON argument errors and tool execution
+// errors (such as a non-existent sub-agent name).
+const maxToolCallRecoveryAttempts = 5
+
+// toolCallArgumentsJSONRetryHint 追加在用户消息后，提示模型输出合法 JSON 工具参数（部分云厂商会在流式阶段校验 arguments）。
+func toolCallArgumentsJSONRetryHint() *schema.Message {
+	return schema.UserMessage(`[系统提示] 上一次输出中，工具调用的 function.arguments 不是合法 JSON，接口已拒绝。请重新生成：每个 tool call 的 arguments 必须是完整、可解析的 JSON 对象字符串（键名用双引号，无多余逗号，括号配对）。不要输出截断或不完整的 JSON。
+
+[System] Your previous tool call used invalid JSON in function.arguments and was rejected by the API. Regenerate with strictly valid JSON objects only (double-quoted keys, matched braces, no trailing commas).`)
+}
+
+// toolCallArgumentsJSONRecoveryTimelineMessage 供 eino_recovery 事件落库与前端时间线展示。
+func toolCallArgumentsJSONRecoveryTimelineMessage(attempt int) string {
+	return fmt.Sprintf(
+		"接口拒绝了无效的工具参数 JSON。已向对话追加系统提示并要求模型重新生成合法的 function.arguments。"+
+			"当前为第 %d/%d 轮完整运行。\n\n"+
+			"The API rejected invalid JSON in tool arguments. A system hint was appended. This is full run %d of %d.",
+		attempt+1, maxToolCallRecoveryAttempts, attempt+1, maxToolCallRecoveryAttempts,
+	)
+}
+
+// isRecoverableToolCallArgumentsJSONError reports whether err looks like a streaming error of
+// the "tool arguments are not valid JSON" kind, which can be recovered by appending a hint and
+// running one more round.
+//
+// The check is a case-insensitive text match: the message must mention "json" and additionally
+// contain "function.arguments", "function arguments", "invalidparameter" or
+// "must be in json format". A nil err is never recoverable.
+func isRecoverableToolCallArgumentsJSONError(err error) bool {
+	if err == nil {
+		return false
+	}
+	msg := strings.ToLower(err.Error())
+	if !strings.Contains(msg, "json") {
+		return false
+	}
+	// "json" is known to be present from here on, so each marker alone is sufficient.
+	switch {
+	case strings.Contains(msg, "function.arguments"),
+		strings.Contains(msg, "function arguments"),
+		strings.Contains(msg, "invalidparameter"),
+		strings.Contains(msg, "must be in json format"):
+		return true
+	default:
+		return false
+	}
+}
@@ -0,0 +1,17 @@
+package multiagent
+
+import (
+	"errors"
+	"testing"
+)
+
+// TestIsRecoverableToolCallArgumentsJSONError checks one recoverable vendor error (it mentions
+// function.arguments and JSON) and one unrelated error that must not be recoverable.
+func TestIsRecoverableToolCallArgumentsJSONError(t *testing.T) {
+	cases := []struct {
+		err     error
+		want    bool
+		failMsg string
+	}{
+		{
+			err:     errors.New(`failed to receive stream chunk: error, <400> InternalError.Algo.InvalidParameter: The "function.arguments" parameter of the code model must be in JSON format.`),
+			want:    true,
+			failMsg: "expected recoverable for function.arguments + JSON",
+		},
+		{
+			err:     errors.New("unrelated network failure"),
+			want:    false,
+			failMsg: "expected not recoverable",
+		},
+	}
+	for _, c := range cases {
+		if isRecoverableToolCallArgumentsJSONError(c.err) != c.want {
+			t.Fatal(c.failMsg)
+		}
+	}
+}
@@ -0,0 +1,131 @@
+package multiagent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/cloudwego/eino/compose"
+)
+
+// softRecoveryToolCallMiddleware returns an InvokableToolMiddleware that turns selected tool
+// execution errors into "soft" errors: the error is dropped and a descriptive message is
+// returned as the tool result instead, so the model can correct itself in the same iteration.
+//
+// Which errors qualify is decided by isSoftRecoverableToolError (JSON parse failures,
+// tool/sub-agent not found). Successful calls and all other errors pass through unchanged.
+//
+// Per the original design note, without this middleware such a failure propagates as a hard
+// error through the Eino ToolsNode and either triggers the expensive full-replay retry loop or
+// ends the run once retries are exhausted.
+func softRecoveryToolCallMiddleware() compose.InvokableToolMiddleware {
+	return func(next compose.InvokableToolEndpoint) compose.InvokableToolEndpoint {
+		return func(ctx context.Context, input *compose.ToolInput) (*compose.ToolOutput, error) {
+			out, callErr := next(ctx, input)
+			switch {
+			case callErr == nil:
+				return out, nil
+			case isSoftRecoverableToolError(callErr):
+				// Hand the failure to the model as ordinary tool output.
+				recovery := buildSoftRecoveryMessage(input.Name, input.Arguments, callErr)
+				return &compose.ToolOutput{Result: recovery}, nil
+			default:
+				return out, callErr
+			}
+		}
+	}
+}
+
+// isSoftRecoverableToolError reports whether a tool execution error should be converted into a
+// tool-result message instead of failing the graph.
+//
+// The decision is a case-insensitive text match on the error message. It is true for JSON
+// parsing problems (see isJSONRelatedError) and for messages containing "not found" together
+// with either "subagent type" or "tool". A nil err yields false.
+func isSoftRecoverableToolError(err error) bool {
+	if err == nil {
+		return false
+	}
+	text := strings.ToLower(err.Error())
+
+	// Truncated or malformed arguments produced by the model.
+	if isJSONRelatedError(text) {
+		return true
+	}
+
+	// Both remaining cases are lookups that failed: an unknown sub-agent type or an unknown tool.
+	if !strings.Contains(text, "not found") {
+		return false
+	}
+	return strings.Contains(text, "subagent type") || strings.Contains(text, "tool")
+}
+
+// isJSONRelatedError reports whether an already lower-cased error message describes a JSON
+// parsing problem: it must mention "json" and contain at least one known parse-failure phrase.
+func isJSONRelatedError(lower string) bool {
+	if !strings.Contains(lower, "json") {
+		return false
+	}
+	switch {
+	case strings.Contains(lower, "unexpected end of json"),
+		strings.Contains(lower, "unmarshal"),
+		strings.Contains(lower, "invalid character"),
+		strings.Contains(lower, "cannot unmarshal"),
+		strings.Contains(lower, "invalid tool arguments"),
+		strings.Contains(lower, "failed to unmarshal"),
+		strings.Contains(lower, "must be in json format"),
+		strings.Contains(lower, "unexpected eof"):
+		return true
+	default:
+		return false
+	}
+}
+
+// buildSoftRecoveryMessage creates the bilingual (English + Chinese) error text that is returned
+// to the model as the tool result when a tool call fails in a recoverable way.
+//
+// toolName and arguments identify the failed call and err is the (non-nil) error it produced.
+// Arguments longer than 300 bytes are shortened to a preview; the cut is moved back to the
+// nearest rune boundary so the preview never ends in a partial UTF-8 sequence.
+//
+// The JSON-specific wording is used when the error message mentions "json" or "unmarshal", or
+// when err itself is an encoding/json syntax or type error; every other error gets the generic
+// "execution failed" wording.
+func buildSoftRecoveryMessage(toolName, arguments string, err error) string {
+	// Truncate the arguments preview to avoid flooding the context.
+	const maxArgPreviewBytes = 300
+	argPreview := arguments
+	if len(argPreview) > maxArgPreviewBytes {
+		// Ranging over a string yields the byte offset of each rune start, so cut ends up as
+		// the last rune boundary at or before the limit (exactly the limit for ASCII input).
+		cut := 0
+		for i := range argPreview {
+			if i > maxArgPreviewBytes {
+				break
+			}
+			cut = i
+		}
+		argPreview = argPreview[:cut] + "... (truncated)"
+	}
+
+	// Decide whether this is a JSON parse error so the friendlier message can be used.
+	errStr := err.Error()
+	lowerErr := strings.ToLower(errStr)
+	isJSONErr := strings.Contains(lowerErr, "json") || strings.Contains(lowerErr, "unmarshal")
+	if !isJSONErr {
+		// Typed encoding/json errors do not always mention "json" in their text
+		// (e.g. "invalid character '}' ..."). Wrapped errors are not unwrapped here and rely
+		// on the text match above.
+		switch err.(type) {
+		case *json.SyntaxError, *json.UnmarshalTypeError:
+			isJSONErr = true
+		}
+	}
+
+	if isJSONErr {
+		return fmt.Sprintf(
+			"[Tool Error] The arguments for tool '%s' are not valid JSON and could not be parsed.\n"+
+				"Error: %s\n"+
+				"Arguments received: %s\n\n"+
+				"Please fix the JSON (ensure double-quoted keys, matched braces/brackets, no trailing commas, "+
+				"no truncation) and call the tool again.\n\n"+
+				"[工具错误] 工具 '%s' 的参数不是合法 JSON，无法解析。\n"+
+				"错误：%s\n"+
+				"收到的参数：%s\n\n"+
+				"请修正 JSON（确保双引号键名、括号配对、无尾部逗号、无截断），然后重新调用工具。",
+			toolName, errStr, argPreview,
+			toolName, errStr, argPreview,
+		)
+	}
+
+	return fmt.Sprintf(
+		"[Tool Error] Tool '%s' execution failed: %s\n"+
+			"Arguments: %s\n\n"+
+			"Please review the available tools and their expected arguments, then retry.\n\n"+
+			"[工具错误] 工具 '%s' 执行失败：%s\n"+
+			"参数：%s\n\n"+
+			"请检查可用工具及其参数要求，然后重试。",
+		toolName, errStr, argPreview,
+		toolName, errStr, argPreview,
+	)
+}
@@ -0,0 +1,166 @@
+package multiagent
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"strings"
+	"testing"
+
+	"github.com/cloudwego/eino/compose"
+)
+
+// TestIsSoftRecoverableToolError runs isSoftRecoverableToolError over a table of JSON parse
+// errors, not-found errors and unrelated errors, one sub-test per entry.
+func TestIsSoftRecoverableToolError(t *testing.T) {
+	// A genuine encoding/json error, produced by decoding truncated input.
+	var decoded map[string]interface{}
+	realJSONErr := json.Unmarshal([]byte(`{"key": `), &decoded)
+
+	cases := []struct {
+		label string
+		in    error
+		want  bool
+	}{
+		{label: "nil error", in: nil, want: false},
+		{label: "unexpected end of JSON input", in: errors.New("unexpected end of JSON input"), want: true},
+		{
+			label: "failed to unmarshal task tool input json",
+			in:    errors.New("failed to unmarshal task tool input json: unexpected end of JSON input"),
+			want:  true,
+		},
+		{
+			label: "invalid tool arguments JSON",
+			in:    errors.New("invalid tool arguments JSON: unexpected end of JSON input"),
+			want:  true,
+		},
+		{
+			label: "json invalid character",
+			in:    errors.New(`invalid character '}' looking for beginning of value in JSON`),
+			want:  true,
+		},
+		{label: "subagent type not found", in: errors.New("subagent type recon_agent not found"), want: true},
+		{label: "tool not found", in: errors.New("tool nmap_scan not found in toolsNode indexes"), want: true},
+		{label: "unrelated network error", in: errors.New("connection refused"), want: false},
+		{label: "context cancelled", in: context.Canceled, want: false},
+		{label: "real json unmarshal error", in: realJSONErr, want: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.label, func(t *testing.T) {
+			if got := isSoftRecoverableToolError(tc.in); got != tc.want {
+				t.Errorf("isSoftRecoverableToolError(%v) = %v, want %v", tc.in, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestSoftRecoveryToolCallMiddleware_PassesThrough verifies that a successful tool call reaches
+// the wrapped endpoint and that its output is returned unchanged.
+func TestSoftRecoveryToolCallMiddleware_PassesThrough(t *testing.T) {
+	invoked := false
+	inner := func(ctx context.Context, input *compose.ToolInput) (*compose.ToolOutput, error) {
+		invoked = true
+		return &compose.ToolOutput{Result: "success"}, nil
+	}
+	call := &compose.ToolInput{
+		Name:      "test_tool",
+		Arguments: `{"key": "value"}`,
+	}
+
+	out, err := softRecoveryToolCallMiddleware()(inner)(context.Background(), call)
+	switch {
+	case err != nil:
+		t.Fatalf("unexpected error: %v", err)
+	case !invoked:
+		t.Fatal("next endpoint was not called")
+	case out.Result != "success":
+		t.Fatalf("expected 'success', got %q", out.Result)
+	}
+}
+
+// TestSoftRecoveryToolCallMiddleware_ConvertsJSONError verifies that a JSON unmarshal failure
+// from the wrapped endpoint becomes a nil error plus a recovery message naming the tool.
+func TestSoftRecoveryToolCallMiddleware_ConvertsJSONError(t *testing.T) {
+	failing := func(ctx context.Context, input *compose.ToolInput) (*compose.ToolOutput, error) {
+		return nil, errors.New("failed to unmarshal task tool input json: unexpected end of JSON input")
+	}
+	call := &compose.ToolInput{
+		Name:      "task",
+		Arguments: `{"subagent_type": "recon`,
+	}
+
+	out, err := softRecoveryToolCallMiddleware()(failing)(context.Background(), call)
+	switch {
+	case err != nil:
+		t.Fatalf("expected nil error (soft recovery), got: %v", err)
+	case out == nil || out.Result == "":
+		t.Fatal("expected non-empty recovery message")
+	case !containsAll(out.Result, "[Tool Error]", "task", "JSON"):
+		t.Fatalf("recovery message missing expected content: %s", out.Result)
+	}
+}
+
+// TestSoftRecoveryToolCallMiddleware_PropagatesNonRecoverable verifies that an error outside the
+// recoverable set is returned to the caller as the very same error value.
+func TestSoftRecoveryToolCallMiddleware_PropagatesNonRecoverable(t *testing.T) {
+	origErr := errors.New("connection timeout to remote server")
+	failing := func(ctx context.Context, input *compose.ToolInput) (*compose.ToolOutput, error) {
+		return nil, origErr
+	}
+	call := &compose.ToolInput{
+		Name:      "test_tool",
+		Arguments: `{}`,
+	}
+
+	_, err := softRecoveryToolCallMiddleware()(failing)(context.Background(), call)
+	switch {
+	case err == nil:
+		t.Fatal("expected error to propagate for non-recoverable errors")
+	case err != origErr:
+		t.Fatalf("expected original error, got: %v", err)
+	}
+}
+
+// containsAll reports whether s contains every substring in subs.
+// With no substrings it returns true.
+func containsAll(s string, subs ...string) bool {
+	for _, sub := range subs {
+		if !strings.Contains(s, sub) {
+			return false
+		}
+	}
+	return true
+}
+
+// contains reports whether sub occurs within s. It delegates to strings.Contains and is kept
+// so existing callers of the helper keep compiling.
+func contains(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
+
+// searchString reports whether sub occurs within s. It delegates to strings.Contains, which
+// matches the previous hand-written scan (including for an empty or over-long sub).
+func searchString(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
@@ -0,0 +1,76 @@
+package multiagent
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/cloudwego/eino/schema"
+)
+
+// isRecoverableToolExecutionError reports whether err is a tool-level execution error that can
+// be recovered by retrying with a corrective hint.
+//
+// Per the original notes these errors come from eino framework internals (e.g. task_tool.go,
+// tool_node.go) when the model produces an invalid tool call. Detection is a case-insensitive
+// text match on the error message covering: an unknown sub-agent type, an unknown tool name,
+// invalid tool-arguments JSON, a failed JSON unmarshal, and the generic "failed to stream tool
+// call" / "failed to invoke tool" wrappers around any of those. A nil err yields false.
+func isRecoverableToolExecutionError(err error) bool {
+	if err == nil {
+		return false
+	}
+	msg := strings.ToLower(err.Error())
+	has := func(fragment string) bool { return strings.Contains(msg, fragment) }
+
+	notFound := has("not found")
+	mentionsJSON := has("json")
+	// Generic wrappers that eino puts around a failing tool call.
+	wrappedCall := has("failed to stream tool call") || has("failed to invoke tool")
+
+	switch {
+	case has("subagent type") && notFound, // unknown sub-agent type
+		has("tool") && notFound, // tool missing from the toolsNode indexes
+		has("invalid tool arguments json"), // arguments were not valid JSON
+		has("failed to unmarshal") && mentionsJSON, // task tool input could not be decoded
+		wrappedCall && (notFound || mentionsJSON || has("unmarshal")):
+		return true
+	default:
+		return false
+	}
+}
+
+// toolExecutionRetryHint returns a user message appended to the conversation to prompt
+// the LLM to correct its tool call after a tool execution error.
+func toolExecutionRetryHint() *schema.Message {
+	return schema.UserMessage(`[System] Your previous tool call failed because:
+- The tool or sub-agent name you used does not exist, OR
+- The tool call arguments were not valid JSON.
+
+Please carefully review the available tools and sub-agents listed in your context, use only exact registered names (case-sensitive), and ensure all arguments are well-formed JSON objects. Then retry your action.
+
+[系统提示] 上一次工具调用失败，可能原因：
+- 你使用的工具名或子代理名称不存在；
+- 工具调用参数不是合法 JSON。
+
+请仔细检查上下文中列出的可用工具和子代理名称（须完全匹配、区分大小写），确保所有参数均为合法的 JSON 对象，然后重新执行。`)
+}
+
+// toolExecutionRecoveryTimelineMessage returns a message for the eino_recovery event
+// displayed in the UI timeline when a tool execution error triggers a retry.
+func toolExecutionRecoveryTimelineMessage(attempt int) string {
+	return fmt.Sprintf(
+		"工具调用执行失败（工具/子代理名称不存在或参数 JSON 无效）。已向对话追加纠错提示并要求模型重新生成。"+
+			"当前为第 %d/%d 轮完整运行。\n\n"+
+			"Tool call execution failed (unknown tool/sub-agent name or invalid JSON arguments). "+
+			"A corrective hint was appended. This is full run %d of %d.",
+		attempt+1, maxToolCallRecoveryAttempts, attempt+1, maxToolCallRecoveryAttempts,
+	)
+}