From c0171ef60a8da96a017ec8bb53003a5adff4c492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=85=AC=E6=98=8E?= <83812544+Ed1s0nZ@users.noreply.github.com> Date: Wed, 1 Jul 2026 14:34:50 +0800 Subject: [PATCH] Add files via upload --- internal/multiagent/eino_summarize.go | 60 ++++++++++++--- internal/multiagent/eino_summarize_output.go | 73 +++++++++++++++++++ .../multiagent/eino_summarize_output_test.go | 67 +++++++++++++++++ 3 files changed, 190 insertions(+), 10 deletions(-) create mode 100644 internal/multiagent/eino_summarize_output.go create mode 100644 internal/multiagent/eino_summarize_output_test.go diff --git a/internal/multiagent/eino_summarize.go b/internal/multiagent/eino_summarize.go index eb79a565..1abc79f6 100644 --- a/internal/multiagent/eino_summarize.go +++ b/internal/multiagent/eino_summarize.go @@ -23,20 +23,59 @@ import ( ) // einoSummarizeUserInstruction:压缩历史时保留渗透测试与用户约束关键信息。 -const einoSummarizeUserInstruction = `在保持所有关键安全测试信息完整的前提下压缩对话历史。 +// 结构对齐 Eino 最佳实践(禁止工具、+),章节为安全测试领域化。 +const einoSummarizeUserInstruction = `关键:仅以纯文本响应。禁止调用任何工具(read_file、exec、grep、glob、write、edit 等)。 +上述对话中已包含全部待压缩上下文;不要要求用户粘贴历史,不要输出「请提供待压缩的对话历史」等占位/meta 回复。 +工具调用将被拒绝并浪费唯一一次摘要机会。 -必须保留:已确认漏洞与攻击路径、工具输出中的核心发现、凭证与认证细节、架构与薄弱点、当前进度、失败尝试与死路、策略决策。 -保留精确技术细节(URL、路径、参数、Payload、版本号、报错原文可摘要但要点不丢)。 -将冗长扫描输出概括为结论;重复发现合并表述。 -已枚举资产须保留**可继承的摘要**:主域、关键子域/主机短表(或数量+代表样例)、高价值目标与已识别服务/端口要点,避免后续子代理因「看不见清单」而重复全量枚举。 +你的任务:在保持所有关键安全测试信息完整的前提下压缩对话历史,使后续代理能无缝继续同一授权测试任务。 -用户消息中的约束须精确保留(可摘要表述,但要点不可丢或改写): -- 授权测试目标、范围与禁止项(域名、路径、IP、环境) -- 用户提供的凭证、账号、Cookie、Token(敏感值原文保留) +压缩原则: +- 必须保留:已确认漏洞与攻击路径、工具输出核心发现、凭证与认证细节、架构与薄弱点、当前进度、失败尝试与死路、策略决策 +- 保留精确技术细节(URL、路径、参数、Payload、版本号;报错原文可摘要但要点不丢) +- 冗长扫描输出概括为结论;重复发现合并表述 +- 已枚举资产须保留可继承摘要:主域、关键子域/主机短表(或数量+代表样例)、高价值目标、已识别服务/端口要点 + +输出格式(严格遵循,仅一轮回复): +1. 先输出 块:按时间顺序梳理对话,检查是否涵盖下方各章节要点;analysis 仅供自检,保持简洁(建议 ≤400 字) +2. 再输出 块:按以下章节写入可继承的压缩报告(无信息处写「无」,禁止留空模板占位符) + + +## 1. 授权范围与约束 +- 目标/范围/禁止项(域名、路径、IP、环境) +- 凭证/认证信息(账号、Token、Cookie;敏感值原文保留) - 用户指定的方法、工具、优先级与待办 -- 用户明确的否定约束(不要测什么、不要用什么手法) +- 否定约束(不测什么、不用什么手法) -输出须使后续代理能无缝继续同一授权测试任务。` +## 2. 资产与服务枚举摘要 +- 主域/核心资产、关键子域或主机短表(或数量+代表样例) +- 高价值目标、已识别服务/端口要点 +- 资产状态(存活/可攻/已排除/待验证) + +## 3. 架构与已知薄弱点 +- 技术栈/部署拓扑/信任边界 +- 已识别薄弱点列表 + +## 4. 已确认漏洞与攻击路径 +- 漏洞名/CVE、URL/路径、参数/Payload、PoC 要点、影响等级 +- 攻击链/利用路径(步骤化) + +## 5. 工具核心发现与扫描结论 +- 各工具结论(概括核心输出,非冗长日志) +- 重复发现合并表述 + +## 6. 所有用户消息 + +- [逐条列出非 tool 结果的用户消息要点;敏感约束与原文措辞尽量保留] + + +## 7. 当前进度、策略决策与下一步 +- 当前位置(已完成/进行中/卡点) +- 失败尝试与死路(方法、现象/报错摘要、结论) +- 策略决策与下一步具体操作(须与最近用户请求及未完成任务一致) + + +提醒:不要调用任何工具;必须基于上文已有对话直接输出 ,勿输出 analysis 以外的正文。` // newEinoSummarizationMiddleware 使用 Eino ADK Summarization 中间件(见 https://www.cloudwego.io/zh/docs/eino/core_modules/eino_adk/eino_adk_chatmodelagentmiddleware/middleware_summarization/)。 // 触发阈值:估算 token 超过 openai.max_total_tokens * summarization_trigger_ratio(默认 0.8)时摘要。 @@ -150,6 +189,7 @@ func newEinoSummarizationMiddleware( }, }, Finalize: func(ctx context.Context, originalMessages []adk.Message, summary adk.Message) ([]adk.Message, error) { + summary = stripAnalysisFromSummarizationMessage(summary) out, ferr := summarizeFinalizeWithRecentAssistantToolTrail(ctx, originalMessages, summary, tokenCounter, recentTrailMax) if ferr != nil { return nil, ferr diff --git a/internal/multiagent/eino_summarize_output.go b/internal/multiagent/eino_summarize_output.go new file mode 100644 index 00000000..9f96b975 --- /dev/null +++ b/internal/multiagent/eino_summarize_output.go @@ -0,0 +1,73 @@ +package multiagent + +import ( + "regexp" + "strings" + + "github.com/cloudwego/eino/adk" + "github.com/cloudwego/eino/schema" +) + +var ( + summarizationAnalysisBlockRegex = regexp.MustCompile(`(?is)\s*.*?\s*`) + summarizationSummaryBlockRegex = regexp.MustCompile(`(?is)\s*(.*?)\s*`) +) + +// stripAnalysisFromSummarizationMessage removes the block from a post-processed +// Eino summary user message. Analysis helps one-shot generation quality but should not +// occupy continuation context after compaction. +func stripAnalysisFromSummarizationMessage(msg adk.Message) adk.Message { + if msg == nil { + return msg + } + cloned := *msg + if cloned.Content != "" { + cloned.Content = stripAnalysisFromSummarizationText(cloned.Content) + } + if len(cloned.UserInputMultiContent) > 0 { + parts := make([]schema.MessageInputPart, len(cloned.UserInputMultiContent)) + copy(parts, cloned.UserInputMultiContent) + // Only the first text part carries model output plus Eino preamble/transcript path. + for i := range parts { + if parts[i].Type != schema.ChatMessagePartTypeText || parts[i].Text == "" { + continue + } + if i == 0 { + parts[i].Text = stripAnalysisFromSummarizationText(parts[i].Text) + } + break + } + cloned.UserInputMultiContent = parts + } + return &cloned +} + +func stripAnalysisFromSummarizationText(text string) string { + text = strings.TrimSpace(text) + if text == "" { + return text + } + stripped := strings.TrimSpace(summarizationAnalysisBlockRegex.ReplaceAllString(text, "")) + if stripped == "" { + return text + } + return stripped +} + +// extractSummarizationSummaryBody returns the inner text of the last block when present. +// Used by tests and optional strict compaction paths. +func extractSummarizationSummaryBody(text string) (string, bool) { + text = strings.TrimSpace(text) + if text == "" { + return "", false + } + all := summarizationSummaryBlockRegex.FindAllStringSubmatch(text, -1) + if len(all) == 0 || len(all[len(all)-1]) < 2 { + return "", false + } + body := strings.TrimSpace(all[len(all)-1][1]) + if body == "" { + return "", false + } + return body, true +} diff --git a/internal/multiagent/eino_summarize_output_test.go b/internal/multiagent/eino_summarize_output_test.go new file mode 100644 index 00000000..47abc91e --- /dev/null +++ b/internal/multiagent/eino_summarize_output_test.go @@ -0,0 +1,67 @@ +package multiagent + +import ( + "strings" + "testing" + + "github.com/cloudwego/eino/schema" +) + +func TestStripAnalysisFromSummarizationText(t *testing.T) { + in := "internal notes\n\n\n## 1. 授权\n- example.com\n" + got := stripAnalysisFromSummarizationText(in) + if strings.Contains(got, "") { + t.Fatalf("analysis block should be removed: %q", got) + } + if !strings.Contains(got, "## 1. 授权") { + t.Fatalf("summary body should remain: %q", got) + } +} + +func TestStripAnalysisFromSummarizationMessage_UserInputMultiContent(t *testing.T) { + msg := &schema.Message{ + Role: schema.User, + UserInputMultiContent: []schema.MessageInputPart{ + { + Type: schema.ChatMessagePartTypeText, + Text: "此会话延续自此前一段因上下文耗尽而终止的对话。\n\ndraft\nbody\n\n完整记录位于:/tmp/transcript.txt", + }, + { + Type: schema.ChatMessagePartTypeText, + Text: "请从我们中断的地方继续对话,无需向用户提出任何进一步的问题。", + }, + }, + } + out := stripAnalysisFromSummarizationMessage(msg) + if len(out.UserInputMultiContent) != 2 { + t.Fatalf("expected 2 parts, got %d", len(out.UserInputMultiContent)) + } + if strings.Contains(out.UserInputMultiContent[0].Text, "") { + t.Fatalf("part 0 should drop analysis: %q", out.UserInputMultiContent[0].Text) + } + if !strings.Contains(out.UserInputMultiContent[0].Text, "body") { + t.Fatalf("part 0 should keep summary: %q", out.UserInputMultiContent[0].Text) + } + if out.UserInputMultiContent[1].Text != "请从我们中断的地方继续对话,无需向用户提出任何进一步的问题。" { + t.Fatalf("continue instruction part should be unchanged: %q", out.UserInputMultiContent[1].Text) + } +} + +func TestExtractSummarizationSummaryBody(t *testing.T) { + body, ok := extractSummarizationSummaryBody("x kept ") + if !ok || body != "kept" { + t.Fatalf("extract summary body: ok=%v body=%q", ok, body) + } + _, ok = extractSummarizationSummaryBody("plain text only") + if ok { + t.Fatal("expected false for plain text") + } +} + +func TestStripAnalysisFromSummarizationText_NoAnalysisUnchanged(t *testing.T) { + in := "only summary" + got := stripAnalysisFromSummarizationText(in) + if got != in { + t.Fatalf("expected unchanged text, got %q", got) + } +}