From 0492365430f16965c2a3dcbe1190441359ff17c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=85=AC=E6=98=8E?= <83812544+Ed1s0nZ@users.noreply.github.com> Date: Wed, 3 Jun 2026 17:11:44 +0800 Subject: [PATCH] Add files via upload --- internal/config/config.go | 1 + internal/config/vision.go | 98 +++++++++++++++++++++++++++++++ internal/config/vision_test.go | 55 +++++++++++++++++ internal/mcp/builtin/constants.go | 5 ++ 4 files changed, 159 insertions(+) create mode 100644 internal/config/vision.go create mode 100644 internal/config/vision_test.go diff --git a/internal/config/config.go b/internal/config/config.go index a25b4a11..a2ed60d8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,6 +37,7 @@ type Config struct { AgentsDir string `yaml:"agents_dir,omitempty" json:"agents_dir,omitempty"` // 多代理子 Agent Markdown 定义目录(*.md,YAML front matter) MultiAgent MultiAgentConfig `yaml:"multi_agent,omitempty" json:"multi_agent,omitempty"` Project ProjectConfig `yaml:"project,omitempty" json:"project,omitempty"` + Vision VisionConfig `yaml:"vision,omitempty" json:"vision,omitempty"` } // ProjectConfig 项目黑板(跨对话共享事实)配置。 diff --git a/internal/config/vision.go b/internal/config/vision.go new file mode 100644 index 00000000..c11c8c8a --- /dev/null +++ b/internal/config/vision.go @@ -0,0 +1,98 @@ +package config + +import "strings" + +// VisionConfig 独立视觉模型与 analyze_image 工具参数;enabled 时注册 MCP 工具 analyze_image。 +type VisionConfig struct { + Enabled bool `yaml:"enabled" json:"enabled"` + APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"` + BaseURL string `yaml:"base_url,omitempty" json:"base_url,omitempty"` + Model string `yaml:"model,omitempty" json:"model,omitempty"` + Provider string `yaml:"provider,omitempty" json:"provider,omitempty"` + TimeoutSeconds int `yaml:"timeout_seconds,omitempty" json:"timeout_seconds,omitempty"` + MaxImageBytes int64 `yaml:"max_image_bytes,omitempty" json:"max_image_bytes,omitempty"` + MaxDimension int `yaml:"max_dimension,omitempty" json:"max_dimension,omitempty"` + JPEGQuality int `yaml:"jpeg_quality,omitempty" json:"jpeg_quality,omitempty"` + MaxPayloadBytes int64 `yaml:"max_payload_bytes,omitempty" json:"max_payload_bytes,omitempty"` + SkipPreprocessBelowBytes int64 `yaml:"skip_preprocess_below_bytes,omitempty" json:"skip_preprocess_below_bytes,omitempty"` // 0=始终压缩;默认 2MB 且长边已<=max_dimension 时原图直传 + Detail string `yaml:"detail,omitempty" json:"detail,omitempty"` // low | high | auto + AllowedRoots []string `yaml:"allowed_roots,omitempty" json:"allowed_roots,omitempty"` +} + +func (v VisionConfig) TimeoutSecondsEffective() int { + if v.TimeoutSeconds <= 0 { + return 60 + } + return v.TimeoutSeconds +} + +func (v VisionConfig) MaxImageBytesEffective() int64 { + if v.MaxImageBytes <= 0 { + return 5 * 1024 * 1024 + } + return v.MaxImageBytes +} + +func (v VisionConfig) MaxDimensionEffective() int { + if v.MaxDimension <= 0 { + return 2048 + } + return v.MaxDimension +} + +func (v VisionConfig) JPEGQualityEffective() int { + if v.JPEGQuality <= 0 || v.JPEGQuality > 100 { + return 82 + } + return v.JPEGQuality +} + +func (v VisionConfig) MaxPayloadBytesEffective() int64 { + if v.MaxPayloadBytes <= 0 { + return 512 * 1024 + } + return v.MaxPayloadBytes +} + +// SkipPreprocessBelowBytesEffective 低于该字节数且长边<=max_dimension、且<=max_payload 时可原图直传;0 表示始终压缩。 +func (v VisionConfig) SkipPreprocessBelowBytesEffective() int64 { + if v.SkipPreprocessBelowBytes < 0 { + return 0 + } + return v.SkipPreprocessBelowBytes +} + +func (v VisionConfig) DetailEffective() string { + d := strings.ToLower(strings.TrimSpace(v.Detail)) + switch d { + case "high", "low", "auto": + return d + default: + return "low" + } +} + +// OpenAICfgEffective 合并主 openai 配置与 vision 覆盖项,供 VL ChatModel 使用。 +// vision.api_key / base_url / provider 留空或省略时,沿用 main(openai)对应字段;vision.model 必填(由 Ready 校验)。 +func (v VisionConfig) OpenAICfgEffective(main OpenAIConfig) OpenAIConfig { + out := main + if k := strings.TrimSpace(v.APIKey); k != "" { + out.APIKey = k + } + if u := strings.TrimSpace(v.BaseURL); u != "" { + out.BaseURL = u + } + if m := strings.TrimSpace(v.Model); m != "" { + out.Model = m + } + if p := strings.TrimSpace(v.Provider); p != "" { + out.Provider = p + } + out.Reasoning.Mode = "off" + return out +} + +// Ready 表示已启用且模型名非空。 +func (v VisionConfig) Ready() bool { + return v.Enabled && strings.TrimSpace(v.Model) != "" +} diff --git a/internal/config/vision_test.go b/internal/config/vision_test.go new file mode 100644 index 00000000..0620a181 --- /dev/null +++ b/internal/config/vision_test.go @@ -0,0 +1,55 @@ +package config + +import "testing" + +func TestVisionConfig_OpenAICfgEffective_fallbackToMain(t *testing.T) { + main := OpenAIConfig{ + APIKey: "main-key", + BaseURL: "https://main.example/v1", + Model: "main-model", + Provider: "openai", + } + v := VisionConfig{Model: "qwen-vl-max"} + out := v.OpenAICfgEffective(main) + if out.APIKey != main.APIKey || out.BaseURL != main.BaseURL || out.Provider != main.Provider { + t.Fatalf("expected openai fallback, got key=%q url=%q provider=%q", out.APIKey, out.BaseURL, out.Provider) + } + if out.Model != "qwen-vl-max" { + t.Fatalf("model: %s", out.Model) + } +} + +func TestVisionConfig_OpenAICfgEffective(t *testing.T) { + main := OpenAIConfig{ + APIKey: "main-key", + BaseURL: "https://main.example/v1", + Model: "main-model", + Provider: "openai", + Reasoning: OpenAIReasoningConfig{Mode: "on"}, + } + v := VisionConfig{ + Model: "vl-model", + APIKey: "vl-key", + BaseURL: "https://vl.example/v1", + Provider: "claude", + } + out := v.OpenAICfgEffective(main) + if out.APIKey != "vl-key" || out.BaseURL != "https://vl.example/v1" || out.Model != "vl-model" { + t.Fatalf("unexpected merge: %+v", out) + } + if out.Provider != "claude" { + t.Fatalf("provider: %s", out.Provider) + } + if out.Reasoning.Mode != "off" { + t.Fatalf("reasoning should be off for vision, got %s", out.Reasoning.Mode) + } +} + +func TestVisionConfig_Ready(t *testing.T) { + if (VisionConfig{Enabled: true, Model: "x"}).Ready() != true { + t.Fatal("expected ready") + } + if (VisionConfig{Enabled: true}).Ready() != false { + t.Fatal("expected not ready without model") + } +} diff --git a/internal/mcp/builtin/constants.go b/internal/mcp/builtin/constants.go index 11dc1bba..eed31455 100644 --- a/internal/mcp/builtin/constants.go +++ b/internal/mcp/builtin/constants.go @@ -20,6 +20,9 @@ const ( ToolListKnowledgeRiskTypes = "list_knowledge_risk_types" ToolSearchKnowledgeBase = "search_knowledge_base" + // 视觉分析(本地图片 → VL 模型 → 文本摘要) + ToolAnalyzeImage = "analyze_image" + // WebShell 助手工具(AI 在 WebShell 管理 - AI 助手 中使用) ToolWebshellExec = "webshell_exec" ToolWebshellFileList = "webshell_file_list" @@ -73,6 +76,7 @@ func IsBuiltinTool(toolName string) bool { ToolRestoreProjectFact, ToolListKnowledgeRiskTypes, ToolSearchKnowledgeBase, + ToolAnalyzeImage, ToolWebshellExec, ToolWebshellFileList, ToolWebshellFileRead, @@ -124,6 +128,7 @@ func GetAllBuiltinTools() []string { ToolRestoreProjectFact, ToolListKnowledgeRiskTypes, ToolSearchKnowledgeBase, + ToolAnalyzeImage, ToolWebshellExec, ToolWebshellFileList, ToolWebshellFileRead,