From 3a401ade683eee6267bfb38eaba3673b23ae1630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=85=AC=E6=98=8E?= <83812544+Ed1s0nZ@users.noreply.github.com> Date: Wed, 3 Jun 2026 17:08:59 +0800 Subject: [PATCH] Add files via upload --- agents/orchestrator-supervisor.md | 1 + agents/orchestrator.md | 1 + docs/VISION.md | 51 +++++++++++++ web/static/i18n/en-US.json | 20 +++++ web/static/i18n/zh-CN.json | 20 +++++ web/static/js/settings.js | 122 ++++++++++++++++++++++++++++++ web/templates/index.html | 83 ++++++++++++++++++++ 7 files changed, 298 insertions(+) create mode 100644 docs/VISION.md diff --git a/agents/orchestrator-supervisor.md b/agents/orchestrator-supervisor.md index f72702ec..7de06ec6 100644 --- a/agents/orchestrator-supervisor.md +++ b/agents/orchestrator-supervisor.md @@ -97,6 +97,7 @@ description: supervisor 模式下的协调者:通过 transfer 委派专家子 - **`transfer` 交接包(强制,避免专家重复侦察)**:**把专家当作刚走进房间的同事——它没看过你的对话,不知道你做了什么,也不了解这个任务为什么重要。** 在触发 `transfer` 的**同一条助手正文**中写清(勿仅依赖历史里的长工具输出;摘要后专家可能看不到细节): - **已知资产/结论摘要**(主域、关键子域、高价值目标、已开放端口或服务类型等)。 - **本轮唯一任务**与 **禁止项**(例如:「不得再做全量子域枚举;仅对下列主机做 MQTT 验证」)。 + - **图片/验证码(若有)**:本地绝对路径 + 期望输出格式(如验证码「只输出字符」);专家默认看不到父对话识图结果,须在交接正文中写明。 - **专家类型**:验证/利用/协议分析派对应专家,**避免**把「仅差验证」的工作交给 `recon` 导致其按习惯从侦察阶段重来。 - **transfer 前目标完整性校验(强制)**:在 `transfer` 前必须具备并显式写入: - 目标标识:`URL` 或 `IP:Port` 或 `域名 + 具体路径/API 基址` diff --git a/agents/orchestrator.md b/agents/orchestrator.md index 98158a23..25b7d2d9 100644 --- a/agents/orchestrator.md +++ b/agents/orchestrator.md @@ -33,6 +33,7 @@ description: 多代理模式下的 Deep 编排者:在已授权安全场景中 - **`task` 上下文交接(强制,避免重复劳动)**:**把子代理当作刚走进房间的同事——它没看过你的对话,不知道你做了什么,也不了解这个任务为什么重要。** 框架下子代理默认**只看到**你传入的 `description` 文本,**看不到**你在父对话里已跑过的工具输出全文。因此每次 `task` 的 `description` 必须自带**交接包**(可精简,但不可省略关键事实): - **已完成**:已枚举的主域/子域要点、已扫端口或服务结论、已确认 IP/URL、协调者已知的漏洞假设等(用列表或短段落即可)。 - **本轮只做**:明确写「本轮禁止重复全量子域爆破 / 禁止重复相同 subfinder 参数集」等(若确实需要增量,写清增量范围)。 + - **图片/验证码(若有)**:本地绝对路径 + 期望输出格式(如验证码「只输出字符」、登录页 UI 要素列表);子代理默认看不到父对话里的识图结果,须在 description 中写明路径与格式。 - **专家匹配**:验证、利用、协议深挖(如 MQTT)等应委派给**对应专项子代理**;不要把此类子目标交给纯侦察(`recon`)角色除非任务仅为补充攻击面。 - **派单前目标完整性校验(强制)**:在调用 `task` 前,你必须检查并写入最小必需字段;任一缺失时**禁止委派**,先向用户澄清或先自行补充证据: - **目标标识**:`URL` 或 `IP:Port` 或 `域名 + 具体路径/API 基址` diff --git a/docs/VISION.md b/docs/VISION.md new file mode 100644 index 00000000..ed6b7f2d --- /dev/null +++ b/docs/VISION.md @@ -0,0 +1,51 @@ +# 视觉分析(analyze_image) + +## 概述 + +- **工具名**:`analyze_image`(MCP 内置) +- **行为**:读取本地图片 → `imaging` 缩放/JPEG 压缩 → 调用独立 **Vision** 模型 → 返回**纯文本**给 Agent +- **上下文**:图片字节**不会**写入对话历史;仅路径与文字摘要进入 Agent 上下文 + +## 配置(`config.yaml` → `vision`) + +```yaml +vision: + enabled: true + model: qwen-vl-max # 必填 + api_key: # 留空 → openai.api_key + base_url: # 留空 → openai.base_url + provider: # 留空 → openai.provider + max_image_bytes: 5242880 + max_dimension: 2048 + jpeg_quality: 82 + max_payload_bytes: 524288 + skip_preprocess_below_bytes: 2097152 # 低于 2MB 且长边<=max_dimension 时原图直传;0=始终 JPEG 压缩 + detail: low # low | high | auto + timeout_seconds: 60 + # allowed_roots: [] # 额外绝对路径根 +``` + +`enabled: false` 时不注册工具。 + +## Web 设置 + +**系统设置 → 基本设置 → 视觉分析(analyze_image)** 可配置启用开关、视觉模型、API Key/Base URL(留空复用 OpenAI)、预处理参数;**保存并应用** 后写入 `config.yaml` 并重新注册 MCP 工具。 + +## 路径白名单 + +默认可读: + +- 进程工作目录(`cwd`)及其子路径 +- `chat_uploads/` +- `agent.result_storage_dir`(默认 `tmp/`) +- `vision.allowed_roots` 中配置的绝对路径 + +## Agent 使用 + +系统提示已说明:遇图片调用 `analyze_image`,勿用 `read_file` 读二进制图。 + +`multi_agent.eino_middleware.tool_search_always_visible_tools` 建议包含 `analyze_image`。 + +## 合规 + +启用后图片会发往 Vision API 配置的上游;敏感环境请使用可信网关或保持 `enabled: false`。 diff --git a/web/static/i18n/en-US.json b/web/static/i18n/en-US.json index 8271a877..9ab8cdbd 100644 --- a/web/static/i18n/en-US.json +++ b/web/static/i18n/en-US.json @@ -1957,6 +1957,26 @@ "retryDelay": "Retry delay (ms)", "retryDelayPlaceholder": "1000", "retryDelayHint": "Delay between retries (ms)", + "visionConfig": "Vision analysis (analyze_image)", + "visionEnabled": "Enable analyze_image vision tool", + "visionEnabledHint": "Registers the MCP tool when enabled; images are sent only for one VL call; agent context keeps text summaries only. Save & apply to take effect.", + "visionBaseUrlPlaceholder": "Leave empty to reuse OpenAI Base URL", + "visionApiKeyPlaceholder": "Leave empty to reuse OpenAI API Key", + "visionModel": "Vision model", + "visionModelPlaceholder": "qwen-vl-max", + "visionModelRequired": "Vision model name is required when vision is enabled", + "visionAdvanced": "Advanced: preprocessing & limits", + "visionMaxImageBytes": "Max original file size (bytes)", + "visionMaxDimension": "Max long-edge pixels", + "visionJpegQuality": "JPEG quality", + "visionMaxPayloadBytes": "Max API payload (bytes)", + "visionSkipPreprocessBytes": "Passthrough below (bytes)", + "visionSkipPreprocessHint": "0 = always JPEG compress; must also fit long-edge and payload limits.", + "visionDetail": "Image detail", + "visionTimeout": "Timeout (seconds)", + "visionAllowedRoots": "Extra allowed path roots", + "visionAllowedRootsPlaceholder": "One absolute path per line, optional", + "visionTestFillRequired": "Enter vision model and ensure API Key is available (or reuse OpenAI)", "testConnection": "Test Connection", "testFillRequired": "Please fill in API Key and Model first", "testing": "Testing connection...", diff --git a/web/static/i18n/zh-CN.json b/web/static/i18n/zh-CN.json index 740e0928..4847c6f3 100644 --- a/web/static/i18n/zh-CN.json +++ b/web/static/i18n/zh-CN.json @@ -1946,6 +1946,26 @@ "retryDelay": "重试间隔(毫秒)", "retryDelayPlaceholder": "1000", "retryDelayHint": "重试间隔毫秒数(默认 1000),每次重试会递增延迟", + "visionConfig": "视觉分析(analyze_image)", + "visionEnabled": "启用视觉分析工具 analyze_image", + "visionEnabledHint": "启用后注册 MCP 工具;图片仅在单次 VL 调用中出现,Agent 上下文只保留文字摘要。保存并应用后生效。", + "visionBaseUrlPlaceholder": "留空则复用 OpenAI Base URL", + "visionApiKeyPlaceholder": "留空则复用 OpenAI API Key", + "visionModel": "视觉模型", + "visionModelPlaceholder": "qwen-vl-max", + "visionModelRequired": "启用视觉分析时请填写视觉模型名称", + "visionAdvanced": "高级:预处理与限制", + "visionMaxImageBytes": "原始文件上限(字节)", + "visionMaxDimension": "长边缩放像素", + "visionJpegQuality": "JPEG 质量", + "visionMaxPayloadBytes": "送 API 体积上限(字节)", + "visionSkipPreprocessBytes": "低于该字节可原图直传", + "visionSkipPreprocessHint": "0 表示始终 JPEG 压缩;需同时满足长边与 payload 限制。", + "visionDetail": "Image detail", + "visionTimeout": "超时(秒)", + "visionAllowedRoots": "额外允许路径根目录", + "visionAllowedRootsPlaceholder": "每行一个绝对路径,可选", + "visionTestFillRequired": "请填写视觉模型,并确保 API Key 可用(可复用 OpenAI)", "testConnection": "测试连接", "testFillRequired": "请先填写 API Key 和模型", "testing": "测试中...", diff --git a/web/static/js/settings.js b/web/static/js/settings.js index dc8ca1fb..a973b5e8 100644 --- a/web/static/js/settings.js +++ b/web/static/js/settings.js @@ -197,6 +197,8 @@ async function loadConfig(loadTools = true) { orAllowEl.checked = orm.allow_client_reasoning !== false; } + fillVisionConfigFromCurrent(currentConfig.vision || {}); + // 填充FOFA配置 const fofa = currentConfig.fofa || {}; const fofaEmailEl = document.getElementById('fofa-email'); @@ -1074,6 +1076,14 @@ async function applySettings() { alert(msg); return; } + + const visionPayload = collectVisionConfigFromForm(); + if (visionPayload.enabled && !visionPayload.model) { + const vm = document.getElementById('vision-model'); + if (vm) vm.classList.add('error'); + alert((typeof window.t === 'function') ? window.t('settingsBasic.visionModelRequired') : '启用视觉分析时请填写视觉模型名称'); + return; + } // 收集配置 const knowledgeEnabledCheckbox = document.getElementById('knowledge-enabled'); @@ -1146,6 +1156,7 @@ async function applySettings() { allow_client_reasoning: document.getElementById('openai-reasoning-allow-client')?.checked !== false } }, + vision: visionPayload, fofa: { email: document.getElementById('fofa-email')?.value.trim() || '', api_key: document.getElementById('fofa-api-key')?.value.trim() || '', @@ -1341,6 +1352,117 @@ async function applySettings() { } } +function fillVisionConfigFromCurrent(v) { + const en = document.getElementById('vision-enabled'); + if (en) en.checked = v.enabled === true; + const prov = document.getElementById('vision-provider'); + if (prov) prov.value = (v.provider || '').trim(); + const setVal = (id, val) => { + const el = document.getElementById(id); + if (el) el.value = val != null && val !== '' ? String(val) : ''; + }; + setVal('vision-api-key', v.api_key || ''); + setVal('vision-base-url', v.base_url || ''); + setVal('vision-model', v.model || ''); + setVal('vision-max-image-bytes', v.max_image_bytes || 5242880); + setVal('vision-max-dimension', v.max_dimension || 2048); + setVal('vision-jpeg-quality', v.jpeg_quality || 82); + setVal('vision-max-payload-bytes', v.max_payload_bytes || 524288); + setVal('vision-skip-preprocess-bytes', v.skip_preprocess_below_bytes != null ? v.skip_preprocess_below_bytes : 2097152); + setVal('vision-timeout-seconds', v.timeout_seconds || 60); + const det = document.getElementById('vision-detail'); + if (det) { + const d = (v.detail || 'low').toString().toLowerCase(); + det.value = ['low', 'auto', 'high'].includes(d) ? d : 'low'; + } + const rootsEl = document.getElementById('vision-allowed-roots'); + if (rootsEl) { + const roots = Array.isArray(v.allowed_roots) ? v.allowed_roots : []; + rootsEl.value = roots.join('\n'); + } + syncVisionFormEnabled(); +} + +function collectVisionConfigFromForm() { + const parseIntOr = (id, fallback) => { + const n = parseInt(document.getElementById(id)?.value, 10); + return Number.isNaN(n) ? fallback : n; + }; + const rootsRaw = document.getElementById('vision-allowed-roots')?.value || ''; + const allowed_roots = rootsRaw.split(/\r?\n/).map(s => s.trim()).filter(Boolean); + const provider = document.getElementById('vision-provider')?.value.trim() || ''; + return { + enabled: document.getElementById('vision-enabled')?.checked === true, + api_key: document.getElementById('vision-api-key')?.value.trim() || '', + base_url: document.getElementById('vision-base-url')?.value.trim() || '', + model: document.getElementById('vision-model')?.value.trim() || '', + provider: provider, + timeout_seconds: parseIntOr('vision-timeout-seconds', 60), + max_image_bytes: parseIntOr('vision-max-image-bytes', 5242880), + max_dimension: parseIntOr('vision-max-dimension', 2048), + jpeg_quality: parseIntOr('vision-jpeg-quality', 82), + max_payload_bytes: parseIntOr('vision-max-payload-bytes', 524288), + skip_preprocess_below_bytes: parseIntOr('vision-skip-preprocess-bytes', 2097152), + detail: document.getElementById('vision-detail')?.value || 'low', + allowed_roots: allowed_roots + }; +} + +function syncVisionFormEnabled() { + const enabled = document.getElementById('vision-enabled')?.checked === true; + const panel = document.getElementById('vision-fields-panel'); + if (panel) { + panel.style.opacity = enabled ? '1' : '0.55'; + panel.querySelectorAll('input, select, textarea, a').forEach(el => { + if (el.id === 'test-vision-btn') return; + el.disabled = !enabled; + }); + } +} + +async function testVisionConnection() { + const resultEl = document.getElementById('test-vision-result'); + const vision = collectVisionConfigFromForm(); + const openai = { + provider: document.getElementById('openai-provider')?.value || 'openai', + api_key: document.getElementById('openai-api-key')?.value.trim() || '', + base_url: document.getElementById('openai-base-url')?.value.trim() || '', + model: document.getElementById('openai-model')?.value.trim() || '' + }; + const apiKey = vision.api_key || openai.api_key; + const model = vision.model; + if (!apiKey || !model) { + if (resultEl) { + resultEl.textContent = typeof window.t === 'function' ? window.t('settingsBasic.visionTestFillRequired') : '请填写视觉模型,并确保 API Key 可用'; + } + return; + } + if (resultEl) { + resultEl.textContent = typeof window.t === 'function' ? window.t('settingsBasic.testing') : '测试中...'; + resultEl.style.color = ''; + } + try { + const response = await apiFetch('/api/config/test-vision', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ vision: vision, openai: openai }) + }); + const result = await response.json(); + if (result.success) { + const latency = result.latency_ms != null ? ` (${result.latency_ms}ms)` : ''; + const modelInfo = result.model ? ` [${result.model}]` : ''; + resultEl.textContent = (typeof window.t === 'function' ? window.t('settingsBasic.testSuccess') : '连接成功') + modelInfo + latency; + resultEl.style.color = 'var(--success-color, #38a169)'; + } else { + resultEl.textContent = (typeof window.t === 'function' ? window.t('settingsBasic.testFailed') : '连接失败') + ': ' + (result.error || '未知错误'); + resultEl.style.color = 'var(--error-color, #e53e3e)'; + } + } catch (error) { + resultEl.textContent = (typeof window.t === 'function' ? window.t('settingsBasic.testError') : '测试出错') + ': ' + error.message; + resultEl.style.color = 'var(--error-color, #e53e3e)'; + } +} + // 测试OpenAI连接 async function testOpenAIConnection() { const btn = document.getElementById('test-openai-btn'); diff --git a/web/templates/index.html b/web/templates/index.html index 822a170d..4c0e7395 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -2475,6 +2475,89 @@ + +
+

视觉分析(analyze_image)

+
+
+ + 启用后注册 MCP 工具;图片仅在单次 VL 调用中出现,Agent 上下文只保留文字摘要。 +
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ 高级:预处理与限制 +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + 0 表示始终 JPEG 压缩;需同时满足长边与 payload 限制。 +
+
+ + +
+
+ + +
+
+ + +
+
+
+
+ 测试连接 + +
+
+
+
+

Agent 配置