diff --git a/web/static/i18n/en-US.json b/web/static/i18n/en-US.json index 9d742e5f..5dde56eb 100644 --- a/web/static/i18n/en-US.json +++ b/web/static/i18n/en-US.json @@ -2178,11 +2178,27 @@ "subIndexFilter": "Sub-index filter (optional)", "subIndexFilterPlaceholder": "e.g. prod, must match an indexing sub_indexes tag", "subIndexFilterHint": "Empty = no filter. When set, only rows whose sub_indexes contain this tag (legacy rows with empty sub_indexes still match).", + "ragPipelineHeader": "RAG pipeline (MultiQuery + Rerank)", + "ragPipelineHint": "MultiQuery and rerank are always on: LLM query rewrite → vector prefetch & fusion → HTTP rerank → dedupe & budget truncate.", + "multiQueryMaxQueries": "MultiQuery rewrite variant limit", + "multiQueryMaxQueriesPlaceholder": "4", + "multiQueryMaxQueriesHint": "Max LLM-generated retrieval variants (including paraphrases of the original query). Recommended 3–4, max 8.", + "rerankProvider": "Rerank provider", + "rerankProviderAuto": "Auto (infer from Base URL)", + "rerankProviderCohere": "Cohere-compatible API", + "rerankProviderHint": "DashScope uses gte-rerank; other compatible endpoints use /v1/rerank. Leave empty to infer from Base URL below.", + "rerankModel": "Rerank model (optional)", + "rerankModelPlaceholder": "Empty: DashScope→gte-rerank, Cohere→rerank-multilingual-v3.0", + "rerankBaseUrl": "Rerank Base URL (optional)", + "rerankBaseUrlPlaceholder": "Leave empty to reuse embedding / OpenAI base_url", + "rerankApiKey": "Rerank API Key (optional)", + "rerankApiKeyPlaceholder": "Leave empty to reuse embedding / OpenAI api_key", + "rerankApiKeyHint": "On rerank failure, results fall back to fusion order; search still works.", "postRetrieveHeader": "Post-retrieval (dedupe / budget)", "postRetrieveDedupeAuto": "Results are always deduped by normalized text (whitespace-collapsed bodies). No setting required.", "prefetchTopK": "Prefetch candidates (vector stage)", - "prefetchTopKPlaceholder": "0", - "prefetchTopKHint": "0 = same as Top-K; larger values fetch more vector hits before dedupe/truncate (max 200).", + "prefetchTopKPlaceholder": "20", + "prefetchTopKHint": "Vector candidates per MultiQuery variant; 0 uses built-in max(top_k×4, 20) (max 200).", "maxContextChars": "Max returned characters (Unicode)", "maxContextCharsPlaceholder": "0", "maxContextCharsHint": "0 = unlimited; keeps whole chunks in rank order until the budget is exceeded.", diff --git a/web/static/i18n/zh-CN.json b/web/static/i18n/zh-CN.json index 08a52d71..96ce1f5d 100644 --- a/web/static/i18n/zh-CN.json +++ b/web/static/i18n/zh-CN.json @@ -2166,11 +2166,27 @@ "subIndexFilter": "子索引过滤(可选)", "subIndexFilterPlaceholder": "如 prod,与索引 sub_indexes 一致", "subIndexFilterHint": "留空不过滤;填写后仅检索向量行 sub_indexes 中含该标签的结果(未打标旧行仍保留)。", + "ragPipelineHeader": "RAG 管线(MultiQuery + Rerank)", + "ragPipelineHint": "MultiQuery 与精排始终启用:LLM 改写多路检索 → 向量预取与融合 → HTTP 精排 → 去重与预算截断。", + "multiQueryMaxQueries": "MultiQuery 改写变体上限", + "multiQueryMaxQueriesPlaceholder": "4", + "multiQueryMaxQueriesHint": "LLM 生成的检索变体数量上限(含原问语义覆盖);建议 3~4,最大 8。", + "rerankProvider": "精排提供商", + "rerankProviderAuto": "自动(按 Base URL 推断)", + "rerankProviderCohere": "Cohere 兼容 API", + "rerankProviderHint": "DashScope 使用 gte-rerank;其他兼容端点走 /v1/rerank。留空时按下方 Base URL 自动推断。", + "rerankModel": "精排模型(可选)", + "rerankModelPlaceholder": "留空:DashScope→gte-rerank,Cohere→rerank-multilingual-v3.0", + "rerankBaseUrl": "精排 Base URL(可选)", + "rerankBaseUrlPlaceholder": "留空则复用嵌入 / OpenAI 的 base_url", + "rerankApiKey": "精排 API Key(可选)", + "rerankApiKeyPlaceholder": "留空则复用嵌入 / OpenAI 的 api_key", + "rerankApiKeyHint": "精排失败时自动降级为融合排序,检索仍可用。", "postRetrieveHeader": "检索后处理(去重 / 预算)", "postRetrieveDedupeAuto": "检索结果会自动按正文规范化去重(合并仅空白不同的重复片段),无需配置。", "prefetchTopK": "预取候选数(向量阶段)", - "prefetchTopKPlaceholder": "0", - "prefetchTopKHint": "0 表示与 Top-K 相同;大于 Top-K 时先多取候选再经去重/截断回到 Top-K(上限 200)。", + "prefetchTopKPlaceholder": "20", + "prefetchTopKHint": "每条 MultiQuery 变体的向量候选数;0 表示内置 max(top_k×4, 20)(上限 200)。", "maxContextChars": "返回内容最大字符数(Unicode)", "maxContextCharsPlaceholder": "0", "maxContextCharsHint": "0 表示不限制;按检索顺序整段保留 chunk,超出则丢弃后续。", diff --git a/web/static/js/settings.js b/web/static/js/settings.js index b5d875df..654cbed8 100644 --- a/web/static/js/settings.js +++ b/web/static/js/settings.js @@ -389,10 +389,35 @@ async function loadConfig(loadTools = true) { subIdxFilterInput.value = knowledge.retrieval?.sub_index_filter || ''; } + const mq = knowledge.retrieval?.multi_query || {}; + const mqMaxInput = document.getElementById('knowledge-multi-query-max-queries'); + if (mqMaxInput) { + const mqVal = parseInt(mq.max_queries, 10); + mqMaxInput.value = (!isNaN(mqVal) && mqVal > 0) ? mqVal : 4; + } + const rr = knowledge.retrieval?.rerank || {}; + const rerankProviderSelect = document.getElementById('knowledge-rerank-provider'); + if (rerankProviderSelect) { + const p = (rr.provider || '').toLowerCase(); + rerankProviderSelect.value = (p === 'dashscope' || p === 'cohere') ? p : ''; + } + const rerankModelInput = document.getElementById('knowledge-rerank-model'); + if (rerankModelInput) { + rerankModelInput.value = rr.model || ''; + } + const rerankBaseUrlInput = document.getElementById('knowledge-rerank-base-url'); + if (rerankBaseUrlInput) { + rerankBaseUrlInput.value = rr.base_url || ''; + } + const rerankApiKeyInput = document.getElementById('knowledge-rerank-api-key'); + if (rerankApiKeyInput) { + rerankApiKeyInput.value = rr.api_key || ''; + } + const post = knowledge.retrieval?.post_retrieve || {}; const prefetchInput = document.getElementById('knowledge-post-retrieve-prefetch-top-k'); if (prefetchInput) { - prefetchInput.value = post.prefetch_top_k ?? 0; + prefetchInput.value = post.prefetch_top_k ?? 20; } const maxCharsInput = document.getElementById('knowledge-post-retrieve-max-chars'); if (maxCharsInput) { @@ -1273,8 +1298,25 @@ async function applySettings() { return isNaN(val) ? 0.7 : val; })(), sub_index_filter: document.getElementById('knowledge-retrieval-sub-index-filter')?.value?.trim() || '', + multi_query: { + max_queries: (() => { + const v = parseInt(document.getElementById('knowledge-multi-query-max-queries')?.value, 10); + if (isNaN(v) || v <= 0) return 4; + return Math.min(8, v); + })() + }, + rerank: { + provider: document.getElementById('knowledge-rerank-provider')?.value?.trim() || '', + model: document.getElementById('knowledge-rerank-model')?.value?.trim() || '', + base_url: document.getElementById('knowledge-rerank-base-url')?.value?.trim() || '', + api_key: document.getElementById('knowledge-rerank-api-key')?.value?.trim() || '' + }, post_retrieve: { - prefetch_top_k: parseInt(document.getElementById('knowledge-post-retrieve-prefetch-top-k')?.value, 10) || 0, + prefetch_top_k: (() => { + const raw = document.getElementById('knowledge-post-retrieve-prefetch-top-k')?.value; + const v = parseInt(raw, 10); + return isNaN(v) ? 20 : Math.max(0, v); + })(), max_context_chars: parseInt(document.getElementById('knowledge-post-retrieve-max-chars')?.value, 10) || 0, max_context_tokens: parseInt(document.getElementById('knowledge-post-retrieve-max-tokens')?.value, 10) || 0 } diff --git a/web/templates/index.html b/web/templates/index.html index 6820bdd6..9ee25e43 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -3009,14 +3009,46 @@ 留空表示不过滤;非空时仅检索 sub_indexes 含该标签的向量行(未打标旧数据仍会命中)。 +
MultiQuery 与精排始终启用:LLM 改写多路检索 → 向量预取与融合 → HTTP 精排 → 去重与预算截断。
+检索结果会自动按正文规范化去重(合并仅空白不同的重复片段),无需配置。