Add files via upload

This commit is contained in:
公明
2026-04-18 23:33:48 +08:00
committed by GitHub
parent 0fe39fb98a
commit b8372adf5d
4 changed files with 176 additions and 27 deletions
+29 -3
View File
@@ -1299,10 +1299,36 @@
"similarityThreshold": "Similarity threshold",
"similarityPlaceholder": "0.7",
"similarityHint": "Results below this value are filtered (0-1)",
"hybridWeight": "Hybrid weight",
"hybridPlaceholder": "0.7",
"hybridHint": "Vector weight (0-1); 1.0 = vector only, 0.0 = keyword only",
"subIndexFilter": "Sub-index filter (optional)",
"subIndexFilterPlaceholder": "e.g. prod, must match an indexing sub_indexes tag",
"subIndexFilterHint": "Empty = no filter. When set, only rows whose sub_indexes contain this tag (legacy rows with empty sub_indexes still match).",
"postRetrieveHeader": "Post-retrieval (dedupe / budget)",
"postRetrieveDedupeAuto": "Results are always deduped by normalized text (whitespace-collapsed bodies). No setting required.",
"prefetchTopK": "Prefetch candidates (vector stage)",
"prefetchTopKPlaceholder": "0",
"prefetchTopKHint": "0 = same as Top-K; larger values fetch more vector hits before dedupe/truncate (max 200).",
"maxContextChars": "Max returned characters (Unicode)",
"maxContextCharsPlaceholder": "0",
"maxContextCharsHint": "0 = unlimited; keeps whole chunks in rank order until the budget is exceeded.",
"maxContextTokens": "Max returned tokens",
"maxContextTokensPlaceholder": "0",
"maxContextTokensHint": "0 = unlimited; tiktoken estimate (embedding model name, fallback cl100k_base).",
"indexConfig": "Index config",
"chunkStrategy": "Chunking strategy",
"chunkStrategyMarkdownRecursive": "Markdown headers, then recursive (recommended)",
"chunkStrategyRecursive": "Recursive only",
"chunkStrategyHint": "Matches Eino-style pipelines: Markdown headers + recursive for docs; plain text can use recursive only.",
"requestTimeoutSeconds": "Embedding HTTP timeout (seconds)",
"requestTimeoutPlaceholder": "120",
"requestTimeoutHint": "0 uses the default 120s embedding HTTP client timeout.",
"batchSize": "Embedding batch size",
"batchSizePlaceholder": "64",
"batchSizeHint": "Max texts per embedding request (SQLite indexer batches writes accordingly).",
"preferSourceFile": "Prefer on-disk source file when indexing (Eino FileLoader)",
"preferSourceFileHint": "When enabled, content comes from file_path; falls back to DB content if load fails.",
"subIndexes": "Eino sub-indexes (comma-separated)",
"subIndexesPlaceholder": "e.g. prod, knowledge",
"subIndexesHint": "Passed to indexer.WithSubIndexes; stored in the sub_indexes column.",
"chunkSize": "Chunk size",
"chunkSizePlaceholder": "512",
"chunkSizeHint": "Max tokens per chunk (default 512)",
+29 -3
View File
@@ -1299,10 +1299,36 @@
"similarityThreshold": "相似度阈值",
"similarityPlaceholder": "0.7",
"similarityHint": "相似度阈值(0-1),低于此值的结果将被过滤",
"hybridWeight": "混合检索权重",
"hybridPlaceholder": "0.7",
"hybridHint": "向量检索的权重(0-1),1.0表示纯向量检索,0.0表示纯关键词检索",
"subIndexFilter": "子索引过滤(可选)",
"subIndexFilterPlaceholder": "如 prod,与索引 sub_indexes 一致",
"subIndexFilterHint": "留空不过滤;填写后仅检索向量行 sub_indexes 中含该标签的结果(未打标旧行仍保留)。",
"postRetrieveHeader": "检索后处理(去重 / 预算)",
"postRetrieveDedupeAuto": "检索结果会自动按正文规范化去重(合并仅空白不同的重复片段),无需配置。",
"prefetchTopK": "预取候选数(向量阶段)",
"prefetchTopKPlaceholder": "0",
"prefetchTopKHint": "0 表示与 Top-K 相同;大于 Top-K 时先多取候选再经去重/截断回到 Top-K(上限 200)。",
"maxContextChars": "返回内容最大字符数(Unicode)",
"maxContextCharsPlaceholder": "0",
"maxContextCharsHint": "0 表示不限制;按检索顺序整段保留 chunk,超出则丢弃后续。",
"maxContextTokens": "返回内容最大 Token 数",
"maxContextTokensPlaceholder": "0",
"maxContextTokensHint": "0 表示不限制;tiktoken 估算(与嵌入模型名一致,失败则用 cl100k_base)。",
"indexConfig": "索引配置",
"chunkStrategy": "分块策略",
"chunkStrategyMarkdownRecursive": "Markdown 标题切分后递归(推荐)",
"chunkStrategyRecursive": "仅递归切分",
"chunkStrategyHint": "与 Eino 索引链一致:技术文档建议 Markdown 标题 + 递归;纯文本可仅用递归。",
"requestTimeoutSeconds": "嵌入 HTTP 超时(秒)",
"requestTimeoutPlaceholder": "120",
"requestTimeoutHint": "0 表示使用默认 120 秒,与嵌入 HTTP 客户端一致。",
"batchSize": "嵌入批大小",
"batchSizePlaceholder": "64",
"batchSizeHint": "单次嵌入请求的文本条数上限(SQLite 写入按此分批)。",
"preferSourceFile": "索引时优先从磁盘源文件读取(Eino FileLoader)",
"preferSourceFileHint": "开启后以 file_path 为准;读取失败时回退数据库中的 content。",
"subIndexes": "Eino 子索引(逗号分隔)",
"subIndexesPlaceholder": "例如: prod, knowledge",
"subIndexesHint": "对应 indexer.WithSubIndexes,持久化到向量表 sub_indexes 字段。",
"chunkSize": "分块大小(Chunk Size)",
"chunkSizePlaceholder": "512",
"chunkSizeHint": "每个块的最大 token 数(默认 512),长文本会被分割成多个块",
+65 -18
View File
@@ -182,15 +182,49 @@ async function loadConfig(loadTools = true) {
retrievalThresholdInput.value = knowledge.retrieval?.similarity_threshold || 0.7;
}
const retrievalWeightInput = document.getElementById('knowledge-retrieval-hybrid-weight');
if (retrievalWeightInput) {
const hybridWeight = knowledge.retrieval?.hybrid_weight;
// 允许0.0值,只有undefined/null时才使用默认值
retrievalWeightInput.value = (hybridWeight !== undefined && hybridWeight !== null) ? hybridWeight : 0.7;
const subIdxFilterInput = document.getElementById('knowledge-retrieval-sub-index-filter');
if (subIdxFilterInput) {
subIdxFilterInput.value = knowledge.retrieval?.sub_index_filter || '';
}
const post = knowledge.retrieval?.post_retrieve || {};
const prefetchInput = document.getElementById('knowledge-post-retrieve-prefetch-top-k');
if (prefetchInput) {
prefetchInput.value = post.prefetch_top_k ?? 0;
}
const maxCharsInput = document.getElementById('knowledge-post-retrieve-max-chars');
if (maxCharsInput) {
maxCharsInput.value = post.max_context_chars ?? 0;
}
const maxTokInput = document.getElementById('knowledge-post-retrieve-max-tokens');
if (maxTokInput) {
maxTokInput.value = post.max_context_tokens ?? 0;
}
// 索引配置
const indexing = knowledge.indexing || {};
const chunkStrategySelect = document.getElementById('knowledge-indexing-chunk-strategy');
if (chunkStrategySelect) {
const v = (indexing.chunk_strategy || 'markdown_then_recursive').toLowerCase();
chunkStrategySelect.value = v === 'recursive' ? 'recursive' : 'markdown_then_recursive';
}
const reqTimeoutInput = document.getElementById('knowledge-indexing-request-timeout');
if (reqTimeoutInput) {
reqTimeoutInput.value = indexing.request_timeout_seconds ?? 120;
}
const batchSizeInput = document.getElementById('knowledge-indexing-batch-size');
if (batchSizeInput) {
batchSizeInput.value = indexing.batch_size ?? 64;
}
const preferFileCb = document.getElementById('knowledge-indexing-prefer-source-file');
if (preferFileCb) {
preferFileCb.checked = indexing.prefer_source_file === true;
}
const subIdxInput = document.getElementById('knowledge-indexing-sub-indexes');
if (subIdxInput) {
const arr = indexing.sub_indexes;
subIdxInput.value = Array.isArray(arr) ? arr.join(', ') : (typeof arr === 'string' ? arr : '');
}
const chunkSizeInput = document.getElementById('knowledge-indexing-chunk-size');
if (chunkSizeInput) {
chunkSizeInput.value = indexing.chunk_size || 512;
@@ -811,20 +845,33 @@ async function applySettings() {
const val = parseFloat(document.getElementById('knowledge-retrieval-similarity-threshold')?.value);
return isNaN(val) ? 0.7 : val;
})(),
hybrid_weight: (() => {
const val = parseFloat(document.getElementById('knowledge-retrieval-hybrid-weight')?.value);
return isNaN(val) ? 0.7 : val; // 允许0.0值,只有NaN时才使用默认值
})()
sub_index_filter: document.getElementById('knowledge-retrieval-sub-index-filter')?.value?.trim() || '',
post_retrieve: {
prefetch_top_k: parseInt(document.getElementById('knowledge-post-retrieve-prefetch-top-k')?.value, 10) || 0,
max_context_chars: parseInt(document.getElementById('knowledge-post-retrieve-max-chars')?.value, 10) || 0,
max_context_tokens: parseInt(document.getElementById('knowledge-post-retrieve-max-tokens')?.value, 10) || 0
}
},
indexing: {
chunk_size: parseInt(document.getElementById("knowledge-indexing-chunk-size")?.value) || 512,
chunk_overlap: parseInt(document.getElementById("knowledge-indexing-chunk-overlap")?.value) ?? 50,
max_chunks_per_item: parseInt(document.getElementById("knowledge-indexing-max-chunks-per-item")?.value) ?? 0,
max_rpm: parseInt(document.getElementById("knowledge-indexing-max-rpm")?.value) ?? 0,
rate_limit_delay_ms: parseInt(document.getElementById("knowledge-indexing-rate-limit-delay-ms")?.value) ?? 300,
max_retries: parseInt(document.getElementById("knowledge-indexing-max-retries")?.value) ?? 3,
retry_delay_ms: parseInt(document.getElementById("knowledge-indexing-retry-delay-ms")?.value) ?? 1000
}
indexing: (() => {
    // Builds the `indexing` section of the knowledge settings payload from the form inputs.

    // Reads a base-10 integer from the input with the given id and returns `fallback`
    // when the element is missing or its value does not parse. parseInt reports failure
    // as NaN (never null/undefined), so `??` cannot be used to supply the default.
    const readInt = (id, fallback) => {
        const parsed = Number.parseInt(document.getElementById(id)?.value, 10);
        return Number.isNaN(parsed) ? fallback : parsed;
    };

    // Comma-separated tags; the full-width comma (U+FF0C) is accepted as a separator too.
    const subRaw = document.getElementById("knowledge-indexing-sub-indexes")?.value?.trim() || "";
    const sub_indexes = subRaw
        ? subRaw.split(/[,\uFF0C]/).map(s => s.trim()).filter(Boolean)
        : [];

    return {
        chunk_strategy: document.getElementById("knowledge-indexing-chunk-strategy")?.value || "markdown_then_recursive",
        // 0 is sent as-is for these two fields when the input is empty or invalid.
        request_timeout_seconds: readInt("knowledge-indexing-request-timeout", 0),
        batch_size: readInt("knowledge-indexing-batch-size", 0),
        prefer_source_file: document.getElementById("knowledge-indexing-prefer-source-file")?.checked === true,
        sub_indexes,
        // A chunk size of 0 is replaced by 512, same as an empty/invalid input.
        chunk_size: readInt("knowledge-indexing-chunk-size", 512) || 512,
        // For the fields below an explicit 0 is kept; only empty/invalid input falls back.
        chunk_overlap: readInt("knowledge-indexing-chunk-overlap", 50),
        max_chunks_per_item: readInt("knowledge-indexing-max-chunks-per-item", 0),
        max_rpm: readInt("knowledge-indexing-max-rpm", 0),
        rate_limit_delay_ms: readInt("knowledge-indexing-rate-limit-delay-ms", 300),
        max_retries: readInt("knowledge-indexing-max-retries", 3),
        retry_delay_ms: readInt("knowledge-indexing-retry-delay-ms", 1000)
    };
})()
};
const wecomAgentIdVal = document.getElementById('robot-wecom-agent-id')?.value.trim();
+53 -3
View File
@@ -1526,14 +1526,64 @@
<small class="form-hint" data-i18n="settingsBasic.similarityHint">相似度阈值(0-1),低于此值的结果将被过滤</small>
</div>
<div class="form-group">
<label for="knowledge-retrieval-hybrid-weight" data-i18n="settingsBasic.hybridWeight">混合检索权重</label>
<input type="number" id="knowledge-retrieval-hybrid-weight" min="0" max="1" step="0.1" data-i18n="settingsBasic.hybridPlaceholder" data-i18n-attr="placeholder" placeholder="0.7" />
<small class="form-hint" data-i18n="settingsBasic.hybridHint">向量检索的权重(0-1),1.0表示纯向量检索,0.0表示纯关键词检索</small>
<label for="knowledge-retrieval-sub-index-filter" data-i18n="settingsBasic.subIndexFilter">子索引过滤(可选)</label>
<input type="text" id="knowledge-retrieval-sub-index-filter" data-i18n="settingsBasic.subIndexFilterPlaceholder" data-i18n-attr="placeholder" placeholder="与索引 sub_indexes 标签一致,如 prod" />
<small class="form-hint" data-i18n="settingsBasic.subIndexFilterHint">留空表示不过滤;非空时仅检索 sub_indexes 含该标签的向量行(未打标旧数据仍会命中)。</small>
</div>
<div class="settings-subsection-header">
<h5 data-i18n="settingsBasic.postRetrieveHeader">检索后处理(去重 / 预算)</h5>
</div>
<p class="form-hint" style="margin: 0 0 12px 0;" data-i18n="settingsBasic.postRetrieveDedupeAuto">检索结果会自动按正文规范化去重(合并仅空白不同的重复片段),无需配置。</p>
<div class="form-group">
<label for="knowledge-post-retrieve-prefetch-top-k" data-i18n="settingsBasic.prefetchTopK">预取候选数(向量阶段)</label>
<input type="number" id="knowledge-post-retrieve-prefetch-top-k" min="0" max="200" data-i18n="settingsBasic.prefetchTopKPlaceholder" data-i18n-attr="placeholder" placeholder="0" />
<small class="form-hint" data-i18n="settingsBasic.prefetchTopKHint">0 表示与 Top-K 相同;大于 Top-K 时先多取候选再经去重/截断回到 Top-K(上限 200)。</small>
</div>
<div class="form-group">
<label for="knowledge-post-retrieve-max-chars" data-i18n="settingsBasic.maxContextChars">返回内容最大字符数(Unicode)</label>
<input type="number" id="knowledge-post-retrieve-max-chars" min="0" max="1000000" data-i18n="settingsBasic.maxContextCharsPlaceholder" data-i18n-attr="placeholder" placeholder="0" />
<small class="form-hint" data-i18n="settingsBasic.maxContextCharsHint">0 表示不限制;按检索顺序整段保留 chunk,超出则丢弃后续。</small>
</div>
<div class="form-group">
<label for="knowledge-post-retrieve-max-tokens" data-i18n="settingsBasic.maxContextTokens">返回内容最大 Token 数</label>
<input type="number" id="knowledge-post-retrieve-max-tokens" min="0" max="1000000" data-i18n="settingsBasic.maxContextTokensPlaceholder" data-i18n-attr="placeholder" placeholder="0" />
<small class="form-hint" data-i18n="settingsBasic.maxContextTokensHint">0 表示不限制;tiktoken 估算(与嵌入模型名一致,失败则用 cl100k_base)。</small>
</div>
<div class="settings-subsection-header">
<h5 data-i18n="settingsBasic.indexConfig">索引配置</h5>
</div>
<div class="form-group">
<label for="knowledge-indexing-chunk-strategy" data-i18n="settingsBasic.chunkStrategy">分块策略</label>
<select id="knowledge-indexing-chunk-strategy">
<option value="markdown_then_recursive" data-i18n="settingsBasic.chunkStrategyMarkdownRecursive">Markdown 标题切分后递归(推荐)</option>
<option value="recursive" data-i18n="settingsBasic.chunkStrategyRecursive">仅递归切分</option>
</select>
<small class="form-hint" data-i18n="settingsBasic.chunkStrategyHint">与 Eino 官方索引链一致:技术文档建议 Markdown 标题 + 递归;纯文本可仅用递归。</small>
</div>
<div class="form-group">
<label for="knowledge-indexing-request-timeout" data-i18n="settingsBasic.requestTimeoutSeconds">嵌入 HTTP 超时(秒)</label>
<input type="number" id="knowledge-indexing-request-timeout" min="0" max="600" data-i18n="settingsBasic.requestTimeoutPlaceholder" data-i18n-attr="placeholder" placeholder="120" />
<small class="form-hint" data-i18n="settingsBasic.requestTimeoutHint">0 表示使用默认 120 秒;与 OpenAI 嵌入客户端一致。</small>
</div>
<div class="form-group">
<label for="knowledge-indexing-batch-size" data-i18n="settingsBasic.batchSize">嵌入批大小</label>
<input type="number" id="knowledge-indexing-batch-size" min="1" max="256" data-i18n="settingsBasic.batchSizePlaceholder" data-i18n-attr="placeholder" placeholder="64" />
<small class="form-hint" data-i18n="settingsBasic.batchSizeHint">单次请求嵌入的文本条数上限(SQLite 索引写入分批)。</small>
</div>
<div class="form-group">
<label class="checkbox-label">
<input type="checkbox" id="knowledge-indexing-prefer-source-file" />
<span data-i18n="settingsBasic.preferSourceFile">索引时优先从磁盘源文件读取(Eino FileLoader)</span>
</label>
<small class="form-hint" data-i18n="settingsBasic.preferSourceFileHint">开启后以 file_path 为准;读取失败时回退数据库中的 content。</small>
</div>
<div class="form-group">
<label for="knowledge-indexing-sub-indexes" data-i18n="settingsBasic.subIndexes">Eino 子索引(逗号分隔)</label>
<input type="text" id="knowledge-indexing-sub-indexes" data-i18n="settingsBasic.subIndexesPlaceholder" data-i18n-attr="placeholder" placeholder="例如: prod, knowledge" />
<small class="form-hint" data-i18n="settingsBasic.subIndexesHint">传入 indexer.WithSubIndexes,写入向量行的 sub_indexes 字段(逻辑分区标记)。</small>
</div>
<div class="form-group">
<label for="knowledge-indexing-chunk-size" data-i18n="settingsBasic.chunkSize">分块大小(Chunk Size)</label>
<input type="number" id="knowledge-indexing-chunk-size" min="128" max="4096" data-i18n="settingsBasic.chunkSizePlaceholder" data-i18n-attr="placeholder" placeholder="512" />