mirror of
https://github.com/Ed1s0nZ/CyberStrikeAI.git
synced 2026-04-21 18:26:38 +02:00
Add files via upload
This commit is contained in:
@@ -1299,10 +1299,36 @@
|
||||
"similarityThreshold": "Similarity threshold",
|
||||
"similarityPlaceholder": "0.7",
|
||||
"similarityHint": "Results below this value are filtered (0-1)",
|
||||
"hybridWeight": "Hybrid weight",
|
||||
"hybridPlaceholder": "0.7",
|
||||
"hybridHint": "Vector weight (0-1); 1.0 = vector only, 0.0 = keyword only",
|
||||
"subIndexFilter": "Sub-index filter (optional)",
|
||||
"subIndexFilterPlaceholder": "e.g. prod, must match an indexing sub_indexes tag",
|
||||
"subIndexFilterHint": "Empty = no filter. When set, only rows whose sub_indexes contain this tag (legacy rows with empty sub_indexes still match).",
|
||||
"postRetrieveHeader": "Post-retrieval (dedupe / budget)",
|
||||
"postRetrieveDedupeAuto": "Results are always deduped by normalized text (whitespace-collapsed bodies). No setting required.",
|
||||
"prefetchTopK": "Prefetch candidates (vector stage)",
|
||||
"prefetchTopKPlaceholder": "0",
|
||||
"prefetchTopKHint": "0 = same as Top-K; larger values fetch more vector hits before dedupe/truncate (max 200).",
|
||||
"maxContextChars": "Max returned characters (Unicode)",
|
||||
"maxContextCharsPlaceholder": "0",
|
||||
"maxContextCharsHint": "0 = unlimited; keeps whole chunks in rank order until the budget is exceeded.",
|
||||
"maxContextTokens": "Max returned tokens",
|
||||
"maxContextTokensPlaceholder": "0",
|
||||
"maxContextTokensHint": "0 = unlimited; tiktoken estimate (embedding model name, fallback cl100k_base).",
|
||||
"indexConfig": "Index config",
|
||||
"chunkStrategy": "Chunking strategy",
|
||||
"chunkStrategyMarkdownRecursive": "Markdown headers, then recursive (recommended)",
|
||||
"chunkStrategyRecursive": "Recursive only",
|
||||
"chunkStrategyHint": "Matches Eino-style pipelines: Markdown headers + recursive for docs; plain text can use recursive only.",
|
||||
"requestTimeoutSeconds": "Embedding HTTP timeout (seconds)",
|
||||
"requestTimeoutPlaceholder": "120",
|
||||
"requestTimeoutHint": "0 uses the default 120s embedding HTTP client timeout.",
|
||||
"batchSize": "Embedding batch size",
|
||||
"batchSizePlaceholder": "64",
|
||||
"batchSizeHint": "Max texts per embedding request (SQLite indexer batches writes accordingly).",
|
||||
"preferSourceFile": "Prefer on-disk source file when indexing (Eino FileLoader)",
|
||||
"preferSourceFileHint": "When enabled, content comes from file_path; falls back to DB content if load fails.",
|
||||
"subIndexes": "Eino sub-indexes (comma-separated)",
|
||||
"subIndexesPlaceholder": "e.g. prod, knowledge",
|
||||
"subIndexesHint": "Passed to indexer.WithSubIndexes; stored in the sub_indexes column.",
|
||||
"chunkSize": "Chunk size",
|
||||
"chunkSizePlaceholder": "512",
|
||||
"chunkSizeHint": "Max tokens per chunk (default 512)",
|
||||
|
||||
@@ -1299,10 +1299,36 @@
|
||||
"similarityThreshold": "相似度阈值",
|
||||
"similarityPlaceholder": "0.7",
|
||||
"similarityHint": "相似度阈值(0-1),低于此值的结果将被过滤",
|
||||
"hybridWeight": "混合检索权重",
|
||||
"hybridPlaceholder": "0.7",
|
||||
"hybridHint": "向量检索的权重(0-1),1.0表示纯向量检索,0.0表示纯关键词检索",
|
||||
"subIndexFilter": "子索引过滤(可选)",
|
||||
"subIndexFilterPlaceholder": "如 prod,与索引 sub_indexes 一致",
|
||||
"subIndexFilterHint": "留空不过滤;填写后仅检索向量行 sub_indexes 中含该标签的结果(未打标旧行仍保留)。",
|
||||
"postRetrieveHeader": "检索后处理(去重 / 预算)",
|
||||
"postRetrieveDedupeAuto": "检索结果会自动按正文规范化去重(合并仅空白不同的重复片段),无需配置。",
|
||||
"prefetchTopK": "预取候选数(向量阶段)",
|
||||
"prefetchTopKPlaceholder": "0",
|
||||
"prefetchTopKHint": "0 表示与 Top-K 相同;大于 Top-K 时先多取候选再经去重/截断回到 Top-K(上限 200)。",
|
||||
"maxContextChars": "返回内容最大字符数(Unicode)",
|
||||
"maxContextCharsPlaceholder": "0",
|
||||
"maxContextCharsHint": "0 表示不限制;按检索顺序整段保留 chunk,超出则丢弃后续。",
|
||||
"maxContextTokens": "返回内容最大 Token 数",
|
||||
"maxContextTokensPlaceholder": "0",
|
||||
"maxContextTokensHint": "0 表示不限制;tiktoken 估算(与嵌入模型名一致,失败则用 cl100k_base)。",
|
||||
"indexConfig": "索引配置",
|
||||
"chunkStrategy": "分块策略",
|
||||
"chunkStrategyMarkdownRecursive": "Markdown 标题切分后递归(推荐)",
|
||||
"chunkStrategyRecursive": "仅递归切分",
|
||||
"chunkStrategyHint": "与 Eino 索引链一致:技术文档建议 Markdown 标题 + 递归;纯文本可仅用递归。",
|
||||
"requestTimeoutSeconds": "嵌入 HTTP 超时(秒)",
|
||||
"requestTimeoutPlaceholder": "120",
|
||||
"requestTimeoutHint": "0 表示使用默认 120 秒,与嵌入 HTTP 客户端一致。",
|
||||
"batchSize": "嵌入批大小",
|
||||
"batchSizePlaceholder": "64",
|
||||
"batchSizeHint": "单次嵌入请求的文本条数上限(SQLite 写入按此分批)。",
|
||||
"preferSourceFile": "索引时优先从磁盘源文件读取(Eino FileLoader)",
|
||||
"preferSourceFileHint": "开启后以 file_path 为准;读取失败时回退数据库中的 content。",
|
||||
"subIndexes": "Eino 子索引(逗号分隔)",
|
||||
"subIndexesPlaceholder": "例如: prod, knowledge",
|
||||
"subIndexesHint": "对应 indexer.WithSubIndexes,持久化到向量表 sub_indexes 字段。",
|
||||
"chunkSize": "分块大小(Chunk Size)",
|
||||
"chunkSizePlaceholder": "512",
|
||||
"chunkSizeHint": "每个块的最大 token 数(默认 512),长文本会被分割成多个块",
|
||||
|
||||
+65
-18
@@ -182,15 +182,49 @@ async function loadConfig(loadTools = true) {
|
||||
retrievalThresholdInput.value = knowledge.retrieval?.similarity_threshold || 0.7;
|
||||
}
|
||||
|
||||
const retrievalWeightInput = document.getElementById('knowledge-retrieval-hybrid-weight');
|
||||
if (retrievalWeightInput) {
|
||||
const hybridWeight = knowledge.retrieval?.hybrid_weight;
|
||||
// 允许0.0值,只有undefined/null时才使用默认值
|
||||
retrievalWeightInput.value = (hybridWeight !== undefined && hybridWeight !== null) ? hybridWeight : 0.7;
|
||||
const subIdxFilterInput = document.getElementById('knowledge-retrieval-sub-index-filter');
|
||||
if (subIdxFilterInput) {
|
||||
subIdxFilterInput.value = knowledge.retrieval?.sub_index_filter || '';
|
||||
}
|
||||
|
||||
const post = knowledge.retrieval?.post_retrieve || {};
|
||||
const prefetchInput = document.getElementById('knowledge-post-retrieve-prefetch-top-k');
|
||||
if (prefetchInput) {
|
||||
prefetchInput.value = post.prefetch_top_k ?? 0;
|
||||
}
|
||||
const maxCharsInput = document.getElementById('knowledge-post-retrieve-max-chars');
|
||||
if (maxCharsInput) {
|
||||
maxCharsInput.value = post.max_context_chars ?? 0;
|
||||
}
|
||||
const maxTokInput = document.getElementById('knowledge-post-retrieve-max-tokens');
|
||||
if (maxTokInput) {
|
||||
maxTokInput.value = post.max_context_tokens ?? 0;
|
||||
}
|
||||
|
||||
// 索引配置
|
||||
const indexing = knowledge.indexing || {};
|
||||
const chunkStrategySelect = document.getElementById('knowledge-indexing-chunk-strategy');
|
||||
if (chunkStrategySelect) {
|
||||
const v = (indexing.chunk_strategy || 'markdown_then_recursive').toLowerCase();
|
||||
chunkStrategySelect.value = v === 'recursive' ? 'recursive' : 'markdown_then_recursive';
|
||||
}
|
||||
const reqTimeoutInput = document.getElementById('knowledge-indexing-request-timeout');
|
||||
if (reqTimeoutInput) {
|
||||
reqTimeoutInput.value = indexing.request_timeout_seconds ?? 120;
|
||||
}
|
||||
const batchSizeInput = document.getElementById('knowledge-indexing-batch-size');
|
||||
if (batchSizeInput) {
|
||||
batchSizeInput.value = indexing.batch_size ?? 64;
|
||||
}
|
||||
const preferFileCb = document.getElementById('knowledge-indexing-prefer-source-file');
|
||||
if (preferFileCb) {
|
||||
preferFileCb.checked = indexing.prefer_source_file === true;
|
||||
}
|
||||
const subIdxInput = document.getElementById('knowledge-indexing-sub-indexes');
|
||||
if (subIdxInput) {
|
||||
const arr = indexing.sub_indexes;
|
||||
subIdxInput.value = Array.isArray(arr) ? arr.join(', ') : (typeof arr === 'string' ? arr : '');
|
||||
}
|
||||
const chunkSizeInput = document.getElementById('knowledge-indexing-chunk-size');
|
||||
if (chunkSizeInput) {
|
||||
chunkSizeInput.value = indexing.chunk_size || 512;
|
||||
@@ -811,20 +845,33 @@ async function applySettings() {
|
||||
const val = parseFloat(document.getElementById('knowledge-retrieval-similarity-threshold')?.value);
|
||||
return isNaN(val) ? 0.7 : val;
|
||||
})(),
|
||||
hybrid_weight: (() => {
|
||||
const val = parseFloat(document.getElementById('knowledge-retrieval-hybrid-weight')?.value);
|
||||
return isNaN(val) ? 0.7 : val; // 允许0.0值,只有NaN时才使用默认值
|
||||
})()
|
||||
sub_index_filter: document.getElementById('knowledge-retrieval-sub-index-filter')?.value?.trim() || '',
|
||||
post_retrieve: {
|
||||
prefetch_top_k: parseInt(document.getElementById('knowledge-post-retrieve-prefetch-top-k')?.value, 10) || 0,
|
||||
max_context_chars: parseInt(document.getElementById('knowledge-post-retrieve-max-chars')?.value, 10) || 0,
|
||||
max_context_tokens: parseInt(document.getElementById('knowledge-post-retrieve-max-tokens')?.value, 10) || 0
|
||||
}
|
||||
},
|
||||
indexing: {
|
||||
chunk_size: parseInt(document.getElementById("knowledge-indexing-chunk-size")?.value) || 512,
|
||||
chunk_overlap: parseInt(document.getElementById("knowledge-indexing-chunk-overlap")?.value) ?? 50,
|
||||
max_chunks_per_item: parseInt(document.getElementById("knowledge-indexing-max-chunks-per-item")?.value) ?? 0,
|
||||
max_rpm: parseInt(document.getElementById("knowledge-indexing-max-rpm")?.value) ?? 0,
|
||||
rate_limit_delay_ms: parseInt(document.getElementById("knowledge-indexing-rate-limit-delay-ms")?.value) ?? 300,
|
||||
max_retries: parseInt(document.getElementById("knowledge-indexing-max-retries")?.value) ?? 3,
|
||||
retry_delay_ms: parseInt(document.getElementById("knowledge-indexing-retry-delay-ms")?.value) ?? 1000
|
||||
}
|
||||
indexing: (() => {
    // Builds the `indexing` section of the settings object from the
    // knowledge-indexing form controls. Every lookup tolerates a missing
    // element (optional chaining), in which case the field's fallback is used.

    // Raw string value of a control, or undefined when the element is absent.
    const fieldValue = (id) => document.getElementById(id)?.value;

    // Base-10 integer read from a control. parseInt yields NaN (never
    // null/undefined) for blank or missing input, so `parseInt(...) ?? fallback`
    // would never pick the fallback and NaN would end up in the returned object.
    // An explicit NaN check applies the fallback while keeping a typed-in 0.
    const intOr = (id, fallback) => {
        const parsed = Number.parseInt(fieldValue(id), 10);
        return Number.isNaN(parsed) ? fallback : parsed;
    };

    // Sub-index tags: split on ASCII or full-width (U+FF0C) commas, trim each
    // entry, and drop empty ones.
    const subRaw = fieldValue("knowledge-indexing-sub-indexes")?.trim() || "";
    const sub_indexes = subRaw
        ? subRaw.split(/[,\uFF0C]/).map((s) => s.trim()).filter(Boolean)
        : [];

    return {
        chunk_strategy: fieldValue("knowledge-indexing-chunk-strategy") || "markdown_then_recursive",
        // Blank input is sent as 0 for these two fields.
        request_timeout_seconds: intOr("knowledge-indexing-request-timeout", 0),
        batch_size: intOr("knowledge-indexing-batch-size", 0),
        prefer_source_file: document.getElementById("knowledge-indexing-prefer-source-file")?.checked === true,
        sub_indexes,
        // A chunk size of 0 is treated like blank input and replaced by 512.
        chunk_size: intOr("knowledge-indexing-chunk-size", 0) || 512,
        chunk_overlap: intOr("knowledge-indexing-chunk-overlap", 50),
        max_chunks_per_item: intOr("knowledge-indexing-max-chunks-per-item", 0),
        max_rpm: intOr("knowledge-indexing-max-rpm", 0),
        rate_limit_delay_ms: intOr("knowledge-indexing-rate-limit-delay-ms", 300),
        max_retries: intOr("knowledge-indexing-max-retries", 3),
        retry_delay_ms: intOr("knowledge-indexing-retry-delay-ms", 1000)
    };
})()
|
||||
};
|
||||
|
||||
const wecomAgentIdVal = document.getElementById('robot-wecom-agent-id')?.value.trim();
|
||||
|
||||
@@ -1526,14 +1526,64 @@
|
||||
<small class="form-hint" data-i18n="settingsBasic.similarityHint">相似度阈值(0-1),低于此值的结果将被过滤</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-retrieval-hybrid-weight" data-i18n="settingsBasic.hybridWeight">混合检索权重</label>
|
||||
<input type="number" id="knowledge-retrieval-hybrid-weight" min="0" max="1" step="0.1" data-i18n="settingsBasic.hybridPlaceholder" data-i18n-attr="placeholder" placeholder="0.7" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.hybridHint">向量检索的权重(0-1),1.0表示纯向量检索,0.0表示纯关键词检索</small>
|
||||
<label for="knowledge-retrieval-sub-index-filter" data-i18n="settingsBasic.subIndexFilter">子索引过滤(可选)</label>
|
||||
<input type="text" id="knowledge-retrieval-sub-index-filter" data-i18n="settingsBasic.subIndexFilterPlaceholder" data-i18n-attr="placeholder" placeholder="与索引 sub_indexes 标签一致,如 prod" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.subIndexFilterHint">留空表示不过滤;非空时仅检索 sub_indexes 含该标签的向量行(未打标旧数据仍会命中)。</small>
|
||||
</div>
|
||||
|
||||
<div class="settings-subsection-header">
|
||||
<h5 data-i18n="settingsBasic.postRetrieveHeader">检索后处理(去重 / 预算)</h5>
|
||||
</div>
|
||||
<p class="form-hint" style="margin: 0 0 12px 0;" data-i18n="settingsBasic.postRetrieveDedupeAuto">检索结果会自动按正文规范化去重(合并仅空白不同的重复片段),无需配置。</p>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-post-retrieve-prefetch-top-k" data-i18n="settingsBasic.prefetchTopK">预取候选数(向量阶段)</label>
|
||||
<input type="number" id="knowledge-post-retrieve-prefetch-top-k" min="0" max="200" data-i18n="settingsBasic.prefetchTopKPlaceholder" data-i18n-attr="placeholder" placeholder="0" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.prefetchTopKHint">0 表示与 Top-K 相同;大于 Top-K 时先多取候选再经去重/截断回到 Top-K(上限 200)。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-post-retrieve-max-chars" data-i18n="settingsBasic.maxContextChars">返回内容最大字符数(Unicode)</label>
|
||||
<input type="number" id="knowledge-post-retrieve-max-chars" min="0" max="1000000" data-i18n="settingsBasic.maxContextCharsPlaceholder" data-i18n-attr="placeholder" placeholder="0" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.maxContextCharsHint">0 表示不限制;按检索顺序整段保留 chunk,超出则丢弃后续。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-post-retrieve-max-tokens" data-i18n="settingsBasic.maxContextTokens">返回内容最大 Token 数</label>
|
||||
<input type="number" id="knowledge-post-retrieve-max-tokens" min="0" max="1000000" data-i18n="settingsBasic.maxContextTokensPlaceholder" data-i18n-attr="placeholder" placeholder="0" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.maxContextTokensHint">0 表示不限制;tiktoken 估算(与嵌入模型名一致,失败则用 cl100k_base)。</small>
|
||||
</div>
|
||||
|
||||
<div class="settings-subsection-header">
|
||||
<h5 data-i18n="settingsBasic.indexConfig">索引配置</h5>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-indexing-chunk-strategy" data-i18n="settingsBasic.chunkStrategy">分块策略</label>
|
||||
<select id="knowledge-indexing-chunk-strategy">
|
||||
<option value="markdown_then_recursive" data-i18n="settingsBasic.chunkStrategyMarkdownRecursive">Markdown 标题切分后递归(推荐)</option>
|
||||
<option value="recursive" data-i18n="settingsBasic.chunkStrategyRecursive">仅递归切分</option>
|
||||
</select>
|
||||
<small class="form-hint" data-i18n="settingsBasic.chunkStrategyHint">与 Eino 官方索引链一致:技术文档建议 Markdown 标题 + 递归;纯文本可仅用递归。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-indexing-request-timeout" data-i18n="settingsBasic.requestTimeoutSeconds">嵌入 HTTP 超时(秒)</label>
|
||||
<input type="number" id="knowledge-indexing-request-timeout" min="0" max="600" data-i18n="settingsBasic.requestTimeoutPlaceholder" data-i18n-attr="placeholder" placeholder="120" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.requestTimeoutHint">0 表示使用默认 120 秒;与 OpenAI 嵌入客户端一致。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-indexing-batch-size" data-i18n="settingsBasic.batchSize">嵌入批大小</label>
|
||||
<input type="number" id="knowledge-indexing-batch-size" min="1" max="256" data-i18n="settingsBasic.batchSizePlaceholder" data-i18n-attr="placeholder" placeholder="64" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.batchSizeHint">单次请求嵌入的文本条数上限(SQLite 索引写入分批)。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label class="checkbox-label">
|
||||
<input type="checkbox" id="knowledge-indexing-prefer-source-file" />
|
||||
<span data-i18n="settingsBasic.preferSourceFile">索引时优先从磁盘源文件读取(Eino FileLoader)</span>
|
||||
</label>
|
||||
<small class="form-hint" data-i18n="settingsBasic.preferSourceFileHint">开启后以 file_path 为准;读取失败时回退数据库中的 content。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-indexing-sub-indexes" data-i18n="settingsBasic.subIndexes">Eino 子索引(逗号分隔)</label>
|
||||
<input type="text" id="knowledge-indexing-sub-indexes" data-i18n="settingsBasic.subIndexesPlaceholder" data-i18n-attr="placeholder" placeholder="例如: prod, knowledge" />
|
||||
<small class="form-hint" data-i18n="settingsBasic.subIndexesHint">传入 indexer.WithSubIndexes,写入向量行的 sub_indexes 字段(逻辑分区标记)。</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="knowledge-indexing-chunk-size" data-i18n="settingsBasic.chunkSize">分块大小(Chunk Size)</label>
|
||||
<input type="number" id="knowledge-indexing-chunk-size" min="128" max="4096" data-i18n="settingsBasic.chunkSizePlaceholder" data-i18n-attr="placeholder" placeholder="512" />
|
||||
|
||||
Reference in New Issue
Block a user