mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-29 05:00:01 +02:00
Merge PR #1369: gate domain-skill auto-promote on classifier_score > 0
This commit is contained in:
@@ -291,8 +291,20 @@ export async function writeSkill(input: WriteSkillInput): Promise<DomainSkillRow
|
||||
*
|
||||
* Auto-promote logic:
|
||||
* - increment use_count
|
||||
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 → state:active
|
||||
* - else stay quarantined with updated counter
|
||||
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 AND L4 has scored
|
||||
* the body (classifier_score > 0) → state:active
|
||||
* - else stay quarantined with updated counter; user must run
|
||||
* `domain-skill promote-to-global` manually
|
||||
*
|
||||
* The classifier_score > 0 gate is load-bearing: handleSave currently writes
|
||||
* classifier_score=0 with the comment "L4 deferred to load-time / sidebar-agent
|
||||
* fills this in on first prompt-injection load," but sidebar-agent was ripped
|
||||
* (CLAUDE.md "Sidebar architecture") and nothing else updates the score, so
|
||||
* skills authored via the production path never had their body scanned by L4.
|
||||
* Without this gate, three benign uses promote any quarantined skill — including
|
||||
* one written under the influence of a poisoned page — into the prompt context
|
||||
* for every subsequent visit. The gate re-opens automatically the day L4 is
|
||||
* rewired and writeSkill / recordSkillUse start receiving non-zero scores.
|
||||
*/
|
||||
export async function recordSkillUse(host: string, projectSlug: string, classifierFlagged: boolean): Promise<DomainSkillRow | null> {
|
||||
const normalized = normalizeHost(host);
|
||||
@@ -303,7 +315,12 @@ export async function recordSkillUse(host: string, projectSlug: string, classifi
|
||||
const useCount = current.use_count + 1;
|
||||
const flagCount = current.flag_count + (classifierFlagged ? 1 : 0);
|
||||
let state: SkillState = current.state;
|
||||
if (state === 'quarantined' && useCount >= PROMOTE_THRESHOLD && flagCount === 0) {
|
||||
if (
|
||||
state === 'quarantined' &&
|
||||
useCount >= PROMOTE_THRESHOLD &&
|
||||
flagCount === 0 &&
|
||||
current.classifier_score > 0
|
||||
) {
|
||||
state = 'active';
|
||||
}
|
||||
const updated: DomainSkillRow = {
|
||||
|
||||
Reference in New Issue
Block a user