Merge PR #1369: gate domain-skill auto-promote on classifier_score > 0

This commit is contained in:
Garry Tan
2026-05-08 21:42:09 -07:00
2 changed files with 45 additions and 3 deletions
+20 -3
View File
@@ -291,8 +291,20 @@ export async function writeSkill(input: WriteSkillInput): Promise<DomainSkillRow
*
* Auto-promote logic:
* - increment use_count
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 → state:active
* - else stay quarantined with updated counter
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 AND L4 has scored
* the body (classifier_score > 0) → state:active
* - else stay quarantined with updated counter; user must run
* `domain-skill promote-to-global` manually
*
* The classifier_score > 0 gate is load-bearing: handleSave currently writes
* classifier_score=0 with the comment "L4 deferred to load-time / sidebar-agent
* fills this in on first prompt-injection load," but sidebar-agent was ripped
* (CLAUDE.md "Sidebar architecture") and nothing else updates the score, so
* skills authored via the production path never had their body scanned by L4.
* Without this gate, three benign uses promote any quarantined skill — including
* one written under the influence of a poisoned page — into the prompt context
* for every subsequent visit. The gate re-opens automatically the day L4 is
* rewired and writeSkill / recordSkillUse start receiving non-zero scores.
*/
export async function recordSkillUse(host: string, projectSlug: string, classifierFlagged: boolean): Promise<DomainSkillRow | null> {
const normalized = normalizeHost(host);
@@ -303,7 +315,12 @@ export async function recordSkillUse(host: string, projectSlug: string, classifi
const useCount = current.use_count + 1;
const flagCount = current.flag_count + (classifierFlagged ? 1 : 0);
let state: SkillState = current.state;
if (state === 'quarantined' && useCount >= PROMOTE_THRESHOLD && flagCount === 0) {
if (
state === 'quarantined' &&
useCount >= PROMOTE_THRESHOLD &&
flagCount === 0 &&
current.classifier_score > 0
) {
state = 'active';
}
const updated: DomainSkillRow = {