mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-07-05 07:57:50 +02:00
feat(identify): C2PA vendor coverage, AI-enhanced split, detect/remove threshold unify
Retained-corpus mining (2026-06-20) surfaced three provenance gaps; all are
oracle-free and regression-guarded.
- C2PA vendor coverage (roadmap): register Volcano Engine under its Chinese
legal entity 北京火山引擎科技有限公司 (the latin "volcengine" needle misses
those certs) -> normalizes to the same ByteDance platform; register ElevenLabs
("Eleven Labs Inc.", pure generative-AI) as a generator. Document the
deliberate exclusion of TikTok Inc. and PixelBin.io/"Fynd" (provenance/transform
signers, not generators) so they are not re-added.
- AI-generated vs AI-enhanced (roadmap): ProvenanceReport.ai_source_kind splits
the C2PA digital-source-type into "generated" (trainedAlgorithmicMedia) vs
"enhanced" (compositeWithTrainedAlgorithmicMedia) so a caller branches a
full-frame scrub from a region-targeted clean. Parsed once in
noai.c2pa._populate_registry_fields (PNG + any c2pa-python-readable container),
with a raw head-scan fallback in identify for the non-PNG raw-blob path. CLI
verdict reads "AI-generated (fully synthetic)" vs "AI-enhanced (real content
with an AI-composited region)"; surfaced in --json.
- Detect-vs-remove threshold desync (P0#7): identify's sparkle threshold and the
removal arbitration gate were two independent 0.5 constants. Unify them into the
single GEMINI_SPARKLE_TRUST_CONF (identify imports it) so they can never drift.
Lowering the gate to recover faint sub-0.5 sparkles was evaluated and REJECTED:
a real Doubao text mark scores ~0.40-0.42 as a gemini match with a higher
core-ring brightness margin than a genuine faint sparkle, so neither confidence
nor the brightness gate separates them in [0.35, 0.5) -- lowering would trade a
rare miss for false-positive removals on clean images. Regression-guarded by
TestSparkleDetectRemoveAlignment (real demo sparkle at borderline opacities;
identify and best_auto_mark must agree on either side of the line).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -881,6 +881,13 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo
|
||||
|
||||
_banner()
|
||||
verdict = {True: "AI-generated", False: "not AI", None: "unknown"}[report.is_ai_generated]
|
||||
# Sharpen the True verdict when the C2PA source type says the image is a real
|
||||
# photo with an AI-composited region rather than a full AI generation, so the
|
||||
# caller (and the user) can tell "scrub the whole frame" from "scrub the AI region".
|
||||
if report.is_ai_generated and report.ai_source_kind == "enhanced":
|
||||
verdict = "AI-enhanced (real content with an AI-composited region)"
|
||||
elif report.is_ai_generated and report.ai_source_kind == "generated":
|
||||
verdict = "AI-generated (fully synthetic)"
|
||||
console.print(f"\n Verdict: {verdict} (confidence: {report.confidence})")
|
||||
console.print(f" Platform: {report.platform or 'undetermined'}")
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ from remove_ai_watermarks.metadata import (
|
||||
)
|
||||
from remove_ai_watermarks.noai.c2pa import cbor_text_after, extract_c2pa_info, soft_binding_vendors_in
|
||||
from remove_ai_watermarks.noai.constants import C2PA_AI_TOOLS, C2PA_AI_VENDORS, C2PA_ISSUERS
|
||||
from remove_ai_watermarks.watermark_registry import GEMINI_SPARKLE_TRUST_CONF
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
@@ -57,11 +58,14 @@ log = logging.getLogger(__name__)
|
||||
_SCAN_BYTES = 1024 * 1024
|
||||
|
||||
# Visible-sparkle confidence above which the signal is trusted as provenance.
|
||||
# Stricter than the removal default (0.25): on the corpus, Gemini-family
|
||||
# sparkles score >= 0.56 while non-sparkle images top out at 0.49, so 0.5
|
||||
# cleanly separates them and avoids false positives when sparkle is the only
|
||||
# signal (e.g. an OpenAI image scored 0.37 -- below threshold, correctly dropped).
|
||||
_SPARKLE_THRESHOLD = 0.5
|
||||
# Shared with the removal arbitration (watermark_registry.GEMINI_SPARKLE_TRUST_CONF)
|
||||
# so the provenance "is there a sparkle" verdict and the removal "take the sparkle"
|
||||
# decision can never drift apart -- the detect-vs-remove desync the retained-corpus
|
||||
# mining surfaced (2026-06-20). On the corpus Gemini-family sparkles score >= 0.56
|
||||
# while non-sparkle images top out at 0.49, so 0.5 cleanly separates them and avoids
|
||||
# false positives when the sparkle is the only signal (e.g. an OpenAI image scored
|
||||
# 0.37 -- below threshold, correctly dropped).
|
||||
_SPARKLE_THRESHOLD = GEMINI_SPARKLE_TRUST_CONF
|
||||
|
||||
# Issuer (C2PA signer) -> human-readable generating platform, derived from the
|
||||
# single C2PA_AI_VENDORS registry. Ordered: when a manifest names several issuers
|
||||
@@ -132,6 +136,14 @@ class ProvenanceReport:
|
||||
is_ai_generated: bool | None # True / False is never asserted; None = unknown
|
||||
platform: str | None
|
||||
confidence: str # "high" | "medium" | "none"
|
||||
# Coarse AI-origin kind from the C2PA digital-source-type, so a caller can
|
||||
# branch on full generation vs an AI-touched real photo:
|
||||
# "generated" -- digitalSourceType trainedAlgorithmicMedia (fully AI).
|
||||
# "enhanced" -- compositeWithTrainedAlgorithmicMedia (real content with an
|
||||
# AI-composited region; scrub the AI region, keep the photo).
|
||||
# None -- no C2PA AI source-type (verdict, if AI, came from another
|
||||
# signal: IPTC, AIGC, local gen params, xAI, ...).
|
||||
ai_source_kind: str | None = None
|
||||
watermarks: list[str] = field(default_factory=list[str])
|
||||
signals: list[Signal] = field(default_factory=list["Signal"])
|
||||
caveats: list[str] = field(default_factory=list[str])
|
||||
@@ -484,9 +496,18 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
|
||||
# ── C2PA Content Credentials ────────────────────────────────────
|
||||
has_c2pa = bool(info) or c2pa_marker_in(head)
|
||||
issuers = [info["issuer"]] if info.get("issuer") else _issuers_in(head)
|
||||
c2pa_is_ai = "trainedAlgorithmicMedia" in info.get("source_type", "") or any(
|
||||
m in head for m in (b"trainedAlgorithmicMedia", b"compositeWithTrainedAlgorithmicMedia")
|
||||
)
|
||||
# Full AI generation (trainedAlgorithmicMedia) vs an AI-enhanced real photo
|
||||
# (compositeWithTrainedAlgorithmicMedia). The structured kind is parsed once in
|
||||
# noai.c2pa._populate_registry_fields (covers PNG + any container the c2pa-python
|
||||
# reader handles); fall back to a raw head scan for the non-PNG raw-blob path
|
||||
# where extract_c2pa_info returns {}. Full generation wins when both appear.
|
||||
c2pa_source_kind = info.get("ai_source_kind")
|
||||
if c2pa_source_kind is None:
|
||||
if b"trainedAlgorithmicMedia" in head:
|
||||
c2pa_source_kind = "generated"
|
||||
elif b"compositeWithTrainedAlgorithmicMedia" in head:
|
||||
c2pa_source_kind = "enhanced"
|
||||
c2pa_is_ai = c2pa_source_kind is not None
|
||||
# Generator string (for the signal detail): structured for PNG, CBOR-scanned
|
||||
# for other containers. Best-effort -- some manifests key it as
|
||||
# `claim_generator_info` (Pixel), so this can be None even when a device is
|
||||
@@ -734,6 +755,9 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
|
||||
is_ai_generated=is_ai,
|
||||
platform=platform,
|
||||
confidence=confidence,
|
||||
# Only meaningful when the AI verdict actually came from the C2PA source
|
||||
# type; a non-C2PA AI signal (IPTC/AIGC/local gen) leaves it None.
|
||||
ai_source_kind=c2pa_source_kind if (is_ai and has_c2pa) else None,
|
||||
watermarks=watermarks,
|
||||
signals=signals,
|
||||
caveats=caveats,
|
||||
|
||||
@@ -363,14 +363,22 @@ def _populate_registry_fields(buf: bytes, c2pa_info: dict[str, Any]) -> bool:
|
||||
# Digital source type (matched anywhere in the store, including ingredient
|
||||
# manifests -- a ChatGPT edit of a Sora generation carries the AI marker on
|
||||
# the parent, not the active manifest).
|
||||
# ``ai_source_kind`` is the structured generated-vs-enhanced split the caller
|
||||
# branches on (full-frame scrub vs region-targeted clean); ``source_type`` is the
|
||||
# human-readable form. The two byte strings are unambiguous:
|
||||
# "compositeWithTrainedAlgorithmicMedia" capitalizes the inner "Trained", so a
|
||||
# lowercase "trainedAlgorithmicMedia" match is standalone full generation, which
|
||||
# wins when both appear (an edit chain).
|
||||
ai_source = False
|
||||
if b"trainedAlgorithmicMedia" in buf:
|
||||
c2pa_info["source_type"] = "trainedAlgorithmicMedia (AI-generated)"
|
||||
c2pa_info["ai_source_kind"] = "generated"
|
||||
ai_source = True
|
||||
elif b"algorithmicMedia" in buf:
|
||||
c2pa_info["source_type"] = "algorithmicMedia"
|
||||
elif b"compositeWithTrainedAlgorithmicMedia" in buf:
|
||||
c2pa_info["source_type"] = "compositeWithTrainedAlgorithmicMedia (AI-enhanced)"
|
||||
c2pa_info["ai_source_kind"] = "enhanced"
|
||||
ai_source = True
|
||||
|
||||
# SynthID pixel-watermark proxy: a C2PA manifest from a SynthID-using
|
||||
|
||||
@@ -122,6 +122,20 @@ C2PA_AI_VENDORS: tuple[C2paAiVendor, ...] = (
|
||||
C2paAiVendor(
|
||||
b"volcengine", "ByteDance (Volcano Engine)", "ByteDance (Doubao / Jimeng / Volcano Engine)", "ByteDance"
|
||||
),
|
||||
# Some Volcano Engine certs name the signer with the Chinese legal entity
|
||||
# "北京火山引擎科技有限公司" (Beijing Volcano Engine Technology Co., Ltd.) rather
|
||||
# than the latin "volcengine" -- the latin needle misses it entirely, so real
|
||||
# ByteDance output was un-attributed in production traffic. The issuer is the
|
||||
# UTF-8 of the Chinese name (it appears UTF-8-encoded in the manifest-store
|
||||
# JSON and the raw caBX bytes alike); it normalizes to the same "ByteDance"
|
||||
# needle and platform as the volcengine row, so the two collapse together for
|
||||
# clash detection. Verified against the mined retained corpus, 2026-06-20.
|
||||
C2paAiVendor(
|
||||
"北京火山引擎科技有限公司".encode(),
|
||||
"ByteDance (Volcano Engine)",
|
||||
"ByteDance (Doubao / Jimeng / Volcano Engine)",
|
||||
"ByteDance",
|
||||
),
|
||||
# ByteDance's international brand (BytePlus / Seedream / Seededit) signs its
|
||||
# cert as "Byteplus Pte. Ltd." -- the bare ``volcengine`` needle misses it, so
|
||||
# real BytePlus AI output was mis-attributed (an incidental "Adobe XMP" string
|
||||
@@ -136,11 +150,29 @@ C2PA_AI_VENDORS: tuple[C2paAiVendor, ...] = (
|
||||
# source read AI but no platform was attributed. Verified on real signed files
|
||||
# in production traffic, 2026-06-19. Canva does not use SynthID.
|
||||
C2paAiVendor(b"Canva", "Canva", "Canva (Magic Media)", "Canva"),
|
||||
# ElevenLabs is a pure generative-AI company (AI voice / audio, and image /
|
||||
# video via its API); it signs output as "Eleven Labs Inc.", so the C2PA
|
||||
# manifest alone marks AI generation. Verified against the mined retained
|
||||
# corpus, 2026-06-20. ElevenLabs does not use SynthID.
|
||||
C2paAiVendor(b"Eleven Labs", "ElevenLabs", "ElevenLabs", "ElevenLabs"),
|
||||
# Truepic is a C2PA signing authority, not an AI generator: no platform label,
|
||||
# never asserts is_ai (the verdict comes from the digital-source-type).
|
||||
C2paAiVendor(b"Truepic", "Truepic", None, None),
|
||||
)
|
||||
|
||||
# Deliberately NOT registered as AI-generation vendors (mined-corpus candidates
|
||||
# evaluated 2026-06-20):
|
||||
# - TikTok Inc.: signs C2PA as a content-provenance / AI-labeling authority on
|
||||
# uploads, not as an image generator. The is_ai verdict keys off the
|
||||
# digitalSourceType (trainedAlgorithmicMedia), which is already honored; a
|
||||
# bare TikTok signer marks distribution provenance, not generation, so adding
|
||||
# it as a generator needle would mis-label human uploads as AI.
|
||||
# - PixelBin.io (issuer "Fynd"): an image transformation / optimization / CDN
|
||||
# service. Its C2PA stamps a transform/upload step, not a generation event.
|
||||
# Both are excluded to avoid false-positive AI attribution; re-evaluate only
|
||||
# against a real signed file whose manifest carries a trainedAlgorithmicMedia
|
||||
# digital-source type produced by the vendor itself.
|
||||
|
||||
# Derived view -- add a vendor to C2PA_AI_VENDORS above, not here.
|
||||
# C2PA issuer signature -> resolved org name, for the manifest byte-scan.
|
||||
C2PA_ISSUERS: dict[bytes, str] = {v.issuer: v.org for v in C2PA_AI_VENDORS}
|
||||
|
||||
@@ -90,13 +90,27 @@ class KnownMark:
|
||||
return self._remove(image, inpaint_method, inpaint, inpaint_strength, force)
|
||||
|
||||
|
||||
# Gemini-sparkle confidence above which the registry treats it as a confident
|
||||
# detection for arbitration. Matches identify's corpus-validated sparkle
|
||||
# threshold (0.5): the gemini engine's own detect flag uses a looser internal
|
||||
# threshold and weakly fires (~0.36) on unrelated bottom-right text (e.g. the
|
||||
# Doubao mark), which would otherwise let it hijack `--mark auto`. 0.5 gives 0
|
||||
# false positives on the corpus.
|
||||
_GEMINI_AUTO_MIN_CONF = 0.5
|
||||
# Single source of truth for the Gemini-sparkle "trust this as a real mark"
|
||||
# confidence, shared by BOTH the removal arbitration here (`best_auto_mark` /
|
||||
# `_gemini_detect`) and the provenance detector in `identify` (which imports it
|
||||
# as its sparkle threshold). Defining it once removes the detect-vs-remove
|
||||
# threshold drift the retained-corpus mining surfaced (2026-06-20): identify
|
||||
# would report a sparkle while removal declined it, or vice versa, whenever the
|
||||
# two independently-maintained 0.5 constants fell out of step. Now they cannot.
|
||||
#
|
||||
# Value 0.5 is corpus-validated: the gemini engine's own `detected` flag uses a
|
||||
# looser internal threshold (0.35) and weakly fires (~0.36-0.42) on unrelated
|
||||
# bottom-right text -- a real Doubao mark scores ~0.40-0.42 as a gemini match,
|
||||
# and its core-ring brightness margin is HIGHER than a genuine faint sparkle's,
|
||||
# so neither confidence nor the brightness gate separates them in the [0.35, 0.5)
|
||||
# band. Lowering this gate to recover faint sparkles was evaluated against that
|
||||
# band (2026-06-20) and REJECTED: it cannot be done without re-admitting the
|
||||
# Doubao-text / content false positives, trading a rare miss for false-positive
|
||||
# removals on clean images. The band below the gate is therefore intentionally
|
||||
# left to the higher-strength / metadata paths. 0.5 gives 0 false positives on
|
||||
# the corpus.
|
||||
GEMINI_SPARKLE_TRUST_CONF = 0.5
|
||||
_GEMINI_AUTO_MIN_CONF = GEMINI_SPARKLE_TRUST_CONF
|
||||
|
||||
# ── Engine adapters (lazy singletons; engines are cv2-only, no model load) ──
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from remove_ai_watermarks.identify import (
|
||||
_vendor_of,
|
||||
identify,
|
||||
)
|
||||
from remove_ai_watermarks.watermark_registry import GEMINI_SPARKLE_TRUST_CONF
|
||||
|
||||
# Where the lazy import inside identify._visible_sparkle resolves the detector.
|
||||
_SPARKLE_TARGET = "remove_ai_watermarks.gemini_engine.detect_sparkle_confidence"
|
||||
@@ -140,6 +141,23 @@ class TestIdentifyNonPng:
|
||||
assert r.is_ai_generated is True
|
||||
assert "ByteDance" in (r.platform or "")
|
||||
|
||||
def test_bytedance_chinese_legal_name_attributed(self, tmp_path: Path):
    """A signer named by the Chinese legal entity is attributed to ByteDance.

    Some Volcano Engine certs carry the Chinese legal-entity name instead of
    the latin "volcengine"; the latin needle cannot match those bytes, so the
    Chinese-name registry entry is what attributes real ByteDance output.
    """
    signer = "北京火山引擎科技有限公司".encode()
    image = self._c2pa_jpeg(tmp_path, signer + b" ... trainedAlgorithmicMedia")
    report = identify(image, check_visible=False, check_invisible=False)
    assert report.is_ai_generated is True
    # platform may be None; coerce to "" so a miss fails the assertion cleanly.
    platform = report.platform or ""
    assert "ByteDance" in platform
|
||||
|
||||
def test_elevenlabs_attributed(self, tmp_path: Path):
    """An "Eleven Labs Inc." signer with an AI source type maps to ElevenLabs."""
    manifest = b"Eleven Labs Inc. ... trainedAlgorithmicMedia"
    image = self._c2pa_jpeg(tmp_path, manifest)
    report = identify(image, check_visible=False, check_invisible=False)
    assert report.is_ai_generated is True
    assert report.platform == "ElevenLabs"
    # ElevenLabs does not use SynthID, so no watermark entry may mention it.
    assert all("SynthID" not in mark for mark in report.watermarks)
|
||||
|
||||
def test_stability_ai_issuer_attributed_no_synthid(self, tmp_path: Path):
|
||||
path = self._c2pa_jpeg(tmp_path, b"Stability AI ... trainedAlgorithmicMedia")
|
||||
r = identify(path, check_visible=False)
|
||||
@@ -148,6 +166,21 @@ class TestIdentifyNonPng:
|
||||
assert "Stability AI" in r.platform
|
||||
assert not any("SynthID" in w for w in r.watermarks) # Stability does not use SynthID
|
||||
|
||||
def test_trained_source_is_generated_kind(self, tmp_path: Path):
    """A trainedAlgorithmicMedia source type reports ai_source_kind "generated"."""
    image = self._c2pa_jpeg(tmp_path, b"OpenAI ... trainedAlgorithmicMedia")
    report = identify(image, check_visible=False, check_invisible=False)
    assert report.is_ai_generated is True
    assert report.ai_source_kind == "generated"
|
||||
|
||||
def test_composite_source_is_enhanced_kind(self, tmp_path: Path):
    """A compositeWithTrainedAlgorithmicMedia source type reads as "enhanced".

    That source type marks a real photo with an AI-composited region. The
    verdict is still AI (is_ai True), but the kind must read "enhanced" so a
    caller can do region-targeted cleaning instead of a full-frame regen.
    """
    manifest = b"Adobe ... compositeWithTrainedAlgorithmicMedia"
    image = self._c2pa_jpeg(tmp_path, manifest)
    report = identify(image, check_visible=False, check_invisible=False)
    assert report.is_ai_generated is True
    assert report.ai_source_kind == "enhanced"
|
||||
|
||||
def test_c2pa_without_ai_marker_is_unknown(self, tmp_path: Path):
|
||||
# Adobe signs C2PA on plain Photoshop edits too. Without an AI digital-
|
||||
# source marker, the honest verdict is unknown -- the C2PA watermark is
|
||||
@@ -202,6 +235,16 @@ class TestIdentifySamsungGalaxy:
|
||||
assert r.platform == "ASUS Gallery (C2PA signer)"
|
||||
assert any("C2PA" in w for w in r.watermarks)
|
||||
|
||||
def test_galaxy_capture_without_ai_marker_is_not_ai(self, tmp_path: Path):
    """A Galaxy capture with C2PA but no AI marker stays unknown (None).

    A genuine Galaxy phone capture carries Samsung Galaxy C2PA provenance but
    no AI source-type / genAIType. The device cert is authenticity provenance
    of a real photo, not an AI-generation signal, so the verdict asserted here
    is ``None`` (unknown) rather than ``False``.
    """
    payload = b"Samsung Galaxy Galaxy S25 c2pa-rs no ai marker"
    image = self._jpeg(tmp_path, "s25_capture.jpg", payload)
    report = identify(image, check_visible=False, check_invisible=False)
    assert report.is_ai_generated is None
    assert report.platform == "Samsung Galaxy (C2PA)"
    # The C2PA credential itself must still be listed as a watermark.
    assert any("C2PA" in mark for mark in report.watermarks)
|
||||
|
||||
|
||||
# ── End-to-end verdicts on real fixtures ────────────────────────────
|
||||
|
||||
@@ -277,6 +320,12 @@ class TestIdentifyLocalParams:
|
||||
assert "parameters" in signal.detail
|
||||
assert signal.confidence == "high"
|
||||
|
||||
def test_local_gen_params_have_no_c2pa_source_kind(self, tmp_png_with_ai_metadata: Path):
    """An AI verdict from local SD params (not C2PA) leaves ai_source_kind None."""
    report = identify(tmp_png_with_ai_metadata, check_visible=False)
    assert report.is_ai_generated is True
    assert report.ai_source_kind is None
|
||||
|
||||
def test_clean_png_is_unknown(self, tmp_clean_png: Path):
|
||||
r = identify(tmp_clean_png, check_visible=False)
|
||||
assert r.is_ai_generated is None
|
||||
@@ -399,6 +448,66 @@ class TestIdentifyVisibleSparkle:
|
||||
assert r.confidence == "high"
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
_DEMO_BEFORE = REPO_ROOT / "demo_banana_before.png"
|
||||
_DEMO_AFTER = REPO_ROOT / "demo_banana_after.png"
|
||||
|
||||
|
||||
@pytest.mark.skipif(not (_DEMO_BEFORE.exists() and _DEMO_AFTER.exists()), reason="demo banana pair not present")
class TestSparkleDetectRemoveAlignment:
    """Detect (identify) and remove (registry.best_auto_mark) must agree on the
    same image -- the retained-corpus desync where identify reported a sparkle the
    removal arbitration declined (or vice versa). Both gate on the single shared
    GEMINI_SPARKLE_TRUST_CONF, so a sparkle just over the line is taken by BOTH
    and one just under is declined by BOTH. Fixtures composite the real captured
    sparkle (before-minus-after) back at reduced opacity to land on either side.

    The detect side is evaluated against identify's own ``_SPARKLE_THRESHOLD``
    (not the registry constant), and a dedicated test pins the two constants
    together, so re-hard-coding either threshold fails here instead of passing
    silently.
    """

    @staticmethod
    def _faint_sparkle(tmp_path: Path, opacity: float) -> Path:
        """Write the demo image with its sparkle blended back at ``opacity``.

        Args:
            tmp_path: Directory the fixture PNG is written into.
            opacity: Blend factor applied to the (before - after) difference;
                the tests pass values between 0 and 1.

        Returns:
            Path of the written ``sparkle_<percent>.png`` fixture.
        """
        import numpy as np

        from remove_ai_watermarks import image_io

        before = image_io.imread(_DEMO_BEFORE).astype("float32")
        after = image_io.imread(_DEMO_AFTER).astype("float32")
        # after + opacity * (before - after): the clean frame plus a scaled copy
        # of the captured sparkle, clipped back into the 8-bit range.
        faint = np.clip(after + opacity * (before - after), 0, 255).astype("uint8")
        # round(), not int(): float products such as 0.29 * 100 evaluate to
        # 28.999..., which int() would truncate to 28 and mislabel (or collide
        # with) a neighbouring fixture in the same tmp_path.
        out = tmp_path / f"sparkle_{round(opacity * 100)}.png"
        image_io.imwrite(out, faint)
        return out

    def _detect_remove(self, path: Path) -> tuple[bool, bool, float]:
        """Evaluate the detect-side and remove-side decisions for one image.

        Returns:
            ``(identify_fires, remove_takes_gemini, conf)`` where ``conf`` is the
            raw sparkle confidence (0.0 when the detector returns a falsy value).
        """
        from remove_ai_watermarks import image_io, watermark_registry
        from remove_ai_watermarks.gemini_engine import detect_sparkle_confidence
        from remove_ai_watermarks.identify import _SPARKLE_THRESHOLD

        conf = detect_sparkle_confidence(path) or 0.0
        # Gate on identify's own threshold so drift on the identify side is
        # visible here. NOTE(review): assumes identify compares with ">=" as
        # the previous version of this helper did -- confirm against identify.
        identify_fires = conf >= _SPARKLE_THRESHOLD
        best = watermark_registry.best_auto_mark(image_io.imread(path))
        remove_takes_gemini = best is not None and best.key == "gemini"
        return identify_fires, remove_takes_gemini, conf

    def test_identify_threshold_is_shared_constant(self):
        # The whole point of the unification: identify's threshold must be the
        # registry's trust constant, not an independently maintained number.
        from remove_ai_watermarks.identify import _SPARKLE_THRESHOLD

        assert _SPARKLE_THRESHOLD == GEMINI_SPARKLE_TRUST_CONF

    def test_above_threshold_both_fire(self, tmp_path: Path):
        path = self._faint_sparkle(tmp_path, 0.7)  # ~0.55 conf, just over the line
        identify_fires, remove_takes, conf = self._detect_remove(path)
        assert conf >= GEMINI_SPARKLE_TRUST_CONF
        assert identify_fires, f"identify declined a sparkle above threshold (conf={conf:.3f})"
        assert remove_takes, f"removal declined a sparkle above threshold (conf={conf:.3f})"

    def test_below_threshold_both_decline(self, tmp_path: Path):
        path = self._faint_sparkle(tmp_path, 0.5)  # ~0.37 conf, just under the line
        identify_fires, remove_takes, conf = self._detect_remove(path)
        assert conf < GEMINI_SPARKLE_TRUST_CONF
        assert not identify_fires, f"identify fired below threshold (conf={conf:.3f})"
        assert not remove_takes, f"removal fired below threshold (conf={conf:.3f})"

    def test_full_strength_both_fire(self):
        # The shipped demo sparkle at full strength: unambiguous agreement.
        identify_fires, remove_takes, conf = self._detect_remove(_DEMO_BEFORE)
        assert conf >= GEMINI_SPARKLE_TRUST_CONF
        assert identify_fires
        assert remove_takes
|
||||
|
||||
|
||||
class TestIdentifyImportIsLight:
|
||||
"""`import identify` must stay torch-free (lazy noai/__init__): the package
|
||||
is deployed on a 512 MB host where eagerly pulling torch/diffusers OOMs."""
|
||||
|
||||
Reference in New Issue
Block a user