mirror of
https://github.com/romovpa/claudini.git
synced 2026-05-24 19:44:16 +02:00
Default filter_ascii to False to match legacy benchmark behavior (#3)
Before 2026-03-20 the token filter defaulted to off (filter_ids=False, no forbidden tokens). The subsequent split into filter_ascii/filter_special/filter_retok introduced filter_ascii=True as the new default, silently narrowing the optimization vocabulary by ~50% for Qwen and invalidating comparisons against historical numbers (verified on claude_v63: avg loss 0.98 with the filter vs 0.49 without, with ~12/20 samples matching the legacy hmcGCG numbers bit-exactly once the filter is disabled). Revert the default to False so fresh runs reproduce the earlier leaderboard out of the box; presets that want ASCII filtering can still opt in explicitly. Assisted-by: Claude <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
8221f0afb8
commit
238c702b06
+1
-1
@@ -47,7 +47,7 @@ class BenchmarkConfig:
|
||||
input_spec: InputSpec = field(default_factory=InputSpec.default)
|
||||
|
||||
# Token filtering and final evaluation mode
|
||||
filter_ascii: bool = True # block non-ASCII / non-printable tokens
|
||||
filter_ascii: bool = False # block non-ASCII / non-printable tokens
|
||||
filter_special: bool = False # block special / control / added tokens
|
||||
filter_retok: bool = False # decode->re-encode retokenization round-trip filter
|
||||
final_input: str = "tokens"
|
||||
|
||||
@@ -115,7 +115,7 @@ def run_bench(
|
||||
resolved_max_time = preset_cfg.get("max_time", None)
|
||||
|
||||
# Token filtering
|
||||
resolved_filter_ascii = preset_cfg.get("filter_ascii", True)
|
||||
resolved_filter_ascii = preset_cfg.get("filter_ascii", False)
|
||||
resolved_filter_special = preset_cfg.get("filter_special", False)
|
||||
resolved_filter_retok = preset_cfg.get("filter_retok", False)
|
||||
resolved_final_input = preset_cfg.get("final_input", "tokens")
|
||||
|
||||
Reference in New Issue
Block a user