From a3d4fc33df7b7777aec4b742559ba04763e9abe3 Mon Sep 17 00:00:00 2001 From: EP Date: Wed, 20 Aug 2025 18:00:29 -0700 Subject: [PATCH] Tokenizer tab: add OpenAI encodings via browser-safe gpt-tokenizer dynamic import; UI: fit all five Tokenade preset buttons on one line (tighter buttons) --- css/style.css | 6 +++--- index.html | 5 ++++- js/app.js | 15 ++++++++++----- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/css/style.css b/css/style.css index 78ea176..3356cdb 100644 --- a/css/style.css +++ b/css/style.css @@ -2023,12 +2023,12 @@ html { .tokenade-presets { display: flex; align-items: center; - gap: 8px; + gap: 6px; flex-wrap: nowrap; - overflow-x: auto; + overflow-x: visible; margin: 8px 0 12px 0; } -.tokenade-presets .transform-button { flex: 0 0 auto; min-width: 160px; } +.tokenade-presets .transform-button { flex: 0 0 auto; min-width: 136px; padding: 5px 8px; } /* Quick picks panel */ .carrier-quick-grid { diff --git a/index.html b/index.html index 9c7124b..347fbbb 100644 --- a/index.html +++ b/index.html @@ -204,7 +204,10 @@ diff --git a/js/app.js b/js/app.js index 74ad3a4..d40e422 100644 --- a/js/app.js +++ b/js/app.js @@ -1909,7 +1909,7 @@ window.app = new Vue({ } , // Tokenizer visualization - runTokenizer() { + async runTokenizer() { const text = this.tokenizerInput || ''; const engine = this.tokenizerEngine; const tokens = []; @@ -1925,15 +1925,20 @@ window.app = new Vue({ // Naive word split incl. punctuation const parts = text.split(/(\s+|[\.,!?:;()\[\]{}])/); for (const p of parts) { if (p) tokens.push({ text: p }); } - } else if (engine === 'gpt3' && window.gpt3enc && window.gpt3enc.encode) { + } else if (['cl100k','o200k','p50k','r50k'].includes(engine)) { try { - const ids = window.gpt3enc.encode(text); + if (!window.gptTok) { + window.gptTok = await import('https://cdn.jsdelivr.net/npm/gpt-tokenizer@2/+esm'); + } + const map = { cl100k: 'cl100k_base', o200k: 'o200k_base', p50k: 'p50k_base', r50k: 'r50k_base' }; + const enc = map[engine]; + const ids = window.gptTok.encode(text, enc); for (const id of ids) { - const piece = window.gpt3enc.decode([id]); + const piece = window.gptTok.decode([id], enc); tokens.push({ id, text: piece }); } } catch (e) { - console.warn('gpt-3-encoder not available', e); + console.warn('Failed to load/use gpt-tokenizer; falling back to bytes', e); this.tokenizerEngine = 'byte'; return this.runTokenizer(); }