Tokenizer tab: add OpenAI encodings via browser-safe gpt-tokenizer dynamic import; UI: fit all five Tokenade preset buttons on one line (tighter buttons)

This commit is contained in:
EP
2025-08-20 18:00:29 -07:00
parent 030d90bcca
commit a3d4fc33df
3 changed files with 17 additions and 9 deletions
+3 -3
View File
@@ -2023,12 +2023,12 @@ html {
.tokenade-presets {
display: flex;
align-items: center;
gap: 8px;
gap: 6px;
flex-wrap: nowrap;
overflow-x: auto;
overflow-x: visible;
margin: 8px 0 12px 0;
}
.tokenade-presets .transform-button { flex: 0 0 auto; min-width: 160px; }
.tokenade-presets .transform-button { flex: 0 0 auto; min-width: 136px; padding: 5px 8px; }
/* Quick picks panel */
.carrier-quick-grid {
+4 -1
View File
@@ -204,7 +204,10 @@
<select v-model="tokenizerEngine" @change="runTokenizer">
<option value="byte">UTF-8 bytes</option>
<option value="word">Naive words</option>
<option value="gpt3">Experimental: gpt-3-encoder (if available)</option>
<option value="cl100k">OpenAI: cl100k_base (GPT3.5/4)</option>
<option value="o200k">OpenAI: o200k_base (GPT4o)</option>
<option value="p50k">OpenAI: p50k_base</option>
<option value="r50k">OpenAI: r50k_base</option>
</select>
</label>
</div>
+10 -5
View File
@@ -1909,7 +1909,7 @@ window.app = new Vue({
}
,
// Tokenizer visualization
runTokenizer() {
async runTokenizer() {
const text = this.tokenizerInput || '';
const engine = this.tokenizerEngine;
const tokens = [];
@@ -1925,15 +1925,20 @@ window.app = new Vue({
// Naive word split incl. punctuation
const parts = text.split(/(\s+|[\.,!?:;()\[\]{}])/);
for (const p of parts) { if (p) tokens.push({ text: p }); }
} else if (engine === 'gpt3' && window.gpt3enc && window.gpt3enc.encode) {
} else if (['cl100k','o200k','p50k','r50k'].includes(engine)) {
try {
const ids = window.gpt3enc.encode(text);
if (!window.gptTok) {
window.gptTok = await import('https://cdn.jsdelivr.net/npm/gpt-tokenizer@2/+esm');
}
const map = { cl100k: 'cl100k_base', o200k: 'o200k_base', p50k: 'p50k_base', r50k: 'r50k_base' };
const enc = map[engine];
const ids = window.gptTok.encode(text, enc);
for (const id of ids) {
const piece = window.gpt3enc.decode([id]);
const piece = window.gptTok.decode([id], enc);
tokens.push({ id, text: piece });
}
} catch (e) {
console.warn('gpt-3-encoder not available', e);
console.warn('Failed to load/use gpt-tokenizer; falling back to bytes', e);
this.tokenizerEngine = 'byte';
return this.runTokenizer();
}