Files
P4RS3LT0NGV3/build/build-emoji-data.js
2025-12-06 11:10:31 -08:00

492 lines
17 KiB
JavaScript

#!/usr/bin/env node
/**
* Build Emoji Data from Official Unicode Source
* Fetches emoji-test.txt from Unicode.org and generates emojiData.js
*/
const https = require('https');
const fs = require('fs');
const path = require('path');
// Unicode emoji test file (always uses latest version - compatibility testing handles older devices).
// The "latest" path is intended to resolve to the newest Unicode emoji release.
// NOTE(review): confirm whether the server answers with an HTTP redirect or serves the file directly.
const EMOJI_DATA_URL = 'https://www.unicode.org/Public/emoji/latest/emoji-test.txt';
// On-disk cache location: a ".cache" directory one level above this script's directory.
const CACHE_DIR = path.join(__dirname, '..', '.cache');
const CACHE_FILE = path.join(CACHE_DIR, 'emoji-test.txt');
const CACHE_MAX_AGE = 7 * 24 * 60 * 60 * 1000; // 7 days in milliseconds
// Check for the --force (or -f) flag to bypass the cache
const FORCE_DOWNLOAD = process.argv.includes('--force') || process.argv.includes('-f');
// Captured once at startup; the elapsed-time figures in the log output are measured from here.
const startTime = Date.now();
/**
 * Decide whether the cached copy of emoji-test.txt may be used instead of downloading.
 *
 * @returns {boolean} true only when no force flag was given, the cache file exists,
 *   and its modification time is no older than CACHE_MAX_AGE.
 */
function shouldUseCache() {
  if (FORCE_DOWNLOAD) {
    console.log('🔄 Force download requested, bypassing cache...');
    return false;
  }

  const cachePresent = fs.existsSync(CACHE_FILE);
  if (!cachePresent) {
    return false;
  }

  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const { mtimeMs } = fs.statSync(CACHE_FILE);
  const ageMs = Date.now() - mtimeMs;
  const isStale = ageMs > CACHE_MAX_AGE;
  if (isStale) {
    console.log(`⏰ Cache is ${Math.floor(ageMs / MS_PER_DAY)} days old, will refresh...`);
  }
  return !isStale;
}
/**
 * Download emoji data from Unicode.org, cache it on disk and hand it to `callback`.
 *
 * The body is collected as raw Buffers and decoded once at the end, so multi-byte
 * UTF-8 characters that straddle a chunk boundary are decoded correctly. Redirect
 * responses (3xx with a Location header) are followed a limited number of times;
 * any other non-200 status is treated as a failure so an error page is never cached.
 *
 * Every failure is fatal: the error is printed and the process exits with code 1.
 *
 * @param {(data: string, downloadTime: string) => void} callback
 *   Called with the decoded file contents and the seconds elapsed since script
 *   start (formatted with two decimals).
 */
function downloadEmojiData(callback) {
  const MAX_REDIRECTS = 5;

  console.log('📥 Downloading emoji data from Unicode.org...');
  console.log(` Source: ${EMOJI_DATA_URL}`);

  const fail = (message) => {
    console.error('❌ Error fetching emoji data:', message);
    process.exit(1);
  };

  const fetchFrom = (url, redirectsLeft) => {
    https.get(url, (response) => {
      const { statusCode, headers } = response;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Discard the redirect body so the connection can be released.
        response.resume();
        if (redirectsLeft <= 0) {
          fail(`too many redirects (more than ${MAX_REDIRECTS})`);
          return;
        }
        // Location may be relative; resolve it against the URL that produced it.
        fetchFrom(new URL(headers.location, url).toString(), redirectsLeft - 1);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        fail(`unexpected HTTP status ${statusCode}`);
        return;
      }

      const chunks = [];
      let downloadedBytes = 0;
      const totalBytes = parseInt(headers['content-length'] || '0', 10);

      response.on('data', (chunk) => {
        chunks.push(chunk);
        downloadedBytes += chunk.length;
        // Show progress if we know the total size
        if (totalBytes > 0) {
          const percent = ((downloadedBytes / totalBytes) * 100).toFixed(1);
          process.stdout.write(`\r Progress: ${percent}% (${(downloadedBytes / 1024).toFixed(0)} KB)`);
        }
      });

      response.on('error', (err) => fail(err.message));

      response.on('end', () => {
        // Decode once over the whole payload; decoding per chunk can split a character.
        const data = Buffer.concat(chunks).toString('utf8');
        const downloadTime = ((Date.now() - startTime) / 1000).toFixed(2);
        console.log(`\n✅ Downloaded ${(downloadedBytes / 1024).toFixed(2)} KB in ${downloadTime}s`);

        // Save to cache (recursive mkdir is a no-op when the directory already exists)
        fs.mkdirSync(CACHE_DIR, { recursive: true });
        fs.writeFileSync(CACHE_FILE, data, 'utf8');
        console.log(`💾 Cached to ${CACHE_FILE}`);

        callback(data, downloadTime);
      });
    }).on('error', (err) => fail(err.message));
  };

  fetchFrom(EMOJI_DATA_URL, MAX_REDIRECTS);
}
/**
 * Entry point for obtaining the raw emoji-test.txt contents.
 *
 * When shouldUseCache() approves, the cached file is read and processed with a
 * reported download time of '0.00'; otherwise the file is downloaded and the
 * result is processed with the download time supplied by the downloader.
 */
function loadEmojiData() {
  if (!shouldUseCache()) {
    downloadEmojiData((text, elapsed) => {
      processEmojiData(text, elapsed);
    });
    return;
  }

  console.log('📂 Using cached emoji data...');

  const MS_PER_HOUR = 60 * 60 * 1000;
  const { mtimeMs } = fs.statSync(CACHE_FILE);
  const ageHours = Math.floor((Date.now() - mtimeMs) / MS_PER_HOUR);
  console.log(` Cache age: ${ageHours} hours`);

  const cachedText = fs.readFileSync(CACHE_FILE, 'utf8');
  const elapsedSeconds = ((Date.now() - startTime) / 1000).toFixed(2);
  console.log(`✅ Loaded ${(cachedText.length / 1024).toFixed(2)} KB from cache in ${elapsedSeconds}s`);

  // Nothing was downloaded on this path, so the download time is reported as zero.
  processEmojiData(cachedText, '0.00');
}
/**
 * Run the parse and generate stages on raw emoji-test.txt text and log timings.
 *
 * @param {string} data - Raw contents of emoji-test.txt.
 * @param {string} downloadTime - Download duration in seconds, already formatted;
 *   it is only echoed in the summary line.
 */
function processEmojiData(data, downloadTime) {
  // Seconds elapsed since `since`, formatted with two decimals.
  const secondsSince = (since) => ((Date.now() - since) / 1000).toFixed(2);

  // Stage 1: parse
  console.log('🔨 Parsing emoji data...');
  const parseBegan = Date.now();
  const parsedEmojis = parseEmojiTestFile(data);
  const parseTime = secondsSince(parseBegan);
  console.log(`✅ Parsed ${Object.keys(parsedEmojis).length} emojis in ${parseTime}s`);

  // Stage 2: generate the output file
  console.log('📝 Generating emojiData.js...');
  const generateBegan = Date.now();
  generateEmojiDataFile(parsedEmojis);
  const genTime = secondsSince(generateBegan);

  const totalTime = secondsSince(startTime);
  console.log(`\n⏱️ Total time: ${totalTime}s (download: ${downloadTime}s, parse: ${parseTime}s, generate: ${genTime}s)`);
}
// Start loading emoji data.
// This call runs before the function declarations that follow it in the file;
// that works because `function` declarations are hoisted.
loadEmojiData();
/**
 * Report whether an emoji counts as "complex" (skin tones, ZWJ sequences, etc.).
 *
 * Filtering is currently disabled: both arguments are ignored and every emoji is
 * reported as not complex, so the full parsed set is kept.
 *
 * @param {string} emoji - The emoji string (unused).
 * @param {string} name - The official emoji name (unused).
 * @returns {boolean} Always false.
 */
function hasComplexModifiers(emoji, name) {
  const isComplex = false; // no filtering: treat everything as simple
  return isComplex;
}
/**
 * Parse the emoji-test.txt file format.
 * Line format: <codepoints> ; <status> # <emoji> <version> <name>
 *
 * Only fully-qualified entries are kept. Each emoji string is rebuilt from the
 * code points on the left-hand side of the line; the rendered character in the
 * trailing comment is used only as a fallback when the code points cannot be used.
 *
 * Both LF and CRLF line endings are accepted. A stray "\r" would otherwise stop
 * the name pattern from matching (`.` does not match line terminators) and every
 * entry would be dropped.
 *
 * @param {string} content - Full text of emoji-test.txt.
 * @returns {Object<string, {official: string, group: string, subgroup: string,
 *   keywords: string[], isSimple: boolean}>} Parsed entries keyed by emoji string,
 *   in file order.
 */
function parseEmojiTestFile(content) {
  // Patterns are kept local: this function can run before later module-level
  // statements have executed, so it must not rely on constants declared below it.
  const MAX_CODE_POINT = 0x10ffff;
  const entryPattern = /^([0-9A-Fa-f\s]+)\s*;\s*(fully-qualified|minimally-qualified|unqualified)/;
  const namePattern = /#\s+.+?\s+E\d+\.\d+\s+(.+)$/;
  const fallbackPattern = /^(.+?)\s+E\d+\.\d+\s+(.+)$/;

  const emojis = {};
  let currentGroup = '';
  let currentSubgroup = '';

  for (const line of content.split(/\r?\n/)) {
    // Parse group headers
    if (line.startsWith('# group:')) {
      currentGroup = line.replace('# group:', '').trim();
      continue;
    }
    // Parse subgroup headers
    if (line.startsWith('# subgroup:')) {
      currentSubgroup = line.replace('# subgroup:', '').trim();
      continue;
    }
    // Skip comments, empty lines and anything that is not a data line
    if (line.startsWith('#') || !line.trim() || !line.includes(';')) {
      continue;
    }

    // Data line examples:
    //   1F600 ; fully-qualified # 😀 E1.0 grinning face
    //   1F64D 1F3FD 200D 2642 FE0F ; fully-qualified # 🙍🏽‍♂️ E2.0 man frowning: medium skin tone
    const codepointMatch = line.match(entryPattern);
    let emoji = null;
    if (codepointMatch) {
      const codepoints = codepointMatch[1].trim().split(/\s+/).map((cp) => parseInt(cp, 16));
      // String.fromCodePoint throws a RangeError on out-of-range values, so
      // validate first and let malformed lines fall through to the fallback.
      const allValid = codepoints.length > 0
        && codepoints.every((cp) => Number.isInteger(cp) && cp <= MAX_CODE_POINT);
      if (allValid) {
        emoji = String.fromCodePoint(...codepoints);
      }
    }

    // Fallback: take the rendered character from the trailing comment
    if (!emoji) {
      const parts = line.split('#');
      if (parts.length < 2) continue;
      const fallbackMatch = parts[1].trim().match(fallbackPattern);
      if (!fallbackMatch) continue;
      emoji = fallbackMatch[1].trim();
    }

    // Extract the official name from the trailing comment
    const nameMatch = line.match(namePattern);
    const name = nameMatch ? nameMatch[1].trim() : '';
    if (!emoji || !name) continue;

    // Prefer the parsed status field; fall back to a substring check only for
    // lines the entry pattern could not parse.
    const isFullyQualified = codepointMatch
      ? codepointMatch[2] === 'fully-qualified'
      : line.includes('fully-qualified');
    if (!isFullyQualified) continue;

    emojis[emoji] = {
      official: name,
      group: currentGroup,
      subgroup: currentSubgroup,
      keywords: generateKeywords(name, currentGroup, currentSubgroup),
      isSimple: !hasComplexModifiers(emoji, name)
    };
  }

  return emojis;
}
/**
 * Generate search keywords from the official emoji name.
 *
 * Keywords are collected, in order, from:
 *   1. the words of the name (lower-cased, punctuation removed, split on
 *      whitespace and hyphens, words of 1-2 characters and a few stop words dropped);
 *   2. the words of the group name longer than three characters;
 *   3. extra synonyms for names that contain one of the trigger phrases below.
 *      Triggers are matched as substrings of the lower-cased name, so 'heart'
 *      also matches 'hearts'.
 *
 * Punctuation such as ':' and ',' is stripped so that names like
 * "flag: United States" or "kiss: woman, man" yield 'flag' / 'woman' rather than
 * 'flag:' / 'woman,', which an exact keyword lookup would never match.
 *
 * @param {string} name - Official emoji name.
 * @param {string} group - Unicode group name; may be empty.
 * @param {string} subgroup - Unicode subgroup name. Currently unused; kept for
 *   interface compatibility.
 * @returns {string[]} De-duplicated keywords in insertion order.
 */
function generateKeywords(name, group, subgroup) {
  const stopWords = ['with', 'and', 'the'];
  const keywords = new Set();
  const lowerName = name.toLowerCase();

  // Add words from the official name
  lowerName
    .replace(/[():,.!"“”]/g, '')
    .split(/[\s-]+/)
    .filter((word) => word.length > 2 && !stopWords.includes(word))
    .forEach((word) => keywords.add(word));

  // Add group words as keywords
  if (group) {
    group.toLowerCase().split(/[\s&-]+/).forEach((word) => {
      if (word.length > 3) keywords.add(word);
    });
  }

  // Special keyword mappings for common words
  const keywordMap = {
    'grinning': ['smile', 'happy', 'grin'],
    'tears of joy': ['laugh', 'lol', 'funny'],
    'heart': ['love', 'like'],
    'thumbs up': ['good', 'yes', 'approve', 'like'],
    'thumbs down': ['bad', 'no', 'disapprove'],
    'waving': ['hello', 'hi', 'bye', 'wave'],
    'clapping': ['applause', 'clap', 'praise'],
    'folded': ['pray', 'thanks', 'please'],
    'fire': ['hot', 'lit', 'flame'],
    'crying': ['sad', 'tear', 'cry'],
    'skull': ['dead', 'death'],
    'poop': ['shit', 'crap', 'poo'],
    'hundred': ['100', 'perfect'],
    'collision': ['boom', 'bang', 'explosion'],
    'dog': ['puppy', 'pet'],
    'cat': ['kitty', 'pet'],
    'sun': ['sunny', 'day'],
    'moon': ['night'],
    'star': ['favorite'],
    'rainbow': ['pride', 'colorful']
  };

  // Add mapped keywords
  for (const [trigger, extras] of Object.entries(keywordMap)) {
    if (lowerName.includes(trigger)) {
      extras.forEach((extra) => keywords.add(extra));
    }
  }

  return Array.from(keywords);
}
/**
 * Translate a Unicode group (and, for "People & Body", its subgroup) into the
 * category id used in the generated data.
 *
 * "People & Body" is split by subgroup into hands, body parts and persons;
 * any other subgroup of that group maps to 'people_body'. Groups that are not
 * listed map to 'symbols'.
 *
 * @param {string} group - Unicode group name.
 * @param {string} subgroup - Unicode subgroup name (only inspected for "People & Body").
 * @returns {string} Category id.
 */
function mapGroupToCategory(group, subgroup) {
  if (group === 'People & Body') {
    // Hands and gestures ('hand-prop' is already covered by the 'hand-' prefix)
    const isHand = subgroup.startsWith('hand-') || subgroup === 'hands';
    if (isHand) {
      return 'people_hands';
    }

    if (subgroup === 'body-parts') {
      return 'people_body_parts';
    }

    // People ('person-symbol' is already covered by the 'person-' prefix)
    const isPerson = subgroup.startsWith('person-') || subgroup === 'person' || subgroup === 'family';
    return isPerson ? 'people_persons' : 'people_body';
  }

  const categoryByGroup = {
    'Smileys & Emotion': 'smileys_emotion',
    'Animals & Nature': 'animals_nature',
    'Food & Drink': 'food_drink',
    'Travel & Places': 'travel_places',
    'Activities': 'activities',
    'Objects': 'objects',
    'Symbols': 'symbols',
    'Flags': 'flags'
  };
  return categoryByGroup[group] || 'symbols';
}
/**
 * Load the keyword mappings defined by ../src/emojiWordMap.js.
 *
 * The file is evaluated in a separate vm context that exposes only a `window`
 * object and `console`; whatever it assigns to `window.emojiKeywords` is returned.
 * A missing file, or any error while reading or evaluating it, is logged and
 * results in an empty object so the build can continue without the extra keywords.
 *
 * @returns {Object} The `window.emojiKeywords` value set by the file, or {}.
 */
function loadEmojiWordMap() {
  const wordMapPath = path.join(__dirname, '..', 'src', 'emojiWordMap.js');

  const wordMapPresent = fs.existsSync(wordMapPath);
  if (!wordMapPresent) {
    console.log('⚠️ emojiWordMap.js not found, skipping keyword merge');
    return {};
  }

  let keywordMap;
  try {
    const source = fs.readFileSync(wordMapPath, 'utf8');
    const vm = require('vm');
    // The file expects a browser-like `window`; console is passed through in case it logs.
    const context = vm.createContext({ window: {}, console });
    vm.runInContext(source, context);
    keywordMap = context.window.emojiKeywords || {};
    console.log(`📚 Loaded ${Object.keys(keywordMap).length} keyword mappings from emojiWordMap.js`);
  } catch (error) {
    console.log(`⚠️ Error loading emojiWordMap.js: ${error.message}, skipping keyword merge`);
    return {};
  }
  return keywordMap;
}
/**
 * Combine an emoji's generated keywords with extra keywords from the word map.
 *
 * Word-map keywords are lower-cased before being added; base keywords are taken
 * as they are. A `wordMapKeywords` value that is not an array is ignored.
 *
 * @param {Iterable<string>} baseKeywords - Keywords generated from the emoji name.
 * @param {string[]|*} wordMapKeywords - Extra keywords, expected to be an array of strings.
 * @returns {string[]} De-duplicated keywords in default sort order.
 */
function mergeKeywords(baseKeywords, wordMapKeywords) {
  const unique = new Set(baseKeywords);

  if (Array.isArray(wordMapKeywords)) {
    for (const extra of wordMapKeywords) {
      unique.add(extra.toLowerCase());
    }
  }

  const combined = [...unique];
  combined.sort();
  return combined;
}
/**
 * Generate the emojiData.js file (written to ../dist/js/data/emojiData.js).
 *
 * The output assigns `window.emojiData`, an object keyed by emoji string, and
 * then attaches helper functions and a `categories` array to that same object.
 * Because helpers and the categories array share the object with the emoji
 * records, the generated helpers must skip anything that is not a plain record:
 * functions are excluded by the `typeof` check and the categories array by an
 * explicit `Array.isArray` check. Without the latter, keyword lookups would try
 * to read `.official` / `.keywords` on the array and throw.
 *
 * @param {Object<string, {official: string, group: string, subgroup: string,
 *   keywords: string[], isSimple: boolean}>} emojiData - Parsed emoji entries.
 */
function generateEmojiDataFile(emojiData) {
  const outputPath = path.join(__dirname, '..', 'dist', 'js', 'data', 'emojiData.js');

  // Ensure data directory exists (recursive mkdir is a no-op when it already does)
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });

  // Load keyword mappings from emojiWordMap.js
  console.log('📚 Loading keyword mappings from emojiWordMap.js...');
  const wordMap = loadEmojiWordMap();

  // Render a value as a single-quoted JS string literal. Backslashes are escaped
  // before quotes so an existing backslash cannot swallow the escaping of a quote.
  const quote = (text) => `'${String(text).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;

  let output = `// Unified Emoji Data for P4RS3LT0NGV3
// Generated from Unicode Official Emoji Data (latest version with compatibility testing)
// Keywords merged from emojiWordMap.js for enhanced searchability
// Source: ${EMOJI_DATA_URL}
// Generated: ${new Date().toISOString()}
window.emojiData = {
`;

  let mergedCount = 0;

  // Add each emoji
  for (const [emoji, data] of Object.entries(emojiData)) {
    const category = mapGroupToCategory(data.group, data.subgroup);

    // Merge keywords from wordMap if available
    let finalKeywords = data.keywords;
    if (wordMap[emoji]) {
      finalKeywords = mergeKeywords(data.keywords, wordMap[emoji]);
      mergedCount++;
    }

    const keywordsStr = JSON.stringify(finalKeywords);
    const isSimple = data.isSimple ? 'true' : 'false';
    output += ` ${quote(emoji)}: { official: ${quote(data.official)}, keywords: ${keywordsStr}, category: '${category}', isSimple: ${isSimple} },\n`;
  }

  if (mergedCount > 0) {
    console.log(`✅ Merged keywords for ${mergedCount} emojis from emojiWordMap.js`);
  }

  output += `};
// Helper to get all emojis by category (optionally filter to simple emojis only)
window.emojiData.getByCategory = function(categoryId, simpleOnly = false) {
let emojis = categoryId === 'all'
? Object.keys(window.emojiData).filter(key => typeof window.emojiData[key] === 'object' && !Array.isArray(window.emojiData[key]))
: Object.entries(window.emojiData)
.filter(([emoji, data]) => typeof data === 'object' && !Array.isArray(data) && data.category === categoryId)
.map(([emoji]) => emoji);
// Filter to simple emojis if requested (better for UI display)
if (simpleOnly) {
emojis = emojis.filter(emoji => window.emojiData[emoji]?.isSimple);
}
return emojis;
};
// Helper to search emojis by keyword
window.emojiData.searchByKeyword = function(keyword) {
const lowerKeyword = keyword.toLowerCase();
return Object.entries(window.emojiData)
.filter(([emoji, data]) =>
typeof data === 'object' && !Array.isArray(data) && (
data.official.toLowerCase().includes(lowerKeyword) ||
data.keywords.some(kw => kw.toLowerCase().includes(lowerKeyword))
)
)
.map(([emoji]) => emoji);
};
// Helper to get emoji by keyword (for encoding)
window.emojiData.getEmojiForWord = function(word) {
const lowerWord = word.toLowerCase();
const matches = Object.entries(window.emojiData)
.filter(([emoji, data]) =>
typeof data === 'object' && !Array.isArray(data) && data.keywords.includes(lowerWord)
)
.map(([emoji]) => emoji);
// Return random match if multiple found
return matches.length > 0 ? matches[Math.floor(Math.random() * matches.length)] : null;
};
// Categories for UI (official Unicode 15.1 categories, with People & Body split)
window.emojiData.categories = [
{ id: 'all', name: 'All Emojis', icon: '🔍' },
{ id: 'smileys_emotion', name: 'Smileys & Emotion', icon: '😀' },
{ id: 'people_hands', name: 'Hands & Gestures', icon: '👋' },
{ id: 'people_persons', name: 'People', icon: '👤' },
{ id: 'people_body_parts', name: 'Body Parts', icon: '🦵' },
{ id: 'animals_nature', name: 'Animals & Nature', icon: '🐶' },
{ id: 'food_drink', name: 'Food & Drink', icon: '🍕' },
{ id: 'travel_places', name: 'Travel & Places', icon: '✈️' },
{ id: 'activities', name: 'Activities', icon: '⚽' },
{ id: 'objects', name: 'Objects', icon: '💡' },
{ id: 'symbols', name: 'Symbols', icon: '❤️' },
{ id: 'flags', name: 'Flags', icon: '🏁' }
];
`;

  // Write the file
  fs.writeFileSync(outputPath, output, 'utf8');

  const emojiCount = Object.keys(emojiData).length;
  // Report the on-disk size: emoji are multi-byte, so string length would under-report.
  const fileSize = (Buffer.byteLength(output, 'utf8') / 1024).toFixed(2);
  console.log(`✅ Generated ${emojiCount} emojis → ${fileSize} KB`);
}