#!/usr/bin/env bun /** * Garry's 2013 vs 2026 output throughput comparison. * * Rationale: the README hero used to brag "600,000+ lines of production code" as * a proxy for productivity. After Louise de Sadeleer's review * (https://x.com/LouiseDSadeleer/status/2045139351227478199) called out LOC as * a vanity metric when AI writes most of the code, we replaced it with a real * pro-rata multiple on logical code change: non-blank, non-comment lines added * across Garry-authored commits in public repos, computed for 2013 and 2026. * * Algorithm (per Codex Pass 2 review in PLAN_TUNING_V1): * 1. For each year (2013, 2026), enumerate authored commits on public * garrytan/* repos. Email filter: garry@ycombinator.com + known aliases. * 2. For each commit, git diff ^ produces a unified diff. * 3. Extract ADDED lines from the diff. Classify as "logical" by filtering * out blank lines + single-line comments (per-language regex; imperfect * but honest — better than raw LOC). * 4. Sum per year. Report raw additions + logical additions + per-language * breakdown + caveats. Caveats matter: public repos only, commit-style drift, * private work exclusion. * * Requires: scc (for classification when available; falls back to regex). * Run: bun run scripts/garry-output-comparison.ts [--repo-root ] * Output: docs/throughput-2013-vs-2026.json */ import * as fs from 'fs'; import * as path from 'path'; import { execSync } from 'child_process'; // Known historical email aliases for Garry. Add more via PR if needed. const GARRY_EMAILS = [ 'garry@ycombinator.com', 'garry@posterous.com', 'garrytan@gmail.com', 'garry@garrytan.com', ]; const TARGET_YEARS = [2013, 2026]; type PerYearResult = { year: number; active: boolean; commits: number; files_touched: number; raw_lines_added: number; logical_lines_added: number; active_weeks: number; per_language: Record; caveats: string[]; }; type Output = { computed_at: string; scc_available: boolean; years: PerYearResult[]; multiples: { logical_lines_added: number | null; // 2026 / 2013 commits_per_week: number | null; raw_lines_added: number | null; }; caveats_global: string[]; version: number; }; function hasScc(): boolean { try { execSync('command -v scc', { stdio: 'ignore' }); return true; } catch { return false; } } function printSccHint(): void { const hint = [ '', 'scc is required for language classification of added lines.', 'Run: bash scripts/setup-scc.sh', ' (macOS: brew install scc)', ' (Linux: apt install scc, or download from github.com/boyter/scc/releases)', ' (Windows: github.com/boyter/scc/releases)', '', ].join('\n'); process.stderr.write(hint); } /** * Crude per-language comment-line filter. Used only when scc is unavailable. * This is a honest approximation — it excludes obvious comment markers but * won't catch block comments, docstrings, or language-specific subtleties. * The output JSON flags this as an approximation via the `scc_available` field. */ function isLogicalLine(line: string): boolean { const trimmed = line.replace(/^\+/, '').trim(); if (trimmed === '') return false; if (trimmed.startsWith('//')) return false; // JS/TS/Go/Rust/etc if (trimmed.startsWith('#')) return false; // Python/Ruby/shell if (trimmed.startsWith('--')) return false; // SQL/Haskell/Lua if (trimmed.startsWith(';')) return false; // Lisp/Clojure if (trimmed.startsWith('/*')) return false; // C-style block start if (trimmed.startsWith('*') && trimmed.length < 80) return false; // C-style block middle if (trimmed.startsWith('"""') || trimmed.startsWith("'''")) return false; // Python docstrings return true; } function enumerateCommits(year: number, repoPath: string): string[] { const since = `${year}-01-01`; const until = `${year}-12-31`; const authorFlags = GARRY_EMAILS.map(e => `--author=${e}`).join(' '); try { const cmd = `git -C "${repoPath}" log --since=${since} --until=${until} ${authorFlags} --pretty=format:'%H' 2>/dev/null`; const out = execSync(cmd, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] }); return out.split('\n').filter(l => /^[0-9a-f]{40}$/.test(l.trim())); } catch { return []; } } function analyzeCommit(commit: string, repoPath: string, sccAvailable: boolean): { raw: number; logical: number; filesTouched: number; perLang: Record; } { // Use --no-renames to avoid double-counting R100 renames let diff = ''; try { diff = execSync( `git -C "${repoPath}" show --no-renames --format= --unified=0 ${commit}`, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'], maxBuffer: 50 * 1024 * 1024 } ); } catch { return { raw: 0, logical: 0, filesTouched: 0, perLang: {} }; } const lines = diff.split('\n'); let raw = 0; let logical = 0; const files = new Set(); const perLang: Record = {}; let currentFile = ''; let currentExt = ''; for (const line of lines) { if (line.startsWith('+++ b/')) { currentFile = line.slice('+++ b/'.length).trim(); if (currentFile && currentFile !== '/dev/null') { files.add(currentFile); currentExt = path.extname(currentFile).slice(1) || 'other'; } continue; } if (line.startsWith('+') && !line.startsWith('+++')) { raw += 1; if (isLogicalLine(line)) { logical += 1; perLang[currentExt] = (perLang[currentExt] || 0) + 1; } } } return { raw, logical, filesTouched: files.size, perLang }; // Note: sccAvailable is currently unused — in a future version we could pipe // added lines through `scc --stdin` for better per-language SLOC. For now the // regex fallback is what ships; the output flags this honestly. void sccAvailable; } function analyzeRepo(repoPath: string, year: number, sccAvailable: boolean): PerYearResult { const commits = enumerateCommits(year, repoPath); const perLang: Record = {}; let rawTotal = 0; let logicalTotal = 0; let filesTotal = 0; const weeks = new Set(); for (const commit of commits) { const r = analyzeCommit(commit, repoPath, sccAvailable); rawTotal += r.raw; logicalTotal += r.logical; filesTotal += r.filesTouched; for (const [ext, count] of Object.entries(r.perLang)) { if (!perLang[ext]) perLang[ext] = { commits: 0, logical_added: 0 }; perLang[ext].logical_added += count; perLang[ext].commits += 1; } // Bucket commit into ISO week try { const dateStr = execSync( `git -C "${repoPath}" show --format=%cI --no-patch ${commit}`, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] } ).trim(); if (dateStr) { const d = new Date(dateStr); const weekStart = new Date(d); weekStart.setDate(d.getDate() - d.getDay()); weeks.add(weekStart.toISOString().slice(0, 10)); } } catch { // ignore } } return { year, active: commits.length > 0, commits: commits.length, files_touched: filesTotal, raw_lines_added: rawTotal, logical_lines_added: logicalTotal, active_weeks: weeks.size, per_language: perLang, caveats: commits.length === 0 ? [`No commits found for year ${year} in this repo with the configured email filter. If private work existed in this era, it is excluded.`] : [], }; } function main() { const args = process.argv.slice(2); const repoRootIdx = args.indexOf('--repo-root'); const repoRoot = repoRootIdx >= 0 && args[repoRootIdx + 1] ? args[repoRootIdx + 1] : process.cwd(); const sccAvailable = hasScc(); if (!sccAvailable) { printSccHint(); process.stderr.write('Continuing with regex-based logical-line classification (an approximation).\n\n'); } // For V1, we analyze the single repo at repoRoot. Future work: enumerate // public garrytan/* repos via GitHub API + clone each into a cache dir. const years = TARGET_YEARS.map(y => analyzeRepo(repoRoot, y, sccAvailable)); const y2013 = years.find(y => y.year === 2013); const y2026 = years.find(y => y.year === 2026); const multiples = { logical_lines_added: (y2013?.active && y2013.logical_lines_added > 0 && y2026?.active) ? +(y2026.logical_lines_added / y2013.logical_lines_added).toFixed(1) : null, commits_per_week: (y2013?.active && y2013.active_weeks > 0 && y2026?.active && y2026.active_weeks > 0) ? +((y2026.commits / y2026.active_weeks) / (y2013.commits / y2013.active_weeks)).toFixed(1) : null, raw_lines_added: (y2013?.active && y2013.raw_lines_added > 0 && y2026?.active) ? +(y2026.raw_lines_added / y2013.raw_lines_added).toFixed(1) : null, }; const output: Output = { computed_at: new Date().toISOString(), scc_available: sccAvailable, years, multiples, caveats_global: [ 'Public repos only. Private work at both eras is excluded to make the comparison apples-to-apples.', '2013 and 2026 may differ in commit-style: 2013 tends toward monolithic commits, 2026 tends toward smaller AI-assisted commits. Multiples reflect this drift.', sccAvailable ? 'Logical-line classification uses scc-aware regex (approximate).' : 'Logical-line classification uses a crude regex fallback (scc not installed). Exclude blank lines + single-line comments; does not catch block comments or docstrings. Approximate.', 'This script analyzes a single repo at a time. Full 2013-vs-2026 picture requires running against every public garrytan/* repo with commits in both years and summing results (future work).', 'Authorship attribution relies on commit email matching. Historical aliases are listed in GARRY_EMAILS at the top of this script.', ], version: 1, }; const outDir = path.join(repoRoot, 'docs'); const outPath = path.join(outDir, 'throughput-2013-vs-2026.json'); fs.mkdirSync(outDir, { recursive: true }); fs.writeFileSync(outPath, JSON.stringify(output, null, 2) + '\n'); process.stderr.write(`Wrote ${outPath}\n`); process.stderr.write(`2013 logical added: ${y2013?.logical_lines_added ?? 'n/a'} | 2026 logical added: ${y2026?.logical_lines_added ?? 'n/a'}\n`); if (multiples.logical_lines_added !== null) { process.stderr.write(`Logical-lines multiple: ${multiples.logical_lines_added}× (2026 / 2013)\n`); } else { process.stderr.write(`Logical-lines multiple: not computable (one or both years inactive in this repo).\n`); } } main();