feat(browser-skills): bundled hackernews-frontpage reference skill

Smallest interesting browser-skill: scrapes HN front page, returns
30 stories as JSON. No auth, stable HTML, fully fixture-tested.

Files:
  SKILL.md                          frontmatter + prose
  script.ts                         exports parseStoriesFromHtml(html)
                                    main: goto + html + parse + JSON.stringify
  _lib/browse-client.ts             vendored copy of the SDK
  fixtures/hn-2026-04-26.html       captured front page (5 stories)
  script.test.ts                    13 test cases (fixture + edge cases)

The parser is a pure function over HTML so script.test.ts runs
without a daemon (just imports parseStoriesFromHtml and asserts).

This exercises every Phase 1 component end-to-end:
  - browse-client SDK (script imports browse from ./_lib/)
  - 3-tier lookup (hackernews-frontpage lives in the bundled tier)
  - scoped tokens (read+write is enough for goto + html)
  - spawn lifecycle ($B skill run hackernews-frontpage)
  - file-fixture testing ($B skill test hackernews-frontpage)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-26 05:06:27 -07:00
parent e6a27993fe
commit 8a60d99c74
5 changed files with 598 additions and 0 deletions
@@ -0,0 +1,52 @@
---
name: hackernews-frontpage
description: Scrape the Hacker News front page (titles, points, comment counts).
host: news.ycombinator.com
trusted: true
source: human
version: 1.0.0
args: []
triggers:
- scrape hacker news frontpage
- scrape hn frontpage
- get hn top stories
- latest hacker news stories
---
# Hacker News front-page scraper
Scrapes the Hacker News (`news.ycombinator.com`) front page and returns the
top 30 stories as JSON. Each story has its rank, HN item id, title, link URL,
point count, and comment count (the last two are `null` for job postings).
## Usage
```
$ $B skill run hackernews-frontpage
{
"stories": [
{ "rank": 1, "id": "40000001", "title": "...", "url": "...", "points": 412, "comments": 87 },
...
],
"count": 30
}
```
## How it works
1. Navigates to `https://news.ycombinator.com` via the daemon.
2. Reads the page HTML.
3. Parses each story row (HN's stable `tr.athing` structure) into a typed
`Story` record.
4. Emits a single JSON document on stdout.
## Why this is the reference skill
`hackernews-frontpage` is the smallest interesting browser-skill: no auth,
stable HTML, deterministic output, file-fixture-friendly. Every Phase 1
component (SDK, scoped tokens, three-tier lookup, spawn lifecycle) is
exercised by `$B skill run hackernews-frontpage` and the bundled
`script.test.ts`.
When HN's HTML changes and our selectors break, re-capture the fixture: the
test then fails against it, and the parser gets fixed before users notice.
That's the point.
@@ -0,0 +1,257 @@
/**
* browse-client — canonical SDK that browser-skill scripts import to drive the
* gstack daemon over loopback HTTP.
*
* Distribution model:
* This file is the canonical source. Each browser-skill ships a sibling
* copy at `<skill>/_lib/browse-client.ts` (Phase 2's generator copies it
* alongside every generated skill; Phase 1's bundled `hackernews-frontpage`
* reference skill ships a hand-copied version). The skill imports the
* sibling via relative path: `import { browse } from './_lib/browse-client'`.
*
* Why per-skill copies and not a single global SDK: each skill is fully
* portable (copy the directory anywhere, it runs), version drift is
* impossible (the SDK is frozen at the version the skill was authored
* against), no npm publish workflow, no fixed-path tilde imports.
*
* Auth resolution:
* 1. GSTACK_PORT + GSTACK_SKILL_TOKEN env vars (set by `$B skill run` when
* spawning the script). The token is a per-spawn scoped capability bound
* to read+write commands; it expires when the spawn ends.
* 2. State file fallback: read `BROWSE_STATE_FILE` env or `<git-root>/.gstack/browse.json`
* and use the `port` + `token` (the daemon root token). This path exists
* for developers running a skill directly via `bun run script.ts` outside
* the harness — your own authority, not an agent's.
*
* Trust:
* The SDK exposes only the daemon's existing HTTP surface (POST /command).
* No new capabilities. The token's scopes (read+write for spawned skills,
* full root for standalone debug) determine what actually executes.
*
* Zero side effects on import. Safe to import from tests or plain scripts.
*/
import * as fs from 'fs';
import * as path from 'path';
import * as cp from 'child_process';
/**
 * Optional overrides accepted by `resolveBrowseAuth` and the `BrowseClient`
 * constructor. Every field may be omitted; the documented default then applies.
 * When both `port` and `token` are supplied, neither the environment nor the
 * state file is consulted for credentials.
 */
export interface BrowseClientOptions {
/** Override port. Default: GSTACK_PORT env or state file. */
port?: number;
/** Override token. Default: GSTACK_SKILL_TOKEN env, then state file root token. */
token?: string;
/** Tab id to target (every command can scope to a tab). Default: BROWSE_TAB env or undefined (active tab). */
tabId?: number;
/** Per-request timeout in milliseconds. Default: 30_000. */
timeoutMs?: number;
/** Override state-file path. Default: BROWSE_STATE_FILE env or <git-root>/.gstack/browse.json. */
stateFile?: string;
}
/** Daemon endpoint + credential produced by `resolveBrowseAuth`. */
interface ResolvedAuth {
/** Port the daemon is reached on (requests go to 127.0.0.1:<port>). */
port: number;
/** Token sent as the `Authorization: Bearer` credential. */
token: string;
/**
 * Which lookup produced the result. Explicit `port` + `token` overrides are
 * reported as 'env' as well.
 */
source: 'env' | 'state-file';
}
/**
 * Resolve the daemon port + token.
 *
 * Lookup order:
 *   1. Explicit `opts.port` + `opts.token` (reported with source 'env').
 *   2. Environment: GSTACK_PORT / GSTACK_SKILL_TOKEN. `opts.port` and
 *      `opts.token` each override the matching variable individually, so a
 *      caller can pin one half and still pick the other up from the
 *      environment instead of silently dropping to the state file.
 *   3. State file: `opts.stateFile`, else BROWSE_STATE_FILE, else
 *      `defaultStateFile()`. It must be JSON with a numeric `port` and a
 *      string `token`; `opts.port` / `opts.token` still override each field.
 *
 * @param opts Optional overrides; see BrowseClientOptions.
 * @returns The resolved port and token plus which lookup produced them.
 * @throws Error when no lookup yields both a port and a token.
 */
export function resolveBrowseAuth(opts: BrowseClientOptions = {}): ResolvedAuth {
  if (opts.port !== undefined && opts.token !== undefined) {
    return { port: opts.port, token: opts.token, source: 'env' };
  }

  // 1. Env vars (set by $B skill run when spawning). Each half is resolved on
  //    its own: an unparseable GSTACK_PORT or an empty token counts as absent.
  const rawEnvPort = process.env.GSTACK_PORT;
  const parsedEnvPort = rawEnvPort ? parseInt(rawEnvPort, 10) : NaN;
  const envPort = opts.port ?? (Number.isNaN(parsedEnvPort) ? undefined : parsedEnvPort);
  const envToken = opts.token ?? (process.env.GSTACK_SKILL_TOKEN || undefined);
  if (envPort !== undefined && !Number.isNaN(envPort) && envToken !== undefined) {
    return { port: envPort, token: envToken, source: 'env' };
  }

  // 2. State file fallback (developer running `bun run script.ts` directly).
  const stateFile = opts.stateFile ?? process.env.BROWSE_STATE_FILE ?? defaultStateFile();
  if (stateFile && fs.existsSync(stateFile)) {
    try {
      const data: unknown = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
      if (typeof data === 'object' && data !== null) {
        const { port, token } = data as { port?: unknown; token?: unknown };
        if (typeof port === 'number' && typeof token === 'string') {
          return {
            port: opts.port ?? port,
            token: opts.token ?? token,
            source: 'state-file',
          };
        }
      }
    } catch {
      // Unreadable or malformed state file: fall through to the error below.
    }
  }

  throw new Error(
    'browse-client: cannot find daemon port + token. Either spawn via `$B skill run` ' +
    '(sets GSTACK_PORT + GSTACK_SKILL_TOKEN) or run from a project with a live daemon ' +
    '(.gstack/browse.json must exist).'
  );
}
/**
 * Default state-file location: `.gstack/browse.json` under the directory that
 * `git rev-parse --show-toplevel` prints, or under the current working
 * directory when git exits non-zero, prints nothing, or cannot be run.
 */
function defaultStateFile(): string | null {
  const underBase = (base: string): string => path.join(base, '.gstack', 'browse.json');
  try {
    const git = cp.spawnSync('git', ['rev-parse', '--show-toplevel'], { encoding: 'utf-8', timeout: 2000 });
    if (git.status === 0) {
      const topLevel = git.stdout.trim();
      if (topLevel) {
        return underBase(topLevel);
      }
    }
    return underBase(process.cwd());
  } catch {
    return underBase(process.cwd());
  }
}
/**
 * Error thrown by `BrowseClient`. `status` and `body` carry the HTTP status
 * and raw response text when the failure came from a non-2xx response; both
 * are left undefined otherwise.
 */
export class BrowseClientError extends Error {
  readonly status?: number;
  readonly body?: string;

  constructor(message: string, status?: number, body?: string) {
    super(message);
    this.status = status;
    this.body = body;
    this.name = 'BrowseClientError';
  }
}
/**
 * Thin client over the daemon's POST /command endpoint.
 *
 * Convenience methods cover the common cases (goto, click, text, snapshot,
 * etc.). For anything not exposed as a method, use `command(cmd, args)`.
 * Every method resolves with the raw response text and rejects with a
 * `BrowseClientError` on transport failure, timeout, or a non-2xx response.
 */
export class BrowseClient {
  readonly port: number;
  readonly token: string;
  readonly tabId?: number;
  readonly timeoutMs: number;

  /**
   * @param opts Optional overrides; port + token come from `resolveBrowseAuth`.
   * @throws Error (from `resolveBrowseAuth`) when no port + token can be found.
   */
  constructor(opts: BrowseClientOptions = {}) {
    const auth = resolveBrowseAuth(opts);
    this.port = auth.port;
    this.token = auth.token;
    this.tabId = opts.tabId ?? (process.env.BROWSE_TAB ? parseInt(process.env.BROWSE_TAB, 10) : undefined);
    this.timeoutMs = opts.timeoutMs ?? 30_000;
  }

  // ─── Low-level dispatch ─────────────────────────────────────────

  /**
   * Send an arbitrary command; returns raw response text.
   *
   * @param cmd Command name understood by the daemon.
   * @param args Positional string arguments for the command.
   * @throws BrowseClientError on timeout, connection failure, or non-2xx
   *   status (the latter carries `status` and `body`).
   */
  async command(cmd: string, args: string[] = []): Promise<string> {
    const body = JSON.stringify({
      command: cmd,
      args,
      // An unparseable BROWSE_TAB yields NaN; omit tabId rather than send it.
      ...(this.tabId !== undefined && !Number.isNaN(this.tabId) ? { tabId: this.tabId } : {}),
    });

    let resp: Response;
    let text: string;
    try {
      resp = await fetch(`http://127.0.0.1:${this.port}/command`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.token}`,
        },
        body,
        signal: AbortSignal.timeout(this.timeoutMs),
      });
      // The abort signal also covers the body read, so it stays inside the
      // try: a timeout while streaming the body must map to BrowseClientError
      // just like a timeout while waiting for headers.
      text = await resp.text();
    } catch (err: unknown) {
      throw describeTransportFailure(err, cmd, this.port, this.timeoutMs);
    }

    if (!resp.ok) {
      let message = `browse-client: command "${cmd}" failed with status ${resp.status}`;
      try {
        const parsed = JSON.parse(text);
        if (parsed.error) message += `: ${parsed.error}`;
      } catch {
        // Body was not JSON (or not an object): include a bounded excerpt.
        if (text) message += `: ${text.slice(0, 200)}`;
      }
      throw new BrowseClientError(message, resp.status, text);
    }
    return text;
  }

  // ─── Navigation ─────────────────────────────────────────────────
  async goto(url: string): Promise<string> { return this.command('goto', [url]); }
  async wait(arg: string): Promise<string> { return this.command('wait', [arg]); }

  // ─── Reading ────────────────────────────────────────────────────
  async text(selector?: string): Promise<string> {
    return this.command('text', selector ? [selector] : []);
  }
  async html(selector?: string): Promise<string> {
    return this.command('html', selector ? [selector] : []);
  }
  async links(): Promise<string> { return this.command('links'); }
  async forms(): Promise<string> { return this.command('forms'); }
  async accessibility(): Promise<string> { return this.command('accessibility'); }
  async attrs(selector: string): Promise<string> { return this.command('attrs', [selector]); }
  async media(...flags: string[]): Promise<string> { return this.command('media', flags); }
  async data(...flags: string[]): Promise<string> { return this.command('data', flags); }

  // ─── Interaction ────────────────────────────────────────────────
  async click(selector: string): Promise<string> { return this.command('click', [selector]); }
  async fill(selector: string, value: string): Promise<string> { return this.command('fill', [selector, value]); }
  async select(selector: string, value: string): Promise<string> { return this.command('select', [selector, value]); }
  async hover(selector: string): Promise<string> { return this.command('hover', [selector]); }
  async type(text: string): Promise<string> { return this.command('type', [text]); }
  async press(key: string): Promise<string> { return this.command('press', [key]); }
  async scroll(selector?: string): Promise<string> {
    return this.command('scroll', selector ? [selector] : []);
  }

  // ─── Snapshot + screenshot ──────────────────────────────────────
  /** Snapshot returns the ARIA tree. Pass flags like '-i' (interactive only), '-c' (compact). */
  async snapshot(...flags: string[]): Promise<string> { return this.command('snapshot', flags); }
  async screenshot(...args: string[]): Promise<string> { return this.command('screenshot', args); }
}

/**
 * Map a rejected `fetch` / body read to a BrowseClientError with a readable
 * message. Connection-refused is detected on the error itself and on its
 * `cause`, under both the POSIX code and the 'ConnectionRefused' spelling,
 * because runtimes differ in where and how they report it.
 */
function describeTransportFailure(err: unknown, cmd: string, port: number, timeoutMs: number): BrowseClientError {
  const info = (typeof err === 'object' && err !== null ? err : {}) as {
    name?: unknown;
    code?: unknown;
    message?: unknown;
    cause?: unknown;
  };
  if (info.name === 'TimeoutError' || info.name === 'AbortError') {
    return new BrowseClientError(`browse-client: command "${cmd}" timed out after ${timeoutMs}ms`);
  }
  const cause = (typeof info.cause === 'object' && info.cause !== null ? info.cause : {}) as { code?: unknown };
  const code = info.code ?? cause.code;
  if (code === 'ECONNREFUSED' || code === 'ConnectionRefused') {
    return new BrowseClientError(`browse-client: daemon not running on port ${port}`);
  }
  return new BrowseClientError(`browse-client: ${String(info.message ?? err)}`);
}
/**
 * Lazy facade behind the default `browse` export. Nothing is resolved at
 * import time; the first method call constructs a `BrowseClient` (which
 * resolves auth) and every later call reuses that same instance.
 */
class LazyBrowseClient {
  private client: BrowseClient | null = null;

  /** Return the cached client, constructing it on first use. */
  private resolve(): BrowseClient {
    if (this.client === null) {
      this.client = new BrowseClient();
    }
    return this.client;
  }

  // One pass-through per BrowseClient method.
  command(cmd: string, args: string[] = []): Promise<string> {
    return this.resolve().command(cmd, args);
  }

  goto(url: string): Promise<string> { return this.resolve().goto(url); }
  wait(arg: string): Promise<string> { return this.resolve().wait(arg); }

  text(selector?: string): Promise<string> { return this.resolve().text(selector); }
  html(selector?: string): Promise<string> { return this.resolve().html(selector); }
  links(): Promise<string> { return this.resolve().links(); }
  forms(): Promise<string> { return this.resolve().forms(); }
  accessibility(): Promise<string> { return this.resolve().accessibility(); }
  attrs(selector: string): Promise<string> { return this.resolve().attrs(selector); }
  media(...flags: string[]): Promise<string> { return this.resolve().media(...flags); }
  data(...flags: string[]): Promise<string> { return this.resolve().data(...flags); }

  click(selector: string): Promise<string> { return this.resolve().click(selector); }
  fill(selector: string, value: string): Promise<string> { return this.resolve().fill(selector, value); }
  select(selector: string, value: string): Promise<string> { return this.resolve().select(selector, value); }
  hover(selector: string): Promise<string> { return this.resolve().hover(selector); }
  type(text: string): Promise<string> { return this.resolve().type(text); }
  press(key: string): Promise<string> { return this.resolve().press(key); }
  scroll(selector?: string): Promise<string> { return this.resolve().scroll(selector); }

  snapshot(...flags: string[]): Promise<string> { return this.resolve().snapshot(...flags); }
  screenshot(...args: string[]): Promise<string> { return this.resolve().screenshot(...args); }
}

/**
 * Default singleton: a script can import `browse` and immediately
 * `await browse.goto(...)` without threading through a constructor.
 */
export const browse = new LazyBrowseClient();
@@ -0,0 +1,52 @@
<!DOCTYPE html><html lang="en" op="news"><head><meta charset="utf-8"><title>Hacker News</title></head>
<body><center><table id="hnmain" border="0" cellpadding="0" cellspacing="0" width="85%" bgcolor="#f6f6ef">
<tr><td>
<table border="0" cellpadding="0" cellspacing="0" class="itemlist">
<tr class="athing submission" id="40000001">
<td align="right" valign="top" class="title"><span class="rank">1.</span></td>
<td valign="top" class="votelinks"><center><a id="up_40000001" href="vote?id=40000001"><div class="votearrow" title="upvote"></div></a></center></td>
<td class="title"><span class="titleline"><a href="https://example.com/blog-post-1" rel="noreferrer">Show HN: A toy compiler in 200 lines</a> <span class="sitebit comhead"> (<a href="from?site=example.com"><span class="sitestr">example.com</span></a>)</span></span></td>
</tr>
<tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_40000001">412 points</span> by <a href="user?id=alice" class="hnuser">alice</a> <span class="age" title="2026-04-26T08:15:00"><a href="item?id=40000001">3 hours ago</a></span> <span id="unv_40000001"></span> | <a href="hide?id=40000001&amp;goto=news">hide</a> | <a href="item?id=40000001">87&nbsp;comments</a> </span></td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="athing submission" id="40000002">
<td align="right" valign="top" class="title"><span class="rank">2.</span></td>
<td valign="top" class="votelinks"><center><a id="up_40000002" href="vote?id=40000002"><div class="votearrow" title="upvote"></div></a></center></td>
<td class="title"><span class="titleline"><a href="https://example.org/database-internals" rel="noreferrer">Database internals: writing an LSM tree</a> <span class="sitebit comhead"> (<a href="from?site=example.org"><span class="sitestr">example.org</span></a>)</span></span></td>
</tr>
<tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_40000002">298 points</span> by <a href="user?id=bob" class="hnuser">bob</a> <span class="age" title="2026-04-26T07:42:00"><a href="item?id=40000002">4 hours ago</a></span> <span id="unv_40000002"></span> | <a href="hide?id=40000002&amp;goto=news">hide</a> | <a href="item?id=40000002">152&nbsp;comments</a> </span></td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="athing submission" id="40000003">
<td align="right" valign="top" class="title"><span class="rank">3.</span></td>
<td valign="top" class="votelinks"><center><a id="up_40000003" href="vote?id=40000003"><div class="votearrow" title="upvote"></div></a></center></td>
<td class="title"><span class="titleline"><a href="https://example.com/yc-w26-startup">Acme (YC W26) is hiring senior engineers (remote)</a> <span class="sitebit comhead"> (<a href="from?site=example.com"><span class="sitestr">example.com</span></a>)</span></span></td>
</tr>
<tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="age" title="2026-04-26T06:00:00"><a href="item?id=40000003">5 hours ago</a></span> </span></td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="athing submission" id="40000004">
<td align="right" valign="top" class="title"><span class="rank">4.</span></td>
<td valign="top" class="votelinks"><center><a id="up_40000004" href="vote?id=40000004"><div class="votearrow" title="upvote"></div></a></center></td>
<td class="title"><span class="titleline"><a href="https://example.net/ask-hn" rel="noreferrer">Ask HN: What&#x27;s your most underrated tool?</a></span></td>
</tr>
<tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_40000004">156 points</span> by <a href="user?id=carol" class="hnuser">carol</a> <span class="age" title="2026-04-26T05:30:00"><a href="item?id=40000004">6 hours ago</a></span> <span id="unv_40000004"></span> | <a href="hide?id=40000004&amp;goto=news">hide</a> | <a href="item?id=40000004">discuss</a> </span></td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="athing submission" id="40000005">
<td align="right" valign="top" class="title"><span class="rank">5.</span></td>
<td valign="top" class="votelinks"><center><a id="up_40000005" href="vote?id=40000005"><div class="votearrow" title="upvote"></div></a></center></td>
<td class="title"><span class="titleline"><a href="https://example.io/quantum&amp;chess">Why quantum &amp; chess engines disagree</a> <span class="sitebit comhead"> (<a href="from?site=example.io"><span class="sitestr">example.io</span></a>)</span></span></td>
</tr>
<tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_40000005">73 points</span> by <a href="user?id=dave" class="hnuser">dave</a> <span class="age" title="2026-04-26T04:00:00"><a href="item?id=40000005">7 hours ago</a></span> <span id="unv_40000005"></span> | <a href="hide?id=40000005&amp;goto=news">hide</a> | <a href="item?id=40000005">12&nbsp;comments</a> </span></td></tr>
<tr class="spacer" style="height:5px"></tr>
</table>
</td></tr>
</table></center></body></html>
@@ -0,0 +1,105 @@
/**
* hackernews-frontpage script tests — exercise parseStoriesFromHtml against
* the bundled HN fixture. No daemon, no network: the parser is a pure function
* over HTML, so we test it directly.
*/
import { describe, it, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import { parseStoriesFromHtml } from './script';
// Captured front page, read once as text when the test module loads.
const FIXTURE_FILE = path.join(__dirname, 'fixtures', 'hn-2026-04-26.html');
const FIXTURE = fs.readFileSync(FIXTURE_FILE, 'utf-8');
describe('parseStoriesFromHtml against bundled HN fixture', () => {
  // Every case re-parses the fixture so no state is shared between cases.
  const parseFixture = () => parseStoriesFromHtml(FIXTURE);

  it('returns 5 stories (matching the fixture)', () => {
    expect(parseFixture()).toHaveLength(5);
  });

  it('assigns 1-based ranks in document order', () => {
    const ranks = parseFixture().map(({ rank }) => rank);
    expect(ranks).toEqual([1, 2, 3, 4, 5]);
  });

  it('extracts ids matching the tr.athing[id] attribute', () => {
    const ids = parseFixture().map(({ id }) => id);
    expect(ids).toEqual([
      '40000001', '40000002', '40000003', '40000004', '40000005',
    ]);
  });

  it('extracts titles and decodes HTML entities', () => {
    const [first, second, , fourth, fifth] = parseFixture();
    expect(first.title).toBe('Show HN: A toy compiler in 200 lines');
    expect(second.title).toBe('Database internals: writing an LSM tree');
    expect(fourth.title).toBe("Ask HN: What's your most underrated tool?");
    expect(fifth.title).toBe('Why quantum & chess engines disagree');
  });

  it('extracts URLs and decodes ampersands', () => {
    const [first, second, , , fifth] = parseFixture();
    expect(first.url).toBe('https://example.com/blog-post-1');
    expect(second.url).toBe('https://example.org/database-internals');
    expect(fifth.url).toBe('https://example.io/quantum&chess');
  });

  it('parses point counts as numbers', () => {
    const [first, second, , fourth, fifth] = parseFixture();
    expect(first.points).toBe(412);
    expect(second.points).toBe(298);
    expect(fourth.points).toBe(156);
    expect(fifth.points).toBe(73);
  });

  it('parses comment counts as numbers', () => {
    const [first, second, , , fifth] = parseFixture();
    expect(first.comments).toBe(87);
    expect(second.comments).toBe(152);
    expect(fifth.comments).toBe(12);
  });

  it('treats "discuss" links as 0 comments', () => {
    const askHn = parseFixture()[3];
    expect(askHn.comments).toBe(0);
  });

  it('returns null points + null comments for job postings', () => {
    // The third row of the fixture is the YC-hiring posting.
    const job = parseFixture()[2];
    expect(job.title).toContain('YC W26');
    expect(job.points).toBeNull();
    expect(job.comments).toBeNull();
  });

  it('returns [] for empty HTML', () => {
    const parsed = parseStoriesFromHtml('');
    expect(parsed).toEqual([]);
  });

  it('returns [] for HTML with no story rows', () => {
    const parsed = parseStoriesFromHtml('<html><body><p>nothing here</p></body></html>');
    expect(parsed).toEqual([]);
  });

  it('does not fabricate stories from arbitrary tr.athing rows missing titleline', () => {
    const rowWithoutTitle = '<tr class="athing" id="999"><td>nothing</td></tr>';
    expect(parseStoriesFromHtml(rowWithoutTitle)).toEqual([]);
  });
});
describe('output shape', () => {
  it('every story has all required keys', () => {
    // points/comments are allowed to be null (job rows), otherwise numeric.
    const isNullOrNumber = (value: unknown): boolean =>
      value === null || typeof value === 'number';

    parseStoriesFromHtml(FIXTURE).forEach(story => {
      expect(typeof story.rank).toBe('number');
      expect(typeof story.id).toBe('string');
      expect(typeof story.title).toBe('string');
      expect(typeof story.url).toBe('string');
      expect(isNullOrNumber(story.points)).toBe(true);
      expect(isNullOrNumber(story.comments)).toBe(true);
    });
  });
});
@@ -0,0 +1,132 @@
/**
* hackernews-frontpage — scrape the HN front page and emit JSON.
*
* Output protocol:
* stdout = a single JSON document on success: { stories: Story[], count }
* stderr = anything we want logged (currently nothing)
* exit 0 on success, nonzero on parse / network failure.
*
* The parser logic (`parseStoriesFromHtml`) is exported so script.test.ts can
* exercise it against bundled HTML fixtures without spinning up the daemon.
*/
import { browse } from './_lib/browse-client';
/** One front-page entry as produced by `parseStoriesFromHtml`. */
export interface Story {
/** 1-based rank as displayed on HN. */
rank: number;
/** HN item id (the integer in `tr.athing[id]`), kept as the digit string from the attribute. */
id: string;
/** Title text with markup removed, entities decoded, and surrounding whitespace trimmed. */
title: string;
/**
 * Outbound URL the title links to: the anchor's `href` with entities decoded.
 * It is returned as written in the markup and is not resolved against a base URL.
 */
url: string;
/** null when the row has no score (job postings). */
points: number | null;
/** null when the row has no comments link (job postings); 0 when the link reads "discuss". */
comments: number | null;
}
/** Shape of the single JSON document the script writes to stdout. */
export interface Output {
/** Parsed stories in document order. */
stories: Story[];
/** Number of entries in `stories`. */
count: number;
}
/** Page that `main` navigates to before reading the HTML. */
const FRONT_PAGE_URL = 'https://news.ycombinator.com/';
/** Named character references understood by `decodeHtmlEntities`. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['quot', '"'],
  ['apos', "'"],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
]);

/**
 * Parse HN front-page HTML into Story[].
 *
 * Each story is a pair of rows:
 *   <tr class="athing submission" id="<itemid>">
 *     <td class="title"><span class="rank">N.</span></td>
 *     <td class="votelinks">...</td>
 *     <td class="title"><span class="titleline"><a href="<url>">title</a> ...</span></td>
 *   </tr>
 *   <tr><td colspan="2"></td><td class="subtext"><span class="subline">
 *     <span class="score" id="score_<itemid>">N points</span>
 *     ... <a href="item?id=<itemid>">N comments</a>
 *   </span></td></tr>
 *
 * Job postings ("Foo (YC X25) is hiring...") omit the score and comments —
 * those fields come back as null. A "discuss" link counts as 0 comments.
 *
 * @param html Full page HTML (or any fragment containing story rows).
 * @returns One Story per `tr.athing` row that contains a title link, in
 *   document order; `[]` when there are none. `rank` is the number shown in
 *   the row's `span.rank`, falling back to the row's 1-based position among
 *   `tr.athing` rows when that span is absent.
 */
export function parseStoriesFromHtml(html: string): Story[] {
  const stories: Story[] = [];
  // Match each `tr.athing` row, capturing the id attribute and the row body.
  // Created per call because a /g regex carries lastIndex state.
  const rowRegex = /<tr\s+[^>]*\bclass="athing[^"]*"[^>]*\bid="(\d+)"[^>]*>([\s\S]*?)<\/tr>/g;
  let match: RegExpExecArray | null;
  let position = 0;

  while ((match = rowRegex.exec(html)) !== null) {
    position++;
    const id = match[1];
    const rowBody = match[2];

    // Title link: <span class="titleline"><a href="..." ...>title</a>
    const titleMatch = rowBody.match(/<span\s+class="titleline"[^>]*>\s*<a\s+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/);
    if (!titleMatch) continue;
    const url = decodeHtmlEntities(titleMatch[1]);
    // Strip real markup BEFORE decoding: decoding first would turn an escaped
    // "&lt;b&gt;" in the title into "<b>", which stripTags would then delete.
    const title = decodeHtmlEntities(stripTags(titleMatch[2])).trim();

    // Prefer the rank HN actually displays; fall back to row position.
    const rankMatch = rowBody.match(/<span\s+class="rank"[^>]*>\s*(\d+)\.?\s*<\/span>/);
    const rank = rankMatch ? parseInt(rankMatch[1], 10) : position;

    // The subtext row follows the story row. Bound the lookahead at the next
    // spacer or story row, whichever comes first; unbounded, the score of
    // story N+1 would leak into story N when N is a job posting.
    const tail = html.slice(match.index + match[0].length);
    const boundary = tail.search(/<tr\b[^>]*\bclass="(?:spacer|athing)\b/);
    const subtext = boundary >= 0 ? tail.slice(0, boundary) : tail;

    let points: number | null = null;
    let comments: number | null = null;

    const scoreMatch = subtext.match(/<span\s+class="score"[^>]*>(\d+)\s*points?<\/span>/);
    if (scoreMatch) points = parseInt(scoreMatch[1], 10);

    // Comment count: an anchor like `<a href="item?id=...">N comments</a>`,
    // or `discuss` (treated as 0). Skip "hide" / "context" / "from" links.
    const commentsMatch = subtext.match(/<a\s+href="item\?id=\d+"[^>]*>(\d+)\s*(?:&nbsp;)?\s*comments?<\/a>/);
    if (commentsMatch) {
      comments = parseInt(commentsMatch[1], 10);
    } else if (/discuss<\/a>/.test(subtext)) {
      comments = 0;
    }

    stories.push({ rank, id, title, url, points, comments });
  }
  return stories;
}

/** Remove anything that looks like an HTML tag (`<...>`) from `s`. */
function stripTags(s: string): string {
  return s.replace(/<[^>]*>/g, '');
}

/**
 * Decode character references in a single left-to-right pass, so text
 * produced by one replacement is never decoded again (`&amp;lt;` becomes
 * `&lt;`, not `<`). Handles the names in NAMED_ENTITIES plus decimal
 * (`&#39;`) and hexadecimal (`&#x27;`) numeric references; anything
 * unrecognised or out of the Unicode range is left untouched.
 */
function decodeHtmlEntities(s: string): string {
  return s.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, (entity: string, body: string): string => {
    if (body.charAt(0) !== '#') {
      return NAMED_ENTITIES.get(body) ?? entity;
    }
    const marker = body.charAt(1);
    const codePoint = marker === 'x' || marker === 'X'
      ? parseInt(body.slice(2), 16)
      : parseInt(body.slice(1), 10);
    return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}
// ─── Main entry (only when run as a script, not when imported by tests) ─
if (import.meta.main) {
  await main();
}

/**
 * Navigate to the front page, read its HTML, parse it, and write a single
 * JSON document (`{ stories, count }`) followed by a newline to stdout.
 *
 * @throws Error when no stories could be parsed. An empty result means the
 *   markup no longer matches the parser, which this script's output protocol
 *   treats as a failure (nonzero exit) rather than a successful empty list.
 *   Failures from the `browse` calls propagate unchanged.
 */
async function main(): Promise<void> {
  await browse.goto(FRONT_PAGE_URL);
  const html = await browse.html();
  const stories = parseStoriesFromHtml(html);
  if (stories.length === 0) {
    throw new Error(`hackernews-frontpage: parsed 0 stories from ${FRONT_PAGE_URL}`);
  }
  const output: Output = { stories, count: stories.length };
  process.stdout.write(JSON.stringify(output) + '\n');
}