mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
b73f364411
* refactor: extract path-security.ts shared module validateOutputPath, validateReadPath, and SAFE_DIRECTORIES were duplicated across write-commands.ts, meta-commands.ts, and read-commands.ts. Extract to a single shared module with re-exports for backward compatibility. Also adds validateTempPath() for the upcoming GET /file endpoint (TEMP_DIR only, not cwd, to prevent remote agents from reading project files). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: default paired agents to full access, split SCOPE_CONTROL The trust boundary for paired agents is the pairing ceremony itself, not the scope. An agent with write scope can already click anything and navigate anywhere. Gating js/cookies behind --admin was security theater. Changes: - Default pair scopes: read+write+admin+meta (was read+write) - New SCOPE_CONTROL for browser-wide destructive ops (stop, restart, disconnect, state, handoff, resume, connect) - --admin flag now grants control scope (backward compat) - New --restrict flag for limited access (e.g., --restrict read) - Updated hint text: "re-pair with --control" instead of "--admin" Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add media and data commands for page content extraction media command: discovers all img/video/audio/background-image elements on the page. Returns JSON with URLs, dimensions, srcset, loading state, HLS/DASH detection. Supports --images/--videos/--audio filters and optional CSS selector scoping. data command: extracts structured data embedded in pages (JSON-LD, Open Graph, Twitter Cards, meta tags). One command returns product prices, article metadata, social share info without DOM scraping. Both are READ scope with untrusted content wrapping. Shared media-extract.ts helper for reuse by the upcoming scrape command. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add download, scrape, and archive commands download: fetch any URL or @ref element to disk using browser session cookies via page.request.fetch(). Supports blob: URLs via in-page base64 conversion. --base64 flag returns inline data URI (cap 10MB). Detects HLS/DASH and rejects with yt-dlp hint. scrape: bulk media download composing media discovery + download loop. Sequential with 100ms delay, URL deduplication, configurable --limit. Writes manifest.json with per-file metadata for machine consumption. archive: saves complete page as MHTML via CDP Page.captureSnapshot. No silent fallback -- errors clearly if CDP unavailable. All three are WRITE scope (write to disk, blocked in watch mode). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add GET /file endpoint for remote agent file retrieval Remote paired agents can now retrieve downloaded files over HTTP. TEMP_DIR only (not cwd) to prevent project file exfiltration. - Bearer token auth (root or scoped with read scope) - Path validation via validateTempPath() (symlink-aware) - 200MB size cap - Extension-based MIME detection - Zero-copy streaming via Bun.file() Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add scroll --times N for automated repeated scrolling Extends the scroll command with --times N flag for infinite feed scraping. Scrolls N times with configurable --wait delay (default 1000ms) between each scroll for content loading. Usage: scroll --times 10 scroll --times 5 --wait 2000 scroll --times 3 .feed-container Composable with scrape: scroll to load content, then scrape images. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add network response body capture (--capture/--export/--bodies) The killer feature for social media scraping. 
Extends the existing network command to intercept API response bodies: network --capture [--filter graphql] # start capturing network --capture stop # stop network --export /tmp/api.jsonl # export as JSONL network --bodies # show summary Uses page.on('response') listener with URL pattern filtering. SizeCappedBuffer (50MB total, 5MB per-entry cap) evicts oldest entries when full. Binary responses stored as base64, text as-is. This lets agents tap Instagram's GraphQL API, TikTok's hydration data, and any SPA's internal API responses instead of fragile DOM scraping. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add screenshot --base64 for inline image return Returns data:image/png;base64,... instead of writing to disk. Cap at 10MB. Works with all screenshot modes (element, clip, viewport). Eliminates the two-step screenshot+file-serve dance for remote agents. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: add data platform tests and media fixture Tests for SizeCappedBuffer (eviction, export, summary), validateTempPath (TEMP_DIR only, rejects cwd), command registration (all new commands in correct scope sets), and MIME mapping source checks. Rich HTML fixture with: standard images, lazy-loaded images, srcset, video with sources + HLS, audio, CSS background-images, JSON-LD, Open Graph, Twitter Cards, and meta tags. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs: regenerate SKILL.md with Extraction category Add Extraction category to browse command table ordering. Regenerate SKILL.md files to include media, data, download, scrape, archive commands in the generated documentation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: bump version and changelog (v0.16.0.0) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
104 lines
3.7 KiB
TypeScript
104 lines
3.7 KiB
TypeScript
/**
 * Shared path validation — single source of truth for file path security.
 *
 * Previously duplicated across write-commands.ts, meta-commands.ts, and read-commands.ts.
 * All file I/O commands (screenshot, pdf, download, scrape, archive, eval) must
 * validate paths through these functions.
 *
 *   validateOutputPath(path) — for writing files (screenshot, pdf, download, scrape, archive)
 *   validateReadPath(path)   — for reading files (eval)
 *   validateTempPath(path)   — for serving files to remote agents (GET /file, TEMP_DIR only)
 *
 * Security invariants:
 *   1. All paths are resolved to absolute before checking
 *   2. Symlinks are resolved to catch traversal via a symlink inside a safe dir
 *   3. SAFE_DIRECTORIES = [TEMP_DIR, cwd] for local commands
 *   4. TEMP_ONLY = [TEMP_DIR] for remote file serving (prevents project file exfil)
 */
|
|
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
import { TEMP_DIR, isPathWithin } from './platform';
|
|
|
|
// Roots that local commands may read from or write to: TEMP_DIR and the
// process working directory. Each root is canonicalised with realpathSync so
// that symlinked locations (e.g., macOS /tmp → /private/tmp) compare equal to
// canonicalised user paths. A root that cannot be resolved is kept as given.
export const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()].map((root) => {
  try {
    return fs.realpathSync(root);
  } catch {
    return root;
  }
});
|
|
|
|
// Roots allowed for remote file serving: TEMP_DIR only, canonicalised with
// realpathSync. If TEMP_DIR cannot be resolved, the configured path is used
// unchanged.
const TEMP_ONLY = [TEMP_DIR].map((root) => {
  let canonical = root;
  try {
    canonical = fs.realpathSync(root);
  } catch {
    // Unresolvable — keep the configured path.
  }
  return canonical;
});
|
|
|
|
/**
 * Validate a file path for writing (screenshot, pdf, download, scrape, archive).
 *
 * The path is made absolute, symlinks are resolved, and the resulting location
 * must lie inside one of SAFE_DIRECTORIES.
 *
 * - If the target already exists and is a symlink, the link is followed and its
 *   real destination is what gets checked. Without this, a link placed inside a
 *   safe directory but pointing elsewhere would pass the parent-directory check
 *   while the actual write follows the link out of the safe directory.
 * - Otherwise the parent directory is canonicalised (the file itself may not
 *   exist yet). If the parent does not resolve, the grandparent is tried once;
 *   the check is then made against `<real grandparent>/<file name>`.
 *
 * @param filePath - Absolute or cwd-relative output path.
 * @throws Error if the canonical location is outside SAFE_DIRECTORIES, if the
 *   target is a symlink that cannot be resolved, or if neither the parent nor
 *   the grandparent directory can be resolved.
 */
export function validateOutputPath(filePath: string): void {
  const resolved = path.resolve(filePath);

  const isWithinSafeDir = (candidate: string): boolean =>
    SAFE_DIRECTORIES.some(safeDir => isPathWithin(candidate, safeDir));
  const outsideError = (): Error =>
    new Error(`Path must be within: ${SAFE_DIRECTORIES.join(', ')}`);

  // An existing symlink at the target must be judged by where it points,
  // because a write would follow it.
  let targetIsSymlink = false;
  try {
    targetIsSymlink = fs.lstatSync(resolved).isSymbolicLink();
  } catch {
    // Target does not exist yet (or cannot be inspected); the parent-directory
    // check below still applies.
  }
  if (targetIsSymlink) {
    let realTarget: string;
    try {
      realTarget = fs.realpathSync(resolved);
    } catch {
      // Dangling or unresolvable link: we cannot prove where a write would land.
      throw outsideError();
    }
    if (!isWithinSafeDir(realTarget)) {
      throw outsideError();
    }
    return;
  }

  // Resolve real path of the parent directory to catch symlinks.
  // The file itself may not exist yet (e.g., screenshot output).
  // This also handles macOS /tmp → /private/tmp transparently.
  const dir = path.dirname(resolved);
  let realDir: string;
  try {
    realDir = fs.realpathSync(dir);
  } catch {
    try {
      // Parent is missing or unresolvable: fall back one level up.
      realDir = fs.realpathSync(path.dirname(dir));
    } catch {
      throw outsideError();
    }
  }

  const realResolved = path.join(realDir, path.basename(resolved));
  if (!isWithinSafeDir(realResolved)) {
    throw outsideError();
  }
}
|
|
|
|
/**
 * Validate a file path for reading (eval command).
 *
 * The path is made absolute and canonicalised with realpathSync, then checked
 * against SAFE_DIRECTORIES. When the file does not exist (ENOENT), the parent
 * directory is canonicalised instead and the file name re-appended; if the
 * parent cannot be resolved either, the plain absolute path is checked.
 *
 * @param filePath - Absolute or cwd-relative path to read.
 * @throws Error if resolution fails for a reason other than ENOENT, or if the
 *   canonical path is outside SAFE_DIRECTORIES.
 */
export function validateReadPath(filePath: string): void {
  const absolute = path.resolve(filePath);

  const canonicalize = (): string => {
    try {
      return fs.realpathSync(absolute);
    } catch (err) {
      const code = (err as { code?: string }).code;
      if (code !== 'ENOENT') {
        throw new Error(`Cannot resolve real path: ${filePath} (${code})`);
      }
    }
    // The file is missing: canonicalise its parent directory instead.
    try {
      const realParent = fs.realpathSync(path.dirname(absolute));
      return path.join(realParent, path.basename(absolute));
    } catch {
      return absolute;
    }
  };

  const candidate = canonicalize();
  for (const safeDir of SAFE_DIRECTORIES) {
    if (isPathWithin(candidate, safeDir)) {
      return;
    }
  }
  throw new Error(`Path must be within: ${SAFE_DIRECTORIES.join(', ')}`);
}
|
|
|
|
/**
 * Validate a file path for remote serving (GET /file). TEMP_DIR only, not cwd.
 *
 * The file must exist: its path is made absolute and canonicalised with
 * realpathSync, and the canonical path must lie inside TEMP_ONLY.
 *
 * @param filePath - Absolute or cwd-relative path of the file to serve.
 * @throws Error 'File not found' when resolution fails with ENOENT, a
 *   'Cannot resolve path' error for any other resolution failure, or a
 *   'Path must be within' error when the file is outside TEMP_ONLY.
 */
export function validateTempPath(filePath: string): void {
  const absolute = path.resolve(filePath);

  let canonical: string;
  try {
    canonical = fs.realpathSync(absolute);
  } catch (err) {
    const missing = (err as { code?: string }).code === 'ENOENT';
    throw new Error(missing ? 'File not found' : `Cannot resolve path: ${filePath}`);
  }

  for (const tempRoot of TEMP_ONLY) {
    if (isPathWithin(canonical, tempRoot)) {
      return;
    }
  }
  throw new Error(`Path must be within: ${TEMP_ONLY.join(', ')} (remote file serving is restricted to temp directory)`);
}
|
|
|
|
/**
 * Escape regex metacharacters in a user-supplied string so it can be embedded
 * in a RegExp and matched literally (prevents ReDoS via injected patterns).
 *
 * Each of `. * + ? ^ $ { } ( ) | [ ] \` is prefixed with a backslash; every
 * other character is copied through unchanged.
 *
 * @param s - Raw text to be matched literally.
 * @returns The text with each metacharacter backslash-escaped.
 */
export function escapeRegExp(s: string): string {
  const metachars = new Set<string>(['.', '*', '+', '?', '^', '$', '{', '}', '(', ')', '|', '[', ']', '\\']);
  let escaped = '';
  for (const ch of s) {
    escaped += metachars.has(ch) ? '\\' + ch : ch;
  }
  return escaped;
}
|