mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-05 17:46:37 +02:00
ea51b45e08
* fix(browse): sanitize lone Unicode surrogates at commandResult chokepoint + /batch envelope (#1440) Page captures with mixed-script Unicode round-trip cleanly to the Claude API. Two new utilities in browse/src/sanitize.ts: stripLoneSurrogates for raw UTF-16 strings, stripLoneSurrogateEscapes for \uXXXX JSON escape text. sanitizeBody picks the right pass based on cr.json. buildCommandResponse is extracted from handleCommand (now exported) and applies sanitization before new Response(). /batch was bypassing this chokepoint via direct JSON.stringify, so it sanitizes each cr.result before pushing AND wraps the envelope with stripLoneSurrogateEscapes. Defense in depth wraps at getCleanText, getCleanTextWithStripping, html, accessibility, and snapshot.ts return points so downstream consumers (datamarking, envelope wrapping) see sanitized text before the response is built. 25 new unit tests across sanitize.test.ts and build-command-response.test.ts. content-security.test.ts updated to accept either pre- or post-sanitize form of the snapshot scoped branch (source-level regression check). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * feat: bug fix wave v1.36.0.0 — Implementation Tasks, allowlist patterns, surrogate-safe page captures (#1440 #1452 #1454) Three filed issues land together: #1440 — Page captures from real-world HTML hit 'API Error 400: no low surrogate in string'. Sanitizers + buildCommandResponse extraction shipped in the prior commit; this commit adds the migration script that patches existing brain-allowlist/privacy-map/gitattributes installs and the supporting tests. #1452 — Federation sync was silently skipping root-level design and test-plan docs. bin/gstack-artifacts-init adds two patterns to all three managed blocks (.brain-allowlist, .brain-privacy-map.json, .gitattributes). Idempotent migration v1.36.0.0.sh repairs existing installs in place via jq (preserves JSON validity) — no commit + push from the migration. 
#1454 — All four review skills (CEO/design/eng/DX) emit an Implementation Tasks markdown section AND write a jq-built JSONL artifact per phase. /autoplan reads all four files, scopes by current branch + 5-commit window, dedupes on exact (component, sorted(files), title), and renders an aggregated list in the Final Approval Gate. New tests: - browse/test/sanitize.test.ts (18 cases) - browse/test/build-command-response.test.ts (7 cases) - test/artifacts-init-migration.test.ts (7 cases) VERSION → 1.36.0.0. Skips the v1.34.x slot taken by 'gstack consumable as submodule' and the v1.35.0.0 slot taken by /document-generate. #1428 was shipped separately by v1.34.2.0 with a different approach; follow-up #1503 filed for the bare-path filesystem boundary concern surfaced during our analysis. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * chore: bump to v1.38.1.0 VERSION + package.json + CHANGELOG header + migration filename + test reference all consistently at v1.38.1.0. Migration renamed: gstack-upgrade/migrations/v1.38.0.0.sh -> v1.38.1.0.sh. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
545 lines
22 KiB
TypeScript
545 lines
22 KiB
TypeScript
/**
|
|
* Read commands — extract data from pages without side effects
|
|
*
|
|
* text, html, links, forms, accessibility, js, eval, css, attrs,
|
|
* console, network, cookies, storage, perf
|
|
*/
|
|
|
|
import type { TabSession } from './tab-session';
|
|
import type { BrowserManager } from './browser-manager';
|
|
import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
|
|
import type { Page, Frame } from 'playwright';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
import { TEMP_DIR } from './platform';
|
|
import { inspectElement, formatInspectorResult, getModificationHistory } from './cdp-inspector';
|
|
import { validateReadPath } from './path-security';
|
|
import { stripLoneSurrogates } from './sanitize';
|
|
// Re-export for backward compatibility (tests import from read-commands)
|
|
export { validateReadPath } from './path-security';
|
|
|
|
// Redaction patterns for sensitive cookie/storage values — exported for test coverage
|
|
// Name pattern (case-insensitive): matches when one of the listed words (token, secret, key,
// password, credential, auth, jwt, session, csrf, sid) appears as a whole segment — bounded by
// the start/end of the name or by one of `_`, `.`, `-` — or when the name contains "api",
// at most one arbitrary character, then "key" (e.g. "apikey", "api_key").
export const SENSITIVE_COOKIE_NAME = /(^|[_.-])(token|secret|key|password|credential|auth|jwt|session|csrf|sid)($|[_.-])|api.?key/i;
|
|
// Value pattern (case-sensitive, anchored at the start only): matches values that BEGIN with one
// of the listed credential-style prefixes (e.g. `eyJ`, `sk-`, `ghp_`, `xoxb-`, `AKIA` + 16
// uppercase alphanumerics, `Bearer ` followed by whitespace).
export const SENSITIVE_COOKIE_VALUE = /^(eyJ|sk-|sk_live_|sk_test_|pk_live_|pk_test_|rk_live_|sk-ant-|ghp_|gho_|github_pat_|xox[bpsa]-|AKIA[A-Z0-9]{16}|AIza|SG\.|Bearer\s|sbp_)/;
|
|
|
|
/**
 * Detect whether `code` contains the `await` keyword outside of comments.
 *
 * String and template literals are matched first and kept verbatim, so a `//` or
 * `/*` that appears inside a literal (for example the `//` of a URL) is no longer
 * mistaken for the start of a comment — previously that swallowed the rest of the
 * line and could hide a real `await` that followed it.
 *
 * Accepted risk (unchanged): the word `await` inside a string literal still counts
 * as a hit and triggers wrapping, which is harmless.
 *
 * @param code - JavaScript source snippet to scan.
 * @returns `true` when `await` appears as a whole word outside `//` and block comments.
 */
function hasAwait(code: string): boolean {
  // Alternation order matters: at any position a literal is tried before a comment,
  // and whichever construct starts first in the text consumes its whole span.
  //   group 1 — "…", '…' (single line, backslash escapes allowed) or `…` (may span lines)
  //   otherwise — a // line comment or a /* … */ block comment
  const literalOrComment =
    /("(?:\\[\s\S]|[^"\\\n])*"|'(?:\\[\s\S]|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`)|\/\/.*$|\/\*[\s\S]*?\*\//gm;
  const stripped = code.replace(
    literalOrComment,
    (_match: string, literal: string | undefined) => literal ?? '',
  );
  return /\bawait\b/.test(stripped);
}
|
|
|
|
/**
 * Decide which async-IIFE body shape a snippet needs: a statement block `{…}` or a
 * parenthesised expression `(…)`.
 *
 * @param code - JavaScript snippet; surrounding whitespace is ignored.
 * @returns `true` when the trimmed snippet spans several lines, contains a
 *          statement-style keyword, or contains a semicolon; `false` otherwise.
 */
function needsBlockWrapper(code: string): boolean {
  const snippet = code.trim();
  const isMultiLine = snippet.includes('\n');
  const hasStatementKeyword =
    /\b(const|let|var|function|class|return|throw|if|for|while|switch|try)\b/.test(snippet);
  const hasSemicolon = snippet.indexOf(';') !== -1;
  return isMultiLine || hasStatementKeyword || hasSemicolon;
}
|
|
|
|
/**
 * Prepare a snippet for page.evaluate().
 *
 * Snippets without `await` are returned untouched. Snippets with `await` are wrapped
 * in an immediately-invoked async arrow function: with a block body (original text,
 * untrimmed, on its own lines) when the snippet looks like statements, or with a
 * parenthesised expression body (trimmed text) otherwise.
 *
 * @param code - JavaScript snippet supplied by the caller.
 * @returns The snippet itself, or the async-IIFE source that wraps it.
 */
function wrapForEvaluate(code: string): string {
  if (hasAwait(code)) {
    const expression = code.trim();
    if (needsBlockWrapper(expression)) {
      return `(async()=>{\n${code}\n})()`;
    }
    return `(async()=>(${expression}))()`;
  }
  return code;
}
|
|
|
|
/**
 * Return the visible text of a page or frame, one non-empty trimmed line per row.
 *
 * Works on a clone of `<body>` so the live DOM is not modified; `script`, `style`,
 * `noscript` and `svg` elements are removed from the clone before its `innerText`
 * is read. The result is passed through `stripLoneSurrogates` before returning.
 * Exported so meta-commands (diff) can reuse it.
 *
 * @param page - Playwright Page or Frame to read from.
 * @returns The cleaned text, or an empty string when the document has no body.
 */
export async function getCleanText(page: Page | Frame): Promise<string> {
  const extracted = await page.evaluate(() => {
    if (!document.body) return '';

    const copy = document.body.cloneNode(true) as HTMLElement;
    for (const unwanted of copy.querySelectorAll('script, style, noscript, svg')) {
      unwanted.remove();
    }

    const kept: string[] = [];
    for (const rawLine of copy.innerText.split('\n')) {
      const line = rawLine.trim();
      if (line.length > 0) kept.push(line);
    }
    return kept.join('\n');
  });

  return stripLoneSurrogates(extracted);
}
|
|
|
|
/**
 * Gate for JS execution once cookies have been imported for specific domains.
 *
 * If any cookie imports exist, the current page's hostname must equal one of the
 * imported domains (leading dot ignored) or be a subdomain of one; otherwise the
 * call throws. This is meant to stop cross-origin cookie exfiltration (e.g. via
 * `js document.cookie`) after the agent navigates to an untrusted page.
 *
 * @param bm - Browser manager queried for cookie-import state.
 * @param pageUrl - URL of the page the script would run on.
 * @throws Error when cookie imports exist and the hostname matches none of them.
 */
function assertJsOriginAllowed(bm: BrowserManager, pageUrl: string): void {
  if (!bm.hasCookieImports()) return;

  let hostname: string;
  try {
    ({ hostname } = new URL(pageUrl));
  } catch {
    // Unparseable URL: let it through.
    // NOTE(review): the original comment here named about:blank and data: URIs, but the
    // WHATWG URL parser accepts both (with an empty hostname), so they reach the domain
    // check below instead of this early return — confirm which behaviour is intended.
    return;
  }

  const domainList = [...bm.getCookieImportedDomains()];
  for (const entry of domainList) {
    // ".github.com" and "github.com" are treated alike: exact host or any subdomain.
    const bare = entry.replace(/^\./, '');
    const dotted = entry.startsWith('.') ? entry : '.' + entry;
    if (hostname === bare || hostname.endsWith(dotted)) return;
  }

  throw new Error(
    `JS execution blocked: current page (${hostname}) does not match any cookie-imported domain. ` +
    `Imported cookies for: ${domainList.join(', ')}. ` +
    `This prevents cross-origin cookie exfiltration. Navigate to an imported domain or run without imported cookies.`
  );
}
|
|
|
|
/**
 * Render a page.evaluate() result as text: anything whose `typeof` is 'object'
 * (including null) becomes pretty-printed JSON; everything else goes through
 * String(), with undefined rendered as the empty string.
 */
function formatEvaluateResult(result: unknown): string {
  return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
}

/**
 * Dispatch a read-only browse command and return its textual result.
 *
 * Supported commands: text, html, links, forms, accessibility, js, eval, css, attrs,
 * console, network, dialog, is, cookies, storage, perf, inspect, media, data.
 *
 * @param command - Command name (see list above).
 * @param args - Positional arguments and flags for the command.
 * @param session - Tab session supplying the page, the active frame and ref resolution.
 * @param bm - Optional browser manager. When present it is used for the JS origin
 *             gate (js/eval), as the page/frame source for network capture, media
 *             and data, and as the stash for the last inspector result. When absent
 *             those commands fall back to the session's page / active frame and the
 *             inspector result is not stashed (previously they crashed with a
 *             TypeError on the undefined manager).
 * @returns The command output as a string (plain text or pretty-printed JSON).
 * @throws Error on unknown commands, missing required arguments, a blocked JS origin,
 *         or a missing eval file; errors from the page or helpers propagate unchanged.
 */
export async function handleReadCommand(
  command: string,
  args: string[],
  session: TabSession,
  bm?: BrowserManager,
): Promise<string> {
  const page = session.getPage();
  // Frame-aware target for content extraction
  const target = session.getActiveFrameOrPage();

  switch (command) {
    case 'text': {
      return getCleanText(target);
    }

    case 'html': {
      const selector = args[0];
      if (selector) {
        const resolved = await session.resolveRef(selector);
        if ('locator' in resolved) {
          return stripLoneSurrogates(await resolved.locator.innerHTML({ timeout: 5000 }));
        }
        return stripLoneSurrogates(await target.locator(resolved.selector).innerHTML({ timeout: 5000 }));
      }
      // page.content() is page-only; use evaluate for frame compat
      const doctype = await target.evaluate(() => {
        const dt = document.doctype;
        return dt ? `<!DOCTYPE ${dt.name}>` : '';
      });
      const html = await target.evaluate(() => document.documentElement.outerHTML);
      return stripLoneSurrogates(doctype ? `${doctype}\n${html}` : html);
    }

    case 'links': {
      const links = await target.evaluate(() =>
        [...document.querySelectorAll('a[href]')].map(a => ({
          text: a.textContent?.trim().slice(0, 120) || '',
          href: (a as HTMLAnchorElement).href,
        })).filter(l => l.text && l.href)
      );
      // slice(0, 120) counts UTF-16 code units, so it can cut a surrogate pair in half
      // and leave a lone surrogate at the end of the link text. Sanitize the output the
      // same way the other content-returning commands in this function do.
      return stripLoneSurrogates(links.map(l => `${l.text} → ${l.href}`).join('\n'));
    }

    case 'forms': {
      // The sensitive-name regex is spelled out inline because this callback is passed
      // to target.evaluate() and therefore cannot reference module-level constants.
      const forms = await target.evaluate(() => {
        return [...document.querySelectorAll('form')].map((form, i) => {
          const fields = [...form.querySelectorAll('input, select, textarea')].map(el => {
            const input = el as HTMLInputElement;
            return {
              tag: el.tagName.toLowerCase(),
              type: input.type || undefined,
              name: input.name || undefined,
              id: input.id || undefined,
              placeholder: input.placeholder || undefined,
              required: input.required || undefined,
              // Password fields and fields whose name/id looks sensitive are redacted.
              value: input.type === 'password'
                || (input.name && /(^|[_.-])(token|secret|key|password|credential|auth|jwt|session|csrf|sid)($|[_.-])|api.?key/i.test(input.name))
                || (input.id && /(^|[_.-])(token|secret|key|password|credential|auth|jwt|session|csrf|sid)($|[_.-])|api.?key/i.test(input.id))
                ? '[redacted]' : (input.value || undefined),
              options: el.tagName === 'SELECT'
                ? [...(el as HTMLSelectElement).options].map(o => ({ value: o.value, text: o.text }))
                : undefined,
            };
          });
          return {
            index: i,
            action: form.action || undefined,
            method: form.method || 'get',
            id: form.id || undefined,
            fields,
          };
        });
      });
      return JSON.stringify(forms, null, 2);
    }

    case 'accessibility': {
      const snapshot = await target.locator("body").ariaSnapshot();
      return stripLoneSurrogates(snapshot);
    }

    case 'js': {
      const expr = args[0];
      if (!expr) throw new Error('Usage: browse js <expression>');
      if (bm) assertJsOriginAllowed(bm, page.url());
      const wrapped = wrapForEvaluate(expr);
      const result = await target.evaluate(wrapped);
      return formatEvaluateResult(result);
    }

    case 'eval': {
      const filePath = args[0];
      if (!filePath) throw new Error('Usage: browse eval <js-file>');
      if (bm) assertJsOriginAllowed(bm, page.url());
      validateReadPath(filePath);
      if (!fs.existsSync(filePath)) throw new Error(`File not found: ${filePath}`);
      const code = fs.readFileSync(filePath, 'utf-8');
      const wrapped = wrapForEvaluate(code);
      const result = await target.evaluate(wrapped);
      return formatEvaluateResult(result);
    }

    case 'css': {
      const [selector, property] = args;
      if (!selector || !property) throw new Error('Usage: browse css <selector> <property>');
      const resolved = await session.resolveRef(selector);
      if ('locator' in resolved) {
        const value = await resolved.locator.evaluate(
          (el, prop) => getComputedStyle(el).getPropertyValue(prop),
          property
        );
        return value;
      }
      const value = await target.evaluate(
        ([sel, prop]) => {
          const el = document.querySelector(sel);
          if (!el) return `Element not found: ${sel}`;
          return getComputedStyle(el).getPropertyValue(prop);
        },
        [resolved.selector, property]
      );
      return value;
    }

    case 'attrs': {
      const selector = args[0];
      if (!selector) throw new Error('Usage: browse attrs <selector>');
      const resolved = await session.resolveRef(selector);
      if ('locator' in resolved) {
        const attrs = await resolved.locator.evaluate((el) => {
          const result: Record<string, string> = {};
          for (const attr of el.attributes) {
            result[attr.name] = attr.value;
          }
          return result;
        });
        return JSON.stringify(attrs, null, 2);
      }
      const attrs = await target.evaluate((sel: string) => {
        const el = document.querySelector(sel);
        if (!el) return `Element not found: ${sel}`;
        const result: Record<string, string> = {};
        for (const attr of el.attributes) {
          result[attr.name] = attr.value;
        }
        return result;
      }, resolved.selector);
      // A string here is the "Element not found" message, returned as-is.
      return typeof attrs === 'string' ? attrs : JSON.stringify(attrs, null, 2);
    }

    case 'console': {
      if (args[0] === '--clear') {
        consoleBuffer.clear();
        return 'Console buffer cleared.';
      }
      const entries = args[0] === '--errors'
        ? consoleBuffer.toArray().filter(e => e.level === 'error' || e.level === 'warning')
        : consoleBuffer.toArray();
      if (entries.length === 0) return args[0] === '--errors' ? '(no console errors)' : '(no console messages)';
      return entries.map(e =>
        `[${new Date(e.timestamp).toISOString()}] [${e.level}] ${e.text}`
      ).join('\n');
    }

    case 'network': {
      if (args[0] === '--clear') {
        networkBuffer.clear();
        return 'Network buffer cleared.';
      }

      // Network capture extensions
      if (args[0] === '--capture') {
        const {
          startCapture, stopCapture, getCaptureListener, isCaptureActive,
        } = await import('./network-capture');

        // `bm` is optional: use the manager's page when available, otherwise the
        // session's page, so start and stop address the same page either way.
        const capturePage = bm ? bm.getPage() : page;

        if (args[1] === 'stop') {
          // Detach listener from current page
          const listener = getCaptureListener();
          if (listener) capturePage.removeListener('response', listener);
          const result = stopCapture();
          return `Network capture stopped. ${result.count} responses captured (${result.sizeKB}KB).`;
        }

        // Start capture
        if (isCaptureActive()) return 'Capture already active. Use --capture stop first.';
        const filterIdx = args.indexOf('--filter');
        const filterPattern = filterIdx >= 0 ? args[filterIdx + 1] : undefined;
        const info = startCapture(filterPattern);
        // Attach listener to current page
        const listener = getCaptureListener();
        if (listener) capturePage.on('response', listener);
        return `Network capture started${info.filter ? ` (filter: ${info.filter})` : ''}. Use --capture stop to stop.`;
      }

      if (args[0] === '--export') {
        const { exportCapture } = await import('./network-capture');
        const { validateOutputPath: vop } = await import('./path-security');
        const exportPath = args[1];
        if (!exportPath) throw new Error('Usage: network --export <path>');
        vop(exportPath);
        const count = exportCapture(exportPath);
        return `Exported ${count} captured responses to ${exportPath}`;
      }

      if (args[0] === '--bodies') {
        const { getCaptureBuffer } = await import('./network-capture');
        return getCaptureBuffer().summary();
      }

      // Default: show request metadata
      if (networkBuffer.length === 0) return '(no network requests)';
      return networkBuffer.toArray().map(e =>
        `${e.method} ${e.url} → ${e.status || 'pending'} (${e.duration || '?'}ms, ${e.size || '?'}B)`
      ).join('\n');
    }

    case 'dialog': {
      if (args[0] === '--clear') {
        dialogBuffer.clear();
        return 'Dialog buffer cleared.';
      }
      if (dialogBuffer.length === 0) return '(no dialogs captured)';
      return dialogBuffer.toArray().map(e =>
        `[${new Date(e.timestamp).toISOString()}] [${e.type}] "${e.message}" → ${e.action}${e.response ? ` "${e.response}"` : ''}`
      ).join('\n');
    }

    case 'is': {
      const property = args[0];
      const selector = args[1];
      if (!property || !selector) throw new Error('Usage: browse is <property> <selector>\nProperties: visible, hidden, enabled, disabled, checked, editable, focused');

      const resolved = await session.resolveRef(selector);
      let locator;
      if ('locator' in resolved) {
        locator = resolved.locator;
      } else {
        locator = target.locator(resolved.selector);
      }

      switch (property) {
        case 'visible': return String(await locator.isVisible());
        case 'hidden': return String(await locator.isHidden());
        case 'enabled': return String(await locator.isEnabled());
        case 'disabled': return String(await locator.isDisabled());
        case 'checked': return String(await locator.isChecked());
        case 'editable': return String(await locator.isEditable());
        case 'focused': {
          const isFocused = await locator.evaluate(
            (el) => el === document.activeElement
          );
          return String(isFocused);
        }
        default:
          throw new Error(`Unknown property: ${property}. Use: visible, hidden, enabled, disabled, checked, editable, focused`);
      }
    }

    case 'cookies': {
      const cookies = await page.context().cookies();
      // Redact cookie values that look like secrets (consistent with storage redaction)
      const redacted = cookies.map(c => {
        if (SENSITIVE_COOKIE_NAME.test(c.name) || SENSITIVE_COOKIE_VALUE.test(c.value)) {
          return { ...c, value: `[REDACTED — ${c.value.length} chars]` };
        }
        return c;
      });
      return JSON.stringify(redacted, null, 2);
    }

    case 'storage': {
      if (args[0] === 'set' && args[1]) {
        const key = args[1];
        const value = args[2] || '';
        await target.evaluate(([k, v]: string[]) => localStorage.setItem(k, v), [key, value]);
        return `Set localStorage["${key}"]`;
      }
      const storage = await target.evaluate(() => ({
        localStorage: { ...localStorage },
        sessionStorage: { ...sessionStorage },
      }));
      // Redact values that look like secrets (tokens, keys, passwords, JWTs)
      // NOTE(review): unlike SENSITIVE_COOKIE_NAME, this key pattern does not list `sid` —
      // confirm whether the difference is intended before unifying the two.
      const SENSITIVE_KEY = /(^|[_.-])(token|secret|key|password|credential|auth|jwt|session|csrf)($|[_.-])|api.?key/i;
      const SENSITIVE_VALUE = /^(eyJ|sk-|sk_live_|sk_test_|pk_live_|pk_test_|rk_live_|sk-ant-|ghp_|gho_|github_pat_|xox[bpsa]-|AKIA[A-Z0-9]{16}|AIza|SG\.|Bearer\s|sbp_)/;
      // JSON round-trip yields a plain, mutable deep copy to redact in place.
      const redacted = JSON.parse(JSON.stringify(storage));
      for (const storeType of ['localStorage', 'sessionStorage'] as const) {
        const store = redacted[storeType];
        if (!store) continue;
        for (const [key, value] of Object.entries(store)) {
          if (typeof value !== 'string') continue;
          if (SENSITIVE_KEY.test(key) || SENSITIVE_VALUE.test(value)) {
            store[key] = `[REDACTED — ${value.length} chars]`;
          }
        }
      }
      return JSON.stringify(redacted, null, 2);
    }

    case 'perf': {
      const timings = await page.evaluate(() => {
        const nav = performance.getEntriesByType('navigation')[0] as PerformanceNavigationTiming;
        if (!nav) return 'No navigation timing data available.';
        return {
          dns: Math.round(nav.domainLookupEnd - nav.domainLookupStart),
          tcp: Math.round(nav.connectEnd - nav.connectStart),
          ssl: Math.round(nav.secureConnectionStart > 0 ? nav.connectEnd - nav.secureConnectionStart : 0),
          ttfb: Math.round(nav.responseStart - nav.requestStart),
          download: Math.round(nav.responseEnd - nav.responseStart),
          domParse: Math.round(nav.domInteractive - nav.responseEnd),
          domReady: Math.round(nav.domContentLoadedEventEnd - nav.startTime),
          load: Math.round(nav.loadEventEnd - nav.startTime),
          // NOTE(review): `total` is computed exactly like `load` — confirm this is intended.
          total: Math.round(nav.loadEventEnd - nav.startTime),
        };
      });
      // A string here is the "no data" message, returned as-is.
      if (typeof timings === 'string') return timings;
      return Object.entries(timings)
        .map(([k, v]) => `${k.padEnd(12)} ${v}ms`)
        .join('\n');
    }

    case 'inspect': {
      // Parse flags
      let includeUA = false;
      let showHistory = false;
      let selector: string | undefined;

      for (const arg of args) {
        if (arg === '--all') {
          includeUA = true;
        } else if (arg === '--history') {
          showHistory = true;
        } else if (!selector) {
          selector = arg;
        }
      }

      // --history mode: return modification history
      if (showHistory) {
        const history = getModificationHistory();
        if (history.length === 0) return '(no style modifications)';
        return history.map((m, i) =>
          `[${i}] ${m.selector} { ${m.property}: ${m.oldValue} → ${m.newValue} } (${m.source}, ${m.method})`
        ).join('\n');
      }

      // If no selector given, check for stored inspector data
      if (!selector) {
        // Access stored inspector data from the server's in-memory state
        // The server stores this when the extension picks an element via POST /inspector/pick
        // Optional chaining: with no browser manager there is simply nothing stored.
        const stored = (bm as any)?._inspectorData;
        const storedTs = (bm as any)?._inspectorTimestamp;
        if (stored) {
          const stale = storedTs && (Date.now() - storedTs > 60000);
          let output = formatInspectorResult(stored, { includeUA });
          if (stale) output = '⚠ Data may be stale (>60s old)\n\n' + output;
          return output;
        }
        throw new Error('Usage: browse inspect [selector] [--all] [--history]\nOr pick an element in the Chrome sidebar first.');
      }

      // Direct inspection by selector
      const result = await inspectElement(page, selector, { includeUA });
      // Store for later retrieval (only possible when a browser manager was supplied)
      if (bm) {
        (bm as any)._inspectorData = result;
        (bm as any)._inspectorTimestamp = Date.now();
      }
      return formatInspectorResult(result, { includeUA });
    }

    case 'media': {
      const { extractMedia } = await import('./media-extract');
      // Prefer the manager's active frame/page; fall back to the session's when `bm` is absent.
      const mediaTarget = bm ? bm.getActiveFrameOrPage() : target;
      const filter = args.includes('--images') ? 'images' as const
        : args.includes('--videos') ? 'videos' as const
        : args.includes('--audio') ? 'audio' as const
        : undefined;
      const selectorArg = args.find(a => !a.startsWith('--'));
      const result = await extractMedia(mediaTarget, { selector: selectorArg, filter });
      return JSON.stringify(result, null, 2);
    }

    case 'data': {
      // Prefer the manager's active frame/page; fall back to the session's when `bm` is absent.
      const dataTarget = bm ? bm.getActiveFrameOrPage() : target;
      // With no flags at all, every section is included.
      const wantJsonLd = args.includes('--jsonld') || args.length === 0;
      const wantOg = args.includes('--og') || args.length === 0;
      const wantTwitter = args.includes('--twitter') || args.length === 0;
      const wantMeta = args.includes('--meta') || args.length === 0;

      const result = await dataTarget.evaluate(({ wantJsonLd, wantOg, wantTwitter, wantMeta }) => {
        const data: Record<string, any> = {};

        if (wantJsonLd) {
          const scripts = document.querySelectorAll('script[type="application/ld+json"]');
          const jsonLd: any[] = [];
          scripts.forEach(s => {
            // Best effort: JSON-LD blocks that fail to parse are skipped.
            try { jsonLd.push(JSON.parse(s.textContent || '')); } catch {}
          });
          data.jsonLd = jsonLd;
        }

        if (wantOg) {
          const og: Record<string, string> = {};
          document.querySelectorAll('meta[property^="og:"]').forEach(m => {
            const prop = m.getAttribute('property')?.replace('og:', '') || '';
            og[prop] = m.getAttribute('content') || '';
          });
          data.openGraph = og;
        }

        if (wantTwitter) {
          const tw: Record<string, string> = {};
          document.querySelectorAll('meta[name^="twitter:"]').forEach(m => {
            const name = m.getAttribute('name')?.replace('twitter:', '') || '';
            tw[name] = m.getAttribute('content') || '';
          });
          data.twitterCards = tw;
        }

        if (wantMeta) {
          const meta: Record<string, string> = {};
          const canonical = document.querySelector('link[rel="canonical"]');
          if (canonical) meta.canonical = canonical.getAttribute('href') || '';
          const desc = document.querySelector('meta[name="description"]');
          if (desc) meta.description = desc.getAttribute('content') || '';
          const keywords = document.querySelector('meta[name="keywords"]');
          if (keywords) meta.keywords = keywords.getAttribute('content') || '';
          const author = document.querySelector('meta[name="author"]');
          if (author) meta.author = author.getAttribute('content') || '';
          const title = document.querySelector('title');
          if (title) meta.title = title.textContent || '';
          data.meta = meta;
        }

        return data;
      }, { wantJsonLd, wantOg, wantTwitter, wantMeta });

      return JSON.stringify(result, null, 2);
    }

    default:
      throw new Error(`Unknown read command: ${command}`);
  }
}
|