mirror of
https://github.com/garrytan/gstack.git
synced 2026-07-05 15:47:57 +02:00
feat(browse): webdriver-mask stealth + Chromium-through-bridge e2e
D7 (codex narrowing): mask navigator.webdriver only via addInitScript. The wintermute approach (fake plugins=[1..5], fake languages=['en-US', 'en'], stub window.chrome) is intentionally NOT applied — modern fingerprinters check consistency between plugins.length, languages, userAgent, and platform, and synthesizing fixed values can flag MORE bot-like, not less. The honest minimum is webdriver, which Chromium exposes as a known automation tell. Adds browse/src/stealth.ts: single source of truth for the stealth init script and launch args. Both browser-manager.launch() (headless) and launchHeaded() (persistent context with extension) call applyStealth(context) and pass STEALTH_LAUNCH_ARGS into chromium.launch. The pre-existing launchHeaded stealth that did fake plugins/languages is removed for the same reason. The cdc_/__webdriver runtime cleanup and Permissions API patch are kept — they remove automation-injected artifacts, not synthesize fake natural-browser values. Adds bridge-chromium-e2e.test.ts (codex F3): the test that proves the FEATURE works. Real Chromium with proxy.server = 'socks5://127.0.0.1: <bridgePort>' navigates to a local HTTP fixture; the auth upstream's connect counter and the HTTP fixture's hit counter both increment, proving traffic actually traversed bridge → auth-upstream → destination. Without this test, we could ship a working byte-relay and a broken Chromium integration and never know. Adds bridge-port-restart.test.ts (codex F1, reframed): old test assumed two daemons coexist, which contradicts D2 single-daemon model. Reframed as restart-then-restart, asserting fresh ephemeral ports (never the hardcoded 1090) on each spin-up. Adds stealth-webdriver.test.ts: navigator.webdriver=false in both fresh contexts and persistent contexts; navigator.plugins/languages are NOT replaced with the wintermute fake list (D7 verification). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -193,7 +193,8 @@ export class BrowserManager {
|
||||
// BROWSE_EXTENSIONS_DIR points to an unpacked Chrome extension directory.
|
||||
// Extensions only work in headed mode, so we use an off-screen window.
|
||||
const extensionsDir = process.env.BROWSE_EXTENSIONS_DIR;
|
||||
const launchArgs: string[] = [];
|
||||
const { STEALTH_LAUNCH_ARGS } = await import('./stealth');
|
||||
const launchArgs: string[] = [...STEALTH_LAUNCH_ARGS];
|
||||
let useHeadless = true;
|
||||
|
||||
// Docker/CI: Chromium sandbox requires unprivileged user namespaces which
|
||||
@@ -244,6 +245,13 @@ export class BrowserManager {
|
||||
await this.context.setExtraHTTPHeaders(this.extraHeaders);
|
||||
}
|
||||
|
||||
// D7: mask navigator.webdriver only. The other 3 wintermute patches
|
||||
// (plugins, languages, chrome.runtime) are intentionally NOT applied —
|
||||
// faking them to fixed values can flag more bot-like to modern
|
||||
// fingerprinters, not less.
|
||||
const { applyStealth } = await import('./stealth');
|
||||
await applyStealth(this.context);
|
||||
|
||||
// Create first tab
|
||||
await this.newTab();
|
||||
}
|
||||
@@ -385,33 +393,20 @@ export class BrowserManager {
|
||||
this.connectionMode = 'headed';
|
||||
this.intentionalDisconnect = false;
|
||||
|
||||
// ─── Anti-bot-detection stealth patches ───────────────────────
|
||||
// Playwright's Chromium is detected by sites like Google/NYTimes via:
|
||||
// 1. navigator.webdriver = true (handled by --disable-blink-features above)
|
||||
// 2. Missing plugins array (real Chrome has PDF viewer, etc.)
|
||||
// 3. Missing languages
|
||||
// 4. CDP runtime detection (window.cdc_* variables)
|
||||
// 5. Permissions API returning 'denied' for notifications
|
||||
// ─── Anti-bot-detection patches ───────────────────────────────
|
||||
// D7 (codex correction): mask navigator.webdriver only. We do NOT fake
|
||||
// plugins/languages — modern fingerprinters check consistency between
|
||||
// those and userAgent/platform, and synthesizing fixed values can flag
|
||||
// MORE bot-like, not less. Let Chromium's natural plugins and languages
|
||||
// surface unmodified.
|
||||
//
|
||||
// What we DO clean up are automation-specific runtime artifacts that
|
||||
// shouldn't exist in a real browser at all (Permissions API quirks,
|
||||
// ChromeDriver-injected window globals). Those aren't fingerprint
|
||||
// synthesis — they're removing leaked automation tells.
|
||||
const { applyStealth } = await import('./stealth');
|
||||
await applyStealth(this.context);
|
||||
await this.context.addInitScript(() => {
|
||||
// Fake plugins array (real Chrome has at least PDF Viewer)
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => {
|
||||
const plugins = [
|
||||
{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
|
||||
{ name: 'Chrome PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
|
||||
{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
|
||||
];
|
||||
(plugins as any).namedItem = (name: string) => plugins.find(p => p.name === name) || null;
|
||||
(plugins as any).refresh = () => {};
|
||||
return plugins;
|
||||
},
|
||||
});
|
||||
|
||||
// Fake languages (Playwright sometimes sends empty)
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US', 'en'],
|
||||
});
|
||||
|
||||
// Remove CDP runtime artifacts that automation detectors look for
|
||||
// cdc_ prefixed vars are injected by ChromeDriver/CDP
|
||||
const cleanup = () => {
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Stealth init script — webdriver-mask only (D7, codex narrowed).
|
||||
*
|
||||
* Modern anti-bot fingerprinters check consistency between navigator
|
||||
* properties (plugins.length, languages, userAgent, platform). Faking those
|
||||
* to fixed values (the wintermute approach) can flag MORE bot-like, not
|
||||
* less, and breaks legitimate sites that reflect on these properties.
|
||||
*
|
||||
* The honest minimum is masking navigator.webdriver, which Chromium exposes
|
||||
* as a known automation tell. Letting plugins/languages/chrome.runtime
|
||||
* surface their native Chromium values keeps the fingerprint internally
|
||||
* consistent.
|
||||
*/
|
||||
|
||||
import type { Browser, BrowserContext } from 'playwright';
|
||||
|
||||
/**
|
||||
* Init script applied to every page in a context. Runs in the page's main
|
||||
* world before any other scripts. Idempotent — defining the same property
|
||||
* twice in different contexts is fine.
|
||||
*/
|
||||
export const WEBDRIVER_MASK_SCRIPT = `Object.defineProperty(navigator, 'webdriver', { get: () => false });`;
|
||||
|
||||
/**
|
||||
* Apply stealth patches to a fresh BrowserContext (or persistent context).
|
||||
* Called by browser-manager.launch() and launchHeaded().
|
||||
*/
|
||||
export async function applyStealth(context: BrowserContext): Promise<void> {
|
||||
await context.addInitScript({ content: WEBDRIVER_MASK_SCRIPT });
|
||||
}
|
||||
|
||||
/**
|
||||
* Args added to chromium.launch's `args` to suppress the
|
||||
* AutomationControlled blink feature. This is independent of the init
|
||||
* script — it changes how Chromium identifies itself in the protocol layer.
|
||||
*/
|
||||
export const STEALTH_LAUNCH_ARGS = [
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
];
|
||||
Reference in New Issue
Block a user