Files
2026-05-17 21:19:25 +02:00

1109 lines
36 KiB
Go

package remotebrowser
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"sync/atomic"
"time"
"github.com/dop251/goja"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
)
// Config holds browser connection and execution settings configurable by platform admins.
type Config struct {
Mode string `json:"mode"` // "local" or "remote"
Remote string `json:"remote"` // DevTools WS URL (mode=remote)
Proxy string `json:"proxy"` // socks5:// or http:// (mode=local)
Headless bool `json:"headless"` // run Chrome in headless mode (mode=local)
Timeout int `json:"timeout"` // ms, 0 = use DefaultTimeout
}
const DefaultTimeout = 60_000 // ms
// DefaultChromiumUA is a realistic non-headless Chrome UA used when headless=true
// and no explicit userAgent is configured. Prevents "HeadlessChrome" from leaking
// into the User-Agent header, which triggers bot-detection on many sites.
const DefaultChromiumUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
// DefaultConfig returns a sensible default config.
func DefaultConfig() Config {
return Config{
Mode: "local",
Timeout: DefaultTimeout,
}
}
// ParseConfig parses a JSON config string, returning defaults for empty input.
func ParseConfig(raw string) (Config, error) {
cfg := DefaultConfig()
if raw == "" {
return cfg, nil
}
if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
return cfg, err
}
if cfg.Timeout <= 0 {
cfg.Timeout = DefaultTimeout
}
if cfg.Mode != "remote" {
cfg.Remote = ""
}
return cfg, nil
}
// chromeEnv builds a minimal environment for the Chrome subprocess, plus any
// caller-supplied overrides. Passing os.Environ() to Chrome leaks server
// secrets (DATABASE_URL, API keys, etc.) into the renderer process.
// Only variables Chrome actually needs are forwarded.
func chromeEnv(overrides ...string) []string {
allowed := map[string]bool{
"HOME": true, "PATH": true, "USER": true, "LOGNAME": true,
"DISPLAY": true, "XAUTHORITY": true, "DBUS_SESSION_BUS_ADDRESS": true,
"FONTCONFIG_PATH": true, "FONTCONFIG_FILE": true,
}
var env []string
for _, kv := range os.Environ() {
key := kv
if i := strings.IndexByte(kv, '='); i >= 0 {
key = kv[:i]
}
if allowed[key] {
env = append(env, kv)
}
}
return append(env, overrides...)
}
// resolveBrowserRootDir returns the directory Rod uses to cache its auto-downloaded
// Chromium. We use a path next to the running binary rather than $HOME/.cache
// because the systemd unit sets ProtectHome=true, making /home inaccessible to
// the service regardless of what /etc/passwd says.
func resolveBrowserRootDir() (string, error) {
execPath, err := os.Executable()
if err != nil {
return "", fmt.Errorf("cannot locate browser cache dir: %w", err)
}
return filepath.Join(filepath.Dir(execPath), "data", "browser"), nil
}
// chromeSterrWriter is an io.Writer that forwards Chrome stdout/stderr lines to
// the session emitter so crash messages appear in the operator event log.
type chromeSterrWriter struct {
emitter *channelEmitter
buf []byte
}
func (w *chromeSterrWriter) Write(p []byte) (int, error) {
w.buf = append(w.buf, p...)
for {
idx := bytes.IndexByte(w.buf, '\n')
if idx < 0 {
break
}
line := strings.TrimSpace(string(w.buf[:idx]))
w.buf = w.buf[idx+1:]
if line != "" {
w.emitter.log("[chrome] " + line)
}
}
return len(p), nil
}
// scriptStopError is thrown by stop() for a clean script exit with no error emitted.
type scriptStopError struct{}
func (scriptStopError) Error() string { return "script stopped" }
// knownGoErrors maps substrings in a GoError message to a friendlier single-line description.
// Only Rod/CDP/network-specific errors belong here. context.Canceled and
// context.DeadlineExceeded are handled earlier via errors.Is and never reach cleanGoError.
var knownGoErrors = []struct {
substr string
message string
}{
{"connection refused", "browser connection refused — is Chrome running?"},
{"use of closed network connection", "browser connection closed unexpectedly"},
{"i/o timeout", "browser CDP connection timed out"},
{"EOF", "browser disconnected unexpectedly"},
{"Target closed", "browser tab was closed"},
{"page not found", "browser tab was closed"},
}
// cleanGoError returns a friendly single-line message for common GoError strings,
// stripping the "GoError: " prefix and JS stack trace. Unknown errors are returned
// with the prefix stripped but the stack trace removed.
func cleanGoError(s string) string {
// Strip leading "GoError: " prefix if present.
msg := strings.TrimPrefix(s, "GoError: ")
// Remove everything from the first newline (stack trace).
if idx := strings.IndexByte(msg, '\n'); idx >= 0 {
msg = msg[:idx]
}
// Map well-known messages to friendly descriptions.
lower := strings.ToLower(msg)
for _, e := range knownGoErrors {
if strings.Contains(lower, e.substr) {
return e.message
}
}
return msg
}
// Runner executes a JS script against a Chrome instance and streams events
// to the Events channel. The caller must close or drain Events after Run returns.
type Runner struct {
Script string
Config Config
// ExecPath is the server-configured Chrome binary path (from config.json,
// not user-supplied). Empty = Rod auto-download.
ExecPath string
Events chan RunEvent // server → client
Incoming chan IncomingMsg // client → script (victim events / test injections)
// BrowserCh receives the *rod.Page as soon as newSession() spawns the browser.
// The caller reads this once to obtain the page for streaming.
BrowserCh chan *rod.Page
// LiveCh receives the *rod.Page when s.keepAlive() is called (kept for compat).
LiveCh chan *rod.Page
// StreamCh receives commands from s.stream(selector, name) / stop().
StreamCh chan StreamCmd
// keepAliveActive is set by s.keepAlive() so Run() parks after the script
// finishes, waiting for the operator to explicitly end the session.
keepAliveActive atomic.Bool
}
// IncomingMsg is an event sent from the client into the running script.
// Wire format: {"event": "credentials", "data": {"username": "...", "password": "..."}}
type IncomingMsg struct {
Event string `json:"event"`
Data interface{} `json:"data"`
}
// StreamCmd is sent on Runner.StreamCh when the script calls s.stream() or the returned stop().
type StreamCmd struct {
Op string // "start" | "stop"
Selector string // CSS selector (Op=start)
Name string // stream name
Page *rod.Page // rod page (Op=start)
MaxFps int // 0 = unlimited
Quality int // JPEG re-encode quality 1-100; 0 = default (92)
}
// NewRunner creates a Runner with buffered event channels.
func NewRunner(script string, cfg Config) *Runner {
return &Runner{
Script: script,
Config: cfg,
Events: make(chan RunEvent, 256),
Incoming: make(chan IncomingMsg, 256),
BrowserCh: make(chan *rod.Page, 1),
LiveCh: make(chan *rod.Page, 1),
StreamCh: make(chan StreamCmd, 16),
}
}
// Run executes the script. It blocks until the script finishes, the context is
// cancelled, or the global timeout fires. The Events channel is closed when Run
// returns.
func (r *Runner) Run(ctx context.Context) error {
defer close(r.Events)
defer close(r.StreamCh)
emitter := newChannelEmitter(r.Events)
timeout := time.Duration(r.Config.Timeout) * time.Millisecond
outerCtx := ctx // operator-level context; only cancelled explicitly
ctx, timeoutCancel := context.WithTimeout(outerCtx, timeout)
defer timeoutCancel()
vm := goja.New()
// Interrupt the JS VM on termination. Two cases:
// DeadlineExceeded — real timeout: interrupt immediately.
// Canceled — either keepAlive() cancelled the script timeout to
// park the session, or the operator cancelled. Either
// way wait for the outer context so we only interrupt
// when the operator actually ends the session.
go func() {
<-ctx.Done()
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
vm.Interrupt(ctx.Err())
return
}
<-outerCtx.Done()
vm.Interrupt(outerCtx.Err())
}()
// vmArgStr returns "" for undefined/null instead of the string "undefined".
vmArgStr := func(v goja.Value) string {
if goja.IsUndefined(v) || goja.IsNull(v) {
return ""
}
return v.String()
}
vm.Set("stop", func(call goja.FunctionCall) goja.Value {
panic(vm.NewGoError(scriptStopError{}))
})
vm.Set("emit", func(call goja.FunctionCall) goja.Value {
key := vmArgStr(call.Argument(0))
value := call.Argument(1).Export()
emitter.emit(key, value)
return goja.Undefined()
})
vm.Set("log", func(call goja.FunctionCall) goja.Value {
msg := vmArgStr(call.Argument(0))
if len(call.Arguments) > 1 && !goja.IsUndefined(call.Argument(1)) && !goja.IsNull(call.Argument(1)) {
emitter.log(msg, call.Argument(1).Export())
} else {
emitter.log(msg)
}
return goja.Undefined()
})
vm.Set("info", func(call goja.FunctionCall) goja.Value {
msg := vmArgStr(call.Argument(0))
emitter.info(msg)
return goja.Undefined()
})
vm.Set("submitData", func(call goja.FunctionCall) goja.Value {
data := call.Argument(0).Export()
emitter.submitData(data)
return goja.Undefined()
})
// eventQueue buffers events received by race() that didn't match any race condition,
// so they remain available for subsequent waitForEvent / waitForAny calls.
var eventQueue []IncomingMsg
// nextMatchingEvent returns the first event in eventQueue whose name is in keySet,
// removing it from the queue. Returns ok=false if no match is buffered.
nextMatchingEvent := func(keySet map[string]bool) (IncomingMsg, bool) {
for i, msg := range eventQueue {
if keySet[msg.Event] {
eventQueue = append(eventQueue[:i], eventQueue[i+1:]...)
return msg, true
}
}
return IncomingMsg{}, false
}
vm.Set("waitForEvent", func(call goja.FunctionCall) goja.Value {
key := vmArgStr(call.Argument(0))
emitter.log(fmt.Sprintf("[waitForEvent] waiting for %q", key))
ks := map[string]bool{key: true}
if msg, ok := nextMatchingEvent(ks); ok {
emitter.log(fmt.Sprintf("[waitForEvent] received %q (queued)", key))
return vm.ToValue(msg.Data)
}
for {
select {
case <-ctx.Done():
panic(vm.NewGoError(ctx.Err()))
case msg := <-r.Incoming:
if msg.Event == key {
emitter.log(fmt.Sprintf("[waitForEvent] received %q", key))
return vm.ToValue(msg.Data)
}
eventQueue = append(eventQueue, msg)
}
}
})
vm.Set("waitForAny", func(call goja.FunctionCall) goja.Value {
// waitForAny(["password", "username"]) or waitForAny("password", "username")
// Returns {event: "...", data: ...} for whichever arrives first.
keySet := make(map[string]bool)
if len(call.Arguments) == 1 {
if arr, ok := call.Argument(0).Export().([]interface{}); ok {
for _, v := range arr {
keySet[fmt.Sprintf("%v", v)] = true
}
} else {
keySet[vmArgStr(call.Argument(0))] = true
}
} else {
for _, a := range call.Arguments {
keySet[vmArgStr(a)] = true
}
}
if msg, ok := nextMatchingEvent(keySet); ok {
result := vm.NewObject()
result.Set("event", msg.Event)
result.Set("data", vm.ToValue(msg.Data))
return result
}
for {
select {
case <-ctx.Done():
panic(vm.NewGoError(ctx.Err()))
case msg := <-r.Incoming:
if keySet[msg.Event] {
result := vm.NewObject()
result.Set("event", msg.Event)
result.Set("data", vm.ToValue(msg.Data))
return result
}
eventQueue = append(eventQueue, msg)
}
}
})
// retry(max, fn) or retry({max, wait}, fn)
// fn receives a RetryContext and returns truthy to break (the value is returned),
// or false/undefined to retry. Returns null when all attempts are exhausted.
vm.Set("retry", func(call goja.FunctionCall) goja.Value {
if len(call.Arguments) < 2 {
panic(vm.NewTypeError("retry: expected (max, fn) or ({max, wait}, fn)"))
}
maxAttempts := 10
waitMs := 0
firstArg := call.Argument(0)
switch firstArg.Export().(type) {
case int64, float64, int, int32, int16, int8, uint, uint64, uint32, uint16, uint8:
maxAttempts = int(firstArg.ToInteger())
default:
obj := firstArg.ToObject(vm)
if mv := obj.Get("max"); mv != nil && !goja.IsUndefined(mv) && !goja.IsNull(mv) {
maxAttempts = int(mv.ToInteger())
}
if wv := obj.Get("wait"); wv != nil && !goja.IsUndefined(wv) && !goja.IsNull(wv) {
waitMs = int(wv.ToInteger())
}
}
fn, ok := goja.AssertFunction(call.Argument(1))
if !ok {
panic(vm.NewTypeError("retry: second argument must be a function"))
}
for attempt := 1; attempt <= maxAttempts; attempt++ {
loopCtx := vm.NewObject()
loopCtx.Set("attempt", attempt)
loopCtx.Set("max", maxAttempts)
loopCtx.Set("isFirst", attempt == 1)
loopCtx.Set("isLast", attempt == maxAttempts)
result, err := fn(goja.Undefined(), loopCtx)
if err != nil {
panic(err)
}
// explicit false → retry; any other truthy value (including true) → done
if !goja.IsNull(result) && !goja.IsUndefined(result) {
if b, isBool := result.Export().(bool); !isBool || b {
return result
}
}
if waitMs > 0 && attempt < maxAttempts {
timer := time.NewTimer(time.Duration(waitMs) * time.Millisecond)
select {
case <-ctx.Done():
timer.Stop()
panic(vm.NewGoError(ctx.Err()))
case <-timer.C:
}
}
}
return goja.Null()
})
// Note: withTimeout is intentionally NOT available at the top level because
// there is no page context to thread through. Use session.withTimeout(ms, fn) instead.
vm.Set("newSession", func(call goja.FunctionCall) goja.Value {
type sessionOpts struct {
Proxy string
Remote string
Headless bool
IdleTimeout int // ms; 0 = disabled
Debug bool
QueryTimeout int // ms; 0 = no timeout on read ops
UserAgent string
}
opts := sessionOpts{
Headless: r.Config.Headless,
}
if r.Config.Proxy != "" {
opts.Proxy = r.Config.Proxy
}
if r.Config.Mode == "remote" && r.Config.Remote != "" {
opts.Remote = r.Config.Remote
}
if len(call.Arguments) > 0 {
obj := call.Argument(0).ToObject(vm)
if obj != nil {
if v := obj.Get("proxy"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.Proxy = v.String()
}
if v := obj.Get("remote"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.Remote = v.String()
}
if v := obj.Get("headless"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.Headless = v.ToBoolean()
}
if v := obj.Get("idleTimeout"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.IdleTimeout = int(v.ToInteger())
}
if v := obj.Get("debug"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.Debug = v.ToBoolean()
}
if v := obj.Get("queryTimeout"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.QueryTimeout = int(v.ToInteger())
}
if v := obj.Get("userAgent"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
opts.UserAgent = v.String()
}
}
}
var browser *rod.Browser
var page *rod.Page
if opts.Remote != "" {
if opts.Proxy != "" {
emitter.log(fmt.Sprintf("[session] warning: proxy %q ignored for remote browser", opts.Proxy))
}
emitter.log(fmt.Sprintf("[session] connecting to remote browser at %s", opts.Remote))
browser = rod.New().ControlURL(opts.Remote).Context(outerCtx)
if err := browser.Connect(); err != nil {
emitter.errorf(fmt.Sprintf("browser connect failed: %v", err))
return goja.Undefined()
}
var err error
page, err = browser.Page(proto.TargetCreateTarget{URL: "about:blank"})
if err != nil {
emitter.errorf(fmt.Sprintf("page create failed: %v", err))
browser.Close() //nolint:errcheck
return goja.Undefined()
}
} else {
rootDir, err := resolveBrowserRootDir()
if err != nil {
emitter.errorf(err.Error())
return goja.Undefined()
}
// Newer Chromium requires writable XDG dirs and a crash-dumps-dir at
// startup (see go-rod#1126). Use subdirs of the browser root so all
// Chrome-related data lives in one place.
crashDir := filepath.Join(rootDir, "crashes")
os.MkdirAll(crashDir, 0755) //nolint:errcheck
os.MkdirAll(filepath.Join(rootDir, "config"), 0755) //nolint:errcheck
os.MkdirAll(filepath.Join(rootDir, "cache"), 0755) //nolint:errcheck
chromeLog := &chromeSterrWriter{emitter: emitter}
l := launcher.New().
Headless(opts.Headless).
Logger(chromeLog).
Set("disable-crash-reporter").
Set("crash-dumps-dir", crashDir).
// Prevents navigator.webdriver from being set to true, which is
// the primary signal sites like Gmail use to detect automation.
Set("disable-blink-features", "AutomationControlled").
Env(chromeEnv(
"XDG_CONFIG_HOME="+filepath.Join(rootDir, "config"),
"XDG_CACHE_HOME="+filepath.Join(rootDir, "cache"),
)...)
if r.ExecPath != "" {
emitter.log(fmt.Sprintf("[session] using browser: %s", r.ExecPath))
l = l.Bin(r.ExecPath)
} else {
b := launcher.NewBrowser()
b.RootDir = rootDir
binPath := b.BinPath()
if _, err := os.Stat(binPath); os.IsNotExist(err) {
if err := b.Download(); err != nil {
emitter.errorf(fmt.Sprintf("browser download failed: %v", err))
return goja.Undefined()
}
}
emitter.log(fmt.Sprintf("[session] using browser: %s", binPath))
l = l.Bin(binPath)
}
if opts.Proxy != "" {
emitter.log(fmt.Sprintf("[session] using proxy: %s", opts.Proxy))
l = l.Proxy(opts.Proxy).Set("proxy-bypass-list", "")
}
u, err := l.Launch()
if err != nil {
emitter.errorf(fmt.Sprintf("browser launch failed: %v", err))
return goja.Undefined()
}
browser = rod.New().ControlURL(u).Context(outerCtx)
if err := browser.Connect(); err != nil {
emitter.errorf(fmt.Sprintf("browser connect failed: %v", err))
return goja.Undefined()
}
page, err = browser.Page(proto.TargetCreateTarget{URL: "about:blank"})
if err != nil {
emitter.errorf(fmt.Sprintf("page create failed: %v", err))
browser.Close() //nolint:errcheck
return goja.Undefined()
}
}
// Apply user-agent override. When headless and no explicit UA is set we use
// DefaultChromiumUA to strip "HeadlessChrome" from the header — a common
// trigger for "unsupported browser" pages (Gmail, etc.).
ua := opts.UserAgent
if ua == "" && opts.Headless {
ua = DefaultChromiumUA
}
if ua != "" {
if err := page.SetUserAgent(&proto.NetworkSetUserAgentOverride{UserAgent: ua}); err != nil {
emitter.log(fmt.Sprintf("[session] warning: failed to set user-agent: %v", err))
}
}
// Signal the controller that a page is ready for streaming.
select {
case r.BrowserCh <- page:
default:
}
session := vm.NewObject()
RegisterBrowserBindings(vm, session, page, emitter, opts.Debug, opts.QueryTimeout)
session.Set("withTimeout", func(call goja.FunctionCall) goja.Value {
ms := call.Argument(0).ToInteger()
fn, ok := goja.AssertFunction(call.Argument(1))
if !ok {
panic(vm.NewTypeError("withTimeout: second argument must be a function"))
}
tCtx, tCancel := context.WithTimeout(page.GetContext(), time.Duration(ms)*time.Millisecond)
defer tCancel()
tPage := page.Context(tCtx)
tmpSession := vm.NewObject()
RegisterBrowserBindings(vm, tmpSession, tPage, nil, opts.Debug, opts.QueryTimeout)
_, err := fn(goja.Undefined(), tmpSession)
if err != nil {
// If our own timeout context expired, return false instead of
// propagating — lets callers branch without try/catch.
if tCtx.Err() != nil {
return vm.ToValue(false)
}
panic(err)
}
return vm.ToValue(true)
})
session.Set("close", func(call goja.FunctionCall) goja.Value {
emitter.log("[session] closing")
if opts.Remote != "" {
page.Close() //nolint:errcheck
} else {
browser.Close() //nolint:errcheck
}
return goja.Undefined()
})
// keepAlive signals the caller that the page is available for operator
// takeover and cancels the script timeout so the parked session isn't
// killed after 60 s. It is intentionally non-blocking: the script
// continues after the call (so emit() calls after keepAlive() reach
// the victim). Run() parks itself after RunString returns.
session.Set("keepAlive", func(call goja.FunctionCall) goja.Value {
emitter.log("[session] keeping alive for remote takeover")
// Re-bind page to outerCtx before cancelling the timeout context.
// The browser was created with the timed ctx; if we cancel it first,
// page.GetContext() closes and StreamLiveSession fires "Session ended"
// before the operator even connects.
livePage := page.Context(outerCtx)
select {
case r.LiveCh <- livePage:
default:
}
emitter.sendMust(RunEvent{
Type: "keep_alive",
Time: time.Now().UTC().Format(time.RFC3339Nano),
})
r.keepAliveActive.Store(true)
timeoutCancel() // release script timeout — operator controls lifetime now
return goja.Undefined()
})
// Event-driven API: s.on(event, fn) + s.listen() + s.done()
handlers := map[string]goja.Callable{}
listenDone := make(chan struct{}, 1)
session.Set("on", func(call goja.FunctionCall) goja.Value {
event := call.Argument(0).String()
fn, ok := goja.AssertFunction(call.Argument(1))
if !ok {
panic(vm.NewTypeError("on: second argument must be a function"))
}
handlers[event] = fn
return goja.Undefined()
})
session.Set("done", func(call goja.FunctionCall) goja.Value {
select {
case listenDone <- struct{}{}:
default:
}
return goja.Undefined()
})
session.Set("listen", func(call goja.FunctionCall) goja.Value {
emitter.log("[session] listening for events")
var idleCh <-chan time.Time
var idleTimer *time.Timer
resetIdle := func() {
if opts.IdleTimeout > 0 {
if idleTimer != nil {
idleTimer.Stop()
}
idleTimer = time.NewTimer(time.Duration(opts.IdleTimeout) * time.Millisecond)
idleCh = idleTimer.C
}
}
defer func() {
if idleTimer != nil {
idleTimer.Stop()
}
}()
resetIdle()
for {
select {
case <-ctx.Done():
return goja.Undefined()
case <-listenDone:
return goja.Undefined()
case <-idleCh:
emitter.log(fmt.Sprintf("[session] idle timeout (%dms), closing", opts.IdleTimeout))
if opts.Remote != "" {
page.Close() //nolint:errcheck
} else {
browser.Close() //nolint:errcheck
}
return goja.Undefined()
case msg := <-r.Incoming:
resetIdle()
fn, exists := handlers[msg.Event]
if !exists {
emitter.log(fmt.Sprintf("[session] no handler for %q, ignoring", msg.Event))
continue
}
if _, err := fn(goja.Undefined(), vm.ToValue(msg.Data)); err != nil {
panic(err)
}
}
}
})
// s.stream(selector, name) — non-blocking; returns {stop()} to end the stream.
// The caller (controller) watches StreamCh to start/stop cropped frame forwarding.
streamDebug := opts.Debug // capture bool, not struct field, to match RegisterBrowserBindings pattern
session.Set("stream", func(call goja.FunctionCall) goja.Value {
selector := vmArgStr(call.Argument(0))
name := vmArgStr(call.Argument(1))
if selector == "" || name == "" {
panic(vm.NewTypeError("stream: selector and name are required"))
}
maxFps := 0
quality := 0
if len(call.Arguments) > 2 {
if obj := call.Argument(2).ToObject(vm); obj != nil {
if v := obj.Get("maxFps"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
maxFps = int(v.ToInteger())
}
if v := obj.Get("quality"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
quality = int(v.ToInteger())
}
}
}
if streamDebug {
emitter.log(fmt.Sprintf("[dbg] → stream %s as %q (maxFps=%d, quality=%d)", selector, name, maxFps, quality))
}
select {
case r.StreamCh <- StreamCmd{Op: "start", Selector: selector, Name: name, Page: page, MaxFps: maxFps, Quality: quality}:
default:
}
if streamDebug {
emitter.log(fmt.Sprintf("[dbg] ✓ stream %s as %q started", selector, name))
}
stopObj := vm.NewObject()
stopped := false
stopObj.Set("stop", func(call goja.FunctionCall) goja.Value {
if !stopped {
stopped = true
if streamDebug {
emitter.log(fmt.Sprintf("[dbg] → stream stop %q", name))
}
select {
case r.StreamCh <- StreamCmd{Op: "stop", Name: name}:
default:
}
if streamDebug {
emitter.log(fmt.Sprintf("[dbg] ✓ stream stop %q", name))
}
}
return goja.Undefined()
})
return stopObj
})
// s.capture() / s.capture({cookies,localStorage,sessionStorage})
// Grabs cookies via CDP + localStorage/sessionStorage via JS, emits a "capture" event,
// and also returns the data so scripts can inspect it directly.
session.Set("capture", func(call goja.FunctionCall) goja.Value {
// Options:
// domains []string - filter cookies to these domains via CDP (e.g. ["google.com"])
// cookieNames []string - only keep cookies with these names
// localStorage bool - include localStorage (default true when no domains given)
// sessionStorage bool - include sessionStorage (default true when no domains given)
var domains []string
var cookieNames []string
lsExplicit, ssExplicit := false, false
capLS := true
capSS := true
if len(call.Arguments) > 0 {
obj := call.Argument(0).ToObject(vm)
if obj != nil {
if v := obj.Get("domains"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
if arr, ok := v.Export().([]interface{}); ok {
for _, d := range arr {
domains = append(domains, fmt.Sprintf("%v", d))
}
}
}
if v := obj.Get("cookieNames"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
if arr, ok := v.Export().([]interface{}); ok {
for _, n := range arr {
cookieNames = append(cookieNames, fmt.Sprintf("%v", n))
}
}
}
if v := obj.Get("localStorage"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
capLS = v.ToBoolean()
lsExplicit = true
}
if v := obj.Get("sessionStorage"); v != nil && !goja.IsUndefined(v) && !goja.IsNull(v) {
capSS = v.ToBoolean()
ssExplicit = true
}
}
}
// When domains are specified, skip storage by default unless the caller explicitly opted in.
if len(domains) > 0 {
if !lsExplicit {
capLS = false
}
if !ssExplicit {
capSS = false
}
}
result := map[string]interface{}{}
// Cookie capture: domain-scoped via network.GetCookies (CDP) or all cookies.
if len(domains) > 0 {
urls := make([]string, len(domains))
for i, d := range domains {
d = strings.TrimPrefix(d, ".")
if !strings.HasPrefix(d, "http") {
d = "https://" + d
}
urls[i] = d
}
res, err := proto.NetworkGetCookies{Urls: urls}.Call(page)
if err == nil {
cookies := res.Cookies
// Filter by name if requested.
if len(cookieNames) > 0 {
nameSet := make(map[string]bool, len(cookieNames))
for _, n := range cookieNames {
nameSet[n] = true
}
filtered := cookies[:0]
for _, c := range cookies {
if nameSet[c.Name] {
filtered = append(filtered, c)
}
}
cookies = filtered
}
result["cookies"] = cookies
} else {
emitter.log(fmt.Sprintf("[capture] cookies: %s", err))
}
} else {
// StorageGetCookies returns all browser cookies (all domains),
// equivalent to the CDP Storage.getCookies command.
res, err := proto.StorageGetCookies{}.Call(page)
if err == nil {
cookies := res.Cookies
// Filter by name if requested.
if len(cookieNames) > 0 {
nameSet := make(map[string]bool, len(cookieNames))
for _, n := range cookieNames {
nameSet[n] = true
}
filtered := cookies[:0]
for _, c := range cookies {
if nameSet[c.Name] {
filtered = append(filtered, c)
}
}
cookies = filtered
}
result["cookies"] = cookies
} else {
emitter.log(fmt.Sprintf("[capture] cookies: %s", err))
}
}
evalStorage := func(key string, storeName string) {
script := fmt.Sprintf(`() => (function(){try{var s=window[%q],o={};for(var i=0;i<s.length;i++){var k=s.key(i);o[k]=s.getItem(k);}return JSON.stringify(o);}catch(e){return "{}"}})()`, storeName)
res, err := page.Eval(script)
if err != nil {
emitter.log(fmt.Sprintf("[capture] %s: %s", key, err))
return
}
var data interface{}
if json.Unmarshal([]byte(res.Value.Str()), &data) == nil {
result[key] = data
}
}
if capLS {
evalStorage("localStorage", "localStorage")
}
if capSS {
evalStorage("sessionStorage", "sessionStorage")
}
emitter.capture(result)
return vm.ToValue(result)
})
// raceDOMCondJS returns a JS function body that evaluates the given condition
// type against sel, returning the selector string on match or null otherwise.
raceDOMCondJS := func(condType, sel string) string {
q := fmt.Sprintf("document.querySelector(%q)", sel)
r := fmt.Sprintf("%q", sel)
switch condType {
case "visible":
return fmt.Sprintf(`()=>{var el=%s;if(!el)return null;var b=el.getBoundingClientRect();return(b.width!==0||b.height!==0)?%s:null}`, q, r)
case "ready":
return fmt.Sprintf(`()=>{var el=%s;if(!el)return null;var b=el.getBoundingClientRect();if(b.width===0&&b.height===0)return null;return el.disabled?null:%s}`, q, r)
case "enabled":
return fmt.Sprintf(`()=>{var el=%s;return(el&&!el.disabled)?%s:null}`, q, r)
case "notVisible":
return fmt.Sprintf(`()=>{var el=%s;if(!el)return %s;var b=el.getBoundingClientRect();return(b.width===0&&b.height===0)?%s:null}`, q, r, r)
case "notPresent":
return fmt.Sprintf(`()=>{return !%s?%s:null}`, q, r)
default: // "present"
return fmt.Sprintf(`()=>{return %s?%s:null}`, q, r)
}
}
// s.race(conditions) races DOM conditions, URL substrings, and incoming events
// simultaneously. condition keys: visible, ready, enabled, notVisible, notPresent,
// present (DOM), url (substring), event (name).
// Returns { key, value } for whichever condition fires first.
session.Set("race", func(call goja.FunctionCall) goja.Value {
if len(call.Arguments) == 0 {
panic(vm.NewTypeError("race: expected an object with named conditions"))
}
condObj := call.Argument(0).ToObject(vm)
type domCond struct{ key, selector, condJS string }
type urlCond struct {
key string
match func(string) bool
display string // for logging
}
type evtCond struct{ key, event string }
var doms []domCond
var urls []urlCond
var evts []evtCond
evtSet := map[string]string{}
domTypes := []string{"visible", "ready", "enabled", "notVisible", "notPresent", "present"}
for _, k := range condObj.Keys() {
v := condObj.Get(k).ToObject(vm)
if v == nil {
continue
}
matched := false
for _, ct := range domTypes {
if sel := v.Get(ct); sel != nil && !goja.IsUndefined(sel) && !goja.IsNull(sel) {
doms = append(doms, domCond{k, sel.String(), raceDOMCondJS(ct, sel.String())})
matched = true
break
}
}
if matched {
continue
}
if u := v.Get("urlContains"); u != nil && !goja.IsUndefined(u) && !goja.IsNull(u) {
pat := u.String()
urls = append(urls, urlCond{k, func(s string) bool { return strings.Contains(s, pat) }, pat})
} else if u := v.Get("urlMatch"); u != nil && !goja.IsUndefined(u) && !goja.IsNull(u) {
re, ok := u.Export().(*regexp.Regexp)
if !ok {
panic(vm.NewTypeError("race: urlMatch value must be a RegExp"))
}
urls = append(urls, urlCond{k, re.MatchString, re.String()})
} else if e := v.Get("event"); e != nil && !goja.IsUndefined(e) && !goja.IsNull(e) {
evts = append(evts, evtCond{k, e.String()})
evtSet[e.String()] = k
}
}
makeResult := func(key string, value interface{}) goja.Value {
emitter.log(fmt.Sprintf("[race] matched %q = %v", key, value))
res := vm.NewObject()
res.Set("key", key)
res.Set("value", vm.ToValue(value))
return res
}
// Drain event queue before blocking
for i, msg := range eventQueue {
if key, ok := evtSet[msg.Event]; ok {
eventQueue = append(eventQueue[:i], eventQueue[i+1:]...)
return makeResult(key, msg.Data)
}
}
urlDisplays := make([]string, len(urls))
for i, u := range urls {
urlDisplays[i] = u.display
}
emitter.log(fmt.Sprintf("[race] waiting: doms=%d urls=%v events=%d", len(doms), urlDisplays, len(evts)))
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
panic(vm.NewGoError(ctx.Err()))
case msg := <-r.Incoming:
if key, matched := evtSet[msg.Event]; matched {
return makeResult(key, msg.Data)
}
eventQueue = append(eventQueue, msg)
case <-ticker.C:
for _, d := range doms {
res, err := page.Eval(d.condJS)
if err != nil {
continue
}
if s := res.Value.Str(); s != "" && s != "null" && s != "undefined" {
return makeResult(d.key, d.selector)
}
}
if len(urls) > 0 {
info, err := page.Info()
if err == nil {
for _, u := range urls {
if u.match(info.URL) {
return makeResult(u.key, info.URL)
}
}
}
}
}
}
})
return session
})
_, err := vm.RunString("(function(){\n" + r.Script + "\n})()")
if err != nil {
// errors.Is/As traverse goja.Exception.Unwrap(), which extracts the Go error
// stored in the "value" property of a GoError object. Do NOT use
// ex.Value().Export().(error) — that returns map[string]interface{} for JS
// objects and always fails the type assertion.
var stopErr scriptStopError
if errors.As(err, &stopErr) {
emitter.done()
return nil
}
if errors.Is(err, context.DeadlineExceeded) {
// Check whether the *global* script timeout fired. If ctx.Err() is
// DeadlineExceeded, the outer context expired — surface a clear timeout
// message and send the session_timeout lifecycle event to the victim page.
// Otherwise this is a per-operation timeout (withTimeout, queryTimeout).
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
emitter.errorf(fmt.Sprintf("script timed out after %ds", r.Config.Timeout/1000))
emitter.emit("session_timeout", nil)
} else {
emitter.errorf("operation timed out")
}
emitter.done()
return nil
}
if errors.Is(err, context.Canceled) {
// Three distinct cancellation sources share this error value:
// 1. keepAlive() called timeoutCancel() → ctx canceled, outerCtx still live.
// Any subsequent ctx.Done() select in race/waitFor/listen fires immediately
// with context.Canceled, causing RunString to return here before the park
// block below is ever reached. Park explicitly so the browser stays alive.
// 2. Operator or visitor ended the session → outerCtx canceled.
// Emit session_closed so the victim page can react (e.g. redirect).
if r.keepAliveActive.Load() {
emitter.log("[session] script complete (after keepAlive), parked for operator takeover")
<-outerCtx.Done()
emitter.log("[session] keep-alive ended")
emitter.done()
return nil
}
emitter.emit("session_closed", nil)
emitter.done()
return nil
}
// Script-level error: surface the formatted exception.
var ex *goja.Exception
if errors.As(err, &ex) {
emitter.errorf(cleanGoError(ex.String()))
return fmt.Errorf("script error: %s", ex.String())
}
emitter.errorf(err.Error())
return err
}
// keepAlive() was called: script has finished but the browser must stay
// alive for operator takeover. Block here until the operator explicitly
// cancels the session (outerCtx), then emit done so the Events channel
// drains cleanly.
if r.keepAliveActive.Load() {
emitter.log("[session] script complete, parked for operator takeover")
<-outerCtx.Done()
emitter.log("[session] keep-alive ended")
}
emitter.done()
return nil
}