feat: improved instruction block with snapshot→@ref pattern

The paste-into-agent instruction block now teaches the snapshot→@ref workflow (the most powerful browsing pattern), shows the server URL prominently, and uses clearer formatting. Tests updated to match.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-08-02 04:18:37 +02:00 · 2026-04-04 23:53:52 -07:00
parent bf66cec3d5
commit 376814c3f9
2 changed files with 47 additions and 33 deletions
@@ -465,11 +465,15 @@ export function generateInstructionBlock(opts: InstructionBlockOptions): string

  return `\
 ${'='.repeat(59)}
- REMOTE BROWSER ACCESS — paste this into your other agent
+ REMOTE BROWSER ACCESS
+ Paste this into your other AI agent's chat.
 ${'='.repeat(59)}

-You have access to a remote browser controlled via HTTP API.
-This setup key expires in 5 minutes.
+You can control a real Chromium browser via HTTP API. Navigate
+pages, read content, click buttons, fill forms, take screenshots.
+You get your own isolated tab. This setup key expires in 5 minutes.
+
+SERVER: ${serverUrl}

 STEP 1 — Exchange the setup key for a session token:

@@ -478,50 +482,59 @@ STEP 1 — Exchange the setup key for a session token:
    -d '{"setup_key": "${setupKey}"}' \\
    ${serverUrl}/connect

-  You'll get back: {"token": "gsk_sess_...", "expires": "...", "scopes": [...]}
-  Save that token. Use it for all subsequent requests.
+  Save the "token" value from the response. Use it as your
+  Bearer token for all subsequent requests.

-STEP 2 — Create your own tab:
+STEP 2 — Create your own tab (required before interacting):

  curl -s -X POST \\
-    -H "Authorization: Bearer <your-session-token>" \\
+    -H "Authorization: Bearer <TOKEN>" \\
    -H "Content-Type: application/json" \\
    -d '{"command": "newtab", "args": ["https://example.com"]}' \\
    ${serverUrl}/command

-  You'll get back: {"tabId": N, ...}
-  Include "tabId": N in all subsequent commands.
+  Save the "tabId" from the response. Include it in every command.

-STEP 3 — Use the browser. Send commands as POST /command:
+STEP 3 — Browse. The key pattern is snapshot then act:

+  # Get an interactive snapshot with clickable @ref labels
  curl -s -X POST \\
-    -H "Authorization: Bearer <your-session-token>" \\
+    -H "Authorization: Bearer <TOKEN>" \\
    -H "Content-Type: application/json" \\
-    -d '{"command": "snapshot", "args": ["-i"], "tabId": <your-tab-id>}' \\
+    -d '{"command": "snapshot", "args": ["-i"], "tabId": <TAB>}' \\
    ${serverUrl}/command

-AVAILABLE COMMANDS:
+  The snapshot returns labeled elements like:
+    @e1 [link] "Home"
+    @e2 [button] "Sign In"
+    @e3 [input] "Search..."
+
+  Use those @refs to interact:
+    {"command": "click", "args": ["@e2"], "tabId": <TAB>}
+    {"command": "fill", "args": ["@e3", "query"], "tabId": <TAB>}
+
+  Always snapshot first, then use the @refs. Don't guess selectors.
+
+COMMAND REFERENCE:
  Navigate:    {"command": "goto", "args": ["URL"], "tabId": N}
-  Read page:   {"command": "snapshot", "args": ["-i"], "tabId": N}
+  Snapshot:    {"command": "snapshot", "args": ["-i"], "tabId": N}
  Full text:   {"command": "text", "args": [], "tabId": N}
-  Screenshot:  {"command": "screenshot", "args": ["/tmp/screen.png"], "tabId": N}
+  Screenshot:  {"command": "screenshot", "args": ["/tmp/s.png"], "tabId": N}
  Click:       {"command": "click", "args": ["@e3"], "tabId": N}
  Fill form:   {"command": "fill", "args": ["@e5", "value"], "tabId": N}
  Go back:     {"command": "back", "args": [], "tabId": N}
-  List tabs:   {"command": "tabs", "args": []}
+  Tabs:        {"command": "tabs", "args": []}
+  New tab:     {"command": "newtab", "args": ["URL"]}

-SCOPES: This token has ${scopeDesc}.
-${scopes.includes('admin') ? '' : `To request admin access, ask the user to re-run pair-agent with --admin.\n`}
-SESSION: Token expires ${expiresAt}. The user can revoke it
-anytime with: $B tunnel revoke <your-agent-name>
+SCOPES: ${scopeDesc}.
+${scopes.includes('admin') ? '' : `To get admin access (JS, cookies, storage), ask the user to re-pair with --admin.\n`}
+TOKEN: Expires ${expiresAt}. Revoke: ask the user to run
+  $B tunnel revoke <your-name>

-IF SOMETHING GOES WRONG:
-  401 Unauthorized → Token expired or revoked. Ask the user
-    to run pair-agent again.
-  403 Forbidden → Command not in your scope, or tab not owned
-    by you. Use newtab first.
-  429 Too Many Requests → Sending > 10 requests/second.
-    Wait for the Retry-After header.
+ERRORS:
+  401 → Token expired/revoked. Ask user to run /pair-agent again.
+  403 → Command out of scope, or tab not yours. Run newtab first.
+  429 → Rate limited (>10 req/s). Wait for Retry-After header.

 ${'='.repeat(59)}`;
 }
@@ -82,9 +82,10 @@ describe('generateInstructionBlock', () => {
    expect(block).toContain('STEP 1');
    expect(block).toContain('STEP 2');
    expect(block).toContain('STEP 3');
-    expect(block).toContain('AVAILABLE COMMANDS');
+    expect(block).toContain('COMMAND REFERENCE');
    expect(block).toContain('read + write access');
    expect(block).toContain('tabId');
+    expect(block).toContain('@ref');
    expect(block).not.toContain('undefined');
  });

@@ -109,7 +110,7 @@ describe('generateInstructionBlock', () => {

    expect(block).toContain('admin access');
    expect(block).toContain('execute JS');
-    expect(block).not.toContain('To request admin access');
+    expect(block).not.toContain('re-pair with --admin');
  });

  it('shows re-pair hint when admin not included', () => {
@@ -120,7 +121,7 @@ describe('generateInstructionBlock', () => {
      expiresAt: '2026-04-06T00:00:00Z',
    });

-    expect(block).toContain('To request admin access');
+    expect(block).toContain('re-pair with --admin');
  });

  it('includes newtab as step 2 (agents must own their tab)', () => {
@@ -143,8 +144,8 @@ describe('generateInstructionBlock', () => {
      expiresAt: '2026-04-06T00:00:00Z',
    });

-    expect(block).toContain('401 Unauthorized');
-    expect(block).toContain('403 Forbidden');
-    expect(block).toContain('429 Too Many Requests');
+    expect(block).toContain('401');
+    expect(block).toContain('403');
+    expect(block).toContain('429');
  });
 });