mirror of
https://github.com/zhom/donutbrowser.git
synced 2026-05-26 18:17:49 +02:00
refactor: reduce token usage
This commit is contained in:
+409
-1
@@ -33,6 +33,48 @@ pub struct McpTool {
|
||||
pub input_schema: serde_json::Value,
|
||||
}
|
||||
|
||||
/// JavaScript executed in the target page to enumerate visible interactive
|
||||
/// elements. Returns a JSON string `{elements, count, truncated}` where
|
||||
/// `elements` is the newline-joined labeled list. Live references are stashed
|
||||
/// on `window.__donut_interactive` so subsequent `click_by_index` /
|
||||
/// `type_by_index` calls can resolve `index → Element` without round-tripping
|
||||
/// a selector. `__MAX_CHARS__` is substituted at call time.
|
||||
const INTERACTIVE_ELEMENTS_JS: &str = r#"(() => {
|
||||
const SELECTORS = 'a, button, input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"], [role="tab"], [role="menuitem"], [role="combobox"], [role="option"], [contenteditable=""], [contenteditable="true"], [tabindex]:not([tabindex="-1"])';
|
||||
const ATTRS = ['type','name','id','role','aria-label','aria-checked','aria-expanded','placeholder','title','value','href','alt'];
|
||||
const MAX_CHARS = __MAX_CHARS__;
|
||||
const interactive = [];
|
||||
const lines = [];
|
||||
let truncated = false;
|
||||
let total = 0;
|
||||
const nodes = document.querySelectorAll(SELECTORS);
|
||||
for (const el of nodes) {
|
||||
if (el.disabled) continue;
|
||||
const r = el.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.height <= 0) continue;
|
||||
const style = window.getComputedStyle(el);
|
||||
if (style.visibility === 'hidden' || style.display === 'none' || style.opacity === '0') continue;
|
||||
const tag = el.tagName.toLowerCase();
|
||||
const parts = [];
|
||||
for (const a of ATTRS) {
|
||||
const v = el.getAttribute(a);
|
||||
if (v) parts.push(a + '="' + String(v).slice(0,100).replace(/"/g,'\\"') + '"');
|
||||
}
|
||||
let text = '';
|
||||
if (!['INPUT','TEXTAREA','SELECT'].includes(el.tagName)) {
|
||||
text = (el.innerText || el.textContent || '').trim().replace(/\s+/g,' ').slice(0,100);
|
||||
}
|
||||
const idx = interactive.length;
|
||||
const line = '[' + idx + ']<' + tag + (parts.length ? ' ' + parts.join(' ') : '') + '>' + text + '</' + tag + '>';
|
||||
if (total + line.length + 1 > MAX_CHARS) { truncated = true; break; }
|
||||
total += line.length + 1;
|
||||
interactive.push(el);
|
||||
lines.push(line);
|
||||
}
|
||||
window.__donut_interactive = interactive;
|
||||
return JSON.stringify({ elements: lines.join('\n'), count: interactive.length, truncated: truncated });
|
||||
})()"#;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[allow(dead_code)]
|
||||
pub struct McpRequest {
|
||||
@@ -1354,6 +1396,76 @@ impl McpServer {
|
||||
"required": ["profile_id"]
|
||||
}),
|
||||
},
|
||||
McpTool {
|
||||
name: "get_interactive_elements".to_string(),
|
||||
description: "Enumerate visible interactive elements on the page (buttons, links, inputs, etc.) as a compact indexed list. The returned indices are stable for the current page and can be used with click_by_index and type_by_index instead of guessing CSS selectors. Call this before click_by_index / type_by_index, and re-call after any navigation or major DOM change. Far cheaper in tokens than get_page_content for agentic browsing.".to_string(),
|
||||
input_schema: serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"profile_id": {
|
||||
"type": "string",
|
||||
"description": "The UUID of the running profile"
|
||||
},
|
||||
"max_chars": {
|
||||
"type": "integer",
|
||||
"description": "Cap on the serialized output length (default: 40000). The response carries a `truncated` flag if the list was cut off — narrow the viewport or scroll if you need elements past the cutoff."
|
||||
}
|
||||
},
|
||||
"required": ["profile_id"]
|
||||
}),
|
||||
},
|
||||
McpTool {
|
||||
name: "click_by_index".to_string(),
|
||||
description: "Click the element at the given index from the last get_interactive_elements call. Indices are valid until the next navigation. If the click triggers navigation, waits for the new page to load before returning.".to_string(),
|
||||
input_schema: serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"profile_id": {
|
||||
"type": "string",
|
||||
"description": "The UUID of the running profile"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "Zero-based index from the last get_interactive_elements response"
|
||||
}
|
||||
},
|
||||
"required": ["profile_id", "index"]
|
||||
}),
|
||||
},
|
||||
McpTool {
|
||||
name: "type_by_index".to_string(),
|
||||
description: "Focus the element at the given index from the last get_interactive_elements call and type text into it. Same human-like-typing defaults as type_text; only set instant=true when you're sure the target lacks bot detection.".to_string(),
|
||||
input_schema: serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"profile_id": {
|
||||
"type": "string",
|
||||
"description": "The UUID of the running profile"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "Zero-based index from the last get_interactive_elements response"
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "Text to type into the element"
|
||||
},
|
||||
"clear_first": {
|
||||
"type": "boolean",
|
||||
"description": "Clear the input before typing (default: true)"
|
||||
},
|
||||
"instant": {
|
||||
"type": "boolean",
|
||||
"description": "Paste all text at once instead of human typing. WARNING: only use on targets without bot detection."
|
||||
},
|
||||
"wpm": {
|
||||
"type": "number",
|
||||
"description": "Target words per minute for human typing (default: 80)"
|
||||
}
|
||||
},
|
||||
"required": ["profile_id", "index", "text"]
|
||||
}),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1602,6 +1714,18 @@ impl McpServer {
|
||||
Self::require_paid_subscription("Browser automation").await?;
|
||||
self.handle_get_page_info(arguments).await
|
||||
}
|
||||
"get_interactive_elements" => {
|
||||
Self::require_paid_subscription("Browser automation").await?;
|
||||
self.handle_get_interactive_elements(arguments).await
|
||||
}
|
||||
"click_by_index" => {
|
||||
Self::require_paid_subscription("Browser automation").await?;
|
||||
self.handle_click_by_index(arguments).await
|
||||
}
|
||||
"type_by_index" => {
|
||||
Self::require_paid_subscription("Browser automation").await?;
|
||||
self.handle_type_by_index(arguments).await
|
||||
}
|
||||
_ => Err(McpError {
|
||||
code: -32602,
|
||||
message: format!("Unknown tool: {tool_name}"),
|
||||
@@ -4263,6 +4387,11 @@ impl McpServer {
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("text");
|
||||
let selector = arguments.get("selector").and_then(|v| v.as_str());
|
||||
let max_chars = arguments
|
||||
.get("max_chars")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize)
|
||||
.unwrap_or(40_000);
|
||||
|
||||
let profile = self.get_running_profile(profile_id)?;
|
||||
let cdp_port = self.get_cdp_port_for_profile(&profile).await?;
|
||||
@@ -4310,10 +4439,28 @@ impl McpServer {
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
// Cap output so a 500 KB DOM dump doesn't blow out the agent's context.
|
||||
// Slice on character boundaries (chars().take().collect()) rather than
|
||||
// byte indices, since the latter would panic on multi-byte boundaries.
|
||||
let total_chars = content.chars().count();
|
||||
let (text, truncated) = if total_chars > max_chars {
|
||||
(content.chars().take(max_chars).collect::<String>(), true)
|
||||
} else {
|
||||
(content.to_string(), false)
|
||||
};
|
||||
|
||||
let payload = if truncated {
|
||||
format!(
|
||||
"{text}\n\n[truncated: showing {max_chars} of {total_chars} chars — call with a larger max_chars or use get_interactive_elements for an indexed view]"
|
||||
)
|
||||
} else {
|
||||
text
|
||||
};
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"content": [{
|
||||
"type": "text",
|
||||
"text": content
|
||||
"text": payload
|
||||
}]
|
||||
}))
|
||||
}
|
||||
@@ -4361,6 +4508,267 @@ impl McpServer {
|
||||
}))
|
||||
}
|
||||
|
||||
async fn handle_get_interactive_elements(
|
||||
&self,
|
||||
arguments: &serde_json::Value,
|
||||
) -> Result<serde_json::Value, McpError> {
|
||||
let profile_id = arguments
|
||||
.get("profile_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| McpError {
|
||||
code: -32602,
|
||||
message: "Missing profile_id".to_string(),
|
||||
})?;
|
||||
let max_chars = arguments
|
||||
.get("max_chars")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize)
|
||||
.unwrap_or(40_000);
|
||||
|
||||
let profile = self.get_running_profile(profile_id)?;
|
||||
let cdp_port = self.get_cdp_port_for_profile(&profile).await?;
|
||||
let ws_url = self.get_cdp_ws_url(cdp_port).await?;
|
||||
|
||||
// Walk the DOM for visible, non-disabled interactive elements, label them
|
||||
// with a zero-based index, and cache the live references on
|
||||
// `window.__donut_interactive` so click_by_index / type_by_index can
|
||||
// resolve the index → Element without round-tripping a selector.
|
||||
let js = INTERACTIVE_ELEMENTS_JS.replace("__MAX_CHARS__", &max_chars.to_string());
|
||||
|
||||
let result = self
|
||||
.send_cdp(
|
||||
&ws_url,
|
||||
"Runtime.evaluate",
|
||||
serde_json::json!({
|
||||
"expression": js,
|
||||
"returnByValue": true,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(exception) = result.get("exceptionDetails") {
|
||||
let msg = exception
|
||||
.get("exception")
|
||||
.and_then(|e| e.get("description"))
|
||||
.or_else(|| exception.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("Enumeration failed");
|
||||
return Err(McpError {
|
||||
code: -32000,
|
||||
message: msg.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
let payload_str = result
|
||||
.get("result")
|
||||
.and_then(|r| r.get("value"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("{}");
|
||||
|
||||
let payload: serde_json::Value =
|
||||
serde_json::from_str(payload_str).unwrap_or(serde_json::json!({}));
|
||||
let elements = payload
|
||||
.get("elements")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let count = payload.get("count").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let truncated = payload
|
||||
.get("truncated")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
let header = if truncated {
|
||||
format!("{count} interactive elements (truncated at {max_chars} chars — re-call with a larger max_chars or scroll the page):")
|
||||
} else {
|
||||
format!("{count} interactive elements:")
|
||||
};
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"content": [{
|
||||
"type": "text",
|
||||
"text": format!("{header}\n{elements}")
|
||||
}]
|
||||
}))
|
||||
}
|
||||
|
||||
async fn handle_click_by_index(
|
||||
&self,
|
||||
arguments: &serde_json::Value,
|
||||
) -> Result<serde_json::Value, McpError> {
|
||||
let profile_id = arguments
|
||||
.get("profile_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| McpError {
|
||||
code: -32602,
|
||||
message: "Missing profile_id".to_string(),
|
||||
})?;
|
||||
let index = arguments
|
||||
.get("index")
|
||||
.and_then(|v| v.as_u64())
|
||||
.ok_or_else(|| McpError {
|
||||
code: -32602,
|
||||
message: "Missing index".to_string(),
|
||||
})?;
|
||||
|
||||
let profile = self.get_running_profile(profile_id)?;
|
||||
let cdp_port = self.get_cdp_port_for_profile(&profile).await?;
|
||||
let ws_url = self.get_cdp_ws_url(cdp_port).await?;
|
||||
|
||||
let js = format!(
|
||||
r#"(() => {{
|
||||
const arr = window.__donut_interactive;
|
||||
if (!arr || !arr[{index}]) throw new Error('No element at index {index}. Call get_interactive_elements first or after navigation.');
|
||||
const el = arr[{index}];
|
||||
el.scrollIntoView({{block: 'center'}});
|
||||
el.click();
|
||||
return true;
|
||||
}})()"#
|
||||
);
|
||||
|
||||
let result = self
|
||||
.send_cdp_and_wait_for_load(
|
||||
&ws_url,
|
||||
"Runtime.evaluate",
|
||||
serde_json::json!({
|
||||
"expression": js,
|
||||
"returnByValue": true,
|
||||
}),
|
||||
10,
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(exception) = result.get("exceptionDetails") {
|
||||
let msg = exception
|
||||
.get("exception")
|
||||
.and_then(|e| e.get("description"))
|
||||
.or_else(|| exception.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("Click failed");
|
||||
return Err(McpError {
|
||||
code: -32000,
|
||||
message: msg.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"content": [{
|
||||
"type": "text",
|
||||
"text": format!("Clicked element at index {index}")
|
||||
}]
|
||||
}))
|
||||
}
|
||||
|
||||
async fn handle_type_by_index(
|
||||
&self,
|
||||
arguments: &serde_json::Value,
|
||||
) -> Result<serde_json::Value, McpError> {
|
||||
let profile_id = arguments
|
||||
.get("profile_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| McpError {
|
||||
code: -32602,
|
||||
message: "Missing profile_id".to_string(),
|
||||
})?;
|
||||
let index = arguments
|
||||
.get("index")
|
||||
.and_then(|v| v.as_u64())
|
||||
.ok_or_else(|| McpError {
|
||||
code: -32602,
|
||||
message: "Missing index".to_string(),
|
||||
})?;
|
||||
let text = arguments
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| McpError {
|
||||
code: -32602,
|
||||
message: "Missing text".to_string(),
|
||||
})?;
|
||||
let clear_first = arguments
|
||||
.get("clear_first")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let instant = arguments
|
||||
.get("instant")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
let wpm = arguments.get("wpm").and_then(|v| v.as_f64());
|
||||
|
||||
let profile = self.get_running_profile(profile_id)?;
|
||||
let cdp_port = self.get_cdp_port_for_profile(&profile).await?;
|
||||
let ws_url = self.get_cdp_ws_url(cdp_port).await?;
|
||||
|
||||
// Mirrors handle_type_text's focus step but resolves the element via the
|
||||
// cached index instead of a CSS selector.
|
||||
let focus_js = if clear_first {
|
||||
format!(
|
||||
r#"(() => {{
|
||||
const arr = window.__donut_interactive;
|
||||
if (!arr || !arr[{index}]) throw new Error('No element at index {index}. Call get_interactive_elements first or after navigation.');
|
||||
const el = arr[{index}];
|
||||
el.scrollIntoView({{block: 'center'}});
|
||||
el.focus();
|
||||
el.value = '';
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
return true;
|
||||
}})()"#
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"(() => {{
|
||||
const arr = window.__donut_interactive;
|
||||
if (!arr || !arr[{index}]) throw new Error('No element at index {index}. Call get_interactive_elements first or after navigation.');
|
||||
const el = arr[{index}];
|
||||
el.scrollIntoView({{block: 'center'}});
|
||||
el.focus();
|
||||
return true;
|
||||
}})()"#
|
||||
)
|
||||
};
|
||||
|
||||
let focus_result = self
|
||||
.send_cdp(
|
||||
&ws_url,
|
||||
"Runtime.evaluate",
|
||||
serde_json::json!({
|
||||
"expression": focus_js,
|
||||
"returnByValue": true,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(exception) = focus_result.get("exceptionDetails") {
|
||||
let msg = exception
|
||||
.get("exception")
|
||||
.and_then(|e| e.get("description"))
|
||||
.or_else(|| exception.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("Focus failed");
|
||||
return Err(McpError {
|
||||
code: -32000,
|
||||
message: msg.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if instant {
|
||||
self
|
||||
.send_cdp(
|
||||
&ws_url,
|
||||
"Input.insertText",
|
||||
serde_json::json!({ "text": text }),
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
self.send_human_keystrokes(&ws_url, text, wpm).await?;
|
||||
}
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"content": [{
|
||||
"type": "text",
|
||||
"text": format!("Typed text into element at index {index}")
|
||||
}]
|
||||
}))
|
||||
}
|
||||
|
||||
// --- Synchronizer handlers ---
|
||||
|
||||
async fn handle_start_sync_session(
|
||||
|
||||
Reference in New Issue
Block a user