fix: resilient subscription CLI calls (retry, richer errors, capped concurrency)

The 'recon failed (claude subscription CLI failed: )' was a transient CLI failure
(rate limit / cold start) reported with a blank message and no retry.

- chat_cli: on non-zero exit, surface exit code + stdout (CLI writes the real
  reason there, not stderr); treat empty output as an error
- pool.one(): retry up to 3x with backoff for transient failures (both
  subscription and API paths)
- with_auth: cap concurrency to 3 on the subscription path — spawning many
  parallel CLI processes itself trips provider rate limits

Verified: live subscription run recovers and completes recon → select → exploit
→ vote → artifacts.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
CyberSecurityUP
2026-06-23 13:07:55 -03:00
parent 9dfcea87bc
commit c6fd5d6ac8
2 changed files with 46 additions and 10 deletions
+21 -2
View File
@@ -180,10 +180,29 @@ impl ChatClient {
// Drop closes stdin so the CLI processes the prompt and exits.
}
let out = child.wait_with_output().await?;
let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string();
let stderr = String::from_utf8_lossy(&out.stderr);
if !out.status.success() {
return Err(anyhow!("{} subscription CLI failed: {}", bin, truncate(&String::from_utf8_lossy(&out.stderr), 200)));
// The CLI often writes the real reason (rate limit, auth) to stdout,
// not stderr — surface both plus the exit code so the error isn't blank.
let detail = if !stderr.trim().is_empty() {
stderr.trim().to_string()
} else if !stdout.is_empty() {
stdout.clone()
} else {
"no output".to_string()
};
return Err(anyhow!(
"{} subscription CLI exit {}: {}",
bin,
out.status.code().map(|c| c.to_string()).unwrap_or_else(|| "signal".into()),
truncate(&detail, 240)
));
}
Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
if stdout.is_empty() {
return Err(anyhow!("{} subscription CLI returned empty output", bin));
}
Ok(stdout)
}
}
+25 -8
View File
@@ -28,7 +28,9 @@ impl ModelPool {
subscription: bool,
mcp_config: Option<String>,
) -> Self {
let concurrency = concurrency.max(1);
// Subscription spawns one CLI process per call; too many in parallel
// trips provider rate limits, so cap concurrency on that path.
let concurrency = if subscription { concurrency.clamp(1, 3) } else { concurrency.max(1) };
ModelPool {
client: ChatClient::new(),
sem: Arc::new(Semaphore::new(concurrency)),
@@ -42,15 +44,30 @@ impl ModelPool {
}
}
/// One completion for a model, via subscription CLI (optionally with MCP) or HTTP API.
/// One completion for a model, via subscription CLI (optionally with MCP) or
/// HTTP API, with a short retry/backoff to ride out transient failures
/// (rate limits, MCP cold-starts, network blips).
async fn one(&self, m: &ModelRef, system: &str, user: &str) -> Result<String> {
if self.subscription && cli_binary_for(&m.provider).is_some() {
return self
.client
.chat_cli(&m.provider, &m.model, system, user, self.mcp_config.as_deref())
.await;
let use_cli = self.subscription && cli_binary_for(&m.provider).is_some();
let mut last = anyhow::anyhow!("no attempt");
for attempt in 0..3u64 {
if attempt > 0 {
// 1.5s, 4.5s backoff.
tokio::time::sleep(std::time::Duration::from_millis(1500 * attempt * attempt.max(1))).await;
}
let r = if use_cli {
self.client
.chat_cli(&m.provider, &m.model, system, user, self.mcp_config.as_deref())
.await
} else {
self.client.chat(m, system, user).await
};
match r {
Ok(t) => return Ok(t),
Err(e) => last = e,
}
}
self.client.chat(m, system, user).await
Err(last)
}
/// Complete a prompt, trying each candidate model until one succeeds.