diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd68ae5..fd5ed6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,28 @@ on: branches: [main] workflow_call: +# CI flake mitigation: +# ci.yml is triggered TWICE per PR on the same commit — once directly via +# the `pull_request` trigger above ("Frontend Tests & Build" check) and once +# via `workflow_call` from docker-publish.yml ("CI Gate / Frontend Tests & +# Build" check). Both jobs land on the same Actions runner pool at the same +# time and fight for CPU/RAM. Under contention, React's reconciliation in +# `messagesViewFirstContact.test.tsx > removes an approved contact …` +# overran its waitFor timeout (5s at the time) — that's the single failure mode we've +# seen flake on PRs #226, #237, #261, #262, #265, #294, #303, and the +# fd7d6fa push. Backend tests and every other frontend test pass under +# the same conditions, which is what made this look random. +# +# Pinning a concurrency group on the SHA (PR head, or the pushed commit +# for main) serializes the two invocations so neither starves the other. +# We use cancel-in-progress: false so the second one queues instead of +# cancelling — cancelling could leave the PR check stuck "Expected" if +# only one of the two ever finishes. Total CI time grows by ~2 min in +# exchange for deterministic outcomes. 
+concurrency: + group: ci-${{ github.event.pull_request.head.sha || github.sha }} + cancel-in-progress: false + jobs: frontend: name: Frontend Tests & Build diff --git a/frontend/src/__tests__/mesh/messagesViewFirstContact.test.tsx b/frontend/src/__tests__/mesh/messagesViewFirstContact.test.tsx index dec5baa..780a053 100644 --- a/frontend/src/__tests__/mesh/messagesViewFirstContact.test.tsx +++ b/frontend/src/__tests__/mesh/messagesViewFirstContact.test.tsx @@ -842,7 +842,7 @@ describe('MessagesView first-contact trust UX', () => { expect(screen.queryByText(/delivery key has not reached/i)).not.toBeInTheDocument(); }); - it('removes an approved contact immediately from the visible contact list', async () => { + it('removes an approved contact immediately from the visible contact list', { timeout: 30_000 }, async () => { contactsState = { '!sb_remove': { alias: 'Remove Me', @@ -868,18 +868,35 @@ describe('MessagesView first-contact trust UX', () => { // event (removeContact + setContacts + setComposeStatus + setComposeError). // Under CI load the resulting render-and-paint cycle has been observed // to take >1s, which is the default findByText timeout — that race has - // produced flakes on PRs #226, #237, #261, and #262 in succession. - // The settle window is bounded by React's reconciliation, not by any - // network/animation cost, so a generous timeout is the right deflake - // here (the failure mode this masks would be "toast never renders", - // which would still fail at 5s). + // produced flakes on PRs #226, #237, #261, #262, #265, #294, #303, and + // the fd7d6fa push. + // + // Two-part fix: + // + // 1. .github/workflows/ci.yml — concurrency group serializes the two + // parallel ci.yml invocations (direct trigger + workflow_call from + // docker-publish.yml) so they no longer starve each other for + // runner CPU/RAM. That covers the SHA-pair starvation case that + // was visible on PRs #294 and #303. + // + // 2. 
This block — the per-test `timeout: 30_000` on the `it()` above + // and the 10s `waitFor` timeout below. The suite-wide testTimeout + // was 15s (raised in Round 7a deflake work). An earlier draft of + // this fix set waitFor to 15s, but that left ZERO headroom against + // the 15s per-test budget — the test ran out of clock before + // waitFor could even fail. Bumping the per-test timeout to 30s + // gives waitFor a real 10s window after the render/click setup + // finishes. + // + // The failure mode this masks would be "toast never renders", which + // still fails loudly at the 10s waitFor cap. await waitFor( () => { expect( screen.getByText(/Removed contact: Remove Me\./i), ).toBeInTheDocument(); }, - { timeout: 5000, interval: 50 }, + { timeout: 10000, interval: 50 }, ); expect(screen.queryByText('Remove Me')).not.toBeInTheDocument(); });