From f2a5a8d3dc8c9e9a61ed0d2135c6b9bae96cbf6b Mon Sep 17 00:00:00 2001 From: Aditi Chauhan Date: Sat, 20 Jun 2026 16:41:35 +0530 Subject: [PATCH 1/8] fix(review): retry review when a stale Running pass's reviewer died Engine.Trigger's per-commit idempotency check short-circuited on any non-failed run for (session, targetSHA), including a row still ReviewRunRunning whose reviewer process had actually died (e.g. the user closed the reviewer's terminal mid-run, bypassing Submit). Nothing transitions such a row out of Running, so retrying review for the same commit returned "already up to date" permanently, until a new commit was pushed (#342). Now when the existing pass is Running, Trigger checks launcher liveness via the review's reviewer handle before reusing it: if alive, behavior is unchanged (reuse the in-progress run, no double-spawn); if not alive, the stale row is marked Failed and Trigger falls through to spawn a fresh run. Co-Authored-By: Claude Opus 4.8 --- backend/internal/review/review.go | 24 ++++++++++++++-- backend/internal/review/review_test.go | 39 ++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index 4b6a3eb3..810b9966 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -184,12 +184,30 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge return TriggerResult{}, err } - // Idempotency: return a non-failed pass as-is. Failed passes stay visible - // but can be retried after the user fixes the underlying issue. + // Idempotency: return a non-failed pass as-is, except when the pass is still + // Running but its reviewer process has died (e.g. the user closed the + // reviewer's terminal mid-run, bypassing Submit). A dead Running row is + // marked Failed and falls through to spawn a fresh review, so retrying isn't + // permanently stuck on "already up to date" until a new commit lands (#342). + // Failed passes stay visible but can be retried after the user fixes the + // underlying issue. if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err } else if ok && existing.Status != domain.ReviewRunFailed { - return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil + stale := false + if existing.Status == domain.ReviewRunRunning { + alive, err := e.launcher.Alive(ctx, review.ReviewerHandleID) + if err != nil { + return TriggerResult{}, err + } + stale = !alive + } + if !stale { + return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil + } + if _, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "reviewer process was no longer alive; marked failed so the review can be retried"); err != nil { + return TriggerResult{}, err + } } harness, err := e.reviewerHarness(ctx, worker) diff --git a/backend/internal/review/review_test.go b/backend/internal/review/review_test.go index 74f6a761..ab59bcee 100644 --- a/backend/internal/review/review_test.go +++ b/backend/internal/review/review_test.go @@ -182,7 +182,9 @@ func TestTriggerSpawnsNewReviewerAndRecordsRunAfterLaunch(t *testing.T) { func TestTriggerConcurrentSameWorkerSpawnsOnce(t *testing.T) { store := &fakeStore{} - launcher := &fakeLauncher{handle: "review-mer-1"} + // The winner's freshly-spawned reviewer is alive, so losers that re-read its + // Running run short-circuit to reuse instead of re-spawning. + launcher := &fakeLauncher{handle: "review-mer-1", alive: true} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) const n = 8 @@ -246,7 +248,7 @@ func TestTriggerIsIdempotentForSameCommit(t *testing.T) { review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, } - launcher := &fakeLauncher{} + launcher := &fakeLauncher{alive: true} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) res, err := eng.Trigger(context.Background(), "mer-1") @@ -264,6 +266,39 @@ func TestTriggerIsIdempotentForSameCommit(t *testing.T) { } } +func TestTriggerMarksStaleRunningPassFailedAndRetries(t *testing.T) { + // A Running pass whose reviewer process died (terminal closed mid-run) must + // not permanently short-circuit retries with "already up to date" (#342): + // the stale row is marked Failed and a fresh review is spawned. + store := &fakeStore{ + review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, + runs: []domain.ReviewRun{{ID: "run-stale", ReviewID: "rev-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, + } + launcher := &fakeLauncher{alive: false, handle: "review-mer-1"} + eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) + + res, err := eng.Trigger(context.Background(), "mer-1") + if err != nil { + t.Fatalf("Trigger: %v", err) + } + if !res.Created || res.Run.ID == "run-stale" { + t.Fatalf("expected a fresh run, got %+v", res) + } + if !launcher.spawned { + t.Fatalf("expected a fresh reviewer spawn: %+v", launcher) + } + if len(store.runs) != 2 { + t.Fatalf("expected the stale run plus a new one: %+v", store.runs) + } + stale := store.runs[0] + if stale.ID != "run-stale" || stale.Status != domain.ReviewRunFailed || stale.Verdict != domain.VerdictNone { + t.Fatalf("stale run not marked failed: %+v", stale) + } + if !strings.Contains(stale.Body, "no longer alive") { + t.Fatalf("stale run body = %q, want liveness reason", stale.Body) + } +} + func TestTriggerNotifiesLiveReviewerOnNewCommit(t *testing.T) { store := &fakeStore{ review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, From 177eb76db7e911ad5ef247831544aa2b438ab7b9 Mon Sep 17 00:00:00 2001 From: Aditi Chauhan Date: Sat, 20 Jun 2026 16:47:18 +0530 Subject: [PATCH 2/8] fix(review): gate idempotency on a recorded verdict, not run liveness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per maintainer direction on #342: pane liveness can't distinguish "still actively reviewing" from "pane left open after the user stopped the agent mid-run," so an Alive()-gated check still misses the reported case. Trigger's idempotency now treats a pass as "already up to date" only when it actually carries a verdict (set exclusively by Submit), not merely a non-Failed status. An un-verdicted Running pass is never trusted: it's superseded (marked Failed) and a fresh pass is started — the live pane is notified if still alive, or a new one spawned if not. A Running row that loses the supersede race to a concurrent Submit is re-read and its verdict trusted instead of starting a redundant pass. Co-Authored-By: Claude Opus 4.8 --- backend/internal/review/review.go | 40 ++++++----- backend/internal/review/review_test.go | 98 ++++++++++++++++---------- 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index 810b9966..d30600b2 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -184,29 +184,31 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge return TriggerResult{}, err } - // Idempotency: return a non-failed pass as-is, except when the pass is still - // Running but its reviewer process has died (e.g. the user closed the - // reviewer's terminal mid-run, bypassing Submit). A dead Running row is - // marked Failed and falls through to spawn a fresh review, so retrying isn't - // permanently stuck on "already up to date" until a new commit lands (#342). - // Failed passes stay visible but can be retried after the user fixes the - // underlying issue. + // Idempotency: a pass only counts as done once it actually carries a + // verdict (set exclusively by Submit). Status alone isn't enough — a + // Running pass with no verdict may have been interrupted (its execution + // stopped, or its pane killed) without ever calling Submit, and pane + // liveness can't distinguish "still working" from "pane open, work + // stopped." So an un-verdicted Running pass is superseded on retry: it's + // marked Failed, and a fresh pass is started below — reusing the pane via + // Notify if it's still alive, or spawning a new one if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Status != domain.ReviewRunFailed { - stale := false - if existing.Status == domain.ReviewRunRunning { - alive, err := e.launcher.Alive(ctx, review.ReviewerHandleID) - if err != nil { + } else if ok && existing.Verdict != domain.VerdictNone { + return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil + } else if ok && existing.Status == domain.ReviewRunRunning { + superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") + if err != nil { + return TriggerResult{}, err + } + if !superseded { + // Lost the race to a concurrent Submit: re-read and trust its verdict + // rather than starting a redundant pass. + if latest, ok, err := e.store.GetReviewRun(ctx, existing.ID); err != nil { return TriggerResult{}, err + } else if ok { + return TriggerResult{Run: latest, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } - stale = !alive - } - if !stale { - return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil - } - if _, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "reviewer process was no longer alive; marked failed so the review can be retried"); err != nil { - return TriggerResult{}, err } } diff --git a/backend/internal/review/review_test.go b/backend/internal/review/review_test.go index ab59bcee..466058e0 100644 --- a/backend/internal/review/review_test.go +++ b/backend/internal/review/review_test.go @@ -128,7 +128,9 @@ func (f *fakeLauncher) Notify(_ context.Context, handleID string, spec LaunchSpe f.gotSpec = spec return f.notifyErr } -func (f *fakeLauncher) Alive(_ context.Context, _ string) (bool, error) { return f.alive, nil } +func (f *fakeLauncher) Alive(_ context.Context, _ string) (bool, error) { + return f.alive || f.spawned, nil +} func liveWorker() domain.SessionRecord { return domain.SessionRecord{ @@ -180,44 +182,37 @@ func TestTriggerSpawnsNewReviewerAndRecordsRunAfterLaunch(t *testing.T) { } } -func TestTriggerConcurrentSameWorkerSpawnsOnce(t *testing.T) { +func TestTriggerConcurrentSameWorkerNeverDoubleSpawns(t *testing.T) { + // Concurrent triggers for the same worker must never end up with two + // independent reviewer panes for the same commit (#242): the loser of + // each race either finds the pane already alive (Notify, not Spawn) or + // loses the unique-insert race outright. Verdict-gated idempotency (#342) + // means a run with no verdict yet is no longer trusted blindly, so repeat + // triggers may notify the live pane again instead of being a pure no-op — + // that's fine; only an actual second Spawn would be a regression. store := &fakeStore{} - // The winner's freshly-spawned reviewer is alive, so losers that re-read its - // Running run short-circuit to reuse instead of re-spawning. - launcher := &fakeLauncher{handle: "review-mer-1", alive: true} + launcher := &fakeLauncher{handle: "review-mer-1"} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) const n = 8 var wg sync.WaitGroup - results := make([]TriggerResult, n) errs := make([]error, n) wg.Add(n) for i := 0; i < n; i++ { go func(i int) { defer wg.Done() - results[i], errs[i] = eng.Trigger(context.Background(), "mer-1") + _, errs[i] = eng.Trigger(context.Background(), "mer-1") }(i) } wg.Wait() - created := 0 for i := 0; i < n; i++ { if errs[i] != nil { t.Fatalf("Trigger[%d]: %v", i, errs[i]) } - if results[i].Created { - created++ - } - } - // Exactly one trigger does the work; the rest reuse its run. - if created != 1 { - t.Errorf("Created=true count = %d, want exactly 1", created) } if launcher.spawnCount != 1 { - t.Errorf("reviewer spawn count = %d, want 1", launcher.spawnCount) - } - if len(store.runs) != 1 { - t.Errorf("recorded review runs = %d, want 1", len(store.runs)) + t.Errorf("reviewer spawn count = %d, want exactly 1 (no double-spawn)", launcher.spawnCount) } } @@ -244,9 +239,14 @@ func TestTriggerFallsBackToExistingRunOnUniqueConflict(t *testing.T) { } func TestTriggerIsIdempotentForSameCommit(t *testing.T) { + // A verdict was actually recorded for this commit (via Submit) — this is + // the only state that counts as genuinely done. store := &fakeStore{ review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, - runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, + runs: []domain.ReviewRun{{ + ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", + Status: domain.ReviewRunComplete, Verdict: domain.VerdictApproved, + }}, } launcher := &fakeLauncher{alive: true} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) @@ -266,36 +266,62 @@ func TestTriggerIsIdempotentForSameCommit(t *testing.T) { } } -func TestTriggerMarksStaleRunningPassFailedAndRetries(t *testing.T) { - // A Running pass whose reviewer process died (terminal closed mid-run) must - // not permanently short-circuit retries with "already up to date" (#342): - // the stale row is marked Failed and a fresh review is spawned. +func TestTriggerRespawnsWhenRunningRowHasNoVerdict(t *testing.T) { + // A prior pass for this commit never produced a verdict — its execution + // may have been stopped (e.g. the user killed the terminal, or just + // interrupted the agent without killing the pane) without ever calling + // Submit. Pane liveness alone can't tell those apart, so Status=Running + // with no verdict is never trusted blindly: the stale row is marked + // Failed and a fresh pass starts, reusing the pane if it's still alive + // (Notify) and spawning a new one only if it's not (#342). store := &fakeStore{ review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, - runs: []domain.ReviewRun{{ID: "run-stale", ReviewID: "rev-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, + runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, } - launcher := &fakeLauncher{alive: false, handle: "review-mer-1"} + launcher := &fakeLauncher{alive: false, handle: "review-mer-2"} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) res, err := eng.Trigger(context.Background(), "mer-1") if err != nil { t.Fatalf("Trigger: %v", err) } - if !res.Created || res.Run.ID == "run-stale" { - t.Fatalf("expected a fresh run, got %+v", res) + if !res.Created { + t.Fatalf("expected a fresh pass when the prior run has no verdict: %+v", res) } if !launcher.spawned { - t.Fatalf("expected a fresh reviewer spawn: %+v", launcher) - } - if len(store.runs) != 2 { - t.Fatalf("expected the stale run plus a new one: %+v", store.runs) + t.Fatalf("expected a fresh spawn since the old pane is dead: %+v", launcher) } stale := store.runs[0] - if stale.ID != "run-stale" || stale.Status != domain.ReviewRunFailed || stale.Verdict != domain.VerdictNone { - t.Fatalf("stale run not marked failed: %+v", stale) + if stale.ID != "run-1" || stale.Status != domain.ReviewRunFailed { + t.Fatalf("expected stale run-1 marked Failed, got %+v", stale) + } +} + +func TestTriggerNotifiesLiveReviewerWhenRunningRowHasNoVerdict(t *testing.T) { + // Same as above, but the pane is still alive (e.g. the user only + // interrupted the agent's work without killing the terminal/pane). The + // stale row is still superseded, but the live pane is re-notified instead + // of spawning a redundant second pane. + store := &fakeStore{ + review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, + runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, } - if !strings.Contains(stale.Body, "no longer alive") { - t.Fatalf("stale run body = %q, want liveness reason", stale.Body) + launcher := &fakeLauncher{alive: true, handle: "review-mer-1"} + eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) + + res, err := eng.Trigger(context.Background(), "mer-1") + if err != nil { + t.Fatalf("Trigger: %v", err) + } + if !res.Created { + t.Fatalf("expected a fresh pass even though the pane is reused: %+v", res) + } + if !launcher.notified || launcher.spawned { + t.Fatalf("expected notify on the still-alive pane, not a spawn: %+v", launcher) + } + stale := store.runs[0] + if stale.ID != "run-1" || stale.Status != domain.ReviewRunFailed { + t.Fatalf("expected stale run-1 marked Failed, got %+v", stale) } } From ad0e0a4a25564d432276727fc106867bc38d405e Mon Sep 17 00:00:00 2001 From: neversettle <41864816+neversettle17-101@users.noreply.github.com> Date: Sun, 21 Jun 2026 17:48:33 +0530 Subject: [PATCH 3/8] Test --- backend/internal/review/review.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index d30600b2..2471b34e 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -194,7 +194,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge // Notify if it's still alive, or spawning a new one if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Verdict != domain.VerdictNone { + } else if ok && existing.Verdict != domain.VerdictO { return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } else if ok && existing.Status == domain.ReviewRunRunning { superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") From a0849ec0eb3784a41eda9e69f0ae4dd5d6f21ed4 Mon Sep 17 00:00:00 2001 From: Aditi Chauhan Date: Sun, 21 Jun 2026 17:51:55 +0530 Subject: [PATCH 4/8] Revert "Test" This reverts commit ad0e0a4a25564d432276727fc106867bc38d405e. --- backend/internal/review/review.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index 2471b34e..d30600b2 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -194,7 +194,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge // Notify if it's still alive, or spawning a new one if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Verdict != domain.VerdictO { + } else if ok && existing.Verdict != domain.VerdictNone { return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } else if ok && existing.Status == domain.ReviewRunRunning { superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") From ec0b9b2596cda8805593336770efbab2119fb4f2 Mon Sep 17 00:00:00 2001 From: neversettle <41864816+neversettle17-101@users.noreply.github.com> Date: Sun, 21 Jun 2026 18:02:56 +0530 Subject: [PATCH 5/8] Test - 2 --- backend/internal/review/review.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index d30600b2..866e9a31 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -194,7 +194,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge // Notify if it's still alive, or spawning a new one if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Verdict != domain.VerdictNone { + } else if ok && existing.Verdict != domain.VerdictNo { return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } else if ok && existing.Status == domain.ReviewRunRunning { superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") From dd5bc648491650bba2ba9ec99e9069fa7828cbd2 Mon Sep 17 00:00:00 2001 From: neversettle <41864816+neversettle17-101@users.noreply.github.com> Date: Sun, 21 Jun 2026 18:05:11 +0530 Subject: [PATCH 6/8] Update review.go --- backend/internal/review/review.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index 866e9a31..580c0674 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -194,7 +194,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge // Notify if it's still alive, or spawning a new one if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Verdict != domain.VerdictNo { + } else if ok && existing.Verdict != domain.V { return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } else if ok && existing.Status == domain.ReviewRunRunning { superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") From 32da2bf7d9b8b6510790e852b190fdf2cd7fe28f Mon Sep 17 00:00:00 2001 From: Aditi Chauhan Date: Sun, 21 Jun 2026 18:07:39 +0530 Subject: [PATCH 7/8] fix(review): restore domain.VerdictNone in idempotency check The 'Test - 2' (ec0b9b2) and 'Update review.go' (dd5bc64) scratch commits left line 197 referencing the undefined symbol domain.V, breaking the build of internal/review and every package that imports it (internal/cli in CI). Restore the intended domain.VerdictNone. Co-Authored-By: Claude Opus 4.8 --- backend/internal/review/review.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index 580c0674..d30600b2 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -194,7 +194,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge // Notify if it's still alive, or spawning a new one if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Verdict != domain.V { + } else if ok && existing.Verdict != domain.VerdictNone { return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } else if ok && existing.Status == domain.ReviewRunRunning { superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") From 25b74dab5dda835d6df9fbfc7f8cffbf39283d90 Mon Sep 17 00:00:00 2001 From: Aditi Chauhan Date: Sun, 21 Jun 2026 18:38:19 +0530 Subject: [PATCH 8/8] feat(review): gate re-review block on Status==Complete; lock state matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-review is now blocked only when a pass actually finished — Status Complete, which Submit sets exclusively and only after recording a verdict. Any non-Complete pass is retryable: a Failed pass falls through to a fresh retry, and a Running pass (which pane liveness can't prove is still working) is superseded on retry. This is behaviorally equivalent to the prior verdict!=None gate on today's data, but keys the "done" decision on the canonical terminal status and is body-independent (an approved review may carry no body yet still blocks). Lock the full re-trigger state machine with tests: - Complete (even with an empty body) blocks; no respawn. - Concurrent Submit winning the supersede CAS race: Trigger re-reads and returns the now-Complete pass instead of starting a redundant one (previously untested) — via a new fakeStore.beforeUpdate hook. - Existing coverage: dead pane -> spawn, live pane -> notify, Failed -> fresh retry, concurrent triggers never double-spawn. Co-Authored-By: Claude Opus 4.8 --- backend/internal/review/review.go | 25 +++++++------ backend/internal/review/review_test.go | 52 ++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index d30600b2..794455de 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -184,17 +184,20 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge return TriggerResult{}, err } - // Idempotency: a pass only counts as done once it actually carries a - // verdict (set exclusively by Submit). Status alone isn't enough — a - // Running pass with no verdict may have been interrupted (its execution - // stopped, or its pane killed) without ever calling Submit, and pane - // liveness can't distinguish "still working" from "pane open, work - // stopped." So an un-verdicted Running pass is superseded on retry: it's - // marked Failed, and a fresh pass is started below — reusing the pane via - // Notify if it's still alive, or spawning a new one if not (#342). + // Idempotency: a commit only counts as reviewed once a pass actually + // finished — Status Complete, which only Submit sets, and only after + // recording a verdict. Any non-Complete pass is retryable: + // - A Failed pass (launch error, or one superseded below) falls through to + // a fresh retry. + // - A Running pass may have been interrupted (its execution stopped, or + // its pane killed) without ever calling Submit, and pane liveness can't + // distinguish "still working" from "pane open, work stopped." So it's + // superseded on retry: marked Failed, then a fresh pass starts below — + // reusing the pane via Notify if it's still alive, or spawning a new one + // if not (#342). if existing, ok, err := e.store.GetReviewRunBySessionAndSHA(ctx, workerID, targetSHA); err != nil { return TriggerResult{}, err - } else if ok && existing.Verdict != domain.VerdictNone { + } else if ok && existing.Status == domain.ReviewRunComplete { return TriggerResult{Run: existing, ReviewerHandleID: review.ReviewerHandleID, Created: false}, nil } else if ok && existing.Status == domain.ReviewRunRunning { superseded, err := e.store.UpdateReviewRunResult(ctx, existing.ID, domain.ReviewRunFailed, domain.VerdictNone, "superseded by a new review trigger") @@ -202,8 +205,8 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge return TriggerResult{}, err } if !superseded { - // Lost the race to a concurrent Submit: re-read and trust its verdict - // rather than starting a redundant pass. + // Lost the race to a concurrent Submit: that pass is now Complete, so + // re-read and return it instead of starting a redundant pass. if latest, ok, err := e.store.GetReviewRun(ctx, existing.ID); err != nil { return TriggerResult{}, err } else if ok { diff --git a/backend/internal/review/review_test.go b/backend/internal/review/review_test.go index 466058e0..92d8c376 100644 --- a/backend/internal/review/review_test.go +++ b/backend/internal/review/review_test.go @@ -23,6 +23,10 @@ type fakeStore struct { // winner (so a follow-up GetReviewRunBySessionAndSHA finds it) and returns // insertErr instead of recording the caller's run. insertErr error + // beforeUpdate, when set, runs at the start of UpdateReviewRunResult. Tests + // use it to model a concurrent Submit landing on a run between the + // idempotency lookup and the supersede update (the CAS race). + beforeUpdate func(s *fakeStore, id string) } func (f *fakeStore) UpsertReview(_ context.Context, r domain.Review) error { @@ -47,6 +51,9 @@ func (f *fakeStore) InsertReviewRun(_ context.Context, r domain.ReviewRun) error return nil } func (f *fakeStore) UpdateReviewRunResult(_ context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body string) (bool, error) { + if f.beforeUpdate != nil { + f.beforeUpdate(f, id) + } for i := range f.runs { if f.runs[i].ID == id { if f.runs[i].Status != domain.ReviewRunRunning { @@ -239,13 +246,15 @@ func TestTriggerFallsBackToExistingRunOnUniqueConflict(t *testing.T) { } func TestTriggerIsIdempotentForSameCommit(t *testing.T) { - // A verdict was actually recorded for this commit (via Submit) — this is - // the only state that counts as genuinely done. + // A Complete pass is the only state that blocks re-review: Status Complete + // is set exclusively by Submit, after it records a verdict. The body is + // deliberately empty here — an approved review may carry none, yet a + // finished pass still blocks. The block is gated on Status, not the body. store := &fakeStore{ review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, runs: []domain.ReviewRun{{ ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", - Status: domain.ReviewRunComplete, Verdict: domain.VerdictApproved, + Status: domain.ReviewRunComplete, Verdict: domain.VerdictApproved, Body: "", }}, } launcher := &fakeLauncher{alive: true} @@ -266,6 +275,43 @@ func TestTriggerIsIdempotentForSameCommit(t *testing.T) { } } +func TestTriggerReturnsCompletedRunWhenSubmitWinsSupersedeRace(t *testing.T) { + // A retry finds a Running pass and moves to supersede it, but a concurrent + // Submit lands first and marks that pass Complete. The CAS supersede then + // no-ops (the row is no longer Running), so Trigger must re-read and return + // the now-Complete pass rather than starting a redundant one. + store := &fakeStore{ + review: &domain.Review{ID: "rev-1", SessionID: "mer-1", ReviewerHandleID: "review-mer-1"}, + runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", TargetSHA: "sha1", Status: domain.ReviewRunRunning}}, + } + // Model the concurrent Submit: the moment Trigger tries to supersede run-1, + // flip it Complete with a verdict, exactly as Submit would have. + store.beforeUpdate = func(s *fakeStore, id string) { + for i := range s.runs { + if s.runs[i].ID == id && s.runs[i].Status == domain.ReviewRunRunning { + s.runs[i].Status = domain.ReviewRunComplete + s.runs[i].Verdict = domain.VerdictApproved + } + } + } + launcher := &fakeLauncher{alive: true, handle: "review-mer-1"} + eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, launcher) + + res, err := eng.Trigger(context.Background(), "mer-1") + if err != nil { + t.Fatalf("Trigger: %v", err) + } + if res.Created || res.Run.ID != "run-1" || res.Run.Status != domain.ReviewRunComplete { + t.Fatalf("expected the Submit-completed run returned as-is, got %+v", res) + } + if launcher.spawned || launcher.notified { + t.Fatalf("should not start a redundant pass after losing to Submit: %+v", launcher) + } + if len(store.runs) != 1 { + t.Fatalf("should not insert another run: %+v", store.runs) + } +} + func TestTriggerRespawnsWhenRunningRowHasNoVerdict(t *testing.T) { // A prior pass for this commit never produced a verdict — its execution // may have been stopped (e.g. the user killed the terminal, or just