diff --git a/.gitignore b/.gitignore index 0a8225ec..3863ec09 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,10 @@ session-events.jsonl.* # Agent Orchestrator local session state .ao/ +# AO reviewer scratch output. The reviewer agent runs inside the worker's +# worktree; its review writeup must never be committed onto the worker branch. +/review.md + # Environment .direnv/ .env diff --git a/backend/internal/cli/review.go b/backend/internal/cli/review.go index 7bb2f7f5..fa9046f9 100644 --- a/backend/internal/cli/review.go +++ b/backend/internal/cli/review.go @@ -3,12 +3,14 @@ package cli import ( "errors" "fmt" + "io" "net/url" "os" "strings" "time" "github.com/spf13/cobra" + "github.com/spf13/pflag" ) // reviewRun mirrors the daemon's domain.ReviewRun for the CLI client. @@ -32,16 +34,18 @@ type reviewRunResponse struct { // submitReviewRequest mirrors controllers.SubmitReviewInput. type submitReviewRequest struct { - RunID string `json:"runId"` - Verdict string `json:"verdict"` - Body string `json:"body"` + RunID string `json:"runId"` + Verdict string `json:"verdict"` + Body string `json:"body"` + GithubReviewID string `json:"githubReviewId"` } type reviewSubmitOptions struct { - session string - runID string - verdict string - body string + session string + runID string + verdict string + body string + reviewID string } func newReviewCommand(ctx *commandContext) *cobra.Command { @@ -63,10 +67,16 @@ func newReviewSubmitCommand(ctx *commandContext) *cobra.Command { return ctx.submitReview(cmd, args, opts) }, } + // Reviewer agents routinely spell flags with underscores (--review_id) rather + // than hyphens (--review-id); normalize so both resolve to the same flag. + cmd.Flags().SetNormalizeFunc(func(_ *pflag.FlagSet, name string) pflag.NormalizedName { + return pflag.NormalizedName(strings.ReplaceAll(name, "_", "-")) + }) cmd.Flags().StringVar(&opts.session, "session", "", "Worker session id (or pass it as the positional argument)") cmd.Flags().StringVar(&opts.runID, "run", "", "Review run id (required)") cmd.Flags().StringVar(&opts.verdict, "verdict", "", "Review verdict: approved or changes_requested (required)") - cmd.Flags().StringVar(&opts.body, "body", "", "Path to a Markdown file with the review body") + cmd.Flags().StringVar(&opts.body, "body", "", "Review body: a path to a Markdown file, or - to read from stdin (so nothing is written into the worktree)") + cmd.Flags().StringVar(&opts.reviewID, "review-id", "", "Id of the GitHub PR review just posted (the .id from the gh api POST that created the review)") return cmd } @@ -88,15 +98,24 @@ func (c *commandContext) submitReview(cmd *cobra.Command, args []string, opts re } var body string if path := strings.TrimSpace(opts.body); path != "" { - raw, err := os.ReadFile(path) + var raw []byte + var err error + if path == "-" { + // Read the review from stdin so the reviewer never has to write a file + // into its checkout (where it could be committed onto the worker branch). + raw, err = io.ReadAll(cmd.InOrStdin()) + } else { + raw, err = os.ReadFile(path) + } if err != nil { - return usageError{fmt.Errorf("read body file: %w", err)} + return usageError{fmt.Errorf("read review body: %w", err)} } body = string(raw) } + reviewID := strings.TrimSpace(opts.reviewID) path := "sessions/" + url.PathEscape(session) + "/reviews/submit" var res reviewRunResponse - if err := c.postJSON(cmd.Context(), path, submitReviewRequest{RunID: runID, Verdict: verdict, Body: body}, &res); err != nil { + if err := c.postJSON(cmd.Context(), path, submitReviewRequest{RunID: runID, Verdict: verdict, Body: body, GithubReviewID: reviewID}, &res); err != nil { return err } _, err := fmt.Fprintf(cmd.OutOrStdout(), "recorded %s review for %s\n", res.Review.Verdict, session) diff --git a/backend/internal/cli/review_test.go b/backend/internal/cli/review_test.go index 29cdee64..b3033de1 100644 --- a/backend/internal/cli/review_test.go +++ b/backend/internal/cli/review_test.go @@ -7,6 +7,7 @@ import ( "net/http/httptest" "os" "path/filepath" + "strings" "testing" ) @@ -62,6 +63,47 @@ func TestReviewSubmitReadsBodyFile(t *testing.T) { } } +func TestReviewSubmitReadsBodyFromStdin(t *testing.T) { + cfg := setConfigEnv(t) + srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"changes_requested"}}`) + writeRunFileFor(t, cfg, srv) + + deps := aliveDeps() + deps.In = strings.NewReader("please fix from stdin") + _, errOut, err := executeCLI(t, deps, + "review", "submit", "mer-1", "--run", "run-1", "--verdict", "changes_requested", "--body", "-") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + var req submitReviewRequest + if err := json.Unmarshal([]byte(capture.body), &req); err != nil { + t.Fatalf("decode body: %v", err) + } + if req.Body != "please fix from stdin" { + t.Fatalf("body = %q, want the stdin contents", req.Body) + } +} + +func TestReviewSubmitAcceptsUnderscoreFlags(t *testing.T) { + cfg := setConfigEnv(t) + srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"changes_requested"}}`) + writeRunFileFor(t, cfg, srv) + + // Reviewer agents often spell --review-id as --review_id; both must work. + _, errOut, err := executeCLI(t, aliveDeps(), + "review", "submit", "mer-1", "--run", "run-1", "--verdict", "changes_requested", "--review_id", "98765") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + var req submitReviewRequest + if err := json.Unmarshal([]byte(capture.body), &req); err != nil { + t.Fatalf("decode body: %v", err) + } + if req.GithubReviewID != "98765" { + t.Fatalf("githubReviewId = %q, want 98765", req.GithubReviewID) + } +} + func TestReviewSubmitUsesSessionFlag(t *testing.T) { cfg := setConfigEnv(t) srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"approved"}}`) diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 5d79ec31..1a3048af 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -125,6 +125,10 @@ func startSession(cfg config.Config, runtime *zellij.Runtime, store *sqlite.Stor PRs: store, Projects: store, Launcher: reviewcore.NewLauncher(reviewers, runtime), + // On a changes_requested verdict the engine nudges the worker's live pane + // directly, using the same per-daemon messenger lifecycle uses for SCM + // nudges (issue #337). + Messenger: messenger, }) reviewSvc := reviewsvc.New(reviewEngine) return sessionSvc, reviewSvc, nil diff --git a/backend/internal/domain/review.go b/backend/internal/domain/review.go index 8b38a40f..6ed840d3 100644 --- a/backend/internal/domain/review.go +++ b/backend/internal/domain/review.go @@ -40,8 +40,14 @@ type ReviewRun struct { Verdict ReviewVerdict `json:"verdict"` // Body is the review text the reviewer submitted. It is recorded for AO's // own tracking; the reviewer also posts the review to the PR itself. - Body string `json:"body"` - CreatedAt time.Time `json:"createdAt"` + Body string `json:"body"` + // GithubReviewID is the id of the GitHub PR review the reviewer posted for + // this pass (the `gh api .../pulls/{n}/reviews` object id), recorded at + // submit time. It is empty when the reviewer could not post to the provider. + // When the pass requests changes, AO includes it in the message to the + // worker so the worker knows exactly which review to address and reply to. + GithubReviewID string `json:"githubReviewId"` + CreatedAt time.Time `json:"createdAt"` } // ReviewRunStatus is the lifecycle state of a single review pass. diff --git a/backend/internal/httpd/apispec/openapi.yaml b/backend/internal/httpd/apispec/openapi.yaml index a279d460..2131151e 100644 --- a/backend/internal/httpd/apispec/openapi.yaml +++ b/backend/internal/httpd/apispec/openapi.yaml @@ -1649,6 +1649,8 @@ components: createdAt: format: date-time type: string + githubReviewId: + type: string harness: type: string id: @@ -1675,6 +1677,7 @@ components: - status - verdict - body + - githubReviewId - createdAt type: object ReviewRunResponse: @@ -1870,6 +1873,9 @@ components: body: description: Review body recorded by AO. Required for changes_requested. type: string + githubReviewId: + description: Id of the GitHub PR review the reviewer posted, if any. + type: string runId: description: Review run id being completed. type: string @@ -1880,6 +1886,7 @@ components: - runId - verdict - body + - githubReviewId type: object WorkspaceRepo: properties: diff --git a/backend/internal/httpd/controllers/reviews.go b/backend/internal/httpd/controllers/reviews.go index bb0a235a..e629670f 100644 --- a/backend/internal/httpd/controllers/reviews.go +++ b/backend/internal/httpd/controllers/reviews.go @@ -30,9 +30,10 @@ type ReviewRunResponse struct { // SubmitReviewInput is the body of POST /api/v1/sessions/{sessionId}/reviews/submit. type SubmitReviewInput struct { - RunID string `json:"runId" description:"Review run id being completed."` - Verdict string `json:"verdict" description:"Review verdict: approved or changes_requested."` - Body string `json:"body" description:"Review body recorded by AO. Required for changes_requested."` + RunID string `json:"runId" description:"Review run id being completed."` + Verdict string `json:"verdict" description:"Review verdict: approved or changes_requested."` + Body string `json:"body" description:"Review body recorded by AO. Required for changes_requested."` + GithubReviewID string `json:"githubReviewId" description:"Id of the GitHub PR review the reviewer posted, if any."` } // ReviewsController owns the session-scoped /reviews routes. A nil Svc returns 501. @@ -93,7 +94,7 @@ func (c *ReviewsController) submit(w http.ResponseWriter, r *http.Request) { envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "INVALID_BODY", "Invalid request body", nil) return } - run, err := c.Svc.Submit(r.Context(), sessionID(r), in.RunID, domain.ReviewVerdict(in.Verdict), in.Body) + run, err := c.Svc.Submit(r.Context(), sessionID(r), in.RunID, domain.ReviewVerdict(in.Verdict), in.Body, in.GithubReviewID) if err != nil { writeReviewError(w, r, err) return diff --git a/backend/internal/httpd/controllers/reviews_test.go b/backend/internal/httpd/controllers/reviews_test.go index 969dd0b4..674105eb 100644 --- a/backend/internal/httpd/controllers/reviews_test.go +++ b/backend/internal/httpd/controllers/reviews_test.go @@ -29,7 +29,7 @@ func (f *fakeReviewService) Trigger(context.Context, domain.SessionID) (reviewco return reviewcore.TriggerResult{Run: domain.ReviewRun{ID: "run-1"}, Created: true}, nil } -func (f *fakeReviewService) Submit(context.Context, domain.SessionID, string, domain.ReviewVerdict, string) (domain.ReviewRun, error) { +func (f *fakeReviewService) Submit(context.Context, domain.SessionID, string, domain.ReviewVerdict, string, string) (domain.ReviewRun, error) { return domain.ReviewRun{}, nil } diff --git a/backend/internal/review/prompt.go b/backend/internal/review/prompt.go index 1a5ee248..09e51307 100644 --- a/backend/internal/review/prompt.go +++ b/backend/internal/review/prompt.go @@ -16,19 +16,28 @@ func reviewTexts(spec LaunchSpec) (prompt, systemPrompt string) { You are an AO code reviewer. You review a single pull request's changes in the current checkout — do not start unrelated work. Inspect what the PR changed by diffing the checkout against the PR's base branch, and review for correctness bugs, missing error handling, security issues, test coverage, and clear deviations from the surrounding code's conventions. Prefer a few high-confidence findings over nitpicks. -Post your review on the pull request using the available review tooling (request changes if it needs work, approve if it is ready), with inline comments for specific findings. Do not push commits, edit files, or modify the branch — review only.` +Post your review as a comment on the pull request, stating clearly whether it needs changes or is ready, with inline comments for specific findings. Do not push commits, edit files, or modify the branch — review only.` prompt = fmt.Sprintf(`Review pull request %s (head commit %s). Do these steps in order: -1. Post your review on the pull request with `+"`gh`"+`, with inline comments for specific findings: - - If changes are needed, request changes. - - If it is ready, approve it. GitHub does not let you approve a PR you opened — if the approval is rejected because you are the PR author, post the same review as a regular comment instead (a COMMENT-event review whose body states it is an approval). -2. Write your full review to review.md and record the result with AO by running exactly: +1. Post your review on the pull request and capture its id in one call. Post with `+"`gh api`"+` rather than `+"`gh pr review`"+`: it is the only way to attach inline comments, and its response carries the created review's id, so AO can tell the worker exactly which review to address. Send the review as a JSON body so the inline comments form a proper array of objects: - ao review submit --session %s --run %s --verdict --body review.md + gh api --method POST repos/{owner}/{repo}/pulls/{number}/reviews --input - --jq '.id' <<'JSON' + { "event": "COMMENT", "body": "", + "comments": [ { "path": "", "line": , "body": "" } ] } + JSON -Only if step 1 genuinely fails on the provider, still run step 2 so the result is recorded.`, + - Substitute the PR's owner/repo/number. Add one object to "comments" per inline finding; omit the field for a review with no inline comments. + - Always use "event": "COMMENT": reviews are posted from the PR author's own account, and GitHub rejects both APPROVE and REQUEST_CHANGES on your own PR. State in the body whether you are requesting changes or approving; the machine-readable verdict goes to AO in step 2. + - The printed number is the review id. If the call fails on the provider, leave the id empty. +2. Record the result with AO, passing your full review on stdin with --body - so nothing is ever written into the worktree (a file there could be committed onto the worker's branch): + + ao review submit --session %s --run %s --verdict --review-id --body - <<'MD' + + MD + +Only if step 1 genuinely fails on the provider, still run step 2 (without --review-id) so the result is recorded.`, spec.PRURL, spec.TargetSHA, spec.WorkerID, spec.RunID) return prompt, systemPrompt } diff --git a/backend/internal/review/review.go b/backend/internal/review/review.go index 4b6a3eb3..4c8c812c 100644 --- a/backend/internal/review/review.go +++ b/backend/internal/review/review.go @@ -18,6 +18,7 @@ import ( "github.com/google/uuid" "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) // ErrInvalid and ErrNotFound let the transport layer map failures to 422/404. @@ -32,7 +33,7 @@ type Store interface { UpsertReview(ctx context.Context, r domain.Review) error GetReviewBySession(ctx context.Context, id domain.SessionID) (domain.Review, bool, error) InsertReviewRun(ctx context.Context, r domain.ReviewRun) error - UpdateReviewRunResult(ctx context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body string) (bool, error) + UpdateReviewRunResult(ctx context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body, githubReviewID string) (bool, error) GetReviewRun(ctx context.Context, id string) (domain.ReviewRun, bool, error) GetReviewRunBySessionAndSHA(ctx context.Context, id domain.SessionID, targetSHA string) (domain.ReviewRun, bool, error) ListReviewRunsBySession(ctx context.Context, id domain.SessionID) ([]domain.ReviewRun, error) @@ -55,11 +56,12 @@ type Projects interface { // Deps wires the engine. type Deps struct { - Store Store - Sessions Sessions - PRs PRs - Projects Projects - Launcher Launcher + Store Store + Sessions Sessions + PRs PRs + Projects Projects + Launcher Launcher + Messenger ports.AgentMessenger // Clock and NewID are injectable for deterministic tests. Clock func() time.Time @@ -68,13 +70,14 @@ type Deps struct { // Engine is the core code-review engine. type Engine struct { - store Store - sessions Sessions - prs PRs - projects Projects - launcher Launcher - clock func() time.Time - newID func() string + store Store + sessions Sessions + prs PRs + projects Projects + launcher Launcher + messenger ports.AgentMessenger + clock func() time.Time + newID func() string // triggerMu guards triggerLocks; triggerLocks holds one mutex per worker // session so concurrent Trigger calls for the same worker serialise (see @@ -99,6 +102,7 @@ func New(d Deps) *Engine { prs: d.PRs, projects: d.Projects, launcher: d.Launcher, + messenger: d.Messenger, clock: clock, newID: newID, triggerLocks: make(map[domain.SessionID]*sync.Mutex), @@ -235,7 +239,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge } failRun := func(err error) error { - if _, updateErr := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunFailed, domain.VerdictNone, err.Error()); updateErr != nil { + if _, updateErr := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunFailed, domain.VerdictNone, err.Error(), ""); updateErr != nil { return updateErr } return err @@ -273,9 +277,17 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge } // Submit records the reviewer's result for a specific worker review pass: it -// marks the run complete and stores the verdict and body. AO does not post the -// review — the reviewer agent posts it to the PR itself. -func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body string) (domain.ReviewRun, error) { +// marks the run complete and stores the verdict, body, and the GitHub review id +// the reviewer posted. AO does not post the review — the reviewer agent posts it +// to the PR itself. +// +// On a changes_requested verdict, Submit also messages the worker session with +// the review feedback directly, so the worker learns about it event-driven +// rather than via the SCM poll loop (which never observes CHANGES_REQUESTED for +// self-reviews or COMMENT-state reviews; issue #337). When a GitHub review id is +// known, it is included so the worker knows exactly which review to address and +// reply to. +func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body, githubReviewID string) (domain.ReviewRun, error) { if workerID == "" { return domain.ReviewRun{}, fmt.Errorf("%w: worker session id is required", ErrInvalid) } @@ -303,7 +315,18 @@ func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID st return domain.ReviewRun{}, fmt.Errorf("%w: review run %q is not running", ErrInvalid, runID) } - updated, err := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunComplete, verdict, body) + // Notify the worker before marking the run complete. If the message fails, + // the run stays 'running' so a retried `ao review submit` runs again instead + // of tripping the status='running' guard above on an already-completed run. A + // message that lands but a DB write that then fails degrades to one extra + // nudge on retry — the same trade lifecycle's sendOnce makes. + if verdict == domain.VerdictChangesRequested { + if err := e.notifyWorkerChangesRequested(ctx, workerID, body, githubReviewID); err != nil { + return domain.ReviewRun{}, err + } + } + + updated, err := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunComplete, verdict, body, githubReviewID) if err != nil { return domain.ReviewRun{}, err } @@ -313,9 +336,35 @@ func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID st run.Status = domain.ReviewRunComplete run.Verdict = verdict run.Body = body + run.GithubReviewID = githubReviewID return run, nil } +// notifyWorkerChangesRequested injects the AO reviewer's feedback into the +// worker's live agent pane via the same messenger lifecycle uses for SCM nudges. +// +// When the GitHub review id is known, the worker is asked to reply on that +// review referencing its id with how it addressed the feedback and to resolve +// the review comment threads it addressed. The reviewer posts inline comments +// (per its prompt), so the per-finding threads are resolvable via `gh api` +// GraphQL resolveReviewThread; the top-level review object itself is not +// resolvable, hence the reply. The body is reviewer-authored text pasted into a +// PTY, so it is sanitized first (matching the lifecycle reaction path). +func (e *Engine) notifyWorkerChangesRequested(ctx context.Context, workerID domain.SessionID, body, githubReviewID string) error { + if e.messenger == nil { + return nil + } + msg := "An AO code reviewer requested changes on your PR. Review the feedback below and address it." + if githubReviewID != "" { + safeReviewID := domain.SanitizeControlChars(githubReviewID) + msg += fmt.Sprintf(" This feedback is GitHub review %s. Once you have addressed it, reply on that review referencing id %s with how you addressed it, then resolve the review comment threads you addressed.", safeReviewID, safeReviewID) + } + if body != "" { + msg += "\n\n" + domain.SanitizeControlChars(body) + } + return e.messenger.Send(ctx, workerID, msg) +} + // List returns a worker's review state: the live reviewer handle and its passes. func (e *Engine) List(ctx context.Context, workerID domain.SessionID) (SessionReviews, error) { if workerID == "" { diff --git a/backend/internal/review/review_test.go b/backend/internal/review/review_test.go index 74f6a761..54b0b8d1 100644 --- a/backend/internal/review/review_test.go +++ b/backend/internal/review/review_test.go @@ -46,7 +46,7 @@ func (f *fakeStore) InsertReviewRun(_ context.Context, r domain.ReviewRun) error f.runs = append(f.runs, r) return nil } -func (f *fakeStore) UpdateReviewRunResult(_ context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body string) (bool, error) { +func (f *fakeStore) UpdateReviewRunResult(_ context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body, githubReviewID string) (bool, error) { for i := range f.runs { if f.runs[i].ID == id { if f.runs[i].Status != domain.ReviewRunRunning { @@ -55,6 +55,7 @@ func (f *fakeStore) UpdateReviewRunResult(_ context.Context, id string, status d f.runs[i].Status = status f.runs[i].Verdict = verdict f.runs[i].Body = body + f.runs[i].GithubReviewID = githubReviewID return true, nil } } @@ -130,6 +131,20 @@ func (f *fakeLauncher) Notify(_ context.Context, handleID string, spec LaunchSpe } func (f *fakeLauncher) Alive(_ context.Context, _ string) (bool, error) { return f.alive, nil } +type fakeMessenger struct { + sends int + gotID domain.SessionID + gotMsg string + sendErr error +} + +func (f *fakeMessenger) Send(_ context.Context, id domain.SessionID, msg string) error { + f.sends++ + f.gotID = id + f.gotMsg = msg + return f.sendErr +} + func liveWorker() domain.SessionRecord { return domain.SessionRecord{ ID: "mer-1", @@ -377,7 +392,7 @@ func TestSubmitRecordsVerdictAndBody(t *testing.T) { store := &fakeStore{runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", Status: domain.ReviewRunRunning}}} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, &fakeLauncher{}) - run, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictChangesRequested, "please fix") + run, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictChangesRequested, "please fix", "") if err != nil { t.Fatalf("Submit: %v", err) } @@ -386,20 +401,119 @@ func TestSubmitRecordsVerdictAndBody(t *testing.T) { } } +func newEngineWithMessenger(store Store, messenger ports.AgentMessenger) *Engine { + ids := 0 + return New(Deps{ + Store: store, Sessions: fakeSessions{rec: liveWorker(), ok: true}, PRs: prAt("sha1"), + Projects: fakeProjects{}, Launcher: &fakeLauncher{}, Messenger: messenger, + Clock: func() time.Time { return time.Unix(0, 0).UTC() }, + NewID: func() string { ids++; return "id-" + string(rune('0'+ids)) }, + }) +} + +func TestSubmitChangesRequestedMessagesWorker(t *testing.T) { + store := &fakeStore{runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", Status: domain.ReviewRunRunning}}} + msgr := &fakeMessenger{} + eng := newEngineWithMessenger(store, msgr) + + run, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictChangesRequested, "fix the bug", "98\x1b[2J765") + if err != nil { + t.Fatalf("Submit: %v", err) + } + if run.GithubReviewID != "98\x1b[2J765" || store.runs[0].GithubReviewID != "98\x1b[2J765" { + t.Fatalf("review id not persisted: run=%+v stored=%+v", run, store.runs[0]) + } + if msgr.sends != 1 || msgr.gotID != "mer-1" { + t.Fatalf("expected one message to worker mer-1, got %+v", msgr) + } + if !strings.Contains(msgr.gotMsg, "fix the bug") || !strings.Contains(msgr.gotMsg, "98[2J765") { + t.Fatalf("message missing body or review id: %q", msgr.gotMsg) + } + if strings.Contains(msgr.gotMsg, "\x1b") { + t.Fatalf("message should sanitize review id before sending to worker: %q", msgr.gotMsg) + } + // The worker must be able to tell an AO internal review from external SCM + // reviewer feedback, and is asked to reply + resolve for an AO review. + if !strings.Contains(msgr.gotMsg, "AO code reviewer") { + t.Fatalf("message should identify the AO internal review: %q", msgr.gotMsg) + } + if !strings.Contains(msgr.gotMsg, "reply on that review") || !strings.Contains(msgr.gotMsg, "resolve") { + t.Fatalf("message should ask the worker to reply and resolve: %q", msgr.gotMsg) + } +} + +func TestSubmitApprovedDoesNotMessageWorker(t *testing.T) { + store := &fakeStore{runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", Status: domain.ReviewRunRunning}}} + msgr := &fakeMessenger{} + eng := newEngineWithMessenger(store, msgr) + + if _, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictApproved, "", "98765"); err != nil { + t.Fatalf("Submit: %v", err) + } + if msgr.sends != 0 { + t.Fatalf("approved review should not message the worker: %+v", msgr) + } +} + +func TestSubmitChangesRequestedOmitsReviewIDWhenAbsent(t *testing.T) { + store := &fakeStore{runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", Status: domain.ReviewRunRunning}}} + msgr := &fakeMessenger{} + eng := newEngineWithMessenger(store, msgr) + + if _, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictChangesRequested, "fix it", ""); err != nil { + t.Fatalf("Submit: %v", err) + } + if msgr.sends != 1 || !strings.Contains(msgr.gotMsg, "fix it") { + t.Fatalf("expected message with body: %+v", msgr) + } + // Still identified as an AO review, but with no id there is nothing to reply + // to or resolve, so that instruction is omitted. + if !strings.Contains(msgr.gotMsg, "AO code reviewer") { + t.Fatalf("message should identify the AO internal review: %q", msgr.gotMsg) + } + if strings.Contains(msgr.gotMsg, "reply on that review") { + t.Fatalf("message should not ask to reply/resolve when no review id was supplied: %q", msgr.gotMsg) + } +} + +func TestSubmitPropagatesMessengerError(t *testing.T) { + store := &fakeStore{runs: []domain.ReviewRun{{ID: "run-1", SessionID: "mer-1", Status: domain.ReviewRunRunning}}} + msgr := &fakeMessenger{sendErr: fmt.Errorf("dead pane")} + eng := newEngineWithMessenger(store, msgr) + + if _, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictChangesRequested, "fix it", "1"); err == nil { + t.Fatal("expected Submit to surface the messenger error") + } + // The run must stay running so a retried submit can try again, rather than + // being marked complete and then failing the status='running' guard. + if store.runs[0].Status != domain.ReviewRunRunning { + t.Fatalf("run should stay running after a failed send, got %q", store.runs[0].Status) + } + + // A retry once the pane is back completes the run and messages the worker. + msgr.sendErr = nil + if _, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictChangesRequested, "fix it", "1"); err != nil { + t.Fatalf("retry after recovered send: %v", err) + } + if store.runs[0].Status != domain.ReviewRunComplete || msgr.sends != 2 { + t.Fatalf("retry should complete the run and re-send: status=%q sends=%d", store.runs[0].Status, msgr.sends) + } +} + func TestSubmitValidationAndOwnership(t *testing.T) { store := &fakeStore{runs: []domain.ReviewRun{{ID: "run-1", SessionID: "other", Status: domain.ReviewRunRunning}}} eng := newEngineForTest(store, fakeSessions{rec: liveWorker(), ok: true}, prAt("sha1"), fakeProjects{}, &fakeLauncher{}) - if _, err := eng.Submit(context.Background(), "mer-1", "", domain.VerdictApproved, ""); !errors.Is(err, ErrInvalid) { + if _, err := eng.Submit(context.Background(), "mer-1", "", domain.VerdictApproved, "", ""); !errors.Is(err, ErrInvalid) { t.Fatalf("missing run id err = %v", err) } - if _, err := eng.Submit(context.Background(), "mer-1", "run-1", "garbage", "b"); !errors.Is(err, ErrInvalid) { + if _, err := eng.Submit(context.Background(), "mer-1", "run-1", "garbage", "b", ""); !errors.Is(err, ErrInvalid) { t.Fatalf("bad verdict err = %v", err) } - if _, err := eng.Submit(context.Background(), "mer-1", "missing", domain.VerdictApproved, ""); !errors.Is(err, ErrNotFound) { + if _, err := eng.Submit(context.Background(), "mer-1", "missing", domain.VerdictApproved, "", ""); !errors.Is(err, ErrNotFound) { t.Fatalf("unknown run err = %v", err) } - if _, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictApproved, ""); !errors.Is(err, ErrInvalid) { + if _, err := eng.Submit(context.Background(), "mer-1", "run-1", domain.VerdictApproved, "", ""); !errors.Is(err, ErrInvalid) { t.Fatalf("ownership err = %v", err) } } diff --git a/backend/internal/service/review/review.go b/backend/internal/service/review/review.go index f52e5c38..a7ed9c19 100644 --- a/backend/internal/service/review/review.go +++ b/backend/internal/service/review/review.go @@ -23,7 +23,7 @@ var ( // Manager is the reviews surface the HTTP controller depends on. type Manager interface { Trigger(ctx context.Context, workerID domain.SessionID) (reviewcore.TriggerResult, error) - Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body string) (domain.ReviewRun, error) + Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body, githubReviewID string) (domain.ReviewRun, error) List(ctx context.Context, workerID domain.SessionID) (reviewcore.SessionReviews, error) } @@ -45,8 +45,8 @@ func (s *Service) Trigger(ctx context.Context, workerID domain.SessionID) (revie } // Submit records a reviewer's result for a specific worker review pass. -func (s *Service) Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body string) (domain.ReviewRun, error) { - return s.engine.Submit(ctx, workerID, runID, verdict, body) +func (s *Service) Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body, githubReviewID string) (domain.ReviewRun, error) { + return s.engine.Submit(ctx, workerID, runID, verdict, body, githubReviewID) } // List returns a worker's review state. diff --git a/backend/internal/storage/sqlite/gen/models.go b/backend/internal/storage/sqlite/gen/models.go index f9899869..46995c98 100644 --- a/backend/internal/storage/sqlite/gen/models.go +++ b/backend/internal/storage/sqlite/gen/models.go @@ -135,16 +135,17 @@ type Review struct { } type ReviewRun struct { - ID string - ReviewID string - SessionID domain.SessionID - Harness domain.ReviewerHarness - PRURL string - TargetSha string - Status domain.ReviewRunStatus - Verdict domain.ReviewVerdict - Body string - CreatedAt time.Time + ID string + ReviewID string + SessionID domain.SessionID + Harness domain.ReviewerHarness + PRURL string + TargetSha string + Status domain.ReviewRunStatus + Verdict domain.ReviewVerdict + Body string + CreatedAt time.Time + GithubReviewID string } type Session struct { diff --git a/backend/internal/storage/sqlite/gen/review.sql.go b/backend/internal/storage/sqlite/gen/review.sql.go index c075e2b5..1053a8a9 100644 --- a/backend/internal/storage/sqlite/gen/review.sql.go +++ b/backend/internal/storage/sqlite/gen/review.sql.go @@ -34,7 +34,7 @@ func (q *Queries) GetReviewBySession(ctx context.Context, sessionID domain.Sessi } const getReviewRun = `-- name: GetReviewRun :one -SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at +SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at, github_review_id FROM review_run WHERE id = ? ` @@ -52,12 +52,13 @@ func (q *Queries) GetReviewRun(ctx context.Context, id string) (ReviewRun, error &i.Verdict, &i.Body, &i.CreatedAt, + &i.GithubReviewID, ) return i, err } const getReviewRunBySessionAndSHA = `-- name: GetReviewRunBySessionAndSHA :one -SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at +SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at, github_review_id FROM review_run WHERE session_id = ? AND target_sha = ? ORDER BY created_at DESC LIMIT 1 ` @@ -80,26 +81,28 @@ func (q *Queries) GetReviewRunBySessionAndSHA(ctx context.Context, arg GetReview &i.Verdict, &i.Body, &i.CreatedAt, + &i.GithubReviewID, ) return i, err } const insertReviewRun = `-- name: InsertReviewRun :exec -INSERT INTO review_run (id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +INSERT INTO review_run (id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, github_review_id, created_at) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ` type InsertReviewRunParams struct { - ID string - ReviewID string - SessionID domain.SessionID - Harness domain.ReviewerHarness - PRURL string - TargetSha string - Status domain.ReviewRunStatus - Verdict domain.ReviewVerdict - Body string - CreatedAt time.Time + ID string + ReviewID string + SessionID domain.SessionID + Harness domain.ReviewerHarness + PRURL string + TargetSha string + Status domain.ReviewRunStatus + Verdict domain.ReviewVerdict + Body string + GithubReviewID string + CreatedAt time.Time } func (q *Queries) InsertReviewRun(ctx context.Context, arg InsertReviewRunParams) error { @@ -113,13 +116,14 @@ func (q *Queries) InsertReviewRun(ctx context.Context, arg InsertReviewRunParams arg.Status, arg.Verdict, arg.Body, + arg.GithubReviewID, arg.CreatedAt, ) return err } const listReviewRunsBySession = `-- name: ListReviewRunsBySession :many -SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at +SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at, github_review_id FROM review_run WHERE session_id = ? ORDER BY created_at DESC ` @@ -143,6 +147,7 @@ func (q *Queries) ListReviewRunsBySession(ctx context.Context, sessionID domain. &i.Verdict, &i.Body, &i.CreatedAt, + &i.GithubReviewID, ); err != nil { return nil, err } @@ -158,14 +163,15 @@ func (q *Queries) ListReviewRunsBySession(ctx context.Context, sessionID domain. } const updateReviewRunResult = `-- name: UpdateReviewRunResult :execrows -UPDATE review_run SET status = ?, verdict = ?, body = ? WHERE id = ? AND status = 'running' +UPDATE review_run SET status = ?, verdict = ?, body = ?, github_review_id = ? WHERE id = ? AND status = 'running' ` type UpdateReviewRunResultParams struct { - Status domain.ReviewRunStatus - Verdict domain.ReviewVerdict - Body string - ID string + Status domain.ReviewRunStatus + Verdict domain.ReviewVerdict + Body string + GithubReviewID string + ID string } func (q *Queries) UpdateReviewRunResult(ctx context.Context, arg UpdateReviewRunResultParams) (int64, error) { @@ -173,6 +179,7 @@ func (q *Queries) UpdateReviewRunResult(ctx context.Context, arg UpdateReviewRun arg.Status, arg.Verdict, arg.Body, + arg.GithubReviewID, arg.ID, ) if err != nil { diff --git a/backend/internal/storage/sqlite/migrations/0016_review_run_github_review_id.sql b/backend/internal/storage/sqlite/migrations/0016_review_run_github_review_id.sql new file mode 100644 index 00000000..70a8cfdc --- /dev/null +++ b/backend/internal/storage/sqlite/migrations/0016_review_run_github_review_id.sql @@ -0,0 +1,15 @@ +-- The reviewer agent posts its review to the PR and learns the GitHub review +-- object id (`gh api repos/{owner}/{repo}/pulls/{n}/reviews`). `ao review submit` +-- now carries that id through to the run row so that, when the pass requests +-- changes, AO can tell the worker exactly which GitHub review to address and +-- reply to (issue #337). Empty when the reviewer could not post to the provider. + +-- +goose Up +-- +goose StatementBegin +ALTER TABLE review_run ADD COLUMN github_review_id TEXT NOT NULL DEFAULT ''; +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +ALTER TABLE review_run DROP COLUMN github_review_id; +-- +goose StatementEnd diff --git a/backend/internal/storage/sqlite/queries/review.sql b/backend/internal/storage/sqlite/queries/review.sql index 1151c946..c540f628 100644 --- a/backend/internal/storage/sqlite/queries/review.sql +++ b/backend/internal/storage/sqlite/queries/review.sql @@ -12,20 +12,20 @@ SELECT id, session_id, project_id, harness, pr_url, reviewer_handle_id, created_ FROM review WHERE session_id = ?; -- name: InsertReviewRun :exec -INSERT INTO review_run (id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); +INSERT INTO review_run (id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, github_review_id, created_at) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); -- name: UpdateReviewRunResult :execrows -UPDATE review_run SET status = ?, verdict = ?, body = ? WHERE id = ? AND status = 'running'; +UPDATE review_run SET status = ?, verdict = ?, body = ?, github_review_id = ? WHERE id = ? AND status = 'running'; -- name: GetReviewRun :one -SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at +SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at, github_review_id FROM review_run WHERE id = ?; -- name: GetReviewRunBySessionAndSHA :one -SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at +SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at, github_review_id FROM review_run WHERE session_id = ? AND target_sha = ? ORDER BY created_at DESC LIMIT 1; -- name: ListReviewRunsBySession :many -SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at +SELECT id, review_id, session_id, harness, pr_url, target_sha, status, verdict, body, created_at, github_review_id FROM review_run WHERE session_id = ? ORDER BY created_at DESC; diff --git a/backend/internal/storage/sqlite/store/review_store.go b/backend/internal/storage/sqlite/store/review_store.go index b872b33a..9d5c0960 100644 --- a/backend/internal/storage/sqlite/store/review_store.go +++ b/backend/internal/storage/sqlite/store/review_store.go @@ -46,16 +46,17 @@ func (s *Store) InsertReviewRun(ctx context.Context, r domain.ReviewRun) error { s.writeMu.Lock() defer s.writeMu.Unlock() err := s.qw.InsertReviewRun(ctx, gen.InsertReviewRunParams{ - ID: r.ID, - ReviewID: r.ReviewID, - SessionID: r.SessionID, - Harness: r.Harness, - PRURL: r.PRURL, - TargetSha: r.TargetSHA, - Status: r.Status, - Verdict: r.Verdict, - Body: r.Body, - CreatedAt: r.CreatedAt, + ID: r.ID, + ReviewID: r.ReviewID, + SessionID: r.SessionID, + Harness: r.Harness, + PRURL: r.PRURL, + TargetSha: r.TargetSHA, + Status: r.Status, + Verdict: r.Verdict, + Body: r.Body, + GithubReviewID: r.GithubReviewID, + CreatedAt: r.CreatedAt, }) if isSQLiteUnique(err) { return fmt.Errorf("insert review run for session %s sha %s: %w", r.SessionID, r.TargetSHA, domain.ErrDuplicateReviewRun) @@ -63,15 +64,17 @@ func (s *Store) InsertReviewRun(ctx context.Context, r domain.ReviewRun) error { return err } -// UpdateReviewRunResult sets the status/verdict/body of a running review pass. -func (s *Store) UpdateReviewRunResult(ctx context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body string) (bool, error) { +// UpdateReviewRunResult sets the status/verdict/body and the GitHub review id of +// a running review pass. +func (s *Store) UpdateReviewRunResult(ctx context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body, githubReviewID string) (bool, error) { s.writeMu.Lock() defer s.writeMu.Unlock() n, err := s.qw.UpdateReviewRunResult(ctx, gen.UpdateReviewRunResultParams{ - Status: status, - Verdict: verdict, - Body: body, - ID: id, + Status: status, + Verdict: verdict, + Body: body, + GithubReviewID: githubReviewID, + ID: id, }) if err != nil { return false, err @@ -133,15 +136,16 @@ func reviewFromRow(r gen.Review) domain.Review { func reviewRunFromRow(r gen.ReviewRun) domain.ReviewRun { return domain.ReviewRun{ - ID: r.ID, - ReviewID: r.ReviewID, - SessionID: r.SessionID, - Harness: r.Harness, - PRURL: r.PRURL, - TargetSHA: r.TargetSha, - Status: r.Status, - Verdict: r.Verdict, - Body: r.Body, - CreatedAt: r.CreatedAt, + ID: r.ID, + ReviewID: r.ReviewID, + SessionID: r.SessionID, + Harness: r.Harness, + PRURL: r.PRURL, + TargetSHA: r.TargetSha, + Status: r.Status, + Verdict: r.Verdict, + Body: r.Body, + GithubReviewID: r.GithubReviewID, + CreatedAt: r.CreatedAt, } } diff --git a/backend/internal/storage/sqlite/store/review_store_test.go b/backend/internal/storage/sqlite/store/review_store_test.go index a9c72ab6..39943826 100644 --- a/backend/internal/storage/sqlite/store/review_store_test.go +++ b/backend/internal/storage/sqlite/store/review_store_test.go @@ -41,7 +41,7 @@ func TestInsertReviewRunDuplicateSHAMapsToSentinel(t *testing.T) { t.Fatalf("duplicate insert err = %v, want ErrDuplicateReviewRun", err) } - if ok, err := s.UpdateReviewRunResult(ctx, "run-1", domain.ReviewRunFailed, domain.VerdictNone, "claude: not found"); err != nil { + if ok, err := s.UpdateReviewRunResult(ctx, "run-1", domain.ReviewRunFailed, domain.VerdictNone, "claude: not found", ""); err != nil { t.Fatalf("mark failed: %v", err) } else if !ok { t.Fatal("mark failed: got ok=false") @@ -108,7 +108,7 @@ func TestReviewUpsertReusesRowAndRunRoundTrip(t *testing.T) { }); err != nil { t.Fatalf("insert run: %v", err) } - if ok, err := s.UpdateReviewRunResult(ctx, "run-1", domain.ReviewRunComplete, domain.VerdictChangesRequested, "please fix"); err != nil { + if ok, err := s.UpdateReviewRunResult(ctx, "run-1", domain.ReviewRunComplete, domain.VerdictChangesRequested, "please fix", "rev-987"); err != nil { t.Fatalf("update run: %v", err) } else if !ok { t.Fatal("update run: got ok=false") @@ -126,7 +126,7 @@ func TestReviewUpsertReusesRowAndRunRoundTrip(t *testing.T) { if err != nil || !ok { t.Fatalf("by sha: ok=%v err=%v", ok, err) } - if bySHA.Status != domain.ReviewRunComplete || bySHA.Verdict != domain.VerdictChangesRequested || bySHA.Body != "please fix" { + if bySHA.Status != domain.ReviewRunComplete || bySHA.Verdict != domain.VerdictChangesRequested || bySHA.Body != "please fix" || bySHA.GithubReviewID != "rev-987" { t.Fatalf("run result not persisted: %+v", bySHA) } if _, ok, _ := s.GetReviewRunBySessionAndSHA(ctx, rec.ID, "other"); ok { @@ -141,7 +141,7 @@ func TestReviewUpsertReusesRowAndRunRoundTrip(t *testing.T) { t.Fatalf("list runs = %+v", runs) } - if ok, err := s.UpdateReviewRunResult(ctx, "run-1", domain.ReviewRunComplete, domain.VerdictApproved, "again"); err != nil { + if ok, err := s.UpdateReviewRunResult(ctx, "run-1", domain.ReviewRunComplete, domain.VerdictApproved, "again", ""); err != nil { t.Fatalf("second update: %v", err) } else if ok { t.Fatal("second update completed an already-complete run") diff --git a/frontend/src/api/schema.ts b/frontend/src/api/schema.ts index 0bb8bd4a..3714763c 100644 --- a/frontend/src/api/schema.ts +++ b/frontend/src/api/schema.ts @@ -595,6 +595,7 @@ export interface components { body: string; /** Format: date-time */ createdAt: string; + githubReviewId: string; harness: string; id: string; prUrl: string; @@ -676,6 +677,8 @@ export interface components { SubmitReviewInput: { /** @description Review body recorded by AO. Required for changes_requested. */ body: string; + /** @description Id of the GitHub PR review the reviewer posted, if any. */ + githubReviewId: string; /** @description Review run id being completed. */ runId: string; /** @description Review verdict: approved or changes_requested. */