Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ session-events.jsonl.*
# Agent Orchestrator local session state
.ao/

# AO reviewer scratch output. The reviewer agent runs inside the worker's
# worktree; its review writeup must never be committed onto the worker branch.
/review.md

# Environment
.direnv/
.env
Expand Down
35 changes: 24 additions & 11 deletions backend/internal/cli/review.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cli
import (
"errors"
"fmt"
"io"
"net/url"
"os"
"strings"
Expand Down Expand Up @@ -32,16 +33,18 @@ type reviewRunResponse struct {

// submitReviewRequest mirrors controllers.SubmitReviewInput.
type submitReviewRequest struct {
RunID string `json:"runId"`
Verdict string `json:"verdict"`
Body string `json:"body"`
RunID string `json:"runId"`
Verdict string `json:"verdict"`
Body string `json:"body"`
GithubReviewID string `json:"githubReviewId"`
}

type reviewSubmitOptions struct {
session string
runID string
verdict string
body string
session string
runID string
verdict string
body string
reviewID string
}

func newReviewCommand(ctx *commandContext) *cobra.Command {
Expand All @@ -66,7 +69,8 @@ func newReviewSubmitCommand(ctx *commandContext) *cobra.Command {
cmd.Flags().StringVar(&opts.session, "session", "", "Worker session id (or pass it as the positional argument)")
cmd.Flags().StringVar(&opts.runID, "run", "", "Review run id (required)")
cmd.Flags().StringVar(&opts.verdict, "verdict", "", "Review verdict: approved or changes_requested (required)")
cmd.Flags().StringVar(&opts.body, "body", "", "Path to a Markdown file with the review body")
cmd.Flags().StringVar(&opts.body, "body", "", "Review body: a path to a Markdown file, or - to read from stdin (so nothing is written into the worktree)")
cmd.Flags().StringVar(&opts.reviewID, "review-id", "", "Id of the GitHub PR review just posted (the .id from the gh api POST that created the review)")
return cmd
}

Expand All @@ -88,15 +92,24 @@ func (c *commandContext) submitReview(cmd *cobra.Command, args []string, opts re
}
var body string
if path := strings.TrimSpace(opts.body); path != "" {
raw, err := os.ReadFile(path)
var raw []byte
var err error
if path == "-" {
// Read the review from stdin so the reviewer never has to write a file
// into its checkout (where it could be committed onto the worker branch).
raw, err = io.ReadAll(cmd.InOrStdin())
} else {
raw, err = os.ReadFile(path)
}
if err != nil {
return usageError{fmt.Errorf("read body file: %w", err)}
return usageError{fmt.Errorf("read review body: %w", err)}
}
body = string(raw)
}
reviewID := strings.TrimSpace(opts.reviewID)
path := "sessions/" + url.PathEscape(session) + "/reviews/submit"
var res reviewRunResponse
if err := c.postJSON(cmd.Context(), path, submitReviewRequest{RunID: runID, Verdict: verdict, Body: body}, &res); err != nil {
if err := c.postJSON(cmd.Context(), path, submitReviewRequest{RunID: runID, Verdict: verdict, Body: body, GithubReviewID: reviewID}, &res); err != nil {
return err
}
_, err := fmt.Fprintf(cmd.OutOrStdout(), "recorded %s review for %s\n", res.Review.Verdict, session)
Expand Down
22 changes: 22 additions & 0 deletions backend/internal/cli/review_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
)

Expand Down Expand Up @@ -62,6 +63,27 @@ func TestReviewSubmitReadsBodyFile(t *testing.T) {
}
}

func TestReviewSubmitReadsBodyFromStdin(t *testing.T) {
cfg := setConfigEnv(t)
srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"changes_requested"}}`)
writeRunFileFor(t, cfg, srv)

deps := aliveDeps()
deps.In = strings.NewReader("please fix from stdin")
_, errOut, err := executeCLI(t, deps,
"review", "submit", "mer-1", "--run", "run-1", "--verdict", "changes_requested", "--body", "-")
if err != nil {
t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut)
}
var req submitReviewRequest
if err := json.Unmarshal([]byte(capture.body), &req); err != nil {
t.Fatalf("decode body: %v", err)
}
if req.Body != "please fix from stdin" {
t.Fatalf("body = %q, want the stdin contents", req.Body)
}
}

func TestReviewSubmitUsesSessionFlag(t *testing.T) {
cfg := setConfigEnv(t)
srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"approved"}}`)
Expand Down
4 changes: 4 additions & 0 deletions backend/internal/daemon/lifecycle_wiring.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ func startSession(cfg config.Config, runtime *zellij.Runtime, store *sqlite.Stor
PRs: store,
Projects: store,
Launcher: reviewcore.NewLauncher(reviewers, runtime),
// On a changes_requested verdict the engine nudges the worker's live pane
// directly, using the same per-daemon messenger lifecycle uses for SCM
// nudges (issue #337).
Messenger: messenger,
})
reviewSvc := reviewsvc.New(reviewEngine)
return sessionSvc, reviewSvc, nil
Expand Down
10 changes: 8 additions & 2 deletions backend/internal/domain/review.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,14 @@ type ReviewRun struct {
Verdict ReviewVerdict `json:"verdict"`
// Body is the review text the reviewer submitted. It is recorded for AO's
// own tracking; the reviewer also posts the review to the PR itself.
Body string `json:"body"`
CreatedAt time.Time `json:"createdAt"`
Body string `json:"body"`
// GithubReviewID is the id of the GitHub PR review the reviewer posted for
// this pass (the `gh api .../pulls/{n}/reviews` object id), recorded at
// submit time. It is empty when the reviewer could not post to the provider.
// When the pass requests changes, AO includes it in the message to the
// worker so the worker knows exactly which review to address and reply to.
GithubReviewID string `json:"githubReviewId"`
CreatedAt time.Time `json:"createdAt"`
}

// ReviewRunStatus is the lifecycle state of a single review pass.
Expand Down
7 changes: 7 additions & 0 deletions backend/internal/httpd/apispec/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1649,6 +1649,8 @@ components:
createdAt:
format: date-time
type: string
githubReviewId:
type: string
harness:
type: string
id:
Expand All @@ -1675,6 +1677,7 @@ components:
- status
- verdict
- body
- githubReviewId
- createdAt
type: object
ReviewRunResponse:
Expand Down Expand Up @@ -1870,6 +1873,9 @@ components:
body:
description: Review body recorded by AO. Required for changes_requested.
type: string
githubReviewId:
description: Id of the GitHub PR review the reviewer posted, if any.
type: string
runId:
description: Review run id being completed.
type: string
Expand All @@ -1880,6 +1886,7 @@ components:
- runId
- verdict
- body
- githubReviewId
type: object
WorkspaceRepo:
properties:
Expand Down
9 changes: 5 additions & 4 deletions backend/internal/httpd/controllers/reviews.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ type ReviewRunResponse struct {

// SubmitReviewInput is the body of POST /api/v1/sessions/{sessionId}/reviews/submit.
type SubmitReviewInput struct {
RunID string `json:"runId" description:"Review run id being completed."`
Verdict string `json:"verdict" description:"Review verdict: approved or changes_requested."`
Body string `json:"body" description:"Review body recorded by AO. Required for changes_requested."`
RunID string `json:"runId" description:"Review run id being completed."`
Verdict string `json:"verdict" description:"Review verdict: approved or changes_requested."`
Body string `json:"body" description:"Review body recorded by AO. Required for changes_requested."`
GithubReviewID string `json:"githubReviewId" description:"Id of the GitHub PR review the reviewer posted, if any."`
}

// ReviewsController owns the session-scoped /reviews routes. A nil Svc returns 501.
Expand Down Expand Up @@ -93,7 +94,7 @@ func (c *ReviewsController) submit(w http.ResponseWriter, r *http.Request) {
envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "INVALID_BODY", "Invalid request body", nil)
return
}
run, err := c.Svc.Submit(r.Context(), sessionID(r), in.RunID, domain.ReviewVerdict(in.Verdict), in.Body)
run, err := c.Svc.Submit(r.Context(), sessionID(r), in.RunID, domain.ReviewVerdict(in.Verdict), in.Body, in.GithubReviewID)
if err != nil {
writeReviewError(w, r, err)
return
Expand Down
2 changes: 1 addition & 1 deletion backend/internal/httpd/controllers/reviews_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func (f *fakeReviewService) Trigger(context.Context, domain.SessionID) (reviewco
return reviewcore.TriggerResult{Run: domain.ReviewRun{ID: "run-1"}, Created: true}, nil
}

func (f *fakeReviewService) Submit(context.Context, domain.SessionID, string, domain.ReviewVerdict, string) (domain.ReviewRun, error) {
func (f *fakeReviewService) Submit(context.Context, domain.SessionID, string, domain.ReviewVerdict, string, string) (domain.ReviewRun, error) {
return domain.ReviewRun{}, nil
}

Expand Down
23 changes: 16 additions & 7 deletions backend/internal/review/prompt.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,28 @@ func reviewTexts(spec LaunchSpec) (prompt, systemPrompt string) {

You are an AO code reviewer. You review a single pull request's changes in the current checkout — do not start unrelated work. Inspect what the PR changed by diffing the checkout against the PR's base branch, and review for correctness bugs, missing error handling, security issues, test coverage, and clear deviations from the surrounding code's conventions. Prefer a few high-confidence findings over nitpicks.

Post your review on the pull request using the available review tooling (request changes if it needs work, approve if it is ready), with inline comments for specific findings. Do not push commits, edit files, or modify the branch — review only.`
Post your review as a comment on the pull request, stating clearly whether it needs changes or is ready, with inline comments for specific findings. Do not push commits, edit files, or modify the branch — review only.`

prompt = fmt.Sprintf(`Review pull request %s (head commit %s).

Do these steps in order:
1. Post your review on the pull request with `+"`gh`"+`, with inline comments for specific findings:
- If changes are needed, request changes.
- If it is ready, approve it. GitHub does not let you approve a PR you opened — if the approval is rejected because you are the PR author, post the same review as a regular comment instead (a COMMENT-event review whose body states it is an approval).
2. Write your full review to review.md and record the result with AO by running exactly:
1. Post your review on the pull request and capture its id in one call. Post with `+"`gh api`"+` rather than `+"`gh pr review`"+`: it is the only way to attach inline comments, and its response carries the created review's id, so AO can tell the worker exactly which review to address. Send the review as a JSON body so the inline comments form a proper array of objects:

ao review submit --session %s --run %s --verdict <approved|changes_requested> --body review.md
gh api --method POST repos/{owner}/{repo}/pulls/{number}/reviews --input - --jq '.id' <<'JSON'
{ "event": "COMMENT", "body": "<summary>",
"comments": [ { "path": "<file>", "line": <n>, "body": "<finding>" } ] }
JSON

Only if step 1 genuinely fails on the provider, still run step 2 so the result is recorded.`,
- Substitute the PR's owner/repo/number. Add one object to "comments" per inline finding; omit the field for a review with no inline comments.
- Always use "event": "COMMENT": reviews are posted from the PR author's own account, and GitHub rejects both APPROVE and REQUEST_CHANGES on your own PR. State in the body whether you are requesting changes or approving; the machine-readable verdict goes to AO in step 2.
- The printed number is the review id. If the call fails on the provider, leave the id empty.
2. Record the result with AO, passing your full review on stdin with --body - so nothing is ever written into the worktree (a file there could be committed onto the worker's branch):

ao review submit --session %s --run %s --verdict <approved|changes_requested> --review-id <id-from-step-1> --body - <<'MD'
<your full review markdown>
MD

Only if step 1 genuinely fails on the provider, still run step 2 (without --review-id) so the result is recorded.`,
spec.PRURL, spec.TargetSHA, spec.WorkerID, spec.RunID)
return prompt, systemPrompt
}
84 changes: 66 additions & 18 deletions backend/internal/review/review.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/google/uuid"

"github.com/aoagents/agent-orchestrator/backend/internal/domain"
"github.com/aoagents/agent-orchestrator/backend/internal/ports"
)

// ErrInvalid and ErrNotFound let the transport layer map failures to 422/404.
Expand All @@ -32,7 +33,7 @@ type Store interface {
UpsertReview(ctx context.Context, r domain.Review) error
GetReviewBySession(ctx context.Context, id domain.SessionID) (domain.Review, bool, error)
InsertReviewRun(ctx context.Context, r domain.ReviewRun) error
UpdateReviewRunResult(ctx context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body string) (bool, error)
UpdateReviewRunResult(ctx context.Context, id string, status domain.ReviewRunStatus, verdict domain.ReviewVerdict, body, githubReviewID string) (bool, error)
GetReviewRun(ctx context.Context, id string) (domain.ReviewRun, bool, error)
GetReviewRunBySessionAndSHA(ctx context.Context, id domain.SessionID, targetSHA string) (domain.ReviewRun, bool, error)
ListReviewRunsBySession(ctx context.Context, id domain.SessionID) ([]domain.ReviewRun, error)
Expand All @@ -55,11 +56,12 @@ type Projects interface {

// Deps wires the engine.
type Deps struct {
Store Store
Sessions Sessions
PRs PRs
Projects Projects
Launcher Launcher
Store Store
Sessions Sessions
PRs PRs
Projects Projects
Launcher Launcher
Messenger ports.AgentMessenger

// Clock and NewID are injectable for deterministic tests.
Clock func() time.Time
Expand All @@ -68,13 +70,14 @@ type Deps struct {

// Engine is the core code-review engine.
type Engine struct {
store Store
sessions Sessions
prs PRs
projects Projects
launcher Launcher
clock func() time.Time
newID func() string
store Store
sessions Sessions
prs PRs
projects Projects
launcher Launcher
messenger ports.AgentMessenger
clock func() time.Time
newID func() string

// triggerMu guards triggerLocks; triggerLocks holds one mutex per worker
// session so concurrent Trigger calls for the same worker serialise (see
Expand All @@ -99,6 +102,7 @@ func New(d Deps) *Engine {
prs: d.PRs,
projects: d.Projects,
launcher: d.Launcher,
messenger: d.Messenger,
clock: clock,
newID: newID,
triggerLocks: make(map[domain.SessionID]*sync.Mutex),
Expand Down Expand Up @@ -235,7 +239,7 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge
}

failRun := func(err error) error {
if _, updateErr := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunFailed, domain.VerdictNone, err.Error()); updateErr != nil {
if _, updateErr := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunFailed, domain.VerdictNone, err.Error(), ""); updateErr != nil {
return updateErr
}
return err
Expand Down Expand Up @@ -273,9 +277,17 @@ func (e *Engine) Trigger(ctx context.Context, workerID domain.SessionID) (Trigge
}

// Submit records the reviewer's result for a specific worker review pass: it
// marks the run complete and stores the verdict and body. AO does not post the
// review — the reviewer agent posts it to the PR itself.
func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body string) (domain.ReviewRun, error) {
// marks the run complete and stores the verdict, body, and the GitHub review id
// the reviewer posted. AO does not post the review — the reviewer agent posts it
// to the PR itself.
//
// On a changes_requested verdict, Submit also messages the worker session with
// the review feedback directly, so the worker learns about it event-driven
// rather than via the SCM poll loop (which never observes CHANGES_REQUESTED for
// self-reviews or COMMENT-state reviews; issue #337). When a GitHub review id is
// known, it is included so the worker knows exactly which review to address and
// reply to.
func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID string, verdict domain.ReviewVerdict, body, githubReviewID string) (domain.ReviewRun, error) {
if workerID == "" {
return domain.ReviewRun{}, fmt.Errorf("%w: worker session id is required", ErrInvalid)
}
Expand Down Expand Up @@ -303,7 +315,18 @@ func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID st
return domain.ReviewRun{}, fmt.Errorf("%w: review run %q is not running", ErrInvalid, runID)
}

updated, err := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunComplete, verdict, body)
// Notify the worker before marking the run complete. If the message fails,
// the run stays 'running' so a retried `ao review submit` runs again instead
// of tripping the status='running' guard above on an already-completed run. A
// message that lands but a DB write that then fails degrades to one extra
// nudge on retry — the same trade lifecycle's sendOnce makes.
if verdict == domain.VerdictChangesRequested {
if err := e.notifyWorkerChangesRequested(ctx, workerID, body, githubReviewID); err != nil {
return domain.ReviewRun{}, err
}
}

updated, err := e.store.UpdateReviewRunResult(ctx, run.ID, domain.ReviewRunComplete, verdict, body, githubReviewID)
if err != nil {
return domain.ReviewRun{}, err
}
Expand All @@ -313,9 +336,34 @@ func (e *Engine) Submit(ctx context.Context, workerID domain.SessionID, runID st
run.Status = domain.ReviewRunComplete
run.Verdict = verdict
run.Body = body
run.GithubReviewID = githubReviewID
return run, nil
}

// notifyWorkerChangesRequested injects the AO reviewer's feedback into the
// worker's live agent pane via the same messenger lifecycle uses for SCM nudges.
//
// When the GitHub review id is known, the worker is asked to reply on that
// review referencing its id with how it addressed the feedback and to resolve
// the review comment threads it addressed. The reviewer posts inline comments
// (per its prompt), so the per-finding threads are resolvable via `gh api`
// GraphQL resolveReviewThread; the top-level review object itself is not
// resolvable, hence the reply. The body is reviewer-authored text pasted into a
// PTY, so it is sanitized first (matching the lifecycle reaction path).
func (e *Engine) notifyWorkerChangesRequested(ctx context.Context, workerID domain.SessionID, body, githubReviewID string) error {
if e.messenger == nil {
return nil
}
msg := "An AO code reviewer requested changes on your PR. Review the feedback below and address it."
if githubReviewID != "" {
msg += fmt.Sprintf(" This feedback is GitHub review %s. Once you have addressed it, reply on that review referencing id %s with how you addressed it, then resolve the review comment threads you addressed.", githubReviewID, githubReviewID)
}
if body != "" {
msg += "\n\n" + domain.SanitizeControlChars(body)
}
return e.messenger.Send(ctx, workerID, msg)
}

// List returns a worker's review state: the live reviewer handle and its passes.
func (e *Engine) List(ctx context.Context, workerID domain.SessionID) (SessionReviews, error) {
if workerID == "" {
Expand Down
Loading
Loading