diff --git a/backend/internal/adapters/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go index bc3b5437..cef00c53 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -150,6 +150,7 @@ func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg ports.LaunchConfig) ( permissions = cfg.Config.Permissions } appendPermissionFlags(&cmd, permissions) + appendToolFlags(&cmd, cfg.AllowedTools, cfg.DisallowedTools) if model := strings.TrimSpace(cfg.Config.Model); model != "" { cmd = append(cmd, "--model", model) @@ -313,6 +314,20 @@ func appendPermissionFlags(cmd *[]string, permissions ports.PermissionMode) { } } +// appendToolFlags emits --allowedTools / --disallowedTools for a tool-scoped +// launch. Each list is joined with commas into one value so rules that contain +// spaces (e.g. "Bash(git diff:*)") are not split into separate tool names. +// Empty lists emit nothing, so an unrestricted launch is unchanged. These rules +// only bite when the launch is off bypassPermissions, which ignores them. +func appendToolFlags(cmd *[]string, allowed, disallowed []string) { + if len(allowed) > 0 { + *cmd = append(*cmd, "--allowedTools", strings.Join(allowed, ",")) + } + if len(disallowed) > 0 { + *cmd = append(*cmd, "--disallowedTools", strings.Join(disallowed, ",")) + } +} + func normalizePermissionMode(mode ports.PermissionMode) ports.PermissionMode { switch mode { case ports.PermissionModeDefault, diff --git a/backend/internal/adapters/agent/claudecode/claudecode_test.go b/backend/internal/adapters/agent/claudecode/claudecode_test.go index bec96a67..97a1d175 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode_test.go +++ b/backend/internal/adapters/agent/claudecode/claudecode_test.go @@ -580,6 +580,38 @@ func readJSON(t *testing.T, path string) map[string]any { return m } +func TestGetLaunchCommandEmitsToolAllowlist(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + + cmd, err := p.GetLaunchCommand(context.Background(), ports.LaunchConfig{ + AllowedTools: []string{"Read", "Grep", "Bash(git diff:*)"}, + DisallowedTools: []string{"Edit", "Write", "Bash(git push:*)"}, + }) + if err != nil { + t.Fatal(err) + } + + // Each list is one comma-joined value so a rule with spaces stays intact. + if !containsSubsequence(cmd, []string{"--allowedTools", "Read,Grep,Bash(git diff:*)"}) { + t.Fatalf("missing joined --allowedTools value; got %#v", cmd) + } + if !containsSubsequence(cmd, []string{"--disallowedTools", "Edit,Write,Bash(git push:*)"}) { + t.Fatalf("missing joined --disallowedTools value; got %#v", cmd) + } +} + +func TestGetLaunchCommandOmitsToolFlagsWhenUnset(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + + cmd, err := p.GetLaunchCommand(context.Background(), ports.LaunchConfig{Prompt: "do it"}) + if err != nil { + t.Fatal(err) + } + if contains(cmd, "--allowedTools") || contains(cmd, "--disallowedTools") { + t.Fatalf("unrestricted launch should emit no tool flags; got %#v", cmd) + } +} + func contains(values []string, needle string) bool { for _, v := range values { if v == needle { diff --git a/backend/internal/adapters/reviewer/claudecode/claudecode.go b/backend/internal/adapters/reviewer/claudecode/claudecode.go index 8f595e58..e9c56790 100644 --- a/backend/internal/adapters/reviewer/claudecode/claudecode.go +++ b/backend/internal/adapters/reviewer/claudecode/claudecode.go @@ -31,6 +31,37 @@ func (r *Reviewer) Harness() domain.ReviewerHarness { var _ ports.Reviewer = (*Reviewer)(nil) +// reviewerAllowedTools is the read-only tool allowlist the reviewer launches +// with. The reviewer runs headless (no human to approve prompts) but must stay +// read-only, so instead of bypassPermissions — which skips the permission +// system entirely and ignores allow/deny rules — it launches in the default +// mode where these rules are honored: allow rules auto-approve without +// prompting, so the reviewer can read the checkout and run the few commands it +// needs (git diff/log/show to inspect the PR, gh to post the review, and +// `ao review submit` to record the verdict) without stalling. +var reviewerAllowedTools = []string{ + "Read", + "Grep", + "Glob", + "Bash(gh:*)", + "Bash(git diff:*)", + "Bash(git log:*)", + "Bash(git show:*)", + "Bash(git status:*)", + "Bash(ao review submit:*)", +} + +// reviewerDisallowedTools hard-denies the write paths as defense in depth, so a +// misbehaving model cannot edit files or move the branch even if a future +// allowlist entry would otherwise admit it. +var reviewerDisallowedTools = []string{ + "Edit", + "Write", + "NotebookEdit", + "Bash(git push:*)", + "Bash(git commit:*)", +} + // ReviewCommand builds a claude-code invocation that reviews the worker's // checkout for the PR, with the review prompt baked in. func (r *Reviewer) ReviewCommand(ctx context.Context, inv ports.ReviewInvocation) (ports.ReviewCommandSpec, error) { @@ -39,10 +70,12 @@ func (r *Reviewer) ReviewCommand(ctx context.Context, inv ports.ReviewInvocation WorkspacePath: inv.WorkspacePath, Prompt: inv.Prompt, SystemPrompt: inv.SystemPrompt, - // The reviewer runs headless with no human to approve tool prompts; it - // is read-only by prompt and must run gh/ao on its own, so bypass the - // permission gate rather than stall on the first prompt. - Permissions: ports.PermissionModeBypassPermissions, + // Launch off bypassPermissions so the allow/deny lists are enforced. + // Set an explicit non-bypass mode instead of deferring to the user's + // Claude defaultMode, which may itself be bypassPermissions. + Permissions: ports.PermissionModeAuto, + AllowedTools: reviewerAllowedTools, + DisallowedTools: reviewerDisallowedTools, }) if err != nil { return ports.ReviewCommandSpec{}, err diff --git a/backend/internal/adapters/reviewer/claudecode/claudecode_test.go b/backend/internal/adapters/reviewer/claudecode/claudecode_test.go new file mode 100644 index 00000000..53b1bb5f --- /dev/null +++ b/backend/internal/adapters/reviewer/claudecode/claudecode_test.go @@ -0,0 +1,71 @@ +package claudecode + +import ( + "context" + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +// captureAgent is a stub ports.Agent that records the LaunchConfig the reviewer +// builds, so the test asserts the reviewer's tool policy without needing the +// real claude binary on PATH. +type captureAgent struct { + got ports.LaunchConfig +} + +func (a *captureAgent) GetConfigSpec(context.Context) (ports.ConfigSpec, error) { + return ports.ConfigSpec{}, nil +} +func (a *captureAgent) GetLaunchCommand(_ context.Context, cfg ports.LaunchConfig) ([]string, error) { + a.got = cfg + return []string{"claude"}, nil +} +func (a *captureAgent) GetPromptDeliveryStrategy(context.Context, ports.LaunchConfig) (ports.PromptDeliveryStrategy, error) { + return ports.PromptDeliveryInCommand, nil +} +func (a *captureAgent) GetAgentHooks(context.Context, ports.WorkspaceHookConfig) error { return nil } +func (a *captureAgent) GetRestoreCommand(context.Context, ports.RestoreConfig) ([]string, bool, error) { + return nil, false, nil +} +func (a *captureAgent) SessionInfo(context.Context, ports.SessionRef) (ports.SessionInfo, bool, error) { + return ports.SessionInfo{}, false, nil +} + +func TestReviewCommandLaunchesReadOnlyOffBypass(t *testing.T) { + agent := &captureAgent{} + r := &Reviewer{agent: agent} + + if _, err := r.ReviewCommand(context.Background(), ports.ReviewInvocation{ + ReviewerID: "review-w1", + WorkspacePath: "/ws/w1", + Prompt: "review it", + SystemPrompt: "you are a reviewer", + }); err != nil { + t.Fatalf("ReviewCommand: %v", err) + } + + // The allowlist is what enforces read-only, so it must launch in an + // explicit non-bypass mode: bypassPermissions ignores allow/deny rules + // entirely, and an empty mode would defer to a user's defaultMode. + if agent.got.Permissions != ports.PermissionModeAuto { + t.Fatalf("reviewer must launch in auto permission mode; got %q", agent.got.Permissions) + } + if !contains(agent.got.AllowedTools, "Read") || !contains(agent.got.AllowedTools, "Bash(ao review submit:*)") { + t.Fatalf("allowlist missing read-only review tools: %#v", agent.got.AllowedTools) + } + for _, denied := range []string{"Edit", "Write", "Bash(git push:*)", "Bash(git commit:*)"} { + if !contains(agent.got.DisallowedTools, denied) { + t.Fatalf("disallow list missing %q: %#v", denied, agent.got.DisallowedTools) + } + } +} + +func contains(values []string, needle string) bool { + for _, v := range values { + if v == needle { + return true + } + } + return false +} diff --git a/backend/internal/cli/review.go b/backend/internal/cli/review.go index 7bb2f7f5..d6c26e12 100644 --- a/backend/internal/cli/review.go +++ b/backend/internal/cli/review.go @@ -38,10 +38,11 @@ type submitReviewRequest struct { } type reviewSubmitOptions struct { - session string - runID string - verdict string - body string + session string + runID string + verdict string + body string + bodyText string } func newReviewCommand(ctx *commandContext) *cobra.Command { @@ -67,6 +68,7 @@ func newReviewSubmitCommand(ctx *commandContext) *cobra.Command { cmd.Flags().StringVar(&opts.runID, "run", "", "Review run id (required)") cmd.Flags().StringVar(&opts.verdict, "verdict", "", "Review verdict: approved or changes_requested (required)") cmd.Flags().StringVar(&opts.body, "body", "", "Path to a Markdown file with the review body") + cmd.Flags().StringVar(&opts.bodyText, "body-text", "", "Review body as inline Markdown (use instead of --body when no file should be written)") return cmd } @@ -86,17 +88,24 @@ func (c *commandContext) submitReview(cmd *cobra.Command, args []string, opts re if verdict == "" { return usageError{errors.New("usage: --verdict is required (approved or changes_requested)")} } + path := strings.TrimSpace(opts.body) + if path != "" && opts.bodyText != "" { + return usageError{errors.New("usage: pass either --body or --body-text, not both")} + } var body string - if path := strings.TrimSpace(opts.body); path != "" { + switch { + case opts.bodyText != "": + body = opts.bodyText + case path != "": raw, err := os.ReadFile(path) if err != nil { return usageError{fmt.Errorf("read body file: %w", err)} } body = string(raw) } - path := "sessions/" + url.PathEscape(session) + "/reviews/submit" + reqPath := "sessions/" + url.PathEscape(session) + "/reviews/submit" var res reviewRunResponse - if err := c.postJSON(cmd.Context(), path, submitReviewRequest{RunID: runID, Verdict: verdict, Body: body}, &res); err != nil { + if err := c.postJSON(cmd.Context(), reqPath, submitReviewRequest{RunID: runID, Verdict: verdict, Body: body}, &res); err != nil { return err } _, err := fmt.Fprintf(cmd.OutOrStdout(), "recorded %s review for %s\n", res.Review.Verdict, session) diff --git a/backend/internal/cli/review_test.go b/backend/internal/cli/review_test.go index 29cdee64..d9d2cd01 100644 --- a/backend/internal/cli/review_test.go +++ b/backend/internal/cli/review_test.go @@ -62,6 +62,34 @@ func TestReviewSubmitReadsBodyFile(t *testing.T) { } } +func TestReviewSubmitReadsInlineBody(t *testing.T) { + cfg := setConfigEnv(t) + srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"changes_requested"}}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, aliveDeps(), + "review", "submit", "mer-1", "--run", "run-1", "--verdict", "changes_requested", "--body-text", "please fix") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + var req submitReviewRequest + if err := json.Unmarshal([]byte(capture.body), &req); err != nil { + t.Fatalf("decode body: %v", err) + } + if req.Body != "please fix" { + t.Fatalf("inline body not forwarded; request = %+v", req) + } +} + +func TestReviewSubmitBodyAndBodyTextIsUsageError(t *testing.T) { + setConfigEnv(t) + _, _, err := executeCLI(t, aliveDeps(), + "review", "submit", "mer-1", "--run", "run-1", "--verdict", "approved", "--body", "x.md", "--body-text", "y") + if got := ExitCode(err); got != 2 { + t.Fatalf("exit code = %d, want 2 (usage); err=%v", got, err) + } +} + func TestReviewSubmitUsesSessionFlag(t *testing.T) { cfg := setConfigEnv(t) srv, capture := reviewServer(t, http.StatusOK, `{"review":{"verdict":"approved"}}`) diff --git a/backend/internal/ports/agent.go b/backend/internal/ports/agent.go index 93c534b2..6ff2b10c 100644 --- a/backend/internal/ports/agent.go +++ b/backend/internal/ports/agent.go @@ -100,11 +100,19 @@ const ( // LaunchConfig carries inputs needed to build a new agent launch command. type LaunchConfig struct { - Config AgentConfig - IssueID string - Permissions PermissionMode - Prompt string - SessionID string + Config AgentConfig + IssueID string + Permissions PermissionMode + Prompt string + SessionID string + // AllowedTools and DisallowedTools scope the agent to a tool allowlist when + // it runs in a non-bypass permission mode (allow rules auto-approve, deny + // rules auto-reject). They are the enforced read-only guarantee the reviewer + // relies on: bypassPermissions ignores both lists, so a restricted launch + // must leave Permissions off bypass. Empty means no restriction, so worker + // sessions are unaffected. + AllowedTools []string + DisallowedTools []string SystemPrompt string SystemPromptFile string WorkspacePath string diff --git a/backend/internal/review/prompt.go b/backend/internal/review/prompt.go index 1a5ee248..16c925f3 100644 --- a/backend/internal/review/prompt.go +++ b/backend/internal/review/prompt.go @@ -21,14 +21,15 @@ Post your review on the pull request using the available review tooling (request prompt = fmt.Sprintf(`Review pull request %s (head commit %s). Do these steps in order: -1. Post your review on the pull request with `+"`gh`"+`, with inline comments for specific findings: +1. You are read-only: you can read the checkout and run gh, git diff/log/show, and ao. You cannot write or edit files, so do not write a review file. +2. Post your review on the pull request with `+"`gh`"+`, with inline comments for specific findings: - If changes are needed, request changes. - - If it is ready, approve it. GitHub does not let you approve a PR you opened — if the approval is rejected because you are the PR author, post the same review as a regular comment instead (a COMMENT-event review whose body states it is an approval). -2. Write your full review to review.md and record the result with AO by running exactly: + - If it is ready, approve it. GitHub does not let you approve a PR you opened; if the approval is rejected because you are the PR author, post the same review as a regular comment instead (a COMMENT-event review whose body states it is an approval). +3. Record the result with AO by passing your full review inline: - ao review submit --session %s --run %s --verdict --body review.md + ao review submit --session %s --run %s --verdict --body-text "" -Only if step 1 genuinely fails on the provider, still run step 2 so the result is recorded.`, +Only if step 2 genuinely fails on the provider, still run step 3 so the result is recorded.`, spec.PRURL, spec.TargetSHA, spec.WorkerID, spec.RunID) return prompt, systemPrompt }