diff --git a/backend/internal/adapters/agent/agy/auth.go b/backend/internal/adapters/agent/agy/auth.go new file mode 100644 index 00000000..79e13413 --- /dev/null +++ b/backend/internal/adapters/agent/agy/auth.go @@ -0,0 +1,19 @@ +package agy + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/aider/auth.go b/backend/internal/adapters/agent/aider/auth.go new file mode 100644 index 00000000..a6cd90b1 --- /dev/null +++ b/backend/internal/adapters/agent/aider/auth.go @@ -0,0 +1,19 @@ +package aider + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/amp/auth.go b/backend/internal/adapters/agent/amp/auth.go new file mode 100644 index 00000000..b58d4e8f --- /dev/null +++ b/backend/internal/adapters/agent/amp/auth.go @@ -0,0 +1,19 @@ +package amp + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/auggie/auth.go b/backend/internal/adapters/agent/auggie/auth.go new file mode 100644 index 00000000..46ff1b84 --- /dev/null +++ b/backend/internal/adapters/agent/auggie/auth.go @@ -0,0 +1,19 @@ +package auggie + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/authprobe/authprobe.go b/backend/internal/adapters/agent/authprobe/authprobe.go new file mode 100644 index 00000000..ec7f18f6 --- /dev/null +++ b/backend/internal/adapters/agent/authprobe/authprobe.go @@ -0,0 +1,96 @@ +package authprobe + +import ( + "context" + "os/exec" + "strings" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +// DefaultCommands are cheap local auth/status probes common across agent CLIs. +// Unsupported commands usually exit quickly with help text and are treated as +// unknown rather than unauthorized. +var DefaultCommands = [][]string{ + {"auth", "status"}, + {"login", "status"}, + {"providers", "list"}, +} + +// CLIStatus runs bounded local CLI probes and classifies their output. +func CLIStatus(ctx context.Context, binary string, commands [][]string) (ports.AgentAuthStatus, error) { + if err := ctx.Err(); err != nil { + return ports.AgentAuthStatusUnknown, err + } + if binary == "" { + return ports.AgentAuthStatusUnknown, nil + } + if len(commands) == 0 { + commands = DefaultCommands + } + for _, args := range commands { + status, err := commandStatus(ctx, binary, args) + if err != nil { + return ports.AgentAuthStatusUnknown, err + } + if status != ports.AgentAuthStatusUnknown { + return status, nil + } + } + return ports.AgentAuthStatusUnknown, nil +} + +func commandStatus(ctx context.Context, binary string, args []string) (ports.AgentAuthStatus, error) { + probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + out, err := exec.CommandContext(probeCtx, binary, args...).CombinedOutput() + if probeCtx.Err() != nil { + return ports.AgentAuthStatusUnknown, probeCtx.Err() + } + text := strings.ToLower(string(out)) + if hasAny(text, + "not logged in", + "logged out", + "not authenticated", + "unauthenticated", + "authentication required", + "not authorized", + "unauthorized", + "login required", + "no credentials", + "0 credentials", + "no api key", + "no token", + `"loggedin": false`, + `"loggedin":false`, + ) { + return ports.AgentAuthStatusUnauthorized, nil + } + if hasAny(text, + "logged in", + "authenticated", + "authorized", + "token valid", + "api key found", + "credentials found", + `"loggedin": true`, + `"loggedin":true`, + ) { + return ports.AgentAuthStatusAuthorized, nil + } + if err != nil { + return ports.AgentAuthStatusUnknown, nil + } + return ports.AgentAuthStatusUnknown, nil +} + +func hasAny(text string, needles ...string) bool { + for _, needle := range needles { + if strings.Contains(text, needle) { + return true + } + } + return false +} diff --git a/backend/internal/adapters/agent/autohand/auth.go b/backend/internal/adapters/agent/autohand/auth.go new file mode 100644 index 00000000..6cd14df4 --- /dev/null +++ b/backend/internal/adapters/agent/autohand/auth.go @@ -0,0 +1,19 @@ +package autohand + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go index bc3b5437..5fe0f01b 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -26,6 +26,7 @@ import ( "runtime" "strings" "sync" + "time" "github.com/google/uuid" @@ -60,6 +61,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ ports.Agent = (*Plugin)(nil) +var _ ports.AgentAuthChecker = (*Plugin)(nil) // Manifest returns the adapter's static self-description. func (p *Plugin) Manifest() adapters.Manifest { @@ -268,6 +270,35 @@ func (p *Plugin) SessionInfo(ctx context.Context, session ports.SessionRef) (por return info, true, nil } +// AuthStatus checks Claude Code's local authentication state without starting a +// session. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + binary, err := p.claudeBinary(ctx) + if err != nil { + return ports.AgentAuthStatusUnknown, err + } + probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + out, err := exec.CommandContext(probeCtx, binary, "auth", "status").CombinedOutput() + if probeCtx.Err() != nil { + return ports.AgentAuthStatusUnknown, probeCtx.Err() + } + var status struct { + LoggedIn bool `json:"loggedIn"` + } + if json.Unmarshal(out, &status) == nil { + if status.LoggedIn { + return ports.AgentAuthStatusAuthorized, nil + } + return ports.AgentAuthStatusUnauthorized, nil + } + if err != nil { + return ports.AgentAuthStatusUnauthorized, nil + } + return ports.AgentAuthStatusUnknown, nil +} + // claudeSessionUUID maps an AO session id onto a stable Claude Code // session UUID via UUIDv5 over a fixed namespace, so the same AO session // always resolves to the same Claude session. diff --git a/backend/internal/adapters/agent/cline/auth.go b/backend/internal/adapters/agent/cline/auth.go new file mode 100644 index 00000000..cff4e88f --- /dev/null +++ b/backend/internal/adapters/agent/cline/auth.go @@ -0,0 +1,19 @@ +package cline + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go index 43d1f6b0..436e1f1a 100644 --- a/backend/internal/adapters/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -13,8 +13,10 @@ import ( "os/exec" "path/filepath" "runtime" + "sort" "strings" "sync" + "time" "github.com/aoagents/agent-orchestrator/backend/internal/adapters" "github.com/aoagents/agent-orchestrator/backend/internal/ports" @@ -34,6 +36,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ ports.Agent = (*Plugin)(nil) +var _ ports.AgentAuthChecker = (*Plugin)(nil) // Manifest returns the adapter's static self-description. func (p *Plugin) Manifest() adapters.Manifest { @@ -146,10 +149,37 @@ func (p *Plugin) SessionInfo(ctx context.Context, session ports.SessionRef) (por return info, true, nil } +// AuthStatus checks Codex's local login state without making a model call. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + binary, err := p.codexBinary(ctx) + if err != nil { + return ports.AgentAuthStatusUnknown, err + } + probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + out, err := exec.CommandContext(probeCtx, binary, "login", "status").CombinedOutput() + if probeCtx.Err() != nil { + return ports.AgentAuthStatusUnknown, probeCtx.Err() + } + text := strings.ToLower(string(out)) + if strings.Contains(text, "not logged in") || strings.Contains(text, "logged out") { + return ports.AgentAuthStatusUnauthorized, nil + } + if strings.Contains(text, "logged in") { + return ports.AgentAuthStatusAuthorized, nil + } + if err != nil { + return ports.AgentAuthStatusUnauthorized, nil + } + return ports.AgentAuthStatusUnknown, nil +} + // ResolveCodexBinary returns the path to the codex binary on this machine, // searching PATH then a handful of well-known install locations -// (Homebrew, Cargo, npm global). Returns "codex" as a last-ditch fallback -// so callers see a clear "command not found" rather than an empty argv. +// (Homebrew, Cargo, npm global, NVM). Returns "codex" as a last-ditch +// fallback so callers see a clear "command not found" rather than an empty +// argv. func ResolveCodexBinary(ctx context.Context) (string, error) { if err := ctx.Err(); err != nil { return "", err @@ -203,6 +233,7 @@ func ResolveCodexBinary(ctx context.Context) (string, error) { filepath.Join(home, ".cargo", "bin", "codex"), filepath.Join(home, ".npm", "bin", "codex"), ) + candidates = append(candidates, nvmNodeBinCandidates(home, "codex")...) } for _, candidate := range candidates { @@ -217,6 +248,14 @@ func ResolveCodexBinary(ctx context.Context) (string, error) { return "", fmt.Errorf("codex: %w", ports.ErrAgentBinaryNotFound) } +func nvmNodeBinCandidates(home, binary string) []string { + matches, err := filepath.Glob(filepath.Join(home, ".nvm", "versions", "node", "*", "bin", binary)) + if err != nil || len(matches) == 0 { + return nil + } + sort.Sort(sort.Reverse(sort.StringSlice(matches))) + return matches +} func resolveNativeWindowsCodex(path string) string { if runtime.GOOS != "windows" || !strings.EqualFold(filepath.Ext(path), ".cmd") { return path diff --git a/backend/internal/adapters/agent/codex/codex_test.go b/backend/internal/adapters/agent/codex/codex_test.go index 0981b251..a20a50af 100644 --- a/backend/internal/adapters/agent/codex/codex_test.go +++ b/backend/internal/adapters/agent/codex/codex_test.go @@ -90,6 +90,28 @@ func TestGetLaunchCommandWithoutWorkspaceOmitsTrustFlag(t *testing.T) { } } +func TestResolveCodexBinaryFindsNVMInstallWhenPathIsSparse(t *testing.T) { + home := t.TempDir() + binDir := filepath.Join(home, ".nvm", "versions", "node", "v20.19.4", "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatal(err) + } + want := filepath.Join(binDir, "codex") + if err := os.WriteFile(want, []byte("#!/bin/sh\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("HOME", home) + t.Setenv("PATH", "") + + got, err := ResolveCodexBinary(context.Background()) + if err != nil { + t.Fatalf("ResolveCodexBinary: %v", err) + } + if got != want { + t.Fatalf("ResolveCodexBinary = %q, want %q", got, want) + } +} + func TestGetLaunchCommandMapsApprovalModes(t *testing.T) { tests := []struct { name string diff --git a/backend/internal/adapters/agent/continueagent/auth.go b/backend/internal/adapters/agent/continueagent/auth.go new file mode 100644 index 00000000..54844466 --- /dev/null +++ b/backend/internal/adapters/agent/continueagent/auth.go @@ -0,0 +1,19 @@ +package continueagent + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/copilot/auth.go b/backend/internal/adapters/agent/copilot/auth.go new file mode 100644 index 00000000..441c9d0b --- /dev/null +++ b/backend/internal/adapters/agent/copilot/auth.go @@ -0,0 +1,19 @@ +package copilot + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/crush/auth.go b/backend/internal/adapters/agent/crush/auth.go new file mode 100644 index 00000000..b28332b6 --- /dev/null +++ b/backend/internal/adapters/agent/crush/auth.go @@ -0,0 +1,19 @@ +package crush + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/cursor/auth.go b/backend/internal/adapters/agent/cursor/auth.go new file mode 100644 index 00000000..c9c5fc99 --- /dev/null +++ b/backend/internal/adapters/agent/cursor/auth.go @@ -0,0 +1,19 @@ +package cursor + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/devin/auth.go b/backend/internal/adapters/agent/devin/auth.go new file mode 100644 index 00000000..2ffa9251 --- /dev/null +++ b/backend/internal/adapters/agent/devin/auth.go @@ -0,0 +1,19 @@ +package devin + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/droid/auth.go b/backend/internal/adapters/agent/droid/auth.go new file mode 100644 index 00000000..ff5f9b49 --- /dev/null +++ b/backend/internal/adapters/agent/droid/auth.go @@ -0,0 +1,19 @@ +package droid + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/goose/auth.go b/backend/internal/adapters/agent/goose/auth.go new file mode 100644 index 00000000..8cca9638 --- /dev/null +++ b/backend/internal/adapters/agent/goose/auth.go @@ -0,0 +1,19 @@ +package goose + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/grok/auth.go b/backend/internal/adapters/agent/grok/auth.go new file mode 100644 index 00000000..c2a8312b --- /dev/null +++ b/backend/internal/adapters/agent/grok/auth.go @@ -0,0 +1,19 @@ +package grok + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/kilocode/auth.go b/backend/internal/adapters/agent/kilocode/auth.go new file mode 100644 index 00000000..247259b7 --- /dev/null +++ b/backend/internal/adapters/agent/kilocode/auth.go @@ -0,0 +1,19 @@ +package kilocode + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/kimi/auth.go b/backend/internal/adapters/agent/kimi/auth.go new file mode 100644 index 00000000..ef990324 --- /dev/null +++ b/backend/internal/adapters/agent/kimi/auth.go @@ -0,0 +1,19 @@ +package kimi + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/kiro/auth.go b/backend/internal/adapters/agent/kiro/auth.go new file mode 100644 index 00000000..9e09686e --- /dev/null +++ b/backend/internal/adapters/agent/kiro/auth.go @@ -0,0 +1,19 @@ +package kiro + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/opencode/opencode.go b/backend/internal/adapters/agent/opencode/opencode.go index 377f1bde..02e59766 100644 --- a/backend/internal/adapters/agent/opencode/opencode.go +++ b/backend/internal/adapters/agent/opencode/opencode.go @@ -23,6 +23,7 @@ import ( "runtime" "strings" "sync" + "time" "github.com/aoagents/agent-orchestrator/backend/internal/adapters" "github.com/aoagents/agent-orchestrator/backend/internal/ports" @@ -55,6 +56,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ ports.Agent = (*Plugin)(nil) +var _ ports.AgentAuthChecker = (*Plugin)(nil) // Manifest returns the adapter's static self-description. func (p *Plugin) Manifest() adapters.Manifest { @@ -158,6 +160,33 @@ func (p *Plugin) SessionInfo(ctx context.Context, session ports.SessionRef) (por return info, true, nil } +// AuthStatus checks whether opencode has at least one configured provider +// credential. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + binary, err := p.opencodeBinary(ctx) + if err != nil { + return ports.AgentAuthStatusUnknown, err + } + probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + out, err := exec.CommandContext(probeCtx, binary, "providers", "list").CombinedOutput() + if probeCtx.Err() != nil { + return ports.AgentAuthStatusUnknown, probeCtx.Err() + } + text := strings.ToLower(string(out)) + if strings.Contains(text, "0 credentials") { + return ports.AgentAuthStatusUnauthorized, nil + } + if strings.Contains(text, "credential") && err == nil { + return ports.AgentAuthStatusAuthorized, nil + } + if err != nil { + return ports.AgentAuthStatusUnknown, nil + } + return ports.AgentAuthStatusUnknown, nil +} + // appendPermissionFlags maps AO's permission modes onto opencode's single // approval flag. opencode exposes only --dangerously-skip-permissions (no // graduated accept-edits/auto modes), so: diff --git a/backend/internal/adapters/agent/pi/auth.go b/backend/internal/adapters/agent/pi/auth.go new file mode 100644 index 00000000..c61c6750 --- /dev/null +++ b/backend/internal/adapters/agent/pi/auth.go @@ -0,0 +1,19 @@ +package pi + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/qwen/auth.go b/backend/internal/adapters/agent/qwen/auth.go new file mode 100644 index 00000000..004ea52b --- /dev/null +++ b/backend/internal/adapters/agent/qwen/auth.go @@ -0,0 +1,19 @@ +package qwen + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/adapters/agent/registry/registry.go b/backend/internal/adapters/agent/registry/registry.go index 77f9b526..91141e9d 100644 --- a/backend/internal/adapters/agent/registry/registry.go +++ b/backend/internal/adapters/agent/registry/registry.go @@ -83,8 +83,9 @@ func Build() (*adapters.Registry, error) { // harness is the adapter's manifest id, which is also the domain.AgentHarness // value a session carries and the `--harness` flag users pass. type HarnessAgent struct { - Harness domain.AgentHarness - Agent ports.Agent + Harness domain.AgentHarness + Manifest adapters.Manifest + Agent ports.Agent } // Harnessed returns every shipped adapter that drives an agent, paired with its @@ -99,8 +100,9 @@ func Harnessed() []HarnessAgent { continue } out = append(out, HarnessAgent{ - Harness: domain.AgentHarness(a.Manifest().ID), - Agent: agent, + Harness: domain.AgentHarness(a.Manifest().ID), + Manifest: a.Manifest(), + Agent: agent, }) } return out diff --git a/backend/internal/adapters/agent/registry/registry_test.go b/backend/internal/adapters/agent/registry/registry_test.go index 269abced..2c2f56c9 100644 --- a/backend/internal/adapters/agent/registry/registry_test.go +++ b/backend/internal/adapters/agent/registry/registry_test.go @@ -52,6 +52,14 @@ func TestGetAgentHooksFootprintIsGitignored(t *testing.T) { } } +func TestEveryHarnessReportsAuthStatus(t *testing.T) { + for _, ha := range Harnessed() { + if _, ok := ha.Agent.(ports.AgentAuthChecker); !ok { + t.Errorf("%s does not implement ports.AgentAuthChecker", ha.Harness) + } + } +} + // workspaceFiles returns every regular file under root, relative to root. func workspaceFiles(t *testing.T, root string) []string { t.Helper() diff --git a/backend/internal/adapters/agent/vibe/auth.go b/backend/internal/adapters/agent/vibe/auth.go new file mode 100644 index 00000000..b61de0c0 --- /dev/null +++ b/backend/internal/adapters/agent/vibe/auth.go @@ -0,0 +1,19 @@ +package vibe + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/authprobe" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var _ ports.AgentAuthChecker = (*Plugin)(nil) + +// AuthStatus returns the plugin's local authentication status. +func (p *Plugin) AuthStatus(ctx context.Context) (ports.AgentAuthStatus, error) { + cmd, err := p.GetLaunchCommand(ctx, ports.LaunchConfig{}) + if err != nil || len(cmd) == 0 { + return ports.AgentAuthStatusUnknown, err + } + return authprobe.CLIStatus(ctx, cmd[0], nil) +} diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 7ea50c3a..ebe90382 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -15,10 +15,12 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/adapters/runtime/zellij" "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" "github.com/aoagents/agent-orchestrator/backend/internal/httpd" "github.com/aoagents/agent-orchestrator/backend/internal/notify" "github.com/aoagents/agent-orchestrator/backend/internal/ports" "github.com/aoagents/agent-orchestrator/backend/internal/runfile" + agentsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/agent" notificationsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/notification" projectsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/project" "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" @@ -129,7 +131,8 @@ func Run() error { } srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{ - Projects: projectsvc.NewWithDeps(projectsvc.Deps{Store: store, Sessions: sessionSvc, Telemetry: telemetrySink}), + Agents: agentsvc.New(), + Projects: projectsvc.NewWithDeps(projectsvc.Deps{Store: store, Sessions: sessionSvc, DefaultHarness: domain.AgentHarness(cfg.Agent), Telemetry: telemetrySink}), Sessions: sessionSvc, Reviews: reviewSvc, Notifications: notifier, diff --git a/backend/internal/httpd/api.go b/backend/internal/httpd/api.go index 9026376d..9133e4b7 100644 --- a/backend/internal/httpd/api.go +++ b/backend/internal/httpd/api.go @@ -19,6 +19,7 @@ import ( // APIDeps bundles every service the API layer's controllers depend on. type APIDeps struct { + Agents controllers.AgentCatalog Projects projectsvc.Manager Sessions controllers.SessionService Activity controllers.ActivityRecorder @@ -35,6 +36,7 @@ type APIDeps struct { // router invokes to mount the /api/v1 surface. type API struct { cfg config.Config + agents *controllers.AgentsController projects *controllers.ProjectsController sessions *controllers.SessionsController prs *controllers.PRsController @@ -49,6 +51,9 @@ type API struct { func NewAPI(cfg config.Config, deps APIDeps) *API { return &API{ cfg: cfg, + agents: &controllers.AgentsController{ + Catalog: deps.Agents, + }, projects: &controllers.ProjectsController{ Mgr: deps.Projects, }, @@ -77,6 +82,7 @@ func (a *API) Register(root chi.Router) { r.Group(func(r chi.Router) { r.Use(middleware.Timeout(timeout)) + a.agents.Register(r) a.projects.Register(r) a.sessions.Register(r) a.prs.Register(r) diff --git a/backend/internal/httpd/apispec/openapi.yaml b/backend/internal/httpd/apispec/openapi.yaml index bc388206..288bfab9 100644 --- a/backend/internal/httpd/apispec/openapi.yaml +++ b/backend/internal/httpd/apispec/openapi.yaml @@ -8,6 +8,31 @@ servers: - description: Local daemon (loopback only) url: http://127.0.0.1:3001 paths: + /api/v1/agents: + get: + operationId: listAgents + responses: + "200": + content: + application/json: + schema: + $ref: '#/components/schemas/ListAgentsResponse' + description: OK + "500": + content: + application/json: + schema: + $ref: '#/components/schemas/APIError' + description: Internal Server Error + "501": + content: + application/json: + schema: + $ref: '#/components/schemas/APIError' + description: Not Implemented + summary: List supported and locally installed agent adapters + tags: + - agents /api/v1/events: get: operationId: streamEvents @@ -1399,6 +1424,22 @@ components: permissions: type: string type: object + AgentInfo: + properties: + authStatus: + enum: + - authorized + - unauthorized + - unknown + type: string + id: + type: string + label: + type: string + required: + - id + - label + type: object ClaimPRRequest: properties: allowTakeover: @@ -1571,6 +1612,25 @@ components: - ok - sessionId type: object + ListAgentsResponse: + properties: + authorized: + items: + $ref: '#/components/schemas/AgentInfo' + type: array + installed: + items: + $ref: '#/components/schemas/AgentInfo' + type: array + supported: + items: + $ref: '#/components/schemas/AgentInfo' + type: array + required: + - supported + - installed + - authorized + type: object ListNotificationsResponse: properties: notifications: @@ -2371,6 +2431,8 @@ components: - repo type: object tags: +- description: Supported and locally runnable agent adapters + name: agents - description: Project registry, configuration, and lifecycle administration name: projects - description: Agent session lifecycle and messaging diff --git a/backend/internal/httpd/apispec/specgen/build.go b/backend/internal/httpd/apispec/specgen/build.go index c0383858..05e8bb24 100644 --- a/backend/internal/httpd/apispec/specgen/build.go +++ b/backend/internal/httpd/apispec/specgen/build.go @@ -55,6 +55,8 @@ func Build() ([]byte, error) { *(&openapi31.Server{URL: "http://127.0.0.1:3001"}).WithDescription("Local daemon (loopback only)"), } r.Spec.Tags = []openapi31.Tag{ + *(&openapi31.Tag{Name: "agents"}).WithDescription( + "Supported and locally runnable agent adapters"), *(&openapi31.Tag{Name: "projects"}).WithDescription( "Project registry, configuration, and lifecycle administration"), *(&openapi31.Tag{Name: "sessions"}).WithDescription( @@ -167,6 +169,8 @@ var schemaNames = map[string]string{ "ControllersSpawnOrchestratorRequest": "SpawnOrchestratorRequest", "ControllersSpawnOrchestratorResponse": "SpawnOrchestratorResponse", "ControllersOrchestratorResponse": "OrchestratorResponse", + "AgentInventory": "ListAgentsResponse", + "AgentInfo": "AgentInfo", "ControllersListNotificationsQuery": "ListNotificationsQuery", "ControllersNotificationStreamQuery": "NotificationStreamQuery", "ControllersNotificationIDParam": "NotificationIDParam", @@ -268,6 +272,7 @@ type operation struct { func operations() []operation { ops := append([]operation{}, eventOperations()...) + ops = append(ops, agentOperations()...) ops = append(ops, projectOperations()...) ops = append(ops, sessionOperations()...) ops = append(ops, prOperations()...) @@ -276,6 +281,20 @@ func operations() []operation { return ops } +func agentOperations() []operation { + return []operation{ + { + method: http.MethodGet, path: "/api/v1/agents", id: "listAgents", tag: "agents", + summary: "List supported and locally installed agent adapters", + resps: []respUnit{ + {http.StatusOK, controllers.ListAgentsResponse{}}, + {http.StatusInternalServerError, envelope.APIError{}}, + {http.StatusNotImplemented, envelope.APIError{}}, + }, + }, + } +} + func notificationOperations() []operation { return []operation{ { diff --git a/backend/internal/httpd/controllers/agents.go b/backend/internal/httpd/controllers/agents.go new file mode 100644 index 00000000..49d9254e --- /dev/null +++ b/backend/internal/httpd/controllers/agents.go @@ -0,0 +1,40 @@ +package controllers + +import ( + "context" + "net/http" + + "github.com/go-chi/chi/v5" + + "github.com/aoagents/agent-orchestrator/backend/internal/httpd/apispec" + "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" + agentsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/agent" +) + +// AgentCatalog is the controller-facing contract for local agent inventory. +type AgentCatalog interface { + List(ctx context.Context) (agentsvc.Inventory, error) +} + +// AgentsController owns the /agents routes. +type AgentsController struct { + Catalog AgentCatalog +} + +// Register mounts the agent inventory routes on the supplied router. +func (c *AgentsController) Register(r chi.Router) { + r.Get("/agents", c.list) +} + +func (c *AgentsController) list(w http.ResponseWriter, r *http.Request) { + if c.Catalog == nil { + apispec.NotImplemented(w, r, "GET", "/api/v1/agents") + return + } + inventory, err := c.Catalog.List(r.Context()) + if err != nil { + envelope.WriteError(w, r, err) + return + } + envelope.WriteJSON(w, http.StatusOK, inventory) +} diff --git a/backend/internal/httpd/controllers/agents_test.go b/backend/internal/httpd/controllers/agents_test.go new file mode 100644 index 00000000..f159e8e5 --- /dev/null +++ b/backend/internal/httpd/controllers/agents_test.go @@ -0,0 +1,49 @@ +package controllers_test + +import ( + "context" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/httpd" + agentsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/agent" +) + +type fakeAgentCatalog struct { + inventory agentsvc.Inventory + err error +} + +func (f fakeAgentCatalog) List(context.Context) (agentsvc.Inventory, error) { + return f.inventory, f.err +} + +func TestListAgents(t *testing.T) { + log := slog.New(slog.NewTextHandler(io.Discard, nil)) + srv := httptest.NewServer(httpd.NewRouterWithControl(config.Config{}, log, nil, httpd.APIDeps{ + Agents: fakeAgentCatalog{inventory: agentsvc.Inventory{ + Supported: []agentsvc.Info{{ID: "claude-code", Label: "Claude Code"}, {ID: "codex", Label: "Codex"}}, + Installed: []agentsvc.Info{{ID: "codex", Label: "Codex"}}, + Authorized: []agentsvc.Info{{ID: "codex", Label: "Codex"}}, + }}, + }, httpd.ControlDeps{})) + defer srv.Close() + + body, status, _ := doRequest(t, srv, http.MethodGet, "/api/v1/agents", "") + if status != http.StatusOK { + t.Fatalf("GET /agents = %d, body=%s", status, body) + } + for _, want := range []string{`"supported"`, `"installed"`, `"authorized"`, `"id":"codex"`} { + if !strings.Contains(string(body), want) { + t.Fatalf("body missing %s: %s", want, body) + } + } + if strings.Contains(string(body), `"counts"`) { + t.Fatalf("body includes removed counts field: %s", body) + } +} diff --git a/backend/internal/httpd/controllers/dto.go b/backend/internal/httpd/controllers/dto.go index 6a040154..f20b9fdf 100644 --- a/backend/internal/httpd/controllers/dto.go +++ b/backend/internal/httpd/controllers/dto.go @@ -6,6 +6,7 @@ import ( "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" + agentsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/agent" projectsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/project" sessionsvc "github.com/aoagents/agent-orchestrator/backend/internal/service/session" ) @@ -423,6 +424,12 @@ type OrchestratorResponse struct { ProjectName string `json:"projectName,omitempty"` } +// ListAgentsResponse is the body of GET /api/v1/agents. +type ListAgentsResponse = agentsvc.Inventory + +// AgentInfo is one supported or installed agent entry. +type AgentInfo = agentsvc.Info + // ListNotificationsQuery is the query string accepted by GET /api/v1/notifications. type ListNotificationsQuery struct { Status string `query:"status,omitempty" enum:"unread" description:"Notification status filter. V1 supports only unread."` diff --git a/backend/internal/ports/agent.go b/backend/internal/ports/agent.go index 93c534b2..77eee2c2 100644 --- a/backend/internal/ports/agent.go +++ b/backend/internal/ports/agent.go @@ -14,6 +14,19 @@ import ( // for a live session. var ErrAgentBinaryNotFound = errors.New("agent: binary not found on PATH") +// AgentAuthStatus describes whether an installed agent is ready to make +// authenticated model calls. +type AgentAuthStatus string + +const ( + // AgentAuthStatusAuthorized means the agent can make authenticated model calls. + AgentAuthStatusAuthorized AgentAuthStatus = "authorized" + // AgentAuthStatusUnauthorized means the agent is installed but not authenticated. + AgentAuthStatusUnauthorized AgentAuthStatus = "unauthorized" + // AgentAuthStatusUnknown means the daemon could not determine auth status. + AgentAuthStatusUnknown AgentAuthStatus = "unknown" +) + // Agent is the contract every CLI coding agent adapter (claude-code, codex, …) // must satisfy. It supplies the argv and process configuration the Session // Manager needs to launch, restore, and read back a native agent session. @@ -42,6 +55,12 @@ type Agent interface { SessionInfo(ctx context.Context, session SessionRef) (info SessionInfo, ok bool, err error) } +// AgentAuthChecker is the optional capability for adapters whose native CLI has +// a cheap local authentication status probe. +type AgentAuthChecker interface { + AuthStatus(ctx context.Context) (AgentAuthStatus, error) +} + // AgentResolver maps a session's harness onto the Agent adapter that drives it, // so the Session Manager can spawn (and restore) a different agent per session // without depending on the concrete adapter registry. ok=false means no adapter diff --git a/backend/internal/service/agent/catalog_test.go b/backend/internal/service/agent/catalog_test.go new file mode 100644 index 00000000..2dd88d6b --- /dev/null +++ b/backend/internal/service/agent/catalog_test.go @@ -0,0 +1,131 @@ +package agent + +import ( + "context" + "errors" + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters" + agentregistry "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/registry" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +type fakeAgent struct { + err error +} + +type fakeAuthAgent struct { + fakeAgent + status ports.AgentAuthStatus + authErr error +} + +func (f fakeAgent) GetConfigSpec(context.Context) (ports.ConfigSpec, error) { + return ports.ConfigSpec{}, nil +} + +func (f fakeAgent) GetLaunchCommand(context.Context, ports.LaunchConfig) ([]string, error) { + if f.err != nil { + return nil, f.err + } + return []string{"agent"}, nil +} + +func (f fakeAgent) GetPromptDeliveryStrategy(context.Context, ports.LaunchConfig) (ports.PromptDeliveryStrategy, error) { + return ports.PromptDeliveryInCommand, nil +} + +func (f fakeAgent) GetAgentHooks(context.Context, ports.WorkspaceHookConfig) error { + return nil +} + +func (f fakeAgent) GetRestoreCommand(context.Context, ports.RestoreConfig) ([]string, bool, error) { + return nil, false, nil +} + +func (f fakeAgent) SessionInfo(context.Context, ports.SessionRef) (ports.SessionInfo, bool, error) { + return ports.SessionInfo{}, false, nil +} + +func (f fakeAuthAgent) AuthStatus(context.Context) (ports.AgentAuthStatus, error) { + return f.status, f.authErr +} + +func TestListReportsInstalledAgentsAndIgnoresDetectorErrors(t *testing.T) { + svc := NewWithAgents([]agentregistry.HarnessAgent{ + harnessAgent("codex", "Codex", nil), + harnessAgent("missing", "Missing", ports.ErrAgentBinaryNotFound), + harnessAgent("broken", "Broken", errors.New("unexpected detector failure")), + }) + + got, err := svc.List(context.Background()) + if err != nil { + t.Fatalf("List: %v", err) + } + if len(got.Supported) != 3 { + t.Fatalf("supported = %#v, want 3 agents", got.Supported) + } + if len(got.Installed) != 1 || got.Installed[0].ID != "codex" { + t.Fatalf("installed = %#v, want only codex", got.Installed) + } +} + +func TestListReportsAuthorizedInstalledAgents(t *testing.T) { + svc := NewWithAgents([]agentregistry.HarnessAgent{ + harnessAuthAgent("codex", "Codex", ports.AgentAuthStatusAuthorized, nil), + harnessAuthAgent("claude-code", "Claude Code", ports.AgentAuthStatusUnauthorized, nil), + harnessAgent("opencode", "OpenCode", nil), + harnessAuthAgent("broken-auth", "Broken Auth", ports.AgentAuthStatusAuthorized, errors.New("probe failed")), + }) + + got, err := svc.List(context.Background()) + if err != nil { + t.Fatalf("List: %v", err) + } + if len(got.Supported) != 4 || len(got.Installed) != 4 { + t.Fatalf("inventory = %#v, want supported=4 installed=4", got) + } + if len(got.Authorized) != 1 || got.Authorized[0].ID != "codex" { + t.Fatalf("authorized = %#v, want only codex", got.Authorized) + } + + byID := map[string]Info{} + for _, info := range got.Installed { + byID[info.ID] = info + } + if byID["codex"].AuthStatus != ports.AgentAuthStatusAuthorized { + t.Fatalf("codex authStatus = %q", byID["codex"].AuthStatus) + } + if byID["claude-code"].AuthStatus != ports.AgentAuthStatusUnauthorized { + t.Fatalf("claude-code authStatus = %q", byID["claude-code"].AuthStatus) + } + if byID["opencode"].AuthStatus != ports.AgentAuthStatusUnknown { + t.Fatalf("opencode authStatus = %q", byID["opencode"].AuthStatus) + } + if byID["broken-auth"].AuthStatus != ports.AgentAuthStatusUnknown { + t.Fatalf("broken-auth authStatus = %q", byID["broken-auth"].AuthStatus) + } +} + +func harnessAgent(id, label string, err error) agentregistry.HarnessAgent { + return agentregistry.HarnessAgent{ + Harness: domain.AgentHarness(id), + Manifest: adapters.Manifest{ + ID: id, + Name: label, + }, + Agent: fakeAgent{err: err}, + } +} + +func harnessAuthAgent(id, label string, status ports.AgentAuthStatus, err error) agentregistry.HarnessAgent { + return agentregistry.HarnessAgent{ + Harness: domain.AgentHarness(id), + Manifest: adapters.Manifest{ + ID: id, + Name: label, + }, + Agent: fakeAuthAgent{fakeAgent: fakeAgent{}, status: status, authErr: err}, + } +} diff --git a/backend/internal/service/agent/service.go b/backend/internal/service/agent/service.go new file mode 100644 index 00000000..69e4ac60 --- /dev/null +++ b/backend/internal/service/agent/service.go @@ -0,0 +1,104 @@ +package agent + +import ( + "context" + "errors" + "sort" + + agentregistry "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/registry" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +// Info is the user-facing identity for an agent adapter. +type Info struct { + ID string `json:"id"` + Label string `json:"label"` + AuthStatus ports.AgentAuthStatus `json:"authStatus,omitempty" enum:"authorized,unauthorized,unknown"` +} + +// Inventory describes all daemon-supported agents and which are runnable here. +type Inventory struct { + Supported []Info `json:"supported"` + Installed []Info `json:"installed"` + Authorized []Info `json:"authorized"` +} + +// Service reports supported and locally runnable agent adapters. +type Service struct { + agents []agentregistry.HarnessAgent +} + +// New returns an agent inventory service backed by the daemon's shipped +// adapter registry. +func New() *Service { + return &Service{agents: agentregistry.Harnessed()} +} + +// NewWithAgents returns an inventory service over a caller-provided adapter +// slice. It is used by focused tests. +func NewWithAgents(agents []agentregistry.HarnessAgent) *Service { + return &Service{agents: agents} +} + +// List returns every supported agent plus the subset whose binary can be +// resolved on this machine. Detector errors are intentionally isolated to the +// affected agent; one broken adapter should not hide the rest of the catalog. +func (s *Service) List(ctx context.Context) (Inventory, error) { + supported := make([]Info, 0, len(s.agents)) + installed := make([]Info, 0, len(s.agents)) + authorized := make([]Info, 0, len(s.agents)) + for _, item := range s.agents { + if err := ctx.Err(); err != nil { + return Inventory{}, err + } + info := Info{ID: string(item.Harness), Label: item.Manifest.Name} + if info.Label == "" { + info.Label = info.ID + } + supported = append(supported, info) + if _, err := item.Agent.GetLaunchCommand(ctx, ports.LaunchConfig{}); err == nil { + info.AuthStatus = authStatus(ctx, item.Agent) + installed = append(installed, info) + if info.AuthStatus == ports.AgentAuthStatusAuthorized { + authorized = append(authorized, info) + } + } else { + continue + } + } + + sortInfos(supported) + sortInfos(installed) + sortInfos(authorized) + return Inventory{ + Supported: supported, + Installed: installed, + Authorized: authorized, + }, nil +} + +func authStatus(ctx context.Context, a ports.Agent) ports.AgentAuthStatus { + checker, ok := a.(ports.AgentAuthChecker) + if !ok { + return ports.AgentAuthStatusUnknown + } + status, err := checker.AuthStatus(ctx) + if err != nil { + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return ports.AgentAuthStatusUnknown + } + return ports.AgentAuthStatusUnknown + } + switch status { + case ports.AgentAuthStatusAuthorized, ports.AgentAuthStatusUnauthorized: + return status + default: + return ports.AgentAuthStatusUnknown + } +} + +func sortInfos(infos []Info) { + sort.Slice(infos, func(i, j int) bool { + return infos[i].ID < infos[j].ID + }) +} diff --git a/backend/internal/service/project/service.go b/backend/internal/service/project/service.go index e21281a6..ad3f93c7 100644 --- a/backend/internal/service/project/service.go +++ b/backend/internal/service/project/service.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/aoagents/agent-orchestrator/backend/internal/config" "github.com/aoagents/agent-orchestrator/backend/internal/domain" "github.com/aoagents/agent-orchestrator/backend/internal/httpd/apierr" "github.com/aoagents/agent-orchestrator/backend/internal/ports" @@ -45,10 +46,15 @@ type SessionTeardowner interface { // Service implements project registration and lookup use-cases for controllers. type Service struct { - store Store - sessions SessionTeardowner - clock func() time.Time - telemetry ports.EventSink + + // defaultHarness is the daemon's configured default agent. Project detail + // responses expose it so clients can compare an explicit empty override + // against the real effective default. + defaultHarness domain.AgentHarness + store Store + sessions SessionTeardowner + clock func() time.Time + telemetry ports.EventSink // addMu serialises the whole body of Add. Workspace registration performs // filesystem mutations (git init, .gitignore writes, commits) that are not // covered by the store's own writeMu, so path/id conflict checks plus the @@ -60,10 +66,14 @@ var _ Manager = (*Service)(nil) // Deps captures optional collaborators for project use-cases. type Deps struct { - Store Store - Sessions SessionTeardowner - Clock func() time.Time - Telemetry ports.EventSink + + // DefaultHarness is the daemon's configured default agent (AO_AGENT). + // When empty, the service falls back to config.DefaultAgent. + DefaultHarness domain.AgentHarness + Store Store + Sessions SessionTeardowner + Clock func() time.Time + Telemetry ports.EventSink } // New returns a project service backed by the given durable store. @@ -73,7 +83,18 @@ func New(store Store) *Service { // NewWithDeps returns a project service with optional teardown dependencies. func NewWithDeps(d Deps) *Service { - s := &Service{store: d.Store, sessions: d.Sessions, clock: d.Clock, telemetry: d.Telemetry} + defaultHarness := d.DefaultHarness + if defaultHarness == "" { + defaultHarness = domain.AgentHarness(config.DefaultAgent) + } + + s := &Service{ + store: d.Store, + sessions: d.Sessions, + clock: d.Clock, + telemetry: d.Telemetry, + defaultHarness: defaultHarness, + } if s.clock == nil { s.clock = time.Now } @@ -111,7 +132,7 @@ func (m *Service) Get(ctx context.Context, id domain.ProjectID) (GetResult, erro if !ok || !row.ArchivedAt.IsZero() { return GetResult{}, apierr.NotFound("PROJECT_NOT_FOUND", "Unknown project") } - p := projectFromRow(row) + p := m.projectFromRow(row) if row.Kind.WithDefault() == domain.ProjectKindWorkspace { repos, err := m.store.ListWorkspaceRepos(ctx, row.ID) if err != nil { @@ -174,12 +195,12 @@ func (m *Service) Add(ctx context.Context, in AddInput) (Project, error) { }) } - var config domain.ProjectConfig + var projectConfig domain.ProjectConfig if in.Config != nil { if err := in.Config.Validate(); err != nil { return Project{}, apierr.Invalid("INVALID_PROJECT_CONFIG", err.Error(), nil) } - config = *in.Config + projectConfig = *in.Config } registeredAt := time.Now() @@ -189,7 +210,7 @@ func (m *Service) Add(ctx context.Context, in AddInput) (Project, error) { DisplayName: name, RegisteredAt: registeredAt, Kind: domain.ProjectKindSingleRepo, - Config: config, + Config: projectConfig, } if in.AsWorkspace { repos, err := prepareWorkspaceProject(ctx, path, domain.ProjectID(row.ID), registeredAt) @@ -202,7 +223,7 @@ func (m *Service) Add(ctx context.Context, in AddInput) (Project, error) { return Project{}, apierr.Internal("PROJECT_ADD_FAILED", "Failed to register workspace project") } m.emitProjectAdded(row, projectCountBefore == 0) - p := projectFromRow(row) + p := m.projectFromRow(row) p.WorkspaceRepos = workspaceReposFromRecords(repos) return p, nil } @@ -224,7 +245,7 @@ func (m *Service) Add(ctx context.Context, in AddInput) (Project, error) { return Project{}, apierr.Internal("PROJECT_ADD_FAILED", "Failed to register project") } m.emitProjectAdded(row, projectCountBefore == 0) - return projectFromRow(row), nil + return m.projectFromRow(row), nil } func (m *Service) activeProjectCount(ctx context.Context) (int, error) { @@ -286,7 +307,7 @@ func (m *Service) SetConfig(ctx context.Context, id domain.ProjectID, in SetConf if err := m.store.UpsertProject(ctx, row); err != nil { return Project{}, apierr.Internal("PROJECT_CONFIG_UPDATE_FAILED", "Failed to update project config") } - return projectFromRow(row), nil + return m.projectFromRow(row), nil } // resolveGitOriginURL returns the project's `origin` remote URL via @@ -365,7 +386,7 @@ func (m *Service) suggestID(ctx context.Context, base domain.ProjectID) domain.P } } -func projectFromRow(row domain.ProjectRecord) Project { +func (m *Service) projectFromRow(row domain.ProjectRecord) Project { p := Project{ ID: domain.ProjectID(row.ID), Name: displayName(row), @@ -373,6 +394,7 @@ func projectFromRow(row domain.ProjectRecord) Project { Path: row.Path, Repo: row.RepoOriginURL, DefaultBranch: row.Config.WithDefaults().DefaultBranch, + Agent: string(m.defaultHarness), } if !row.Config.IsZero() { cfg := row.Config diff --git a/backend/internal/service/project/service_test.go b/backend/internal/service/project/service_test.go index 4901b83b..f10dba67 100644 --- a/backend/internal/service/project/service_test.go +++ b/backend/internal/service/project/service_test.go @@ -272,6 +272,9 @@ func TestManager_DefaultsWhenUnconfigured(t *testing.T) { if got.Project.DefaultBranch != domain.DefaultBranchName { t.Fatalf("default branch = %q, want %q", got.Project.DefaultBranch, domain.DefaultBranchName) } + if got.Project.Agent != "claude-code" { + t.Fatalf("default agent = %q, want claude-code", got.Project.Agent) + } if got.Project.Config != nil { t.Fatalf("unconfigured project should omit config, got %#v", got.Project.Config) } @@ -285,6 +288,32 @@ func TestManager_DefaultsWhenUnconfigured(t *testing.T) { } } +func TestManager_GetUsesConfiguredDefaultHarness(t *testing.T) { + ctx := context.Background() + store, err := sqlite.Open(t.TempDir()) + if err != nil { + t.Fatalf("open store: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + m := project.NewWithDeps(project.Deps{Store: store, DefaultHarness: domain.HarnessCodex}) + repo := gitRepo(t) + + if _, err := m.Add(ctx, project.AddInput{Path: repo, ProjectID: ptr("ao")}); err != nil { + t.Fatalf("Add: %v", err) + } + + got, err := m.Get(ctx, "ao") + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.Project == nil { + t.Fatalf("Get returned no project: %#v", got) + } + if got.Project.Agent != "codex" { + t.Fatalf("default agent = %q, want codex", got.Project.Agent) + } +} + func TestManager_AddDetectsNonMainDefaultBranch(t *testing.T) { ctx := context.Background() m := newManager(t) diff --git a/backend/internal/service/session/claim_pr.go b/backend/internal/service/session/claim_pr.go index 6d6cd92b..509ca2b0 100644 --- a/backend/internal/service/session/claim_pr.go +++ b/backend/internal/service/session/claim_pr.go @@ -47,9 +47,11 @@ type ClaimPRResult struct { // ListPRs returns all PRs currently owned by a session, ordered for display. func (s *Service) ListPRs(ctx context.Context, id domain.SessionID) ([]domain.PRFacts, error) { - if _, ok, err := s.store.GetSession(ctx, id); err != nil { + _, ok, err := s.store.GetSession(ctx, id) + if err != nil { return nil, fmt.Errorf("get %s: %w", id, err) - } else if !ok { + } + if !ok { return nil, apierr.NotFound("SESSION_NOT_FOUND", "Unknown session") } return s.listPRFacts(ctx, id) diff --git a/backend/internal/service/session/service.go b/backend/internal/service/session/service.go index 090c11b9..d9781cca 100644 --- a/backend/internal/service/session/service.go +++ b/backend/internal/service/session/service.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "strings" + "sync" "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" @@ -44,6 +45,7 @@ type commander interface { Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.SessionRecord, error) Restore(ctx context.Context, id domain.SessionID) (domain.SessionRecord, error) Kill(ctx context.Context, id domain.SessionID) (bool, error) + RetireOrchestrator(ctx context.Context, id domain.SessionID) error Send(ctx context.Context, id domain.SessionID, message string) error Cleanup(ctx context.Context, project domain.ProjectID) (sessionmanager.CleanupResult, error) RollbackSpawn(ctx context.Context, id domain.SessionID) (deleted, killed bool, err error) @@ -80,12 +82,14 @@ type scmProvider interface { // session operations to the internal sessionmanager.Manager and owns read-model // assembly, including user-facing display status derivation. type Service struct { - manager commander - store Store - prClaimer ports.PRClaimer - scm scmProvider - clock func() time.Time - telemetry ports.EventSink + manager commander + store Store + prClaimer ports.PRClaimer + scm scmProvider + clock func() time.Time + telemetry ports.EventSink + orchestratorLocksMu sync.Mutex + orchestratorLocks map[domain.ProjectID]*sync.Mutex // signalCapable reports whether a harness has a hook pipeline that can // deliver activity signals at all. Only capable harnesses are eligible for // the no_signal downgrade — a hook-less harness staying silent forever is @@ -257,23 +261,73 @@ func (s *Service) emitSpawnFailed(cfg ports.SpawnConfig, err error, durationMs i } // SpawnOrchestrator spawns an orchestrator session for a project. When clean is -// true it first tears down any active orchestrator(s) for that project so the new -// one is the only live coordinator — a business rule that belongs here, not in the -// HTTP controller. +// true it performs a replacement cutover: start the new orchestrator first, +// then retire any older active orchestrators for that project so a failed +// replacement never causes downtime. This business rule belongs here, not in +// the HTTP controller. func (s *Service) SpawnOrchestrator(ctx context.Context, projectID domain.ProjectID, clean bool) (domain.Session, error) { + unlock := s.lockOrchestratorProject(projectID) + defer unlock() + + if _, err := s.requireProject(ctx, projectID); err != nil { + return domain.Session{}, err + } + var existing []domain.Session if clean { active := true - existing, err := s.List(ctx, ListFilter{ProjectID: projectID, Active: &active, OrchestratorOnly: true}) + var err error + existing, err = s.List(ctx, ListFilter{ProjectID: projectID, Active: &active, OrchestratorOnly: true}) if err != nil { return domain.Session{}, err } - for _, orch := range existing { - if _, err := s.Kill(ctx, orch.ID); err != nil { - return domain.Session{}, err + } + sess, err := s.Spawn(ctx, ports.SpawnConfig{ProjectID: projectID, Kind: domain.KindOrchestrator}) + if err != nil || !clean { + return sess, err + } + for _, orch := range existing { + if orch.ID == sess.ID { + continue + } + if err := s.manager.RetireOrchestrator(ctx, orch.ID); err != nil { + if errors.Is(err, sessionmanager.ErrRetiredSessionStillAlive) { + return domain.Session{}, apierr.Conflict( + "ORCHESTRATOR_REPLACEMENT_RECOVERY_REQUIRED", + fmt.Sprintf("Replacement orchestrator started and previous orchestrator %s was marked terminated, but its runtime still appeared alive after destroy", orch.ID), + map[string]any{"oldOrchestratorId": orch.ID}, + ) } + if errors.Is(err, sessionmanager.ErrRetireTerminationUnrecorded) { + return domain.Session{}, apierr.Conflict( + "ORCHESTRATOR_REPLACEMENT_RECOVERY_REQUIRED", + fmt.Sprintf("Replacement orchestrator started and previous orchestrator %s was stopped, but its terminated state could not be recorded", orch.ID), + map[string]any{"oldOrchestratorId": orch.ID}, + ) + } + return domain.Session{}, apierr.Conflict( + "ORCHESTRATOR_REPLACEMENT_INCOMPLETE", + fmt.Sprintf("Replacement orchestrator started, but previous orchestrator %s could not be retired", orch.ID), + map[string]any{"oldOrchestratorId": orch.ID}, + ) } } - return s.Spawn(ctx, ports.SpawnConfig{ProjectID: projectID, Kind: domain.KindOrchestrator}) + return sess, nil +} + +func (s *Service) lockOrchestratorProject(projectID domain.ProjectID) func() { + s.orchestratorLocksMu.Lock() + if s.orchestratorLocks == nil { + s.orchestratorLocks = make(map[domain.ProjectID]*sync.Mutex) + } + projectLock := s.orchestratorLocks[projectID] + if projectLock == nil { + projectLock = &sync.Mutex{} + s.orchestratorLocks[projectID] = projectLock + } + s.orchestratorLocksMu.Unlock() + + projectLock.Lock() + return projectLock.Unlock } // Restore relaunches a terminated session and returns the API-facing read model. diff --git a/backend/internal/service/session/service_test.go b/backend/internal/service/session/service_test.go index 81f02d29..08d198d5 100644 --- a/backend/internal/service/session/service_test.go +++ b/backend/internal/service/session/service_test.go @@ -4,6 +4,8 @@ import ( "context" "errors" "fmt" + "strings" + "sync" "testing" "time" @@ -195,12 +197,15 @@ func TestSessionRenameMissingSessionReturnsNotFound(t *testing.T) { // clean-orchestrator ordering without wiring a real session engine. type fakeCommander struct { killed []domain.SessionID + retired []domain.SessionID cleanupProjects []domain.ProjectID killErr error + retireErr error cleanupErr error spawnErr error spawned bool killsAtSpawn int + retiredAtSpawn int } func (f *fakeCommander) Spawn(_ context.Context, cfg ports.SpawnConfig) (domain.SessionRecord, error) { @@ -209,6 +214,8 @@ func (f *fakeCommander) Spawn(_ context.Context, cfg ports.SpawnConfig) (domain. } f.spawned = true f.killsAtSpawn = len(f.killed) + f.retiredAtSpawn = len(f.retired) + return domain.SessionRecord{ID: "mer-9", ProjectID: cfg.ProjectID, Kind: cfg.Kind, Harness: cfg.Harness}, nil } func (f *fakeCommander) Restore(context.Context, domain.SessionID) (domain.SessionRecord, error) { @@ -221,6 +228,13 @@ func (f *fakeCommander) Kill(_ context.Context, id domain.SessionID) (bool, erro f.killed = append(f.killed, id) return true, nil } +func (f *fakeCommander) RetireOrchestrator(_ context.Context, id domain.SessionID) error { + if f.retireErr != nil { + return f.retireErr + } + f.retired = append(f.retired, id) + return nil +} func (f *fakeCommander) Send(context.Context, domain.SessionID, string) error { return nil } func (f *fakeCommander) Cleanup(_ context.Context, project domain.ProjectID) (sessionmanager.CleanupResult, error) { f.cleanupProjects = append(f.cleanupProjects, project) @@ -236,6 +250,52 @@ func (f *fakeCommander) RollbackSpawn(context.Context, domain.SessionID) (bool, return false, false, nil } +type blockingSpawnCommander struct { + mu sync.Mutex + blockProject domain.ProjectID + release chan struct{} + spawnEntered chan domain.ProjectID + spawnCount int +} + +func newBlockingSpawnCommander(blockProject domain.ProjectID) *blockingSpawnCommander { + return &blockingSpawnCommander{ + blockProject: blockProject, + release: make(chan struct{}), + spawnEntered: make(chan domain.ProjectID, 10), + } +} + +func (f *blockingSpawnCommander) Spawn(_ context.Context, cfg ports.SpawnConfig) (domain.SessionRecord, error) { + f.mu.Lock() + f.spawnCount++ + id := domain.SessionID(fmt.Sprintf("%s-%d", cfg.ProjectID, f.spawnCount)) + f.mu.Unlock() + + f.spawnEntered <- cfg.ProjectID + if cfg.ProjectID == f.blockProject { + <-f.release + } + return domain.SessionRecord{ID: id, ProjectID: cfg.ProjectID, Kind: cfg.Kind, Harness: cfg.Harness}, nil +} + +func (f *blockingSpawnCommander) Restore(context.Context, domain.SessionID) (domain.SessionRecord, error) { + return domain.SessionRecord{}, nil +} +func (f *blockingSpawnCommander) Kill(context.Context, domain.SessionID) (bool, error) { + return true, nil +} +func (f *blockingSpawnCommander) RetireOrchestrator(context.Context, domain.SessionID) error { + return nil +} +func (f *blockingSpawnCommander) Send(context.Context, domain.SessionID, string) error { return nil } +func (f *blockingSpawnCommander) Cleanup(context.Context, domain.ProjectID) (sessionmanager.CleanupResult, error) { + return sessionmanager.CleanupResult{}, nil +} +func (f *blockingSpawnCommander) RollbackSpawn(context.Context, domain.SessionID) (bool, bool, error) { + return false, false, nil +} + // TestCleanupMapsManagerResult: the service forwards both reclaimed and // skipped sessions, with non-nil slices so the wire shape stays stable. func TestCleanupMapsManagerResult(t *testing.T) { @@ -287,7 +347,7 @@ func TestTeardownProjectStopsOnKillError(t *testing.T) { } } -func TestSpawnOrchestratorCleanKillsActiveOrchestratorsBeforeSpawn(t *testing.T) { +func TestSpawnOrchestratorCleanSpawnsBeforeRetiringActiveOrchestrators(t *testing.T) { st := newFakeStore() st.projects["mer"] = domain.ProjectRecord{ID: "mer"} // Two active orchestrators plus an unrelated worker and a terminated @@ -304,11 +364,157 @@ func TestSpawnOrchestratorCleanKillsActiveOrchestratorsBeforeSpawn(t *testing.T) t.Fatalf("SpawnOrchestrator: %v", err) } - if len(fc.killed) != 2 { - t.Fatalf("killed = %v, want the two active orchestrators", fc.killed) + if len(fc.retired) != 2 { + t.Fatalf("retired = %v, want the two active orchestrators", fc.retired) + } + if !fc.spawned || fc.retiredAtSpawn != 0 { + t.Fatalf("spawn must run before old orchestrators are retired: spawned=%v retiredAtSpawn=%d", fc.spawned, fc.retiredAtSpawn) + } +} + +func TestSpawnOrchestratorSerializesSameProjectCutovers(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Kind: domain.KindOrchestrator} + fc := newBlockingSpawnCommander("mer") + svc := &Service{manager: fc, store: st} + + firstDone := make(chan error, 1) + go func() { + _, err := svc.SpawnOrchestrator(context.Background(), "mer", true) + firstDone <- err + }() + + if got := <-fc.spawnEntered; got != "mer" { + t.Fatalf("first spawn project = %q, want mer", got) + } + + secondDone := make(chan error, 1) + go func() { + _, err := svc.SpawnOrchestrator(context.Background(), "mer", true) + secondDone <- err + }() + + select { + case got := <-fc.spawnEntered: + t.Fatalf("same-project spawn entered before first cutover completed: %q", got) + case <-time.After(100 * time.Millisecond): + } + + close(fc.release) + if err := <-firstDone; err != nil { + t.Fatalf("first SpawnOrchestrator: %v", err) + } + if err := <-secondDone; err != nil { + t.Fatalf("second SpawnOrchestrator: %v", err) + } +} + +func TestSpawnOrchestratorAllowsDifferentProjectsDuringCutover(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + st.projects["other"] = domain.ProjectRecord{ID: "other"} + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Kind: domain.KindOrchestrator} + fc := newBlockingSpawnCommander("mer") + svc := &Service{manager: fc, store: st} + + firstDone := make(chan error, 1) + go func() { + _, err := svc.SpawnOrchestrator(context.Background(), "mer", true) + firstDone <- err + }() + + if got := <-fc.spawnEntered; got != "mer" { + t.Fatalf("first spawn project = %q, want mer", got) + } + + otherDone := make(chan error, 1) + go func() { + _, err := svc.SpawnOrchestrator(context.Background(), "other", true) + otherDone <- err + }() + + select { + case got := <-fc.spawnEntered: + if got != "other" { + t.Fatalf("second spawn project = %q, want other", got) + } + case <-time.After(1 * time.Second): + t.Fatal("different-project spawn was blocked by mer cutover") + } + if err := <-otherDone; err != nil { + t.Fatalf("other SpawnOrchestrator: %v", err) + } + + close(fc.release) + if err := <-firstDone; err != nil { + t.Fatalf("first SpawnOrchestrator: %v", err) + } +} + +func TestSpawnOrchestratorCleanSpawnFailureKeepsExistingOrchestrators(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Kind: domain.KindOrchestrator} + fc := &fakeCommander{spawnErr: errors.New("boom")} + svc := &Service{manager: fc, store: st} + + err := fc.spawnErr + if _, got := svc.SpawnOrchestrator(context.Background(), "mer", true); !errors.Is(got, err) { + t.Fatalf("SpawnOrchestrator err = %v, want %v", got, err) + } + if len(fc.retired) != 0 { + t.Fatalf("retired = %v, want none when replacement spawn fails", fc.retired) + } +} + +func TestSpawnOrchestratorCleanRetireFailureReturnsCutoverError(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Kind: domain.KindOrchestrator} + fc := &fakeCommander{retireErr: errors.New("cannot retire")} + svc := &Service{manager: fc, store: st} + + _, err := svc.SpawnOrchestrator(context.Background(), "mer", true) + if err == nil || !strings.Contains(err.Error(), "Replacement orchestrator started") { + t.Fatalf("err = %v, want cutover failure message", err) + } + if !fc.spawned || len(fc.retired) != 0 { + t.Fatalf("spawned=%v retired=%v, want replacement spawned and retire attempted but failed", fc.spawned, fc.retired) + } +} + +func TestSpawnOrchestratorCleanRetireUnrecordedReturnsRecoveryError(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Kind: domain.KindOrchestrator} + fc := &fakeCommander{retireErr: fmt.Errorf("retire mer-1: %w: db unavailable", sessionmanager.ErrRetireTerminationUnrecorded)} + svc := &Service{manager: fc, store: st} + + _, err := svc.SpawnOrchestrator(context.Background(), "mer", true) + var e *apierr.Error + if !errors.As(err, &e) || e.Code != "ORCHESTRATOR_REPLACEMENT_RECOVERY_REQUIRED" { + t.Fatalf("err = %v, want ORCHESTRATOR_REPLACEMENT_RECOVERY_REQUIRED", err) + } + if !strings.Contains(e.Message, "was stopped") { + t.Fatalf("message = %q, want stopped recovery message", e.Message) + } +} + +func TestSpawnOrchestratorCleanRetiredStillAliveReturnsRecoveryError(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Kind: domain.KindOrchestrator} + fc := &fakeCommander{retireErr: fmt.Errorf("retire mer-1: %w", sessionmanager.ErrRetiredSessionStillAlive)} + svc := &Service{manager: fc, store: st} + + _, err := svc.SpawnOrchestrator(context.Background(), "mer", true) + var e *apierr.Error + if !errors.As(err, &e) || e.Code != "ORCHESTRATOR_REPLACEMENT_RECOVERY_REQUIRED" { + t.Fatalf("err = %v, want ORCHESTRATOR_REPLACEMENT_RECOVERY_REQUIRED", err) } - if !fc.spawned || fc.killsAtSpawn != 2 { - t.Fatalf("spawn must run after both kills: spawned=%v killsAtSpawn=%d", fc.spawned, fc.killsAtSpawn) + if !strings.Contains(e.Message, "marked terminated") || !strings.Contains(e.Message, "still appeared alive") { + t.Fatalf("message = %q, want terminated-but-alive recovery message", e.Message) } } diff --git a/backend/internal/session_manager/manager.go b/backend/internal/session_manager/manager.go index 104982bd..eb065a45 100644 --- a/backend/internal/session_manager/manager.go +++ b/backend/internal/session_manager/manager.go @@ -32,6 +32,18 @@ var ( // adapter. The API maps it to a 400 so a typo'd `--harness` is a validation // error, not an opaque 500. ErrUnknownHarness = errors.New("session: unknown agent harness") + // ErrSessionStillAlive means teardown could not prove the runtime is gone + // even after destroy was attempted. + ErrSessionStillAlive = errors.New("session: runtime still alive after destroy") + // ErrRetiredSessionStillAlive means replacement cutover durably marked the + // old orchestrator terminated, but its runtime still appeared alive after + // destroy. Recovery tooling must retry the runtime kill because active-list + // scans will no longer surface the terminated row. + ErrRetiredSessionStillAlive = errors.New("session: terminated runtime still alive after destroy") + // ErrRetireTerminationUnrecorded means replacement cutover destroyed the old + // orchestrator runtime but could not update durable session state. Callers + // must not claim the previous orchestrator was preserved. + ErrRetireTerminationUnrecorded = errors.New("session: runtime destroyed but termination was not recorded") // ErrMissingHarness means neither the spawn request nor the project's role // config selected an agent. Worker/orchestrator spawns must be explicit. ErrMissingHarness = errors.New("session: agent harness required") @@ -44,6 +56,12 @@ const ( EnvIssueID = "AO_ISSUE_ID" // EnvDataDir tells a spawned agent's AO hook commands where the store lives. EnvDataDir = "AO_DATA_DIR" + // Replacement cutover gets one retry after the initial destroy attempt + // before the old orchestrator retirement fails hard. + orchestratorRetireAttempts = 2 + // A transient SQLite lock after runtime destroy should be retried before + // surfacing recovery-required state to the user. + orchestratorTerminationRecordAttempts = 3 ) // hookBinaryName is the executable name the workspace hook commands invoke: @@ -60,6 +78,7 @@ type lifecycleRecorder interface { type runtimeController interface { Create(ctx context.Context, cfg ports.RuntimeConfig) (ports.RuntimeHandle, error) Destroy(ctx context.Context, handle ports.RuntimeHandle) error + IsAlive(ctx context.Context, handle ports.RuntimeHandle) (bool, error) } // Store is the persistence surface needed by the internal session Manager. @@ -445,6 +464,70 @@ func (m *Manager) Kill(ctx context.Context, id domain.SessionID) (bool, error) { return freed, nil } +// RetireOrchestrator tears down an old orchestrator during replacement cutover. +// Once runtime destroy succeeds, terminal intent is recorded before fallible +// probe/workspace cleanup so the old orchestrator is not reported as preserved. +func (m *Manager) RetireOrchestrator(ctx context.Context, id domain.SessionID) error { + rec, ok, err := m.store.GetSession(ctx, id) + if err != nil { + return fmt.Errorf("retire %s: %w", id, err) + } + if !ok || rec.IsTerminated { + return nil + } + handle := runtimeHandle(rec.Metadata) + ws := workspaceInfo(rec) + if handle.ID == "" || ws.Path == "" { + return fmt.Errorf("retire %s: %w", id, ErrIncompleteHandle) + } + for attempt := 0; attempt < orchestratorRetireAttempts; attempt++ { + if err := m.runtime.Destroy(ctx, handle); err != nil { + if attempt == orchestratorRetireAttempts-1 { + return fmt.Errorf("retire %s: destroy: %w", id, err) + } + continue + } + if err := m.recordRetiredTermination(ctx, id); err != nil { + return err + } + alive, err := m.runtime.IsAlive(ctx, handle) + if err != nil { + m.logger.Warn("session manager: old orchestrator probe failed after runtime destroy", + "session", id, "err", err) + } else if alive { + return fmt.Errorf("retire %s: %w", id, ErrRetiredSessionStillAlive) + } + if err := m.workspace.Destroy(ctx, ws); err != nil && !errors.Is(err, ports.ErrWorkspaceDirty) { + m.logger.Warn("session manager: old orchestrator workspace destroy failed after runtime exit", + "session", id, "err", err) + } + return nil + } + return fmt.Errorf("retire %s: %w", id, ErrSessionStillAlive) +} + +func (m *Manager) recordRetiredTermination(ctx context.Context, id domain.SessionID) error { + var lastErr error + for attempt := 0; attempt < orchestratorTerminationRecordAttempts; attempt++ { + if err := m.lcm.MarkTerminated(ctx, id); err != nil { + lastErr = err + if attempt == orchestratorTerminationRecordAttempts-1 { + break + } + timer := time.NewTimer(25 * time.Millisecond) + select { + case <-ctx.Done(): + timer.Stop() + return fmt.Errorf("retire %s: %w: %w", id, ErrRetireTerminationUnrecorded, ctx.Err()) + case <-timer.C: + } + continue + } + return nil + } + return fmt.Errorf("retire %s: %w: %w", id, ErrRetireTerminationUnrecorded, lastErr) +} + // Restore relaunches a torn-down session in its workspace. The fallible I/O runs // before any durable session write, so a failure never resurrects the row or destroys // the worktree (it may hold the agent's prior work). diff --git a/backend/internal/session_manager/manager_test.go b/backend/internal/session_manager/manager_test.go index b7f3dcca..2523f58f 100644 --- a/backend/internal/session_manager/manager_test.go +++ b/backend/internal/session_manager/manager_test.go @@ -86,8 +86,12 @@ func (f *fakeStore) GetDisplayPRFactsForSession(_ context.Context, id domain.Ses } type fakeLCM struct { - store *fakeStore - completed int + store *fakeStore + completed int + terminated int + termErr error + termFailures int + termAlways bool } func (l *fakeLCM) MarkSpawned(_ context.Context, id domain.SessionID, metadata domain.SessionMetadata) error { @@ -100,6 +104,14 @@ func (l *fakeLCM) MarkSpawned(_ context.Context, id domain.SessionID, metadata d return nil } func (l *fakeLCM) MarkTerminated(_ context.Context, id domain.SessionID) error { + l.terminated++ + if l.termErr != nil && l.termAlways { + return l.termErr + } + if l.termErr != nil && l.termFailures > 0 { + l.termFailures-- + return l.termErr + } rec := l.store.sessions[id] rec.IsTerminated = true rec.Activity = domain.Activity{State: domain.ActivityExited, LastActivityAt: time.Now()} @@ -109,8 +121,11 @@ func (l *fakeLCM) MarkTerminated(_ context.Context, id domain.SessionID) error { type fakeRuntime struct { createErr error + destroyErr error created, destroyed int lastCfg ports.RuntimeConfig + alive bool + probeErr error } func (r *fakeRuntime) Create(_ context.Context, cfg ports.RuntimeConfig) (ports.RuntimeHandle, error) { @@ -121,7 +136,13 @@ func (r *fakeRuntime) Create(_ context.Context, cfg ports.RuntimeConfig) (ports. r.created++ return ports.RuntimeHandle{ID: "h1"}, nil } -func (r *fakeRuntime) Destroy(context.Context, ports.RuntimeHandle) error { r.destroyed++; return nil } +func (r *fakeRuntime) Destroy(context.Context, ports.RuntimeHandle) error { + r.destroyed++ + return r.destroyErr +} +func (r *fakeRuntime) IsAlive(context.Context, ports.RuntimeHandle) (bool, error) { + return r.alive, r.probeErr +} type fakeAgent struct{} @@ -479,6 +500,106 @@ func TestKill_OtherWorkspaceErrorStillFails(t *testing.T) { t.Fatalf("kill err = %v, want workspace error surfaced", err) } } + +func TestRetireOrchestrator_TerminatesOldOrchestratorWhenDestroySucceeds(t *testing.T) { + m, st, rt, ws := newManager() + st.sessions["mer-1"] = mkLive("mer-1") + if err := m.RetireOrchestrator(ctx, "mer-1"); err != nil { + t.Fatalf("RetireOrchestrator: %v", err) + } + if rt.destroyed != 1 || ws.destroyed != 1 { + t.Fatalf("destroyed runtime/workspace = %d/%d, want 1/1", rt.destroyed, ws.destroyed) + } + if !st.sessions["mer-1"].IsTerminated { + t.Fatalf("retired session = %+v, want terminated", st.sessions["mer-1"]) + } +} + +func TestRetireOrchestrator_ReturnsErrorWhenRuntimeStaysAlive(t *testing.T) { + m, st, rt, _ := newManager() + st.sessions["mer-1"] = mkLive("mer-1") + lcm := &fakeLCM{store: st} + m.lcm = lcm + rt.alive = true + if err := m.RetireOrchestrator(ctx, "mer-1"); !errors.Is(err, ErrRetiredSessionStillAlive) { + t.Fatalf("RetireOrchestrator err = %v, want ErrRetiredSessionStillAlive", err) + } + if rt.destroyed != 1 { + t.Fatalf("destroy attempts = %d, want 1", rt.destroyed) + } + if lcm.terminated != 1 { + t.Fatalf("termination records = %d, want 1", lcm.terminated) + } + if got := st.sessions["mer-1"]; !got.IsTerminated { + t.Fatalf("retired session = %+v, want terminated after destroy intent", got) + } +} + +func TestRetireOrchestrator_TerminatesWhenProbeFailsAfterDestroy(t *testing.T) { + m, st, rt, ws := newManager() + st.sessions["mer-1"] = mkLive("mer-1") + rt.probeErr = errors.New("list sessions failed") + if err := m.RetireOrchestrator(ctx, "mer-1"); err != nil { + t.Fatalf("RetireOrchestrator: %v", err) + } + if ws.destroyed != 1 { + t.Fatalf("workspace destroy attempts = %d, want 1", ws.destroyed) + } + if got := st.sessions["mer-1"]; !got.IsTerminated { + t.Fatalf("retired session = %+v, want terminated despite probe failure after destroy", got) + } +} + +func TestRetireOrchestrator_TerminatesWhenWorkspaceDestroyFailsAfterRuntimeDestroy(t *testing.T) { + m, st, _, ws := newManager() + st.sessions["mer-1"] = mkLive("mer-1") + ws.destroyErr = errors.New("cleanup failed") + if err := m.RetireOrchestrator(ctx, "mer-1"); err != nil { + t.Fatalf("RetireOrchestrator: %v", err) + } + if got := st.sessions["mer-1"]; !got.IsTerminated { + t.Fatalf("retired session = %+v, want terminated despite workspace cleanup failure", got) + } +} + +func TestRetireOrchestrator_ReturnsRecoveryErrorWhenTerminationRecordFailsAfterDestroy(t *testing.T) { + m, st, rt, _ := newManager() + st.sessions["mer-1"] = mkLive("mer-1") + m.lcm = &fakeLCM{store: st, termErr: errors.New("db unavailable"), termAlways: true} + if err := m.RetireOrchestrator(ctx, "mer-1"); !errors.Is(err, ErrRetireTerminationUnrecorded) { + t.Fatalf("RetireOrchestrator err = %v, want ErrRetireTerminationUnrecorded", err) + } + if rt.destroyed != 1 { + t.Fatalf("destroy attempts = %d, want 1", rt.destroyed) + } + if got := st.sessions["mer-1"]; got.IsTerminated { + t.Fatalf("retired session = %+v, want active because termination record failed", got) + } +} + +func TestRetireOrchestrator_RetriesTransientTerminationRecordFailure(t *testing.T) { + m, st, rt, _ := newManager() + st.sessions["mer-1"] = mkLive("mer-1") + m.lcm = &fakeLCM{store: st, termErr: errors.New("database locked"), termFailures: 1} + if err := m.RetireOrchestrator(ctx, "mer-1"); err != nil { + t.Fatalf("RetireOrchestrator: %v", err) + } + if rt.destroyed != 1 { + t.Fatalf("destroy attempts = %d, want 1", rt.destroyed) + } + if got := st.sessions["mer-1"]; !got.IsTerminated { + t.Fatalf("retired session = %+v, want terminated after retry", got) + } +} + +func TestRetireOrchestrator_ReturnsIncompleteHandleWhenHandleMissing(t *testing.T) { + m, st, _, _ := newManager() + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Activity: domain.Activity{State: domain.ActivityActive}} + if err := m.RetireOrchestrator(ctx, "mer-1"); !errors.Is(err, ErrIncompleteHandle) { + t.Fatalf("RetireOrchestrator err = %v, want ErrIncompleteHandle", err) + } +} + func TestRestore_ReopensTerminal(t *testing.T) { m, st, rt, _ := newManager() seedTerminal(st, "mer-1", domain.SessionMetadata{WorkspacePath: "/ws/mer-1", Branch: "b", AgentSessionID: "agent-x"}) diff --git a/frontend/src/api/schema.ts b/frontend/src/api/schema.ts index 9f895e64..f3ec961d 100644 --- a/frontend/src/api/schema.ts +++ b/frontend/src/api/schema.ts @@ -4,6 +4,23 @@ */ export interface paths { + "/api/v1/agents": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List supported and locally installed agent adapters */ + get: operations["listAgents"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/v1/events": { parameters: { query?: never; @@ -493,6 +510,12 @@ export interface components { model?: string; permissions?: string; }; + AgentInfo: { + /** @enum {string} */ + authStatus?: "authorized" | "unauthorized" | "unknown"; + id: string; + label: string; + }; ClaimPRRequest: { allowTakeover?: null | boolean; pr: string; @@ -553,6 +576,11 @@ export interface components { ok: boolean; sessionId: string; }; + ListAgentsResponse: { + authorized: components["schemas"]["AgentInfo"][]; + installed: components["schemas"]["AgentInfo"][]; + supported: components["schemas"]["AgentInfo"][]; + }; ListNotificationsResponse: { notifications: components["schemas"]["NotificationResponse"][]; }; @@ -863,6 +891,44 @@ export interface components { } export type $defs = Record; export interface operations { + listAgents: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["ListAgentsResponse"]; + }; + }; + /** @description Internal Server Error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["APIError"]; + }; + }; + /** @description Not Implemented */ + 501: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["APIError"]; + }; + }; + }; + }; streamEvents: { parameters: { query?: { diff --git a/frontend/src/renderer/components/CreateProjectAgentSheet.tsx b/frontend/src/renderer/components/CreateProjectAgentSheet.tsx index ea4a7339..f673bc61 100644 --- a/frontend/src/renderer/components/CreateProjectAgentSheet.tsx +++ b/frontend/src/renderer/components/CreateProjectAgentSheet.tsx @@ -1,11 +1,15 @@ +import { useQuery, useQueryClient } from "@tanstack/react-query"; import * as Dialog from "@radix-ui/react-dialog"; import { X } from "lucide-react"; import { useEffect, useState } from "react"; -import { AGENT_OPTIONS } from "../lib/agent-options"; +import type { components } from "../../api/schema"; +import { agentsQueryKey, agentsQueryOptions, type AgentCatalog } from "../hooks/useAgentsQuery"; import { Button } from "./ui/button"; import { Label } from "./ui/label"; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"; +type AgentInfo = components["schemas"]["AgentInfo"]; + export type CreateProjectAgentSelection = { workerAgent: string; orchestratorAgent: string; @@ -28,6 +32,17 @@ export function CreateProjectAgentSheet({ open, path, }: CreateProjectAgentSheetProps) { + const queryClient = useQueryClient(); + const cachedAgents = queryClient.getQueryData(agentsQueryKey); + const agentsQuery = useQuery({ + ...agentsQueryOptions, + enabled: cachedAgents === undefined, + initialData: cachedAgents, + }); + const agents = agentsQuery.data; + const installedAgents = agents?.installed ?? []; + const agentOptions = agents?.authorized ?? []; + const supportedAgents = agents?.supported ?? []; const [workerAgent, setWorkerAgent] = useState(""); const [orchestratorAgent, setOrchestratorAgent] = useState(""); const canSubmit = workerAgent !== "" && orchestratorAgent !== "" && !isCreating; @@ -76,6 +91,9 @@ export function CreateProjectAgentSheet({ label="Worker agent" placeholder="Select worker agent" value={workerAgent} + authorized={agentOptions} + installed={installedAgents} + supported={supportedAgents} onChange={setWorkerAgent} /> @@ -109,20 +130,42 @@ export function CreateProjectAgentSheet({ } export function RequiredAgentField({ + authorized, id, invalid = false, + installed, label, onChange, placeholder, + supported, value, }: { + authorized: AgentInfo[]; id: string; invalid?: boolean; + installed: AgentInfo[]; label: string; onChange: (value: string) => void; placeholder: string; + supported: AgentInfo[]; value: string; }) { + const authorizedIds = new Set(authorized.map((agent) => agent.id)); + const installedById = new Map(installed.map((agent) => [agent.id, agent])); + const options = supported + .map((agent) => { + const installedAgent = installedById.get(agent.id); + const isAuthorized = authorizedIds.has(agent.id); + const rank = isAuthorized ? 0 : installedAgent ? 1 : 2; + return { + ...agent, + disabled: !isAuthorized, + rank, + reason: !installedAgent ? "Needs install" : !isAuthorized ? "Needs auth" : "", + }; + }) + .sort((a, b) => a.rank - b.rank || a.label.localeCompare(b.label) || a.id.localeCompare(b.id)); + return (