diff --git a/backend/internal/adapters/telemetry/posthog.go b/backend/internal/adapters/telemetry/posthog.go index 253ba0e3..5b09337f 100644 --- a/backend/internal/adapters/telemetry/posthog.go +++ b/backend/internal/adapters/telemetry/posthog.go @@ -35,26 +35,37 @@ var remotePayloadAllowlist = map[string]map[string]struct{}{ "command_path": {}, }, "ao.cli.usage_errors": { + "component": {}, "command": {}, "command_path": {}, "error_kind": {}, + "fingerprint": {}, + "operation": {}, }, "ao.daemon.panic": { - "method": {}, - "path": {}, - "panic_kind": {}, + "component": {}, + "fingerprint": {}, + "method": {}, + "operation": {}, + "path": {}, + "panic_kind": {}, + "stack_fingerprint": {}, }, "ao.daemon.started": { "agent": {}, "port": {}, }, "ao.http.5xx": { - "duration": {}, - "error_code": {}, - "error_kind": {}, - "method": {}, - "path": {}, - "status": {}, + "component": {}, + "duration": {}, + "error_code": {}, + "error_kind": {}, + "fingerprint": {}, + "method": {}, + "operation": {}, + "path": {}, + "status": {}, + "status_family": {}, }, "ao.onboarding.first_project_added": { "has_git_remote": {}, @@ -70,11 +81,14 @@ var remotePayloadAllowlist = map[string]map[string]struct{}{ "kind": {}, }, "ao.session.spawn_failed": { + "component": {}, "duration_ms": {}, "error_code": {}, "error_kind": {}, + "fingerprint": {}, "harness": {}, "kind": {}, + "operation": {}, }, "ao.session.spawned": { "duration_ms": {}, diff --git a/backend/internal/adapters/telemetry/posthog_test.go b/backend/internal/adapters/telemetry/posthog_test.go index 2099ff47..89169860 100644 --- a/backend/internal/adapters/telemetry/posthog_test.go +++ b/backend/internal/adapters/telemetry/posthog_test.go @@ -93,11 +93,15 @@ func TestPostHogSinkSanitizesPayloads(t *testing.T) { OccurredAt: time.Unix(1700000000, 0).UTC(), Level: ports.TelemetryLevelError, Payload: map[string]any{ - "method": http.MethodGet, - "path": "/api/v1/sessions/demo", - "panic_kind": "error", - "panic": "open 
/Users/name/private: no such file", - "stack": "stack trace with local path", + "component": "httpd", + "operation": "http_request_panic", + "method": http.MethodGet, + "path": "/api/v1/sessions/demo", + "panic_kind": "error", + "fingerprint": "abc123", + "stack_fingerprint": "def456", + "panic": "open /Users/name/private: no such file", + "stack": "stack trace with local path", }, }) if err := sink.Close(context.Background()); err != nil { @@ -110,9 +114,15 @@ func TestPostHogSinkSanitizesPayloads(t *testing.T) { if !ok { t.Fatalf("properties type = %T, want map[string]any", req["properties"]) } + if props["component"] != "httpd" || props["operation"] != "http_request_panic" { + t.Fatalf("sanitized properties = %#v, want allowlisted metadata", props) + } if props["method"] != http.MethodGet || props["path"] != "/api/v1/sessions/demo" || props["panic_kind"] != "error" { t.Fatalf("sanitized properties = %#v, want allowlisted fields", props) } + if props["fingerprint"] != "abc123" || props["stack_fingerprint"] != "def456" { + t.Fatalf("sanitized properties = %#v, want exported fingerprints", props) + } if _, ok := props["panic"]; ok { t.Fatalf("panic property should be dropped: %#v", props) } diff --git a/backend/internal/httpd/log.go b/backend/internal/httpd/log.go index cbdce406..422408c7 100644 --- a/backend/internal/httpd/log.go +++ b/backend/internal/httpd/log.go @@ -1,16 +1,16 @@ package httpd import ( - "errors" "log/slog" "net/http" + "strconv" "time" "github.com/go-chi/chi/v5/middleware" - "github.com/aoagents/agent-orchestrator/backend/internal/httpd/apierr" "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta" ) // requestLogger emits one structured access-log line per request via the @@ -48,21 +48,23 @@ func requestLogger(log *slog.Logger, sink ports.EventSink) func(http.Handler) ht } log.Info("http 
request", attrs...) if sink != nil && ww.Status() >= http.StatusInternalServerError { + path := telemetrymeta.RoutePattern(r) payload := map[string]any{ - "method": r.Method, - "path": r.URL.Path, - "status": ww.Status(), - "duration": time.Since(start).Milliseconds(), + "component": "httpd", + "operation": "http_request", + "method": r.Method, + "path": path, + "status": ww.Status(), + "status_family": telemetrymeta.StatusFamily(ww.Status()), + "duration": time.Since(start).Milliseconds(), } if err := capturedErr(); err != nil { - payload["error_kind"] = "internal" - var apiErr *apierr.Error - if errors.As(err, &apiErr) { - payload["error_kind"] = telemetryErrorKind(apiErr.Kind) - if apiErr.Code != "" { - payload["error_code"] = apiErr.Code - } + errorKind, errorCode := telemetrymeta.ErrorKindAndCode(err) + payload["error_kind"] = errorKind + if errorCode != "" { + payload["error_code"] = errorCode } + payload["fingerprint"] = telemetrymeta.Fingerprint("httpd", "http_request", r.Method, path, strconv.Itoa(ww.Status()), errorKind, errorCode) } sink.Emit(r.Context(), ports.TelemetryEvent{ Name: "ao.http.5xx", @@ -78,16 +80,3 @@ func requestLogger(log *slog.Logger, sink ports.EventSink) func(http.Handler) ht }) } } - -func telemetryErrorKind(kind apierr.Kind) string { - switch kind { - case apierr.KindInvalid: - return "invalid" - case apierr.KindNotFound: - return "not_found" - case apierr.KindConflict: - return "conflict" - default: - return "internal" - } -} diff --git a/backend/internal/httpd/log_test.go b/backend/internal/httpd/log_test.go index cb044c6a..c87ec084 100644 --- a/backend/internal/httpd/log_test.go +++ b/backend/internal/httpd/log_test.go @@ -57,6 +57,31 @@ func TestRequestLoggerRecords5xxCause(t *testing.T) { if len(sink.events) != 1 || sink.events[0].Name != "ao.http.5xx" { t.Fatalf("telemetry events = %#v, want one ao.http.5xx event", sink.events) } + payload := sink.events[0].Payload + if got := payload["component"]; got != "httpd" { + 
t.Fatalf("payload.component = %#v, want httpd", got) + } + if got := payload["operation"]; got != "http_request" { + t.Fatalf("payload.operation = %#v, want http_request", got) + } + if got := payload["method"]; got != http.MethodPost { + t.Fatalf("payload.method = %#v, want POST", got) + } + if got := payload["path"]; got != "/api/v1/sessions/x/kill" { + t.Fatalf("payload.path = %#v, want request path fallback", got) + } + if got := payload["status"]; got != http.StatusInternalServerError { + t.Fatalf("payload.status = %#v, want 500", got) + } + if got := payload["status_family"]; got != "5xx" { + t.Fatalf("payload.status_family = %#v, want 5xx", got) + } + if got := payload["error_kind"]; got != "internal" { + t.Fatalf("payload.error_kind = %#v, want internal", got) + } + if got := payload["fingerprint"]; got == "" { + t.Fatalf("payload.fingerprint = %#v, want non-empty", got) + } }) } } diff --git a/backend/internal/httpd/recover.go b/backend/internal/httpd/recover.go index 70417cbc..c190f70f 100644 --- a/backend/internal/httpd/recover.go +++ b/backend/internal/httpd/recover.go @@ -12,6 +12,7 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta" ) func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler) http.Handler { @@ -29,6 +30,8 @@ func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler) "stack", stack, ) if sink != nil { + path := telemetrymeta.RoutePattern(r) + panicKind := telemetrymeta.PanicKind(rec) sink.Emit(r.Context(), ports.TelemetryEvent{ Name: "ao.daemon.panic", Source: "http", @@ -36,9 +39,13 @@ func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler) Level: ports.TelemetryLevelError, RequestID: middleware.GetReqID(r.Context()), Payload: map[string]any{ - "method": r.Method, - "path": r.URL.Path, - 
"panic_kind": telemetryPanicKind(rec), + "component": "httpd", + "operation": "http_request_panic", + "method": r.Method, + "path": path, + "panic_kind": panicKind, + "stack_fingerprint": telemetrymeta.Fingerprint("httpd", "http_request_panic", path, panicKind, stack), + "fingerprint": telemetrymeta.Fingerprint("httpd", "http_request_panic", r.Method, path, panicKind), }, }) } @@ -50,17 +57,6 @@ func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler) } } -func telemetryPanicKind(rec any) string { - switch rec.(type) { - case error: - return "error" - case string: - return "string" - default: - return "other" - } -} - func writeRecoveredError(w http.ResponseWriter, r *http.Request) { if strings.HasPrefix(r.URL.Path, "/api/") { envelope.WriteAPIError(w, r, http.StatusInternalServerError, "internal_error", "INTERNAL_ERROR", "Internal server error", nil) diff --git a/backend/internal/httpd/router.go b/backend/internal/httpd/router.go index 61a6b390..0bd84af7 100644 --- a/backend/internal/httpd/router.go +++ b/backend/internal/httpd/router.go @@ -17,6 +17,7 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/daemonmeta" "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta" "github.com/aoagents/agent-orchestrator/backend/internal/terminal" ) @@ -187,9 +188,12 @@ func mountTelemetry(r chi.Router, sink ports.EventSink) { Level: ports.TelemetryLevelWarn, RequestID: middleware.GetReqID(req.Context()), Payload: map[string]any{ + "component": "cli", + "operation": "command_parse", "command": body.Command, "command_path": body.CommandPath, "error_kind": "usage", + "fingerprint": telemetrymeta.Fingerprint("cli", "command_parse", body.CommandPath, "usage"), }, }) w.WriteHeader(http.StatusAccepted) diff --git a/backend/internal/httpd/telemetry_test.go 
b/backend/internal/httpd/telemetry_test.go index 00bfb012..574b0349 100644 --- a/backend/internal/httpd/telemetry_test.go +++ b/backend/internal/httpd/telemetry_test.go @@ -73,6 +73,25 @@ func TestCLIUsageErrorRouteEmitsTelemetry(t *testing.T) { if len(sink.events) != 1 || sink.events[0].Name != "ao.cli.usage_errors" { t.Fatalf("events = %#v, want one ao.cli.usage_errors event", sink.events) } + payload := sink.events[0].Payload + if got := payload["component"]; got != "cli" { + t.Fatalf("payload.component = %#v, want cli", got) + } + if got := payload["operation"]; got != "command_parse" { + t.Fatalf("payload.operation = %#v, want command_parse", got) + } + if got := payload["command_path"]; got != "ao status" { + t.Fatalf("payload.command_path = %#v, want ao status", got) + } + if got := payload["error_kind"]; got != "usage" { + t.Fatalf("payload.error_kind = %#v, want usage", got) + } + if got := payload["fingerprint"]; got == "" { + t.Fatalf("payload.fingerprint = %#v, want non-empty", got) + } + if _, ok := payload["error"]; ok { + t.Fatalf("payload leaked raw error text: %#v", payload) + } } func TestRecoverTelemetryEmitsPanicEvent(t *testing.T) { @@ -90,19 +109,43 @@ func TestRecoverTelemetryEmitsPanicEvent(t *testing.T) { if rec.Code != http.StatusInternalServerError { t.Fatalf("status = %d, want 500", rec.Code) } - var sawPanic, saw5xx bool + var panicPayload, fiveXXPayload map[string]any for _, ev := range sink.events { switch ev.Name { case "ao.daemon.panic": - sawPanic = true + panicPayload = ev.Payload case "ao.http.5xx": - saw5xx = true + fiveXXPayload = ev.Payload } } - if !sawPanic { + if panicPayload == nil { t.Fatalf("events = %#v, want ao.daemon.panic", sink.events) } - if !saw5xx { + if fiveXXPayload == nil { t.Fatalf("events = %#v, want ao.http.5xx after recovery", sink.events) } + if got := panicPayload["component"]; got != "httpd" { + t.Fatalf("panic payload.component = %#v, want httpd", got) + } + if got := panicPayload["operation"]; got != 
"http_request_panic" { + t.Fatalf("panic payload.operation = %#v, want http_request_panic", got) + } + if got := panicPayload["path"]; got != "/panic" { + t.Fatalf("panic payload.path = %#v, want /panic", got) + } + if got := panicPayload["panic_kind"]; got != "string" { + t.Fatalf("panic payload.panic_kind = %#v, want string", got) + } + if got := panicPayload["fingerprint"]; got == "" { + t.Fatalf("panic payload.fingerprint = %#v, want non-empty", got) + } + if got := panicPayload["stack_fingerprint"]; got == "" { + t.Fatalf("panic payload.stack_fingerprint = %#v, want non-empty", got) + } + if got := fiveXXPayload["path"]; got != "/panic" { + t.Fatalf("5xx payload.path = %#v, want /panic", got) + } + if got := fiveXXPayload["status_family"]; got != "5xx" { + t.Fatalf("5xx payload.status_family = %#v, want 5xx", got) + } } diff --git a/backend/internal/service/session/service.go b/backend/internal/service/session/service.go index b2b48940..9dbdac76 100644 --- a/backend/internal/service/session/service.go +++ b/backend/internal/service/session/service.go @@ -11,6 +11,7 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/httpd/apierr" "github.com/aoagents/agent-orchestrator/backend/internal/ports" sessionmanager "github.com/aoagents/agent-orchestrator/backend/internal/session_manager" + "github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta" ) // Store is the read-only persistence surface needed to assemble controller-facing session read models. 
@@ -229,18 +230,15 @@ func (s *Service) emitSpawnFailed(cfg ports.SpawnConfig, err error, durationMs i } projectID := cfg.ProjectID apiErr := toAPIError(err) - errorKind := "internal" - errorCode := "" - var typedErr *apierr.Error - if errors.As(apiErr, &typedErr) { - errorKind = telemetryErrorKind(typedErr.Kind) - errorCode = typedErr.Code - } + errorKind, errorCode := telemetrymeta.ErrorKindAndCode(apiErr) payload := map[string]any{ + "component": "session_service", + "operation": "spawn_session", "kind": string(cfg.Kind), "harness": string(cfg.Harness), "duration_ms": durationMs, "error_kind": errorKind, + "fingerprint": telemetrymeta.Fingerprint("session_service", "spawn_session", string(cfg.Kind), string(cfg.Harness), errorKind, errorCode), } if errorCode != "" { payload["error_code"] = errorCode @@ -255,19 +253,6 @@ func (s *Service) emitSpawnFailed(cfg ports.SpawnConfig, err error, durationMs i }) } -func telemetryErrorKind(kind apierr.Kind) string { - switch kind { - case apierr.KindInvalid: - return "invalid" - case apierr.KindNotFound: - return "not_found" - case apierr.KindConflict: - return "conflict" - default: - return "internal" - } -} - // SpawnOrchestrator spawns an orchestrator session for a project. 
When clean is // true it first tears down any active orchestrator(s) for that project so the new // one is the only live coordinator — a business rule that belongs here, not in the diff --git a/backend/internal/service/session/service_test.go b/backend/internal/service/session/service_test.go index 9432148b..3dbfb9d6 100644 --- a/backend/internal/service/session/service_test.go +++ b/backend/internal/service/session/service_test.go @@ -337,6 +337,15 @@ func TestSpawnFailedEmitsDuration(t *testing.T) { if got := sink.events[0].Payload["error_kind"]; got != "internal" { t.Fatalf("spawn_failed error_kind = %#v, want internal", got) } + if got := sink.events[0].Payload["component"]; got != "session_service" { + t.Fatalf("spawn_failed component = %#v, want session_service", got) + } + if got := sink.events[0].Payload["operation"]; got != "spawn_session" { + t.Fatalf("spawn_failed operation = %#v, want spawn_session", got) + } + if got := sink.events[0].Payload["fingerprint"]; got == "" { + t.Fatalf("spawn_failed fingerprint = %#v, want non-empty", got) + } } func TestSpawnEmitsTelemetryOnSuccess(t *testing.T) { @@ -395,11 +404,47 @@ func TestSpawnEmitsTelemetryOnFailure(t *testing.T) { if got := ev.Payload["error_kind"]; got != "internal" { t.Fatalf("event payload error_kind = %#v, want internal", got) } + if got := ev.Payload["component"]; got != "session_service" { + t.Fatalf("event payload component = %#v, want session_service", got) + } + if got := ev.Payload["operation"]; got != "spawn_session" { + t.Fatalf("event payload operation = %#v, want spawn_session", got) + } + if got := ev.Payload["fingerprint"]; got == "" { + t.Fatalf("event payload fingerprint = %#v, want non-empty", got) + } if _, ok := ev.Payload["error"]; ok { t.Fatalf("event payload leaked raw error: %+v", ev.Payload) } } +func TestSpawnEmitsTypedErrorCodeOnFailure(t *testing.T) { + st := newFakeStore() + st.projects["mer"] = domain.ProjectRecord{ID: "mer"} + fc := &fakeCommander{spawnErr: 
fmt.Errorf("spawn: %w: %q", sessionmanager.ErrUnknownHarness, "bogus")} + ts := &fakeTelemetrySink{} + svc := NewWithDeps(Deps{Manager: fc, Store: st, Telemetry: ts, Clock: func() time.Time { return time.Unix(1700000000, 0).UTC() }}) + + _, err := svc.Spawn(context.Background(), ports.SpawnConfig{ + ProjectID: "mer", + Kind: domain.KindWorker, + Harness: domain.HarnessCodex, + }) + if err == nil { + t.Fatal("Spawn error = nil, want failure") + } + if len(ts.events) != 1 { + t.Fatalf("telemetry events = %d, want 1", len(ts.events)) + } + ev := ts.events[0] + if got := ev.Payload["error_kind"]; got != "invalid" { + t.Fatalf("event payload error_kind = %#v, want invalid", got) + } + if got := ev.Payload["error_code"]; got != "UNKNOWN_HARNESS" { + t.Fatalf("event payload error_code = %#v, want UNKNOWN_HARNESS", got) + } +} + // TestSpawnOrchestratorUnknownProjectReturns404 is the orchestrator-side guard // for Bug 1: same pre-validation, same typed envelope. func TestSpawnOrchestratorUnknownProjectReturns404(t *testing.T) { diff --git a/backend/internal/telemetrymeta/errors.go b/backend/internal/telemetrymeta/errors.go new file mode 100644 index 00000000..f2251ffb --- /dev/null +++ b/backend/internal/telemetrymeta/errors.go @@ -0,0 +1,92 @@ +package telemetrymeta + +import ( + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "net/http" + "strings" + + "github.com/go-chi/chi/v5" + + "github.com/aoagents/agent-orchestrator/backend/internal/httpd/apierr" +) + +// ErrorKindAndCode extracts a telemetry-safe error category and optional code. +func ErrorKindAndCode(err error) (kind, code string) { + kind = "internal" + var apiErr *apierr.Error + if errors.As(err, &apiErr) { + return ErrorKind(apiErr.Kind), apiErr.Code + } + return kind, "" +} + +// ErrorKind maps API error kinds to coarse telemetry-safe categories. 
+func ErrorKind(kind apierr.Kind) string { + switch kind { + case apierr.KindInvalid: + return "invalid" + case apierr.KindNotFound: + return "not_found" + case apierr.KindConflict: + return "conflict" + default: + return "internal" + } +} + +// PanicKind classifies panic payloads without exporting their raw contents. +func PanicKind(rec any) string { + switch rec.(type) { + case error: + return "error" + case string: + return "string" + default: + return "other" + } +} + +// StatusFamily returns a telemetry-friendly HTTP status bucket like 5xx. +func StatusFamily(status int) string { + if status < 100 || status > 999 { + return "unknown" + } + return fmt.Sprintf("%dxx", status/100) +} + +// RoutePattern returns the chi route template when available, else the URL path. +func RoutePattern(r *http.Request) string { + if r == nil { + return "" + } + if rc := chi.RouteContext(r.Context()); rc != nil { + if pattern := strings.TrimSpace(rc.RoutePattern()); pattern != "" { + return pattern + } + } + if r.URL == nil { + return "" + } + return r.URL.Path +} + +// Fingerprint returns a short stable digest for grouping similar failures. 
// Fingerprint returns a short stable digest for grouping similar failures.
//
// Each part is whitespace-trimmed and fed to SHA-256 followed by a NUL
// separator; the result is the first 16 hex characters (64 bits) of the
// digest. Empty parts still contribute their separator, so a blank value keeps
// its position and Fingerprint("a", "", "b") differs from Fingerprint("a", "b").
func Fingerprint(parts ...string) string {
	h := sha256.New()
	for _, part := range parts {
		// hash.Hash documents that Write never returns an error.
		_, _ = h.Write([]byte(strings.TrimSpace(part)))
		_, _ = h.Write([]byte{0})
	}
	// A SHA-256 digest is always 32 bytes, so the first 8 bytes are always
	// present and encode to exactly 16 hex characters.
	digest := h.Sum(nil)
	return hex.EncodeToString(digest[:8])
}
"/api/v1/projects/{projectID}", "5xx", "internal") + second := Fingerprint("httpd", "http_request", "GET", "/api/v1/projects/{projectID}", "5xx", "internal") + other := Fingerprint("httpd", "http_request", "POST", "/api/v1/projects/{projectID}", "5xx", "internal") + + if first == "" || len(first) != 16 { + t.Fatalf("fingerprint = %q, want 16-char digest", first) + } + if first != second { + t.Fatalf("fingerprints differ for same inputs: %q vs %q", first, second) + } + if first == other { + t.Fatalf("fingerprints should differ for different inputs: %q vs %q", first, other) + } +} diff --git a/frontend/src/renderer/components/ShellTopbar.tsx b/frontend/src/renderer/components/ShellTopbar.tsx index 8ab3eec9..69f4b4bb 100644 --- a/frontend/src/renderer/components/ShellTopbar.tsx +++ b/frontend/src/renderer/components/ShellTopbar.tsx @@ -13,7 +13,7 @@ import { import { useWorkspaceQuery, workspaceQueryKey } from "../hooks/useWorkspaceQuery"; import { apiClient, apiErrorMessage } from "../lib/api-client"; import { spawnOrchestrator } from "../lib/spawn-orchestrator"; -import { captureRendererEvent, captureRendererException } from "../lib/telemetry"; +import { addRendererExceptionStep, captureRendererEvent, captureRendererException } from "../lib/telemetry"; import { useUiStore } from "../stores/ui-store"; import { OrchestratorIcon } from "./icons"; import { NewTaskDialog } from "./NewTaskDialog"; @@ -92,6 +92,12 @@ export function ShellTopbar() { const openOrchestrator = async () => { if (!projectId) return; + void addRendererExceptionStep("Orchestrator open requested", { + source: "orchestrator-open", + operation: "open_orchestrator", + surface: isSessionRoute ? 
"session_detail" : "project_board", + project_id: projectId, + }); void captureRendererEvent("ao.renderer.orchestrator_open_requested", { project_id: projectId }); if (orchestrator) { void navigate({ @@ -109,7 +115,12 @@ export function ShellTopbar() { params: { projectId, sessionId }, }); } catch (error) { - void captureRendererException(error, { source: "orchestrator-open", project_id: projectId }); + void captureRendererException(error, { + source: "orchestrator-open", + operation: "open_orchestrator", + surface: isSessionRoute ? "session_detail" : "project_board", + project_id: projectId, + }); console.error("Failed to spawn orchestrator:", error); } finally { setIsSpawning(false); diff --git a/frontend/src/renderer/components/TelemetryBoundary.tsx b/frontend/src/renderer/components/TelemetryBoundary.tsx index 9ed73c07..98512ab7 100644 --- a/frontend/src/renderer/components/TelemetryBoundary.tsx +++ b/frontend/src/renderer/components/TelemetryBoundary.tsx @@ -19,6 +19,7 @@ export class TelemetryBoundary extends React.Component { componentDidCatch(error: Error, info: React.ErrorInfo) { void captureRendererException(error, { source: "react-error-boundary", + operation: "react_render", }); void info; } diff --git a/frontend/src/renderer/lib/telemetry.test.ts b/frontend/src/renderer/lib/telemetry.test.ts index 3b8d9f6f..34e70d9e 100644 --- a/frontend/src/renderer/lib/telemetry.test.ts +++ b/frontend/src/renderer/lib/telemetry.test.ts @@ -1,5 +1,11 @@ import { describe, expect, it } from "vitest"; -import { routeSurface, sanitizeRendererExceptionProperties, sanitizeRendererProperties } from "./telemetry"; +import { + routeSurface, + sanitizePostHogEvent, + sanitizeReplayRequestName, + sanitizeRendererExceptionProperties, + sanitizeRendererProperties, +} from "./telemetry"; describe("telemetry sanitizers", () => { it("categorizes routes without exporting raw paths", () => { @@ -26,6 +32,7 @@ describe("telemetry sanitizers", () => { it("strips exception details down 
to coarse metadata", async () => { const props = await sanitizeRendererExceptionProperties(new TypeError("local path /tmp/private"), { source: "window-error", + operation: "project_add", unhandled: true, project_id: "demo-project", component_stack: "App > Shell", @@ -33,10 +40,75 @@ describe("telemetry sanitizers", () => { expect(props).toMatchObject({ error_name: "TypeError", source: "window-error", + operation: "project_add", unhandled: true, }); expect(props).toHaveProperty("project_id_hash"); expect(props).not.toHaveProperty("project_id"); expect(props).not.toHaveProperty("component_stack"); }); + + it("sanitizes exception step context", async () => { + const props = await sanitizeRendererExceptionProperties(new Error("boom"), { + source: "orchestrator-open", + operation: "open_orchestrator", + surface: "session_detail", + project_id: "demo-project", + }); + expect(props).toMatchObject({ + source: "orchestrator-open", + operation: "open_orchestrator", + surface: "session_detail", + }); + expect(props).toHaveProperty("project_id_hash"); + }); + + it("redacts local urls and filesystem paths from outgoing PostHog payloads", () => { + const event = sanitizePostHogEvent({ + event: "$exception", + properties: { + $current_url: "app://renderer/index.html?token=secret", + $initial_current_url: "file:///Users/alice/private/index.html", + message: + "failed to fetch http://localhost:3037/api/v1/projects?token=secret from app://renderer/index.html?token=secret and open /Users/alice/reverb/file.txt", + $exception_list: [ + { + type: "TypeError", + value: + "failed to load /home/alice/.config/reverb/settings.json via http://127.0.0.1:3037/api/v1/projects?token=secret", + stacktrace: { + frames: [ + { filename: "file:///Users/alice/reverb/dist/main.js" }, + { filename: "http://[::1]:3037/api/v1/projects?token=secret" }, + ], + }, + }, + ], + }, + }); + const props = event.properties as Record; + expect(props.$current_url).toBe("[redacted-local-url]"); + 
expect(props.$initial_current_url).toBe("[redacted-local-url]"); + expect(props.message).toBe( + "failed to fetch [redacted-local-url] from [redacted-local-url] and open [redacted-local-path]", + ); + const exceptionList = props.$exception_list as Array>; + expect(exceptionList[0].value).toBe("failed to load [redacted-local-path] via [redacted-local-url]"); + expect((exceptionList[0].stacktrace as { frames: Array<{ filename: string }> }).frames[0].filename).toBe( + "[redacted-local-url]", + ); + expect((exceptionList[0].stacktrace as { frames: Array<{ filename: string }> }).frames[1].filename).toBe( + "[redacted-local-url]", + ); + }); + + it("redacts replay request names before they leave the renderer", () => { + expect(sanitizeReplayRequestName("file:///Users/alice/private/index.html?token=secret")).toBe( + "[redacted-local-url]", + ); + expect(sanitizeReplayRequestName("http://[::1]:3037/api/v1/projects?token=secret")).toBe("[redacted-local-url]"); + expect(sanitizeReplayRequestName("https://api.example.com/endpoint?token=secret")).toBe( + "https://api.example.com/endpoint", + ); + }); }); diff --git a/frontend/src/renderer/lib/telemetry.ts b/frontend/src/renderer/lib/telemetry.ts index cb66a741..100c290b 100644 --- a/frontend/src/renderer/lib/telemetry.ts +++ b/frontend/src/renderer/lib/telemetry.ts @@ -5,6 +5,10 @@ import { DEFAULT_POSTHOG_HOST, DEFAULT_POSTHOG_PROJECT_KEY } from "../../shared/ const POSTHOG_KEY = import.meta.env.VITE_AO_POSTHOG_KEY?.trim() || DEFAULT_POSTHOG_PROJECT_KEY; const POSTHOG_HOST = import.meta.env.VITE_AO_POSTHOG_HOST?.trim() || DEFAULT_POSTHOG_HOST; const RELEASE_TAG = "2026-01-30"; +const REDACTED_LOCAL_URL = "[redacted-local-url]"; +const REDACTED_LOCAL_PATH = "[redacted-local-path]"; +const EMBEDDED_LOCAL_URL_PATTERN = + /(?:\bfile:\/\/\/\S+|\bapp:\/\/renderer\/\S+|\bhttps?:\/\/(?:localhost|127\.0\.0\.1|\[::1\])(?::\d+)?\S*)/gi; let initPromise: Promise | null = null; let errorHandlersBound = false; @@ -48,6 +52,82 @@ async 
function hashedTelemetryID(value: unknown): Promise { return sha256Hex(trimmed); } +function isLocalURL(value: string): boolean { + try { + const url = new URL(value); + const hostname = url.hostname.replace(/^\[(.*)\]$/, "$1"); + return ( + url.protocol === "file:" || + (url.protocol === "app:" && url.host === "renderer") || + hostname === "localhost" || + hostname === "127.0.0.1" || + hostname === "::1" + ); + } catch { + return false; + } +} + +function redactEmbeddedLocalURLs(value: string): string { + return value.replace(EMBEDDED_LOCAL_URL_PATTERN, REDACTED_LOCAL_URL); +} + +function redactEmbeddedAbsolutePaths(value: string): string { + return value + .replace(/(?:\/Users\/|\/home\/|\/tmp\/|\/private\/var\/|\/var\/folders\/)\S+/g, REDACTED_LOCAL_PATH) + .replace(/\b[A-Za-z]:\\[^\s)]+/g, REDACTED_LOCAL_PATH); +} + +function sanitizeSensitiveString(value: string): string { + const trimmed = value.trim(); + if (!trimmed) return trimmed; + if (isLocalURL(trimmed)) return REDACTED_LOCAL_URL; + return redactEmbeddedAbsolutePaths(redactEmbeddedLocalURLs(trimmed)); +} + +function sanitizePostHogValue(value: unknown): unknown { + if (typeof value === "string") return sanitizeSensitiveString(value); + if (Array.isArray(value)) return value.map((item) => sanitizePostHogValue(item)); + if (value && typeof value === "object") { + return Object.fromEntries(Object.entries(value).map(([key, nested]) => [key, sanitizePostHogValue(nested)])); + } + return value; +} + +export function sanitizePostHogEvent(event: Record): Record { + return sanitizePostHogValue(event) as Record; +} + +export function sanitizeReplayRequestName(name: string): string { + const withoutQuery = name.split("?")[0] ?? 
name; + return sanitizeSensitiveString(withoutQuery); +} + +function sanitizePostHogCaptureResult(event: T): T { + return sanitizePostHogEvent(event as unknown as Record) as unknown as T; +} + +async function sanitizeRendererContextProperties(properties?: TelemetryProperties): Promise { + const safe: TelemetryProperties = {}; + if (typeof properties?.source === "string" && properties.source.trim() !== "") { + safe.source = properties.source; + } + if (typeof properties?.operation === "string" && properties.operation.trim() !== "") { + safe.operation = properties.operation; + } + if (typeof properties?.surface === "string" && properties.surface.trim() !== "") { + safe.surface = properties.surface; + } + if (typeof properties?.unhandled === "boolean") { + safe.unhandled = properties.unhandled; + } + const projectIDHash = await hashedTelemetryID(properties?.project_id); + if (projectIDHash) { + safe.project_id_hash = projectIDHash; + } + return safe; +} + export async function sanitizeRendererProperties( event: string, properties?: TelemetryProperties, @@ -89,17 +169,7 @@ export async function sanitizeRendererExceptionProperties( const safe: TelemetryProperties = { error_name: exceptionName(error), }; - if (typeof properties?.source === "string" && properties.source.trim() !== "") { - safe.source = properties.source; - } - if (typeof properties?.unhandled === "boolean") { - safe.unhandled = properties.unhandled; - } - const projectIDHash = await hashedTelemetryID(properties?.project_id); - if (projectIDHash) { - safe.project_id_hash = projectIDHash; - } - return safe; + return { ...safe, ...(await sanitizeRendererContextProperties(properties)) }; } function bindErrorHandlers() { @@ -130,7 +200,17 @@ export async function initTelemetry(): Promise { defaults: RELEASE_TAG, autocapture: false, capture_pageview: false, + capture_exceptions: false, persistence: "localStorage", + before_send: (event) => (event ? 
sanitizePostHogCaptureResult(event) : event), + session_recording: { + maskCapturedNetworkRequestFn: (request) => { + if (request.name) { + request.name = sanitizeReplayRequestName(request.name); + } + return request; + }, + }, }); posthog.identify(bootstrap.distinctId, { app_version: bootstrap.appVersion, @@ -160,7 +240,13 @@ export async function captureRendererEvent(event: string, properties?: Record): Promise { if (!(await initTelemetry())) return; const safeProperties = await sanitizeRendererExceptionProperties(error, properties); - posthog.capture("ao.renderer.exception", safeProperties); + posthog.captureException(normalizeException(error), safeProperties); +} + +export async function addRendererExceptionStep(message: string, properties?: Record): Promise { + if (!(await initTelemetry())) return; + const safeProperties = await sanitizeRendererContextProperties(properties); + posthog.addExceptionStep(message, safeProperties); } export { routeSurface }; diff --git a/frontend/src/renderer/routes/_shell.tsx b/frontend/src/renderer/routes/_shell.tsx index 7c26c176..762bb939 100644 --- a/frontend/src/renderer/routes/_shell.tsx +++ b/frontend/src/renderer/routes/_shell.tsx @@ -9,7 +9,7 @@ import { useDaemonStatus } from "../hooks/useDaemonStatus"; import { useWorkspaceQuery, workspaceQueryKey, workspaceQueryOptions } from "../hooks/useWorkspaceQuery"; import { apiClient, apiErrorMessage } from "../lib/api-client"; import { refreshDaemonStatus } from "../lib/daemon-status"; -import { captureRendererEvent, captureRendererException } from "../lib/telemetry"; +import { addRendererExceptionStep, captureRendererEvent, captureRendererException } from "../lib/telemetry"; import { ShellProvider } from "../lib/shell-context"; import { spawnOrchestrator } from "../lib/spawn-orchestrator"; import { readStoredTheme, type Theme, useUiStore } from "../stores/ui-store"; @@ -57,6 +57,11 @@ function ShellLayout() { const createProject = useCallback( async (input: { path: string; 
workerAgent: string; orchestratorAgent: string }) => { + void addRendererExceptionStep("Project add requested", { + source: "project-add", + operation: "project_add", + surface: "project_board", + }); void captureRendererEvent("ao.renderer.project_add_requested"); const { data, error } = await apiClient.POST("/api/v1/projects", { body: { @@ -69,7 +74,11 @@ function ShellLayout() { }); if (error) { const failure = new Error(apiErrorMessage(error)); - void captureRendererException(failure, { source: "project-add" }); + void captureRendererException(failure, { + source: "project-add", + operation: "project_add", + surface: "project_board", + }); throw failure; } if (!data?.project) throw new Error("Project creation returned no project"); @@ -101,12 +110,23 @@ function ShellLayout() { const removeProject = useCallback( async (projectId: string) => { + void addRendererExceptionStep("Project removal requested", { + source: "project-remove", + operation: "project_remove", + surface: "project_board", + project_id: projectId, + }); const { error } = await apiClient.DELETE("/api/v1/projects/{id}", { params: { path: { id: projectId } }, }); if (error) { const failure = new Error(apiErrorMessage(error)); - void captureRendererException(failure, { source: "project-remove", project_id: projectId }); + void captureRendererException(failure, { + source: "project-remove", + operation: "project_remove", + surface: "project_board", + project_id: projectId, + }); throw failure; } void captureRendererEvent("ao.renderer.project_removed", { project_id: projectId });