diff --git a/apps/api/alchemy.run.ts b/apps/api/alchemy.run.ts index 6976dcf4..b3b19909 100644 --- a/apps/api/alchemy.run.ts +++ b/apps/api/alchemy.run.ts @@ -6,6 +6,7 @@ import { KVNamespace, Queue, Worker, + WorkerLoader, WorkerStub, Workflow, } from "alchemy/cloudflare" @@ -205,6 +206,10 @@ export const createMapleApi = async ({ stage, domains }: CreateMapleApiOptions) ...optionalSecret("GITHUB_APP_CLIENT_SECRET"), ...optionalSecret("GITHUB_APP_WEBHOOK_SECRET"), ...optionalPlain("GITHUB_API_BASE_URL"), + // Code Mode sandbox (Cloudflare Dynamic Workers). The `run_code` MCP tool + // runs model-written code in an isolate via this `worker_loader` binding; + // its presence activates the tool. Requires Worker Loader beta access. + LOADER: WorkerLoader(), }, }) diff --git a/apps/api/package.json b/apps/api/package.json index 2039dcdc..0925594e 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -31,6 +31,7 @@ "@effect/platform-bun": "catalog:effect", "@flue/sdk": "1.0.0-beta.1", "@maple-dev/effect-sdk": "workspace:*", + "@maple/codemode": "workspace:*", "@maple/db": "workspace:*", "@maple/domain": "workspace:*", "@maple/effect-cloudflare": "workspace:*", diff --git a/apps/api/src/mcp/lib/dashboard-mutations.test.ts b/apps/api/src/mcp/lib/dashboard-mutations.test.ts index 114042a8..0559fd47 100644 --- a/apps/api/src/mcp/lib/dashboard-mutations.test.ts +++ b/apps/api/src/mcp/lib/dashboard-mutations.test.ts @@ -148,10 +148,15 @@ describe("dashboard mutations on tag-less / description-less dashboards", () => const layer = makeLayer(testDb) let handler: ToolHandler | null = null + // Capture from both tool() and mutatingTool() — update_dashboard registers + // via mutatingTool (it's a mutating tool), but capturing both keeps this + // harness robust regardless of which a tool uses. 
+ const capture = (_name: string, _description: string, _schema: unknown, h: unknown) => { + handler = h as ToolHandler + } const registrar: McpToolRegistrar = { - tool: (_name, _description, _schema, h) => { - handler = h as ToolHandler - }, + tool: capture as McpToolRegistrar["tool"], + mutatingTool: capture as McpToolRegistrar["mutatingTool"], } registerUpdateDashboardTool(registrar) assert.isNotNull(handler) diff --git a/apps/api/src/mcp/tools/add-dashboard-widget.ts b/apps/api/src/mcp/tools/add-dashboard-widget.ts index 92c8d6d3..22c0e5d8 100644 --- a/apps/api/src/mcp/tools/add-dashboard-widget.ts +++ b/apps/api/src/mcp/tools/add-dashboard-widget.ts @@ -45,7 +45,7 @@ const KNOWN_VISUALIZATIONS = [ ] as const export function registerAddDashboardWidgetTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, 'Add a single widget to an existing dashboard without re-sending the whole document. `visualization` MUST be one of: `chart`, `stat`, `gauge`, `table`, `list`, `pie`, `histogram`, `heatmap`, `funnel` — NOT a free-form title. `gauge` renders a single scalar on a radial gauge (same data shape as `stat`); set `display_json.gauge` to `{ min, max }` and `display_json.thresholds` to color the arc. For line/area/bar charts, pass `visualization: "chart"` and `display_type: "line"`/`"area"`/`"bar"`. Two creation paths:\n\n1. **Structured query builder** (default): pass `data_source_json` + `display_json` to wire the widget to a specific endpoint (`custom_query_builder_timeseries`, `service_overview`, etc.). Trace and log queries omit the metric-only fields (`metricName`/`metricType`/`isMonotonic`/`signalSource`) — only `dataSource: "metrics"` queries carry them. `whereClause` is a custom grammar (`=`, `>`, `<`, `>=`, `<=`, `contains`, `exists` joined by ` AND `) — there is NO SQL `IS NULL`/`IS NOT NULL`; use ` exists` to require an attribute. 
See the `maple://instructions` resource for the full widget JSON shape (aggregations per source, groupBy prefixes, units, stat reduceToValue, hideSeries).\n\n2. **Raw ClickHouse SQL**: pass `sql` to create a `raw_sql_chart` widget (the tool builds the dataSource for you — `data_source_json` is ignored). `sql` MUST reference `$__orgFilter`. Macros: `$__orgFilter` (required), `$__timeFilter(Column)`, `$__startTime`, `$__endTime`, `$__interval_s` (only useful when SQL also references it, typically inside `toStartOfInterval(…, INTERVAL $__interval_s SECOND)`).\n\n **Before writing raw SQL, call `describe_warehouse_tables`** to discover real table and column names (no args → list every table; `table: ""` → full column list with types, jsonPaths, sorting key, and curated notes on enum casing, units, sort-key hints). Do not guess table or column names — a hallucinated identifier silently produces an empty chart. Columns are PascalCase; values for `StatusCode`/`SeverityText`/`SpanKind` are Title Case (`\'Error\'` not `\'ERROR\'`); span `Duration` is in nanoseconds (divide by 1e6 for ms).\n\n **SELECT shape per `display_type`** (the renderer is opinionated; wrong aliases → empty or `[object Object]`):\n - `line`/`area`/`bar`: time bucket as first column (alias `bucket`) + ONE OR MORE numeric series columns. Each numeric column becomes one series; the column name becomes the legend label. **String columns are dropped**, so for multi-series (e.g., per-service breakdown) pivot in SQL with `countIf(...)` — tall form (`bucket, ServiceName, count()`) collapses to a single aggregate line. Single-series: `SELECT toStartOfInterval(Timestamp, INTERVAL $__interval_s SECOND) AS bucket, count() AS errors FROM ... WHERE $__orgFilter AND $__timeFilter(Timestamp) GROUP BY bucket ORDER BY bucket`. Multi-series wide form: `SELECT toStartOfInterval(Timestamp, INTERVAL $__interval_s SECOND) AS bucket, countIf(ServiceName=\'api\') AS api, countIf(ServiceName=\'web\') AS web FROM ... 
GROUP BY bucket ORDER BY bucket`. For dynamic series labels, run a discovery query first (e.g., `query_data` or a quick top-N) and inject the values.\n - `stat`: one scalar aliased `value` — `SELECT count() AS value FROM ... WHERE $__orgFilter AND $__timeFilter(Timestamp)`\n - `pie`: `name` (label) + numeric column; cap with `LIMIT 8`-ish\n - `heatmap`: three columns aliased `x`, `y`, `value` (string-cast numeric x/y)\n - `table`: any rows; columns render in order\n - `histogram`: one numeric column aliased `value` (renderer buckets client-side); add `LIMIT 5000`\n - `funnel`: `name` (string stage label) + numeric column; rows render in returned order as descending bars — `ORDER BY value DESC` for a classic funnel, cap with `LIMIT 8`-ish\n\n If `display_type` is omitted it\'s derived from `visualization` (chart→line via `display_json.chartId`, stat→stat, table→table, pie→pie, histogram→histogram, heatmap→heatmap, funnel→funnel). The stat `reduceToValue` transform is auto-injected.\n\n **See `maple://instructions` for the full table catalog, column lists, and worked examples per display type.**\n\nIf `layout_json` is omitted the widget is auto-placed using the same grid logic as the web UI. Returns the new widget id plus an automatic validation summary (verdict, flags). 
If `verdict` is `suspicious` or `broken`, fix via `update_dashboard_widget` — the chart will not render meaningful data as-is.', Schema.Struct({ diff --git a/apps/api/src/mcp/tools/claim-error-issue.ts b/apps/api/src/mcp/tools/claim-error-issue.ts index e546ae62..eaf871f8 100644 --- a/apps/api/src/mcp/tools/claim-error-issue.ts +++ b/apps/api/src/mcp/tools/claim-error-issue.ts @@ -15,7 +15,7 @@ import { ErrorIssueId } from "@maple/domain/http" const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) export function registerClaimErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "claim_error_issue", "Claim a lease on an error issue so other agents don't duplicate work. Issues in 'triage' or 'todo' auto-transition to 'in_progress' on claim. Lease defaults to 30 min; call heartbeat_error_issue before it expires or the issue drops back to 'todo'.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/comment-on-error-issue.ts b/apps/api/src/mcp/tools/comment-on-error-issue.ts index 2e32bae3..9dd8f8a2 100644 --- a/apps/api/src/mcp/tools/comment-on-error-issue.ts +++ b/apps/api/src/mcp/tools/comment-on-error-issue.ts @@ -15,7 +15,7 @@ import { ErrorIssueId } from "@maple/domain/http" const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) export function registerCommentOnErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "comment_on_error_issue", "Add a comment to the issue's timeline. 
Use kind='agent_note' for automated reasoning steps (visible in the audit log but styled differently in the UI).", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/create-alert-rule.ts b/apps/api/src/mcp/tools/create-alert-rule.ts index bccb2eea..3be2a9ad 100644 --- a/apps/api/src/mcp/tools/create-alert-rule.ts +++ b/apps/api/src/mcp/tools/create-alert-rule.ts @@ -247,7 +247,7 @@ const comparatorLabel: Record = { } export function registerCreateAlertRuleTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "create_alert_rule", "Create an alert rule. Use a template for common cases (high_error_rate, slow_p95, slow_p99, low_apdex, throughput_drop) or template='custom' for full control. " + "Templates auto-fill signal_type, comparator, and a sensible default threshold. " + diff --git a/apps/api/src/mcp/tools/create-dashboard.ts b/apps/api/src/mcp/tools/create-dashboard.ts index 21e77073..c05cec48 100644 --- a/apps/api/src/mcp/tools/create-dashboard.ts +++ b/apps/api/src/mcp/tools/create-dashboard.ts @@ -287,7 +287,7 @@ const TIME_RANGE_MAP: Record = { export function registerCreateDashboardTool(server: McpToolRegistrar) { const templateList = DASHBOARD_TEMPLATES.map((t) => ` ${t.id} — ${t.description}`).join("\n") - server.tool( + server.mutatingTool( "create_dashboard", "Create a dashboard from a template, simplified widget specs, or custom JSON.\n\n" + "Templates:\n" + diff --git a/apps/api/src/mcp/tools/delete-alert-rule.ts b/apps/api/src/mcp/tools/delete-alert-rule.ts index 7a9b6572..80486496 100644 --- a/apps/api/src/mcp/tools/delete-alert-rule.ts +++ b/apps/api/src/mcp/tools/delete-alert-rule.ts @@ -8,7 +8,7 @@ import { AlertRuleId } from "@maple/domain" const decodeAlertRuleId = Schema.decodeUnknownOption(AlertRuleId) export function registerDeleteAlertRuleTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "delete_alert_rule", "Permanently delete an alert rule. 
This is irreversible and also deletes the rule's incident history, " + "delivery events, and evaluation state. Requires confirm=true. Use list_alert_rules to find rule IDs.", diff --git a/apps/api/src/mcp/tools/heartbeat-error-issue.ts b/apps/api/src/mcp/tools/heartbeat-error-issue.ts index ee23ebce..0b6f0126 100644 --- a/apps/api/src/mcp/tools/heartbeat-error-issue.ts +++ b/apps/api/src/mcp/tools/heartbeat-error-issue.ts @@ -9,7 +9,7 @@ import { ErrorIssueId } from "@maple/domain/http" const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) export function registerHeartbeatErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "heartbeat_error_issue", "Extend the lease on a claimed error issue. Call this periodically while you work; if the lease expires, the issue drops back to 'todo' and any actor can re-claim it.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/mutating.test.ts b/apps/api/src/mcp/tools/mutating.test.ts index cb2d6e70..bb1977bb 100644 --- a/apps/api/src/mcp/tools/mutating.test.ts +++ b/apps/api/src/mcp/tools/mutating.test.ts @@ -10,6 +10,19 @@ describe("MUTATING_TOOL_NAMES", () => { } }) + it("exactly equals the tools registered via mutatingTool (structural flag <-> shared list)", () => { + // The per-tool `mutating` flag (set at registration via `server.mutatingTool`) + // is the structural truth the run_code gate uses; MUTATING_TOOL_NAMES is the + // static list the chat + /chat/apply paths use (they can't read the flag over + // MCP). This asserts they can't drift in either direction — register a + // mutating tool but forget the list (or vice versa) and CI fails. 
+ const flagged = new Set(mapleToolDefinitions.filter((d) => d.mutating).map((d) => d.name)) + const flaggedButUnlisted = [...flagged].filter((n) => !MUTATING_TOOL_NAMES.has(n)) + const listedButUnflagged = [...MUTATING_TOOL_NAMES].filter((n) => !flagged.has(n)) + expect(flaggedButUnlisted, `registered mutating but absent from MUTATING_TOOL_NAMES: [${flaggedButUnlisted.join(", ")}]`).toEqual([]) + expect(listedButUnflagged, `in MUTATING_TOOL_NAMES but not registered via mutatingTool: [${listedButUnflagged.join(", ")}]`).toEqual([]) + }) + it("excludes read-only tools (so /chat/apply can't run them)", () => { expect(MUTATING_TOOL_NAMES.has("find_errors")).toBe(false) expect(MUTATING_TOOL_NAMES.has("search_traces")).toBe(false) diff --git a/apps/api/src/mcp/tools/mutating.ts b/apps/api/src/mcp/tools/mutating.ts index 878929b6..35c36fc8 100644 --- a/apps/api/src/mcp/tools/mutating.ts +++ b/apps/api/src/mcp/tools/mutating.ts @@ -1,33 +1,4 @@ -/** - * Base names of the mutating MCP tools that the AI chat gates behind approval. - * - * The Flue chat agent wraps these so a model call returns a `proposed` marker - * instead of mutating (see `apps/chat-flue/src/lib/approval.ts`); the web client - * applies the real change via `POST /api/chat/apply`, which only accepts tools - * in this set. Keep the two lists in sync. 
- */ -export const MUTATING_TOOL_NAMES: ReadonlySet = new Set([ - // dashboards - "create_dashboard", - "update_dashboard", - "add_dashboard_widget", - "update_dashboard_widget", - "remove_dashboard_widget", - "reorder_dashboard_widgets", - "replace_dashboard_widgets", - // alerts - "create_alert_rule", - "update_alert_rule", - "delete_alert_rule", - // error issues - "claim_error_issue", - "release_error_issue", - "transition_error_issue", - "comment_on_error_issue", - "heartbeat_error_issue", - "set_issue_severity", - "update_error_notification_policy", - // fixes / agents - "propose_fix", - "register_agent", -]) +// Single source of truth lives in @maple/codemode so the apps/api + apps/chat-flue +// copies can't drift. Re-exported here to keep existing `./mutating` imports stable. +// The fail-closed regression test lives in `./mutating.test.ts`. +export { MUTATING_TOOL_NAMES } from "@maple/codemode" diff --git a/apps/api/src/mcp/tools/propose-fix.ts b/apps/api/src/mcp/tools/propose-fix.ts index 6de4345c..f22642a9 100644 --- a/apps/api/src/mcp/tools/propose-fix.ts +++ b/apps/api/src/mcp/tools/propose-fix.ts @@ -22,7 +22,7 @@ const parseArtifactList = (raw: string | undefined): ReadonlyArray => { } export function registerProposeFixTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "propose_fix", "Attach a fix proposal (PR URL, patch summary, artifacts) to an error issue. Transitions the issue to 'in_review'. 
The human owner can then accept (→ done) or reject.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/register-agent.ts b/apps/api/src/mcp/tools/register-agent.ts index 3975d8a6..ef3a9529 100644 --- a/apps/api/src/mcp/tools/register-agent.ts +++ b/apps/api/src/mcp/tools/register-agent.ts @@ -18,7 +18,7 @@ const parseCapabilities = (raw: string | undefined): ReadonlyArray => { } export function registerRegisterAgentTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "register_agent", "Register an LLM agent with the error-issue system so it can claim and transition issues. Must be called from a human session (not an agent API key). Returns an actor ID to pin via API-key metadata.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/registry.ts b/apps/api/src/mcp/tools/registry.ts index f8455be8..742893f8 100644 --- a/apps/api/src/mcp/tools/registry.ts +++ b/apps/api/src/mcp/tools/registry.ts @@ -42,6 +42,7 @@ import { registerRemoveDashboardWidgetTool } from "./remove-dashboard-widget" import { registerReplaceDashboardWidgetsTool } from "./replace-dashboard-widgets" import { registerReorderDashboardWidgetsTool } from "./reorder-dashboard-widgets" import { registerMineLogPatternsTool } from "./mine-log-patterns" +import { registerRunCodeTool } from "./run-code" import { registerSearchLogsTool } from "./search-logs" import { registerSearchTracesTool } from "./search-traces" import { registerSearchSessionsTool } from "./search-sessions" @@ -63,6 +64,8 @@ export interface MapleToolDefinition { readonly description: string readonly schema: Schema.Decoder readonly handler: (params: unknown) => Effect.Effect + /** True for state-changing tools (registered via `mutatingTool`). The `run_code` sandbox refuses these. 
*/ + readonly mutating: boolean } export const toInputSchema = (schema: Schema.Top): Record => { @@ -74,14 +77,27 @@ export const toInputSchema = (schema: Schema.Top): Record => { const collectMapleToolDefinitions = (): ReadonlyArray => { const definitions: MapleToolDefinition[] = [] + const add = ( + mutating: boolean, + name: string, + description: string, + schema: Schema.Decoder, + handler: unknown, + ) => { + definitions.push({ + name, + description, + schema, + handler: handler as MapleToolDefinition["handler"], + mutating, + }) + } const registrar: McpToolRegistrar = { tool(name, description, schema, handler) { - definitions.push({ - name, - description, - schema, - handler: handler as MapleToolDefinition["handler"], - }) + add(false, name, description, schema, handler) + }, + mutatingTool(name, description, schema, handler) { + add(true, name, description, schema, handler) }, } @@ -136,6 +152,11 @@ const collectMapleToolDefinitions = (): ReadonlyArray => { registerRegisterAgentTool(registrar) registerListErrorIncidentsTool(registrar) registerUpdateErrorNotificationPolicyTool(registrar) + // Code Mode: a single tool whose sandboxed snippet orchestrates the read-only + // tools above. Registered last so it can reference the full set at runtime + // (it dispatches via `mapleToolDefinitions`); inert unless the LOADER sandbox + // binding is present. 
+ registerRunCodeTool(registrar) return definitions } diff --git a/apps/api/src/mcp/tools/release-error-issue.ts b/apps/api/src/mcp/tools/release-error-issue.ts index 4e7b34ee..d7e13229 100644 --- a/apps/api/src/mcp/tools/release-error-issue.ts +++ b/apps/api/src/mcp/tools/release-error-issue.ts @@ -16,7 +16,7 @@ const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) const decodeWorkflowState = Schema.decodeUnknownOption(WorkflowState) export function registerReleaseErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "release_error_issue", "Release the lease on an error issue you previously claimed, optionally transitioning it to another workflow state (default: 'todo').", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/remove-dashboard-widget.ts b/apps/api/src/mcp/tools/remove-dashboard-widget.ts index aff1fd18..f092a25e 100644 --- a/apps/api/src/mcp/tools/remove-dashboard-widget.ts +++ b/apps/api/src/mcp/tools/remove-dashboard-widget.ts @@ -6,7 +6,7 @@ import { withDashboardMutation } from "../lib/dashboard-mutations" const TOOL = "remove_dashboard_widget" export function registerRemoveDashboardWidgetTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, "Remove a single widget from a dashboard by id. Other widgets and dashboard metadata are left untouched.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts b/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts index 744e38f2..7eda7df4 100644 --- a/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts +++ b/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts @@ -60,7 +60,7 @@ const validateLayoutGeometry = (entries: ReadonlyArray): string[] = } export function registerReorderDashboardWidgetsTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, "Reposition or resize one or more widgets on a dashboard in a single call. 
Only the widgets you include are touched; any widget id not present in layouts_json keeps its existing layout. Useful for drag/drop-style moves without re-sending unrelated widget state.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/replace-dashboard-widgets.ts b/apps/api/src/mcp/tools/replace-dashboard-widgets.ts index 04342f0f..b1472179 100644 --- a/apps/api/src/mcp/tools/replace-dashboard-widgets.ts +++ b/apps/api/src/mcp/tools/replace-dashboard-widgets.ts @@ -21,7 +21,7 @@ const TOOL = "replace_dashboard_widgets" const decodeWidget = Schema.decodeUnknownEffect(DashboardWidgetSchema) export function registerReplaceDashboardWidgetsTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, "Replace ALL widgets on a dashboard in one atomic, validated write — the safe middle ground between many incremental `add/update_dashboard_widget` calls and the corruption-prone full `dashboard_json` replace. Pass `widgets_json`: a JSON array of widget objects (same shape as `widgets[]` from get_dashboard). Each widget's query is validated BEFORE anything is persisted — if any widget references a filter/groupBy the engine can't honor, NOTHING is saved and the offending clauses are returned. Per-widget conveniences: `id` is auto-generated when omitted, and `layout` is auto-placed on a 12-column grid when omitted (so you can pass just `{ visualization, dataSource, display }`). Dashboard metadata (name, description, tags, time range) is left untouched. 
Returns an automatic validation summary; fix any `suspicious`/`broken` widgets and call again.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/run-code.test.ts b/apps/api/src/mcp/tools/run-code.test.ts new file mode 100644 index 00000000..12914b0e --- /dev/null +++ b/apps/api/src/mcp/tools/run-code.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, it, vi } from "vitest" +import type { McpToolResult } from "./types" +import { mapleToolDefinitions } from "./registry" +import { resolveCodeModeCall, textOfResult } from "./run-code" + +const call = ( + name: string, + input: unknown, + invoke: (definition: (typeof mapleToolDefinitions)[number], decoded: unknown) => Promise, +) => resolveCodeModeCall(mapleToolDefinitions, name, input, invoke) + +const okResult = (text: string, structured?: string): McpToolResult => ({ + content: structured + ? [ + { type: "text", text }, + { type: "text", text: structured }, + ] + : [{ type: "text", text }], +}) + +describe("textOfResult", () => { + it("joins dual content under the Structured content: convention", () => { + expect(textOfResult(okResult("human", '{"a":1}'))).toBe('human\n\nStructured content:\n{"a":1}') + }) + it("returns the single text entry as-is", () => { + expect(textOfResult(okResult("just text"))).toBe("just text") + }) +}) + +describe("resolveCodeModeCall", () => { + it("blocks mutating tools without invoking them", async () => { + const invoke = vi.fn() + const r = await call("create_dashboard", {}, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("MutatingToolBlocked") + expect(invoke).not.toHaveBeenCalled() + }) + + it("refuses to call run_code from inside code mode (no nested sandbox)", async () => { + const invoke = vi.fn() + const r = await call("run_code", { code: "1" }, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("Blocked") + expect(invoke).not.toHaveBeenCalled() + }) + + it("rejects unknown tools", async () => { + const r = await call("not_a_tool", {}, vi.fn()) 
+ expect(r.ok).toBe(false) + expect(r.error?.name).toBe("UnknownTool") + }) + + it("rejects input that fails the tool schema before invoking", async () => { + const invoke = vi.fn() + // list_services takes only optional strings; a number for `environment` is invalid. + const r = await call("list_services", { environment: 123 }, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("InvalidInput") + expect(invoke).not.toHaveBeenCalled() + }) + + it("runs a read tool and returns its text on success", async () => { + const invoke = vi.fn(async () => okResult("Services table", '{"total":2}')) + const r = await call("list_services", { environment: "production" }, invoke) + expect(invoke).toHaveBeenCalledOnce() + expect(r.ok).toBe(true) + expect(r.value).toContain("Services table") + expect(r.value).toContain("Structured content:") + }) + + it("surfaces an isError tool result as an error value", async () => { + const invoke = vi.fn(async (): Promise => ({ + isError: true, + content: [{ type: "text", text: "warehouse exploded" }], + })) + const r = await call("list_services", {}, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("ToolError") + expect(r.error?.message).toContain("warehouse exploded") + }) +}) diff --git a/apps/api/src/mcp/tools/run-code.ts b/apps/api/src/mcp/tools/run-code.ts new file mode 100644 index 00000000..318530f9 --- /dev/null +++ b/apps/api/src/mcp/tools/run-code.ts @@ -0,0 +1,157 @@ +import { Effect, FiberSet, Schema } from "effect" +import { formatRunOutput, RUN_CODE_TOOL_NAME, type RpcCallResult } from "@maple/codemode" +import { WorkerEnvironment } from "@/lib/WorkerEnvironment" +import { resolveTenant } from "../lib/query-warehouse" +// Type-only: a value import would create an eager require cycle with registry.ts +// (registry imports this module to register the tool). The definitions are passed +// into resolveCodeModeCall / fetched via dynamic import at request time instead. 
+import type { MapleToolDefinition } from "./registry" +import { MUTATING_TOOL_NAMES } from "./mutating" +import { requiredStringParam, validationError, type McpToolRegistrar, type McpToolResult } from "./types" + +const DESCRIPTION = `Run a JavaScript snippet that orchestrates other Maple tools in one call, instead of issuing many separate tool calls. Inside the snippet, \`await maple.(input)\` invokes any READ-ONLY Maple tool by name (same names and inputs as the other tools you have) and returns its text output (human-readable text followed by a \`Structured content:\` line of JSON — JSON.parse it to filter/sort). The snippet runs in a sandbox with no network and no imports; \`console.log(...)\` and the \`return\` value come back to you. Mutating tools are NOT callable here — call those directly so they go through approval. Ideal for multi-step investigations (find → for each → inspect → correlate) where chaining and filtering in code beats round-tripping every result.` + +/** Join an McpToolResult's content into the `Structured content:` convention the sandbox API uses. */ +export const textOfResult = (result: McpToolResult): string => { + const texts = result.content.map((c) => c.text) + if (texts.length <= 1) return texts.join("\n") + const [human, ...rest] = texts + return `${human}\n\nStructured content:\n${rest.join("\n")}` +} + +/** + * Resolve one `maple.(input)` call to an RPC result: block mutating tools, + * reject unknown names, decode the input against the tool's schema, then run the + * handler via `invoke` (which the caller binds to the captured request runtime). + * Errors are returned as values so the model can self-correct. Pure of the + * Effect runtime — the dispatch logic is unit-testable with a fake `invoke`. 
+ */ +export const resolveCodeModeCall = async ( + definitions: ReadonlyArray, + name: string, + input: unknown, + invoke: (definition: MapleToolDefinition, decoded: unknown) => Promise, +): Promise => { + if (name === RUN_CODE_TOOL_NAME) { + // `run_code` is in `mapleToolDefinitions` (registered last), so without this + // guard a snippet calling maple.run_code(...) would nest a sandbox. + return { + ok: false, + error: { name: "Blocked", message: "maple.run_code cannot be called from inside code mode." }, + } + } + const definition = definitions.find((d) => d.name === name) + if (!definition) { + return { ok: false, error: { name: "UnknownTool", message: `maple.${name} is not available` } } + } + // Structural gate: a tool registered via `mutatingTool` carries `mutating: true`, + // so a mutating tool can't slip past code mode regardless of its name. (The + // shared MUTATING_TOOL_NAMES set is verified to equal this flag in tests.) + if (definition.mutating || MUTATING_TOOL_NAMES.has(name)) { + return { + ok: false, + error: { + name: "MutatingToolBlocked", + message: `maple.${name} mutates state and can't run inside code mode. Call the ${name} tool directly so it goes through approval.`, + }, + } + } + let decoded: unknown + try { + decoded = Schema.decodeUnknownSync(definition.schema)(input ?? {}) + } catch (error) { + return { ok: false, error: { name: "InvalidInput", message: String(error) } } + } + try { + const result = await invoke(definition, decoded) + if (result.isError) { + return { ok: false, error: { name: "ToolError", message: textOfResult(result) } } + } + return { ok: true, value: textOfResult(result) } + } catch (error) { + return { + ok: false, + error: { + name: error instanceof Error ? error.name : "Error", + message: error instanceof Error ? error.message : String(error), + }, + } + } +} + +/** + * Code Mode for the MCP server (Cloudflare Dynamic Workers). 
Exposes a single + * `run_code` tool whose sandboxed snippet calls back into the existing read-only + * tool handlers via RPC, run on the SAME request/tenant context — so org scoping + * is identical to a direct tool call and the sandbox can never widen it. Mutating + * tools are blocked inside code (they must go through the host's approval path). + * + * Active when the `LOADER` (worker_loader) binding is present; without it the + * tool returns an "unavailable" result (e.g. local/test runs). The Workers-only + * sandbox driver is imported dynamically so this module's static graph stays + * Node-safe (the tool registry is imported by node-based evals/tests). + */ +export function registerRunCodeTool(server: McpToolRegistrar) { + server.tool( + RUN_CODE_TOOL_NAME, + DESCRIPTION, + Schema.Struct({ + code: requiredStringParam( + "A JavaScript snippet using `await maple.(input)`, `console.log(...)`, and `return`. No imports, no network, no type annotations.", + ), + }), + Effect.fn("McpTool.runCode")(function* ({ code }) { + const tenant = yield* resolveTenant + yield* Effect.annotateCurrentSpan({ orgId: tenant.orgId }) + + const env = yield* WorkerEnvironment + const loader = env.LOADER as WorkerLoader | undefined + if (!loader) { + return validationError( + "Code mode is unavailable here (no sandbox runtime is bound). Call the individual Maple tools directly instead.", + ) + } + if (!code.trim()) { + return validationError("Provide a `code` snippet that uses the `maple` API.") + } + + return yield* Effect.scoped( + Effect.gen(function* () { + // Capture the current request context so RPC callbacks (which fire + // from the isolate while we await the sandbox) can run tool handlers + // with the same tenant/services. `any`: tool handlers are + // type-erased over their service requirements (see registry.ts). 
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any + const runPromise = yield* FiberSet.makeRuntimePromise() + + // Fetched lazily (the static import is type-only to avoid a require + // cycle); registry is fully initialized by request time. + const { mapleToolDefinitions } = yield* Effect.promise(() => import("./registry")) + const dispatch = (name: string, input: unknown): Promise => + resolveCodeModeCall(mapleToolDefinitions, name, input, (definition, decoded) => + runPromise(definition.handler(decoded)), + ) + + const result = yield* Effect.promise(async () => { + const { runCodeInSandbox } = await import("@maple/codemode/sandbox") + return runCodeInSandbox(loader, { + id: `maple-codemode-${crypto.randomUUID()}`, + code, + dispatch, + }) + }) + + yield* Effect.annotateCurrentSpan({ + "codemode.log_lines": result.logs.length, + "codemode.crashed": result.crashed === true, + "codemode.errored": result.error !== null, + }) + + return { + content: [{ type: "text" as const, text: formatRunOutput(result) }], + } satisfies McpToolResult + }), + ) + }), + ) +} diff --git a/apps/api/src/mcp/tools/set-issue-severity.ts b/apps/api/src/mcp/tools/set-issue-severity.ts index f86b8cb1..fc4d364b 100644 --- a/apps/api/src/mcp/tools/set-issue-severity.ts +++ b/apps/api/src/mcp/tools/set-issue-severity.ts @@ -16,7 +16,7 @@ const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) const decodeSeverity = Schema.decodeUnknownOption(IssueSeverity) export function registerSetIssueSeverityTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "set_issue_severity", "Set or clear the triage severity of an issue. Severity drives escalation routing (critical/high/medium/low). 
API-key agents write with 'ai' precedence, so a human's manual severity is never overwritten; human sessions write a sticky manual override.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/transition-error-issue.ts b/apps/api/src/mcp/tools/transition-error-issue.ts index 2974a216..e03f687e 100644 --- a/apps/api/src/mcp/tools/transition-error-issue.ts +++ b/apps/api/src/mcp/tools/transition-error-issue.ts @@ -16,7 +16,7 @@ const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) const decodeWorkflowState = Schema.decodeUnknownOption(WorkflowState) export function registerTransitionErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "transition_error_issue", "Move an error issue to a new workflow state. Valid transitions: triage→(todo|in_progress|cancelled|wontfix); todo→(triage|in_progress|cancelled|wontfix); in_progress→(triage|todo|in_review|cancelled|wontfix); in_review→(triage|in_progress|done|cancelled|wontfix); done→(triage|in_progress|cancelled|wontfix); wontfix→(triage|cancelled).", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/types.ts b/apps/api/src/mcp/tools/types.ts index 323fbe91..5495a2df 100644 --- a/apps/api/src/mcp/tools/types.ts +++ b/apps/api/src/mcp/tools/types.ts @@ -38,12 +38,26 @@ export interface McpToolResult { } export interface McpToolRegistrar { + /** Register a read-only tool. */ tool>( name: string, description: string, schema: TSchema, handler: (params: TSchema["Type"]) => Effect.Effect, ): void + /** + * Register a MUTATING (state-changing) tool. Structurally marks the tool so + * the `run_code` sandbox refuses it and the chat approval-gates it — declared + * here at the tool rather than in a name list, so a copied/new mutating tool + * carries its own gating. The shared `MUTATING_TOOL_NAMES` set is verified to + * equal the set of tools registered this way (see `mutating.test.ts`). 
+ */ + mutatingTool>( + name: string, + description: string, + schema: TSchema, + handler: (params: TSchema["Type"]) => Effect.Effect, + ): void } export const requiredStringParam = (description: string) => Schema.String.annotate({ description }) diff --git a/apps/api/src/mcp/tools/update-alert-rule.ts b/apps/api/src/mcp/tools/update-alert-rule.ts index cd06572e..495c7294 100644 --- a/apps/api/src/mcp/tools/update-alert-rule.ts +++ b/apps/api/src/mcp/tools/update-alert-rule.ts @@ -142,7 +142,7 @@ function buildUpdatedRequest( } export function registerUpdateAlertRuleTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "update_alert_rule", "Update an existing alert rule. Only provide the fields you want to change — every other field keeps its current value. " + "Use list_alert_rules to find rule IDs and destination IDs, or get_alert_rule to inspect the current config first.", diff --git a/apps/api/src/mcp/tools/update-dashboard-widget.ts b/apps/api/src/mcp/tools/update-dashboard-widget.ts index 1b973a42..b200223b 100644 --- a/apps/api/src/mcp/tools/update-dashboard-widget.ts +++ b/apps/api/src/mcp/tools/update-dashboard-widget.ts @@ -12,7 +12,7 @@ import { resolveTenant } from "../lib/query-warehouse" const TOOL = "update_dashboard_widget" export function registerUpdateDashboardWidgetTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, 'Replace a single widget on an existing dashboard. Pass the full widget JSON (same shape as one entry in `widgets[]` from get_dashboard) for ONLY the widget you want to change. Other widgets and dashboard metadata are left untouched. The stored widget id is always forced to the widget_id parameter, so any id inside widget_json is ignored.\n\nThe response includes an automatic validation summary (verdict, flags). 
If `verdict` is `suspicious` or `broken`, fix the widget and call this tool again — the chart will not render meaningful data as-is.\n\nTrace and log queries omit the metric-only fields (`metricName`/`metricType`/`isMonotonic`/`signalSource`) — only `dataSource: "metrics"` queries carry them. `whereClause` is a custom grammar (`=`, `>`, `<`, `>=`, `<=`, `contains`, `exists` joined by ` AND `) — there is NO SQL `IS NULL`/`IS NOT NULL`; use ` exists` to require an attribute. See the `maple://instructions` resource for the full widget JSON shape (aggregations per source, groupBy prefixes, units, stat reduceToValue, hideSeries).', Schema.Struct({ diff --git a/apps/api/src/mcp/tools/update-dashboard.ts b/apps/api/src/mcp/tools/update-dashboard.ts index fe195136..6b4778a3 100644 --- a/apps/api/src/mcp/tools/update-dashboard.ts +++ b/apps/api/src/mcp/tools/update-dashboard.ts @@ -18,7 +18,7 @@ const TIME_RANGE_MAP: Record = { } export function registerUpdateDashboardTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "update_dashboard", "Update an existing dashboard's top-level metadata (name, description, time_range). For widget-level changes prefer the incremental tools: add_dashboard_widget, update_dashboard_widget, remove_dashboard_widget, reorder_dashboard_widgets — they do not require re-sending the whole dashboard. 
`dashboard_json` is still accepted as an escape hatch for full replacement but is expensive on large dashboards and easy to corrupt.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/update-error-notification-policy.ts b/apps/api/src/mcp/tools/update-error-notification-policy.ts index 32b4fc24..b67f6401 100644 --- a/apps/api/src/mcp/tools/update-error-notification-policy.ts +++ b/apps/api/src/mcp/tools/update-error-notification-policy.ts @@ -16,7 +16,7 @@ const decodeSeverity = Schema.decodeUnknownOption(AlertSeverity) const decodeDestinationId = Schema.decodeUnknownEffect(AlertDestinationId) export function registerUpdateErrorNotificationPolicyTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "update_error_notification_policy", "Configure the org-wide error notification policy. Controls whether incidents (first-seen, regression, auto-resolve) dispatch to alert destinations. Omit a field to leave it unchanged.", Schema.Struct({ diff --git a/apps/chat-flue/alchemy.run.ts b/apps/chat-flue/alchemy.run.ts index 26d6c7b2..48f53351 100644 --- a/apps/chat-flue/alchemy.run.ts +++ b/apps/chat-flue/alchemy.run.ts @@ -1,7 +1,7 @@ import { execFileSync } from "node:child_process" import path from "node:path" import alchemy from "alchemy" -import { Ai, DurableObjectNamespace, Worker } from "alchemy/cloudflare" +import { Ai, DurableObjectNamespace, Worker, WorkerLoader } from "alchemy/cloudflare" import type { MapleDomains, MapleStage } from "@maple/infra/cloudflare" import { CLOUDFLARE_WORKER_PLACEMENT, @@ -109,6 +109,11 @@ export const createChatFlueWorker = async ({ stage, domains, mapleApiUrl }: Crea ...optionalPlain("MAPLE_ENVIRONMENT", resolveDeploymentEnvironment(stage)), ...optionalPlain("MAPLE_CHAT_MODEL"), ...optionalPlain("MAPLE_TRIAGE_MODEL"), + // Code Mode sandbox (Cloudflare Dynamic Workers). The `worker_loader` + // binding powers the `run_code` tool's isolate; its presence is what + // activates Code Mode at runtime. 
Requires Worker Loader beta access on + // the account. + LOADER: WorkerLoader(), ...optionalPlain("MAPLE_AUTH_MODE", "self_hosted"), ...optionalSecret("MAPLE_ROOT_PASSWORD"), ...optionalSecret("CLERK_SECRET_KEY"), diff --git a/apps/chat-flue/package.json b/apps/chat-flue/package.json index a6d34b7b..124358a6 100644 --- a/apps/chat-flue/package.json +++ b/apps/chat-flue/package.json @@ -3,17 +3,23 @@ "private": true, "type": "module", "scripts": { - "dev": "flue dev --target cloudflare", + "dev": "portless", + "dev:app": "flue dev --target cloudflare --port ${PORT:-3583} --env ../../.env.local", "build": "flue build --target cloudflare", "deploy": "flue build --target cloudflare && wrangler deploy --config dist/maple_chat_flue/wrangler.json", "connect": "flue connect maple-chat local", "test": "vitest run --passWithNoTests", "typecheck": "tsc --noEmit" }, + "portless": { + "name": "chat-flue", + "script": "dev:app" + }, "dependencies": { "@clerk/backend": "^2.30.1", "@flue/opentelemetry": "1.0.0-beta.1", "@flue/runtime": "1.0.0-beta.2", + "@maple/codemode": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^2.0.0", "@opentelemetry/exporter-trace-otlp-http": "^0.205.0", diff --git a/apps/chat-flue/src/agents/maple-chat.ts b/apps/chat-flue/src/agents/maple-chat.ts index e73189a7..293bfd73 100644 --- a/apps/chat-flue/src/agents/maple-chat.ts +++ b/apps/chat-flue/src/agents/maple-chat.ts @@ -2,6 +2,7 @@ import { createAgent, type AgentRouteHandler, type McpServerConnection } from "@ import { tracing } from "cloudflare:workers" import { applyApprovalGates } from "../lib/approval.ts" import { instanceIdFromAgentPath } from "../lib/auth.ts" +import { buildCodeModeApi, createRunCodeTool, type CodeModeApi } from "../lib/codemode/index.ts" import type { ChatFlueEnv } from "../lib/env.ts" import { connectMapleMcp, MCP_DEFAULT_TIMEOUT_MS } from "../lib/mcp.ts" import { buildSystemPrompt, modeFromInstanceId } from "../lib/modes.ts" @@ -74,7 +75,6 @@ export 
default createAgent(async (ctx) => { // the Phase 2 frontend integration point; until then the base prompt for the // mode is used. const mode = modeFromInstanceId(ctx.id) - const instructions = buildSystemPrompt({ mode }) // Connect to Maple's MCP server (all tools). We tolerate connection failures so // the agent still answers on Workers AI when apps/api or INTERNAL_SERVICE_TOKEN @@ -114,6 +114,22 @@ export default createAgent(async (ctx) => { } } + // Code Mode: add a `run_code` tool backed by the SAME gated tools (so mutations + // still only propose) and inject the generated `maple.*` API into the prompt. + // The direct tools stay available alongside it. Active whenever the Worker + // Loader sandbox is bound — i.e. everywhere except local dev, where it degrades + // to the direct tools. + let codeModeApi: CodeModeApi | undefined + if (tools.length > 0 && ctx.env.LOADER) { + codeModeApi = buildCodeModeApi(tools) + tools = [...tools, createRunCodeTool(ctx.env, codeModeApi)] + } + + const instructions = buildSystemPrompt({ + mode, + codeMode: codeModeApi ? { declaration: codeModeApi.declaration } : undefined, + }) + return { model: ctx.env.MAPLE_CHAT_MODEL ?? DEFAULT_MODEL, instructions, diff --git a/apps/chat-flue/src/lib/approval.ts b/apps/chat-flue/src/lib/approval.ts index 29200bd1..50e61147 100644 --- a/apps/chat-flue/src/lib/approval.ts +++ b/apps/chat-flue/src/lib/approval.ts @@ -1,41 +1,15 @@ import type { ToolDefinition } from "@flue/runtime" +// Single source of truth shared with apps/api (apps/api re-exports the same set) +// so the gated-tool lists can't drift. The legacy chat agent gated these with +// `@cloudflare/ai-chat`'s approval interrupt — Flue's event stream has no +// human-in-the-loop interrupt, so we use **propose-then-apply** instead: the +// agent calls the tool, but its `execute` returns a proposal marker WITHOUT +// performing the mutation. 
The web client renders an approval card from that +// result and performs the real mutation (via Maple's existing API) on approve. +import { MUTATING_TOOL_NAMES } from "@maple/codemode" import { baseToolName } from "./mcp.ts" -/** - * Mutating Maple tools (base names). The legacy chat agent gated these with - * `@cloudflare/ai-chat`'s approval interrupt — Flue's event stream has no - * human-in-the-loop interrupt, so we use **propose-then-apply** instead: the - * agent calls the tool, but its `execute` returns a proposal marker WITHOUT - * performing the mutation. The web client renders an approval card from that - * result and performs the real mutation (via Maple's existing API) on approve. - * - * Keep in sync with the mutating tools in apps/api/src/mcp/tools. - */ -export const MUTATING_TOOL_NAMES: ReadonlySet = new Set([ - // dashboards - "create_dashboard", - "update_dashboard", - "add_dashboard_widget", - "update_dashboard_widget", - "remove_dashboard_widget", - "reorder_dashboard_widgets", - "replace_dashboard_widgets", - // alerts - "create_alert_rule", - "update_alert_rule", - "delete_alert_rule", - // error issues - "claim_error_issue", - "release_error_issue", - "transition_error_issue", - "comment_on_error_issue", - "heartbeat_error_issue", - "set_issue_severity", - "update_error_notification_policy", - // fixes / agents - "propose_fix", - "register_agent", -]) +export { MUTATING_TOOL_NAMES } /** Marker an approval-gated tool returns instead of mutating. 
*/ export interface ToolProposal { diff --git a/apps/chat-flue/src/lib/codemode/api-gen.ts b/apps/chat-flue/src/lib/codemode/api-gen.ts new file mode 100644 index 00000000..de071b8d --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/api-gen.ts @@ -0,0 +1,40 @@ +import type { ToolDefinition } from "@flue/runtime" +import { buildApiDeclaration, RUN_CODE_TOOL_NAME, type CodeModeToolSpec, type JsonSchema } from "@maple/codemode" +import { baseToolName } from "../mcp.ts" + +export interface CodeModeApi { + /** The `declare const maple: { ... }` surface injected into the system prompt. */ + readonly declaration: string + /** Base tool name -> the (approval-gated) Flue tool `execute`, the RPC backend. */ + readonly dispatch: ReadonlyMap + /** Base names exposed to code mode (for telemetry / debugging). */ + readonly toolNames: ReadonlyArray +} + +const isJsonSchema = (p: unknown): p is JsonSchema => + typeof p === "object" && p !== null && ("properties" in p || "type" in p) + +/** + * Project the connected (already approval-gated) MCP tools into a Code Mode API: + * the `maple.*` TypeScript declaration for the prompt plus a name->execute + * dispatch map. Built from the SAME gated array the direct-tool path uses, so a + * mutating `maple.create_dashboard(...)` call runs the proposal-returning + * `execute` and never mutates — approval gating is inherited for free. + */ +export const buildCodeModeApi = (tools: ReadonlyArray): CodeModeApi => { + const dispatch = new Map() + const specs: CodeModeToolSpec[] = [] + for (const tool of tools) { + const name = baseToolName(tool.name) + // Never expose run_code to itself (the chat path appends run_code after this + // runs, so this is defense-in-depth against a future ordering change). + if (name === RUN_CODE_TOOL_NAME || dispatch.has(name)) continue + dispatch.set(name, tool.execute) + specs.push({ + name, + description: tool.description, + parameters: isJsonSchema(tool.parameters) ? 
tool.parameters : undefined, + }) + } + return { declaration: buildApiDeclaration(specs), dispatch, toolNames: specs.map((s) => s.name) } +} diff --git a/apps/chat-flue/src/lib/codemode/codemode.test.ts b/apps/chat-flue/src/lib/codemode/codemode.test.ts new file mode 100644 index 00000000..a9187d11 --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/codemode.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it, vi } from "vitest" +import type { ToolDefinition } from "@flue/runtime" +import type { CodeProposal } from "@maple/codemode" +import { buildCodeModeApi } from "./api-gen.ts" +import { createCodeModeDispatch } from "./run-code-tool.ts" + +const tool = (name: string, execute: ToolDefinition["execute"], parameters: object = { type: "object", properties: {} }): ToolDefinition => ({ + name, + description: `desc for ${name}`, + parameters, + execute, +}) + +describe("buildCodeModeApi", () => { + it("strips the mcp__maple__ prefix and builds a declaration + dispatch", () => { + const tools = [ + tool("mcp__maple__find_errors", async () => "errors", { + type: "object", + properties: { service: { type: "string", description: "svc" } }, + required: [], + }), + tool("mcp__maple__list_services", async () => "services"), + ] + const api = buildCodeModeApi(tools) + expect(api.toolNames).toContain("find_errors") + expect(api.toolNames).toContain("list_services") + expect(api.declaration).toContain("find_errors(input: { /** svc */ service?: string }): Promise;") + expect(api.dispatch.get("find_errors")).toBeTypeOf("function") + }) + + it("keeps the first tool when base names collide", () => { + const first = vi.fn(async () => "first") + const second = vi.fn(async () => "second") + const api = buildCodeModeApi([tool("mcp__maple__x", first), tool("x", second)]) + expect(api.dispatch.size).toBe(1) + expect(api.dispatch.get("x")).toBe(first) + }) +}) + +describe("createCodeModeDispatch", () => { + it("returns ok:false for an unknown tool", async () => { + const dispatch = 
createCodeModeDispatch(new Map(), () => {}) + const r = await dispatch("nope", {}) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("UnknownTool") + }) + + it("runs a read tool and returns its value", async () => { + const map = new Map([["list_services", async () => "services table"]]) + const r = await createCodeModeDispatch(map, () => {})("list_services", { environment: "prod" }) + expect(r).toEqual({ ok: true, value: "services table" }) + }) + + it("collects a proposal from a gated mutating tool while returning its value", async () => { + const proposals: CodeProposal[] = [] + // Gated mutating execute returns a proposal marker instead of mutating. + const gated: ToolDefinition["execute"] = async (args) => + JSON.stringify({ status: "proposed", tool: "create_dashboard", input: args }) + const map = new Map([["create_dashboard", gated]]) + const r = await createCodeModeDispatch(map, (p) => proposals.push(p))("create_dashboard", { title: "x" }) + expect(r.ok).toBe(true) + expect(proposals).toEqual([{ tool: "create_dashboard", input: { title: "x" } }]) + }) +}) diff --git a/apps/chat-flue/src/lib/codemode/index.ts b/apps/chat-flue/src/lib/codemode/index.ts new file mode 100644 index 00000000..f3a7fb8d --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/index.ts @@ -0,0 +1,2 @@ +export { buildCodeModeApi, type CodeModeApi } from "./api-gen.ts" +export { createRunCodeTool } from "./run-code-tool.ts" diff --git a/apps/chat-flue/src/lib/codemode/run-code-tool.ts b/apps/chat-flue/src/lib/codemode/run-code-tool.ts new file mode 100644 index 00000000..9b4a2abe --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/run-code-tool.ts @@ -0,0 +1,78 @@ +import type { ToolDefinition } from "@flue/runtime" +import { formatRunResult, RUN_CODE_TOOL_NAME, type CodeProposal, type RpcCallResult } from "@maple/codemode" +import { parseToolProposal } from "../approval.ts" +import type { ChatFlueEnv } from "../env.ts" +import type { CodeModeApi } from "./api-gen.ts" + +/** + * 
Build the `maple.(input)` dispatch for a code run: look up the gated + * tool `execute`, run it, and — because mutating tools' gated execute returns a + * proposal marker instead of mutating — collect any proposal via `onProposal` + * while still returning its value to the model. Unknown tools become error + * values so the model self-corrects. Extracted (and free of the Workers-only + * sandbox import) so the approval-collection logic is unit-testable. + */ +export const createCodeModeDispatch = ( + dispatch: CodeModeApi["dispatch"], + onProposal: (proposal: CodeProposal) => void, +) => { + return async (name: string, input: unknown): Promise => { + const execute = dispatch.get(name) + if (!execute) { + return { ok: false, error: { name: "UnknownTool", message: `maple.${name} is not available` } } + } + const value = await execute((input ?? {}) as Record) + const proposal = parseToolProposal(value) + if (proposal) onProposal({ tool: proposal.tool, input: proposal.input }) + return { ok: true, value } + } +} + +const DESCRIPTION = `Run a JavaScript snippet against Maple's observability data using the \`maple\` API declared in the system prompt. Prefer this for any multi-step investigation: call several \`maple.*\` tools, filter/aggregate their results in code, and \`console.log\`/\`return\` only what matters — one call instead of many round-trips. Imports and network are disabled. \`await maple.(input)\` returns the tool's text output and throws on failure (wrap in try/catch to keep going). Mutating tools only PROPOSE a change for the user to approve.` + +/** + * A single local Flue tool that executes model-written code in a fresh + * Cloudflare Dynamic Worker isolate (network blocked), bridging each + * `maple.(input)` call back to the connected MCP tools via the + * supervisor RPC. 
Mutating calls run the approval-gated `execute`, so they + * return a proposal marker instead of mutating; the proposals are collected and + * surfaced to the web client as a `proposed_batch` envelope. + */ +export const createRunCodeTool = (env: ChatFlueEnv, api: CodeModeApi): ToolDefinition => ({ + name: RUN_CODE_TOOL_NAME, + description: DESCRIPTION, + parameters: { + type: "object", + properties: { + code: { + type: "string", + description: + "A JavaScript snippet. Use `await maple.(input)`, `console.log(...)`, and `return`. No imports, no network, no type annotations.", + }, + }, + required: ["code"], + }, + execute: async (args) => { + const code = typeof args?.code === "string" ? args.code : "" + const loader = env.LOADER + if (!loader) { + return "Code mode is unavailable (no sandbox runtime is bound). Call the mcp__maple__* tools directly instead." + } + if (!code.trim()) { + return "No code provided. Pass a `code` string that uses the `maple` API." + } + + const proposals: CodeProposal[] = [] + const dispatch = createCodeModeDispatch(api.dispatch, (p) => proposals.push(p)) + + // Dynamic import: the sandbox driver pulls in `cloudflare:workers`, so keep + // it out of this module's static graph (importable by Node-based tests). + const { runCodeInSandbox } = await import("@maple/codemode/sandbox") + const result = await runCodeInSandbox(loader, { + id: `maple-codemode-${crypto.randomUUID()}`, + code, + dispatch, + }) + return formatRunResult(result, proposals) + }, +}) diff --git a/apps/chat-flue/src/lib/env.ts b/apps/chat-flue/src/lib/env.ts index 79502075..8a7ef329 100644 --- a/apps/chat-flue/src/lib/env.ts +++ b/apps/chat-flue/src/lib/env.ts @@ -15,6 +15,16 @@ export interface ChatFlueEnv { /** Deployment environment label, surfaced on telemetry. 
*/ MAPLE_ENVIRONMENT?: string + // --- Code Mode (Cloudflare Dynamic Workers / Worker Loader) --- + /** + * Worker Loader binding (`worker_loader`) used to spin up a fresh sandbox + * isolate per `run_code` call. Its presence is what activates Code Mode: when + * bound, the agent gets a `run_code` tool + the generated `maple.*` API; when + * absent (e.g. local dev), the agent uses the direct tools. Requires + * Cloudflare Worker Loader beta access on the account. + */ + LOADER?: WorkerLoader + // --- Telemetry (OpenTelemetry → Maple ingest) --- /** * Maple ingest key (org-scoped; use the internal-org key, same as `apps/api`). diff --git a/apps/chat-flue/src/lib/modes.ts b/apps/chat-flue/src/lib/modes.ts index fa031621..1bb47603 100644 --- a/apps/chat-flue/src/lib/modes.ts +++ b/apps/chat-flue/src/lib/modes.ts @@ -4,7 +4,7 @@ // mode + context in each request body; here `buildSystemPrompt` assembles the // instructions and the mode is derived from the agent instance id. -import { DASHBOARD_BUILDER_SYSTEM_PROMPT, SYSTEM_PROMPT } from "./prompts.ts" +import { DASHBOARD_BUILDER_SYSTEM_PROMPT, formatCodeModeBlock, SYSTEM_PROMPT } from "./prompts.ts" import { tabIdFromInstanceId } from "./org.ts" export type ChatMode = "default" | "dashboard-builder" | "alert" | "widget-fix" @@ -229,6 +229,8 @@ export interface BuildSystemPromptArgs { alertContext?: AlertContext widgetFixContext?: WidgetFixContext pageContext?: PageContextPayload + /** When set (Code Mode on), append the `run_code` instructions + `maple.*` API. */ + codeMode?: { declaration: string } } /** @@ -237,10 +239,14 @@ export interface BuildSystemPromptArgs { * apps/chat-agent/src/index.ts `runChatTurn`. */ export const buildSystemPrompt = (args: BuildSystemPromptArgs): string => { - const { mode, alertContext, widgetFixContext, pageContext } = args + const { mode, alertContext, widgetFixContext, pageContext, codeMode } = args let prompt = mode === "dashboard-builder" ? 
DASHBOARD_BUILDER_SYSTEM_PROMPT : SYSTEM_PROMPT + if (codeMode) { + prompt += `\n\n${formatCodeModeBlock(codeMode.declaration)}` + } + if (mode === "alert" && alertContext) { prompt += `\n${formatAlertContextBlock(alertContext)}` } diff --git a/apps/chat-flue/src/lib/prompts.ts b/apps/chat-flue/src/lib/prompts.ts index 24d6fa8c..3fd011be 100644 --- a/apps/chat-flue/src/lib/prompts.ts +++ b/apps/chat-flue/src/lib/prompts.ts @@ -12,6 +12,53 @@ Maple's tools are exposed over MCP and named \`mcp__maple__\` (for example \`mcp__maple__find_errors\`). This document refers to them by their short names; call them by their full \`mcp__maple__\` name.` +/** + * Code Mode block (appended when the `run_code` sandbox is bound). + * Gives the model a `run_code` tool and the generated `maple.*` API surface so + * it can write one snippet that chains/filters many tool calls instead of + * round-tripping each. The direct `mcp__maple__*` tools remain available. + */ +export const formatCodeModeBlock = (declaration: string): string => `## Code Mode — prefer writing code for multi-step work + +You have a \`run_code\` tool that runs a JavaScript snippet in a secure sandbox. +Inside that snippet you can call Maple through the typed \`maple\` API below. Each +method maps to a Maple tool and returns the tool's text output. + +Prefer \`run_code\` whenever a task needs MORE THAN ONE tool call — e.g. "find the +worst service then show a sample trace", looping over results, filtering, or +combining several queries. Writing one snippet that does the whole investigation +is faster and cheaper than calling the \`mcp__maple__*\` tools one at a time. For a +single lookup, calling the direct tool is fine. + +### The \`maple\` API +\`\`\`ts +${declaration} +\`\`\` + +### Rules for the code you write +- Plain JavaScript only. No \`import\`/\`require\`, no type annotations, no network. 
+- \`await maple.(input)\` returns a STRING: human-readable text followed by a + \`Structured content:\` line with JSON. \`JSON.parse\` that JSON block when you need + to filter or sort programmatically. +- A failing call THROWS — wrap risky calls in \`try/catch\` to continue; otherwise + the error is reported back to you to fix on the next turn. +- \`console.log(...)\` anything you want to see; \`return\` a final value. Only your + logged/returned output comes back — keep it small (summarize, don't dump). +- Mutating tools (create/update/delete/transition) only PROPOSE a change: calling + one queues it for the user's approval and does NOT take effect. Call it once + with the intended arguments; never write Approve/Deny prose. + +### Example +\`\`\`js +const raw = await maple.list_services({}) +const services = JSON.parse(raw.split("Structured content:")[1]).services ?? [] +const worst = services.sort((a, b) => b.errorRate - a.errorRate)[0] +if (!worst) return "No services found." +console.log("Worst service:", worst.name, "errorRate", worst.errorRate) +const traces = await maple.search_traces({ service: worst.name, only_errors: true, limit: 1 }) +console.log(traces) +\`\`\`` + const APPROVAL_NOTE = `## Mutating actions are approved before they take effect Tools that create, update, delete, or transition state (dashboards, alert rules, error issues, notification policies, comments, fix proposals) do not take effect diff --git a/apps/web/src/components/chat/chat-conversation.tsx b/apps/web/src/components/chat/chat-conversation.tsx index deeb4595..d239b3d9 100644 --- a/apps/web/src/components/chat/chat-conversation.tsx +++ b/apps/web/src/components/chat/chat-conversation.tsx @@ -17,7 +17,7 @@ import { type PageContextPayload, } from "./auto-contexts" import type { ChatContext } from "./context-preamble" -import { parseToolProposal } from "./tool-proposal" +import { parseToolProposal, parseToolProposalBatch } from "./tool-proposal" import { PageContextChips } from 
"./page-context-chips" import { Conversation, @@ -372,6 +372,38 @@ export function ChatConversation({ } if (isToolPart(part)) { const tp = part as ToolPart + // Code Mode: one run_code call can queue several + // mutations — render an approval card per proposal, + // keyed `${toolCallId}#${i}` so each resolves + // independently. + const batch = + tp.state === "output-available" + ? parseToolProposalBatch(tp.output) + : null + if (batch) { + flushTools() + batch.forEach((proposal, bi) => { + const cardKey = `${tp.toolCallId}#${bi}` + const resolved = resolvedApprovals.get(cardKey) + nodes.push( + + handleApprove( + cardKey, + proposal.tool, + proposal.input, + ) + } + onDeny={() => resolveApproval(cardKey, "denied")} + />, + ) + }) + continue + } const proposal = tp.state === "output-available" ? parseToolProposal(tp.output) diff --git a/apps/web/src/components/chat/tool-proposal.test.ts b/apps/web/src/components/chat/tool-proposal.test.ts index e27bdba9..c7dfe885 100644 --- a/apps/web/src/components/chat/tool-proposal.test.ts +++ b/apps/web/src/components/chat/tool-proposal.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest" -import { parseToolProposal } from "./tool-proposal" +import { parseToolProposal, parseToolProposalBatch } from "./tool-proposal" describe("parseToolProposal", () => { it("parses a JSON-string proposal (Flue's tool output)", () => { @@ -27,4 +27,41 @@ describe("parseToolProposal", () => { expect(parseToolProposal(undefined)).toBeNull() expect(parseToolProposal(42)).toBeNull() }) + + it("does not treat a proposed_batch as a single proposal", () => { + const out = JSON.stringify({ + status: "proposed_batch", + proposals: [{ tool: "create_dashboard", input: {} }], + }) + expect(parseToolProposal(out)).toBeNull() + }) +}) + +describe("parseToolProposalBatch", () => { + it("parses a run_code proposed_batch envelope into one proposal per change", () => { + const out = JSON.stringify({ + status: "proposed_batch", + proposals: [ + { 
tool: "create_dashboard", input: { title: "x" } }, + { tool: "add_dashboard_widget", input: { id: "1" } }, + ], + text: "did stuff", + }) + const batch = parseToolProposalBatch(out) + expect(batch).toHaveLength(2) + expect(batch?.[0]).toEqual({ status: "proposed", tool: "create_dashboard", input: { title: "x" } }) + expect(batch?.[1]?.tool).toBe("add_dashboard_widget") + }) + + it("drops malformed entries and returns null when nothing valid remains", () => { + expect( + parseToolProposalBatch(JSON.stringify({ status: "proposed_batch", proposals: [{ no: "tool" }] })), + ).toBeNull() + }) + + it("returns null for non-batch output", () => { + expect(parseToolProposalBatch("plain text")).toBeNull() + expect(parseToolProposalBatch(JSON.stringify({ status: "proposed", tool: "x" }))).toBeNull() + expect(parseToolProposalBatch(null)).toBeNull() + }) }) diff --git a/apps/web/src/components/chat/tool-proposal.ts b/apps/web/src/components/chat/tool-proposal.ts index 7aeedd66..c9e1b0ab 100644 --- a/apps/web/src/components/chat/tool-proposal.ts +++ b/apps/web/src/components/chat/tool-proposal.ts @@ -26,3 +26,30 @@ export const parseToolProposal = (output: unknown): ToolProposal | null => { ? { status: "proposed", tool: v.tool, input: v.input } : null } + +/** + * Code Mode (`run_code`) can queue several mutations in one snippet. Its output + * is a `{ status: "proposed_batch", proposals: [...] }` envelope; parse it into + * one {@link ToolProposal} per queued change so the UI can render an approval + * card for each. Mirrors `formatRunResult` in `@maple/codemode`. Returns `null` + * when the output isn't a batch envelope. 
+ */ +export const parseToolProposalBatch = (output: unknown): ToolProposal[] | null => { + let value: unknown = output + if (typeof output === "string") { + try { + value = JSON.parse(output) + } catch { + return null + } + } + if (!value || typeof value !== "object") return null + const v = value as Record + if (v.status !== "proposed_batch" || !Array.isArray(v.proposals)) return null + const proposals = v.proposals + .filter((p): p is { tool: string; input: unknown } => { + return !!p && typeof p === "object" && typeof (p as Record).tool === "string" + }) + .map((p) => ({ status: "proposed" as const, tool: p.tool, input: p.input })) + return proposals.length > 0 ? proposals : null +} diff --git a/bun.lock b/bun.lock index d767dca0..5807dce2 100644 --- a/bun.lock +++ b/bun.lock @@ -43,6 +43,7 @@ "@effect/platform-bun": "catalog:effect", "@flue/sdk": "1.0.0-beta.1", "@maple-dev/effect-sdk": "workspace:*", + "@maple/codemode": "workspace:*", "@maple/db": "workspace:*", "@maple/domain": "workspace:*", "@maple/effect-cloudflare": "workspace:*", @@ -77,6 +78,7 @@ "@clerk/backend": "^2.30.1", "@flue/opentelemetry": "1.0.0-beta.1", "@flue/runtime": "1.0.0-beta.2", + "@maple/codemode": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^2.0.0", "@opentelemetry/exporter-trace-otlp-http": "^0.205.0", @@ -423,6 +425,14 @@ "typescript": "catalog:tooling", }, }, + "packages/codemode": { + "name": "@maple/codemode", + "devDependencies": { + "@cloudflare/workers-types": "4.20260603.1", + "typescript": "catalog:tooling", + "vitest": "catalog:", + }, + }, "packages/db": { "name": "@maple/db", "dependencies": { @@ -1394,6 +1404,8 @@ "@maple/clickhouse-cli": ["@maple/clickhouse-cli@workspace:packages/clickhouse-cli"], + "@maple/codemode": ["@maple/codemode@workspace:packages/codemode"], + "@maple/db": ["@maple/db@workspace:packages/db"], "@maple/domain": ["@maple/domain@workspace:packages/domain"], diff --git a/knip.json b/knip.json index 047d1968..7e0ebc04 
100644 --- a/knip.json +++ b/knip.json @@ -50,6 +50,9 @@ "lib/effect-cloudflare": { "ignoreDependencies": ["cloudflare"] }, + "packages/codemode": { + "ignoreDependencies": ["cloudflare"] + }, "packages/query-engine": { "entry": ["src/ch/expr.ts", "src/drain/index.ts"] } diff --git a/packages/codemode/package.json b/packages/codemode/package.json new file mode 100644 index 00000000..ec9eef50 --- /dev/null +++ b/packages/codemode/package.json @@ -0,0 +1,19 @@ +{ + "name": "@maple/codemode", + "private": true, + "type": "module", + "description": "Cloudflare 'Code Mode' building blocks: generate a typed TS API from tool specs, build the sandbox-isolate harness, and run model-written code in a dynamic worker. The root barrel is pure (no Workers runtime); `./sandbox` pulls in `cloudflare:workers`.", + "exports": { + ".": "./src/index.ts", + "./sandbox": "./src/sandbox.ts" + }, + "scripts": { + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "devDependencies": { + "@cloudflare/workers-types": "4.20260603.1", + "typescript": "catalog:tooling", + "vitest": "catalog:" + } +} diff --git a/packages/codemode/src/api-gen.test.ts b/packages/codemode/src/api-gen.test.ts new file mode 100644 index 00000000..198f28bf --- /dev/null +++ b/packages/codemode/src/api-gen.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from "vitest" +import { buildApiDeclaration, clampDesc, escapeJsDoc, inputTypeForTool, tsTypeForSchema } from "./api-gen.ts" +import type { JsonSchema } from "./types.ts" + +describe("tsTypeForSchema", () => { + it("maps primitives", () => { + expect(tsTypeForSchema({ type: "string" })).toBe("string") + expect(tsTypeForSchema({ type: "integer" })).toBe("number") + expect(tsTypeForSchema({ type: "number" })).toBe("number") + expect(tsTypeForSchema({ type: "boolean" })).toBe("boolean") + }) + + it("renders arrays of the item type", () => { + expect(tsTypeForSchema({ type: "array", items: { type: "string" } })).toBe("string[]") + }) + + it("renders small 
string enums as literal unions", () => { + expect(tsTypeForSchema({ enum: ["traces", "logs", "metrics"] })).toBe( + '"traces" | "logs" | "metrics"', + ) + }) + + it("falls back to string for a giant enum", () => { + const big = Array.from({ length: 20 }, (_, i) => `v${i}`) + expect(tsTypeForSchema({ type: "string", enum: big })).toBe("string") + }) + + it("collapses deeply nested objects to Record", () => { + const schema: JsonSchema = { + type: "object", + properties: { + a: { type: "object", properties: { b: { type: "object", properties: { c: { type: "string" } } } } }, + }, + } + const out = tsTypeForSchema(schema, 0) + expect(out).toContain("Record") + }) + + it("treats optional-vs-required via the required array", () => { + const schema: JsonSchema = { + type: "object", + properties: { id: { type: "string" }, limit: { type: "integer" } }, + required: ["id"], + } + const out = inputTypeForTool(schema) + expect(out).toContain("id: string") + expect(out).toContain("limit?: number") + }) + + it("renders anyOf as a union", () => { + expect(tsTypeForSchema({ anyOf: [{ type: "string" }, { type: "number" }] })).toBe( + "string | number", + ) + }) +}) + +describe("inputTypeForTool", () => { + it("returns a Record for a parameterless tool", () => { + expect(inputTypeForTool(undefined)).toBe("Record") + expect(inputTypeForTool({ type: "object", properties: {} })).toBe("Record") + }) +}) + +describe("escapeJsDoc / clampDesc", () => { + it("neutralizes a comment terminator", () => { + expect(escapeJsDoc("ends here */ and more")).toBe("ends here *\\/ and more") + }) + + it("collapses whitespace and clamps length", () => { + expect(clampDesc(" a\n b c ", 50)).toBe("a b c") + expect(clampDesc("abcdefgh", 6)).toBe("abc...") + }) +}) + +describe("buildApiDeclaration", () => { + it("emits one sorted, JSDoc'd method per tool returning Promise", () => { + const decl = buildApiDeclaration([ + { + name: "find_errors", + description: "Find errors", + parameters: { type: "object", 
properties: { service: { type: "string", description: "svc name */ x" } } }, + }, + { name: "compare_periods", description: "Compare two periods", parameters: undefined }, + ]) + // sorted: compare_periods before find_errors + expect(decl.indexOf("compare_periods")).toBeLessThan(decl.indexOf("find_errors")) + expect(decl).toContain("declare const maple: {") + expect(decl).toContain("find_errors(input: { /** svc name *\\/ x */ service?: string }): Promise;") + expect(decl).toContain("compare_periods(input: Record): Promise;") + expect(decl).not.toContain("*/ x */ service") // terminator escaped + }) +}) diff --git a/packages/codemode/src/api-gen.ts b/packages/codemode/src/api-gen.ts new file mode 100644 index 00000000..02bd42af --- /dev/null +++ b/packages/codemode/src/api-gen.ts @@ -0,0 +1,106 @@ +import type { CodeModeToolSpec, JsonSchema } from "./types.ts" + +const MAX_METHOD_DESC = 280 +const MAX_PROP_DESC = 100 +const MAX_OBJECT_DEPTH = 2 + +/** Neutralize a comment terminator so a description can't close a JSDoc/inline comment. */ +export const escapeJsDoc = (s: string): string => s.replace(/\*\//g, "*\\/") + +/** Collapse whitespace and clamp to `max` chars so 50+ tools don't blow context. */ +export const clampDesc = (s: string | undefined, max: number): string => { + if (!s) return "" + const oneLine = s.replace(/\s+/g, " ").trim() + if (oneLine.length <= max) return oneLine + return `${oneLine.slice(0, Math.max(0, max - 3))}...` +} + +const safeIdent = (name: string): string => + /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name) ? name : JSON.stringify(name) + +const literalUnion = (values: ReadonlyArray): string | null => { + const lits = values.filter( + (v) => typeof v === "string" || typeof v === "number" || typeof v === "boolean", + ) + if (lits.length !== values.length || lits.length === 0 || lits.length > 12) return null + return Array.from(new Set(lits.map((v) => JSON.stringify(v)))).join(" | ") +} + +/** + * Pragmatic JSON-Schema → TS type. 
Accurate at the top level (property names, + * required-ness, primitives, small enums) and deliberately coarse deeper down + * (nested objects past `MAX_OBJECT_DEPTH` collapse to `Record`). + */ +export const tsTypeForSchema = (schema: JsonSchema | undefined, depth = 0): string => { + if (!schema || typeof schema !== "object") return "unknown" + + if (Array.isArray(schema.enum)) { + const union = literalUnion(schema.enum) + if (union) return union + } + + const variants = schema.anyOf ?? schema.oneOf + if (variants && variants.length > 0) { + const parts = Array.from(new Set(variants.map((v) => tsTypeForSchema(v, depth)))) + return parts.join(" | ") + } + + const rawType = Array.isArray(schema.type) ? schema.type.find((t) => t !== "null") : schema.type + switch (rawType) { + case "string": + return "string" + case "number": + case "integer": + return "number" + case "boolean": + return "boolean" + case "null": + return "null" + case "array": + return `${tsTypeForSchema(schema.items, depth + 1)}[]` + case "object": + return objectType(schema, depth) + default: + return schema.properties ? objectType(schema, depth) : "unknown" + } +} + +const objectType = (schema: JsonSchema, depth: number): string => { + const props = schema.properties + if (!props || Object.keys(props).length === 0) return "Record" + if (depth >= MAX_OBJECT_DEPTH) return "Record" + const required = new Set(schema.required ?? []) + const fields = Object.entries(props).map(([key, value]) => { + const optional = required.has(key) ? "" : "?" + const desc = clampDesc(value?.description, MAX_PROP_DESC) + const comment = desc ? `/** ${escapeJsDoc(desc)} */ ` : "" + return `${comment}${safeIdent(key)}${optional}: ${tsTypeForSchema(value, depth + 1)}` + }) + return `{ ${fields.join("; ")} }` +} + +/** The single `input` parameter type for a tool's generated method. 
*/ +export const inputTypeForTool = (schema: JsonSchema | undefined): string => { + if (!schema?.properties || Object.keys(schema.properties).length === 0) { + return "Record" + } + return objectType(schema, 0) +} + +/** + * Render the `declare const maple: { ... }` surface the model writes code + * against - one JSDoc'd async method per tool, sorted for stable output. Every + * method returns `Promise` (the tool's text output) and throws on + * failure, so the model can `try/catch` or let the harness report the error. + */ +export const buildApiDeclaration = (tools: ReadonlyArray): string => { + const methods = [...tools] + .sort((a, b) => a.name.localeCompare(b.name)) + .map((tool) => { + const desc = clampDesc(tool.description, MAX_METHOD_DESC) + const jsdoc = desc ? `\t/** ${escapeJsDoc(desc)} */\n` : "" + return `${jsdoc}\t${tool.name}(input: ${inputTypeForTool(tool.parameters)}): Promise;` + }) + .join("\n") + return `declare const maple: {\n${methods}\n};` +} diff --git a/packages/codemode/src/format.test.ts b/packages/codemode/src/format.test.ts new file mode 100644 index 00000000..1664eda3 --- /dev/null +++ b/packages/codemode/src/format.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, it } from "vitest" +import { formatRunOutput, formatRunResult, MAX_PROPOSALS_PER_RUN } from "./format.ts" +import { PROPOSED_BATCH_STATUS, type CodeRunResult } from "./types.ts" + +const base: CodeRunResult = { logs: [], returnValue: undefined, error: null } + +describe("formatRunOutput", () => { + it("renders console + return value", () => { + const out = formatRunOutput({ ...base, logs: ["a", "b"], returnValue: { n: 1 } }) + expect(out).toContain("Console output:\na\nb") + expect(out).toContain('Return value:\n{\n "n": 1\n}') + }) + + it("surfaces an error", () => { + const out = formatRunOutput({ ...base, error: { name: "Boom", message: "bad" } }) + expect(out).toBe("Error (Boom): bad") + }) + + it("explains a crash distinctly", () => { + const out = formatRunOutput({ 
...base, crashed: true, error: { name: "TimeoutError", message: "aborted" } }) + expect(out).toContain("Code mode failed to run your snippet (TimeoutError): aborted") + }) + + it("handles an empty run", () => { + expect(formatRunOutput(base)).toContain("no console output") + }) +}) + +describe("formatRunResult", () => { + it("returns plain text when there are no proposals", () => { + const out = formatRunResult({ ...base, logs: ["hi"] }) + expect(out).toContain("Console output:\nhi") + expect(() => JSON.parse(out)).toThrow() + }) + + it("wraps proposals in a proposed_batch envelope", () => { + const out = formatRunResult({ ...base, logs: ["did stuff"] }, [ + { tool: "create_dashboard", input: { title: "x" } }, + { tool: "add_dashboard_widget", input: { id: "1" } }, + ]) + const parsed = JSON.parse(out) + expect(parsed.status).toBe(PROPOSED_BATCH_STATUS) + expect(parsed.proposals).toHaveLength(2) + expect(parsed.text).toContain("Queued 2 change(s) for approval: create_dashboard, add_dashboard_widget.") + }) + + it("caps the number of proposals surfaced from one run", () => { + const many = Array.from({ length: MAX_PROPOSALS_PER_RUN + 5 }, (_, i) => ({ + tool: "create_dashboard", + input: { i }, + })) + const parsed = JSON.parse(formatRunResult(base, many)) + expect(parsed.proposals).toHaveLength(MAX_PROPOSALS_PER_RUN) + expect(parsed.text).toContain("5 more change(s) were dropped") + }) +}) diff --git a/packages/codemode/src/format.ts b/packages/codemode/src/format.ts new file mode 100644 index 00000000..732f2477 --- /dev/null +++ b/packages/codemode/src/format.ts @@ -0,0 +1,77 @@ +import type { CodeProposal, CodeRunResult } from "./types.ts" +import { DEFAULT_OUTPUT_CAP_BYTES, PROPOSED_BATCH_STATUS } from "./types.ts" + +const capText = (s: string, cap: number): string => + s.length > cap ? `${s.slice(0, cap)}\n...[truncated]` : s + +/** Build the human/model-facing summary of a sandbox run. 
*/ +export const formatRunOutput = (result: CodeRunResult, cap = DEFAULT_OUTPUT_CAP_BYTES): string => { + const parts: string[] = [] + + if (result.crashed && result.error) { + parts.push(`Code mode failed to run your snippet (${result.error.name}): ${result.error.message}`) + return capText(parts.join("\n\n"), cap) + } + + if (result.logs.length > 0) { + parts.push(`Console output:\n${result.logs.join("\n")}`) + } + + if (result.returnValue !== undefined) { + let rendered: string + try { + rendered = + typeof result.returnValue === "string" + ? result.returnValue + : JSON.stringify(result.returnValue, null, 2) + } catch { + rendered = String(result.returnValue) + } + parts.push(`Return value:\n${rendered}`) + } + + if (result.error) { + parts.push(`Error (${result.error.name}): ${result.error.message}`) + } + + if (parts.length === 0) { + parts.push("(code ran with no console output and no return value)") + } + + return capText(parts.join("\n\n"), cap) +} + +/** + * Hard cap on proposals surfaced from a single code run. A run queuing more than + * this is almost certainly a mistake/runaway; bounding it keeps the returned + * envelope (and the number of approval cards) from growing without limit. + */ +export const MAX_PROPOSALS_PER_RUN = 25 + +/** + * The final string `run_code` returns to the model. When the run queued mutating + * proposals (chat approval flow), wrap it as a `proposed_batch` envelope the web + * client parses into one approval card per proposal; otherwise return the plain + * summary so the model just reads its results. Both the inner `text` (via + * `formatRunOutput`) and the proposal count are bounded so the envelope can't + * grow unboundedly with the model's run. 
+ */ +export const formatRunResult = ( + result: CodeRunResult, + proposals: ReadonlyArray = [], + cap = DEFAULT_OUTPUT_CAP_BYTES, +): string => { + const text = formatRunOutput(result, cap) + if (proposals.length === 0) return text + + const kept = proposals.slice(0, MAX_PROPOSALS_PER_RUN) + const dropped = proposals.length - kept.length + const queueNote = + `\n\nQueued ${kept.length} change(s) for approval: ${kept.map((p) => p.tool).join(", ")}.` + + (dropped > 0 ? ` (${dropped} more change(s) were dropped — keep code-mode runs to a few mutations.)` : "") + return JSON.stringify({ + status: PROPOSED_BATCH_STATUS, + proposals: kept, + text: text + queueNote, + }) +} diff --git a/packages/codemode/src/harness.test.ts b/packages/codemode/src/harness.test.ts new file mode 100644 index 00000000..7cb7121d --- /dev/null +++ b/packages/codemode/src/harness.test.ts @@ -0,0 +1,97 @@ +import { mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { pathToFileURL } from "node:url" +import { afterEach, describe, expect, it } from "vitest" +import { buildSandboxModules, SANDBOX_MAIN_MODULE } from "./harness.ts" +import type { RpcCallResult } from "./types.ts" + +const tmpDirs: string[] = [] +afterEach(() => { + for (const dir of tmpDirs.splice(0)) rmSync(dir, { recursive: true, force: true }) +}) + +/** + * Write the real two-module set (main.js + user.js) to a temp dir and import + * `main.js` so its relative `import ./user.js` resolves — exercising the actual + * composition the sandbox loads, with a fake `env.MAPLE`. 
+ */ +const runHarness = async ( + code: string, + dispatch: (name: string, input: unknown) => Promise, + capBytes?: number, +): Promise<{ logs: string[]; returnValue: unknown; error: { name: string; message: string } | null }> => { + const dir = mkdtempSync(join(tmpdir(), "codemode-harness-")) + tmpDirs.push(dir) + const modules = buildSandboxModules(code, capBytes) + for (const [name, source] of Object.entries(modules)) writeFileSync(join(dir, name), source) + let mod: { default: { fetch: (req: Request, env: unknown) => Promise } } + try { + // A snippet that fails to parse breaks user.js; Node surfaces it here at + // import time. The real sandbox catches the equivalent failure at fetch and + // reports a crashed run — model that with a crashed-shaped result. + mod = await import(pathToFileURL(join(dir, SANDBOX_MAIN_MODULE)).href) + } catch (e) { + return { logs: [], returnValue: undefined, error: { name: "LoadError", message: String(e) } } + } + const env = { MAPLE: { call: (name: string, input: unknown) => dispatch(name, input) } } + const res = await mod.default.fetch(new Request("https://codemode/run"), env) + return res.json() +} + +const ok = (value: string): RpcCallResult => ({ ok: true, value }) + +describe("buildSandboxModules", () => { + it("captures console.log output", async () => { + const out = await runHarness(`console.log("hello", { a: 1 })`, async () => ok("x")) + expect(out.logs).toEqual(['hello {"a":1}']) + expect(out.error).toBeNull() + }) + + it("captures the user function's return value", async () => { + const out = await runHarness(`return { count: 2 }`, async () => ok("x")) + expect(out.returnValue).toEqual({ count: 2 }) + }) + + it("routes maple.(input) through env.MAPLE.call and returns its value", async () => { + const calls: Array<[string, unknown]> = [] + const out = await runHarness( + `const r = await maple.find_errors({ service: "api" }); console.log(r)`, + async (name, input) => { + calls.push([name, input]) + return ok(`called 
${name}`) + }, + ) + expect(calls).toEqual([["find_errors", { service: "api" }]]) + expect(out.logs).toEqual(["called find_errors"]) + }) + + it("throws inside user code when a call returns ok:false", async () => { + const out = await runHarness( + `try { await maple.boom({}) } catch (e) { console.log("caught", e.message) }`, + async () => ({ ok: false, error: { name: "BadTool", message: "nope" } }), + ) + expect(out.logs).toEqual(["caught nope"]) + expect(out.error).toBeNull() + }) + + it("captures an uncaught error as a value", async () => { + const out = await runHarness(`throw new Error("kaboom")`, async () => ok("x")) + expect(out.error?.message).toBe("kaboom") + }) + + it("truncates output past the byte cap", async () => { + const out = await runHarness(`for (let i = 0; i < 100; i++) console.log("x".repeat(50))`, async () => ok("x"), 1000) + expect(out.logs.at(-1)).toBe("[output truncated]") + const total = out.logs.join("").length + expect(total).toBeLessThan(1300) + }) + + it("isolates a break-out attempt to the user module (can't reach the harness scope)", async () => { + // `})();` would, in an inline splice, close the wrapper and run in the + // harness scope. As its own module it just fails to parse -> crashed run. + const out = await runHarness(`console.log("before"); })(); __logs.length = 0;`, async () => ok("x")) + expect(out.error).not.toBeNull() + expect(out.logs).toEqual([]) + }) +}) diff --git a/packages/codemode/src/harness.ts b/packages/codemode/src/harness.ts new file mode 100644 index 00000000..06ece17d --- /dev/null +++ b/packages/codemode/src/harness.ts @@ -0,0 +1,103 @@ +import { DEFAULT_OUTPUT_CAP_BYTES } from "./types.ts" + +/** Entry module of the sandbox worker. */ +export const SANDBOX_MAIN_MODULE = "main.js" +const USER_MODULE = "user.js" + +/** + * Wrap the model's snippet as its OWN module that exports an async function of + * `(maple, console)`. 
Because the snippet is the function body of a separate + * module — not spliced into the harness's `fetch` scope — it cannot reach the + * harness internals (`__logs`/`__cap`/`env`); a snippet that tries to break out + * of the function (e.g. ending in `})();`) just makes this module fail to parse, + * which surfaces as a crashed run rather than tampering with log/cap capture. + */ +const buildUserModule = (userCode: string): string => + `export default async function (maple, console) {\n${userCode}\n}\n` + +/** + * The harness module: installs a byte-capped `console` shim and a `maple` Proxy + * (whose only capability is `env.MAPLE.call(name, input)` — an RPC stub back to + * the supervisor; outbound network is blocked by the loader via + * `globalOutbound: null`), runs the user module's exported function, and ships + * `{ logs, returnValue, error }` back as JSON via the fetch response. Nothing + * here depends on capturing the parent's console. + */ +const buildMainModule = (capBytes: number): string => { + const cap = Math.max(1000, Math.floor(capBytes)) + return `import runUser from "./${USER_MODULE}"; +export default { + async fetch(request, env) { + const __cap = ${cap}; + const __logs = []; + let __bytes = 0; + let __truncated = false; + const __push = (level, args) => { + if (__truncated) { return; } + let line; + try { + line = args.map((a) => { + if (typeof a === "string") { return a; } + try { return JSON.stringify(a); } catch (_e) { return String(a); } + }).join(" "); + } catch (_e) { line = "[unserializable log]"; } + const prefix = level === "log" ? 
"" : "[" + level + "] "; + line = prefix + line; + const room = __cap - __bytes; + if (room <= 0) { __truncated = true; __logs.push("[output truncated]"); return; } + if (line.length > room) { line = line.slice(0, room) + " ...[truncated]"; __truncated = true; } + __bytes += line.length; + __logs.push(line); + if (__truncated) { __logs.push("[output truncated]"); } + }; + const __console = { + log: (...a) => __push("log", a), + info: (...a) => __push("info", a), + warn: (...a) => __push("warn", a), + error: (...a) => __push("error", a), + debug: (...a) => __push("debug", a), + }; + const maple = new Proxy({}, { + get(_t, prop) { + if (typeof prop !== "string") { return undefined; } + return async (input) => { + const r = await env.MAPLE.call(prop, input == null ? {} : input); + if (r && r.ok) { return r.value; } + const err = new Error((r && r.error && r.error.message) || ("maple." + prop + " failed")); + err.name = (r && r.error && r.error.name) || "MapleToolError"; + throw err; + }; + }, + }); + let __return; + let __error = null; + try { + __return = await runUser(maple, __console); + } catch (e) { + __error = { + name: (e && e.name) || "Error", + message: (e && e.message) || String(e), + stack: e && e.stack ? String(e.stack).slice(0, 2000) : undefined, + }; + } + let __serialized; + try { __serialized = __return === undefined ? undefined : JSON.parse(JSON.stringify(__return)); } + catch (_e) { __serialized = String(__return); } + return Response.json({ logs: __logs, returnValue: __serialized, error: __error }); + }, +}; +` +} + +/** + * Build the module set for the dynamic-worker sandbox: the harness entry + * (`main.js`) plus the model's snippet as its own module (`user.js`). Splitting + * them keeps the model's code out of the harness scope — see `buildUserModule`. 
+ */ +export const buildSandboxModules = ( + userCode: string, + capBytes = DEFAULT_OUTPUT_CAP_BYTES, +): Record => ({ + [SANDBOX_MAIN_MODULE]: buildMainModule(capBytes), + [USER_MODULE]: buildUserModule(userCode), +}) diff --git a/packages/codemode/src/index.ts b/packages/codemode/src/index.ts new file mode 100644 index 00000000..9cba8d34 --- /dev/null +++ b/packages/codemode/src/index.ts @@ -0,0 +1,7 @@ +// Pure (Node/test-safe) Code Mode helpers. The Workers-only sandbox driver +// (`MapleSupervisor`, `runCodeInSandbox`) lives behind `@maple/codemode/sandbox`. +export * from "./types.ts" +export * from "./api-gen.ts" +export * from "./harness.ts" +export * from "./format.ts" +export * from "./mutating.ts" diff --git a/packages/codemode/src/mutating.ts b/packages/codemode/src/mutating.ts new file mode 100644 index 00000000..2b44f6bf --- /dev/null +++ b/packages/codemode/src/mutating.ts @@ -0,0 +1,39 @@ +/** + * Base names of the mutating Maple MCP tools — the static list the cross-app / + * over-MCP consumers use, shared by apps/api and apps/chat-flue so they can't + * drift: + * - apps/chat-flue wraps these so a model call returns a `proposed` marker + * instead of mutating (the web client applies the real change via + * `POST /api/chat/apply`, which only accepts tools in this set). + * - apps/api's MCP `run_code` sandbox refuses them. + * + * The structural source of truth is the per-tool `mutating` flag set at + * registration via `server.mutatingTool(...)`; an apps/api test asserts this + * list exactly equals the set of tools registered that way, so adding a mutating + * tool without listing it here (or vice versa) fails CI. 
+ */ +export const MUTATING_TOOL_NAMES: ReadonlySet = new Set([ + // dashboards + "create_dashboard", + "update_dashboard", + "add_dashboard_widget", + "update_dashboard_widget", + "remove_dashboard_widget", + "reorder_dashboard_widgets", + "replace_dashboard_widgets", + // alerts + "create_alert_rule", + "update_alert_rule", + "delete_alert_rule", + // error issues + "claim_error_issue", + "release_error_issue", + "transition_error_issue", + "comment_on_error_issue", + "heartbeat_error_issue", + "set_issue_severity", + "update_error_notification_policy", + // fixes / agents + "propose_fix", + "register_agent", +]) diff --git a/packages/codemode/src/sandbox.ts b/packages/codemode/src/sandbox.ts new file mode 100644 index 00000000..ffe031c4 --- /dev/null +++ b/packages/codemode/src/sandbox.ts @@ -0,0 +1,108 @@ +// Workers-runtime-only Code Mode driver. Pulls in `cloudflare:workers` +// (`RpcTarget`) and the `WorkerLoader` binding, so it lives behind the +// `@maple/codemode/sandbox` subpath — the root barrel stays Node/test-safe. +import { RpcTarget } from "cloudflare:workers" +import { buildSandboxModules, SANDBOX_MAIN_MODULE } from "./harness.ts" +import type { CodeRunResult, RpcCallResult } from "./types.ts" +import { + DEFAULT_COMPAT_DATE, + DEFAULT_CPU_MS, + DEFAULT_SUBREQUESTS, + DEFAULT_WALL_MS, +} from "./types.ts" + +/** Host-supplied bridge: run one `maple.(input)` call and return its result. */ +export type CodeModeDispatch = (name: string, input: unknown) => Promise + +/** + * The RPC target handed to the sandbox isolate as `env.MAPLE`. It must be an + * `RpcTarget` subclass so Cloudflare passes it across the Worker Loader boundary + * by reference (a plain object would structured-clone and drop the method). The + * dispatch closure stays in the parent isolate; the sandbox only gets a stub. 
+ */ +export class MapleSupervisor extends RpcTarget { + readonly #dispatch: CodeModeDispatch + + constructor(dispatch: CodeModeDispatch) { + super() + this.#dispatch = dispatch + } + + async call(name: string, input: unknown): Promise { + try { + return await this.#dispatch(name, input) + } catch (error) { + return { + ok: false, + error: { + name: error instanceof Error ? error.name : "Error", + message: error instanceof Error ? error.message : String(error), + }, + } + } + } +} + +export interface RunCodeOptions { + /** Model-written snippet (plain JS; wrapped as the function body of its own `user.js` module, not spliced into the harness scope). */ + readonly code: string + /** Bridge each `maple.(input)` call back to the host's tools. */ + readonly dispatch: CodeModeDispatch + /** Unique-per-call id → fresh isolate each run (Code Mode semantics). */ + readonly id: string + readonly capBytes?: number + readonly compatibilityDate?: string + readonly cpuMs?: number + readonly subRequests?: number + readonly wallMs?: number +} + +/** + * Load the model's snippet into a fresh dynamic worker with network disabled + * (`globalOutbound: null`) and only the `maple` RPC capability, run it, and + * return the captured `{ logs, returnValue, error }`. Never throws — a load + * failure, RPC failure, or wall-clock timeout is reported as a `crashed` + * result so the caller can surface it to the model as a value. + */ +export const runCodeInSandbox = async ( + loader: WorkerLoader, + options: RunCodeOptions, +): Promise => { + const supervisor = new MapleSupervisor(options.dispatch) + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), options.wallMs ?? DEFAULT_WALL_MS) + try { + const stub = loader.get(options.id, async () => ({ + compatibilityDate: options.compatibilityDate ?? DEFAULT_COMPAT_DATE, + mainModule: SANDBOX_MAIN_MODULE, + modules: buildSandboxModules(options.code, options.capBytes), + env: { MAPLE: supervisor }, + globalOutbound: null, + limits: { + cpuMs: options.cpuMs ??
DEFAULT_CPU_MS, + subRequests: options.subRequests ?? DEFAULT_SUBREQUESTS, + }, + })) + const response = await stub + .getEntrypoint() + .fetch("https://codemode/run", { signal: controller.signal }) + const payload = (await response.json()) as Partial | null + return { + logs: payload?.logs ?? [], + returnValue: payload?.returnValue, + error: payload?.error ?? null, + } + } catch (error) { + return { + logs: [], + returnValue: undefined, + error: { + name: error instanceof Error ? error.name : "Error", + message: error instanceof Error ? error.message : String(error), + }, + crashed: true, + } + } finally { + clearTimeout(timer) + } +} diff --git a/packages/codemode/src/types.ts b/packages/codemode/src/types.ts new file mode 100644 index 00000000..ec86ffbb --- /dev/null +++ b/packages/codemode/src/types.ts @@ -0,0 +1,72 @@ +// Shared, dependency-free types + constants for Code Mode. Importable by both +// the pure helpers (api-gen / harness / format) and the Workers-only `./sandbox` +// driver, and safe to pull into Node-side unit tests. + +/** A trimmed JSON Schema node, as produced by the tool registries. */ +export interface JsonSchema { + type?: string | string[] + properties?: Record + required?: ReadonlyArray + items?: JsonSchema + enum?: ReadonlyArray + description?: string + anyOf?: ReadonlyArray + oneOf?: ReadonlyArray + $ref?: string + [key: string]: unknown +} + +/** The minimum a tool must expose to be projected into the `maple.*` API. */ +export interface CodeModeToolSpec { + /** Name the model calls as `maple.(...)` — already stripped of any `mcp__` prefix. */ + readonly name: string + readonly description: string + /** Raw JSON Schema for the tool's single input object (may be absent). */ + readonly parameters?: JsonSchema +} + +/** Result the sandbox isolate returns (parsed from its fetch response). 
*/ +export interface CodeRunResult { + readonly logs: ReadonlyArray + readonly returnValue: unknown + readonly error: { name: string; message: string; stack?: string } | null + /** Set when the isolate failed to load/run at the harness boundary (not user code). */ + readonly crashed?: boolean +} + +/** Envelope crossing the RPC boundary for each `maple.(input)` call. */ +export interface RpcCallResult { + readonly ok: boolean + readonly value?: string + readonly error?: { name: string; message: string } +} + +/** One pending mutation captured from a code run (chat propose-then-apply flow). */ +export interface CodeProposal { + readonly tool: string + readonly input: unknown +} + +/** Canonical name of the Code Mode tool — shared so dispatch can refuse self-calls. */ +export const RUN_CODE_TOOL_NAME = "run_code" + +export const PROPOSED_BATCH_STATUS = "proposed_batch" as const + +/** The `run_code` output envelope when a code run queued mutating proposals. */ +export interface ProposedBatch { + readonly status: typeof PROPOSED_BATCH_STATUS + readonly proposals: ReadonlyArray + /** Human/model-facing summary of the run (console + return value + queue note). */ + readonly text: string +} + +export const DEFAULT_OUTPUT_CAP_BYTES = 24_000 +/** + * Compatibility date for the dynamically-loaded isolate. The harness uses only + * standard globals (Response, Proxy, console), so any recent date works; this + * matches the blog's Worker Loader example. Bump deliberately. 
+ */ +export const DEFAULT_COMPAT_DATE = "2025-06-01" +export const DEFAULT_CPU_MS = 10_000 +export const DEFAULT_SUBREQUESTS = 50 +export const DEFAULT_WALL_MS = 20_000 diff --git a/packages/codemode/tsconfig.json b/packages/codemode/tsconfig.json new file mode 100644 index 00000000..f9e824c4 --- /dev/null +++ b/packages/codemode/tsconfig.json @@ -0,0 +1,17 @@ +{ + "include": ["src/**/*.ts"], + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "lib": ["ES2022"], + "types": ["node", "@cloudflare/workers-types"], + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + "skipLibCheck": true, + "strict": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + } +}