From e88d9a835e3aa2260c7efd461a0b1001e32f19ce Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 01:10:59 +0200 Subject: [PATCH 01/10] feat(chat,mcp): Cloudflare Code Mode for the AI chat and MCP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of handing the model 51 tools one at a time, it can now write a JS snippet against a generated typed `maple.*` API that runs in a Cloudflare Dynamic Worker isolate (network blocked); each `maple.<tool>(input)` call RPCs back to the existing tools. Multi-step investigations collapse into one round-trip. - packages/codemode: source-only shared package (pure root + ./sandbox subpath that imports cloudflare:workers). JSON-schema→TS API gen, the sandbox harness (splices user JS into an async IIFE module, no eval), proposed_batch formatting, and runCodeInSandbox + MapleSupervisor (RpcTarget). - chat-flue: a `run_code` Flue tool, injected only when MAPLE_CODE_MODE=1 and the LOADER binding is present (hybrid — the 51 direct tools stay). Dispatch reuses the approval-gated tool execs, so mutating maple.* calls become proposals, collected into a proposed_batch the web renders as one approval card each. - apps/api MCP: a `run_code` tool dispatching to registry handlers under the captured request runtime (FiberSet.makeRuntimePromise + Effect.scoped), preserving org scoping. Mutating tools are blocked inside code. - Deploy: WorkerLoader() binding added to both alchemy.run.ts, gated on the flag. Worker Loader is a Cloudflare beta, so the binding only deploys when MAPLE_CODE_MODE is set and the agent no-ops Code Mode without it (direct tools only) — default behavior is unchanged. Verified via unit tests (the harness runs end-to-end in Node), the Flue build, prompt rendering, and a 24-package typecheck; live isolate execution needs a deployed stage with beta access. 
Co-Authored-By: Claude Opus 4.8 --- apps/api/alchemy.run.ts | 8 +- apps/api/package.json | 1 + apps/api/src/mcp/tools/registry.ts | 5 + apps/api/src/mcp/tools/run-code.test.ts | 73 +++++++++ apps/api/src/mcp/tools/run-code.ts | 146 ++++++++++++++++++ apps/chat-flue/alchemy.run.ts | 8 +- apps/chat-flue/package.json | 1 + apps/chat-flue/src/agents/maple-chat.ts | 17 +- apps/chat-flue/src/lib/codemode/api-gen.ts | 38 +++++ .../src/lib/codemode/codemode.test.ts | 64 ++++++++ apps/chat-flue/src/lib/codemode/index.ts | 2 + .../src/lib/codemode/run-code-tool.ts | 80 ++++++++++ apps/chat-flue/src/lib/env.ts | 14 ++ apps/chat-flue/src/lib/modes.ts | 10 +- apps/chat-flue/src/lib/prompts.ts | 47 ++++++ .../src/components/chat/chat-conversation.tsx | 34 +++- .../src/components/chat/tool-proposal.test.ts | 39 ++++- apps/web/src/components/chat/tool-proposal.ts | 27 ++++ bun.lock | 88 ++++++++--- packages/codemode/package.json | 19 +++ packages/codemode/src/api-gen.test.ts | 92 +++++++++++ packages/codemode/src/api-gen.ts | 106 +++++++++++++ packages/codemode/src/format.test.ts | 46 ++++++ packages/codemode/src/format.ts | 65 ++++++++ packages/codemode/src/harness.test.ts | 69 +++++++++ packages/codemode/src/harness.ts | 84 ++++++++++ packages/codemode/src/index.ts | 6 + packages/codemode/src/sandbox.ts | 108 +++++++++++++ packages/codemode/src/types.ts | 69 +++++++++ packages/codemode/tsconfig.json | 17 ++ 30 files changed, 1351 insertions(+), 32 deletions(-) create mode 100644 apps/api/src/mcp/tools/run-code.test.ts create mode 100644 apps/api/src/mcp/tools/run-code.ts create mode 100644 apps/chat-flue/src/lib/codemode/api-gen.ts create mode 100644 apps/chat-flue/src/lib/codemode/codemode.test.ts create mode 100644 apps/chat-flue/src/lib/codemode/index.ts create mode 100644 apps/chat-flue/src/lib/codemode/run-code-tool.ts create mode 100644 packages/codemode/package.json create mode 100644 packages/codemode/src/api-gen.test.ts create mode 100644 
packages/codemode/src/api-gen.ts create mode 100644 packages/codemode/src/format.test.ts create mode 100644 packages/codemode/src/format.ts create mode 100644 packages/codemode/src/harness.test.ts create mode 100644 packages/codemode/src/harness.ts create mode 100644 packages/codemode/src/index.ts create mode 100644 packages/codemode/src/sandbox.ts create mode 100644 packages/codemode/src/types.ts create mode 100644 packages/codemode/tsconfig.json diff --git a/apps/api/alchemy.run.ts b/apps/api/alchemy.run.ts index d7df47a1..7e45131e 100644 --- a/apps/api/alchemy.run.ts +++ b/apps/api/alchemy.run.ts @@ -1,6 +1,6 @@ import path from "node:path" import alchemy from "alchemy" -import { D1Database, KVNamespace, Queue, Worker, WorkerStub, Workflow } from "alchemy/cloudflare" +import { D1Database, KVNamespace, Queue, Worker, WorkerLoader, WorkerStub, Workflow } from "alchemy/cloudflare" import type { MapleDomains, MapleStage } from "@maple/infra/cloudflare" import { resolveD1Name, resolveDeploymentEnvironment, resolveWorkerName } from "@maple/infra/cloudflare" @@ -153,6 +153,12 @@ export const createMapleApi = async ({ stage, domains }: CreateMapleApiOptions) ...optionalSecret("GITHUB_APP_CLIENT_SECRET"), ...optionalSecret("GITHUB_APP_WEBHOOK_SECRET"), ...optionalPlain("GITHUB_API_BASE_URL"), + // Code Mode (Cloudflare Dynamic Workers). The `run_code` MCP tool runs + // model-written code in a sandbox isolate via this `worker_loader` binding. + // Added only when MAPLE_CODE_MODE is set (the binding needs Worker Loader + // beta access); the tool stays inert at runtime without it. + ...optionalPlain("MAPLE_CODE_MODE"), + ...(process.env.MAPLE_CODE_MODE?.trim() ? 
{ LOADER: WorkerLoader() } : {}), }, }) diff --git a/apps/api/package.json b/apps/api/package.json index 6319114b..5bcdc318 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -33,6 +33,7 @@ "@flue/sdk": "1.0.0-beta.1", "@libsql/client": "0.15.15", "@maple-dev/effect-sdk": "workspace:*", + "@maple/codemode": "workspace:*", "@maple/db": "workspace:*", "@maple/domain": "workspace:*", "@maple/effect-cloudflare": "workspace:*", diff --git a/apps/api/src/mcp/tools/registry.ts b/apps/api/src/mcp/tools/registry.ts index f8455be8..c291e0e0 100644 --- a/apps/api/src/mcp/tools/registry.ts +++ b/apps/api/src/mcp/tools/registry.ts @@ -42,6 +42,7 @@ import { registerRemoveDashboardWidgetTool } from "./remove-dashboard-widget" import { registerReplaceDashboardWidgetsTool } from "./replace-dashboard-widgets" import { registerReorderDashboardWidgetsTool } from "./reorder-dashboard-widgets" import { registerMineLogPatternsTool } from "./mine-log-patterns" +import { registerRunCodeTool } from "./run-code" import { registerSearchLogsTool } from "./search-logs" import { registerSearchTracesTool } from "./search-traces" import { registerSearchSessionsTool } from "./search-sessions" @@ -136,6 +137,10 @@ const collectMapleToolDefinitions = (): ReadonlyArray => { registerRegisterAgentTool(registrar) registerListErrorIncidentsTool(registrar) registerUpdateErrorNotificationPolicyTool(registrar) + // Code Mode: a single tool whose sandboxed snippet orchestrates the read-only + // tools above. Registered last so it can reference the full set at runtime + // (it dispatches via `mapleToolDefinitions`); inert unless MAPLE_CODE_MODE=1. 
+ registerRunCodeTool(registrar) return definitions } diff --git a/apps/api/src/mcp/tools/run-code.test.ts b/apps/api/src/mcp/tools/run-code.test.ts new file mode 100644 index 00000000..016f0304 --- /dev/null +++ b/apps/api/src/mcp/tools/run-code.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, it, vi } from "vitest" +import type { McpToolResult } from "./types" +import { mapleToolDefinitions } from "./registry" +import { resolveCodeModeCall, textOfResult } from "./run-code" + +const call = ( + name: string, + input: unknown, + invoke: (definition: (typeof mapleToolDefinitions)[number], decoded: unknown) => Promise, +) => resolveCodeModeCall(mapleToolDefinitions, name, input, invoke) + +const okResult = (text: string, structured?: string): McpToolResult => ({ + content: structured + ? [ + { type: "text", text }, + { type: "text", text: structured }, + ] + : [{ type: "text", text }], +}) + +describe("textOfResult", () => { + it("joins dual content under the Structured content: convention", () => { + expect(textOfResult(okResult("human", '{"a":1}'))).toBe('human\n\nStructured content:\n{"a":1}') + }) + it("returns the single text entry as-is", () => { + expect(textOfResult(okResult("just text"))).toBe("just text") + }) +}) + +describe("resolveCodeModeCall", () => { + it("blocks mutating tools without invoking them", async () => { + const invoke = vi.fn() + const r = await call("create_dashboard", {}, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("MutatingToolBlocked") + expect(invoke).not.toHaveBeenCalled() + }) + + it("rejects unknown tools", async () => { + const r = await call("not_a_tool", {}, vi.fn()) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("UnknownTool") + }) + + it("rejects input that fails the tool schema before invoking", async () => { + const invoke = vi.fn() + // list_services takes only optional strings; a number for `environment` is invalid. 
+ const r = await call("list_services", { environment: 123 }, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("InvalidInput") + expect(invoke).not.toHaveBeenCalled() + }) + + it("runs a read tool and returns its text on success", async () => { + const invoke = vi.fn(async () => okResult("Services table", '{"total":2}')) + const r = await call("list_services", { environment: "production" }, invoke) + expect(invoke).toHaveBeenCalledOnce() + expect(r.ok).toBe(true) + expect(r.value).toContain("Services table") + expect(r.value).toContain("Structured content:") + }) + + it("surfaces an isError tool result as an error value", async () => { + const invoke = vi.fn(async (): Promise => ({ + isError: true, + content: [{ type: "text", text: "warehouse exploded" }], + })) + const r = await call("list_services", {}, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("ToolError") + expect(r.error?.message).toContain("warehouse exploded") + }) +}) diff --git a/apps/api/src/mcp/tools/run-code.ts b/apps/api/src/mcp/tools/run-code.ts new file mode 100644 index 00000000..a6a9a1ce --- /dev/null +++ b/apps/api/src/mcp/tools/run-code.ts @@ -0,0 +1,146 @@ +import { Effect, FiberSet, Schema } from "effect" +import { formatRunOutput, type RpcCallResult } from "@maple/codemode" +import { WorkerEnvironment } from "@/lib/WorkerEnvironment" +import { resolveTenant } from "../lib/query-warehouse" +// Type-only: a value import would create an eager require cycle with registry.ts +// (registry imports this module to register the tool). The definitions are passed +// into resolveCodeModeCall / fetched via dynamic import at request time instead. 
+import type { MapleToolDefinition } from "./registry" +import { MUTATING_TOOL_NAMES } from "./mutating" +import { requiredStringParam, validationError, type McpToolRegistrar, type McpToolResult } from "./types" + +const DESCRIPTION = `Run a JavaScript snippet that orchestrates other Maple tools in one call, instead of issuing many separate tool calls. Inside the snippet, \`await maple.(input)\` invokes any READ-ONLY Maple tool by name (same names and inputs as the other tools you have) and returns its text output (human-readable text followed by a \`Structured content:\` line of JSON — JSON.parse it to filter/sort). The snippet runs in a sandbox with no network and no imports; \`console.log(...)\` and the \`return\` value come back to you. Mutating tools are NOT callable here — call those directly so they go through approval. Ideal for multi-step investigations (find → for each → inspect → correlate) where chaining and filtering in code beats round-tripping every result.` + +/** Join an McpToolResult's content into the `Structured content:` convention the sandbox API uses. */ +export const textOfResult = (result: McpToolResult): string => { + const texts = result.content.map((c) => c.text) + if (texts.length <= 1) return texts.join("\n") + const [human, ...rest] = texts + return `${human}\n\nStructured content:\n${rest.join("\n")}` +} + +/** + * Resolve one `maple.(input)` call to an RPC result: block mutating tools, + * reject unknown names, decode the input against the tool's schema, then run the + * handler via `invoke` (which the caller binds to the captured request runtime). + * Errors are returned as values so the model can self-correct. Pure of the + * Effect runtime — the dispatch logic is unit-testable with a fake `invoke`. 
+ */ +export const resolveCodeModeCall = async ( + definitions: ReadonlyArray, + name: string, + input: unknown, + invoke: (definition: MapleToolDefinition, decoded: unknown) => Promise, +): Promise => { + if (MUTATING_TOOL_NAMES.has(name)) { + return { + ok: false, + error: { + name: "MutatingToolBlocked", + message: `maple.${name} mutates state and can't run inside code mode. Call the ${name} tool directly so it goes through approval.`, + }, + } + } + const definition = definitions.find((d) => d.name === name) + if (!definition) { + return { ok: false, error: { name: "UnknownTool", message: `maple.${name} is not available` } } + } + let decoded: unknown + try { + decoded = Schema.decodeUnknownSync(definition.schema)(input ?? {}) + } catch (error) { + return { ok: false, error: { name: "InvalidInput", message: String(error) } } + } + try { + const result = await invoke(definition, decoded) + if (result.isError) { + return { ok: false, error: { name: "ToolError", message: textOfResult(result) } } + } + return { ok: true, value: textOfResult(result) } + } catch (error) { + return { + ok: false, + error: { + name: error instanceof Error ? error.name : "Error", + message: error instanceof Error ? error.message : String(error), + }, + } + } +} + +/** + * Code Mode for the MCP server (Cloudflare Dynamic Workers). Exposes a single + * `run_code` tool whose sandboxed snippet calls back into the existing read-only + * tool handlers via RPC, run on the SAME request/tenant context — so org scoping + * is identical to a direct tool call and the sandbox can never widen it. Mutating + * tools are blocked inside code (they must go through the host's approval path). + * + * Flag-gated at runtime: returns an error result unless `MAPLE_CODE_MODE=1` and + * the `LOADER` (worker_loader) binding is present. The Workers-only sandbox + * driver is imported dynamically so this module's static graph stays Node-safe + * (the tool registry is imported by node-based evals/tests). 
+ */ +export function registerRunCodeTool(server: McpToolRegistrar) { + server.tool( + "run_code", + DESCRIPTION, + Schema.Struct({ + code: requiredStringParam( + "A JavaScript snippet using `await maple.(input)`, `console.log(...)`, and `return`. No imports, no network, no type annotations.", + ), + }), + Effect.fn("McpTool.runCode")(function* ({ code }) { + const tenant = yield* resolveTenant + yield* Effect.annotateCurrentSpan({ orgId: tenant.orgId }) + + const env = yield* WorkerEnvironment + const loader = env.LOADER as WorkerLoader | undefined + if (env.MAPLE_CODE_MODE !== "1" || !loader) { + return validationError( + "Code mode is not enabled on this deployment. Call the individual Maple tools directly instead.", + ) + } + if (!code.trim()) { + return validationError("Provide a `code` snippet that uses the `maple` API.") + } + + return yield* Effect.scoped( + Effect.gen(function* () { + // Capture the current request context so RPC callbacks (which fire + // from the isolate while we await the sandbox) can run tool handlers + // with the same tenant/services. `any`: tool handlers are + // type-erased over their service requirements (see registry.ts). + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const runPromise = yield* FiberSet.makeRuntimePromise() + + // Fetched lazily (the static import is type-only to avoid a require + // cycle); registry is fully initialized by request time. 
+ const { mapleToolDefinitions } = yield* Effect.promise(() => import("./registry")) + const dispatch = (name: string, input: unknown): Promise => + resolveCodeModeCall(mapleToolDefinitions, name, input, (definition, decoded) => + runPromise(definition.handler(decoded)), + ) + + const result = yield* Effect.promise(async () => { + const { runCodeInSandbox } = await import("@maple/codemode/sandbox") + return runCodeInSandbox(loader, { + id: `maple-codemode-${crypto.randomUUID()}`, + code, + dispatch, + }) + }) + + yield* Effect.annotateCurrentSpan({ + "codemode.log_lines": result.logs.length, + "codemode.crashed": result.crashed === true, + "codemode.errored": result.error !== null, + }) + + return { + content: [{ type: "text" as const, text: formatRunOutput(result) }], + } satisfies McpToolResult + }), + ) + }), + ) +} diff --git a/apps/chat-flue/alchemy.run.ts b/apps/chat-flue/alchemy.run.ts index 8fda2a9f..44639e43 100644 --- a/apps/chat-flue/alchemy.run.ts +++ b/apps/chat-flue/alchemy.run.ts @@ -1,7 +1,7 @@ import { execFileSync } from "node:child_process" import path from "node:path" import alchemy from "alchemy" -import { Ai, DurableObjectNamespace, Worker } from "alchemy/cloudflare" +import { Ai, DurableObjectNamespace, Worker, WorkerLoader } from "alchemy/cloudflare" import type { MapleDomains, MapleStage } from "@maple/infra/cloudflare" import { resolveDeploymentEnvironment, resolveWorkerName } from "@maple/infra/cloudflare" @@ -104,6 +104,12 @@ export const createChatFlueWorker = async ({ stage, domains, mapleApiUrl }: Crea ...optionalPlain("MAPLE_ENVIRONMENT", resolveDeploymentEnvironment(stage)), ...optionalPlain("MAPLE_CHAT_MODEL"), ...optionalPlain("MAPLE_TRIAGE_MODEL"), + // Code Mode (Cloudflare Dynamic Workers / Worker Loader). 
The `worker_loader` + // binding is added only when MAPLE_CODE_MODE is set, since it requires Worker + // Loader beta access on the account; otherwise deploys are unaffected and the + // agent no-ops Code Mode at runtime (LOADER absent → direct tools only). + ...optionalPlain("MAPLE_CODE_MODE"), + ...(process.env.MAPLE_CODE_MODE?.trim() ? { LOADER: WorkerLoader() } : {}), ...optionalPlain("MAPLE_AUTH_MODE", "self_hosted"), ...optionalSecret("MAPLE_ROOT_PASSWORD"), ...optionalSecret("CLERK_SECRET_KEY"), diff --git a/apps/chat-flue/package.json b/apps/chat-flue/package.json index a6d34b7b..3fc7a054 100644 --- a/apps/chat-flue/package.json +++ b/apps/chat-flue/package.json @@ -14,6 +14,7 @@ "@clerk/backend": "^2.30.1", "@flue/opentelemetry": "1.0.0-beta.1", "@flue/runtime": "1.0.0-beta.2", + "@maple/codemode": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^2.0.0", "@opentelemetry/exporter-trace-otlp-http": "^0.205.0", diff --git a/apps/chat-flue/src/agents/maple-chat.ts b/apps/chat-flue/src/agents/maple-chat.ts index e73189a7..5f29e3c7 100644 --- a/apps/chat-flue/src/agents/maple-chat.ts +++ b/apps/chat-flue/src/agents/maple-chat.ts @@ -2,6 +2,7 @@ import { createAgent, type AgentRouteHandler, type McpServerConnection } from "@ import { tracing } from "cloudflare:workers" import { applyApprovalGates } from "../lib/approval.ts" import { instanceIdFromAgentPath } from "../lib/auth.ts" +import { buildCodeModeApi, createRunCodeTool, type CodeModeApi } from "../lib/codemode/index.ts" import type { ChatFlueEnv } from "../lib/env.ts" import { connectMapleMcp, MCP_DEFAULT_TIMEOUT_MS } from "../lib/mcp.ts" import { buildSystemPrompt, modeFromInstanceId } from "../lib/modes.ts" @@ -74,7 +75,6 @@ export default createAgent(async (ctx) => { // the Phase 2 frontend integration point; until then the base prompt for the // mode is used. 
const mode = modeFromInstanceId(ctx.id) - const instructions = buildSystemPrompt({ mode }) // Connect to Maple's MCP server (all tools). We tolerate connection failures so // the agent still answers on Workers AI when apps/api or INTERNAL_SERVICE_TOKEN @@ -114,6 +114,21 @@ export default createAgent(async (ctx) => { } } + // Code Mode (hybrid, flag-gated): when enabled and the sandbox is bound, add a + // `run_code` tool backed by the SAME gated tools (so mutations still only + // propose) and inject the generated `maple.*` API into the prompt. The direct + // tools stay available as a fallback. No-ops without the LOADER binding. + let codeModeApi: CodeModeApi | undefined + if (tools.length > 0 && ctx.env.MAPLE_CODE_MODE === "1" && ctx.env.LOADER) { + codeModeApi = buildCodeModeApi(tools) + tools = [...tools, createRunCodeTool(ctx.env, codeModeApi)] + } + + const instructions = buildSystemPrompt({ + mode, + codeMode: codeModeApi ? { declaration: codeModeApi.declaration } : undefined, + }) + return { model: ctx.env.MAPLE_CHAT_MODEL ?? DEFAULT_MODEL, instructions, diff --git a/apps/chat-flue/src/lib/codemode/api-gen.ts b/apps/chat-flue/src/lib/codemode/api-gen.ts new file mode 100644 index 00000000..0ba402eb --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/api-gen.ts @@ -0,0 +1,38 @@ +import type { ToolDefinition } from "@flue/runtime" +import { buildApiDeclaration, type CodeModeToolSpec, type JsonSchema } from "@maple/codemode" +import { baseToolName } from "../mcp.ts" + +export interface CodeModeApi { + /** The `declare const maple: { ... }` surface injected into the system prompt. */ + readonly declaration: string + /** Base tool name -> the (approval-gated) Flue tool `execute`, the RPC backend. */ + readonly dispatch: ReadonlyMap + /** Base names exposed to code mode (for telemetry / debugging). 
*/ + readonly toolNames: ReadonlyArray +} + +const isJsonSchema = (p: unknown): p is JsonSchema => + typeof p === "object" && p !== null && ("properties" in p || "type" in p) + +/** + * Project the connected (already approval-gated) MCP tools into a Code Mode API: + * the `maple.*` TypeScript declaration for the prompt plus a name->execute + * dispatch map. Built from the SAME gated array the direct-tool path uses, so a + * mutating `maple.create_dashboard(...)` call runs the proposal-returning + * `execute` and never mutates — approval gating is inherited for free. + */ +export const buildCodeModeApi = (tools: ReadonlyArray): CodeModeApi => { + const dispatch = new Map() + const specs: CodeModeToolSpec[] = [] + for (const tool of tools) { + const name = baseToolName(tool.name) + if (dispatch.has(name)) continue + dispatch.set(name, tool.execute) + specs.push({ + name, + description: tool.description, + parameters: isJsonSchema(tool.parameters) ? tool.parameters : undefined, + }) + } + return { declaration: buildApiDeclaration(specs), dispatch, toolNames: specs.map((s) => s.name) } +} diff --git a/apps/chat-flue/src/lib/codemode/codemode.test.ts b/apps/chat-flue/src/lib/codemode/codemode.test.ts new file mode 100644 index 00000000..a9187d11 --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/codemode.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it, vi } from "vitest" +import type { ToolDefinition } from "@flue/runtime" +import type { CodeProposal } from "@maple/codemode" +import { buildCodeModeApi } from "./api-gen.ts" +import { createCodeModeDispatch } from "./run-code-tool.ts" + +const tool = (name: string, execute: ToolDefinition["execute"], parameters: object = { type: "object", properties: {} }): ToolDefinition => ({ + name, + description: `desc for ${name}`, + parameters, + execute, +}) + +describe("buildCodeModeApi", () => { + it("strips the mcp__maple__ prefix and builds a declaration + dispatch", () => { + const tools = [ + 
tool("mcp__maple__find_errors", async () => "errors", { + type: "object", + properties: { service: { type: "string", description: "svc" } }, + required: [], + }), + tool("mcp__maple__list_services", async () => "services"), + ] + const api = buildCodeModeApi(tools) + expect(api.toolNames).toContain("find_errors") + expect(api.toolNames).toContain("list_services") + expect(api.declaration).toContain("find_errors(input: { /** svc */ service?: string }): Promise;") + expect(api.dispatch.get("find_errors")).toBeTypeOf("function") + }) + + it("keeps the first tool when base names collide", () => { + const first = vi.fn(async () => "first") + const second = vi.fn(async () => "second") + const api = buildCodeModeApi([tool("mcp__maple__x", first), tool("x", second)]) + expect(api.dispatch.size).toBe(1) + expect(api.dispatch.get("x")).toBe(first) + }) +}) + +describe("createCodeModeDispatch", () => { + it("returns ok:false for an unknown tool", async () => { + const dispatch = createCodeModeDispatch(new Map(), () => {}) + const r = await dispatch("nope", {}) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("UnknownTool") + }) + + it("runs a read tool and returns its value", async () => { + const map = new Map([["list_services", async () => "services table"]]) + const r = await createCodeModeDispatch(map, () => {})("list_services", { environment: "prod" }) + expect(r).toEqual({ ok: true, value: "services table" }) + }) + + it("collects a proposal from a gated mutating tool while returning its value", async () => { + const proposals: CodeProposal[] = [] + // Gated mutating execute returns a proposal marker instead of mutating. 
+ const gated: ToolDefinition["execute"] = async (args) => + JSON.stringify({ status: "proposed", tool: "create_dashboard", input: args }) + const map = new Map([["create_dashboard", gated]]) + const r = await createCodeModeDispatch(map, (p) => proposals.push(p))("create_dashboard", { title: "x" }) + expect(r.ok).toBe(true) + expect(proposals).toEqual([{ tool: "create_dashboard", input: { title: "x" } }]) + }) +}) diff --git a/apps/chat-flue/src/lib/codemode/index.ts b/apps/chat-flue/src/lib/codemode/index.ts new file mode 100644 index 00000000..cd8c9de2 --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/index.ts @@ -0,0 +1,2 @@ +export { buildCodeModeApi, type CodeModeApi } from "./api-gen.ts" +export { createRunCodeTool, RUN_CODE_TOOL_NAME } from "./run-code-tool.ts" diff --git a/apps/chat-flue/src/lib/codemode/run-code-tool.ts b/apps/chat-flue/src/lib/codemode/run-code-tool.ts new file mode 100644 index 00000000..86fa0eab --- /dev/null +++ b/apps/chat-flue/src/lib/codemode/run-code-tool.ts @@ -0,0 +1,80 @@ +import type { ToolDefinition } from "@flue/runtime" +import { formatRunResult, type CodeProposal, type RpcCallResult } from "@maple/codemode" +import { parseToolProposal } from "../approval.ts" +import type { ChatFlueEnv } from "../env.ts" +import type { CodeModeApi } from "./api-gen.ts" + +export const RUN_CODE_TOOL_NAME = "run_code" + +/** + * Build the `maple.(input)` dispatch for a code run: look up the gated + * tool `execute`, run it, and — because mutating tools' gated execute returns a + * proposal marker instead of mutating — collect any proposal via `onProposal` + * while still returning its value to the model. Unknown tools become error + * values so the model self-corrects. Extracted (and free of the Workers-only + * sandbox import) so the approval-collection logic is unit-testable. 
+ */ +export const createCodeModeDispatch = ( + dispatch: CodeModeApi["dispatch"], + onProposal: (proposal: CodeProposal) => void, +) => { + return async (name: string, input: unknown): Promise => { + const execute = dispatch.get(name) + if (!execute) { + return { ok: false, error: { name: "UnknownTool", message: `maple.${name} is not available` } } + } + const value = await execute((input ?? {}) as Record) + const proposal = parseToolProposal(value) + if (proposal) onProposal({ tool: proposal.tool, input: proposal.input }) + return { ok: true, value } + } +} + +const DESCRIPTION = `Run a JavaScript snippet against Maple's observability data using the \`maple\` API declared in the system prompt. Prefer this for any multi-step investigation: call several \`maple.*\` tools, filter/aggregate their results in code, and \`console.log\`/\`return\` only what matters — one call instead of many round-trips. Imports and network are disabled. \`await maple.(input)\` returns the tool's text output and throws on failure (wrap in try/catch to keep going). Mutating tools only PROPOSE a change for the user to approve.` + +/** + * A single local Flue tool that executes model-written code in a fresh + * Cloudflare Dynamic Worker isolate (network blocked), bridging each + * `maple.(input)` call back to the connected MCP tools via the + * supervisor RPC. Mutating calls run the approval-gated `execute`, so they + * return a proposal marker instead of mutating; the proposals are collected and + * surfaced to the web client as a `proposed_batch` envelope. + */ +export const createRunCodeTool = (env: ChatFlueEnv, api: CodeModeApi): ToolDefinition => ({ + name: RUN_CODE_TOOL_NAME, + description: DESCRIPTION, + parameters: { + type: "object", + properties: { + code: { + type: "string", + description: + "A JavaScript snippet. Use `await maple.(input)`, `console.log(...)`, and `return`. 
No imports, no network, no type annotations.", + }, + }, + required: ["code"], + }, + execute: async (args) => { + const code = typeof args?.code === "string" ? args.code : "" + const loader = env.LOADER + if (!loader) { + return "Code mode is unavailable (no sandbox runtime is bound). Call the mcp__maple__* tools directly instead." + } + if (!code.trim()) { + return "No code provided. Pass a `code` string that uses the `maple` API." + } + + const proposals: CodeProposal[] = [] + const dispatch = createCodeModeDispatch(api.dispatch, (p) => proposals.push(p)) + + // Dynamic import: the sandbox driver pulls in `cloudflare:workers`, so keep + // it out of this module's static graph (importable by Node-based tests). + const { runCodeInSandbox } = await import("@maple/codemode/sandbox") + const result = await runCodeInSandbox(loader, { + id: `maple-codemode-${crypto.randomUUID()}`, + code, + dispatch, + }) + return formatRunResult(result, proposals) + }, +}) diff --git a/apps/chat-flue/src/lib/env.ts b/apps/chat-flue/src/lib/env.ts index 79502075..2bd16f9f 100644 --- a/apps/chat-flue/src/lib/env.ts +++ b/apps/chat-flue/src/lib/env.ts @@ -15,6 +15,20 @@ export interface ChatFlueEnv { /** Deployment environment label, surfaced on telemetry. */ MAPLE_ENVIRONMENT?: string + // --- Code Mode (Cloudflare Dynamic Workers / Worker Loader) --- + /** + * `"1"` enables Code Mode: the agent gets a `run_code` tool + a generated + * `maple.*` API and writes code instead of calling 50+ tools one at a time. + * Hybrid — direct tools stay available. No-ops unless `LOADER` is also bound. + */ + MAPLE_CODE_MODE?: string + /** + * Worker Loader binding (`worker_loader`) used to spin up a fresh sandbox + * isolate per `run_code` call. Bound only when `MAPLE_CODE_MODE` is set; the + * binding requires Cloudflare Worker Loader beta access on the account. 
+ */ + LOADER?: WorkerLoader + // --- Telemetry (OpenTelemetry → Maple ingest) --- /** * Maple ingest key (org-scoped; use the internal-org key, same as `apps/api`). diff --git a/apps/chat-flue/src/lib/modes.ts b/apps/chat-flue/src/lib/modes.ts index fa031621..1bb47603 100644 --- a/apps/chat-flue/src/lib/modes.ts +++ b/apps/chat-flue/src/lib/modes.ts @@ -4,7 +4,7 @@ // mode + context in each request body; here `buildSystemPrompt` assembles the // instructions and the mode is derived from the agent instance id. -import { DASHBOARD_BUILDER_SYSTEM_PROMPT, SYSTEM_PROMPT } from "./prompts.ts" +import { DASHBOARD_BUILDER_SYSTEM_PROMPT, formatCodeModeBlock, SYSTEM_PROMPT } from "./prompts.ts" import { tabIdFromInstanceId } from "./org.ts" export type ChatMode = "default" | "dashboard-builder" | "alert" | "widget-fix" @@ -229,6 +229,8 @@ export interface BuildSystemPromptArgs { alertContext?: AlertContext widgetFixContext?: WidgetFixContext pageContext?: PageContextPayload + /** When set (Code Mode on), append the `run_code` instructions + `maple.*` API. */ + codeMode?: { declaration: string } } /** @@ -237,10 +239,14 @@ export interface BuildSystemPromptArgs { * apps/chat-agent/src/index.ts `runChatTurn`. */ export const buildSystemPrompt = (args: BuildSystemPromptArgs): string => { - const { mode, alertContext, widgetFixContext, pageContext } = args + const { mode, alertContext, widgetFixContext, pageContext, codeMode } = args let prompt = mode === "dashboard-builder" ? 
DASHBOARD_BUILDER_SYSTEM_PROMPT : SYSTEM_PROMPT + if (codeMode) { + prompt += `\n\n${formatCodeModeBlock(codeMode.declaration)}` + } + if (mode === "alert" && alertContext) { prompt += `\n${formatAlertContextBlock(alertContext)}` } diff --git a/apps/chat-flue/src/lib/prompts.ts b/apps/chat-flue/src/lib/prompts.ts index 24d6fa8c..ec0a80a4 100644 --- a/apps/chat-flue/src/lib/prompts.ts +++ b/apps/chat-flue/src/lib/prompts.ts @@ -12,6 +12,53 @@ Maple's tools are exposed over MCP and named \`mcp__maple__<tool>\` (for example \`mcp__maple__find_errors\`). This document refers to them by their short names; call them by their full \`mcp__maple__<tool>\` name.` +/** + * Code Mode block (appended when `MAPLE_CODE_MODE=1` and the sandbox is bound). + * Gives the model a `run_code` tool and the generated `maple.*` API surface so + * it can write one snippet that chains/filters many tool calls instead of + * round-tripping each. The direct `mcp__maple__*` tools remain available. + */ +export const formatCodeModeBlock = (declaration: string): string => `## Code Mode — prefer writing code for multi-step work + +You have a \`run_code\` tool that runs a JavaScript snippet in a secure sandbox. +Inside that snippet you can call Maple through the typed \`maple\` API below. Each +method maps to a Maple tool and returns the tool's text output. + +Prefer \`run_code\` whenever a task needs MORE THAN ONE tool call — e.g. "find the +worst service then show a sample trace", looping over results, filtering, or +combining several queries. Writing one snippet that does the whole investigation +is faster and cheaper than calling the \`mcp__maple__*\` tools one at a time. For a +single lookup, calling the direct tool is fine. + +### The \`maple\` API +\`\`\`ts +${declaration} +\`\`\` + +### Rules for the code you write +- Plain JavaScript only. No \`import\`/\`require\`, no type annotations, no network. 
+- \`await maple.<tool>(input)\` returns a STRING: human-readable text followed by a + \`Structured content:\` line with JSON. \`JSON.parse\` that JSON block when you need + to filter or sort programmatically. +- A failing call THROWS — wrap risky calls in \`try/catch\` to continue; otherwise + the error is reported back to you to fix on the next turn. +- \`console.log(...)\` anything you want to see; \`return\` a final value. Only your + logged/returned output comes back — keep it small (summarize, don't dump). +- Mutating tools (create/update/delete/transition) only PROPOSE a change: calling + one queues it for the user's approval and does NOT take effect. Call it once + with the intended arguments; never write Approve/Deny prose. + +### Example +\`\`\`js +const raw = await maple.list_services({}) +const services = JSON.parse(raw.split("Structured content:")[1]).services ?? [] +const worst = services.sort((a, b) => b.errorRate - a.errorRate)[0] +if (!worst) return "No services found." +console.log("Worst service:", worst.name, "errorRate", worst.errorRate) +const traces = await maple.search_traces({ service: worst.name, only_errors: true, limit: 1 }) +console.log(traces) +\`\`\`` + const APPROVAL_NOTE = `## Mutating actions are approved before they take effect Tools that create, update, delete, or transition state (dashboards, alert rules, error issues, notification policies, comments, fix proposals) do not take effect diff --git a/apps/web/src/components/chat/chat-conversation.tsx b/apps/web/src/components/chat/chat-conversation.tsx index deeb4595..d239b3d9 100644 --- a/apps/web/src/components/chat/chat-conversation.tsx +++ b/apps/web/src/components/chat/chat-conversation.tsx @@ -17,7 +17,7 @@ import { type PageContextPayload, } from "./auto-contexts" import type { ChatContext } from "./context-preamble" -import { parseToolProposal } from "./tool-proposal" +import { parseToolProposal, parseToolProposalBatch } from "./tool-proposal" import { PageContextChips } from 
"./page-context-chips" import { Conversation, @@ -372,6 +372,38 @@ export function ChatConversation({ } if (isToolPart(part)) { const tp = part as ToolPart + // Code Mode: one run_code call can queue several + // mutations — render an approval card per proposal, + // keyed `${toolCallId}#${i}` so each resolves + // independently. + const batch = + tp.state === "output-available" + ? parseToolProposalBatch(tp.output) + : null + if (batch) { + flushTools() + batch.forEach((proposal, bi) => { + const cardKey = `${tp.toolCallId}#${bi}` + const resolved = resolvedApprovals.get(cardKey) + nodes.push( + + handleApprove( + cardKey, + proposal.tool, + proposal.input, + ) + } + onDeny={() => resolveApproval(cardKey, "denied")} + />, + ) + }) + continue + } const proposal = tp.state === "output-available" ? parseToolProposal(tp.output) diff --git a/apps/web/src/components/chat/tool-proposal.test.ts b/apps/web/src/components/chat/tool-proposal.test.ts index e27bdba9..c7dfe885 100644 --- a/apps/web/src/components/chat/tool-proposal.test.ts +++ b/apps/web/src/components/chat/tool-proposal.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest" -import { parseToolProposal } from "./tool-proposal" +import { parseToolProposal, parseToolProposalBatch } from "./tool-proposal" describe("parseToolProposal", () => { it("parses a JSON-string proposal (Flue's tool output)", () => { @@ -27,4 +27,41 @@ describe("parseToolProposal", () => { expect(parseToolProposal(undefined)).toBeNull() expect(parseToolProposal(42)).toBeNull() }) + + it("does not treat a proposed_batch as a single proposal", () => { + const out = JSON.stringify({ + status: "proposed_batch", + proposals: [{ tool: "create_dashboard", input: {} }], + }) + expect(parseToolProposal(out)).toBeNull() + }) +}) + +describe("parseToolProposalBatch", () => { + it("parses a run_code proposed_batch envelope into one proposal per change", () => { + const out = JSON.stringify({ + status: "proposed_batch", + proposals: [ + { 
tool: "create_dashboard", input: { title: "x" } }, + { tool: "add_dashboard_widget", input: { id: "1" } }, + ], + text: "did stuff", + }) + const batch = parseToolProposalBatch(out) + expect(batch).toHaveLength(2) + expect(batch?.[0]).toEqual({ status: "proposed", tool: "create_dashboard", input: { title: "x" } }) + expect(batch?.[1]?.tool).toBe("add_dashboard_widget") + }) + + it("drops malformed entries and returns null when nothing valid remains", () => { + expect( + parseToolProposalBatch(JSON.stringify({ status: "proposed_batch", proposals: [{ no: "tool" }] })), + ).toBeNull() + }) + + it("returns null for non-batch output", () => { + expect(parseToolProposalBatch("plain text")).toBeNull() + expect(parseToolProposalBatch(JSON.stringify({ status: "proposed", tool: "x" }))).toBeNull() + expect(parseToolProposalBatch(null)).toBeNull() + }) }) diff --git a/apps/web/src/components/chat/tool-proposal.ts b/apps/web/src/components/chat/tool-proposal.ts index 7aeedd66..c9e1b0ab 100644 --- a/apps/web/src/components/chat/tool-proposal.ts +++ b/apps/web/src/components/chat/tool-proposal.ts @@ -26,3 +26,30 @@ export const parseToolProposal = (output: unknown): ToolProposal | null => { ? { status: "proposed", tool: v.tool, input: v.input } : null } + +/** + * Code Mode (`run_code`) can queue several mutations in one snippet. Its output + * is a `{ status: "proposed_batch", proposals: [...] }` envelope; parse it into + * one {@link ToolProposal} per queued change so the UI can render an approval + * card for each. Mirrors `formatRunResult` in `@maple/codemode`. Returns `null` + * when the output isn't a batch envelope. 
+ */ +export const parseToolProposalBatch = (output: unknown): ToolProposal[] | null => { + let value: unknown = output + if (typeof output === "string") { + try { + value = JSON.parse(output) + } catch { + return null + } + } + if (!value || typeof value !== "object") return null + const v = value as Record<string, unknown> + if (v.status !== "proposed_batch" || !Array.isArray(v.proposals)) return null + const proposals = v.proposals + .filter((p): p is { tool: string; input: unknown } => { + return !!p && typeof p === "object" && typeof (p as Record<string, unknown>).tool === "string" + }) + .map((p) => ({ status: "proposed" as const, tool: p.tool, input: p.input })) + return proposals.length > 0 ? proposals : null +} diff --git a/bun.lock b/bun.lock index 1571de79..ff78dd4e 100644 --- a/bun.lock +++ b/bun.lock @@ -44,6 +44,7 @@ "@flue/sdk": "1.0.0-beta.1", "@libsql/client": "0.15.15", "@maple-dev/effect-sdk": "workspace:*", + "@maple/codemode": "workspace:*", "@maple/db": "workspace:*", "@maple/domain": "workspace:*", "@maple/effect-cloudflare": "workspace:*", @@ -77,6 +78,7 @@ "@clerk/backend": "^2.30.1", "@flue/opentelemetry": "1.0.0-beta.1", "@flue/runtime": "1.0.0-beta.2", + "@maple/codemode": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^2.0.0", "@opentelemetry/exporter-trace-otlp-http": "^0.205.0", @@ -423,6 +425,14 @@ "typescript": "catalog:tooling", }, }, + "packages/codemode": { + "name": "@maple/codemode", + "devDependencies": { + "@cloudflare/workers-types": "4.20260603.1", + "typescript": "catalog:tooling", + "vitest": "catalog:", + }, + }, "packages/db": { "name": "@maple/db", "dependencies": { @@ -1392,6 +1402,8 @@ "@maple/clickhouse-cli": ["@maple/clickhouse-cli@workspace:packages/clickhouse-cli"], + "@maple/codemode": ["@maple/codemode@workspace:packages/codemode"], + "@maple/db": ["@maple/db@workspace:packages/db"], "@maple/domain": ["@maple/domain@workspace:packages/domain"], @@ -2482,7 +2494,7 @@ "abort-controller": ["abort-controller@3.0.0", "", { 
"dependencies": { "event-target-shim": "^5.0.0" } }, "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg=="], - "accepts": ["accepts@1.3.8", "", { "dependencies": { "mime-types": "~2.1.34", "negotiator": "0.6.3" } }, "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw=="], + "accepts": ["accepts@2.0.0", "", { "dependencies": { "mime-types": "^3.0.0", "negotiator": "^1.0.0" } }, "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng=="], "acorn": ["acorn@8.17.0", "", { "bin": { "acorn": "bin/acorn" } }, "sha512-xRQbDb9BnwDafYNn6Vwl839DYVjqXYb1XVGtWAZ1kcDc6iwAL4hg3B1dZlRiuENFeO2H53gFG3in621AdERVAg=="], @@ -3962,7 +3974,7 @@ "napi-build-utils": ["napi-build-utils@2.0.0", "", {}, "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA=="], - "negotiator": ["negotiator@0.6.3", "", {}, "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="], + "negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], "neotraverse": ["neotraverse@0.6.18", "", {}, "sha512-Z4SmBUweYa09+o6pG+eASabEpP6QkQ70yHj351pQoEXIs8uHbaU2DWVmzBANKgflPa47A50PtB2+NgRpQvr7vA=="], @@ -4428,7 +4440,7 @@ "semver": ["semver@7.8.5", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-Y7/KDsb8LjooZpwaqGyulO6DQlksgCncchHGk+sZIY4SBvUocMBEFH5Ur1fI4dV+Jvl0w6cjvucaIi40puRioA=="], - "send": ["send@0.19.2", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "~0.5.2", "http-errors": "~2.0.1", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "~2.4.1", "range-parser": "~1.2.1", "statuses": "~2.0.2" } }, "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg=="], + "send": ["send@1.2.1", "", { 
"dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], "serialize-error": ["serialize-error@2.1.0", "", {}, "sha512-ghgmKt5o4Tly5yEG/UJp8qTd0AN7Xalw4XBtDEKP655B699qMEtra1WlXeE6WIvdEG481JvRxULKsInq/iNysw=="], @@ -5090,6 +5102,8 @@ "@esbuild-kit/core-utils/esbuild": ["esbuild@0.18.20", "", { "optionalDependencies": { "@esbuild/android-arm": "0.18.20", "@esbuild/android-arm64": "0.18.20", "@esbuild/android-x64": "0.18.20", "@esbuild/darwin-arm64": "0.18.20", "@esbuild/darwin-x64": "0.18.20", "@esbuild/freebsd-arm64": "0.18.20", "@esbuild/freebsd-x64": "0.18.20", "@esbuild/linux-arm": "0.18.20", "@esbuild/linux-arm64": "0.18.20", "@esbuild/linux-ia32": "0.18.20", "@esbuild/linux-loong64": "0.18.20", "@esbuild/linux-mips64el": "0.18.20", "@esbuild/linux-ppc64": "0.18.20", "@esbuild/linux-riscv64": "0.18.20", "@esbuild/linux-s390x": "0.18.20", "@esbuild/linux-x64": "0.18.20", "@esbuild/netbsd-x64": "0.18.20", "@esbuild/openbsd-x64": "0.18.20", "@esbuild/sunos-x64": "0.18.20", "@esbuild/win32-arm64": "0.18.20", "@esbuild/win32-ia32": "0.18.20", "@esbuild/win32-x64": "0.18.20" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-ceqxoedUrcayh7Y7ZX6NdbbDzGROiyVBgC4PriJThBKSVPWnnFHZAkfI1lJT8QFkOwH4qOS2SJkS4wvpGl8BpA=="], + "@expo/cli/accepts": ["accepts@1.3.8", "", { "dependencies": { "mime-types": "~2.1.34", "negotiator": "0.6.3" } }, "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw=="], + "@expo/cli/chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], "@expo/cli/ci-info": 
["ci-info@3.9.0", "", {}, "sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ=="], @@ -5100,6 +5114,8 @@ "@expo/cli/pretty-format": ["pretty-format@29.7.0", "", { "dependencies": { "@jest/schemas": "^29.6.3", "ansi-styles": "^5.0.0", "react-is": "^18.0.0" } }, "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ=="], + "@expo/cli/send": ["send@0.19.2", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "~0.5.2", "http-errors": "~2.0.1", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "~2.4.1", "range-parser": "~1.2.1", "statuses": "~2.0.2" } }, "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg=="], + "@expo/cli/wrap-ansi": ["wrap-ansi@7.0.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q=="], "@expo/cli/zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], @@ -5504,8 +5520,6 @@ "@wallet-standard/errors/commander": ["commander@13.1.0", "", {}, "sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw=="], - "accepts/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], - "agents/esbuild": ["esbuild@0.28.1", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.28.1", "@esbuild/android-arm": "0.28.1", "@esbuild/android-arm64": "0.28.1", "@esbuild/android-x64": "0.28.1", "@esbuild/darwin-arm64": "0.28.1", "@esbuild/darwin-x64": "0.28.1", "@esbuild/freebsd-arm64": "0.28.1", "@esbuild/freebsd-x64": "0.28.1", "@esbuild/linux-arm": "0.28.1", 
"@esbuild/linux-arm64": "0.28.1", "@esbuild/linux-ia32": "0.28.1", "@esbuild/linux-loong64": "0.28.1", "@esbuild/linux-mips64el": "0.28.1", "@esbuild/linux-ppc64": "0.28.1", "@esbuild/linux-riscv64": "0.28.1", "@esbuild/linux-s390x": "0.28.1", "@esbuild/linux-x64": "0.28.1", "@esbuild/netbsd-arm64": "0.28.1", "@esbuild/netbsd-x64": "0.28.1", "@esbuild/openbsd-arm64": "0.28.1", "@esbuild/openbsd-x64": "0.28.1", "@esbuild/openharmony-arm64": "0.28.1", "@esbuild/sunos-x64": "0.28.1", "@esbuild/win32-arm64": "0.28.1", "@esbuild/win32-ia32": "0.28.1", "@esbuild/win32-x64": "0.28.1" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-HrJrvZv5ayxBzPfwphOoNzkzOIIlifzk0KJrGK2c8R4+LKpMtpYLQeUdjnwjWv/LZlkH2laZk+4w78pi99D4Vw=="], "aggregate-error/indent-string": ["indent-string@4.0.0", "", {}, "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg=="], @@ -5596,6 +5610,8 @@ "engine.io/@types/node": ["@types/node@24.13.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-fRa09kZTgu8o71KFcDjUFuc7F+dEbZYZmkI0mg5YBTRs0yMKjYHsq/c0urDKeDb+D5qVgXOdFcuu+DZPKOITwA=="], + "engine.io/accepts": ["accepts@1.3.8", "", { "dependencies": { "mime-types": "~2.1.34", "negotiator": "0.6.3" } }, "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw=="], + "engine.io/cookie": ["cookie@0.7.2", "", {}, "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w=="], "expo/pretty-format": ["pretty-format@29.7.0", "", { "dependencies": { "@jest/schemas": "^29.6.3", "ansi-styles": "^5.0.0", "react-is": "^18.0.0" } }, "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ=="], @@ -5612,12 +5628,8 @@ "expo-router/semver": ["semver@7.6.3", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A=="], - "express/accepts": ["accepts@2.0.0", "", { 
"dependencies": { "mime-types": "^3.0.0", "negotiator": "^1.0.0" } }, "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng=="], - "express/cookie": ["cookie@0.7.2", "", {}, "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w=="], - "express/send": ["send@1.2.1", "", { "dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], - "find-process/chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], "find-process/commander": ["commander@14.0.3", "", {}, "sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw=="], @@ -5704,8 +5716,6 @@ "metro/@babel/traverse": ["@babel/traverse@7.29.7", "", { "dependencies": { "@babel/code-frame": "^7.29.7", "@babel/generator": "^7.29.7", "@babel/helper-globals": "^7.29.7", "@babel/parser": "^7.29.7", "@babel/template": "^7.29.7", "@babel/types": "^7.29.7", "debug": "^4.3.1" } }, "sha512-EhlfNQtZ+NK22w5BM61ciuiq1m58ed33Wr1Xan//ZRTy6hgjnwyCffRYwzsGXdASJSUJ1guZILsErh1eQcl+zw=="], - "metro/accepts": ["accepts@2.0.0", "", { "dependencies": { "mime-types": "^3.0.0", "negotiator": "^1.0.0" } }, "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng=="], - "metro/ci-info": ["ci-info@2.0.0", "", {}, "sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ=="], "metro/hermes-parser": ["hermes-parser@0.35.0", "", { "dependencies": { "hermes-estree": "0.35.0" } }, 
"sha512-9JLjeHxBx8T4CAsydZR49PNZUaix+WpQJwu9p2010lu+7Kwl6D/7wYFFJxoz+aXkaaClp9Zfg6W6/zVlSJORaA=="], @@ -5858,18 +5868,14 @@ "seek-bzip/commander": ["commander@6.2.1", "", {}, "sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA=="], - "send/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], - - "send/fresh": ["fresh@0.5.2", "", {}, "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="], - - "serve-static/send": ["send@1.2.1", "", { "dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], - "sha.js/safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], "simple-plist/bplist-parser": ["bplist-parser@0.3.1", "", { "dependencies": { "big-integer": "1.6.x" } }, "sha512-PyJxiNtA5T2PlLIeBot4lbp7rj4OadzjnMZD/G5zuBNt8ei/yCU7+wW0h2bag9vr8c+/WuRWmSxbqAl9hL1rBA=="], "sitemap/@types/node": ["@types/node@24.13.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-fRa09kZTgu8o71KFcDjUFuc7F+dEbZYZmkI0mg5YBTRs0yMKjYHsq/c0urDKeDb+D5qVgXOdFcuu+DZPKOITwA=="], + "socket.io/accepts": ["accepts@1.3.8", "", { "dependencies": { "mime-types": "~2.1.34", "negotiator": "0.6.3" } }, "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw=="], + "source-map-support/source-map": ["source-map@0.6.1", "", {}, "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="], "stack-utils/escape-string-regexp": 
["escape-string-regexp@2.0.0", "", {}, "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="], @@ -6068,6 +6074,10 @@ "@esbuild-kit/core-utils/esbuild/@esbuild/win32-x64": ["@esbuild/win32-x64@0.18.20", "", { "os": "win32", "cpu": "x64" }, "sha512-kTdfRcSiDfQca/y9QIkng02avJ+NCaQvrMejlsB3RRv5sE9rRoeBPISaZpKxHELzRxZyLvNts1P27W3wV+8geQ=="], + "@expo/cli/accepts/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], + + "@expo/cli/accepts/negotiator": ["negotiator@0.6.3", "", {}, "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="], + "@expo/cli/chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "@expo/cli/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], @@ -6086,6 +6096,10 @@ "@expo/cli/pretty-format/ansi-styles": ["ansi-styles@5.2.0", "", {}, "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA=="], + "@expo/cli/send/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], + + "@expo/cli/send/fresh": ["fresh@0.5.2", "", {}, "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="], + "@expo/cli/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "@expo/cli/wrap-ansi/string-width": ["string-width@4.2.3", "", { "dependencies": { 
"emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], @@ -6360,6 +6374,8 @@ "@react-native/dev-middleware/open/is-wsl": ["is-wsl@2.2.0", "", { "dependencies": { "is-docker": "^2.0.0" } }, "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww=="], + "@react-native/dev-middleware/serve-static/send": ["send@0.19.2", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "~0.5.2", "http-errors": "~2.0.1", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "~2.4.1", "range-parser": "~1.2.1", "statuses": "~2.0.2" } }, "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg=="], + "@react-navigation/core/query-string/decode-uri-component": ["decode-uri-component@0.2.2", "", {}, "sha512-FqUYQ+8o158GyGTrMFJms9qh3CqTKvAqgqsTnkLI8sKu0028orqBhxNMFkFen0zGyg6epACD32pjVk58ngIErQ=="], "@react-navigation/core/query-string/filter-obj": ["filter-obj@1.1.0", "", {}, "sha512-8rXg1ZnX7xzy2NGDVkBVaAy+lSlPNwad13BtgSlLuxfIslyt5Vg64U7tFcCt4WS1R0hvtnQybT/IyCkGZ3DpXQ=="], @@ -6488,8 +6504,6 @@ "@types/sax/@types/node/undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="], - "accepts/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], - "agents/esbuild/@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.28.1", "", { "os": "aix", "cpu": "ppc64" }, "sha512-Svl7tq8k/08+p6CXPpRjQ1fKX+1odH/BQbb48fV6fj3CWHhsoIOoY87w1oHXm0qEpkIK3ZfVgp0hed3XBXzXMQ=="], "agents/esbuild/@esbuild/android-arm": ["@esbuild/android-arm@0.28.1", "", { "os": "android", "cpu": "arm" }, 
"sha512-0k2F129Xdio1TdJfzJ8sy1Q47vUD2NnwdhiAf7drUN1EBTfPf4hsFCtmMgu/6m8JSzsBrlmVjudMBQqOfG8usQ=="], @@ -6740,6 +6754,10 @@ "engine.io/@types/node/undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="], + "engine.io/accepts/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], + + "engine.io/accepts/negotiator": ["negotiator@0.6.3", "", {}, "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="], + "expo-modules-autolinking/chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "expo-modules-autolinking/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], @@ -6752,8 +6770,6 @@ "expo/pretty-format/ansi-styles": ["ansi-styles@5.2.0", "", {}, "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA=="], - "express/accepts/negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], - "find-process/chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "find-process/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], @@ -6800,8 +6816,6 @@ "metro-transform-plugins/@babel/traverse/@babel/parser": ["@babel/parser@7.29.7", "", { 
"dependencies": { "@babel/types": "^7.29.7" }, "bin": "./bin/babel-parser.js" }, "sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg=="], - "metro/accepts/negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], - "metro/hermes-parser/hermes-estree": ["hermes-estree@0.35.0", "", {}, "sha512-xVx5Opwy8Oo1I5yGpVRhCvWL/iV3M+ylksSKVNlxxD90cpDpR/AR1jLYqK8HWihm065a6UI3HeyAmYzwS8NOOg=="], "metro/yargs/cliui": ["cliui@8.0.1", "", { "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" } }, "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ=="], @@ -6936,10 +6950,12 @@ "run-jxa/execa/strip-final-newline": ["strip-final-newline@2.0.0", "", {}, "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA=="], - "send/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], - "sitemap/@types/node/undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="], + "socket.io/accepts/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], + + "socket.io/accepts/negotiator": ["negotiator@0.6.3", "", {}, "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="], + "terminal-link/ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], "test-exclude/minimatch/brace-expansion": ["brace-expansion@1.1.15", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, 
"sha512-EwOCDEex4quD37XhqM3omwtMoJjr//isUZz1JopUNWms+4Z2ViyM/k1YIRePpoVNnQhENnxtFjLaxNHrT7xIUg=="], @@ -7206,6 +7222,8 @@ "@babel/highlight/chalk/supports-color/has-flag": ["has-flag@3.0.0", "", {}, "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw=="], + "@expo/cli/accepts/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], + "@expo/cli/glob/minimatch/brace-expansion": ["brace-expansion@5.0.6", "", { "dependencies": { "balanced-match": "^4.0.2" } }, "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g=="], "@expo/cli/ora/chalk/ansi-styles": ["ansi-styles@3.2.1", "", { "dependencies": { "color-convert": "^1.9.0" } }, "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA=="], @@ -7218,6 +7236,8 @@ "@expo/cli/ora/strip-ansi/ansi-regex": ["ansi-regex@4.1.1", "", {}, "sha512-ILlv4k/3f6vfQ4OoP2AGvirOktlQ98ZEL1k9FaQjxa3L1abBgbuTDAdPOpvbGncC0BTVQrl+OM8xZGK6tWXt7g=="], + "@expo/cli/send/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], + "@expo/cli/wrap-ansi/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], "@expo/cli/wrap-ansi/string-width/is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], @@ -7290,6 +7310,12 @@ "@react-native/community-cli-plugin/@react-native/dev-middleware/open/is-wsl": ["is-wsl@2.2.0", "", { "dependencies": { "is-docker": "^2.0.0" } }, "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww=="], + "@react-native/community-cli-plugin/@react-native/dev-middleware/serve-static/send": 
["send@0.19.2", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "~0.5.2", "http-errors": "~2.0.1", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "~2.4.1", "range-parser": "~1.2.1", "statuses": "~2.0.2" } }, "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg=="], + + "@react-native/dev-middleware/serve-static/send/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], + + "@react-native/dev-middleware/serve-static/send/fresh": ["fresh@0.5.2", "", {}, "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="], + "@solana/codecs/@solana/codecs-core/@solana/errors/commander": ["commander@15.0.0", "", {}, "sha512-z67u4ZhzCL/Tydu1lJARtEZYWbWaN7oYLHbsuzocr6y4N6WZAagG3RQ4FW61V1/0+jImpj293XfrcYnd1qxtPg=="], "@solana/codecs/@solana/codecs-numbers/@solana/errors/commander": ["commander@15.0.0", "", {}, "sha512-z67u4ZhzCL/Tydu1lJARtEZYWbWaN7oYLHbsuzocr6y4N6WZAagG3RQ4FW61V1/0+jImpj293XfrcYnd1qxtPg=="], @@ -7352,6 +7378,8 @@ "astro/vite/esbuild/@esbuild/win32-x64": ["@esbuild/win32-x64@0.25.12", "", { "os": "win32", "cpu": "x64" }, "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA=="], + "engine.io/accepts/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], + "ink-confirm-input/ink-text-input/chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "ink-confirm-input/ink-text-input/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, 
"sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], @@ -7406,6 +7434,8 @@ "rimraf/glob/minimatch/brace-expansion": ["brace-expansion@1.1.15", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-EwOCDEex4quD37XhqM3omwtMoJjr//isUZz1JopUNWms+4Z2ViyM/k1YIRePpoVNnQhENnxtFjLaxNHrT7xIUg=="], + "socket.io/accepts/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], + "uniwind/@tailwindcss/oxide/@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" }, "bundled": true }, "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="], "uniwind/@tailwindcss/oxide/@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.10.0", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA=="], @@ -7460,6 +7490,12 @@ "@react-native/codegen/yargs/cliui/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], + "@react-native/community-cli-plugin/@react-native/dev-middleware/serve-static/send/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], + + "@react-native/community-cli-plugin/@react-native/dev-middleware/serve-static/send/fresh": ["fresh@0.5.2", "", {}, "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="], + + "@react-native/dev-middleware/serve-static/send/debug/ms": ["ms@2.0.0", "", {}, 
"sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], + "metro/yargs/cliui/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "qrcode/yargs/cliui/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], @@ -7477,5 +7513,7 @@ "@expo/package-manager/ora/cli-cursor/restore-cursor/onetime/mimic-fn": ["mimic-fn@1.2.0", "", {}, "sha512-jf84uxzwiuiIVKiOLpfYk7N46TSy8ubTonmneY9vrpHNAnp0QBt2BxWV9dO3/j+BoVAb+a5G6YDPW3M5HOdMWQ=="], "@react-native/babel-plugin-codegen/@react-native/codegen/yargs/cliui/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], + + "@react-native/community-cli-plugin/@react-native/dev-middleware/serve-static/send/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], } } diff --git a/packages/codemode/package.json b/packages/codemode/package.json new file mode 100644 index 00000000..ec9eef50 --- /dev/null +++ b/packages/codemode/package.json @@ -0,0 +1,19 @@ +{ + "name": "@maple/codemode", + "private": true, + "type": "module", + "description": "Cloudflare 'Code Mode' building blocks: generate a typed TS API from tool specs, build the sandbox-isolate harness, and run model-written code in a dynamic worker. 
The root barrel is pure (no Workers runtime); `./sandbox` pulls in `cloudflare:workers`.", + "exports": { + ".": "./src/index.ts", + "./sandbox": "./src/sandbox.ts" + }, + "scripts": { + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "devDependencies": { + "@cloudflare/workers-types": "4.20260603.1", + "typescript": "catalog:tooling", + "vitest": "catalog:" + } +} diff --git a/packages/codemode/src/api-gen.test.ts b/packages/codemode/src/api-gen.test.ts new file mode 100644 index 00000000..198f28bf --- /dev/null +++ b/packages/codemode/src/api-gen.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from "vitest" +import { buildApiDeclaration, clampDesc, escapeJsDoc, inputTypeForTool, tsTypeForSchema } from "./api-gen.ts" +import type { JsonSchema } from "./types.ts" + +describe("tsTypeForSchema", () => { + it("maps primitives", () => { + expect(tsTypeForSchema({ type: "string" })).toBe("string") + expect(tsTypeForSchema({ type: "integer" })).toBe("number") + expect(tsTypeForSchema({ type: "number" })).toBe("number") + expect(tsTypeForSchema({ type: "boolean" })).toBe("boolean") + }) + + it("renders arrays of the item type", () => { + expect(tsTypeForSchema({ type: "array", items: { type: "string" } })).toBe("string[]") + }) + + it("renders small string enums as literal unions", () => { + expect(tsTypeForSchema({ enum: ["traces", "logs", "metrics"] })).toBe( + '"traces" | "logs" | "metrics"', + ) + }) + + it("falls back to string for a giant enum", () => { + const big = Array.from({ length: 20 }, (_, i) => `v${i}`) + expect(tsTypeForSchema({ type: "string", enum: big })).toBe("string") + }) + + it("collapses deeply nested objects to Record", () => { + const schema: JsonSchema = { + type: "object", + properties: { + a: { type: "object", properties: { b: { type: "object", properties: { c: { type: "string" } } } } }, + }, + } + const out = tsTypeForSchema(schema, 0) + expect(out).toContain("Record") + }) + + it("treats optional-vs-required via the 
required array", () => { + const schema: JsonSchema = { + type: "object", + properties: { id: { type: "string" }, limit: { type: "integer" } }, + required: ["id"], + } + const out = inputTypeForTool(schema) + expect(out).toContain("id: string") + expect(out).toContain("limit?: number") + }) + + it("renders anyOf as a union", () => { + expect(tsTypeForSchema({ anyOf: [{ type: "string" }, { type: "number" }] })).toBe( + "string | number", + ) + }) +}) + +describe("inputTypeForTool", () => { + it("returns a Record for a parameterless tool", () => { + expect(inputTypeForTool(undefined)).toBe("Record") + expect(inputTypeForTool({ type: "object", properties: {} })).toBe("Record") + }) +}) + +describe("escapeJsDoc / clampDesc", () => { + it("neutralizes a comment terminator", () => { + expect(escapeJsDoc("ends here */ and more")).toBe("ends here *\\/ and more") + }) + + it("collapses whitespace and clamps length", () => { + expect(clampDesc(" a\n b c ", 50)).toBe("a b c") + expect(clampDesc("abcdefgh", 6)).toBe("abc...") + }) +}) + +describe("buildApiDeclaration", () => { + it("emits one sorted, JSDoc'd method per tool returning Promise", () => { + const decl = buildApiDeclaration([ + { + name: "find_errors", + description: "Find errors", + parameters: { type: "object", properties: { service: { type: "string", description: "svc name */ x" } } }, + }, + { name: "compare_periods", description: "Compare two periods", parameters: undefined }, + ]) + // sorted: compare_periods before find_errors + expect(decl.indexOf("compare_periods")).toBeLessThan(decl.indexOf("find_errors")) + expect(decl).toContain("declare const maple: {") + expect(decl).toContain("find_errors(input: { /** svc name *\\/ x */ service?: string }): Promise;") + expect(decl).toContain("compare_periods(input: Record): Promise;") + expect(decl).not.toContain("*/ x */ service") // terminator escaped + }) +}) diff --git a/packages/codemode/src/api-gen.ts b/packages/codemode/src/api-gen.ts new file mode 100644 
index 00000000..02bd42af --- /dev/null +++ b/packages/codemode/src/api-gen.ts @@ -0,0 +1,106 @@ +import type { CodeModeToolSpec, JsonSchema } from "./types.ts" + +const MAX_METHOD_DESC = 280 +const MAX_PROP_DESC = 100 +const MAX_OBJECT_DEPTH = 2 + +/** Neutralize a comment terminator so a description can't close a JSDoc/inline comment. */ +export const escapeJsDoc = (s: string): string => s.replace(/\*\//g, "*\\/") + +/** Collapse whitespace and clamp to `max` chars so 50+ tools don't blow context. */ +export const clampDesc = (s: string | undefined, max: number): string => { + if (!s) return "" + const oneLine = s.replace(/\s+/g, " ").trim() + if (oneLine.length <= max) return oneLine + return `${oneLine.slice(0, Math.max(0, max - 3))}...` +} + +const safeIdent = (name: string): string => + /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name) ? name : JSON.stringify(name) + +const literalUnion = (values: ReadonlyArray): string | null => { + const lits = values.filter( + (v) => typeof v === "string" || typeof v === "number" || typeof v === "boolean", + ) + if (lits.length !== values.length || lits.length === 0 || lits.length > 12) return null + return Array.from(new Set(lits.map((v) => JSON.stringify(v)))).join(" | ") +} + +/** + * Pragmatic JSON-Schema → TS type. Accurate at the top level (property names, + * required-ness, primitives, small enums) and deliberately coarse deeper down + * (nested objects past `MAX_OBJECT_DEPTH` collapse to `Record`). + */ +export const tsTypeForSchema = (schema: JsonSchema | undefined, depth = 0): string => { + if (!schema || typeof schema !== "object") return "unknown" + + if (Array.isArray(schema.enum)) { + const union = literalUnion(schema.enum) + if (union) return union + } + + const variants = schema.anyOf ?? schema.oneOf + if (variants && variants.length > 0) { + const parts = Array.from(new Set(variants.map((v) => tsTypeForSchema(v, depth)))) + return parts.join(" | ") + } + + const rawType = Array.isArray(schema.type) ? 
schema.type.find((t) => t !== "null") : schema.type + switch (rawType) { + case "string": + return "string" + case "number": + case "integer": + return "number" + case "boolean": + return "boolean" + case "null": + return "null" + case "array": + return `${tsTypeForSchema(schema.items, depth + 1)}[]` + case "object": + return objectType(schema, depth) + default: + return schema.properties ? objectType(schema, depth) : "unknown" + } +} + +const objectType = (schema: JsonSchema, depth: number): string => { + const props = schema.properties + if (!props || Object.keys(props).length === 0) return "Record" + if (depth >= MAX_OBJECT_DEPTH) return "Record" + const required = new Set(schema.required ?? []) + const fields = Object.entries(props).map(([key, value]) => { + const optional = required.has(key) ? "" : "?" + const desc = clampDesc(value?.description, MAX_PROP_DESC) + const comment = desc ? `/** ${escapeJsDoc(desc)} */ ` : "" + return `${comment}${safeIdent(key)}${optional}: ${tsTypeForSchema(value, depth + 1)}` + }) + return `{ ${fields.join("; ")} }` +} + +/** The single `input` parameter type for a tool's generated method. */ +export const inputTypeForTool = (schema: JsonSchema | undefined): string => { + if (!schema?.properties || Object.keys(schema.properties).length === 0) { + return "Record" + } + return objectType(schema, 0) +} + +/** + * Render the `declare const maple: { ... }` surface the model writes code + * against - one JSDoc'd async method per tool, sorted for stable output. Every + * method returns `Promise` (the tool's text output) and throws on + * failure, so the model can `try/catch` or let the harness report the error. + */ +export const buildApiDeclaration = (tools: ReadonlyArray): string => { + const methods = [...tools] + .sort((a, b) => a.name.localeCompare(b.name)) + .map((tool) => { + const desc = clampDesc(tool.description, MAX_METHOD_DESC) + const jsdoc = desc ? 
`\t/** ${escapeJsDoc(desc)} */\n` : "" + return `${jsdoc}\t${tool.name}(input: ${inputTypeForTool(tool.parameters)}): Promise;` + }) + .join("\n") + return `declare const maple: {\n${methods}\n};` +} diff --git a/packages/codemode/src/format.test.ts b/packages/codemode/src/format.test.ts new file mode 100644 index 00000000..69932572 --- /dev/null +++ b/packages/codemode/src/format.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from "vitest" +import { formatRunOutput, formatRunResult } from "./format.ts" +import { PROPOSED_BATCH_STATUS, type CodeRunResult } from "./types.ts" + +const base: CodeRunResult = { logs: [], returnValue: undefined, error: null } + +describe("formatRunOutput", () => { + it("renders console + return value", () => { + const out = formatRunOutput({ ...base, logs: ["a", "b"], returnValue: { n: 1 } }) + expect(out).toContain("Console output:\na\nb") + expect(out).toContain('Return value:\n{\n "n": 1\n}') + }) + + it("surfaces an error", () => { + const out = formatRunOutput({ ...base, error: { name: "Boom", message: "bad" } }) + expect(out).toBe("Error (Boom): bad") + }) + + it("explains a crash distinctly", () => { + const out = formatRunOutput({ ...base, crashed: true, error: { name: "TimeoutError", message: "aborted" } }) + expect(out).toContain("Code mode failed to run your snippet (TimeoutError): aborted") + }) + + it("handles an empty run", () => { + expect(formatRunOutput(base)).toContain("no console output") + }) +}) + +describe("formatRunResult", () => { + it("returns plain text when there are no proposals", () => { + const out = formatRunResult({ ...base, logs: ["hi"] }) + expect(out).toContain("Console output:\nhi") + expect(() => JSON.parse(out)).toThrow() + }) + + it("wraps proposals in a proposed_batch envelope", () => { + const out = formatRunResult({ ...base, logs: ["did stuff"] }, [ + { tool: "create_dashboard", input: { title: "x" } }, + { tool: "add_dashboard_widget", input: { id: "1" } }, + ]) + const parsed = 
JSON.parse(out) + expect(parsed.status).toBe(PROPOSED_BATCH_STATUS) + expect(parsed.proposals).toHaveLength(2) + expect(parsed.text).toContain("Queued 2 change(s) for approval: create_dashboard, add_dashboard_widget.") + }) +}) diff --git a/packages/codemode/src/format.ts b/packages/codemode/src/format.ts new file mode 100644 index 00000000..6558c07f --- /dev/null +++ b/packages/codemode/src/format.ts @@ -0,0 +1,65 @@ +import type { CodeProposal, CodeRunResult } from "./types.ts" +import { DEFAULT_OUTPUT_CAP_BYTES, PROPOSED_BATCH_STATUS } from "./types.ts" + +const capText = (s: string, cap: number): string => + s.length > cap ? `${s.slice(0, cap)}\n...[truncated]` : s + +/** Build the human/model-facing summary of a sandbox run. */ +export const formatRunOutput = (result: CodeRunResult, cap = DEFAULT_OUTPUT_CAP_BYTES): string => { + const parts: string[] = [] + + if (result.crashed && result.error) { + parts.push(`Code mode failed to run your snippet (${result.error.name}): ${result.error.message}`) + return capText(parts.join("\n\n"), cap) + } + + if (result.logs.length > 0) { + parts.push(`Console output:\n${result.logs.join("\n")}`) + } + + if (result.returnValue !== undefined) { + let rendered: string + try { + rendered = + typeof result.returnValue === "string" + ? result.returnValue + : JSON.stringify(result.returnValue, null, 2) + } catch { + rendered = String(result.returnValue) + } + parts.push(`Return value:\n${rendered}`) + } + + if (result.error) { + parts.push(`Error (${result.error.name}): ${result.error.message}`) + } + + if (parts.length === 0) { + parts.push("(code ran with no console output and no return value)") + } + + return capText(parts.join("\n\n"), cap) +} + +/** + * The final string `run_code` returns to the model. 
When the run queued mutating + * proposals (chat approval flow), wrap it as a `proposed_batch` envelope the web + * client parses into one approval card per proposal; otherwise return the plain + * summary so the model just reads its results. + */ +export const formatRunResult = ( + result: CodeRunResult, + proposals: ReadonlyArray = [], + cap = DEFAULT_OUTPUT_CAP_BYTES, +): string => { + const text = formatRunOutput(result, cap) + if (proposals.length === 0) return text + const queueNote = `\n\nQueued ${proposals.length} change(s) for approval: ${proposals + .map((p) => p.tool) + .join(", ")}.` + return JSON.stringify({ + status: PROPOSED_BATCH_STATUS, + proposals, + text: text + queueNote, + }) +} diff --git a/packages/codemode/src/harness.test.ts b/packages/codemode/src/harness.test.ts new file mode 100644 index 00000000..dd76b71d --- /dev/null +++ b/packages/codemode/src/harness.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from "vitest" +import { buildHarnessModule } from "./harness.ts" +import type { RpcCallResult } from "./types.ts" + +/** + * Load the generated harness module as an ESM data URL and run its `fetch` + * handler in-process with a fake `env.MAPLE` — exercises log/return/error + * capture without the Workers runtime. 
+ */ +const runHarness = async ( + code: string, + dispatch: (name: string, input: unknown) => Promise, + capBytes?: number, +): Promise<{ logs: string[]; returnValue: unknown; error: { name: string; message: string } | null }> => { + const src = buildHarnessModule(code, capBytes) + const mod = await import(`data:text/javascript,${encodeURIComponent(src)}`) + const env = { MAPLE: { call: (name: string, input: unknown) => dispatch(name, input) } } + const res = await mod.default.fetch(new Request("https://codemode/run"), env) + return res.json() +} + +const ok = (value: string): RpcCallResult => ({ ok: true, value }) + +describe("buildHarnessModule", () => { + it("captures console.log output", async () => { + const out = await runHarness(`console.log("hello", { a: 1 })`, async () => ok("x")) + expect(out.logs).toEqual(['hello {"a":1}']) + expect(out.error).toBeNull() + }) + + it("captures the IIFE return value", async () => { + const out = await runHarness(`return { count: 2 }`, async () => ok("x")) + expect(out.returnValue).toEqual({ count: 2 }) + }) + + it("routes maple.(input) through env.MAPLE.call and returns its value", async () => { + const calls: Array<[string, unknown]> = [] + const out = await runHarness( + `const r = await maple.find_errors({ service: "api" }); console.log(r)`, + async (name, input) => { + calls.push([name, input]) + return ok(`called ${name}`) + }, + ) + expect(calls).toEqual([["find_errors", { service: "api" }]]) + expect(out.logs).toEqual(["called find_errors"]) + }) + + it("throws inside user code when a call returns ok:false", async () => { + const out = await runHarness( + `try { await maple.boom({}) } catch (e) { console.log("caught", e.message) }`, + async () => ({ ok: false, error: { name: "BadTool", message: "nope" } }), + ) + expect(out.logs).toEqual(["caught nope"]) + expect(out.error).toBeNull() + }) + + it("captures an uncaught error as a value", async () => { + const out = await runHarness(`throw new Error("kaboom")`, async 
() => ok("x")) + expect(out.error?.message).toBe("kaboom") + }) + + it("truncates output past the byte cap", async () => { + const out = await runHarness(`for (let i = 0; i < 100; i++) console.log("x".repeat(50))`, async () => ok("x"), 1000) + expect(out.logs.at(-1)).toBe("[output truncated]") + const total = out.logs.join("").length + expect(total).toBeLessThan(1300) + }) +}) diff --git a/packages/codemode/src/harness.ts b/packages/codemode/src/harness.ts new file mode 100644 index 00000000..5af4e68c --- /dev/null +++ b/packages/codemode/src/harness.ts @@ -0,0 +1,84 @@ +import { DEFAULT_OUTPUT_CAP_BYTES } from "./types.ts" + +/** + * Build the source of the dynamic-worker module that runs the model's code. + * + * The model's code is spliced **directly** into an async IIFE in the module + * body — there is no runtime `eval`/`new Function`; the snippet simply *is* the + * module. The only capability exposed is `env.MAPLE.call(name, input)` (an RPC + * stub back to the supervisor); outbound network is blocked by the loader + * (`globalOutbound: null`). `console.*` output and the IIFE's return value are + * captured inside the isolate and shipped back as JSON via the fetch response, + * so we never depend on capturing the parent's console. + * + * Splicing untrusted source is safe here precisely because the isolate has no + * authority beyond `maple.*` (already the model's authority) and is bounded by + * CPU/subrequest limits — the worst a hostile snippet can do is fail its own + * isolate. 
+ */ +export const buildHarnessModule = (userCode: string, capBytes = DEFAULT_OUTPUT_CAP_BYTES): string => { + const cap = Math.max(1000, Math.floor(capBytes)) + return `export default { + async fetch(request, env) { + const __cap = ${cap}; + const __logs = []; + let __bytes = 0; + let __truncated = false; + const __push = (level, args) => { + if (__truncated) { return; } + let line; + try { + line = args.map((a) => { + if (typeof a === "string") { return a; } + try { return JSON.stringify(a); } catch (_e) { return String(a); } + }).join(" "); + } catch (_e) { line = "[unserializable log]"; } + const prefix = level === "log" ? "" : "[" + level + "] "; + line = prefix + line; + const room = __cap - __bytes; + if (room <= 0) { __truncated = true; __logs.push("[output truncated]"); return; } + if (line.length > room) { line = line.slice(0, room) + " ...[truncated]"; __truncated = true; } + __bytes += line.length; + __logs.push(line); + if (__truncated) { __logs.push("[output truncated]"); } + }; + const console = { + log: (...a) => __push("log", a), + info: (...a) => __push("info", a), + warn: (...a) => __push("warn", a), + error: (...a) => __push("error", a), + debug: (...a) => __push("debug", a), + }; + const maple = new Proxy({}, { + get(_t, prop) { + if (typeof prop !== "string") { return undefined; } + return async (input) => { + const r = await env.MAPLE.call(prop, input == null ? {} : input); + if (r && r.ok) { return r.value; } + const err = new Error((r && r.error && r.error.message) || ("maple." + prop + " failed")); + err.name = (r && r.error && r.error.name) || "MapleToolError"; + throw err; + }; + }, + }); + let __return; + let __error = null; + try { + __return = await (async () => { +${userCode} + })(); + } catch (e) { + __error = { + name: (e && e.name) || "Error", + message: (e && e.message) || String(e), + stack: e && e.stack ? String(e.stack).slice(0, 2000) : undefined, + }; + } + let __serialized; + try { __serialized = __return === undefined ? 
undefined : JSON.parse(JSON.stringify(__return)); } + catch (_e) { __serialized = String(__return); } + return Response.json({ logs: __logs, returnValue: __serialized, error: __error }); + }, +}; +` +} diff --git a/packages/codemode/src/index.ts b/packages/codemode/src/index.ts new file mode 100644 index 00000000..d9baaa75 --- /dev/null +++ b/packages/codemode/src/index.ts @@ -0,0 +1,6 @@ +// Pure (Node/test-safe) Code Mode helpers. The Workers-only sandbox driver +// (`MapleSupervisor`, `runCodeInSandbox`) lives behind `@maple/codemode/sandbox`. +export * from "./types.ts" +export * from "./api-gen.ts" +export * from "./harness.ts" +export * from "./format.ts" diff --git a/packages/codemode/src/sandbox.ts b/packages/codemode/src/sandbox.ts new file mode 100644 index 00000000..723e0683 --- /dev/null +++ b/packages/codemode/src/sandbox.ts @@ -0,0 +1,108 @@ +// Workers-runtime-only Code Mode driver. Pulls in `cloudflare:workers` +// (`RpcTarget`) and the `WorkerLoader` binding, so it lives behind the +// `@maple/codemode/sandbox` subpath — the root barrel stays Node/test-safe. +import { RpcTarget } from "cloudflare:workers" +import { buildHarnessModule } from "./harness.ts" +import type { CodeRunResult, RpcCallResult } from "./types.ts" +import { + DEFAULT_COMPAT_DATE, + DEFAULT_CPU_MS, + DEFAULT_SUBREQUESTS, + DEFAULT_WALL_MS, +} from "./types.ts" + +/** Host-supplied bridge: run one `maple.(input)` call and return its result. */ +export type CodeModeDispatch = (name: string, input: unknown) => Promise + +/** + * The RPC target handed to the sandbox isolate as `env.MAPLE`. It must be an + * `RpcTarget` subclass so Cloudflare passes it across the Worker Loader boundary + * by reference (a plain object would structured-clone and drop the method). The + * dispatch closure stays in the parent isolate; the sandbox only gets a stub. 
+ */ +export class MapleSupervisor extends RpcTarget { + readonly #dispatch: CodeModeDispatch + + constructor(dispatch: CodeModeDispatch) { + super() + this.#dispatch = dispatch + } + + async call(name: string, input: unknown): Promise { + try { + return await this.#dispatch(name, input) + } catch (error) { + return { + ok: false, + error: { + name: error instanceof Error ? error.name : "Error", + message: error instanceof Error ? error.message : String(error), + }, + } + } + } +} + +export interface RunCodeOptions { + /** Model-written snippet (plain JS, spliced into the harness IIFE). */ + readonly code: string + /** Bridge each `maple.(input)` call back to the host's tools. */ + readonly dispatch: CodeModeDispatch + /** Unique-per-call id → fresh isolate each run (Code Mode semantics). */ + readonly id: string + readonly capBytes?: number + readonly compatibilityDate?: string + readonly cpuMs?: number + readonly subRequests?: number + readonly wallMs?: number +} + +/** + * Load the model's snippet into a fresh dynamic worker with network disabled + * (`globalOutbound: null`) and only the `maple` RPC capability, run it, and + * return the captured `{ logs, returnValue, error }`. Never throws — a load + * failure, RPC failure, or wall-clock timeout is reported as a `crashed` + * result so the caller can surface it to the model as a value. + */ +export const runCodeInSandbox = async ( + loader: WorkerLoader, + options: RunCodeOptions, +): Promise => { + const supervisor = new MapleSupervisor(options.dispatch) + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), options.wallMs ?? DEFAULT_WALL_MS) + try { + const stub = loader.get(options.id, async () => ({ + compatibilityDate: options.compatibilityDate ?? DEFAULT_COMPAT_DATE, + mainModule: "main.js", + modules: { "main.js": buildHarnessModule(options.code, options.capBytes) }, + env: { MAPLE: supervisor }, + globalOutbound: null, + limits: { + cpuMs: options.cpuMs ?? 
DEFAULT_CPU_MS, + subRequests: options.subRequests ?? DEFAULT_SUBREQUESTS, + }, + })) + const response = await stub + .getEntrypoint() + .fetch("https://codemode/run", { signal: controller.signal }) + const payload = (await response.json()) as Partial | null + return { + logs: payload?.logs ?? [], + returnValue: payload?.returnValue, + error: payload?.error ?? null, + } + } catch (error) { + return { + logs: [], + returnValue: undefined, + error: { + name: error instanceof Error ? error.name : "Error", + message: error instanceof Error ? error.message : String(error), + }, + crashed: true, + } + } finally { + clearTimeout(timer) + } +} diff --git a/packages/codemode/src/types.ts b/packages/codemode/src/types.ts new file mode 100644 index 00000000..6b9cc956 --- /dev/null +++ b/packages/codemode/src/types.ts @@ -0,0 +1,69 @@ +// Shared, dependency-free types + constants for Code Mode. Importable by both +// the pure helpers (api-gen / harness / format) and the Workers-only `./sandbox` +// driver, and safe to pull into Node-side unit tests. + +/** A trimmed JSON Schema node, as produced by the tool registries. */ +export interface JsonSchema { + type?: string | string[] + properties?: Record + required?: ReadonlyArray + items?: JsonSchema + enum?: ReadonlyArray + description?: string + anyOf?: ReadonlyArray + oneOf?: ReadonlyArray + $ref?: string + [key: string]: unknown +} + +/** The minimum a tool must expose to be projected into the `maple.*` API. */ +export interface CodeModeToolSpec { + /** Name the model calls as `maple.(...)` — already stripped of any `mcp__` prefix. */ + readonly name: string + readonly description: string + /** Raw JSON Schema for the tool's single input object (may be absent). */ + readonly parameters?: JsonSchema +} + +/** Result the sandbox isolate returns (parsed from its fetch response). 
*/ +export interface CodeRunResult { + readonly logs: ReadonlyArray + readonly returnValue: unknown + readonly error: { name: string; message: string; stack?: string } | null + /** Set when the isolate failed to load/run at the harness boundary (not user code). */ + readonly crashed?: boolean +} + +/** Envelope crossing the RPC boundary for each `maple.(input)` call. */ +export interface RpcCallResult { + readonly ok: boolean + readonly value?: string + readonly error?: { name: string; message: string } +} + +/** One pending mutation captured from a code run (chat propose-then-apply flow). */ +export interface CodeProposal { + readonly tool: string + readonly input: unknown +} + +export const PROPOSED_BATCH_STATUS = "proposed_batch" as const + +/** The `run_code` output envelope when a code run queued mutating proposals. */ +export interface ProposedBatch { + readonly status: typeof PROPOSED_BATCH_STATUS + readonly proposals: ReadonlyArray + /** Human/model-facing summary of the run (console + return value + queue note). */ + readonly text: string +} + +export const DEFAULT_OUTPUT_CAP_BYTES = 24_000 +/** + * Compatibility date for the dynamically-loaded isolate. The harness uses only + * standard globals (Response, Proxy, console), so any recent date works; this + * matches the blog's Worker Loader example. Bump deliberately. 
+ */ +export const DEFAULT_COMPAT_DATE = "2025-06-01" +export const DEFAULT_CPU_MS = 10_000 +export const DEFAULT_SUBREQUESTS = 50 +export const DEFAULT_WALL_MS = 20_000 diff --git a/packages/codemode/tsconfig.json b/packages/codemode/tsconfig.json new file mode 100644 index 00000000..f9e824c4 --- /dev/null +++ b/packages/codemode/tsconfig.json @@ -0,0 +1,17 @@ +{ + "include": ["src/**/*.ts"], + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "lib": ["ES2022"], + "types": ["node", "@cloudflare/workers-types"], + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + "skipLibCheck": true, + "strict": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + } +} From 68fca884dac71d9cc740d91629f2ef98ac9d47be Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 02:40:16 +0200 Subject: [PATCH 02/10] fix(knip): register packages/codemode + drop unused export CI Knip failure: packages/codemode imports `cloudflare:workers` (in the ./sandbox driver), which Knip reports as an unlisted `cloudflare` dependency (unlisted = error). Add the workspace to knip.json with the same `ignoreDependencies: ["cloudflare"]` the other Worker-importing workspaces use (apps/api, apps/chat-flue, lib/effect-cloudflare). Also stop exporting RUN_CODE_TOOL_NAME (used only internally) to clear the new unused-export warning. 
Co-Authored-By: Claude Opus 4.8 --- apps/chat-flue/src/lib/codemode/index.ts | 2 +- apps/chat-flue/src/lib/codemode/run-code-tool.ts | 2 +- knip.json | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/chat-flue/src/lib/codemode/index.ts b/apps/chat-flue/src/lib/codemode/index.ts index cd8c9de2..f3a7fb8d 100644 --- a/apps/chat-flue/src/lib/codemode/index.ts +++ b/apps/chat-flue/src/lib/codemode/index.ts @@ -1,2 +1,2 @@ export { buildCodeModeApi, type CodeModeApi } from "./api-gen.ts" -export { createRunCodeTool, RUN_CODE_TOOL_NAME } from "./run-code-tool.ts" +export { createRunCodeTool } from "./run-code-tool.ts" diff --git a/apps/chat-flue/src/lib/codemode/run-code-tool.ts b/apps/chat-flue/src/lib/codemode/run-code-tool.ts index 86fa0eab..ea891ac1 100644 --- a/apps/chat-flue/src/lib/codemode/run-code-tool.ts +++ b/apps/chat-flue/src/lib/codemode/run-code-tool.ts @@ -4,7 +4,7 @@ import { parseToolProposal } from "../approval.ts" import type { ChatFlueEnv } from "../env.ts" import type { CodeModeApi } from "./api-gen.ts" -export const RUN_CODE_TOOL_NAME = "run_code" +const RUN_CODE_TOOL_NAME = "run_code" /** * Build the `maple.(input)` dispatch for a code run: look up the gated diff --git a/knip.json b/knip.json index 41960aa9..78817ee9 100644 --- a/knip.json +++ b/knip.json @@ -46,6 +46,9 @@ "lib/effect-cloudflare": { "ignoreDependencies": ["cloudflare"] }, + "packages/codemode": { + "ignoreDependencies": ["cloudflare"] + }, "packages/query-engine": { "entry": ["src/ch/expr.ts", "src/drain/index.ts"] } From ca1ebf63b247fd9e4167b6abaecc2699f91edb55 Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 02:47:47 +0200 Subject: [PATCH 03/10] =?UTF-8?q?fix(codemode):=20address=20review=20?= =?UTF-8?q?=E2=80=94=20recursion=20guard,=20harness=20isolation,=20batch?= =?UTF-8?q?=20cap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - run_code self-recursion: a code-mode snippet could call 
maple.run_code(...) on the MCP path (run_code is in mapleToolDefinitions and isn't mutating), nesting a sandbox inside the running one. resolveCodeModeCall now refuses the RUN_CODE_TOOL_NAME (shared const), and buildCodeModeApi skips it too (defense in depth for the chat path). - Harness break-out: the model's snippet was spliced inline into the harness fetch scope, so code ending in `})();` could escape and tamper with the output cap / log capture. It now runs as its own module (user.js exporting an async fn of (maple, console)); a break-out just fails to parse → crashed run, and it can't reach __logs/__cap/env. Keeps the no-eval property. - Unbounded proposed_batch: formatRunResult now caps proposals at MAX_PROPOSALS_PER_RUN (25) with a dropped-count note, bounding the envelope (and the number of approval cards) regardless of the run. Co-Authored-By: Claude Opus 4.8 --- apps/api/src/mcp/tools/run-code.test.ts | 8 +++ apps/api/src/mcp/tools/run-code.ts | 13 +++- apps/chat-flue/src/lib/codemode/api-gen.ts | 6 +- .../src/lib/codemode/run-code-tool.ts | 4 +- packages/codemode/src/format.test.ts | 12 +++- packages/codemode/src/format.ts | 22 +++++-- packages/codemode/src/harness.test.ts | 46 ++++++++++++--- packages/codemode/src/harness.ts | 59 ++++++++++++------- packages/codemode/src/sandbox.ts | 6 +- packages/codemode/src/types.ts | 3 + 10 files changed, 134 insertions(+), 45 deletions(-) diff --git a/apps/api/src/mcp/tools/run-code.test.ts b/apps/api/src/mcp/tools/run-code.test.ts index 016f0304..12914b0e 100644 --- a/apps/api/src/mcp/tools/run-code.test.ts +++ b/apps/api/src/mcp/tools/run-code.test.ts @@ -36,6 +36,14 @@ describe("resolveCodeModeCall", () => { expect(invoke).not.toHaveBeenCalled() }) + it("refuses to call run_code from inside code mode (no nested sandbox)", async () => { + const invoke = vi.fn() + const r = await call("run_code", { code: "1" }, invoke) + expect(r.ok).toBe(false) + expect(r.error?.name).toBe("Blocked") + 
expect(invoke).not.toHaveBeenCalled() + }) + it("rejects unknown tools", async () => { const r = await call("not_a_tool", {}, vi.fn()) expect(r.ok).toBe(false) diff --git a/apps/api/src/mcp/tools/run-code.ts b/apps/api/src/mcp/tools/run-code.ts index a6a9a1ce..6496caf6 100644 --- a/apps/api/src/mcp/tools/run-code.ts +++ b/apps/api/src/mcp/tools/run-code.ts @@ -1,5 +1,5 @@ import { Effect, FiberSet, Schema } from "effect" -import { formatRunOutput, type RpcCallResult } from "@maple/codemode" +import { formatRunOutput, RUN_CODE_TOOL_NAME, type RpcCallResult } from "@maple/codemode" import { WorkerEnvironment } from "@/lib/WorkerEnvironment" import { resolveTenant } from "../lib/query-warehouse" // Type-only: a value import would create an eager require cycle with registry.ts @@ -32,6 +32,15 @@ export const resolveCodeModeCall = async ( input: unknown, invoke: (definition: MapleToolDefinition, decoded: unknown) => Promise, ): Promise => { + if (name === RUN_CODE_TOOL_NAME) { + // `run_code` is in `mapleToolDefinitions` (registered last) and isn't a + // mutating tool, so without this guard a snippet calling maple.run_code(...) + // would nest a sandbox inside the running one. + return { + ok: false, + error: { name: "Blocked", message: "maple.run_code cannot be called from inside code mode." 
}, + } + } if (MUTATING_TOOL_NAMES.has(name)) { return { ok: false, @@ -82,7 +91,7 @@ export const resolveCodeModeCall = async ( */ export function registerRunCodeTool(server: McpToolRegistrar) { server.tool( - "run_code", + RUN_CODE_TOOL_NAME, DESCRIPTION, Schema.Struct({ code: requiredStringParam( diff --git a/apps/chat-flue/src/lib/codemode/api-gen.ts b/apps/chat-flue/src/lib/codemode/api-gen.ts index 0ba402eb..de071b8d 100644 --- a/apps/chat-flue/src/lib/codemode/api-gen.ts +++ b/apps/chat-flue/src/lib/codemode/api-gen.ts @@ -1,5 +1,5 @@ import type { ToolDefinition } from "@flue/runtime" -import { buildApiDeclaration, type CodeModeToolSpec, type JsonSchema } from "@maple/codemode" +import { buildApiDeclaration, RUN_CODE_TOOL_NAME, type CodeModeToolSpec, type JsonSchema } from "@maple/codemode" import { baseToolName } from "../mcp.ts" export interface CodeModeApi { @@ -26,7 +26,9 @@ export const buildCodeModeApi = (tools: ReadonlyArray): CodeMode const specs: CodeModeToolSpec[] = [] for (const tool of tools) { const name = baseToolName(tool.name) - if (dispatch.has(name)) continue + // Never expose run_code to itself (the chat path appends run_code after this + // runs, so this is defense-in-depth against a future ordering change). 
+ if (name === RUN_CODE_TOOL_NAME || dispatch.has(name)) continue dispatch.set(name, tool.execute) specs.push({ name, diff --git a/apps/chat-flue/src/lib/codemode/run-code-tool.ts b/apps/chat-flue/src/lib/codemode/run-code-tool.ts index ea891ac1..9b4a2abe 100644 --- a/apps/chat-flue/src/lib/codemode/run-code-tool.ts +++ b/apps/chat-flue/src/lib/codemode/run-code-tool.ts @@ -1,11 +1,9 @@ import type { ToolDefinition } from "@flue/runtime" -import { formatRunResult, type CodeProposal, type RpcCallResult } from "@maple/codemode" +import { formatRunResult, RUN_CODE_TOOL_NAME, type CodeProposal, type RpcCallResult } from "@maple/codemode" import { parseToolProposal } from "../approval.ts" import type { ChatFlueEnv } from "../env.ts" import type { CodeModeApi } from "./api-gen.ts" -const RUN_CODE_TOOL_NAME = "run_code" - /** * Build the `maple.(input)` dispatch for a code run: look up the gated * tool `execute`, run it, and — because mutating tools' gated execute returns a diff --git a/packages/codemode/src/format.test.ts b/packages/codemode/src/format.test.ts index 69932572..1664eda3 100644 --- a/packages/codemode/src/format.test.ts +++ b/packages/codemode/src/format.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest" -import { formatRunOutput, formatRunResult } from "./format.ts" +import { formatRunOutput, formatRunResult, MAX_PROPOSALS_PER_RUN } from "./format.ts" import { PROPOSED_BATCH_STATUS, type CodeRunResult } from "./types.ts" const base: CodeRunResult = { logs: [], returnValue: undefined, error: null } @@ -43,4 +43,14 @@ describe("formatRunResult", () => { expect(parsed.proposals).toHaveLength(2) expect(parsed.text).toContain("Queued 2 change(s) for approval: create_dashboard, add_dashboard_widget.") }) + + it("caps the number of proposals surfaced from one run", () => { + const many = Array.from({ length: MAX_PROPOSALS_PER_RUN + 5 }, (_, i) => ({ + tool: "create_dashboard", + input: { i }, + })) + const parsed = 
JSON.parse(formatRunResult(base, many)) + expect(parsed.proposals).toHaveLength(MAX_PROPOSALS_PER_RUN) + expect(parsed.text).toContain("5 more change(s) were dropped") + }) }) diff --git a/packages/codemode/src/format.ts b/packages/codemode/src/format.ts index 6558c07f..732f2477 100644 --- a/packages/codemode/src/format.ts +++ b/packages/codemode/src/format.ts @@ -41,11 +41,20 @@ export const formatRunOutput = (result: CodeRunResult, cap = DEFAULT_OUTPUT_CAP_ return capText(parts.join("\n\n"), cap) } +/** + * Hard cap on proposals surfaced from a single code run. A run queuing more than + * this is almost certainly a mistake/runaway; bounding it keeps the returned + * envelope (and the number of approval cards) from growing without limit. + */ +export const MAX_PROPOSALS_PER_RUN = 25 + /** * The final string `run_code` returns to the model. When the run queued mutating * proposals (chat approval flow), wrap it as a `proposed_batch` envelope the web * client parses into one approval card per proposal; otherwise return the plain - * summary so the model just reads its results. + * summary so the model just reads its results. Both the inner `text` (via + * `formatRunOutput`) and the proposal count are bounded so the envelope can't + * grow unboundedly with the model's run. */ export const formatRunResult = ( result: CodeRunResult, @@ -54,12 +63,15 @@ export const formatRunResult = ( ): string => { const text = formatRunOutput(result, cap) if (proposals.length === 0) return text - const queueNote = `\n\nQueued ${proposals.length} change(s) for approval: ${proposals - .map((p) => p.tool) - .join(", ")}.` + + const kept = proposals.slice(0, MAX_PROPOSALS_PER_RUN) + const dropped = proposals.length - kept.length + const queueNote = + `\n\nQueued ${kept.length} change(s) for approval: ${kept.map((p) => p.tool).join(", ")}.` + + (dropped > 0 ? 
` (${dropped} more change(s) were dropped — keep code-mode runs to a few mutations.)` : "") return JSON.stringify({ status: PROPOSED_BATCH_STATUS, - proposals, + proposals: kept, text: text + queueNote, }) } diff --git a/packages/codemode/src/harness.test.ts b/packages/codemode/src/harness.test.ts index dd76b71d..7cb7121d 100644 --- a/packages/codemode/src/harness.test.ts +++ b/packages/codemode/src/harness.test.ts @@ -1,19 +1,39 @@ -import { describe, expect, it } from "vitest" -import { buildHarnessModule } from "./harness.ts" +import { mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { pathToFileURL } from "node:url" +import { afterEach, describe, expect, it } from "vitest" +import { buildSandboxModules, SANDBOX_MAIN_MODULE } from "./harness.ts" import type { RpcCallResult } from "./types.ts" +const tmpDirs: string[] = [] +afterEach(() => { + for (const dir of tmpDirs.splice(0)) rmSync(dir, { recursive: true, force: true }) +}) + /** - * Load the generated harness module as an ESM data URL and run its `fetch` - * handler in-process with a fake `env.MAPLE` — exercises log/return/error - * capture without the Workers runtime. + * Write the real two-module set (main.js + user.js) to a temp dir and import + * `main.js` so its relative `import ./user.js` resolves — exercising the actual + * composition the sandbox loads, with a fake `env.MAPLE`. 
*/ const runHarness = async ( code: string, dispatch: (name: string, input: unknown) => Promise<RpcCallResult>, capBytes?: number, ): Promise<{ logs: string[]; returnValue: unknown; error: { name: string; message: string } | null }> => { - const src = buildHarnessModule(code, capBytes) - const mod = await import(`data:text/javascript,${encodeURIComponent(src)}`) + const dir = mkdtempSync(join(tmpdir(), "codemode-harness-")) + tmpDirs.push(dir) + const modules = buildSandboxModules(code, capBytes) + for (const [name, source] of Object.entries(modules)) writeFileSync(join(dir, name), source) + let mod: { default: { fetch: (req: Request, env: unknown) => Promise<Response> } } + try { + // A snippet that fails to parse breaks user.js; Node surfaces it here at + // import time. The real sandbox catches the equivalent failure at fetch and + // reports a crashed run — model that with a crashed-shaped result. + mod = await import(pathToFileURL(join(dir, SANDBOX_MAIN_MODULE)).href) + } catch (e) { + return { logs: [], returnValue: undefined, error: { name: "LoadError", message: String(e) } } + } const env = { MAPLE: { call: (name: string, input: unknown) => dispatch(name, input) } } const res = await mod.default.fetch(new Request("https://codemode/run"), env) return res.json() @@ -21,14 +41,14 @@ const runHarness = async ( const ok = (value: string): RpcCallResult => ({ ok: true, value }) -describe("buildHarnessModule", () => { +describe("buildSandboxModules", () => { it("captures console.log output", async () => { const out = await runHarness(`console.log("hello", { a: 1 })`, async () => ok("x")) expect(out.logs).toEqual(['hello {"a":1}']) expect(out.error).toBeNull() }) - it("captures the IIFE return value", async () => { + it("captures the user function's return value", async () => { const out = await runHarness(`return { count: 2 }`, async () => ok("x")) expect(out.returnValue).toEqual({ count: 2 }) }) @@ -66,4 +86,12 @@ describe("buildHarnessModule", () => { const total = 
out.logs.join("").length expect(total).toBeLessThan(1300) }) + + it("isolates a break-out attempt to the user module (can't reach the harness scope)", async () => { + // `})();` would, in an inline splice, close the wrapper and run in the + // harness scope. As its own module it just fails to parse -> crashed run. + const out = await runHarness(`console.log("before"); })(); __logs.length = 0;`, async () => ok("x")) + expect(out.error).not.toBeNull() + expect(out.logs).toEqual([]) + }) }) diff --git a/packages/codemode/src/harness.ts b/packages/codemode/src/harness.ts index 5af4e68c..06ece17d 100644 --- a/packages/codemode/src/harness.ts +++ b/packages/codemode/src/harness.ts @@ -1,24 +1,32 @@ import { DEFAULT_OUTPUT_CAP_BYTES } from "./types.ts" +/** Entry module of the sandbox worker. */ +export const SANDBOX_MAIN_MODULE = "main.js" +const USER_MODULE = "user.js" + +/** + * Wrap the model's snippet as its OWN module that exports an async function of + * `(maple, console)`. Because the snippet is the function body of a separate + * module — not spliced into the harness's `fetch` scope — it cannot reach the + * harness internals (`__logs`/`__cap`/`env`); a snippet that tries to break out + * of the function (e.g. ending in `})();`) just makes this module fail to parse, + * which surfaces as a crashed run rather than tampering with log/cap capture. + */ +const buildUserModule = (userCode: string): string => + `export default async function (maple, console) {\n${userCode}\n}\n` + /** - * Build the source of the dynamic-worker module that runs the model's code. - * - * The model's code is spliced **directly** into an async IIFE in the module - * body — there is no runtime `eval`/`new Function`; the snippet simply *is* the - * module. The only capability exposed is `env.MAPLE.call(name, input)` (an RPC - * stub back to the supervisor); outbound network is blocked by the loader - * (`globalOutbound: null`). 
`console.*` output and the IIFE's return value are - * captured inside the isolate and shipped back as JSON via the fetch response, - * so we never depend on capturing the parent's console. - * - * Splicing untrusted source is safe here precisely because the isolate has no - * authority beyond `maple.*` (already the model's authority) and is bounded by - * CPU/subrequest limits — the worst a hostile snippet can do is fail its own - * isolate. + * The harness module: installs a byte-capped `console` shim and a `maple` Proxy + * (whose only capability is `env.MAPLE.call(name, input)` — an RPC stub back to + * the supervisor; outbound network is blocked by the loader via + * `globalOutbound: null`), runs the user module's exported function, and ships + * `{ logs, returnValue, error }` back as JSON via the fetch response. Nothing + * here depends on capturing the parent's console. */ -export const buildHarnessModule = (userCode: string, capBytes = DEFAULT_OUTPUT_CAP_BYTES): string => { +const buildMainModule = (capBytes: number): string => { const cap = Math.max(1000, Math.floor(capBytes)) - return `export default { + return `import runUser from "./${USER_MODULE}"; +export default { async fetch(request, env) { const __cap = ${cap}; const __logs = []; @@ -42,7 +50,7 @@ export const buildHarnessModule = (userCode: string, capBytes = DEFAULT_OUTPUT_C __logs.push(line); if (__truncated) { __logs.push("[output truncated]"); } }; - const console = { + const __console = { log: (...a) => __push("log", a), info: (...a) => __push("info", a), warn: (...a) => __push("warn", a), @@ -64,9 +72,7 @@ export const buildHarnessModule = (userCode: string, capBytes = DEFAULT_OUTPUT_C let __return; let __error = null; try { - __return = await (async () => { -${userCode} - })(); + __return = await runUser(maple, __console); } catch (e) { __error = { name: (e && e.name) || "Error", @@ -82,3 +88,16 @@ ${userCode} }; ` } + +/** + * Build the module set for the dynamic-worker sandbox: the 
harness entry + * (`main.js`) plus the model's snippet as its own module (`user.js`). Splitting + * them keeps the model's code out of the harness scope — see `buildUserModule`. + */ +export const buildSandboxModules = ( + userCode: string, + capBytes = DEFAULT_OUTPUT_CAP_BYTES, +): Record<string, string> => ({ + [SANDBOX_MAIN_MODULE]: buildMainModule(capBytes), + [USER_MODULE]: buildUserModule(userCode), +}) diff --git a/packages/codemode/src/sandbox.ts b/packages/codemode/src/sandbox.ts index 723e0683..ffe031c4 100644 --- a/packages/codemode/src/sandbox.ts +++ b/packages/codemode/src/sandbox.ts @@ -2,7 +2,7 @@ // (`RpcTarget`) and the `WorkerLoader` binding, so it lives behind the // `@maple/codemode/sandbox` subpath — the root barrel stays Node/test-safe. import { RpcTarget } from "cloudflare:workers" -import { buildHarnessModule } from "./harness.ts" +import { buildSandboxModules, SANDBOX_MAIN_MODULE } from "./harness.ts" import type { CodeRunResult, RpcCallResult } from "./types.ts" import { DEFAULT_COMPAT_DATE, @@ -74,8 +74,8 @@ export const runCodeInSandbox = async ( try { const stub = loader.get(options.id, async () => ({ compatibilityDate: options.compatibilityDate ?? DEFAULT_COMPAT_DATE, - mainModule: "main.js", - modules: { "main.js": buildHarnessModule(options.code, options.capBytes) }, + mainModule: SANDBOX_MAIN_MODULE, + modules: buildSandboxModules(options.code, options.capBytes), env: { MAPLE: supervisor }, globalOutbound: null, limits: { diff --git a/packages/codemode/src/types.ts b/packages/codemode/src/types.ts index 6b9cc956..ec86ffbb 100644 --- a/packages/codemode/src/types.ts +++ b/packages/codemode/src/types.ts @@ -47,6 +47,9 @@ export interface CodeProposal { readonly input: unknown } +/** Canonical name of the Code Mode tool — shared so dispatch can refuse self-calls. */ +export const RUN_CODE_TOOL_NAME = "run_code" + export const PROPOSED_BATCH_STATUS = "proposed_batch" as const /** The `run_code` output envelope when a code run queued mutating proposals. 
*/ From 1d2e12213a1e4324e8b1b2f6548373d41d4447de Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 03:10:54 +0200 Subject: [PATCH 04/10] fix(codemode): single source of truth for mutating tools + fail-closed test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review (fails-open): the MCP run_code sandbox blocks mutations via a hand-maintained MUTATING_TOOL_NAMES set with no test guaranteeing every mutating registry tool is in it, kept in two drifting copies. run_code makes the set fail OPEN (an ungated name runs its real handler), so: - Move MUTATING_TOOL_NAMES into @maple/codemode as the single source of truth; apps/api/mutating.ts re-exports it and apps/chat-flue/approval.ts imports it, so the two copies can no longer drift. - Add a fail-closed regression test: every registry tool whose name looks mutating (create_/update_/delete_/… via shared MUTATING_TOOL_PREFIXES) must be in the set, so a new mutating tool can't ship ungated. Co-Authored-By: Claude Opus 4.8 --- apps/api/src/mcp/tools/mutating.test.ts | 15 +++++ apps/api/src/mcp/tools/mutating.ts | 37 ++--------- apps/chat-flue/src/lib/approval.ts | 44 +++---------- packages/codemode/src/index.ts | 1 + packages/codemode/src/mutating.test.ts | 29 +++++++++ packages/codemode/src/mutating.ts | 83 +++++++++++++++++++++++++ 6 files changed, 141 insertions(+), 68 deletions(-) create mode 100644 packages/codemode/src/mutating.test.ts create mode 100644 packages/codemode/src/mutating.ts diff --git a/apps/api/src/mcp/tools/mutating.test.ts b/apps/api/src/mcp/tools/mutating.test.ts index cb2d6e70..466c0f8c 100644 --- a/apps/api/src/mcp/tools/mutating.test.ts +++ b/apps/api/src/mcp/tools/mutating.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from "vitest" +import { looksMutating } from "@maple/codemode" import { mapleToolDefinitions } from "./registry" import { MUTATING_TOOL_NAMES } from "./mutating" @@ -10,6 +11,20 @@ describe("MUTATING_TOOL_NAMES", () => { 
} }) + it("gates every conventionally-named mutating tool (run_code's sandbox fails OPEN otherwise)", () => { + // The inverse of the check above, and the safety-critical one: a registry + // tool that looks mutating (create_/update_/delete_/…) but is missing from + // the set would run its real side effect inside `run_code`. This fails CI so + // a new mutating tool can't ship ungated. + const ungated = mapleToolDefinitions + .map((d) => d.name) + .filter((name) => looksMutating(name) && !MUTATING_TOOL_NAMES.has(name)) + expect( + ungated, + `mutating-looking tools are NOT in MUTATING_TOOL_NAMES — gate them or they run real side effects inside run_code: [${ungated.join(", ")}]`, + ).toEqual([]) + }) + it("excludes read-only tools (so /chat/apply can't run them)", () => { expect(MUTATING_TOOL_NAMES.has("find_errors")).toBe(false) expect(MUTATING_TOOL_NAMES.has("search_traces")).toBe(false) diff --git a/apps/api/src/mcp/tools/mutating.ts b/apps/api/src/mcp/tools/mutating.ts index 878929b6..35c36fc8 100644 --- a/apps/api/src/mcp/tools/mutating.ts +++ b/apps/api/src/mcp/tools/mutating.ts @@ -1,33 +1,4 @@ -/** - * Base names of the mutating MCP tools that the AI chat gates behind approval. - * - * The Flue chat agent wraps these so a model call returns a `proposed` marker - * instead of mutating (see `apps/chat-flue/src/lib/approval.ts`); the web client - * applies the real change via `POST /api/chat/apply`, which only accepts tools - * in this set. Keep the two lists in sync. 
- */ -export const MUTATING_TOOL_NAMES: ReadonlySet<string> = new Set([ - // dashboards - "create_dashboard", - "update_dashboard", - "add_dashboard_widget", - "update_dashboard_widget", - "remove_dashboard_widget", - "reorder_dashboard_widgets", - "replace_dashboard_widgets", - // alerts - "create_alert_rule", - "update_alert_rule", - "delete_alert_rule", - // error issues - "claim_error_issue", - "release_error_issue", - "transition_error_issue", - "comment_on_error_issue", - "heartbeat_error_issue", - "set_issue_severity", - "update_error_notification_policy", - // fixes / agents - "propose_fix", - "register_agent", -]) +// Single source of truth lives in @maple/codemode so the apps/api + apps/chat-flue +// copies can't drift. Re-exported here to keep existing `./mutating` imports stable. +// The fail-closed regression test lives in `./mutating.test.ts`. +export { MUTATING_TOOL_NAMES } from "@maple/codemode" diff --git a/apps/chat-flue/src/lib/approval.ts b/apps/chat-flue/src/lib/approval.ts index 29200bd1..50e61147 100644 --- a/apps/chat-flue/src/lib/approval.ts +++ b/apps/chat-flue/src/lib/approval.ts @@ -1,41 +1,15 @@ import type { ToolDefinition } from "@flue/runtime" +// Single source of truth shared with apps/api (apps/api re-exports the same set) +// so the gated-tool lists can't drift. The legacy chat agent gated these with +// `@cloudflare/ai-chat`'s approval interrupt — Flue's event stream has no +// human-in-the-loop interrupt, so we use **propose-then-apply** instead: the +// agent calls the tool, but its `execute` returns a proposal marker WITHOUT +// performing the mutation. The web client renders an approval card from that +// result and performs the real mutation (via Maple's existing API) on approve. +import { MUTATING_TOOL_NAMES } from "@maple/codemode" import { baseToolName } from "./mcp.ts" -/** - * Mutating Maple tools (base names). 
The legacy chat agent gated these with - * `@cloudflare/ai-chat`'s approval interrupt — Flue's event stream has no - * human-in-the-loop interrupt, so we use **propose-then-apply** instead: the - * agent calls the tool, but its `execute` returns a proposal marker WITHOUT - * performing the mutation. The web client renders an approval card from that - * result and performs the real mutation (via Maple's existing API) on approve. - * - * Keep in sync with the mutating tools in apps/api/src/mcp/tools. - */ -export const MUTATING_TOOL_NAMES: ReadonlySet<string> = new Set([ - // dashboards - "create_dashboard", - "update_dashboard", - "add_dashboard_widget", - "update_dashboard_widget", - "remove_dashboard_widget", - "reorder_dashboard_widgets", - "replace_dashboard_widgets", - // alerts - "create_alert_rule", - "update_alert_rule", - "delete_alert_rule", - // error issues - "claim_error_issue", - "release_error_issue", - "transition_error_issue", - "comment_on_error_issue", - "heartbeat_error_issue", - "set_issue_severity", - "update_error_notification_policy", - // fixes / agents - "propose_fix", - "register_agent", -]) +export { MUTATING_TOOL_NAMES } /** Marker an approval-gated tool returns instead of mutating. 
*/ export interface ToolProposal { diff --git a/packages/codemode/src/index.ts b/packages/codemode/src/index.ts index d9baaa75..9cba8d34 100644 --- a/packages/codemode/src/index.ts +++ b/packages/codemode/src/index.ts @@ -4,3 +4,4 @@ export * from "./types.ts" export * from "./api-gen.ts" export * from "./harness.ts" export * from "./format.ts" +export * from "./mutating.ts" diff --git a/packages/codemode/src/mutating.test.ts b/packages/codemode/src/mutating.test.ts new file mode 100644 index 00000000..2f6684cf --- /dev/null +++ b/packages/codemode/src/mutating.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest" +import { looksMutating, MUTATING_TOOL_NAMES, MUTATING_TOOL_PREFIXES } from "./mutating.ts" + +describe("looksMutating", () => { + it("flags conventionally-named mutating tools", () => { + expect(looksMutating("create_dashboard")).toBe(true) + expect(looksMutating("delete_alert_rule")).toBe(true) + expect(looksMutating("set_issue_severity")).toBe(true) + expect(looksMutating("register_agent")).toBe(true) + }) + + it("does not flag read-only tools or run_code", () => { + for (const name of ["find_errors", "search_traces", "list_dashboards", "get_dashboard", "query_data", "run_code"]) { + expect(looksMutating(name), name).toBe(false) + } + }) + + it("every gated tool name matches the mutating convention (prefixes stay in sync with the set)", () => { + for (const name of MUTATING_TOOL_NAMES) { + expect(looksMutating(name), `${name} is gated but matches no MUTATING_TOOL_PREFIXES`).toBe(true) + } + }) + + it("exposes the prefix list", () => { + expect(MUTATING_TOOL_PREFIXES).toContain("create_") + expect(MUTATING_TOOL_PREFIXES).not.toContain("get_") + expect(MUTATING_TOOL_PREFIXES).not.toContain("run_") + }) +}) diff --git a/packages/codemode/src/mutating.ts b/packages/codemode/src/mutating.ts new file mode 100644 index 00000000..f94612b8 --- /dev/null +++ b/packages/codemode/src/mutating.ts @@ -0,0 +1,83 @@ +/** + * Base names of the 
mutating Maple MCP tools — the **single source of truth** + * for approval gating, imported by both apps/api and apps/chat-flue so the lists + * can't drift. + * + * Enforcement points: + * - apps/chat-flue wraps these so a model call returns a `proposed` marker + * instead of mutating (the web client applies the real change via + * `POST /api/chat/apply`, which only accepts tools in this set). + * - The MCP `run_code` sandbox (apps/api) refuses these, so a snippet can't + * trigger an ungated mutation. + * + * Because `run_code` makes this set fail **open** (a name absent from it runs + * its real handler), apps/api has a regression test asserting every + * conventionally-named mutating tool in the registry is present here — add a + * mutating tool without gating it and CI fails. + */ +export const MUTATING_TOOL_NAMES: ReadonlySet<string> = new Set([ + // dashboards + "create_dashboard", + "update_dashboard", + "add_dashboard_widget", + "update_dashboard_widget", + "remove_dashboard_widget", + "reorder_dashboard_widgets", + "replace_dashboard_widgets", + // alerts + "create_alert_rule", + "update_alert_rule", + "delete_alert_rule", + // error issues + "claim_error_issue", + "release_error_issue", + "transition_error_issue", + "comment_on_error_issue", + "heartbeat_error_issue", + "set_issue_severity", + "update_error_notification_policy", + // fixes / agents + "propose_fix", + "register_agent", +]) + +/** + * Name prefixes that denote a state-changing tool in Maple's verb_noun tool + * taxonomy. Used by the apps/api regression test to fail CI when a tool that + * looks mutating is missing from {@link MUTATING_TOOL_NAMES}. Keep read-only + * verbs (find/get/list/search/inspect/describe/query/run/…) out of this list. 
+ */ +export const MUTATING_TOOL_PREFIXES: ReadonlyArray<string> = [ + "create_", + "update_", + "delete_", + "add_", + "remove_", + "reorder_", + "replace_", + "claim_", + "release_", + "transition_", + "comment_", + "heartbeat_", + "set_", + "propose_", + "register_", + "archive_", + "restore_", + "enable_", + "disable_", + "mute_", + "unmute_", + "rename_", + "assign_", + "acknowledge_", + "snooze_", + "resolve_", + "reopen_", + "close_", +] + +/** True when a tool name looks state-changing by Maple's verb_noun convention. */ +export const looksMutating = (name: string): boolean => + MUTATING_TOOL_PREFIXES.some((prefix) => name.startsWith(prefix)) From b613a346825a9c8c6e9cd8d7da8071cb7b0fe611 Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 03:20:15 +0200 Subject: [PATCH 05/10] chore(codemode): widen mutating verb prefixes for the fail-closed guard Review (awareness-only): the fail-closed test only catches an ungated mutating tool if its name starts with a verb in MUTATING_TOOL_PREFIXES. Broaden the list with the verbs called out (purge_/apply_/submit_/merge_/clear_/bulk_) plus other common mutating verbs (none collide with current read-only tools), shrinking the heuristic's blind spot. The structural fix (a `mutating: true` flag at tool registration) is left as a follow-up. Co-Authored-By: Claude Opus 4.8 --- packages/codemode/src/mutating.ts | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/packages/codemode/src/mutating.ts b/packages/codemode/src/mutating.ts index f94612b8..a0b12e80 100644 --- a/packages/codemode/src/mutating.ts +++ b/packages/codemode/src/mutating.ts @@ -46,6 +46,12 @@ export const MUTATING_TOOL_NAMES: ReadonlySet<string> = new Set([ * taxonomy. Used by the apps/api regression test to fail CI when a tool that * looks mutating is missing from {@link MUTATING_TOOL_NAMES}. Keep read-only * verbs (find/get/list/search/inspect/describe/query/run/…) out of this list. 
+ * + * This is a heuristic guard, not a proof: a mutating tool named with a verb NOT + * listed here would slip past both the gate and the test. The list is kept broad + * to shrink that gap; the structural fix (a `mutating: true` flag declared at + * tool registration, deriving the set + gate) is tracked as a follow-up. When + * adding a tool with a new mutating verb, add its prefix here. */ export const MUTATING_TOOL_PREFIXES: ReadonlyArray<string> = [ "create_", @@ -76,6 +82,31 @@ export const MUTATING_TOOL_PREFIXES: ReadonlyArray<string> = [ "resolve_", "reopen_", "close_", + // Additional mutating verbs (defensive — none collide with current read tools). + "purge_", + "apply_", + "submit_", + "merge_", + "clear_", + "bulk_", + "send_", + "sync_", + "cancel_", + "revoke_", + "grant_", + "rotate_", + "import_", + "retry_", + "trigger_", + "dispatch_", + "move_", + "copy_", + "duplicate_", + "upsert_", + "toggle_", + "approve_", + "deny_", + "unassign_", ] /** True when a tool name looks state-changing by Maple's verb_noun convention. */ From 2a4e677c4c5052cd784721beb10a68325004a6b9 Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 11:11:51 +0200 Subject: [PATCH 06/10] feat(codemode): structural mutating-tool gate via mutatingTool registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the name-based gate with a per-tool structural flag, closing the heuristic's blind spot (a mutating tool named with an out-of-list verb could slip past the run_code sandbox). - McpToolRegistrar gains `mutatingTool()` alongside `tool()`; the 19 mutating tools now register via `server.mutatingTool(...)`, so each declares its own mutating-ness at registration (and a copied tool carries the gating with it). MapleToolDefinition gains `mutating: boolean`. - run_code's resolveCodeModeCall now blocks on `definition.mutating` (the structural flag), not just the name set — a mutating tool can't slip the gate regardless of its name. 
- mutating.test.ts asserts the flag set EXACTLY equals the shared MUTATING_TOOL_NAMES (both directions) so the structural flag and the static list (still needed by chat-flue + /chat/apply, which see tools over MCP) can't drift. The verb-prefix heuristic is kept as belt-and-suspenders for an unflagged, conventionally-named mutating tool. Closes the follow-up to the prior review's awareness-only finding. Co-Authored-By: Claude Opus 4.8 --- .../api/src/mcp/tools/add-dashboard-widget.ts | 2 +- apps/api/src/mcp/tools/claim-error-issue.ts | 2 +- .../src/mcp/tools/comment-on-error-issue.ts | 2 +- apps/api/src/mcp/tools/create-alert-rule.ts | 2 +- apps/api/src/mcp/tools/create-dashboard.ts | 2 +- apps/api/src/mcp/tools/delete-alert-rule.ts | 2 +- .../src/mcp/tools/heartbeat-error-issue.ts | 2 +- apps/api/src/mcp/tools/mutating.test.ts | 14 +++++++++- apps/api/src/mcp/tools/propose-fix.ts | 2 +- apps/api/src/mcp/tools/register-agent.ts | 2 +- apps/api/src/mcp/tools/registry.ts | 27 ++++++++++++++----- apps/api/src/mcp/tools/release-error-issue.ts | 2 +- .../src/mcp/tools/remove-dashboard-widget.ts | 2 +- .../mcp/tools/reorder-dashboard-widgets.ts | 2 +- .../mcp/tools/replace-dashboard-widgets.ts | 2 +- apps/api/src/mcp/tools/run-code.ts | 18 +++++++------ apps/api/src/mcp/tools/set-issue-severity.ts | 2 +- .../src/mcp/tools/transition-error-issue.ts | 2 +- apps/api/src/mcp/tools/types.ts | 14 ++++++++++ apps/api/src/mcp/tools/update-alert-rule.ts | 2 +- .../src/mcp/tools/update-dashboard-widget.ts | 2 +- apps/api/src/mcp/tools/update-dashboard.ts | 2 +- .../tools/update-error-notification-policy.ts | 2 +- 23 files changed, 77 insertions(+), 34 deletions(-) diff --git a/apps/api/src/mcp/tools/add-dashboard-widget.ts b/apps/api/src/mcp/tools/add-dashboard-widget.ts index 74311ed6..9c8c7791 100644 --- a/apps/api/src/mcp/tools/add-dashboard-widget.ts +++ b/apps/api/src/mcp/tools/add-dashboard-widget.ts @@ -45,7 +45,7 @@ const KNOWN_VISUALIZATIONS = [ ] as const export function 
registerAddDashboardWidgetTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, 'Add a single widget to an existing dashboard without re-sending the whole document. `visualization` MUST be one of: `chart`, `stat`, `gauge`, `table`, `list`, `pie`, `histogram`, `heatmap`, `funnel` — NOT a free-form title. `gauge` renders a single scalar on a radial gauge (same data shape as `stat`); set `display_json.gauge` to `{ min, max }` and `display_json.thresholds` to color the arc. For line/area/bar charts, pass `visualization: "chart"` and `display_type: "line"`/`"area"`/`"bar"`. Two creation paths:\n\n1. **Structured query builder** (default): pass `data_source_json` + `display_json` to wire the widget to a specific endpoint (`custom_query_builder_timeseries`, `service_overview`, etc.). Trace and log queries omit the metric-only fields (`metricName`/`metricType`/`isMonotonic`/`signalSource`) — only `dataSource: "metrics"` queries carry them. `whereClause` is a custom grammar (`=`, `>`, `<`, `>=`, `<=`, `contains`, `exists` joined by ` AND `) — there is NO SQL `IS NULL`/`IS NOT NULL`; use ` exists` to require an attribute. See the `maple://instructions` resource for the full widget JSON shape (aggregations per source, groupBy prefixes, units, stat reduceToValue, hideSeries).\n\n2. **Raw ClickHouse SQL**: pass `sql` to create a `raw_sql_chart` widget (the tool builds the dataSource for you — `data_source_json` is ignored). `sql` MUST reference `$__orgFilter`. Macros: `$__orgFilter` (required), `$__timeFilter(Column)`, `$__startTime`, `$__endTime`, `$__interval_s` (only useful when SQL also references it, typically inside `toStartOfInterval(…, INTERVAL $__interval_s SECOND)`).\n\n **Before writing raw SQL, call `describe_warehouse_tables`** to discover real table and column names (no args → list every table; `table: ""` → full column list with types, jsonPaths, sorting key, and curated notes on enum casing, units, sort-key hints). 
Do not guess table or column names — a hallucinated identifier silently produces an empty chart. Columns are PascalCase; values for `StatusCode`/`SeverityText`/`SpanKind` are Title Case (`\'Error\'` not `\'ERROR\'`); span `Duration` is in nanoseconds (divide by 1e6 for ms).\n\n **SELECT shape per `display_type`** (the renderer is opinionated; wrong aliases → empty or `[object Object]`):\n - `line`/`area`/`bar`: time bucket as first column (alias `bucket`) + ONE OR MORE numeric series columns. Each numeric column becomes one series; the column name becomes the legend label. **String columns are dropped**, so for multi-series (e.g., per-service breakdown) pivot in SQL with `countIf(...)` — tall form (`bucket, ServiceName, count()`) collapses to a single aggregate line. Single-series: `SELECT toStartOfInterval(Timestamp, INTERVAL $__interval_s SECOND) AS bucket, count() AS errors FROM ... WHERE $__orgFilter AND $__timeFilter(Timestamp) GROUP BY bucket ORDER BY bucket`. Multi-series wide form: `SELECT toStartOfInterval(Timestamp, INTERVAL $__interval_s SECOND) AS bucket, countIf(ServiceName=\'api\') AS api, countIf(ServiceName=\'web\') AS web FROM ... GROUP BY bucket ORDER BY bucket`. For dynamic series labels, run a discovery query first (e.g., `query_data` or a quick top-N) and inject the values.\n - `stat`: one scalar aliased `value` — `SELECT count() AS value FROM ... 
WHERE $__orgFilter AND $__timeFilter(Timestamp)`\n - `pie`: `name` (label) + numeric column; cap with `LIMIT 8`-ish\n - `heatmap`: three columns aliased `x`, `y`, `value` (string-cast numeric x/y)\n - `table`: any rows; columns render in order\n - `histogram`: one numeric column aliased `value` (renderer buckets client-side); add `LIMIT 5000`\n - `funnel`: `name` (string stage label) + numeric column; rows render in returned order as descending bars — `ORDER BY value DESC` for a classic funnel, cap with `LIMIT 8`-ish\n\n If `display_type` is omitted it\'s derived from `visualization` (chart→line via `display_json.chartId`, stat→stat, table→table, pie→pie, histogram→histogram, heatmap→heatmap, funnel→funnel). The stat `reduceToValue` transform is auto-injected.\n\n **See `maple://instructions` for the full table catalog, column lists, and worked examples per display type.**\n\nIf `layout_json` is omitted the widget is auto-placed using the same grid logic as the web UI. Returns the new widget id plus an automatic validation summary (verdict, flags). If `verdict` is `suspicious` or `broken`, fix via `update_dashboard_widget` — the chart will not render meaningful data as-is.', Schema.Struct({ diff --git a/apps/api/src/mcp/tools/claim-error-issue.ts b/apps/api/src/mcp/tools/claim-error-issue.ts index c277102e..92305c3f 100644 --- a/apps/api/src/mcp/tools/claim-error-issue.ts +++ b/apps/api/src/mcp/tools/claim-error-issue.ts @@ -15,7 +15,7 @@ import { ErrorIssueId } from "@maple/domain/http" const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) export function registerClaimErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "claim_error_issue", "Claim a lease on an error issue so other agents don't duplicate work. Issues in 'triage' or 'todo' auto-transition to 'in_progress' on claim. 
Lease defaults to 30 min; call heartbeat_error_issue before it expires or the issue drops back to 'todo'.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/comment-on-error-issue.ts b/apps/api/src/mcp/tools/comment-on-error-issue.ts index 6bc7cb62..10733c62 100644 --- a/apps/api/src/mcp/tools/comment-on-error-issue.ts +++ b/apps/api/src/mcp/tools/comment-on-error-issue.ts @@ -15,7 +15,7 @@ import { ErrorIssueId } from "@maple/domain/http" const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) export function registerCommentOnErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "comment_on_error_issue", "Add a comment to the issue's timeline. Use kind='agent_note' for automated reasoning steps (visible in the audit log but styled differently in the UI).", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/create-alert-rule.ts b/apps/api/src/mcp/tools/create-alert-rule.ts index 6031249a..b95d712c 100644 --- a/apps/api/src/mcp/tools/create-alert-rule.ts +++ b/apps/api/src/mcp/tools/create-alert-rule.ts @@ -247,7 +247,7 @@ const comparatorLabel: Record = { } export function registerCreateAlertRuleTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "create_alert_rule", "Create an alert rule. Use a template for common cases (high_error_rate, slow_p95, slow_p99, low_apdex, throughput_drop) or template='custom' for full control. " + "Templates auto-fill signal_type, comparator, and a sensible default threshold. 
" + diff --git a/apps/api/src/mcp/tools/create-dashboard.ts b/apps/api/src/mcp/tools/create-dashboard.ts index 774705d1..3845966b 100644 --- a/apps/api/src/mcp/tools/create-dashboard.ts +++ b/apps/api/src/mcp/tools/create-dashboard.ts @@ -287,7 +287,7 @@ const TIME_RANGE_MAP: Record = { export function registerCreateDashboardTool(server: McpToolRegistrar) { const templateList = DASHBOARD_TEMPLATES.map((t) => ` ${t.id} — ${t.description}`).join("\n") - server.tool( + server.mutatingTool( "create_dashboard", "Create a dashboard from a template, simplified widget specs, or custom JSON.\n\n" + "Templates:\n" + diff --git a/apps/api/src/mcp/tools/delete-alert-rule.ts b/apps/api/src/mcp/tools/delete-alert-rule.ts index 8f6a4ddb..0b24ebbd 100644 --- a/apps/api/src/mcp/tools/delete-alert-rule.ts +++ b/apps/api/src/mcp/tools/delete-alert-rule.ts @@ -8,7 +8,7 @@ import { AlertRuleId } from "@maple/domain" const decodeAlertRuleId = Schema.decodeUnknownOption(AlertRuleId) export function registerDeleteAlertRuleTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "delete_alert_rule", "Permanently delete an alert rule. This is irreversible and also deletes the rule's incident history, " + "delivery events, and evaluation state. Requires confirm=true. Use list_alert_rules to find rule IDs.", diff --git a/apps/api/src/mcp/tools/heartbeat-error-issue.ts b/apps/api/src/mcp/tools/heartbeat-error-issue.ts index 9033dbc8..f5e0f9c0 100644 --- a/apps/api/src/mcp/tools/heartbeat-error-issue.ts +++ b/apps/api/src/mcp/tools/heartbeat-error-issue.ts @@ -9,7 +9,7 @@ import { ErrorIssueId } from "@maple/domain/http" const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) export function registerHeartbeatErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "heartbeat_error_issue", "Extend the lease on a claimed error issue. 
Call this periodically while you work; if the lease expires, the issue drops back to 'todo' and any actor can re-claim it.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/mutating.test.ts b/apps/api/src/mcp/tools/mutating.test.ts index 466c0f8c..cf3da8d9 100644 --- a/apps/api/src/mcp/tools/mutating.test.ts +++ b/apps/api/src/mcp/tools/mutating.test.ts @@ -11,7 +11,19 @@ describe("MUTATING_TOOL_NAMES", () => { } }) - it("gates every conventionally-named mutating tool (run_code's sandbox fails OPEN otherwise)", () => { + it("exactly equals the tools registered via mutatingTool (structural flag <-> shared list)", () => { + // The flag (set at registration via `server.mutatingTool`) is the structural + // truth the run_code gate uses; MUTATING_TOOL_NAMES is the static list the + // chat + /chat/apply paths use (they can't read the flag over MCP). This + // asserts they can't drift in either direction. + const flagged = new Set(mapleToolDefinitions.filter((d) => d.mutating).map((d) => d.name)) + const flaggedButUnlisted = [...flagged].filter((n) => !MUTATING_TOOL_NAMES.has(n)) + const listedButUnflagged = [...MUTATING_TOOL_NAMES].filter((n) => !flagged.has(n)) + expect(flaggedButUnlisted, `registered mutating but absent from MUTATING_TOOL_NAMES: [${flaggedButUnlisted.join(", ")}]`).toEqual([]) + expect(listedButUnflagged, `in MUTATING_TOOL_NAMES but not registered via mutatingTool: [${listedButUnflagged.join(", ")}]`).toEqual([]) + }) + + it("gates every conventionally-named mutating tool (belt-and-suspenders for an unflagged mutating tool)", () => { // The inverse of the check above, and the safety-critical one: a registry // tool that looks mutating (create_/update_/delete_/…) but is missing from // the set would run its real side effect inside `run_code`. 
This fails CI so diff --git a/apps/api/src/mcp/tools/propose-fix.ts b/apps/api/src/mcp/tools/propose-fix.ts index 1acaaa9a..38520b30 100644 --- a/apps/api/src/mcp/tools/propose-fix.ts +++ b/apps/api/src/mcp/tools/propose-fix.ts @@ -22,7 +22,7 @@ const parseArtifactList = (raw: string | undefined): ReadonlyArray => { } export function registerProposeFixTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "propose_fix", "Attach a fix proposal (PR URL, patch summary, artifacts) to an error issue. Transitions the issue to 'in_review'. The human owner can then accept (→ done) or reject.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/register-agent.ts b/apps/api/src/mcp/tools/register-agent.ts index 716ed66d..5e96d2e4 100644 --- a/apps/api/src/mcp/tools/register-agent.ts +++ b/apps/api/src/mcp/tools/register-agent.ts @@ -18,7 +18,7 @@ const parseCapabilities = (raw: string | undefined): ReadonlyArray => { } export function registerRegisterAgentTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "register_agent", "Register an LLM agent with the error-issue system so it can claim and transition issues. Must be called from a human session (not an agent API key). Returns an actor ID to pin via API-key metadata.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/registry.ts b/apps/api/src/mcp/tools/registry.ts index c291e0e0..98ad4b5e 100644 --- a/apps/api/src/mcp/tools/registry.ts +++ b/apps/api/src/mcp/tools/registry.ts @@ -64,6 +64,8 @@ export interface MapleToolDefinition { readonly description: string readonly schema: Schema.Decoder readonly handler: (params: unknown) => Effect.Effect + /** True for state-changing tools (registered via `mutatingTool`). The `run_code` sandbox refuses these. 
*/ + readonly mutating: boolean } export const toInputSchema = (schema: Schema.Top): Record => { @@ -75,14 +77,27 @@ export const toInputSchema = (schema: Schema.Top): Record => { const collectMapleToolDefinitions = (): ReadonlyArray => { const definitions: MapleToolDefinition[] = [] + const add = ( + mutating: boolean, + name: string, + description: string, + schema: Schema.Decoder, + handler: unknown, + ) => { + definitions.push({ + name, + description, + schema, + handler: handler as MapleToolDefinition["handler"], + mutating, + }) + } const registrar: McpToolRegistrar = { tool(name, description, schema, handler) { - definitions.push({ - name, - description, - schema, - handler: handler as MapleToolDefinition["handler"], - }) + add(false, name, description, schema, handler) + }, + mutatingTool(name, description, schema, handler) { + add(true, name, description, schema, handler) }, } diff --git a/apps/api/src/mcp/tools/release-error-issue.ts b/apps/api/src/mcp/tools/release-error-issue.ts index aa5f86ca..aa3b6a08 100644 --- a/apps/api/src/mcp/tools/release-error-issue.ts +++ b/apps/api/src/mcp/tools/release-error-issue.ts @@ -16,7 +16,7 @@ const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) const decodeWorkflowState = Schema.decodeUnknownOption(WorkflowState) export function registerReleaseErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "release_error_issue", "Release the lease on an error issue you previously claimed, optionally transitioning it to another workflow state (default: 'todo').", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/remove-dashboard-widget.ts b/apps/api/src/mcp/tools/remove-dashboard-widget.ts index ba264ea5..925a4bf2 100644 --- a/apps/api/src/mcp/tools/remove-dashboard-widget.ts +++ b/apps/api/src/mcp/tools/remove-dashboard-widget.ts @@ -6,7 +6,7 @@ import { withDashboardMutation } from "../lib/dashboard-mutations" const TOOL = "remove_dashboard_widget" export function 
registerRemoveDashboardWidgetTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, "Remove a single widget from a dashboard by id. Other widgets and dashboard metadata are left untouched.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts b/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts index 78ef4624..e1342360 100644 --- a/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts +++ b/apps/api/src/mcp/tools/reorder-dashboard-widgets.ts @@ -60,7 +60,7 @@ const validateLayoutGeometry = (entries: ReadonlyArray): string[] = } export function registerReorderDashboardWidgetsTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, "Reposition or resize one or more widgets on a dashboard in a single call. Only the widgets you include are touched; any widget id not present in layouts_json keeps its existing layout. Useful for drag/drop-style moves without re-sending unrelated widget state.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/replace-dashboard-widgets.ts b/apps/api/src/mcp/tools/replace-dashboard-widgets.ts index e361b5aa..82fa5f3d 100644 --- a/apps/api/src/mcp/tools/replace-dashboard-widgets.ts +++ b/apps/api/src/mcp/tools/replace-dashboard-widgets.ts @@ -21,7 +21,7 @@ const TOOL = "replace_dashboard_widgets" const decodeWidget = Schema.decodeUnknownEffect(DashboardWidgetSchema) export function registerReplaceDashboardWidgetsTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, "Replace ALL widgets on a dashboard in one atomic, validated write — the safe middle ground between many incremental `add/update_dashboard_widget` calls and the corruption-prone full `dashboard_json` replace. Pass `widgets_json`: a JSON array of widget objects (same shape as `widgets[]` from get_dashboard). 
Each widget's query is validated BEFORE anything is persisted — if any widget references a filter/groupBy the engine can't honor, NOTHING is saved and the offending clauses are returned. Per-widget conveniences: `id` is auto-generated when omitted, and `layout` is auto-placed on a 12-column grid when omitted (so you can pass just `{ visualization, dataSource, display }`). Dashboard metadata (name, description, tags, time range) is left untouched. Returns an automatic validation summary; fix any `suspicious`/`broken` widgets and call again.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/run-code.ts b/apps/api/src/mcp/tools/run-code.ts index 6496caf6..681a8ffd 100644 --- a/apps/api/src/mcp/tools/run-code.ts +++ b/apps/api/src/mcp/tools/run-code.ts @@ -33,15 +33,21 @@ export const resolveCodeModeCall = async ( invoke: (definition: MapleToolDefinition, decoded: unknown) => Promise, ): Promise => { if (name === RUN_CODE_TOOL_NAME) { - // `run_code` is in `mapleToolDefinitions` (registered last) and isn't a - // mutating tool, so without this guard a snippet calling maple.run_code(...) - // would nest a sandbox inside the running one. + // `run_code` is in `mapleToolDefinitions` (registered last), so without this + // guard a snippet calling maple.run_code(...) would nest a sandbox. return { ok: false, error: { name: "Blocked", message: "maple.run_code cannot be called from inside code mode." }, } } - if (MUTATING_TOOL_NAMES.has(name)) { + const definition = definitions.find((d) => d.name === name) + if (!definition) { + return { ok: false, error: { name: "UnknownTool", message: `maple.${name} is not available` } } + } + // Structural gate: a tool registered via `mutatingTool` carries `mutating: true`, + // so a mutating tool can't slip past code mode regardless of its name. (The + // shared MUTATING_TOOL_NAMES set is verified to equal this flag in tests.) 
+ if (definition.mutating || MUTATING_TOOL_NAMES.has(name)) { return { ok: false, error: { @@ -50,10 +56,6 @@ export const resolveCodeModeCall = async ( }, } } - const definition = definitions.find((d) => d.name === name) - if (!definition) { - return { ok: false, error: { name: "UnknownTool", message: `maple.${name} is not available` } } - } let decoded: unknown try { decoded = Schema.decodeUnknownSync(definition.schema)(input ?? {}) diff --git a/apps/api/src/mcp/tools/set-issue-severity.ts b/apps/api/src/mcp/tools/set-issue-severity.ts index 3ca690f4..798abebf 100644 --- a/apps/api/src/mcp/tools/set-issue-severity.ts +++ b/apps/api/src/mcp/tools/set-issue-severity.ts @@ -16,7 +16,7 @@ const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) const decodeSeverity = Schema.decodeUnknownOption(IssueSeverity) export function registerSetIssueSeverityTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "set_issue_severity", "Set or clear the triage severity of an issue. Severity drives escalation routing (critical/high/medium/low). API-key agents write with 'ai' precedence, so a human's manual severity is never overwritten; human sessions write a sticky manual override.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/transition-error-issue.ts b/apps/api/src/mcp/tools/transition-error-issue.ts index 12a8e25c..2e84e136 100644 --- a/apps/api/src/mcp/tools/transition-error-issue.ts +++ b/apps/api/src/mcp/tools/transition-error-issue.ts @@ -16,7 +16,7 @@ const decodeIssueId = Schema.decodeUnknownOption(ErrorIssueId) const decodeWorkflowState = Schema.decodeUnknownOption(WorkflowState) export function registerTransitionErrorIssueTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "transition_error_issue", "Move an error issue to a new workflow state. 
Valid transitions: triage→(todo|in_progress|cancelled|wontfix); todo→(triage|in_progress|cancelled|wontfix); in_progress→(triage|todo|in_review|cancelled|wontfix); in_review→(triage|in_progress|done|cancelled|wontfix); done→(triage|in_progress|cancelled|wontfix); wontfix→(triage|cancelled).", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/types.ts b/apps/api/src/mcp/tools/types.ts index 10d3785e..534cf667 100644 --- a/apps/api/src/mcp/tools/types.ts +++ b/apps/api/src/mcp/tools/types.ts @@ -38,12 +38,26 @@ export interface McpToolResult { } export interface McpToolRegistrar { + /** Register a read-only tool. */ tool>( name: string, description: string, schema: TSchema, handler: (params: TSchema["Type"]) => Effect.Effect, ): void + /** + * Register a MUTATING (state-changing) tool. Structurally marks the tool so + * the `run_code` sandbox refuses it and the chat approval-gates it — declared + * here at the tool rather than in a name list, so a copied/new mutating tool + * carries its own gating. The shared `MUTATING_TOOL_NAMES` set is verified to + * equal the set of tools registered this way (see `mutating.test.ts`). + */ + mutatingTool>( + name: string, + description: string, + schema: TSchema, + handler: (params: TSchema["Type"]) => Effect.Effect, + ): void } export const requiredStringParam = (description: string) => Schema.String.annotate({ description }) diff --git a/apps/api/src/mcp/tools/update-alert-rule.ts b/apps/api/src/mcp/tools/update-alert-rule.ts index 8af66b42..303bb2d4 100644 --- a/apps/api/src/mcp/tools/update-alert-rule.ts +++ b/apps/api/src/mcp/tools/update-alert-rule.ts @@ -142,7 +142,7 @@ function buildUpdatedRequest( } export function registerUpdateAlertRuleTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "update_alert_rule", "Update an existing alert rule. Only provide the fields you want to change — every other field keeps its current value. 
" + "Use list_alert_rules to find rule IDs and destination IDs, or get_alert_rule to inspect the current config first.", diff --git a/apps/api/src/mcp/tools/update-dashboard-widget.ts b/apps/api/src/mcp/tools/update-dashboard-widget.ts index 47e800c5..c969ebf9 100644 --- a/apps/api/src/mcp/tools/update-dashboard-widget.ts +++ b/apps/api/src/mcp/tools/update-dashboard-widget.ts @@ -12,7 +12,7 @@ import { resolveTenant } from "../lib/query-warehouse" const TOOL = "update_dashboard_widget" export function registerUpdateDashboardWidgetTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( TOOL, 'Replace a single widget on an existing dashboard. Pass the full widget JSON (same shape as one entry in `widgets[]` from get_dashboard) for ONLY the widget you want to change. Other widgets and dashboard metadata are left untouched. The stored widget id is always forced to the widget_id parameter, so any id inside widget_json is ignored.\n\nThe response includes an automatic validation summary (verdict, flags). If `verdict` is `suspicious` or `broken`, fix the widget and call this tool again — the chart will not render meaningful data as-is.\n\nTrace and log queries omit the metric-only fields (`metricName`/`metricType`/`isMonotonic`/`signalSource`) — only `dataSource: "metrics"` queries carry them. `whereClause` is a custom grammar (`=`, `>`, `<`, `>=`, `<=`, `contains`, `exists` joined by ` AND `) — there is NO SQL `IS NULL`/`IS NOT NULL`; use ` exists` to require an attribute. 
See the `maple://instructions` resource for the full widget JSON shape (aggregations per source, groupBy prefixes, units, stat reduceToValue, hideSeries).', Schema.Struct({ diff --git a/apps/api/src/mcp/tools/update-dashboard.ts b/apps/api/src/mcp/tools/update-dashboard.ts index 7266e79d..eca98884 100644 --- a/apps/api/src/mcp/tools/update-dashboard.ts +++ b/apps/api/src/mcp/tools/update-dashboard.ts @@ -18,7 +18,7 @@ const TIME_RANGE_MAP: Record = { } export function registerUpdateDashboardTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "update_dashboard", "Update an existing dashboard's top-level metadata (name, description, time_range). For widget-level changes prefer the incremental tools: add_dashboard_widget, update_dashboard_widget, remove_dashboard_widget, reorder_dashboard_widgets — they do not require re-sending the whole dashboard. `dashboard_json` is still accepted as an escape hatch for full replacement but is expensive on large dashboards and easy to corrupt.", Schema.Struct({ diff --git a/apps/api/src/mcp/tools/update-error-notification-policy.ts b/apps/api/src/mcp/tools/update-error-notification-policy.ts index 60ff82af..b803d769 100644 --- a/apps/api/src/mcp/tools/update-error-notification-policy.ts +++ b/apps/api/src/mcp/tools/update-error-notification-policy.ts @@ -16,7 +16,7 @@ const decodeSeverity = Schema.decodeUnknownOption(AlertSeverity) const decodeDestinationId = Schema.decodeUnknownEffect(AlertDestinationId) export function registerUpdateErrorNotificationPolicyTool(server: McpToolRegistrar) { - server.tool( + server.mutatingTool( "update_error_notification_policy", "Configure the org-wide error notification policy. Controls whether incidents (first-seen, regression, auto-resolve) dispatch to alert destinations. 
Omit a field to leave it unchanged.", Schema.Struct({ From 9c029b5b1dbfc44eb27eec23101d1501223d5056 Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 11:34:10 +0200 Subject: [PATCH 07/10] fix(test): update registrar mock for mutatingTool (CI TypeScript suite) dashboard-mutations.test.ts builds an McpToolRegistrar mock and calls registerUpdateDashboardTool to capture the handler. update_dashboard now registers via server.mutatingTool, so the tool()-only mock threw "server.mutatingTool is not a function". Capture from both tool() and mutatingTool(). Full apps/api suite green (614/614). Co-Authored-By: Claude Opus 4.8 --- apps/api/src/mcp/lib/dashboard-mutations.test.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/apps/api/src/mcp/lib/dashboard-mutations.test.ts b/apps/api/src/mcp/lib/dashboard-mutations.test.ts index 71b92006..06fb9c95 100644 --- a/apps/api/src/mcp/lib/dashboard-mutations.test.ts +++ b/apps/api/src/mcp/lib/dashboard-mutations.test.ts @@ -148,10 +148,15 @@ describe("dashboard mutations on tag-less / description-less dashboards", () => const layer = makeLayer(dbUrl) let handler: ToolHandler | null = null + // Capture from both tool() and mutatingTool() — update_dashboard registers + // via mutatingTool (it's a mutating tool), but capturing both keeps this + // harness robust regardless of which a tool uses. 
+ const capture = (_name: string, _description: string, _schema: unknown, h: unknown) => { + handler = h as ToolHandler + } const registrar: McpToolRegistrar = { - tool: (_name, _description, _schema, h) => { - handler = h as ToolHandler - }, + tool: capture as McpToolRegistrar["tool"], + mutatingTool: capture as McpToolRegistrar["mutatingTool"], } registerUpdateDashboardTool(registrar) assert.isNotNull(handler) From 1c6124680bdc51f4ceb4d7f8b854603f16268065 Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 11:44:10 +0200 Subject: [PATCH 08/10] refactor(codemode): drop the MAPLE_CODE_MODE flag and verb-prefix heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code Mode is a core capability, not a flagged experiment — rework it to its intended end state. - Remove MAPLE_CODE_MODE everywhere. Code Mode is active whenever the Worker Loader sandbox (`LOADER`) is bound; the agent (chat) and run_code tool (MCP) key off the binding's presence, degrading to the direct tools when it's absent (e.g. local dev). Both alchemy deploys now attach `LOADER: WorkerLoader()` unconditionally — deploying requires Worker Loader beta access on the account. - Drop the verb-prefix heuristic (MUTATING_TOOL_PREFIXES / looksMutating and its tests). It was a stepping stone before the structural `mutating` flag landed; with per-tool `server.mutatingTool(...)` registration + the exact flag<->list equality test, the heuristic is redundant scaffolding. typecheck 24/24, knip clean, apps/api 613 + codemode 25 tests green, chat-flue builds. 
Co-Authored-By: Claude Opus 4.8 --- apps/api/alchemy.run.ts | 10 ++- apps/api/src/mcp/tools/mutating.test.ts | 24 ++----- apps/api/src/mcp/tools/registry.ts | 3 +- apps/api/src/mcp/tools/run-code.ts | 12 ++-- apps/chat-flue/alchemy.run.ts | 11 ++- apps/chat-flue/src/agents/maple-chat.ts | 11 +-- apps/chat-flue/src/lib/env.ts | 12 ++-- apps/chat-flue/src/lib/prompts.ts | 2 +- packages/codemode/src/mutating.test.ts | 29 -------- packages/codemode/src/mutating.ts | 91 +++---------------------- 10 files changed, 41 insertions(+), 164 deletions(-) delete mode 100644 packages/codemode/src/mutating.test.ts diff --git a/apps/api/alchemy.run.ts b/apps/api/alchemy.run.ts index 7e45131e..38c59766 100644 --- a/apps/api/alchemy.run.ts +++ b/apps/api/alchemy.run.ts @@ -153,12 +153,10 @@ export const createMapleApi = async ({ stage, domains }: CreateMapleApiOptions) ...optionalSecret("GITHUB_APP_CLIENT_SECRET"), ...optionalSecret("GITHUB_APP_WEBHOOK_SECRET"), ...optionalPlain("GITHUB_API_BASE_URL"), - // Code Mode (Cloudflare Dynamic Workers). The `run_code` MCP tool runs - // model-written code in a sandbox isolate via this `worker_loader` binding. - // Added only when MAPLE_CODE_MODE is set (the binding needs Worker Loader - // beta access); the tool stays inert at runtime without it. - ...optionalPlain("MAPLE_CODE_MODE"), - ...(process.env.MAPLE_CODE_MODE?.trim() ? { LOADER: WorkerLoader() } : {}), + // Code Mode sandbox (Cloudflare Dynamic Workers). The `run_code` MCP tool + // runs model-written code in an isolate via this `worker_loader` binding; + // its presence activates the tool. Requires Worker Loader beta access. 
+ LOADER: WorkerLoader(), }, }) diff --git a/apps/api/src/mcp/tools/mutating.test.ts b/apps/api/src/mcp/tools/mutating.test.ts index cf3da8d9..bb1977bb 100644 --- a/apps/api/src/mcp/tools/mutating.test.ts +++ b/apps/api/src/mcp/tools/mutating.test.ts @@ -1,5 +1,4 @@ import { describe, expect, it } from "vitest" -import { looksMutating } from "@maple/codemode" import { mapleToolDefinitions } from "./registry" import { MUTATING_TOOL_NAMES } from "./mutating" @@ -12,10 +11,11 @@ describe("MUTATING_TOOL_NAMES", () => { }) it("exactly equals the tools registered via mutatingTool (structural flag <-> shared list)", () => { - // The flag (set at registration via `server.mutatingTool`) is the structural - // truth the run_code gate uses; MUTATING_TOOL_NAMES is the static list the - // chat + /chat/apply paths use (they can't read the flag over MCP). This - // asserts they can't drift in either direction. + // The per-tool `mutating` flag (set at registration via `server.mutatingTool`) + // is the structural truth the run_code gate uses; MUTATING_TOOL_NAMES is the + // static list the chat + /chat/apply paths use (they can't read the flag over + // MCP). This asserts they can't drift in either direction — register a + // mutating tool but forget the list (or vice versa) and CI fails. 
const flagged = new Set(mapleToolDefinitions.filter((d) => d.mutating).map((d) => d.name)) const flaggedButUnlisted = [...flagged].filter((n) => !MUTATING_TOOL_NAMES.has(n)) const listedButUnflagged = [...MUTATING_TOOL_NAMES].filter((n) => !flagged.has(n)) @@ -23,20 +23,6 @@ describe("MUTATING_TOOL_NAMES", () => { expect(listedButUnflagged, `in MUTATING_TOOL_NAMES but not registered via mutatingTool: [${listedButUnflagged.join(", ")}]`).toEqual([]) }) - it("gates every conventionally-named mutating tool (belt-and-suspenders for an unflagged mutating tool)", () => { - // The inverse of the check above, and the safety-critical one: a registry - // tool that looks mutating (create_/update_/delete_/…) but is missing from - // the set would run its real side effect inside `run_code`. This fails CI so - // a new mutating tool can't ship ungated. - const ungated = mapleToolDefinitions - .map((d) => d.name) - .filter((name) => looksMutating(name) && !MUTATING_TOOL_NAMES.has(name)) - expect( - ungated, - `mutating-looking tools are NOT in MUTATING_TOOL_NAMES — gate them or they run real side effects inside run_code: [${ungated.join(", ")}]`, - ).toEqual([]) - }) - it("excludes read-only tools (so /chat/apply can't run them)", () => { expect(MUTATING_TOOL_NAMES.has("find_errors")).toBe(false) expect(MUTATING_TOOL_NAMES.has("search_traces")).toBe(false) diff --git a/apps/api/src/mcp/tools/registry.ts b/apps/api/src/mcp/tools/registry.ts index 98ad4b5e..742893f8 100644 --- a/apps/api/src/mcp/tools/registry.ts +++ b/apps/api/src/mcp/tools/registry.ts @@ -154,7 +154,8 @@ const collectMapleToolDefinitions = (): ReadonlyArray => { registerUpdateErrorNotificationPolicyTool(registrar) // Code Mode: a single tool whose sandboxed snippet orchestrates the read-only // tools above. Registered last so it can reference the full set at runtime - // (it dispatches via `mapleToolDefinitions`); inert unless MAPLE_CODE_MODE=1. 
+ // (it dispatches via `mapleToolDefinitions`); inert unless the LOADER sandbox + // binding is present. registerRunCodeTool(registrar) return definitions diff --git a/apps/api/src/mcp/tools/run-code.ts b/apps/api/src/mcp/tools/run-code.ts index 681a8ffd..318530f9 100644 --- a/apps/api/src/mcp/tools/run-code.ts +++ b/apps/api/src/mcp/tools/run-code.ts @@ -86,10 +86,10 @@ export const resolveCodeModeCall = async ( * is identical to a direct tool call and the sandbox can never widen it. Mutating * tools are blocked inside code (they must go through the host's approval path). * - * Flag-gated at runtime: returns an error result unless `MAPLE_CODE_MODE=1` and - * the `LOADER` (worker_loader) binding is present. The Workers-only sandbox - * driver is imported dynamically so this module's static graph stays Node-safe - * (the tool registry is imported by node-based evals/tests). + * Active when the `LOADER` (worker_loader) binding is present; without it the + * tool returns an "unavailable" result (e.g. local/test runs). The Workers-only + * sandbox driver is imported dynamically so this module's static graph stays + * Node-safe (the tool registry is imported by node-based evals/tests). */ export function registerRunCodeTool(server: McpToolRegistrar) { server.tool( @@ -106,9 +106,9 @@ export function registerRunCodeTool(server: McpToolRegistrar) { const env = yield* WorkerEnvironment const loader = env.LOADER as WorkerLoader | undefined - if (env.MAPLE_CODE_MODE !== "1" || !loader) { + if (!loader) { return validationError( - "Code mode is not enabled on this deployment. Call the individual Maple tools directly instead.", + "Code mode is unavailable here (no sandbox runtime is bound). 
Call the individual Maple tools directly instead.", ) } if (!code.trim()) { diff --git a/apps/chat-flue/alchemy.run.ts b/apps/chat-flue/alchemy.run.ts index 44639e43..ef7216e4 100644 --- a/apps/chat-flue/alchemy.run.ts +++ b/apps/chat-flue/alchemy.run.ts @@ -104,12 +104,11 @@ export const createChatFlueWorker = async ({ stage, domains, mapleApiUrl }: Crea ...optionalPlain("MAPLE_ENVIRONMENT", resolveDeploymentEnvironment(stage)), ...optionalPlain("MAPLE_CHAT_MODEL"), ...optionalPlain("MAPLE_TRIAGE_MODEL"), - // Code Mode (Cloudflare Dynamic Workers / Worker Loader). The `worker_loader` - // binding is added only when MAPLE_CODE_MODE is set, since it requires Worker - // Loader beta access on the account; otherwise deploys are unaffected and the - // agent no-ops Code Mode at runtime (LOADER absent → direct tools only). - ...optionalPlain("MAPLE_CODE_MODE"), - ...(process.env.MAPLE_CODE_MODE?.trim() ? { LOADER: WorkerLoader() } : {}), + // Code Mode sandbox (Cloudflare Dynamic Workers). The `worker_loader` + // binding powers the `run_code` tool's isolate; its presence is what + // activates Code Mode at runtime. Requires Worker Loader beta access on + // the account. + LOADER: WorkerLoader(), ...optionalPlain("MAPLE_AUTH_MODE", "self_hosted"), ...optionalSecret("MAPLE_ROOT_PASSWORD"), ...optionalSecret("CLERK_SECRET_KEY"), diff --git a/apps/chat-flue/src/agents/maple-chat.ts b/apps/chat-flue/src/agents/maple-chat.ts index 5f29e3c7..293bfd73 100644 --- a/apps/chat-flue/src/agents/maple-chat.ts +++ b/apps/chat-flue/src/agents/maple-chat.ts @@ -114,12 +114,13 @@ export default createAgent(async (ctx) => { } } - // Code Mode (hybrid, flag-gated): when enabled and the sandbox is bound, add a - // `run_code` tool backed by the SAME gated tools (so mutations still only - // propose) and inject the generated `maple.*` API into the prompt. The direct - // tools stay available as a fallback. No-ops without the LOADER binding. 
+ // Code Mode: add a `run_code` tool backed by the SAME gated tools (so mutations + // still only propose) and inject the generated `maple.*` API into the prompt. + // The direct tools stay available alongside it. Active whenever the Worker + // Loader sandbox is bound — i.e. everywhere except local dev, where it degrades + // to the direct tools. let codeModeApi: CodeModeApi | undefined - if (tools.length > 0 && ctx.env.MAPLE_CODE_MODE === "1" && ctx.env.LOADER) { + if (tools.length > 0 && ctx.env.LOADER) { codeModeApi = buildCodeModeApi(tools) tools = [...tools, createRunCodeTool(ctx.env, codeModeApi)] } diff --git a/apps/chat-flue/src/lib/env.ts b/apps/chat-flue/src/lib/env.ts index 2bd16f9f..8a7ef329 100644 --- a/apps/chat-flue/src/lib/env.ts +++ b/apps/chat-flue/src/lib/env.ts @@ -16,16 +16,12 @@ export interface ChatFlueEnv { MAPLE_ENVIRONMENT?: string // --- Code Mode (Cloudflare Dynamic Workers / Worker Loader) --- - /** - * `"1"` enables Code Mode: the agent gets a `run_code` tool + a generated - * `maple.*` API and writes code instead of calling 50+ tools one at a time. - * Hybrid — direct tools stay available. No-ops unless `LOADER` is also bound. - */ - MAPLE_CODE_MODE?: string /** * Worker Loader binding (`worker_loader`) used to spin up a fresh sandbox - * isolate per `run_code` call. Bound only when `MAPLE_CODE_MODE` is set; the - * binding requires Cloudflare Worker Loader beta access on the account. + * isolate per `run_code` call. Its presence is what activates Code Mode: when + * bound, the agent gets a `run_code` tool + the generated `maple.*` API; when + * absent (e.g. local dev), the agent uses the direct tools. Requires + * Cloudflare Worker Loader beta access on the account. 
*/ LOADER?: WorkerLoader diff --git a/apps/chat-flue/src/lib/prompts.ts b/apps/chat-flue/src/lib/prompts.ts index ec0a80a4..3fd011be 100644 --- a/apps/chat-flue/src/lib/prompts.ts +++ b/apps/chat-flue/src/lib/prompts.ts @@ -13,7 +13,7 @@ Maple's tools are exposed over MCP and named \`mcp__maple__\` (for example call them by their full \`mcp__maple__\` name.` /** - * Code Mode block (appended when `MAPLE_CODE_MODE=1` and the sandbox is bound). + * Code Mode block (appended when the `run_code` sandbox is bound). * Gives the model a `run_code` tool and the generated `maple.*` API surface so * it can write one snippet that chains/filters many tool calls instead of * round-tripping each. The direct `mcp__maple__*` tools remain available. diff --git a/packages/codemode/src/mutating.test.ts b/packages/codemode/src/mutating.test.ts deleted file mode 100644 index 2f6684cf..00000000 --- a/packages/codemode/src/mutating.test.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { describe, expect, it } from "vitest" -import { looksMutating, MUTATING_TOOL_NAMES, MUTATING_TOOL_PREFIXES } from "./mutating.ts" - -describe("looksMutating", () => { - it("flags conventionally-named mutating tools", () => { - expect(looksMutating("create_dashboard")).toBe(true) - expect(looksMutating("delete_alert_rule")).toBe(true) - expect(looksMutating("set_issue_severity")).toBe(true) - expect(looksMutating("register_agent")).toBe(true) - }) - - it("does not flag read-only tools or run_code", () => { - for (const name of ["find_errors", "search_traces", "list_dashboards", "get_dashboard", "query_data", "run_code"]) { - expect(looksMutating(name), name).toBe(false) - } - }) - - it("every gated tool name matches the mutating convention (prefixes stay in sync with the set)", () => { - for (const name of MUTATING_TOOL_NAMES) { - expect(looksMutating(name), `${name} is gated but matches no MUTATING_TOOL_PREFIXES`).toBe(true) - } - }) - - it("exposes the prefix list", () => { - 
expect(MUTATING_TOOL_PREFIXES).toContain("create_") - expect(MUTATING_TOOL_PREFIXES).not.toContain("get_") - expect(MUTATING_TOOL_PREFIXES).not.toContain("run_") - }) -}) diff --git a/packages/codemode/src/mutating.ts b/packages/codemode/src/mutating.ts index a0b12e80..2b44f6bf 100644 --- a/packages/codemode/src/mutating.ts +++ b/packages/codemode/src/mutating.ts @@ -1,19 +1,16 @@ /** - * Base names of the mutating Maple MCP tools — the **single source of truth** - * for approval gating, imported by both apps/api and apps/chat-flue so the lists - * can't drift. - * - * Enforcement points: + * Base names of the mutating Maple MCP tools — the static list the cross-app / + * over-MCP consumers use, shared by apps/api and apps/chat-flue so they can't + * drift: * - apps/chat-flue wraps these so a model call returns a `proposed` marker * instead of mutating (the web client applies the real change via * `POST /api/chat/apply`, which only accepts tools in this set). - * - The MCP `run_code` sandbox (apps/api) refuses these, so a snippet can't - * trigger an ungated mutation. + * - apps/api's MCP `run_code` sandbox refuses them. * - * Because `run_code` makes this set fail **open** (a name absent from it runs - * its real handler), apps/api has a regression test asserting every - * conventionally-named mutating tool in the registry is present here — add a - * mutating tool without gating it and CI fails. + * The structural source of truth is the per-tool `mutating` flag set at + * registration via `server.mutatingTool(...)`; an apps/api test asserts this + * list exactly equals the set of tools registered that way, so adding a mutating + * tool without listing it here (or vice versa) fails CI. 
*/ export const MUTATING_TOOL_NAMES: ReadonlySet = new Set([ // dashboards @@ -40,75 +37,3 @@ export const MUTATING_TOOL_NAMES: ReadonlySet = new Set([ "propose_fix", "register_agent", ]) - -/** - * Name prefixes that denote a state-changing tool in Maple's verb_noun tool - * taxonomy. Used by the apps/api regression test to fail CI when a tool that - * looks mutating is missing from {@link MUTATING_TOOL_NAMES}. Keep read-only - * verbs (find/get/list/search/inspect/describe/query/run/…) out of this list. - * - * This is a heuristic guard, not a proof: a mutating tool named with a verb NOT - * listed here would slip past both the gate and the test. The list is kept broad - * to shrink that gap; the structural fix (a `mutating: true` flag declared at - * tool registration, deriving the set + gate) is tracked as a follow-up. When - * adding a tool with a new mutating verb, add its prefix here. - */ -export const MUTATING_TOOL_PREFIXES: ReadonlyArray = [ - "create_", - "update_", - "delete_", - "add_", - "remove_", - "reorder_", - "replace_", - "claim_", - "release_", - "transition_", - "comment_", - "heartbeat_", - "set_", - "propose_", - "register_", - "archive_", - "restore_", - "enable_", - "disable_", - "mute_", - "unmute_", - "rename_", - "assign_", - "acknowledge_", - "snooze_", - "resolve_", - "reopen_", - "close_", - // Additional mutating verbs (defensive — none collide with current read tools). - "purge_", - "apply_", - "submit_", - "merge_", - "clear_", - "bulk_", - "send_", - "sync_", - "cancel_", - "revoke_", - "grant_", - "rotate_", - "import_", - "retry_", - "trigger_", - "dispatch_", - "move_", - "copy_", - "duplicate_", - "upsert_", - "toggle_", - "approve_", - "deny_", - "unassign_", -] - -/** True when a tool name looks state-changing by Maple's verb_noun convention. 
*/ -export const looksMutating = (name: string): boolean => - MUTATING_TOOL_PREFIXES.some((prefix) => name.startsWith(prefix)) From e9ee8ef8565798060f15e3123b2d1b7f78e5226a Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 23 Jun 2026 11:56:31 +0200 Subject: [PATCH 09/10] fix(chat-flue): run dev under portless so chat-flue.localhost is proxied MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chat-flue was the only app whose dev script ran `flue dev` directly instead of through portless, so portless never registered `chat-flue.localhost` and the web app's `siblingUrl("chat-flue")` request got a bare portless 404 — surfacing in the browser as a CORS error (the 404 has no CORS headers). The worker itself is healthy on :3583 with correct CORS. Mirror the other apps: `dev` → `portless`, `dev:app` runs `flue dev --port ${PORT:-3583}`, and a `portless` block names the host `chat-flue`. Pre-existing gap (since the Flue rework), unrelated to Code Mode. Co-Authored-By: Claude Opus 4.8 --- apps/chat-flue/package.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/chat-flue/package.json b/apps/chat-flue/package.json index 3fc7a054..9eef91c5 100644 --- a/apps/chat-flue/package.json +++ b/apps/chat-flue/package.json @@ -3,13 +3,18 @@ "private": true, "type": "module", "scripts": { - "dev": "flue dev --target cloudflare", + "dev": "portless", + "dev:app": "flue dev --target cloudflare --port ${PORT:-3583}", "build": "flue build --target cloudflare", "deploy": "flue build --target cloudflare && wrangler deploy --config dist/maple_chat_flue/wrangler.json", "connect": "flue connect maple-chat local", "test": "vitest run --passWithNoTests", "typecheck": "tsc --noEmit" }, + "portless": { + "name": "chat-flue", + "script": "dev:app" + }, "dependencies": { "@clerk/backend": "^2.30.1", "@flue/opentelemetry": "1.0.0-beta.1", From 46bc4195d91a11715e2f46bedf54f2e8b324d28a Mon Sep 17 00:00:00 2001 From: Makisuo Date: Tue, 
23 Jun 2026 12:04:51 +0200 Subject: [PATCH 10/10] fix(chat-flue): load root .env.local in dev so /agents auth resolves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chat-flue's `flue dev` loaded no env (flue excludes .dev.vars/.env*, and no --env was passed), so the worker had no MAPLE_AUTH_MODE / CLERK_* / MAPLE_ROOT_PASSWORD — every /agents/* request 401'd while the web app sent a Clerk token. Pass `--env ../../.env.local` (the shared dev secrets the other apps load via --env-file), which flue injects into the worker runtime env. Verified: with it, the internal-token /workflows guard passes (404 on unknown workflow) vs 401 without — so MAPLE_AUTH_MODE=clerk + CLERK_SECRET_KEY + INTERNAL_SERVICE_TOKEN + MAPLE_API_URL now reach the worker (fixes both /agents auth and the chat->MCP connection). Co-Authored-By: Claude Opus 4.8 --- apps/chat-flue/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/chat-flue/package.json b/apps/chat-flue/package.json index 9eef91c5..124358a6 100644 --- a/apps/chat-flue/package.json +++ b/apps/chat-flue/package.json @@ -4,7 +4,7 @@ "type": "module", "scripts": { "dev": "portless", - "dev:app": "flue dev --target cloudflare --port ${PORT:-3583}", + "dev:app": "flue dev --target cloudflare --port ${PORT:-3583} --env ../../.env.local", "build": "flue build --target cloudflare", "deploy": "flue build --target cloudflare && wrangler deploy --config dist/maple_chat_flue/wrangler.json", "connect": "flue connect maple-chat local",