From 31d633f99e0a212061aa0c85bf0edcf5cd86c282 Mon Sep 17 00:00:00 2001 From: remote service administrator Date: Thu, 25 Jun 2026 19:02:18 -0400 Subject: [PATCH 1/4] feat(cli): pass chDB config file to local server --- apps/cli/src/commands/server.ts | 27 +++++++++++++++---- apps/cli/src/server/chdb.ts | 3 +++ apps/cli/src/server/serve.ts | 7 ++++- .../content/docs/local-mode/cli-reference.md | 1 + 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/apps/cli/src/commands/server.ts b/apps/cli/src/commands/server.ts index 0135faf3..20442905 100644 --- a/apps/cli/src/commands/server.ts +++ b/apps/cli/src/commands/server.ts @@ -104,6 +104,12 @@ const dataDirFlag = Flag.optional( ), ) +const chdbConfigFileFlag = Flag.optional( + Flag.string("chdb-config-file").pipe( + Flag.withDescription("Optional ClickHouse config file passed to embedded chDB"), + ), +) + const backgroundFlag = Flag.boolean("background").pipe( Flag.withAlias("d"), Flag.withDescription("Run the server detached (logs to ~/.maple/maple.log); stop with `maple stop`"), @@ -149,7 +155,12 @@ const probeHealth = (addr: string): Effect.Effect => * file; we poll `/health` until it binds, then print a summary and return so the * parent process exits. */ -const startDetached = (port: number, dataDir: string, offline: boolean): Effect.Effect => +const startDetached = ( + port: number, + dataDir: string, + offline: boolean, + chdbConfigFile: string | undefined, +): Effect.Effect => Effect.gen(function* () { const logPath = logFilePath(dataDir) // Rebuild the command explicitly rather than slicing argv: a Bun-compiled @@ -165,6 +176,7 @@ const startDetached = (port: number, dataDir: string, offline: boolean): Effect. String(port), "--data-dir", dataDir, + ...(chdbConfigFile ? ["--chdb-config-file", chdbConfigFile] : []), ...(offline ? ["--offline"] : []), ] @@ -214,6 +226,7 @@ const startDetached = (port: number, dataDir: string, offline: boolean): Effect. export const start = Command.make("start", { port, dataDir: dataDirFlag, + chdbConfigFile: chdbConfigFileFlag, background: backgroundFlag, offline: offlineFlag, reset: resetFlag, @@ -298,7 +311,8 @@ export const start = Command.make("start", { } // Detached: spawn the same command without --background and exit. - if (a.background) return yield* startDetached(a.port, dataDir, a.offline) + if (a.background) + return yield* startDetached(a.port, dataDir, a.offline, Option.getOrUndefined(a.chdbConfigFile)) yield* Effect.sync(() => process.stderr.write( @@ -325,9 +339,12 @@ export const start = Command.make("start", { }), ) - const { port: boundPort } = yield* startServer({ port: a.port, dataDir, assets }).pipe( - Effect.mapError((e) => new ServerError({ message: `failed to start: ${e.message}` })), - ) + const { port: boundPort } = yield* startServer({ + port: a.port, + dataDir, + configFile: Option.getOrUndefined(a.chdbConfigFile), + assets, + }).pipe(Effect.mapError((e) => new ServerError({ message: `failed to start: ${e.message}` }))) started = true // Bootstrap succeeded — stamp the store so a later start over an diff --git a/apps/cli/src/server/chdb.ts b/apps/cli/src/server/chdb.ts index c13613f7..840fc1ca 100644 --- a/apps/cli/src/server/chdb.ts +++ b/apps/cli/src/server/chdb.ts @@ -81,6 +81,8 @@ export interface ChdbOptions { readonly dataDir: string /** Full DDL applied once at open (idempotent `IF NOT EXISTS`). */ readonly schemaSql: string + /** Optional ClickHouse config file passed through to chDB. */ + readonly configFile?: string } /** @@ -117,6 +119,7 @@ export class Chdb { "--async_load_databases=0", "--async_load_system_database=0", `--path=${options.dataDir}`, + ...(options.configFile ? [`--config-file=${options.configFile}`] : []), ] const argBufs = args.map(cstr) const argv = new BigUint64Array(args.length) diff --git a/apps/cli/src/server/serve.ts b/apps/cli/src/server/serve.ts index 05ee7790..5bef6986 100644 --- a/apps/cli/src/server/serve.ts +++ b/apps/cli/src/server/serve.ts @@ -26,6 +26,7 @@ export interface AssetResolver { export interface ServerOptions { readonly port: number readonly dataDir: string + readonly configFile?: string /** Serves the bundled SPA; omit to disable the UI (API-only). */ readonly assets?: AssetResolver } @@ -335,7 +336,11 @@ export const startServer = ( options: ServerOptions, ): Effect.Effect<{ readonly port: number }, ChdbError, Scope.Scope> => Effect.gen(function* () { - const db = yield* acquireChdb({ dataDir: options.dataDir, schemaSql }) + const db = yield* acquireChdb({ + dataDir: options.dataDir, + schemaSql, + configFile: options.configFile, + }) // A dedicated runtime carrying the OTel tracer for per-request spans: the // Bun.serve handler runs outside Effect, so each request's span effect is // run through this runtime. Disposed on scope close, which flushes any diff --git a/apps/landing/src/content/docs/local-mode/cli-reference.md b/apps/landing/src/content/docs/local-mode/cli-reference.md index fc3655e3..81b3a040 100644 --- a/apps/landing/src/content/docs/local-mode/cli-reference.md +++ b/apps/landing/src/content/docs/local-mode/cli-reference.md @@ -44,6 +44,7 @@ Start the local ingest + query server (embedded ClickHouse via chDB). | -------------------- | --------------- | ----------------------------------------------------------------------------------- | | `--port ` | `4318` | Port for OTLP/HTTP ingest, the query API, and the bundled UI | | `--data-dir ` | `~/.maple/data` | Embedded ClickHouse data directory | +| `--chdb-config-file ` | | Optional ClickHouse config file passed to embedded chDB | | `--offline` | `false` | Serve the UI bundled in this binary (from `127.0.0.1`) instead of `local.maple.dev` | | `--background`, `-d` | `false` | Run detached (logs to `~/.maple/maple.log`); stop with `maple stop` | | `--reset` | `false` | Wipe the existing store before starting — use after an incompatible upgrade | From 8be7e3ed0cfcdcab5852002480d8d91f45545640 Mon Sep 17 00:00:00 2001 From: remote service administrator Date: Thu, 25 Jun 2026 19:04:48 -0400 Subject: [PATCH 2/4] feat(cli): add local chDB checkpoint command --- apps/cli/src/cli.ts | 3 +- apps/cli/src/commands/server.ts | 23 ++ apps/cli/src/server/chdb.ts | 4 +- apps/cli/src/server/checkpoints.ts | 201 ++++++++++++++++++ .../content/docs/local-mode/cli-reference.md | 23 ++ 5 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 apps/cli/src/server/checkpoints.ts diff --git a/apps/cli/src/cli.ts b/apps/cli/src/cli.ts index 404ca8ca..88be3904 100644 --- a/apps/cli/src/cli.ts +++ b/apps/cli/src/cli.ts @@ -9,7 +9,7 @@ import { metrics, query } from "./commands/data" import { timeseries, breakdown, compare } from "./commands/analytics" import { login, logout, whoami } from "./commands/auth" import { use } from "./commands/config" -import { start, stop, reset } from "./commands/server" +import { start, stop, reset, checkpoint } from "./commands/server" import { update } from "./commands/update" // One CLI, two backends. Every query command bottoms out at the shared @@ -46,6 +46,7 @@ export const cli = Command.make("maple").pipe( start, stop, reset, + checkpoint, // Self-update update, // Services diff --git a/apps/cli/src/commands/server.ts b/apps/cli/src/commands/server.ts index 20442905..434ee57c 100644 --- a/apps/cli/src/commands/server.ts +++ b/apps/cli/src/commands/server.ts @@ -15,6 +15,7 @@ import { storeMarkerPath, storeOpenMarkerPath, } from "../server/store-version" +import { createCheckpoint } from "../server/checkpoints" import { resolveUiAssets } from "../server/ui-assets" import { amber, bold, cyan, dim, green, underline } from "../lib/style" import { MAPLE_VERSION } from "../version" @@ -460,3 +461,25 @@ export const reset = Command.make("reset", { dataDir: dataDirFlag, yes: yesFlag }), ), ) + +export const checkpoint = Command.make("checkpoint", { dataDir: dataDirFlag, port }).pipe( + Command.withDescription("Create and validate a restorable checkpoint of the local chDB store"), + Command.withHandler( + Effect.fnUntraced(function* (a) { + const dataDir = Option.getOrUndefined(a.dataDir) ?? defaultDataDir() + const result = yield* createCheckpoint({ dataDir, port: a.port }).pipe( + Effect.mapError((e) => new ServerError({ message: e.message })), + ) + yield* Effect.sync(() => + process.stdout.write( + `${green("✓")} checkpoint created\n` + + ` ${dim("path")} ${prettyPath(result.path)}\n` + + ` ${dim("traces")} ${result.manifest.validation.traces}\n` + + ` ${dim("logs")} ${result.manifest.validation.logs}\n` + + ` ${dim("metrics")} ${result.manifest.validation.metricsSum}\n` + + ` ${dim("views")} ${result.manifest.validation.materializedViews}\n`, + ), + ) + }), + ), +) diff --git a/apps/cli/src/server/chdb.ts b/apps/cli/src/server/chdb.ts index 840fc1ca..8d8caeaf 100644 --- a/apps/cli/src/server/chdb.ts +++ b/apps/cli/src/server/chdb.ts @@ -83,6 +83,8 @@ export interface ChdbOptions { readonly schemaSql: string /** Optional ClickHouse config file passed through to chDB. */ readonly configFile?: string + /** Apply the Maple schema after connect. Defaults to true. */ + readonly bootstrapSchema?: boolean } /** @@ -135,7 +137,7 @@ export class Chdb { throw new Error(Chdb.#connectFailure(options.dataDir, "chdb_connect produced a NULL connection")) const db = new Chdb(sym, connPtrPtr, conn) - db.#bootstrap(options.schemaSql) + if (options.bootstrapSchema !== false) db.#bootstrap(options.schemaSql) return db } diff --git a/apps/cli/src/server/checkpoints.ts b/apps/cli/src/server/checkpoints.ts new file mode 100644 index 00000000..ce0f82d8 --- /dev/null +++ b/apps/cli/src/server/checkpoints.ts @@ -0,0 +1,201 @@ +import { mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { readdir, rename, rm, stat, writeFile } from "node:fs/promises" +import { tmpdir } from "node:os" +import { basename, join, resolve, sep } from "node:path" +import { Effect, Schema } from "effect" +import { Chdb } from "./chdb" +import { SCHEMA_FINGERPRINT } from "./serve" +import schemaSql from "./schema/local-schema.sql" with { type: "text" } +import { CHDB_VERSION, MAPLE_VERSION } from "../version" + +export class CheckpointError extends Schema.TaggedErrorClass()( + "@maple/cli/CheckpointError", + { message: Schema.String }, +) {} + +export interface CheckpointOptions { + readonly dataDir: string + readonly port: number +} + +const checkpointRoot = (dataDir: string): string => join(dataDir, "backups") +const buildingDir = (dataDir: string): string => join(checkpointRoot(dataDir), "building") +const currentDir = (dataDir: string): string => join(checkpointRoot(dataDir), "current") +const previousDir = (dataDir: string): string => join(checkpointRoot(dataDir), "previous") + +const backupSqlPath = (name: string): string => `backups/${name}/backup` + +const xmlEscape = (value: string): string => + value + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'") + +const dataDirWithSlash = (dataDir: string): string => { + const abs = resolve(dataDir) + return abs.endsWith(sep) ? abs : `${abs}${sep}` +} + +const writeBackupConfig = (path: string, sourceDataDir?: string): void => { + const sourceDisk = sourceDataDir + ? ` + + + + ${xmlEscape(dataDirWithSlash(sourceDataDir))} + + + ` + : "" + writeFileSync( + path, + ` + + ${sourceDataDir ? "src" : "default"} + backups + ${sourceDisk} + +`, + ) +} + +const postLocalQuery = async (port: number, sql: string): Promise => { + const response = await fetch(`http://127.0.0.1:${port}/local/query`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ sql }), + }) + if (!response.ok) { + const detail = await response.text().catch(() => "") + throw new Error(`local query failed (${response.status} ${response.statusText})${detail ? `: ${detail}` : ""}`) + } + return response.json() +} + +const readJsonRows = (text: string): ReadonlyArray> => + text + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .map((line) => JSON.parse(line) as Record) + +const countFrom = (rows: ReadonlyArray>): number => { + const row = rows[0] + if (!row) return 0 + const value = row["count()"] ?? row.count + return typeof value === "number" ? value : Number(value ?? 0) +} + +const queryCount = (db: Chdb, sql: string): number => countFrom(readJsonRows(db.query(sql))) + +const dirSize = async (path: string): Promise => { + let total = 0 + const entries = await readdir(path, { withFileTypes: true }) + for (const entry of entries) { + const child = join(path, entry.name) + if (entry.isDirectory()) { + total += await dirSize(child) + } else if (entry.isFile()) { + total += (await stat(child)).size + } + } + return total +} + +interface CheckpointManifest { + readonly mapleVersion: string + readonly chdbVersion: string + readonly schemaFingerprint: string + readonly createdAt: string + readonly sourceDataDir: string + readonly backupPath: string + readonly backupBytes: number + readonly validation: { + readonly validatedAt: string + readonly traces: number + readonly logs: number + readonly metricsSum: number + readonly materializedViews: number + } +} + +const validateBackup = async (dataDir: string): Promise => { + const scratchParent = mkdtempSync(join(tmpdir(), "maple-checkpoint-")) + const scratchData = join(scratchParent, "data") + const scratchConfig = join(scratchParent, "config.xml") + writeBackupConfig(scratchConfig, dataDir) + let db: Chdb | undefined + try { + db = Chdb.open({ + dataDir: scratchData, + schemaSql, + configFile: scratchConfig, + bootstrapSchema: false, + }) + db.exec("CREATE DATABASE IF NOT EXISTS default") + db.exec( + `RESTORE DATABASE default FROM Disk('src', '${backupSqlPath("building")}') ` + + "SETTINGS allow_different_database_def=1", + ) + return { + validatedAt: new Date().toISOString(), + traces: queryCount(db, "SELECT count() FROM traces"), + logs: queryCount(db, "SELECT count() FROM logs"), + metricsSum: queryCount(db, "SELECT count() FROM metrics_sum"), + materializedViews: queryCount( + db, + "SELECT count() FROM system.tables WHERE database = 'default' AND engine = 'MaterializedView'", + ), + } + } finally { + db?.close() + rmSync(scratchParent, { recursive: true, force: true }) + } +} + +const promoteBuilding = async (dataDir: string): Promise => { + await rm(previousDir(dataDir), { recursive: true, force: true }) + try { + await rename(currentDir(dataDir), previousDir(dataDir)) + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error + } + await rename(buildingDir(dataDir), currentDir(dataDir)) +} + +export const createCheckpoint = ( + options: CheckpointOptions, +): Effect.Effect<{ readonly path: string; readonly manifest: CheckpointManifest }, CheckpointError> => + Effect.tryPromise({ + try: async () => { + const root = checkpointRoot(options.dataDir) + const building = buildingDir(options.dataDir) + const name = basename(building) + if (name !== "building") throw new Error("internal checkpoint path error") + await rm(building, { recursive: true, force: true }) + + await postLocalQuery( + options.port, + `BACKUP DATABASE default TO Disk('default', '${backupSqlPath("building")}')`, + ) + + const validation = await validateBackup(options.dataDir) + const manifest: CheckpointManifest = { + mapleVersion: MAPLE_VERSION, + chdbVersion: CHDB_VERSION, + schemaFingerprint: SCHEMA_FINGERPRINT, + createdAt: new Date().toISOString(), + sourceDataDir: resolve(options.dataDir), + backupPath: backupSqlPath("current"), + backupBytes: await dirSize(join(building, "backup")), + validation, + } + await writeFile(join(building, "manifest.json"), `${JSON.stringify(manifest, null, 2)}\n`) + await promoteBuilding(options.dataDir) + return { path: join(root, "current"), manifest: { ...manifest, backupPath: backupSqlPath("current") } } + }, + catch: (error) => + new CheckpointError({ message: error instanceof Error ? error.message : String(error) }), + }) diff --git a/apps/landing/src/content/docs/local-mode/cli-reference.md b/apps/landing/src/content/docs/local-mode/cli-reference.md index 81b3a040..3f5bfaa9 100644 --- a/apps/landing/src/content/docs/local-mode/cli-reference.md +++ b/apps/landing/src/content/docs/local-mode/cli-reference.md @@ -198,6 +198,29 @@ maple query "SELECT ServiceName, count() FROM traces GROUP BY ServiceName ORDER > **Local only.** Raw SQL against the multi-tenant cloud warehouse would let a client read other orgs' data, so `maple query` returns a clear error in remote mode. Every other command works in both modes. +### `maple checkpoint` + +Create and validate a restorable checkpoint of the local chDB store. The running +server must have been started with a chDB config that allows ClickHouse backups: + +```xml + + + default + backups + + +``` + +```bash +maple start --chdb-config-file ./chdb-backups.xml +maple checkpoint +``` + +Checkpoints are written under the data directory at +`backups/{building,current,previous}`. `building` is never used for restore; +only a validated checkpoint is promoted to `current`. + ## Analytics ### `maple timeseries` From 19c053064328d85ede4012efcf0483aa5045d5b2 Mon Sep 17 00:00:00 2001 From: remote service administrator Date: Thu, 25 Jun 2026 19:07:18 -0400 Subject: [PATCH 3/4] feat(cli): restore dirty local store from checkpoint --- apps/cli/src/cli.ts | 3 +- apps/cli/src/commands/server.ts | 115 +++++++++++++++--- apps/cli/src/server/checkpoints.ts | 79 +++++++++++- .../content/docs/local-mode/cli-reference.md | 58 +++++---- 4 files changed, 209 insertions(+), 46 deletions(-) diff --git a/apps/cli/src/cli.ts b/apps/cli/src/cli.ts index 88be3904..8f8aab02 100644 --- a/apps/cli/src/cli.ts +++ b/apps/cli/src/cli.ts @@ -9,7 +9,7 @@ import { metrics, query } from "./commands/data" import { timeseries, breakdown, compare } from "./commands/analytics" import { login, logout, whoami } from "./commands/auth" import { use } from "./commands/config" -import { start, stop, reset, checkpoint } from "./commands/server" +import { start, stop, reset, checkpoint, restore } from "./commands/server" import { update } from "./commands/update" // One CLI, two backends. Every query command bottoms out at the shared @@ -47,6 +47,7 @@ export const cli = Command.make("maple").pipe( stop, reset, checkpoint, + restore, // Self-update update, // Services diff --git a/apps/cli/src/commands/server.ts b/apps/cli/src/commands/server.ts index 434ee57c..9c3c4cb0 100644 --- a/apps/cli/src/commands/server.ts +++ b/apps/cli/src/commands/server.ts @@ -15,7 +15,7 @@ import { storeMarkerPath, storeOpenMarkerPath, } from "../server/store-version" -import { createCheckpoint } from "../server/checkpoints" +import { createCheckpoint, restoreCheckpoint } from "../server/checkpoints" import { resolveUiAssets } from "../server/ui-assets" import { amber, bold, cyan, dim, green, underline } from "../lib/style" import { MAPLE_VERSION } from "../version" @@ -124,6 +124,11 @@ const resetFlag = Flag.boolean("reset").pipe( Flag.withDefault(false), ) +const onDirtyStoreFlag = Flag.choice("on-dirty-store", ["wipe", "fail", "restore-checkpoint"]).pipe( + Flag.withDescription("Recovery policy when the local chDB store was not cleanly closed"), + Flag.withDefault("wipe" as const), +) + const yesFlag = Flag.boolean("yes").pipe( Flag.withAlias("y"), Flag.withDescription("Skip the confirmation prompt"), @@ -231,6 +236,7 @@ export const start = Command.make("start", { background: backgroundFlag, offline: offlineFlag, reset: resetFlag, + onDirtyStore: onDirtyStoreFlag, }).pipe( Command.withDescription("Start the local ingest + query server (embedded ClickHouse via chDB)"), Command.withHandler( @@ -258,6 +264,52 @@ export const start = Command.make("start", { yield* fs.makeDirectory(dataDir, { recursive: true }) + // A store left "open" (the previous server died without running its close + // finalizer) may be inconsistent — reopening it can crash chDB natively, + // which we cannot catch. Auto-wipe and bootstrap fresh instead of walking + // into the crash. (`--reset` already wiped above, so the marker is gone.) + if (isStoreDirty(dataDir)) { + if (a.onDirtyStore === "fail") { + return yield* new ServerError({ + message: + `the local store at ${prettyPath(dataDir)} was not cleanly closed. ` + + `Run \`${bold("maple restore --yes")}\` to restore from the last checkpoint, ` + + `or \`${bold("maple start --reset")}\` to wipe it.`, + }) + } + if (a.onDirtyStore === "restore-checkpoint") { + yield* Effect.sync(() => + process.stderr.write( + amber( + "⚠ the local store was left inconsistent by an unclean shutdown — " + + "restoring the last checkpoint\n", + ), + ), + ) + const restored = yield* restoreCheckpoint(dataDir).pipe( + Effect.mapError((e) => new ServerError({ message: e.message })), + ) + yield* Effect.sync(() => + process.stderr.write( + `${green("✓")} restored checkpoint; quarantined dirty store at ${prettyPath(restored.quarantinePath)}\n`, + ), + ) + } else { + yield* Effect.sync(() => + process.stderr.write( + amber( + "⚠ the local store was left inconsistent by an unclean shutdown — " + + "wiping it and starting fresh (local telemetry data is discarded)\n", + ), + ), + ) + yield* fs.remove(dataDir, { recursive: true, force: true }).pipe(Effect.ignore) + yield* fs.remove(storeMarkerPath(dataDir), { force: true }).pipe(Effect.ignore) + yield* fs.remove(storeOpenMarkerPath(dataDir), { force: true }).pipe(Effect.ignore) + yield* fs.makeDirectory(dataDir, { recursive: true }) + } + } + // Refuse to open a store written by an incompatible chDB build: re-loading // its persisted materialized views crashes the C++ runtime natively // (SIGTRAP), which we cannot catch. Fresh/matching stores pass through. @@ -271,25 +323,6 @@ export const start = Command.make("start", { }) } - // A store left "open" (the previous server died without running its close - // finalizer) may be inconsistent — reopening it can crash chDB natively, - // which we cannot catch. Auto-wipe and bootstrap fresh instead of walking - // into the crash. (`--reset` already wiped above, so the marker is gone.) - if (isStoreDirty(dataDir)) { - yield* Effect.sync(() => - process.stderr.write( - amber( - "⚠ the local store was left inconsistent by an unclean shutdown — " + - "wiping it and starting fresh (local telemetry data is discarded)\n", - ), - ), - ) - yield* fs.remove(dataDir, { recursive: true, force: true }).pipe(Effect.ignore) - yield* fs.remove(storeMarkerPath(dataDir), { force: true }).pipe(Effect.ignore) - yield* fs.remove(storeOpenMarkerPath(dataDir), { force: true }).pipe(Effect.ignore) - yield* fs.makeDirectory(dataDir, { recursive: true }) - } - // A store bootstrapped from an older bundled schema can't be evolved in // place: `CREATE … IF NOT EXISTS` is a no-op on existing tables, so a // column added to the schema (e.g. ServiceNamespace on trace_list_mv) @@ -483,3 +516,45 @@ export const checkpoint = Command.make("checkpoint", { dataDir: dataDirFlag, por }), ), ) + +export const restore = Command.make("restore", { dataDir: dataDirFlag, yes: yesFlag }).pipe( + Command.withDescription("Restore the local chDB store from the last promoted checkpoint"), + Command.withHandler( + Effect.fnUntraced(function* (a) { + const fs = yield* FileSystem + const dataDir = Option.getOrUndefined(a.dataDir) ?? defaultDataDir() + + const pidOpt = yield* readPid(fs, pidFilePath(dataDir)) + if (Option.isSome(pidOpt) && isProcessAlive(pidOpt.value)) { + return yield* new ServerError({ + message: `maple is running (PID ${pidOpt.value}) — stop it first with \`maple stop\``, + }) + } + + if (!a.yes) { + yield* Effect.sync(() => + process.stderr.write( + `This replaces the local store at ${bold(prettyPath(dataDir))} with the last checkpoint.\n` + + `The existing store is moved aside for quarantine, not deleted.\n` + + `Re-run with ${bold("maple restore --yes")} to confirm.\n`, + ), + ) + return + } + + const result = yield* restoreCheckpoint(dataDir).pipe( + Effect.mapError((e) => new ServerError({ message: e.message })), + ) + yield* Effect.sync(() => + process.stderr.write( + `${green("✓")} restored checkpoint\n` + + ` ${dim("quarantine")} ${prettyPath(result.quarantinePath)}\n` + + ` ${dim("traces")} ${result.validation.traces}\n` + + ` ${dim("logs")} ${result.validation.logs}\n` + + ` ${dim("metrics")} ${result.validation.metricsSum}\n` + + ` ${dim("views")} ${result.validation.materializedViews}\n`, + ), + ) + }), + ), +) diff --git a/apps/cli/src/server/checkpoints.ts b/apps/cli/src/server/checkpoints.ts index ce0f82d8..5cb54c15 100644 --- a/apps/cli/src/server/checkpoints.ts +++ b/apps/cli/src/server/checkpoints.ts @@ -1,11 +1,12 @@ -import { mkdtempSync, rmSync, writeFileSync } from "node:fs" -import { readdir, rename, rm, stat, writeFile } from "node:fs/promises" +import { existsSync, mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { cp, readdir, rename, rm, stat, writeFile } from "node:fs/promises" import { tmpdir } from "node:os" import { basename, join, resolve, sep } from "node:path" import { Effect, Schema } from "effect" import { Chdb } from "./chdb" import { SCHEMA_FINGERPRINT } from "./serve" import schemaSql from "./schema/local-schema.sql" with { type: "text" } +import { markStoreClosed, storeMarkerJson, storeMarkerPath } from "./store-version" import { CHDB_VERSION, MAPLE_VERSION } from "../version" export class CheckpointError extends Schema.TaggedErrorClass()( @@ -22,6 +23,8 @@ const checkpointRoot = (dataDir: string): string => join(dataDir, "backups") const buildingDir = (dataDir: string): string => join(checkpointRoot(dataDir), "building") const currentDir = (dataDir: string): string => join(checkpointRoot(dataDir), "current") const previousDir = (dataDir: string): string => join(checkpointRoot(dataDir), "previous") +const restoreBuildingDir = (dataDir: string): string => `${dataDir}.restore-building` +const quarantineDir = (dataDir: string): string => `${dataDir}.quarantine-${new Date().toISOString().replace(/[:.]/g, "-")}` const backupSqlPath = (name: string): string => `backups/${name}/backup` @@ -155,6 +158,43 @@ const validateBackup = async (dataDir: string): Promise => { + const scratchParent = mkdtempSync(join(tmpdir(), "maple-restore-")) + const restoreConfig = join(scratchParent, "config.xml") + writeBackupConfig(restoreConfig, sourceDataDir) + let db: Chdb | undefined + try { + db = Chdb.open({ + dataDir: targetDataDir, + schemaSql, + configFile: restoreConfig, + bootstrapSchema: false, + }) + db.exec("CREATE DATABASE IF NOT EXISTS default") + db.exec( + `RESTORE DATABASE default FROM Disk('src', '${backupSqlPath(checkpointName)}') ` + + "SETTINGS allow_different_database_def=1", + ) + return { + validatedAt: new Date().toISOString(), + traces: queryCount(db, "SELECT count() FROM traces"), + logs: queryCount(db, "SELECT count() FROM logs"), + metricsSum: queryCount(db, "SELECT count() FROM metrics_sum"), + materializedViews: queryCount( + db, + "SELECT count() FROM system.tables WHERE database = 'default' AND engine = 'MaterializedView'", + ), + } + } finally { + db?.close() + rmSync(scratchParent, { recursive: true, force: true }) + } +} + const promoteBuilding = async (dataDir: string): Promise => { await rm(previousDir(dataDir), { recursive: true, force: true }) try { @@ -199,3 +239,38 @@ export const createCheckpoint = ( catch: (error) => new CheckpointError({ message: error instanceof Error ? error.message : String(error) }), }) + +export const restoreCheckpoint = ( + dataDir: string, +): Effect.Effect<{ readonly quarantinePath: string; readonly validation: CheckpointManifest["validation"] }, CheckpointError> => + Effect.tryPromise({ + try: async () => { + const sourceBackup = join(currentDir(dataDir), "backup") + if (!existsSync(sourceBackup)) { + throw new Error(`no checkpoint found at ${sourceBackup}`) + } + + const restoreDir = restoreBuildingDir(dataDir) + await rm(restoreDir, { recursive: true, force: true }) + const validation = await restoreIntoScratch(dataDir, restoreDir, "current") + + if (existsSync(join(dataDir, "backups"))) { + await cp(join(dataDir, "backups"), join(restoreDir, "backups"), { + recursive: true, + force: true, + }) + } + + const quarantinePath = quarantineDir(dataDir) + await rename(dataDir, quarantinePath) + await rename(restoreDir, dataDir) + markStoreClosed(dataDir) + writeFileSync( + storeMarkerPath(dataDir), + storeMarkerJson(MAPLE_VERSION, new Date().toISOString(), SCHEMA_FINGERPRINT), + ) + return { quarantinePath, validation } + }, + catch: (error) => + new CheckpointError({ message: error instanceof Error ? error.message : String(error) }), + }) diff --git a/apps/landing/src/content/docs/local-mode/cli-reference.md b/apps/landing/src/content/docs/local-mode/cli-reference.md index 3f5bfaa9..1c325b11 100644 --- a/apps/landing/src/content/docs/local-mode/cli-reference.md +++ b/apps/landing/src/content/docs/local-mode/cli-reference.md @@ -48,6 +48,7 @@ Start the local ingest + query server (embedded ClickHouse via chDB). | `--offline` | `false` | Serve the UI bundled in this binary (from `127.0.0.1`) instead of `local.maple.dev` | | `--background`, `-d` | `false` | Run detached (logs to `~/.maple/maple.log`); stop with `maple stop` | | `--reset` | `false` | Wipe the existing store before starting — use after an incompatible upgrade | +| `--on-dirty-store ` | `wipe` | Recovery policy when the store was not cleanly closed | ```bash maple start # foreground, UI from local.maple.dev @@ -72,6 +73,40 @@ Delete the local chDB store so the next `maple start` bootstraps fresh. Refuses | `--data-dir ` | `~/.maple/data` | Store to delete | | `--yes`, `-y` | `false` | Skip the confirmation prompt | +### `maple checkpoint` + +Create and validate a restorable checkpoint of the local chDB store. The running +server must have been started with a chDB config that allows ClickHouse backups: + +```xml + + + default + backups + + +``` + +```bash +maple start --chdb-config-file ./chdb-backups.xml +maple checkpoint +``` + +Checkpoints are written under the data directory at +`backups/{building,current,previous}`. `building` is never used for restore; +only a validated checkpoint is promoted to `current`. + +### `maple restore` + +Restore the local chDB store from the last promoted checkpoint. Refuses to run +while a server still owns the store. The existing store is moved aside for +quarantine rather than deleted. + +| Flag | Default | Description | +| ------------------- | --------------- | ---------------------------- | +| `--data-dir ` | `~/.maple/data` | Store to restore | +| `--yes`, `-y` | `false` | Skip the confirmation prompt | + ## Services ### `maple services` @@ -198,29 +233,6 @@ maple query "SELECT ServiceName, count() FROM traces GROUP BY ServiceName ORDER > **Local only.** Raw SQL against the multi-tenant cloud warehouse would let a client read other orgs' data, so `maple query` returns a clear error in remote mode. Every other command works in both modes. -### `maple checkpoint` - -Create and validate a restorable checkpoint of the local chDB store. The running -server must have been started with a chDB config that allows ClickHouse backups: - -```xml - - - default - backups - - -``` - -```bash -maple start --chdb-config-file ./chdb-backups.xml -maple checkpoint -``` - -Checkpoints are written under the data directory at -`backups/{building,current,previous}`. `building` is never used for restore; -only a validated checkpoint is promoted to `current`. - ## Analytics ### `maple timeseries` From 3fa344f34cfabc5e61cfcb278fe6c8bf99aefe81 Mon Sep 17 00:00:00 2001 From: remote service administrator Date: Fri, 26 Jun 2026 11:29:01 -0400 Subject: [PATCH 4/4] fix(cli): harden chDB checkpoint recovery --- apps/cli/src/commands/server.ts | 37 +++++--- apps/cli/src/server/checkpoints.ts | 76 ++++++++++++--- apps/cli/test/checkpoints.test.ts | 143 +++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 30 deletions(-) create mode 100644 apps/cli/test/checkpoints.test.ts diff --git a/apps/cli/src/commands/server.ts b/apps/cli/src/commands/server.ts index 9c3c4cb0..2c72573b 100644 --- a/apps/cli/src/commands/server.ts +++ b/apps/cli/src/commands/server.ts @@ -264,6 +264,19 @@ export const start = Command.make("start", { yield* fs.makeDirectory(dataDir, { recursive: true }) + // Refuse to open a store written by an incompatible chDB build: re-loading + // its persisted materialized views crashes the C++ runtime natively + // (SIGTRAP), which we cannot catch. Fresh/matching stores pass through. + const compat = checkStoreCompatible(dataDir) + if (!compat.compatible) { + return yield* new ServerError({ + message: + `the local store at ${prettyPath(dataDir)} is incompatible with this build's chDB ` + + `(store: ${compat.found}; build: ${compat.current}) — loading it would crash chDB. ` + + `Wipe it with \`${bold("maple reset")}\`, or start fresh via \`${bold("maple start --reset")}\`.`, + }) + } + // A store left "open" (the previous server died without running its close // finalizer) may be inconsistent — reopening it can crash chDB natively, // which we cannot catch. Auto-wipe and bootstrap fresh instead of walking @@ -310,19 +323,6 @@ export const start = Command.make("start", { } } - // Refuse to open a store written by an incompatible chDB build: re-loading - // its persisted materialized views crashes the C++ runtime natively - // (SIGTRAP), which we cannot catch. Fresh/matching stores pass through. - const compat = checkStoreCompatible(dataDir) - if (!compat.compatible) { - return yield* new ServerError({ - message: - `the local store at ${prettyPath(dataDir)} is incompatible with this build's chDB ` + - `(store: ${compat.found}; build: ${compat.current}) — loading it would crash chDB. ` + - `Wipe it with \`${bold("maple reset")}\`, or start fresh via \`${bold("maple start --reset")}\`.`, - }) - } - // A store bootstrapped from an older bundled schema can't be evolved in // place: `CREATE … IF NOT EXISTS` is a no-op on existing tables, so a // column added to the schema (e.g. ServiceNamespace on trace_list_mv) @@ -346,7 +346,12 @@ export const start = Command.make("start", { // Detached: spawn the same command without --background and exit. if (a.background) - return yield* startDetached(a.port, dataDir, a.offline, Option.getOrUndefined(a.chdbConfigFile)) + return yield* startDetached( + a.port, + dataDir, + a.offline, + Option.getOrUndefined(a.chdbConfigFile), + ) yield* Effect.sync(() => process.stderr.write( @@ -378,7 +383,9 @@ export const start = Command.make("start", { dataDir, configFile: Option.getOrUndefined(a.chdbConfigFile), assets, - }).pipe(Effect.mapError((e) => new ServerError({ message: `failed to start: ${e.message}` }))) + }).pipe( + Effect.mapError((e) => new ServerError({ message: `failed to start: ${e.message}` })), + ) started = true // Bootstrap succeeded — stamp the store so a later start over an diff --git a/apps/cli/src/server/checkpoints.ts b/apps/cli/src/server/checkpoints.ts index 5cb54c15..ab4b683d 100644 --- a/apps/cli/src/server/checkpoints.ts +++ b/apps/cli/src/server/checkpoints.ts @@ -1,5 +1,5 @@ import { existsSync, mkdtempSync, rmSync, writeFileSync } from "node:fs" -import { cp, readdir, rename, rm, stat, writeFile } from "node:fs/promises" +import { cp, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises" import { tmpdir } from "node:os" import { basename, join, resolve, sep } from "node:path" import { Effect, Schema } from "effect" @@ -19,12 +19,13 @@ export interface CheckpointOptions { readonly port: number } -const checkpointRoot = (dataDir: string): string => join(dataDir, "backups") -const buildingDir = (dataDir: string): string => join(checkpointRoot(dataDir), "building") -const currentDir = (dataDir: string): string => join(checkpointRoot(dataDir), "current") -const previousDir = (dataDir: string): string => join(checkpointRoot(dataDir), "previous") +export const checkpointRoot = (dataDir: string): string => join(dataDir, "backups") +export const buildingDir = (dataDir: string): string => join(checkpointRoot(dataDir), "building") +export const currentDir = (dataDir: string): string => join(checkpointRoot(dataDir), "current") +export const previousDir = (dataDir: string): string => join(checkpointRoot(dataDir), "previous") const restoreBuildingDir = (dataDir: string): string => `${dataDir}.restore-building` -const quarantineDir = (dataDir: string): string => `${dataDir}.quarantine-${new Date().toISOString().replace(/[:.]/g, "-")}` +const quarantineDir = (dataDir: string): string => + `${dataDir}.quarantine-${new Date().toISOString().replace(/[:.]/g, "-")}` const backupSqlPath = (name: string): string => `backups/${name}/backup` @@ -41,7 +42,7 @@ const dataDirWithSlash = (dataDir: string): string => { return abs.endsWith(sep) ? abs : `${abs}${sep}` } -const writeBackupConfig = (path: string, sourceDataDir?: string): void => { +export const writeBackupConfig = (path: string, sourceDataDir?: string): void => { const sourceDisk = sourceDataDir ? ` @@ -72,7 +73,9 @@ const postLocalQuery = async (port: number, sql: string): Promise => { }) if (!response.ok) { const detail = await response.text().catch(() => "") - throw new Error(`local query failed (${response.status} ${response.statusText})${detail ? `: ${detail}` : ""}`) + throw new Error( + `local query failed (${response.status} ${response.statusText})${detail ? `: ${detail}` : ""}`, + ) } return response.json() } @@ -124,6 +127,28 @@ interface CheckpointManifest { } } +export const readCheckpointManifest = async (dataDir: string): Promise => { + const path = join(currentDir(dataDir), "manifest.json") + let raw: string + try { + raw = await readFile(path, "utf8") + } catch { + throw new Error(`checkpoint manifest not found at ${path}`) + } + const parsed = JSON.parse(raw) as Partial + if (parsed.chdbVersion !== CHDB_VERSION) { + throw new Error( + `checkpoint chDB version mismatch (checkpoint: ${parsed.chdbVersion ?? "unknown"}; build: ${CHDB_VERSION})`, + ) + } + if (parsed.schemaFingerprint !== SCHEMA_FINGERPRINT) { + throw new Error( + `checkpoint schema mismatch (checkpoint: ${parsed.schemaFingerprint ?? "unknown"}; build: ${SCHEMA_FINGERPRINT})`, + ) + } + return parsed as CheckpointManifest +} + const validateBackup = async (dataDir: string): Promise => { const scratchParent = mkdtempSync(join(tmpdir(), "maple-checkpoint-")) const scratchData = join(scratchParent, "data") @@ -195,7 +220,7 @@ const restoreIntoScratch = async ( } } -const promoteBuilding = async (dataDir: string): Promise => { +export const promoteBuilding = async (dataDir: string): Promise => { await rm(previousDir(dataDir), { recursive: true, force: true }) try { await rename(currentDir(dataDir), previousDir(dataDir)) @@ -216,10 +241,24 @@ export const createCheckpoint = ( if (name !== "building") throw new Error("internal checkpoint path error") await rm(building, { recursive: true, force: true }) - await postLocalQuery( - options.port, - `BACKUP DATABASE default TO Disk('default', '${backupSqlPath("building")}')`, - ) + try { + await postLocalQuery( + options.port, + `BACKUP DATABASE default TO Disk('default', '${backupSqlPath("building")}')`, + ) + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + if ( + message.includes("backups.allowed_disk") || + message.includes("INVALID_CONFIG_PARAMETER") + ) { + throw new Error( + "checkpoints require the local server to be started with `--chdb-config-file` " + + "pointing at a ClickHouse backups config", + ) + } + throw error + } const validation = await validateBackup(options.dataDir) const manifest: CheckpointManifest = { @@ -234,7 +273,10 @@ export const createCheckpoint = ( } await writeFile(join(building, "manifest.json"), `${JSON.stringify(manifest, null, 2)}\n`) await promoteBuilding(options.dataDir) - return { path: join(root, "current"), manifest: { ...manifest, backupPath: backupSqlPath("current") } } + return { + path: join(root, "current"), + manifest: { ...manifest, backupPath: backupSqlPath("current") }, + } }, catch: (error) => new CheckpointError({ message: error instanceof Error ? error.message : String(error) }), @@ -242,13 +284,17 @@ export const createCheckpoint = ( export const restoreCheckpoint = ( dataDir: string, -): Effect.Effect<{ readonly quarantinePath: string; readonly validation: CheckpointManifest["validation"] }, CheckpointError> => +): Effect.Effect< + { readonly quarantinePath: string; readonly validation: CheckpointManifest["validation"] }, + CheckpointError +> => Effect.tryPromise({ try: async () => { const sourceBackup = join(currentDir(dataDir), "backup") if (!existsSync(sourceBackup)) { throw new Error(`no checkpoint found at ${sourceBackup}`) } + await readCheckpointManifest(dataDir) const restoreDir = restoreBuildingDir(dataDir) await rm(restoreDir, { recursive: true, force: true }) diff --git a/apps/cli/test/checkpoints.test.ts b/apps/cli/test/checkpoints.test.ts new file mode 100644 index 00000000..dca5813a --- /dev/null +++ b/apps/cli/test/checkpoints.test.ts @@ -0,0 +1,143 @@ +import { describe, it } from "@effect/vitest" +import { ok, rejects, strictEqual } from "node:assert" +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { + buildingDir, + currentDir, + previousDir, + promoteBuilding, + readCheckpointManifest, + writeBackupConfig, +} from "../src/server/checkpoints" +import { SCHEMA_FINGERPRINT } from "../src/server/serve" +import { CHDB_VERSION, MAPLE_VERSION } from "../src/version" + +const withDataDir = async (run: (dataDir: string) => Promise | void): Promise => { + const parent = mkdtempSync(join(tmpdir(), "maple-checkpoint-test-")) + const dataDir = join(parent, "data") + mkdirSync(dataDir, { recursive: true }) + try { + await run(dataDir) + } finally { + rmSync(parent, { recursive: true, force: true }) + } +} + +const writeMarker = (path: string, value: string): void => { + mkdirSync(path, { recursive: true }) + writeFileSync(join(path, "marker.txt"), value) +} + +const readMarker = (path: string): string => readFileSync(join(path, "marker.txt"), "utf8") + +const manifest = (overrides: Record = {}): string => + `${JSON.stringify( + { + mapleVersion: MAPLE_VERSION, + chdbVersion: CHDB_VERSION, + schemaFingerprint: SCHEMA_FINGERPRINT, + createdAt: "2026-01-01T00:00:00.000Z", + sourceDataDir: "/tmp/maple-data", + backupPath: "backups/current/backup", + backupBytes: 123, + validation: { + validatedAt: "2026-01-01T00:00:01.000Z", + traces: 1, + logs: 2, + metricsSum: 3, + materializedViews: 33, + }, + ...overrides, + }, + null, + 2, + )}\n` + +describe("writeBackupConfig", () => { + it("writes the runtime backup config for the default disk", async () => { + await withDataDir((dataDir) => { + const configPath = join(dataDir, "config.xml") + writeBackupConfig(configPath) + + const xml = readFileSync(configPath, "utf8") + ok(xml.includes("default")) + ok(xml.includes("backups")) + ok(!xml.includes("")) + }) + }) + + it("writes a restore config with an escaped source disk path", async () => { + await withDataDir((dataDir) => { + const configPath = join(dataDir, "config.xml") + const sourceDataDir = join(dataDir, "source & ") + writeBackupConfig(configPath, sourceDataDir) + + const xml = readFileSync(configPath, "utf8") + ok(xml.includes("src")) + ok(xml.includes("backups")) + ok(xml.includes("")) + ok(xml.includes("source & <store>")) + ok(xml.includes("")) + }) + }) +}) + +describe("promoteBuilding", () => { + it("promotes building to current when no current checkpoint exists", async () => { + await withDataDir(async (dataDir) => { + writeMarker(buildingDir(dataDir), "new") + + await promoteBuilding(dataDir) + + ok(!existsSync(buildingDir(dataDir))) + strictEqual(readMarker(currentDir(dataDir)), "new") + ok(!existsSync(previousDir(dataDir))) + }) + }) + + it("moves current to previous and replaces any older previous checkpoint", async () => { + await withDataDir(async (dataDir) => { + writeMarker(previousDir(dataDir), "old-previous") + writeMarker(currentDir(dataDir), "old-current") + writeMarker(buildingDir(dataDir), "new-current") + + await promoteBuilding(dataDir) + + ok(!existsSync(buildingDir(dataDir))) + strictEqual(readMarker(currentDir(dataDir)), "new-current") + strictEqual(readMarker(previousDir(dataDir)), "old-current") + }) + }) +}) + +describe("readCheckpointManifest", () => { + it("round-trips a compatible checkpoint manifest", async () => { + await withDataDir(async (dataDir) => { + mkdirSync(currentDir(dataDir), { recursive: true }) + writeFileSync(join(currentDir(dataDir), "manifest.json"), manifest()) + + const parsed = await readCheckpointManifest(dataDir) + + strictEqual(parsed.chdbVersion, CHDB_VERSION) + strictEqual(parsed.schemaFingerprint, SCHEMA_FINGERPRINT) + strictEqual(parsed.validation.materializedViews, 33) + }) + }) + + it("rejects a manifest from a different chDB or schema", async () => { + await withDataDir(async (dataDir) => { + mkdirSync(currentDir(dataDir), { recursive: true }) + writeFileSync(join(currentDir(dataDir), "manifest.json"), manifest({ chdbVersion: "v0.0.0" })) + + await rejects(readCheckpointManifest(dataDir), /checkpoint chDB version mismatch/) + + writeFileSync( + join(currentDir(dataDir), "manifest.json"), + manifest({ schemaFingerprint: "old-schema" }), + ) + await rejects(readCheckpointManifest(dataDir), /checkpoint schema mismatch/) + }) + }) +})