Commit 6bc5ebc

feat: add benchmark suite and docs page
1 parent 975adf3

6 files changed

Lines changed: 608 additions & 1 deletion

File tree

docs/benchmarks.mdx

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
---
title: "Benchmarks"
description: "Benchmark methodology, hardware, and results for Secure Exec"
---

{/* Cost figures generated by scripts/calculate-costs.js — rerun when updating pricing */}

## Results
### Cold Start Latency

Sequential — runtimes created one at a time:

| Batch Size | Samples | Mean    | p50     | p95     | p99     |
| ---------- | ------- | ------- | ------- | ------- | ------- |
| 1          | 5       | 15.0 ms | 14.9 ms | 15.3 ms | —       |
| 10         | 50      | 14.6 ms | 14.4 ms | 15.9 ms | —       |
| 50         | 250     | 14.6 ms | 14.3 ms | 16.6 ms | 18.1 ms |
| 100        | 500     | 14.6 ms | 14.4 ms | 16.2 ms | 17.9 ms |
| 200        | 1000    | 14.6 ms | 14.3 ms | 16.1 ms | 19.6 ms |
Concurrent — up to `os.availableParallelism() - 4` runtimes created in parallel (16 on this machine):

| Batch Size | Samples | Mean    | p50     | p95     | p99     |
| ---------- | ------- | ------- | ------- | ------- | ------- |
| 1          | 5       | 18.4 ms | 15.8 ms | 28.0 ms | —       |
| 10         | 50      | 24.4 ms | 23.0 ms | 32.3 ms | —       |
| 50         | 250     | 35.0 ms | 35.0 ms | 44.5 ms | 47.2 ms |
| 100        | 500     | 35.1 ms | 35.5 ms | 44.9 ms | 48.7 ms |
| 200        | 1000    | 35.2 ms | 35.1 ms | 44.6 ms | 51.0 ms |

p99 is omitted (—) where the sample count is below 100, as the percentile is not statistically meaningful at that size.
**Key takeaway:** Sequential cold start is stable at **~14.3 ms p50** regardless of batch size — no degradation over time. Concurrent cold start scales from ~15 ms to ~35 ms at 200 instances due to CPU contention, with p95 staying under 45 ms and p99 under 51 ms.
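One way to enforce a concurrency cap like the one used in the concurrent runs is a bounded worker pool. The sketch below is generic and assumes nothing about the suite's internals beyond the cap itself; `runBounded` is an illustrative name, not the suite's API:

```typescript
// Bounded-concurrency runner: at most `limit` tasks in flight at once,
// as in the concurrent benchmark (limit = os.availableParallelism() - 4 there).
async function runBounded<T>(
  tasks: (() => Promise<T>)[],
  limit: number,
): Promise<T[]> {
  const results: T[] = new Array(tasks.length);
  let next = 0; // shared cursor; safe because JS is single-threaded
  async function worker() {
    while (next < tasks.length) {
      const i = next++;
      results[i] = await tasks[i]();
    }
  }
  await Promise.all(
    Array.from({ length: Math.min(limit, tasks.length) }, worker),
  );
  return results;
}
```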
### Warm Start Latency

Sequential:

| Batch Size | Samples | Mean   | p50    | p95    | p99    |
| ---------- | ------- | ------ | ------ | ------ | ------ |
| 1          | 5       | 3.0 ms | 3.0 ms | 3.3 ms | —      |
| 10         | 50      | 3.1 ms | 3.0 ms | 3.6 ms | —      |
| 50         | 250     | 3.1 ms | 3.0 ms | 3.7 ms | 5.0 ms |
| 100        | 500     | 3.2 ms | 3.0 ms | 3.9 ms | 5.7 ms |
| 200        | 1000    | 3.1 ms | 3.0 ms | 3.7 ms | 4.6 ms |
Concurrent:

| Batch Size | Samples | Mean    | p50     | p95     | p99     |
| ---------- | ------- | ------- | ------- | ------- | ------- |
| 1          | 5       | 3.6 ms  | 3.3 ms  | 5.3 ms  | —       |
| 10         | 50      | 5.8 ms  | 5.8 ms  | 7.8 ms  | —       |
| 50         | 250     | 10.2 ms | 10.2 ms | 13.3 ms | 16.0 ms |
| 100        | 500     | 10.4 ms | 10.2 ms | 15.9 ms | 20.7 ms |
| 200        | 1000    | 10.7 ms | 10.4 ms | 15.1 ms | 23.2 ms |

p99 is omitted (—) where the sample count is below 100.
**Key takeaway:** Warm start is **~3 ms sequential** — roughly 5× faster than cold start. The difference (~11 ms) is the cost of V8 isolate creation, which happens only once. Concurrent warm start tops out around 11 ms, dominated by thread contention rather than runtime overhead.
### Memory Overhead

| Batch Size | Iterations | Total RSS Delta | Per-Runtime RSS | Per-Runtime Heap | Teardown Reclaimed |
| ---------- | ---------- | --------------- | --------------- | ---------------- | ------------------ |
| 1          | 5          | 6.1 MB          | 6.1 MB          | 0.25 MB          | 2.3 MB             |
| 10         | 5          | 41.9 MB         | 4.2 MB          | ~0 MB            | 24.0 MB            |
| 50         | 5          | 170.8 MB        | 3.4 MB          | ~0 MB            | 120.8 MB           |
| 100        | 5          | 303.9 MB        | 3.0 MB          | ~0 MB            | 241.4 MB           |
| 200        | 5          | 609.7 MB        | 3.1 MB          | ~0 MB            | 483.0 MB           |
**Key takeaway:** Per-runtime RSS converges to **≤3.1 MB at scale**. The batch=1 figure (6.1 MB) is inflated by fixed per-process overhead that amortizes away. JS heap delta is ~0, indicating the cost is dominated by native memory (V8 isolate, thread stacks, OS-mapped pages). RSS is an upper bound — true per-isolate memory is likely lower. Teardown reclaims 38–79% of RSS (higher at larger batch sizes, where fixed overhead is a smaller fraction).
## Methodology

### Cold Start

Time from `new NodeRuntime()` through the first `runtime.run()` completing. This captures V8 isolate creation, context setup, bridge installation, module compilation, and initial execution. A trivial ESM module (`export const x = 1`) is used so the measurement reflects pure runtime overhead without workload noise.

Each configuration runs 5 iterations (× batch size samples each) with 1 warmup iteration discarded. Tail percentiles at small batch sizes (≤10) have low sample counts and should be interpreted with caution.
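The measurement loop described above can be sketched generically. `measureColdStart` and its `makeAndRun` callback are hypothetical names; in the real suite the callback would wrap `new NodeRuntime(...)` plus the first `runtime.run()` of the trivial module:

```typescript
import { performance } from "node:perf_hooks";

// Generic cold-start harness: times setup + first run for `samples`
// independent instances, discarding `warmup` warmup iterations first.
async function measureColdStart(
  makeAndRun: () => Promise<void>,
  samples: number,
  warmup = 1,
): Promise<number[]> {
  for (let i = 0; i < warmup; i++) await makeAndRun(); // discarded
  const durations: number[] = [];
  for (let i = 0; i < samples; i++) {
    const t0 = performance.now();
    await makeAndRun(); // fresh instance + first run, every sample
    durations.push(performance.now() - t0);
  }
  return durations;
}
```

Host-side `performance.now()` is used for timing, which the docs note is unaffected by the in-isolate timer freezing.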
Sandbox provider comparison uses the **p95 TTI** (time-to-interactive) from [ComputeSDK benchmarks](https://www.computesdk.com/benchmarks/). As of March 2026, **e2b** is the best-performing sandbox provider at **0.95 s** p95 TTI.
### Warm Start

Time for a second `runtime.run()` on an already-initialized runtime. The V8 isolate is reused, but a fresh V8 context is created and all bridge globals (console, require, import, process) are re-installed. Module caches are cleared between runs. The difference between cold and warm start (~11 ms) isolates the cost of `new ivm.Isolate()`.
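To make the cold/warm subtraction concrete, here is a toy model with a hypothetical `FakeRuntime` whose first `run()` pays a one-time setup cost, mirroring how subtracting warm from cold isolates the one-time isolate-creation cost:

```typescript
import { performance } from "node:perf_hooks";

async function timeIt(fn: () => Promise<void>): Promise<number> {
  const t0 = performance.now();
  await fn();
  return performance.now() - t0;
}

// Hypothetical stand-in for NodeRuntime: the first run pays a one-time
// setup cost (the analogue of V8 isolate creation), later runs don't.
class FakeRuntime {
  private initialized = false;
  async run(_code: string): Promise<void> {
    if (!this.initialized) {
      await new Promise((r) => setTimeout(r, 10)); // simulated isolate creation
      this.initialized = true;
    }
    await new Promise((r) => setTimeout(r, 2)); // simulated per-run work
  }
}

const rt = new FakeRuntime();
const cold = await timeIt(() => rt.run("export const x = 1;"));
const warm = await timeIt(() => rt.run("export const x = 1;"));
console.log(cold - warm); // approximately the one-time setup cost
```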
### Memory Per Instance

RSS (Resident Set Size) delta per live runtime, measured via `process.memoryUsage().rss` before and after spinning up N runtimes. Testing in batches averages out per-process fixed costs. Each batch size runs 5 iterations. GC is forced (two passes) between measurements (`--expose-gc`).

RSS is a process-wide metric that includes JS-side wrappers, thread stacks, and OS-mapped pages beyond the isolate itself — the reported per-runtime figure is an **upper bound** on the true per-isolate cost.
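A rough sketch of this measurement. `measureRssPerInstance` and its `spawn` callback are illustrative names, not the suite's code; run with `--expose-gc` so the GC passes actually happen:

```typescript
// Per-runtime RSS delta: spin up N instances via `spawn` (a hypothetical
// stand-in for the real runtime constructor), diff process RSS, divide by N.
function measureRssPerInstance<T>(spawn: () => T, n: number): number {
  // `gc` exists only under --expose-gc; the optional calls are no-ops otherwise.
  const gc = (globalThis as any).gc as (() => void) | undefined;
  gc?.(); gc?.(); // two GC passes before the baseline, as in the suite
  const before = process.memoryUsage().rss;
  const live: T[] = [];
  for (let i = 0; i < n; i++) live.push(spawn()); // keep all N alive
  gc?.(); gc?.();
  const after = process.memoryUsage().rss;
  if (live.length !== n) throw new Error("lost instances"); // keeps `live` referenced
  return (after - before) / n; // upper bound: includes process-wide overhead
}
```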
Sandbox provider comparison uses the **minimum allocatable memory** across popular providers (e2b, Daytona, Modal, Cloudflare) as of March 2026. The minimum is **256 MB** (Modal and Cloudflare).
### Cost Per Second

See the [cost evaluation](/docs/cost-evaluation) for full methodology and a multi-provider comparison.
## Test Environment

| Component         | Details |
| ----------------- | ------- |
| CPU               | 12th Gen Intel i7-12700KF, 12 cores / 20 threads @ 3.7 GHz, 25 MB cache |
| Node.js           | v24.13.0 |
| RAM               | 2× 32 GB Kingston FURY Beast DDR4 (KF3200C16D4/32GX) |
| RAM rated         | 3200 MHz CL16, dual-rank |
| RAM actual        | 2400 MT/s |
| OS                | Linux (kernel 6.x) |
| Timing mitigation | `"freeze"` (default) — `Date.now()` and `performance.now()` are frozen inside the isolate; host-side `performance.now()` used for measurement is unaffected |
## Reproducing

```bash
# Clone and install
git clone https://github.com/rivet-dev/secure-exec
cd secure-exec && pnpm install

# Run both benchmarks (saves timestamped results to benchmarks/results/)
cd packages/secure-exec
./benchmarks/run-benchmarks.sh

# Or run individually
npx tsx benchmarks/coldstart.bench.ts                         # cold + warm start
node --expose-gc --import tsx/esm benchmarks/memory.bench.ts  # memory
```

Results will vary by hardware; the numbers above come from the test environment described in the previous section.

docs/docs.json

Lines changed: 2 additions & 1 deletion
```diff
@@ -73,7 +73,8 @@
       "security-model",
       "nodejs-compatibility",
       "python-compatibility",
-      "cloudflare-workers-comparison"
+      "cloudflare-workers-comparison",
+      "benchmarks"
     ]
   }
 ]
```
Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
```ts
import {
  NodeRuntime,
  createNodeDriver,
  createNodeRuntimeDriverFactory,
} from "../src/index.js";
import os from "node:os";

export const BATCH_SIZES = [1, 10, 50, 100, 200];
export const ITERATIONS = 5;
export const MEMORY_ITERATIONS = 5;
export const WARMUP_ITERATIONS = 1;
export const TRIVIAL_CODE = `export const x = 1;`;
// Cap concurrency below available parallelism to leave headroom for the bench harness itself.
export const MAX_CONCURRENCY = Math.max(1, os.availableParallelism() - 4);

export function createBenchRuntime(): NodeRuntime {
  return new NodeRuntime({
    systemDriver: createNodeDriver(),
    runtimeDriverFactory: createNodeRuntimeDriverFactory(),
  });
}

export function percentile(sorted: number[], p: number): number {
  const idx = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[Math.max(0, idx)];
}

export function stats(samples: number[]) {
  const sorted = [...samples].sort((a, b) => a - b);
  const mean = samples.reduce((a, b) => a + b, 0) / samples.length;
  return {
    mean: round(mean),
    p50: round(percentile(sorted, 50)),
    p95: round(percentile(sorted, 95)),
    p99: round(percentile(sorted, 99)),
    min: round(sorted[0]),
    max: round(sorted[sorted.length - 1]),
  };
}

export function round(n: number, decimals = 2): number {
  const f = 10 ** decimals;
  return Math.round(n * f) / f;
}

export function formatBytes(bytes: number): string {
  if (Math.abs(bytes) < 1024) return `${bytes} B`;
  const mb = bytes / (1024 * 1024);
  return `${round(mb, 2)} MB`;
}

export function getHardware() {
  const cpus = os.cpus();
  return {
    cpu: cpus[0]?.model ?? "unknown",
    cores: os.availableParallelism(),
    ram: `${round(os.totalmem() / 1024 ** 3, 1)} GB`,
    node: process.version,
    os: `${os.type()} ${os.release()}`,
    arch: os.arch(),
  };
}

export function forceGC() {
  if (global.gc) {
    global.gc();
  } else {
    console.error("WARNING: global.gc not available. Run with --expose-gc");
  }
}

export async function sleep(ms: number): Promise<void> {
  return new Promise((r) => setTimeout(r, ms));
}

/** Print a table to stderr for human readability. */
export function printTable(
  headers: string[],
  rows: (string | number)[][],
): void {
  const widths = headers.map((h, i) =>
    Math.max(h.length, ...rows.map((r) => String(r[i]).length)),
  );
  const sep = widths.map((w) => "-".repeat(w)).join(" | ");
  const fmt = (row: (string | number)[]) =>
    row.map((c, i) => String(c).padStart(widths[i])).join(" | ");

  console.error("");
  console.error(fmt(headers));
  console.error(sep);
  for (const row of rows) {
    console.error(fmt(row));
  }
  console.error("");
}
```
