diff --git a/docs/api/primitive-catalog.md b/docs/api/primitive-catalog.md
index 80ffe43..d06d544 100644
--- a/docs/api/primitive-catalog.md
+++ b/docs/api/primitive-catalog.md
@@ -7,7 +7,7 @@
 
 # Primitive catalog — the never-stale anti-reinvention inventory
 
-> **GENERATED** from `@tangle-network/agent-runtime@0.87.0` and `@tangle-network/agent-eval@0.103.1` by `scripts/gen-primitive-catalog.mjs`. Do NOT hand-edit — run `pnpm run docs:api`. This is the mechanical companion to the JUDGMENT in `canonical-api.md` (§2 decision table + §1.5 AgentProfile law): that doc says WHICH primitive to reach for and what NOT to build; this catalog proves WHAT exists. Per-symbol signatures + `file:line` live in the per-module pages under `docs/api/`.
+> **GENERATED** from `@tangle-network/agent-runtime@0.87.0` and `@tangle-network/agent-eval@0.103.2` by `scripts/gen-primitive-catalog.mjs`. Do NOT hand-edit — run `pnpm run docs:api`. This is the mechanical companion to the JUDGMENT in `canonical-api.md` (§2 decision table + §1.5 AgentProfile law): that doc says WHICH primitive to reach for and what NOT to build; this catalog proves WHAT exists. Per-symbol signatures + `file:line` live in the per-module pages under `docs/api/`.
 
 ## 1. agent-runtime — own public surface
 
@@ -92,7 +92,7 @@ Import from `@tangle-network/agent-runtime` — 211 exports.
 | `DELEGATED_LOOP_MODES` | const | All valid delegated-loop mode names — used for validation and CLI surfaces. |
 | `FORWARD_HEADERS` | const | Standard names — lowercased so Headers maps interop on every runtime. |
 | `INTELLIGENCE_WIRE_VERSION` | const | Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). |
-| `AgentEvalError` | class | _(no summary — add a TSDoc line at the declaration)_ |
+| `AgentEvalError` | class | Base class for every contract error this package throws — carries the stable |
 | `BackendTransportError` | class | A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success |
 | `CircuitBreakerState` | class | Live circuit-breaker state — one instance per (participant, conversation run). |
 | `CircuitOpenError` | class | Thrown when the circuit breaker is open for a participant and no retry is allowed yet. |
@@ -276,7 +276,7 @@ Import from `@tangle-network/agent-runtime/loops` — 430 exports.
 | `createMcpEnvironment` | function | Wrap any MCP server as an `Environment`: `tools/list` becomes `AgenticTool[]` with provider-safe schemas; the domain supplies only the artifact lifecycle hooks. |
 | `createPushTraceSource` | function | A push source for OWNED tool loops (router-tools / cli-bridge tool dispatch): the loop calls |
 | `createSandboxLineage` | function | Build a lineage bound to one client + its probed capabilities. The |
-| `createSandboxToolPartState` | function | _(no summary — add a TSDoc line at the declaration)_ |
+| `createSandboxToolPartState` | function | Fresh per-turn {@link SandboxToolPartState} for {@link mapSandboxToolEvent} — an |
 | `createScope` | function | Create the reactive `Scope` a driver's `Agent.act` runs inside: spawn children on an atomically reserved conserved budget, settle via the `next()` cursor, journal for replay. |
 | `createScopeAnalyst` | function | Build a `ScopeAnalyst` that spawns the analyst agent through `Scope.spawn` (so its compute is |
 | `createShapeRegistry` | function | Build a fresh open `ShapeRegistry`. A factory is stored type-erased and re-cast on resolve — the |
@@ -287,7 +287,7 @@ Import from `@tangle-network/agent-runtime/loops` — 430 exports.
 | `decodeToolPart` | function | Decode a part with a specific harness's adapter when known, else try every registered adapter |
 | `defaultSelectWinner` | function | The kernel's winner argmax — best-valid-score, ties broken by earliest index, |
 | `defaultToolDetectors` | function | The default online panel for a tool-call pipe: a worker repeating the same call, or hammering |
-| `defineLeaderboard` | function | _(no summary — add a TSDoc line at the declaration)_ |
+| `defineLeaderboard` | function | Assemble a declarative spec (`cases` + `prompt` + `score`) into a runnable |
 | `definePersona` | function | Build a frozen `Persona`. Fails loud on the executors-supplied invariant: a persona with |
 | `defineStrategy` | function | Author a Strategy from the composable steps — the open, compact way. |
 | `delegate` | function | Delegate an INTENT to a default authoring supervisor and return its `SupervisedResult` unchanged. |
@@ -967,7 +967,7 @@ Import from `@tangle-network/agent-eval/campaign` — 226 exports.
 | `defaultRenderDiff` | function | Default surface diff renderer: produces a unified baseline/winner text diff for prompt surfaces or a worktree-ref summary for code surfaces. |
 | `detectScale` | function | Detect the native scale of a set of scores: 0-100 when any magnitude clears |
 | `dimensionRegressions` | function | Per-critical-dimension regression guard. For each dimension, pair the |
-| `discoverEvalFixtures` | function | _(no summary — add a TSDoc line at the declaration)_ |
+| `discoverEvalFixtures` | function | Walk `evalsDir` and return the relative name of every fixture directory (one containing an exact-case `PROMPT.md`). |
 | `emitLoopProvenance` | function | Build the provenance record + OTel spans and persist them durably under the |
 | `evolutionaryProposer` | function | Wrap a stateless `Mutator` (GEPA, AxGEPA, reflective-mutation) as a `SurfaceProposer` that mutates the current best surface into N candidates each generation. |
 | `extractFapoAttributionSignals` | function | Scan a findings array and extract FAPO attribution signals — per-level counts and failure clusters used to decide which optimization level to escalate to next. |
@@ -987,8 +987,8 @@ Import from `@tangle-network/agent-eval/campaign` — 226 exports.
 | `isProposedCandidate` | function | Type guard: a proposal carrying its rationale vs a bare |
 | `labelTrustRank` | function | Ordinal rank for a label-trust tier; absent ⇒ `unverified` (rank 0). |
 | `llmJudge` | function | Build a campaign-shaped `JudgeConfig` whose `score()` makes ONE LLM call |
-| `loadEvalFixture` | function | _(no summary — add a TSDoc line at the declaration)_ |
-| `loadEvalFixtureScenarios` | function | _(no summary — add a TSDoc line at the declaration)_ |
+| `loadEvalFixture` | function | Load ONE fixture by name: reads `PROMPT.md` (plus `EVAL.ts`/`EVAL.tsx` and `package.json` under |
+| `loadEvalFixtureScenarios` | function | Load fixtures (all discovered, or just `names`) as campaign `Scenario`s tagged `eval-fixture`. |
 | `loopProvenanceSpans` | function | Build the loop's OTLP-ingestable spans from a provenance record. One root |
 | `makePlaybackDispatch` | function | Adapt a `PlaybackDriver` into a `runProfileMatrix` dispatch. The artifact the |
 | `memoryCurationProposer` | function | Build the CURATOR proposer. |
@@ -998,9 +998,9 @@ Import from `@tangle-network/agent-eval/campaign` — 226 exports.
 | `paretoSignificanceGate` | function | Wrap the bus + a policy as a `Gate`. Plugs into the existing |
 | `parseSkillPatchResponse` | function | Parse a SkillOpt LLM response into validated `SkillPatch` objects, throwing `SkillPatchParseError` on malformed JSON and silently dropping ops that violate the edit budget. |
 | `patchEditCount` | function | Total ops in a patch — the edit-budget axis (SkillOpt's "textual learning |
-| `planCampaignRun` | function | _(no summary — add a TSDoc line at the declaration)_ |
-| `planEvalFixtureRun` | function | _(no summary — add a TSDoc line at the declaration)_ |
-| `policyEditProposer` | function | _(no summary — add a TSDoc line at the declaration)_ |
+| `planCampaignRun` | function | Plan a campaign WITHOUT dispatching: computes the manifest hash and the per-cell |
+| `planEvalFixtureRun` | function | Dry-run planner for a fixture campaign: loads the scenarios, delegates to `planCampaignRun`, |
+| `policyEditProposer` | function | `SurfaceProposer` that admission-checks typed analyst `PolicyEdit`s and applies each |
 | `provenanceRecordPath` | function | Canonical durable paths under the run dir. |
 | `provenanceSpansPath` | function | Canonical path for the durable OTLP spans JSONL file under a loop run directory. |
 | `renderScoreboardMarkdown` | function | Render the scoreboard as a launch-readiness Markdown document — the literal |
@@ -1024,11 +1024,11 @@ Import from `@tangle-network/agent-eval/campaign` — 226 exports.
 | `traceAnalystProposer` | function | Wrap agent-eval's trace-analyst registry as a SurfaceProposer (prompt-tier). |
 | `userStoryScoreboard` | function | Flatten story verdicts into the per-requirement scoreboard — the literal |
 | `paretoPolicy` | const | The default strategy: symmetric multi-objective Pareto significance. Ship iff |
-| `FsLabeledScenarioStore` | class | _(no summary — add a TSDoc line at the declaration)_ |
-| `LabeledScenarioStoreError` | class | _(no summary — add a TSDoc line at the declaration)_ |
+| `FsLabeledScenarioStore` | class | Filesystem `LabeledScenarioStore`: appends one JSONL file per source with provenance and |
+| `LabeledScenarioStoreError` | class | Typed rejection from a labeled-scenario store (bad provenance, rate limit, invalid sample args) — carries a stable string `code`. |
 | `ProfileMatrixError` | class | Thrown when the matrix is misconfigured (no profiles, a profile whose model |
 | `SkillPatchParseError` | class | Parse + validate the patch response. Throws `SkillPatchParseError` when the |
-| `WorktreeAdapterError` | class | _(no summary — add a TSDoc line at the declaration)_ |
+| `WorktreeAdapterError` | class | Typed failure from a `WorktreeAdapter` operation (create/finalize/discard) — wraps the underlying git error as `cause`. |
 | `AceProposerOptions` | interface | `aceProposer` — Agentic Context Engineering: an APPEND-MOSTLY curator, the |
 | `AnalystArtifact` | interface | The analyst's output for one scenario — the artifact the judge scores. |
 | `AnalystScenario` | interface | A labeled trace scenario: a FIXED trace corpus plus the failure modes a |
diff --git a/docs/api/runtime.md b/docs/api/runtime.md
index 3693a0b..830b5e1 100644
--- a/docs/api/runtime.md
+++ b/docs/api/runtime.md
@@ -15815,7 +15815,11 @@ passes. Ground truth — the driver ends directly, no validation. The check read
 
 > **defineLeaderboard**\<`TCase`, `TArtifact`\>(`spec`): [`DefinedLeaderboard`](#definedleaderboard)\<`TCase`, `TArtifact`\>
 
-Defined in: [runtime/define-leaderboard.ts:294](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/define-leaderboard.ts#L294)
+Defined in: [runtime/define-leaderboard.ts:299](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/define-leaderboard.ts#L299)
+
+Assemble a declarative spec (`cases` + `prompt` + `score`) into a runnable
+harness×model leaderboard — `run()` executes the matrix, `toBenchmarkAdapter()`
+exposes the same domain as a structural `BenchmarkAdapter`.
 
 #### Type Parameters
 
@@ -17219,10 +17223,13 @@ readonly `SandboxEvent`[]
 
 > **createSandboxToolPartState**(): [`SandboxToolPartState`](#sandboxtoolpartstate)
 
-Defined in: [runtime/sandbox-events.ts:155](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/sandbox-events.ts#L155)
+Defined in: [runtime/sandbox-events.ts:160](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/sandbox-events.ts#L160)
 
 **`Experimental`**
 
+Fresh per-turn [SandboxToolPartState](#sandboxtoolpartstate) for [mapSandboxToolEvent](#mapsandboxtoolevent) — an
+empty call-status map so each turn projects tool frames independently.
+
 #### Returns
 
 [`SandboxToolPartState`](#sandboxtoolpartstate)
@@ -17233,7 +17240,7 @@ Defined in: [runtime/sandbox-events.ts:155](https://github.com/tangle-network/ag
 
 > **mapSandboxToolEvent**(`event`, `state`): [`RuntimeStreamEvent`](index.md#runtimestreamevent) & `object`[]
 
-Defined in: [runtime/sandbox-events.ts:186](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/sandbox-events.ts#L186)
+Defined in: [runtime/sandbox-events.ts:191](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/sandbox-events.ts#L191)
 
 **`Experimental`**
 
@@ -17277,7 +17284,7 @@ Returns `[]` for every non-tool event.
 
 > **mapSandboxEvent**(`event`, `opts?`): [`RuntimeStreamEvent`](index.md#runtimestreamevent) \| `undefined`
 
-Defined in: [runtime/sandbox-events.ts:313](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/sandbox-events.ts#L313)
+Defined in: [runtime/sandbox-events.ts:318](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/sandbox-events.ts#L318)
 
 Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,
 for runtimes that bridge a sandbox `streamPrompt` into the
diff --git a/package.json b/package.json
index de346bb..620c4bd 100644
--- a/package.json
+++ b/package.json
@@ -94,7 +94,7 @@
   },
   "devDependencies": {
     "@biomejs/biome": "^2.4.15",
-    "@tangle-network/agent-eval": "^0.103.1",
+    "@tangle-network/agent-eval": "^0.103.2",
     "@tangle-network/agent-interface": ">=0.14.0 <1.0.0",
     "@tangle-network/sandbox": ">=0.8.0 <1.0.0",
     "@types/node": "^25.9.3",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index cce5f93..0bcd864 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -12,14 +12,14 @@ importers:
         specifier: ^2.4.15
         version: 2.4.15
       '@tangle-network/agent-eval':
-        specifier: ^0.103.1
-        version: 0.103.1(typescript@5.9.3)
+        specifier: ^0.103.2
+        version: 0.103.2(typescript@5.9.3)
       '@tangle-network/agent-interface':
         specifier: '>=0.14.0 <1.0.0'
         version: 0.14.0
       '@tangle-network/sandbox':
         specifier: '>=0.8.0 <1.0.0'
-        version: 0.9.5(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))
+        version: 0.9.5(viem@2.54.2(typescript@5.9.3)(zod@4.4.3))
       '@types/node':
         specifier: ^25.9.3
         version: 25.9.3
@@ -432,8 +432,8 @@ packages:
   '@gerrit0/mini-shiki@3.23.0':
     resolution: {integrity: sha512-bEMORlG0cqdjVyCEuU0cDQbORWX+kYCeo0kV1lbxF5bt4r7SID2l9bqsxJEM0zndaxpOUT7riCyIVEuqq/Ynxg==}
 
-  '@hono/node-server@2.0.4':
-    resolution: {integrity: sha512-Ut3y0dMMPWy6bZ2kVfx25EOVbZlm15dhF4mOsezMlhpNHy+4MkU1qN9Y6lnruYi4wPmFzimGX2X7LF/FwHli4A==}
+  '@hono/node-server@2.0.8':
+    resolution: {integrity: sha512-GuCWzLxwg218fy1JaHculFsdcuY12hxit83V+algozTPnwhNjLrRL/Alg9OYjLZLoUZ1rw/S4CdTMsnkSKCmFA==}
     engines: {node: '>=20'}
     peerDependencies:
       hono: ^4
@@ -636,13 +636,13 @@ packages:
   '@tangle-network/agent-core@0.3.4':
     resolution: {integrity: sha512-Hvz3ABRouNtBmRvGqPxifAO2yuILneJMylWH5jW/jeS2F03RvqkGYuXyGXWWLqosYbb3hVAvSEe4Ykm2FMGEDQ==}
 
-  '@tangle-network/agent-eval@0.103.1':
-    resolution: {integrity: sha512-9V37IcaRixSfIUkZ50pgU8a5nSVrkVmq5BimNLwVzbi3USwOkkJ9RcecMScpLUnrYNeaoe5Sac8lS6kzL1uTDQ==}
+  '@tangle-network/agent-eval@0.103.2':
+    resolution: {integrity: sha512-ANdqOcd171PUSzPP8ul8AB8Y2uJM16XYP8A8aAxs3Jdr9vznENiajzm9b8ibNG+VwLiclnD9RPbaYUdZzDqUBg==}
     engines: {node: '>=20'}
     hasBin: true
 
-  '@tangle-network/agent-interface@0.10.0':
-    resolution: {integrity: sha512-oiREgihkeX/xcGEtFfi9AkAfU2VzuF7SSla2s0iliXPUXyHCIIx6jwzHiYdwb1ZGCfvC+T+0SWOIa6fN5u195g==}
+  '@tangle-network/agent-interface@0.10.1':
+    resolution: {integrity: sha512-yehY/0EgKvu8lG6jIVoZCtMPLkj8VEWwasuAtuph2RaB9MKE5wuxRF647O6jw8KufNZ3aQ2UVVWpZ19dGCbs6w==}
 
   '@tangle-network/agent-interface@0.13.0':
     resolution: {integrity: sha512-CeTPGRLoXqpt0h+BCyFgZPkfU1zyRpWmqfD+85i/uk+uvbqxkfI+JprfKVf3tBsQuCgJPSjPt5qjdW8n3h2BVg==}
@@ -862,8 +862,8 @@ packages:
     engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
     os: [darwin]
 
-  hono@4.12.25:
-    resolution: {integrity: sha512-2NFaIyNVgJmBs/ecmtGzlmluTFs5cHEWGTdu0t1HBwYzoGXOL5nUQBRMXsXWla5i4KkG//QMzVP88m1+I3fdAQ==}
+  hono@4.12.27:
+    resolution: {integrity: sha512-1yrb/+w6HWQJrUCLkJ2IF5jNIPvvFkblV5RNOYl6bV+OA6p9GLcMpHFFGTosSvHvcAUibuUukRqhlYI4z32C7Q==}
     engines: {node: '>=16.9.0'}
 
   isows@1.0.7:
@@ -1123,8 +1123,8 @@ packages:
   undici-types@7.24.6:
     resolution: {integrity: sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==}
 
-  viem@2.52.2:
-    resolution: {integrity: sha512-HSU12p5aD/kAPZfrlbCUqdiP4P/c6hQ9AhfTS51VbLUQIjkWd1d5EjrCx/SCxZ0zhZVRn4Iv5X5WDqXPG8Ubew==}
+  viem@2.54.2:
+    resolution: {integrity: sha512-o0+5dEAUekBMTbixXy2mKbSDPnwsCJ+8+mOeMBDjkuS9iM4fcr3yKUWb2zlOy2NKInkg3anl1W11sxYspLiXig==}
     peerDependencies:
       typescript: '>=5.0.4'
     peerDependenciesMeta:
@@ -1209,8 +1209,8 @@ packages:
     engines: {node: '>=8'}
     hasBin: true
 
-  ws@8.20.1:
-    resolution: {integrity: sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==}
+  ws@8.21.0:
+    resolution: {integrity: sha512-Vsp28b7DRcimFQvrqu2Wek3z1iYxDCWqHYB8Qsnk/S4RfaCQzPGPyBNuVjJV3cd6UiKtUtp6sNM77gWvzcCH+g==}
     engines: {node: '>=10.0.0'}
     peerDependencies:
       bufferutil: ^4.0.1
@@ -1444,9 +1444,9 @@ snapshots:
       '@shikijs/types': 3.23.0
       '@shikijs/vscode-textmate': 10.0.2
 
-  '@hono/node-server@2.0.4(hono@4.12.25)':
+  '@hono/node-server@2.0.8(hono@4.12.27)':
     dependencies:
-      hono: 4.12.25
+      hono: 4.12.27
 
   '@jridgewell/gen-mapping@0.3.13':
     dependencies:
@@ -1604,14 +1604,14 @@ snapshots:
       '@tangle-network/agent-interface': 0.14.0
       zod: 4.4.3
 
-  '@tangle-network/agent-eval@0.103.1(typescript@5.9.3)':
+  '@tangle-network/agent-eval@0.103.2(typescript@5.9.3)':
     dependencies:
       '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.3)
       '@ax-llm/ax': 19.0.45(zod@4.4.3)
-      '@hono/node-server': 2.0.4(hono@4.12.25)
-      '@tangle-network/agent-interface': 0.10.0
+      '@hono/node-server': 2.0.8(hono@4.12.27)
+      '@tangle-network/agent-interface': 0.10.1
       '@tangle-network/tcloud': 0.4.14(typescript@5.9.3)(zod@4.4.3)
-      hono: 4.12.25
+      hono: 4.12.27
       zod: 4.4.3
     transitivePeerDependencies:
       - '@mastra/core'
@@ -1622,7 +1622,7 @@ snapshots:
       - typescript
       - utf-8-validate
 
-  '@tangle-network/agent-interface@0.10.0':
+  '@tangle-network/agent-interface@0.10.1':
     dependencies:
       zod: 4.4.3
 
@@ -1634,12 +1634,12 @@ snapshots:
     dependencies:
       zod: 4.4.3
 
-  '@tangle-network/sandbox@0.9.5(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))':
+  '@tangle-network/sandbox@0.9.5(viem@2.54.2(typescript@5.9.3)(zod@4.4.3))':
     dependencies:
       '@tangle-network/agent-core': 0.3.4
       '@tangle-network/agent-interface': 0.13.0
     optionalDependencies:
-      viem: 2.52.2(typescript@5.9.3)(zod@4.4.3)
+      viem: 2.54.2(typescript@5.9.3)(zod@4.4.3)
 
   '@tangle-network/tcloud-attestation@0.1.1': {}
 
@@ -1647,10 +1647,10 @@ snapshots:
     dependencies:
       '@scure/bip32': 2.2.0
       '@scure/bip39': 2.2.0
-      '@tangle-network/sandbox': 0.9.5(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))
+      '@tangle-network/sandbox': 0.9.5(viem@2.54.2(typescript@5.9.3)(zod@4.4.3))
       '@tangle-network/tcloud-attestation': 0.1.1
       commander: 14.0.3
-      viem: 2.52.2(typescript@5.9.3)(zod@4.4.3)
+      viem: 2.54.2(typescript@5.9.3)(zod@4.4.3)
     transitivePeerDependencies:
       - '@mastra/core'
       - '@modelcontextprotocol/sdk'
@@ -1864,11 +1864,11 @@ snapshots:
   fsevents@2.3.3:
     optional: true
 
-  hono@4.12.25: {}
+  hono@4.12.27: {}
 
-  isows@1.0.7(ws@8.20.1):
+  isows@1.0.7(ws@8.21.0):
     dependencies:
-      ws: 8.20.1
+      ws: 8.21.0
 
   joycon@3.1.1: {}
 
@@ -2126,16 +2126,16 @@ snapshots:
 
   undici-types@7.24.6: {}
 
-  viem@2.52.2(typescript@5.9.3)(zod@4.4.3):
+  viem@2.54.2(typescript@5.9.3)(zod@4.4.3):
     dependencies:
       '@noble/curves': 1.9.1
       '@noble/hashes': 1.8.0
       '@scure/bip32': 1.7.0
       '@scure/bip39': 1.6.0
       abitype: 1.2.3(typescript@5.9.3)(zod@4.4.3)
-      isows: 1.0.7(ws@8.20.1)
+      isows: 1.0.7(ws@8.21.0)
       ox: 0.14.29(typescript@5.9.3)(zod@4.4.3)
-      ws: 8.20.1
+      ws: 8.21.0
     optionalDependencies:
       typescript: 5.9.3
     transitivePeerDependencies:
@@ -2224,7 +2224,7 @@ snapshots:
       siginfo: 2.0.0
       stackback: 0.0.2
 
-  ws@8.20.1: {}
+  ws@8.21.0: {}
 
   yaml@2.9.0: {}
 
diff --git a/scripts/gen-primitive-catalog.mjs b/scripts/gen-primitive-catalog.mjs
index 0fd1851..6ed9f83 100644
--- a/scripts/gen-primitive-catalog.mjs
+++ b/scripts/gen-primitive-catalog.mjs
@@ -271,7 +271,7 @@ for (let i = 0; i < allModules.length; i++) bySpecifier.set(allModules[i].specif
 // The ceiling is the exact current count; when a backfill lowers the real number,
 // lower the constant to match. Exceeding it (a new undocumented callable) exits 1.
 
-const maxUndocumentedCallables = 34
+const maxUndocumentedCallables = 0
 const ratchetKinds = new Set(['function', 'class', 'const'])
 
 // ─────────────────────────────────────────────────────────────────────────────
diff --git a/src/runtime/define-leaderboard.ts b/src/runtime/define-leaderboard.ts
index 0596413..6095dbf 100644
--- a/src/runtime/define-leaderboard.ts
+++ b/src/runtime/define-leaderboard.ts
@@ -291,6 +291,11 @@ function normalizeScore(s: number | LeaderboardScore): LeaderboardScore {
   return typeof s === 'number' ? { composite: s } : s
 }
 
+/**
+ * Assemble a declarative spec (`cases` + `prompt` + `score`) into a runnable
+ * harness×model leaderboard — `run()` executes the matrix, `toBenchmarkAdapter()`
+ * exposes the same domain as a structural `BenchmarkAdapter`.
+ */
 export function defineLeaderboard<TCase, TArtifact = string>(
   spec: LeaderboardSpec<TCase, TArtifact>,
 ): DefinedLeaderboard<TCase, TArtifact> {
diff --git a/src/runtime/sandbox-events.ts b/src/runtime/sandbox-events.ts
index 829c640..d2b6c86 100644
--- a/src/runtime/sandbox-events.ts
+++ b/src/runtime/sandbox-events.ts
@@ -151,7 +151,12 @@ export interface SandboxToolPartState {
   seq: number
 }
 
-/** @experimental */
+/**
+ * Fresh per-turn {@link SandboxToolPartState} for {@link mapSandboxToolEvent} — an
+ * empty call-status map so each turn projects tool frames independently.
+ *
+ * @experimental
+ */
 export function createSandboxToolPartState(): SandboxToolPartState {
   return { statusByCall: new Map(), seq: 0 }
 }