diff --git a/docs/api/primitive-catalog.md b/docs/api/primitive-catalog.md index 01ed065..2a65b30 100644 --- a/docs/api/primitive-catalog.md +++ b/docs/api/primitive-catalog.md @@ -7,7 +7,7 @@ # Primitive catalog — the never-stale anti-reinvention inventory -> **GENERATED** from `@tangle-network/agent-runtime@0.84.0` and `@tangle-network/agent-eval@0.103.1` by `scripts/gen-primitive-catalog.mjs`. Do NOT hand-edit — run `pnpm run docs:api`. This is the mechanical companion to the JUDGMENT in `canonical-api.md` (§2 decision table + §1.5 AgentProfile law): that doc says WHICH primitive to reach for and what NOT to build; this catalog proves WHAT exists. Per-symbol signatures + `file:line` live in the per-module pages under `docs/api/`. +> **GENERATED** from `@tangle-network/agent-runtime@0.85.0` and `@tangle-network/agent-eval@0.103.1` by `scripts/gen-primitive-catalog.mjs`. Do NOT hand-edit — run `pnpm run docs:api`. This is the mechanical companion to the JUDGMENT in `canonical-api.md` (§2 decision table + §1.5 AgentProfile law): that doc says WHICH primitive to reach for and what NOT to build; this catalog proves WHAT exists. Per-symbol signatures + `file:line` live in the per-module pages under `docs/api/`. ## 1. agent-runtime — own public surface @@ -246,7 +246,7 @@ Import from `@tangle-network/agent-runtime/intelligence` — 63 exports. ### Recursive atom + loop kernel (alias of ./runtime) -Import from `@tangle-network/agent-runtime/loops` — 419 exports. +Import from `@tangle-network/agent-runtime/loops` — 425 exports. | Symbol | Kind | Summary | |---|---|---| @@ -262,6 +262,7 @@ Import from `@tangle-network/agent-runtime/loops` — 419 exports. | `authorStrategy` | function | Author + load a strategy from losses. Throws when the author emits no loadable module; | | `breadthStrategy` | function | BREADTH: K independent rollouts (each own artifact), verifier picks the best. | | `buildSteerContext` | function | Build the `SteerContext` a combinator reads to steer (its `loopUntil.until`, `widen` gate, any | +| `collectAgentTurn` | function | Drain a `streamAgentTurn` stream (or any `RuntimeStreamEvent` stream that | | `completionAuthorizes` | function | Decide whether a `CompletionVerdict` may end the node under the policy: authority scales with the verdict's determinism, and probabilistic verdicts must clear `minConfidence`. | | `computeFindingId` | function | Compute the stable finding_id from the identity-defining fields. | | `contentAddress` | function | Mint the content-addressed `outRef` for a result artifact: `sha256:` over a | @@ -358,6 +359,7 @@ Import from `@tangle-network/agent-runtime/loops` — 419 exports. | `settledToIteration` | function | The step-8 merge-boundary adapter (M4): rehydrate a `Settled.done` into the kernel's | | `spendFromUsageEvents` | function | Fold a normalized `UsageEvent` array into a `Spend`. Tokens and usd are separate | | `stopSentinel` | function | A unique, attributable stop sentinel for a node (ralph-loop style). Deterministic from the | +| `streamAgentTurn` | function | Run ONE agent turn on any backend kind and stream its events. Yields the | | `sumSandboxUsage` | function | Sum the token usage + USD cost of a sandbox turn's events — the one honest way to meter an | | `supervise` | function | One-call supervisor: build + run a supervisor from its profile with sensible defaults; the raw `supervisorAgent` + `createSupervisor().run` seams stay available for power use. | | `superviseSurface` | function | Drive a team of agents (spawned + steered by `profile`) to solve a graded `AgenticSurface` task, and | @@ -394,6 +396,7 @@ Import from `@tangle-network/agent-runtime/loops` — 419 exports. | `AgentProfile` | interface | Public provider-neutral agent profile contract. | | `AgentRunSpec` | interface | Sandbox-SDK-shaped agent specification. | | `AgentSpec` | interface | `AgentProfile` does NOT carry a `harness`/backend field — `harness` lives on the | +| `AgentTurnUsage` | interface | Metered usage of one turn, summed over every cost-bearing event the backend | | `AnalystFinding` | interface | Unified envelope every analyst emits. Schema-versioned so renderers | | `AnytimeTaskCurve` | interface | anytimeReport — time-to-satisfactory-output metrics, derived entirely from the | | `AuditIntentInput` | interface | auditIntent — the route-rigor analyst: is this trajectory even going the RIGHT WAY? | @@ -404,6 +407,7 @@ Import from `@tangle-network/agent-runtime/loops` — 419 exports. | `BusEvent` | interface | Every bus event is a discriminated union member keyed by `type`. | | `BusRecord` | interface | A published event stamped for ordering and observability. `seq` is the monotonic publish index; | | `CheckpointCapableBox` | interface | Loop-side widening of the box's optional checkpoint method. The | +| `CollectedAgentTurn` | interface | A drained turn: the terminal summary plus every event the stream yielded. | | `CompletionAnalyst` | interface | Reads a node's trace → a completion verdict. Same input shape as the `analyze` hook, so | | `CompletionEvidence` | interface | Trace-derived evidence for a completion claim — an artifact (output) or a verifier metric, | | `CompletionPolicy` | interface | When a verdict authorizes the driver to END. Deterministic → trust (ground truth); | @@ -526,6 +530,7 @@ Import from `@tangle-network/agent-runtime/loops` — 419 exports. | `WorktreeCommandResult` | interface | Outcome of one verification command run in the worktree (test or typecheck). | | `AgentEnvironmentProviderRef` | type | Provider object or registry name accepted by runtime provider adapters. | | `AgentProfileRef` | type | Portable profile reference: inline profile or provider catalog id. | +| `AgentTurnBackend` | type | The execution substrate one turn runs on — a closed discriminated union over | | `ApplyContinuation` | type | Fold a steering string into the caller's Task shape, producing the Task for | | `AssertTraceDerivedFindings` | type | The firewall assertion contract, re-stated for the reactive seam (PORT of | | `BudgetReadout` | type | Post-reservation pool readout — the shape `Scope.budget` exposes. `tokensLeft`, | @@ -566,7 +571,7 @@ Import from `@tangle-network/agent-runtime/loops` — 419 exports. | `WinnerStrategy` | type | Built-in valid-only winner strategies for `selectValidWinner` (selector≠judge): best gated-valid | | `WorktreePatchArtifact` | type | Terminal artifact of one worktree-CLI run — the canonical worktree-harness result (the captured | -**Undocumented supporting types** (add a TSDoc line at the declaration to earn a table row): `AgentEnvironment`, `AgentEnvironmentCapabilities`, `AgentEnvironmentEvent`, `AgentEnvironmentProvider`, `AgentEnvironmentQuery`, `AgentEnvironmentSummary`, `AgenticOptions`, `AgenticRunResult`, `AgenticTool`, `AgentSession`, `AgentSessionRef`, `AgentTurnInput`, `AgentTurnResult`, `AnalystRegistry`, `AnytimeReport`, `AnytimeStrategySummary`, `ArtifactHandle`, `AuditIntentOptions`, `AuthoredHarness`, `AuthoredStrategy`, `AuthorStrategyOptions`, `BenchmarkConfig`, `BenchmarkLift`, `BenchmarkStrategySummary`, `BenchmarkTaskRow`, `BudgetPool`, `BusStats`, `ChampionPick`, `CheckpointRef`, `CheckpointRequest`, `CreateAgentEnvironmentInput`, `DefinedLeaderboard`, `Driver`, `EventBus`, `EvolutionArchiveNode`, `EvolutionBandInfo`, `EvolutionCandidate`, `EvolutionGeneration`, `EvolutionReport`, `ExecRequest`, `ExecResult`, `ForkRequest`, `GitWorkspaceOptions`, `HarvestFailure`, `HarvestReport`, `Inbox`, `InProcessSandboxClientOptions`, `IntentAudit`, `Iteration`, `Leaderboard`, `LeaderboardOptions`, `LeaderboardSpec`, `LoopDecisionPayload`, `LoopDispatchOptions`, `LoopEndedPayload`, `LoopIterationEndedPayload`, `LoopIterationStartedPayload`, `LoopPlanDescription`, `LoopResult`, `LoopSandboxPlacement`, `LoopStartedPayload`, `LoopTraceEmitter`, `LoopWinner`, `McpEnvironmentOptions`, `Observation`, `ObserveOptions`, `OpenSandboxRunOptions`, `PairwiseOptions`, `PatchDeliverableOptions`, `PlacementInfo`, `PromotionGateOptions`, `PromotionVerdict`, `PublishOptions`, `ResourceRequest`, `RouterChatResult`, `RouterChatToolsResult`, `RouterToolLoopResult`, `RunAgenticOptions`, `SandboxRun`, `ShotSpec`, `StrategyEvolutionConfig`, `StrategyResult`, `SuperviseOptions`, `SuperviseSurfaceOptions`, `SupervisorAgentDeps`, `SupervisorOpts`, `SurfaceScore`, `ToolSpec`, `TraceSource`, `ValidationCtx`, `Validator`, `WaterfallCollector`, `WaterfallReport`, `Workspace`, `WorkspaceRequest`, `WorkspaceRun`, `WorktreeCliExecutorOptions`, `WorktreeFanoutOptions`, `AgentEnvironmentStatus`, `AgentSessionStatus`, `ChampionPolicy`, `LoopTraceEvent`, `MakeWorkerAgent`, `WorkspaceCommit`. +**Undocumented supporting types** (add a TSDoc line at the declaration to earn a table row): `AgentEnvironment`, `AgentEnvironmentCapabilities`, `AgentEnvironmentEvent`, `AgentEnvironmentProvider`, `AgentEnvironmentQuery`, `AgentEnvironmentSummary`, `AgenticOptions`, `AgenticRunResult`, `AgenticTool`, `AgentSession`, `AgentSessionRef`, `AgentTurnInput`, `AgentTurnResult`, `AnalystRegistry`, `AnytimeReport`, `AnytimeStrategySummary`, `ArtifactHandle`, `AuditIntentOptions`, `AuthoredHarness`, `AuthoredStrategy`, `AuthorStrategyOptions`, `BenchmarkConfig`, `BenchmarkLift`, `BenchmarkStrategySummary`, `BenchmarkTaskRow`, `BudgetPool`, `BusStats`, `ChampionPick`, `CheckpointRef`, `CheckpointRequest`, `CreateAgentEnvironmentInput`, `DefinedLeaderboard`, `Driver`, `EventBus`, `EvolutionArchiveNode`, `EvolutionBandInfo`, `EvolutionCandidate`, `EvolutionGeneration`, `EvolutionReport`, `ExecRequest`, `ExecResult`, `ForkRequest`, `GitWorkspaceOptions`, `HarvestFailure`, `HarvestReport`, `Inbox`, `InProcessSandboxClientOptions`, `IntentAudit`, `Iteration`, `Leaderboard`, `LeaderboardOptions`, `LeaderboardSpec`, `LoopDecisionPayload`, `LoopDispatchOptions`, `LoopEndedPayload`, `LoopIterationEndedPayload`, `LoopIterationStartedPayload`, `LoopPlanDescription`, `LoopResult`, `LoopSandboxPlacement`, `LoopStartedPayload`, `LoopTraceEmitter`, `LoopWinner`, `McpEnvironmentOptions`, `Observation`, `ObserveOptions`, `OpenSandboxRunOptions`, `PairwiseOptions`, `PatchDeliverableOptions`, `PlacementInfo`, `PromotionGateOptions`, `PromotionVerdict`, `PublishOptions`, `ResourceRequest`, `RouterChatResult`, `RouterChatToolsResult`, `RouterToolLoopResult`, `RunAgenticOptions`, `SandboxRun`, `ShotSpec`, `StrategyEvolutionConfig`, `StrategyResult`, `StreamAgentTurnOptions`, `SuperviseOptions`, `SuperviseSurfaceOptions`, `SupervisorAgentDeps`, `SupervisorOpts`, `SurfaceScore`, `ToolSpec`, `TraceSource`, `ValidationCtx`, `Validator`, `WaterfallCollector`, `WaterfallReport`, `Workspace`, `WorkspaceRequest`, `WorkspaceRun`, `WorktreeCliExecutorOptions`, `WorktreeFanoutOptions`, `AgentEnvironmentStatus`, `AgentSessionStatus`, `ChampionPolicy`, `LoopTraceEvent`, `MakeWorkerAgent`, `WorkspaceCommit`. ### Environment provider adapters — generic sandbox/compute bridge diff --git a/docs/api/runtime.md b/docs/api/runtime.md index 265a155..64f3100 100644 --- a/docs/api/runtime.md +++ b/docs/api/runtime.md @@ -8143,6 +8143,139 @@ Defined in: [runtime/strategy.ts:1028](https://github.com/tangle-network/agent-r *** +### StreamAgentTurnOptions + +Defined in: [runtime/stream-agent-turn.ts:93](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L93) + +**`Experimental`** + +#### Properties + +##### signal? + +> `optional` **signal?**: `AbortSignal` + +Defined in: [runtime/stream-agent-turn.ts:95](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L95) + +**`Experimental`** + +Caller-initiated cancellation. Terminates the stream with `final.status: 'aborted'`. + +##### timeoutMs? + +> `optional` **timeoutMs?**: `number` + +Defined in: [runtime/stream-agent-turn.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L101) + +**`Experimental`** + +Wall-clock deadline for the whole turn in ms. An expired deadline aborts +the backend and terminates the stream with `final.status: 'failed'` +(a blown deadline is a turn failure, not a caller cancellation). + +*** + +### AgentTurnUsage + +Defined in: [runtime/stream-agent-turn.ts:112](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L112) + +**`Experimental`** + +Metered usage of one turn, summed over every cost-bearing event the backend +emitted. `input`/`output` are token counts (0 when the backend reported +none — the honest sum, never a fabricated estimate). `costUsd`/`model` are +present only when the backend actually reported them. + +#### Properties + +##### input + +> **input**: `number` + +Defined in: [runtime/stream-agent-turn.ts:113](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L113) + +**`Experimental`** + +##### output + +> **output**: `number` + +Defined in: [runtime/stream-agent-turn.ts:114](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L114) + +**`Experimental`** + +##### costUsd? + +> `optional` **costUsd?**: `number` + +Defined in: [runtime/stream-agent-turn.ts:115](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L115) + +**`Experimental`** + +##### model? + +> `optional` **model?**: `string` + +Defined in: [runtime/stream-agent-turn.ts:116](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L116) + +**`Experimental`** + +*** + +### CollectedAgentTurn + +Defined in: [runtime/stream-agent-turn.ts:126](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L126) + +**`Experimental`** + +A drained turn: the terminal summary plus every event the stream yielded. +`status`/`error` mirror the terminal `final` event so a failed or aborted +turn stays inspectable without re-scanning `events`. + +#### Properties + +##### finalText + +> **finalText**: `string` + +Defined in: [runtime/stream-agent-turn.ts:127](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L127) + +**`Experimental`** + +##### usage + +> **usage**: [`AgentTurnUsage`](#agentturnusage) + +Defined in: [runtime/stream-agent-turn.ts:128](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L128) + +**`Experimental`** + +##### events + +> **events**: [`RuntimeStreamEvent`](index.md#runtimestreamevent)[] + +Defined in: [runtime/stream-agent-turn.ts:129](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L129) + +**`Experimental`** + +##### status + +> **status**: [`AgentTaskStatus`](index.md#agenttaskstatus) + +Defined in: [runtime/stream-agent-turn.ts:130](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L130) + +**`Experimental`** + +##### error? + +> `optional` **error?**: [`BackendErrorDetail`](index.md#backenderrordetail) + +Defined in: [runtime/stream-agent-turn.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L131) + +**`Experimental`** + +*** + ### SurfaceWorkerOut Defined in: [runtime/supervise-surface.ts:34](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise-surface.ts#L34) @@ -14448,6 +14581,83 @@ Defined in: [runtime/strategy-evolution.ts:55](https://github.com/tangle-network *** +### AgentTurnBackend + +> **AgentTurnBackend** = \{ `kind`: `"box"`; `box`: `SandboxInstance`; `agentRunName?`: `string`; \} \| \{ `kind`: `"executor"`; `factory`: [`ExecutorFactory`](#executorfactory)\<`unknown`\>; `agentRunName?`: `string`; \} \| \{ `kind`: `"chat"`; `backend`: [`AgentExecutionBackend`](index.md#agentexecutionbackend); \} + +Defined in: [runtime/stream-agent-turn.ts:63](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L63) + +**`Experimental`** + +The execution substrate one turn runs on — a closed discriminated union over +the three stream surfaces the runtime already owns. + +#### Union Members + +##### Type Literal + +\{ `kind`: `"box"`; `box`: `SandboxInstance`; `agentRunName?`: `string`; \} + +###### kind + +> **kind**: `"box"` + +A live sandbox box: the turn is one `box.streamPrompt(prompt)` call. + +###### box + +> **box**: `SandboxInstance` + +###### agentRunName? + +> `optional` **agentRunName?**: `string` + +Model label stamped on cost-only `llm_call` events. Default `'agent'`. + +*** + +##### Type Literal + +\{ `kind`: `"executor"`; `factory`: [`ExecutorFactory`](#executorfactory)\<`unknown`\>; `agentRunName?`: `string`; \} + +###### kind + +> **kind**: `"executor"` + +A one-shot `Executor` (cli-bridge / router / BYO): the factory is +instantiated fresh for the turn via `inlineSandboxClient`, run once on +the prompt, and torn down — the same per-spawn lifecycle the supervise +runtime gives it. + +###### factory + +> **factory**: [`ExecutorFactory`](#executorfactory)\<`unknown`\> + +###### agentRunName? + +> `optional` **agentRunName?**: `string` + +Model label stamped on cost-only `llm_call` events. Default `'agent'`. + +*** + +##### Type Literal + +\{ `kind`: `"chat"`; `backend`: [`AgentExecutionBackend`](index.md#agentexecutionbackend); \} + +###### kind + +> **kind**: `"chat"` + +An in-process `AgentExecutionBackend` (`resolveAgentBackend` output or +any custom backend): the turn is one `backend.stream()` call. + +###### backend + +> **backend**: [`AgentExecutionBackend`](index.md#agentexecutionbackend) + +*** + ### BudgetReadout > **BudgetReadout** = `Readonly`\<\{ `tokensLeft`: `number`; `usdLeft`: `number`; `usdCapped`: `boolean`; `deadlineMs`: `number`; `reservedTokens`: `number`; \}\> @@ -17193,6 +17403,66 @@ Run a Strategy through the keystone Supervisor — `Agent.act` over a conserved- *** +### streamAgentTurn() + +> **streamAgentTurn**(`backend`, `prompt`, `opts?`): `AsyncGenerator`\<[`RuntimeStreamEvent`](index.md#runtimestreamevent)\> + +Defined in: [runtime/stream-agent-turn.ts:158](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L158) + +**`Experimental`** + +Run ONE agent turn on any backend kind and stream its events. Yields the +`RuntimeStreamEvent` vocabulary incrementally and always ends with a `final` +event carrying the turn's text and usage (`metadata.tokenUsage`, +`metadata.costUsd?`, `metadata.model?`) — on success, failure, abort, and +timeout alike. The generator never throws; failures surface in-band as +`backend_error` + `final` with a typed `error` detail. + +#### Parameters + +##### backend + +[`AgentTurnBackend`](#agentturnbackend) + +##### prompt + +`string` + +##### opts? + +[`StreamAgentTurnOptions`](#streamagentturnoptions) = `{}` + +#### Returns + +`AsyncGenerator`\<[`RuntimeStreamEvent`](index.md#runtimestreamevent)\> + +*** + +### collectAgentTurn() + +> **collectAgentTurn**(`stream`): `Promise`\<[`CollectedAgentTurn`](#collectedagentturn)\> + +Defined in: [runtime/stream-agent-turn.ts:223](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/stream-agent-turn.ts#L223) + +**`Experimental`** + +Drain a `streamAgentTurn` stream (or any `RuntimeStreamEvent` stream that +honors its terminal contract) into the turn summary plus the full event +list. Fail-loud: throws when the stream ends without a terminal `final` +event — a stream that violates the contract must not read as an empty turn. + +#### Parameters + +##### stream + +`AsyncIterable`\<[`RuntimeStreamEvent`](index.md#runtimestreamevent)\> + +#### Returns + +`Promise`\<[`CollectedAgentTurn`](#collectedagentturn)\> + +*** + ### failuresAnalyst() > **failuresAnalyst**(): [`AnalystRegistry`](#analystregistry) diff --git a/docs/canonical-api.md b/docs/canonical-api.md index 9d3c72e..5879506 100644 --- a/docs/canonical-api.md +++ b/docs/canonical-api.md @@ -2,7 +2,7 @@ -> **Version 0.84.0.** The export inventory + per-symbol signatures live in the generated `docs/api/` reference: **`docs/api/primitive-catalog.md`** is the never-stale, grouped list of every primitive to reuse (own surface + the agent-eval judge / authenticity / verification / statistics / campaign / token-usage surfaces), with each one's import path and one-line summary read live from source; the per-module pages hold the full signatures. The pinned substrate is agent-eval `>=0.101.0 <1.0.0`; the sandbox substrate that materializes profiles into harness shapes is `@tangle-network/sandbox` (peer `>=0.8.0 <1.0.0`). The neutral contract types (`AgentProfile`, `AgentProfileMcpServer`, `HarnessType`, `ReasoningEffort`, `Part`/`ToolPart`/`ToolState`, plus environment-provider types) are owned by **`@tangle-network/agent-interface`** (peer `>=0.14.0 <1.0.0`) — the single source of truth. Substrate primitives are re-exported through `@tangle-network/agent-eval/contract` (or `/campaign`), not local to this package — the catalog's §2 shows exactly which subpath each lives under. +> **Version 0.85.0.** The export inventory + per-symbol signatures live in the generated `docs/api/` reference: **`docs/api/primitive-catalog.md`** is the never-stale, grouped list of every primitive to reuse (own surface + the agent-eval judge / authenticity / verification / statistics / campaign / token-usage surfaces), with each one's import path and one-line summary read live from source; the per-module pages hold the full signatures. The pinned substrate is agent-eval `>=0.101.0 <1.0.0`; the sandbox substrate that materializes profiles into harness shapes is `@tangle-network/sandbox` (peer `>=0.8.0 <1.0.0`). The neutral contract types (`AgentProfile`, `AgentProfileMcpServer`, `HarnessType`, `ReasoningEffort`, `Part`/`ToolPart`/`ToolState`, plus environment-provider types) are owned by **`@tangle-network/agent-interface`** (peer `>=0.14.0 <1.0.0`) — the single source of truth. Substrate primitives are re-exported through `@tangle-network/agent-eval/contract` (or `/campaign`), not local to this package — the catalog's §2 shows exactly which subpath each lives under. > > **`./loops` is the runtime barrel** — `package.json` maps it to `src/runtime/index.ts`. Everything below labelled `/loops` is the recursive-atom + loop-kernel surface. > @@ -63,6 +63,7 @@ This table is judgment-only: it maps an intent to the ONE primitive to reach for | Run a sandbox coding rollout, round-synchronous (fresh box per round) | `runLoop(options)` — `/loops` | a `new Sandbox()`+acquire+stream+parse+delete loop, or a 2nd winner-selector | | Run **agent-eval fixture folders** through runtime `runLoop` | agent-eval fixture loading/planning, then `loopCampaignDispatch(...)` — `/loops` | a one-off `runCampaign` dispatch that hand-builds `ExecCtx`, drops loop traces, or forgets token/cost reporting | | Run + **resume** ONE persistent box across turns | `openSandboxRun(client, opts, deliverable)` — `/loops` | a per-domain `new Sandbox`+`box.fs.read`+delete copy | +| Run **ONE agent turn** on any substrate — box, cli-bridge/router `Executor`, or in-process chat backend — as ONE normalized `RuntimeStreamEvent` stream with a guaranteed terminal result+usage event | `streamAgentTurn(backend, prompt, { signal, timeoutMs })` + `collectAgentTurn(stream)` — `/loops` | a per-provider stream→event mapper zoo, a hand-faked box around a non-box executor, or raw fetch leaking through the turn abstraction | | Pick / register a leaf backend, or bring your own agent | `createExecutor({ backend })` / `createExecutorRegistry()` / implement `Executor` — `/loops` | a per-vendor adapter or closed `inline\|sandbox\|cli` switch (won't report through the `UsageEvent` channel) | | Evolve a **prompt/string** surface | `gepaProposer({ llm, model, target })` (default inside `selfImprove`; the skill-surface twin is `skillOptProposer`, same source) — `agent-eval/campaign` | a hand-rolled prompt-mutation reflection loop with its own Pareto bookkeeping | | Self-improve a profile (one pluggable verb) — START HERE (self-improvement) | `improve(profile, findings, { surface, gate })` — root `.` (the RSI verb; defaults the generator from `surface`, wraps `selfImprove`) | a bespoke optimize loop, or calling `selfImprove`/a skill-optimizer directly for the common case | diff --git a/package.json b/package.json index 9abcb1d..6587ecd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/agent-runtime", - "version": "0.84.0", + "version": "0.85.0", "description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.", "homepage": "https://github.com/tangle-network/agent-runtime#readme", "repository": { diff --git a/src/runtime/index.ts b/src/runtime/index.ts index 72c0739..388fb7f 100644 --- a/src/runtime/index.ts +++ b/src/runtime/index.ts @@ -378,6 +378,14 @@ export { type StrategyEvolutionConfig, selectChampion, } from './strategy-evolution' +export { + type AgentTurnBackend, + type AgentTurnUsage, + type CollectedAgentTurn, + collectAgentTurn, + type StreamAgentTurnOptions, + streamAgentTurn, +} from './stream-agent-turn' // The supervisor's intelligence: it AUTHORS each worker's profile (instructions + model) from a // SKILL (its own system prompt) — the optimizable self-improvement surface, not the plumbing. export { diff --git a/src/runtime/inline-sandbox-client.ts b/src/runtime/inline-sandbox-client.ts index f4572f6..27a3b0d 100644 --- a/src/runtime/inline-sandbox-client.ts +++ b/src/runtime/inline-sandbox-client.ts @@ -55,8 +55,21 @@ export function inlineSandboxClient(factory: ExecutorFactory): SandboxC const createOptions = options return { id, - async *streamPrompt(message: string): AsyncGenerator { + async *streamPrompt( + message: string, + opts?: { signal?: AbortSignal }, + ): AsyncGenerator { + // Chain the caller's turn signal into the executor's spawn signal so + // an abort reaches `exec.execute` — a cooperative executor settles + // (throws) instead of running to completion after cancellation. const controller = new AbortController() + const callerSignal = opts?.signal + const onAbort = () => + controller.abort(callerSignal?.reason ?? new Error('prompt aborted')) + if (callerSignal) { + if (callerSignal.aborted) onAbort() + else callerSignal.addEventListener('abort', onAbort, { once: true }) + } const spec: AgentSpec = { profile: { name: id }, harness: null } const exec = factory(spec, { signal: controller.signal, seams: { createOptions } }) try { @@ -86,6 +99,7 @@ export function inlineSandboxClient(factory: ExecutorFactory): SandboxC }, } as unknown as SandboxEvent } finally { + callerSignal?.removeEventListener('abort', onAbort) await exec.teardown('brutalKill').catch(() => {}) } }, diff --git a/src/runtime/stream-agent-turn.test.ts b/src/runtime/stream-agent-turn.test.ts new file mode 100644 index 0000000..be870c1 --- /dev/null +++ b/src/runtime/stream-agent-turn.test.ts @@ -0,0 +1,250 @@ +/** + * Offline contract tests for `streamAgentTurn` / `collectAgentTurn` — one per + * backend kind (box via `inProcessSandboxClient`, executor via a stub + * `ExecutorFactory`, chat via a stub `AgentExecutionBackend`), plus the + * terminal-guarantee, abort, and timeout paths. No network, no credentials. + */ + +import type { SandboxEvent } from '@tangle-network/sandbox' +import { describe, expect, it } from 'vitest' +import type { AgentExecutionBackend, RuntimeStreamEvent } from '../types' +import { inProcessSandboxClient } from './in-process-sandbox-client' +import { collectAgentTurn, streamAgentTurn } from './stream-agent-turn' +import type { Executor, ExecutorFactory, ExecutorResult } from './supervise/types' + +function finalOf(events: RuntimeStreamEvent[]): RuntimeStreamEvent & { type: 'final' } { + const final = events.at(-1) + if (!final || final.type !== 'final') throw new Error('no terminal final event') + return final +} + +describe('streamAgentTurn: box backend', () => { + async function makeBox(events: SandboxEvent[]) { + const client = inProcessSandboxClient({ onPrompt: () => events }) + return client.create() + } + + it('streams incremental events and terminates with usage', async () => { + const box = await makeBox([ + { type: 'message.part.updated', data: { part: { type: 'text' }, delta: 'Hello ' } }, + { type: 'message.part.updated', data: { part: { type: 'text' }, delta: 'world' } }, + { type: 'llm_call', data: { model: 'kimi-k2', tokensIn: 100, tokensOut: 40, costUsd: 0.02 } }, + { type: 'result', data: { finalText: 'Hello world' } }, + ] as SandboxEvent[]) + + const seen: RuntimeStreamEvent[] = [] + for await (const event of streamAgentTurn({ kind: 'box', box }, 'say hello')) { + seen.push(event) + } + // Incremental events surface in order, before the terminal event. + expect(seen.map((e) => e.type)).toEqual([ + 'backend_start', + 'text_delta', + 'text_delta', + 'llm_call', + 'final', + ]) + const final = finalOf(seen) + expect(final.status).toBe('completed') + expect(final.text).toBe('Hello world') + expect(final.metadata).toMatchObject({ + tokenUsage: { input: 100, output: 40 }, + costUsd: 0.02, + model: 'kimi-k2', + }) + }) + + it('collectAgentTurn round-trips the terminal summary', async () => { + const box = await makeBox([ + { type: 'message.part.updated', data: { part: { type: 'text' }, delta: '42' } }, + { type: 'done', data: { tokenUsage: { inputTokens: 7, outputTokens: 3 } } }, + ] as SandboxEvent[]) + + const turn = await collectAgentTurn(streamAgentTurn({ kind: 'box', box }, 'answer')) + expect(turn.finalText).toBe('42') + expect(turn.usage).toEqual({ input: 7, output: 3 }) + expect(turn.status).toBe('completed') + expect(turn.events.map((e) => e.type)).toEqual([ + 'backend_start', + 'text_delta', + 'llm_call', + 'final', + ]) + }) + + it('surfaces a throwing box as backend_error + final failed (never throws)', async () => { + const client = inProcessSandboxClient({ + // biome-ignore lint/correctness/useYield: the throw-before-yield path is the test subject + onPrompt: async function* (): AsyncIterable { + throw new Error('box exploded') + }, + }) + const box = await client.create() + const turn = await collectAgentTurn(streamAgentTurn({ kind: 'box', box }, 'boom')) + expect(turn.status).toBe('failed') + expect(turn.error).toMatchObject({ kind: 'backend', message: 'box exploded' }) + const types = turn.events.map((e) => e.type) + expect(types).toContain('backend_error') + expect(types.at(-1)).toBe('final') + }) +}) + +describe('streamAgentTurn: executor backend', () => { + function stubFactory(opts?: { + onTeardown?: () => void + hangUntilAbort?: boolean + }): ExecutorFactory { + return (_spec, ctx): Executor => ({ + runtime: 'inline', + async execute(task, signal): Promise> { + if (opts?.hangUntilAbort) { + await new Promise((_resolve, reject) => { + const onAbort = () => reject(signal.reason ?? new Error('aborted')) + if (signal.aborted) onAbort() + else signal.addEventListener('abort', onAbort, { once: true }) + // ctx.signal must be the same channel — assert linkage indirectly. + if (ctx.signal.aborted) onAbort() + }) + } + return { + outRef: 'stub-1', + out: { content: `echo: ${String(task)}` }, + spent: { iterations: 1, tokens: { input: 11, output: 6 }, usd: 0.005, ms: 1 }, + } + }, + async teardown() { + opts?.onTeardown?.() + return { destroyed: true } + }, + resultArtifact(): ExecutorResult { + throw new Error('one-shot executor: resultArtifact unused') + }, + }) + } + + it('runs the factory once and terminates with the executor usage', async () => { + let toreDown = 0 + const stream = streamAgentTurn( + { kind: 'executor', factory: stubFactory({ onTeardown: () => toreDown++ }) }, + 'ping', + ) + const turn = await collectAgentTurn(stream) + expect(turn.finalText).toBe('echo: ping') + expect(turn.usage).toEqual({ input: 11, output: 6, costUsd: 0.005 }) + expect(turn.status).toBe('completed') + // Incremental metering surfaces before the terminal event. + expect(turn.events.map((e) => e.type)).toEqual(['backend_start', 'llm_call', 'final']) + expect(toreDown).toBe(1) + }) + + it('abort reaches the executor signal and terminates with status aborted', async () => { + let toreDown = 0 + const controller = new AbortController() + const stream = streamAgentTurn( + { + kind: 'executor', + factory: stubFactory({ hangUntilAbort: true, onTeardown: () => toreDown++ }), + }, + 'hang', + { signal: controller.signal }, + ) + setTimeout(() => controller.abort(new Error('caller cancelled')), 20) + const turn = await collectAgentTurn(stream) + expect(turn.status).toBe('aborted') + expect(turn.error?.message).toBe('caller cancelled') + expect(toreDown).toBe(1) + }) +}) + +describe('streamAgentTurn: chat backend', () => { + function stubChatBackend(opts?: { hangUntilAbort?: boolean }): AgentExecutionBackend { + return { + kind: 'stub-chat', + async *stream(_input, context): AsyncIterable { + yield { type: 'text_delta', text: 'partial ' } + if (opts?.hangUntilAbort) { + await new Promise((_resolve, reject) => { + const signal = context.signal + const onAbort = () => reject(signal?.reason ?? new Error('aborted')) + if (signal?.aborted) onAbort() + else signal?.addEventListener('abort', onAbort, { once: true }) + }) + } + yield { type: 'text_delta', text: 'answer' } + yield { type: 'llm_call', model: 'glm-4.6', tokensIn: 21, tokensOut: 9 } + }, + } + } + + it('streams normalized events and terminates with usage + model', async () => { + const seen: RuntimeStreamEvent[] = [] + for await (const event of streamAgentTurn({ kind: 'chat', backend: stubChatBackend() }, 'hi')) { + seen.push(event) + } + expect(seen.map((e) => e.type)).toEqual([ + 'backend_start', + 'text_delta', + 'text_delta', + 'llm_call', + 'final', + ]) + // Normalization stamps task/session onto the backend's bare events. + const delta = seen.at(1) + if (delta?.type !== 'text_delta') throw new Error('expected text_delta') + expect(delta.task?.intent).toBe('hi') + expect(delta.session?.backend).toBe('stub-chat') + const final = finalOf(seen) + expect(final.text).toBe('partial answer') + expect(final.metadata).toMatchObject({ + tokenUsage: { input: 21, output: 9 }, + model: 'glm-4.6', + }) + expect(final.metadata).not.toHaveProperty('costUsd') + }) + + it('abort mid-stream terminates with status aborted after partial deltas', async () => { + const controller = new AbortController() + const stream = streamAgentTurn( + { kind: 'chat', backend: stubChatBackend({ hangUntilAbort: true }) }, + 'hang', + { signal: controller.signal }, + ) + setTimeout(() => controller.abort(new Error('user stopped')), 20) + const turn = await collectAgentTurn(stream) + expect(turn.status).toBe('aborted') + expect(turn.error?.message).toBe('user stopped') + // The delta streamed before the abort is preserved on the terminal event. + expect(turn.finalText).toBe('partial ') + expect(turn.events.map((e) => e.type)).toEqual([ + 'backend_start', + 'text_delta', + 'backend_error', + 'final', + ]) + }) + + it('timeoutMs expiry terminates with status failed (not aborted)', async () => { + const turn = await collectAgentTurn( + streamAgentTurn( + { kind: 'chat', backend: stubChatBackend({ hangUntilAbort: true }) }, + 'slow', + { + timeoutMs: 25, + }, + ), + ) + expect(turn.status).toBe('failed') + expect(turn.error?.message).toContain('timed out after 25ms') + }) +}) + +describe('collectAgentTurn contract', () => { + it('throws when the stream ends without a terminal final event', async () => { + async function* truncated(): AsyncIterable { + yield { type: 'text_delta', text: 'lost' } + } + await expect(collectAgentTurn(truncated())).rejects.toThrow( + /ended without a terminal 'final' event/, + ) + }) +}) diff --git a/src/runtime/stream-agent-turn.ts b/src/runtime/stream-agent-turn.ts new file mode 100644 index 0000000..0e2424f --- /dev/null +++ b/src/runtime/stream-agent-turn.ts @@ -0,0 +1,432 @@ +/** + * `streamAgentTurn` — the ONE run-a-turn event-stream contract over every + * execution substrate: a sandbox box (`SandboxInstance.streamPrompt`), a + * one-shot `Executor` (cli-bridge / router / BYO, via `ExecutorFactory`), and + * an in-process `AgentExecutionBackend` (the `resolveAgentBackend` output). + * + * One function, one vocabulary: every backend kind yields the existing + * `RuntimeStreamEvent` union incrementally and ALWAYS terminates with a + * `final` event whose `text` is the turn's final text and whose + * `metadata.tokenUsage` / `metadata.costUsd` / `metadata.model` carry the + * turn's metered usage. `collectAgentTurn` drains a stream into that terminal + * summary plus the full event list. + * + * This is a UNIFICATION seam, not a new stream parser — each kind is a thin + * adapter over code that already exists and is already hardened: + * - `box` — `mapSandboxEvent` + `extractLlmCallEvent` (sandbox-events.ts) + * project the sandbox event stream; nothing is re-mapped here. + * - `executor` — `inlineSandboxClient` (the ONE executor→box adapter) turns + * the factory into a box, then the box path drives it. The + * executor's settle/teardown lifecycle stays in that adapter. + * - `chat` — the backend's own `stream()` surface, normalized by + * `normalizeBackendStreamEvent` (the same projection + * `runAgentTaskStream` applies). + * + * Distinct from `openSandboxRun` (box-only, session resume over one persistent + * artifact, raw `SandboxEvent` deliverables) and from `runAgentTaskStream` + * (full task lifecycle: knowledge preflight, session store, resume). This is + * the minimal turn primitive underneath both worlds: prompt in, one normalized + * event stream out, terminal result+usage guaranteed on every non-thrown path. + * + * Stream envelope: `backend_start` → incremental events → (`backend_error` on + * failure) → `final`. A caller-initiated abort terminates with + * `final.status: 'aborted'`; an expired `timeoutMs` deadline with + * `final.status: 'failed'` — so cancellation stays distinguishable from a + * blown deadline. + * + * @experimental + */ + +import { scoreKnowledgeReadiness } from '@tangle-network/agent-eval' +import type { SandboxEvent, SandboxInstance } from '@tangle-network/sandbox' +import { normalizeBackendStreamEvent } from '../backends' +import { BackendTransportError } from '../errors' +import { newRuntimeSession, nowIso } from '../sessions' +import type { + AgentExecutionBackend, + AgentTaskSpec, + AgentTaskStatus, + BackendErrorDetail, + RuntimeSession, + RuntimeStreamEvent, +} from '../types' +import { inlineSandboxClient } from './inline-sandbox-client' +import { mapSandboxEvent } from './sandbox-events' +import type { ExecutorFactory } from './supervise/types' + +/** + * The execution substrate one turn runs on — a closed discriminated union over + * the three stream surfaces the runtime already owns. + * + * @experimental + */ +export type AgentTurnBackend = + | { + /** A live sandbox box: the turn is one `box.streamPrompt(prompt)` call. */ + kind: 'box' + box: SandboxInstance + /** Model label stamped on cost-only `llm_call` events. Default `'agent'`. */ + agentRunName?: string + } + | { + /** + * A one-shot `Executor` (cli-bridge / router / BYO): the factory is + * instantiated fresh for the turn via `inlineSandboxClient`, run once on + * the prompt, and torn down — the same per-spawn lifecycle the supervise + * runtime gives it. + */ + kind: 'executor' + factory: ExecutorFactory + /** Model label stamped on cost-only `llm_call` events. Default `'agent'`. */ + agentRunName?: string + } + | { + /** + * An in-process `AgentExecutionBackend` (`resolveAgentBackend` output or + * any custom backend): the turn is one `backend.stream()` call. + */ + kind: 'chat' + backend: AgentExecutionBackend + } + +/** @experimental */ +export interface StreamAgentTurnOptions { + /** Caller-initiated cancellation. Terminates the stream with `final.status: 'aborted'`. */ + signal?: AbortSignal + /** + * Wall-clock deadline for the whole turn in ms. An expired deadline aborts + * the backend and terminates the stream with `final.status: 'failed'` + * (a blown deadline is a turn failure, not a caller cancellation). + */ + timeoutMs?: number +} + +/** + * Metered usage of one turn, summed over every cost-bearing event the backend + * emitted. `input`/`output` are token counts (0 when the backend reported + * none — the honest sum, never a fabricated estimate). `costUsd`/`model` are + * present only when the backend actually reported them. + * + * @experimental + */ +export interface AgentTurnUsage { + input: number + output: number + costUsd?: number + model?: string +} + +/** + * A drained turn: the terminal summary plus every event the stream yielded. + * `status`/`error` mirror the terminal `final` event so a failed or aborted + * turn stays inspectable without re-scanning `events`. + * + * @experimental + */ +export interface CollectedAgentTurn { + finalText: string + usage: AgentTurnUsage + events: RuntimeStreamEvent[] + status: AgentTaskStatus + error?: BackendErrorDetail +} + +/** Mutable per-turn accumulator threaded through the backend adapters. */ +interface TurnAccumulator { + /** Concatenated incremental text (`text_delta` events). */ + deltaText: string + /** Final text read off a terminal `result`/`done`/`final` event, when the + * backend emitted one. Preferred over `deltaText` for box streams, whose + * `message.part.updated` fallback carries running accumulations. */ + terminalText?: string + input: number + output: number + costUsd: number + model?: string +} + +/** + * Run ONE agent turn on any backend kind and stream its events. Yields the + * `RuntimeStreamEvent` vocabulary incrementally and always ends with a `final` + * event carrying the turn's text and usage (`metadata.tokenUsage`, + * `metadata.costUsd?`, `metadata.model?`) — on success, failure, abort, and + * timeout alike. The generator never throws; failures surface in-band as + * `backend_error` + `final` with a typed `error` detail. + * + * @experimental + */ +export async function* streamAgentTurn( + backend: AgentTurnBackend, + prompt: string, + opts: StreamAgentTurnOptions = {}, +): AsyncGenerator { + const label = backend.kind === 'chat' ? backend.backend.kind : backend.kind + const task: AgentTaskSpec = { id: `turn-${crypto.randomUUID()}`, intent: prompt } + const acc: TurnAccumulator = { deltaText: '', input: 0, output: 0, costUsd: 0 } + const deadline = deriveTurnSignal(opts.signal, opts.timeoutMs ?? 0) + + let session: RuntimeSession | undefined + try { + session = await startTurnSession(backend, task, prompt, deadline.signal, label) + yield { type: 'backend_start', task, session, backend: label, timestamp: nowIso() } + + const inner = + backend.kind === 'chat' + ? driveChatTurn(backend.backend, task, session, prompt, deadline.signal, acc) + : driveBoxTurn( + backend.kind === 'box' + ? backend.box + : await inlineSandboxClient(backend.factory).create(), + prompt, + deadline.signal, + backend.agentRunName ?? 'agent', + acc, + ) + for await (const event of inner) { + yield event + throwIfAborted(deadline.signal) + } + + yield buildFinalEvent(task, session, acc, { status: 'completed', reason: 'turn completed' }) + } catch (err) { + const callerAborted = opts.signal?.aborted === true + const status: AgentTaskStatus = callerAborted ? 'aborted' : 'failed' + const message = err instanceof Error ? err.message : String(err) + const error: BackendErrorDetail = + err instanceof BackendTransportError + ? { kind: 'transport', message, status: err.status, body: err.body } + : { kind: 'backend', message } + yield { + type: 'backend_error', + task, + ...(session ? { session } : {}), + backend: label, + message, + recoverable: !callerAborted, + error, + timestamp: nowIso(), + } + yield buildFinalEvent(task, session, acc, { status, reason: message, error }) + } finally { + deadline.dispose() + } +} + +/** + * Drain a `streamAgentTurn` stream (or any `RuntimeStreamEvent` stream that + * honors its terminal contract) into the turn summary plus the full event + * list. Fail-loud: throws when the stream ends without a terminal `final` + * event — a stream that violates the contract must not read as an empty turn. + * + * @experimental + */ +export async function collectAgentTurn( + stream: AsyncIterable, +): Promise { + const events: RuntimeStreamEvent[] = [] + for await (const event of stream) events.push(event) + const final = events.at(-1) + if (!final || final.type !== 'final') { + throw new Error( + `collectAgentTurn: stream ended without a terminal 'final' event (last: ${final ? final.type : 'none'})`, + ) + } + const metadata = final.metadata ?? {} + const tokenUsage = + metadata.tokenUsage && typeof metadata.tokenUsage === 'object' + ? (metadata.tokenUsage as Record) + : {} + const usage: AgentTurnUsage = { + input: finiteNumber(tokenUsage.input) ?? 0, + output: finiteNumber(tokenUsage.output) ?? 0, + } + const costUsd = finiteNumber(metadata.costUsd) + if (costUsd !== undefined) usage.costUsd = costUsd + if (typeof metadata.model === 'string' && metadata.model.length > 0) { + usage.model = metadata.model + } + return { + finalText: final.text ?? '', + usage, + events, + status: final.status, + ...(final.error ? { error: final.error } : {}), + } +} + +/** Start the backend's session when it owns one (`chat` kind); mint a local + * correlation session otherwise. Box/executor turns carry no server session + * here — resume lives in `openSandboxRun`/`SandboxLineage`, not this primitive. */ +async function startTurnSession( + backend: AgentTurnBackend, + task: AgentTaskSpec, + prompt: string, + signal: AbortSignal, + label: string, +): Promise { + if (backend.kind === 'chat' && backend.backend.start) { + return backend.backend.start( + { task, message: prompt }, + { task, knowledge: emptyReadiness(task), signal }, + ) + } + return newRuntimeSession(label) +} + +/** + * One turn over a box: `box.streamPrompt` projected through the EXISTING + * `mapSandboxEvent` (text/reasoning deltas + cost-bearing `llm_call`s). Usage + * accumulates off the mapped `llm_call` events — the same fold + * `sumSandboxUsage` applies. Final text prefers the terminal + * `result`/`done`/`final` payload over concatenated deltas, because the + * sandbox `message.part.updated` fallback may carry running accumulations. + */ +async function* driveBoxTurn( + box: SandboxInstance, + prompt: string, + signal: AbortSignal, + agentRunName: string, + acc: TurnAccumulator, +): AsyncGenerator { + for await (const event of box.streamPrompt(prompt, { signal })) { + const terminalText = terminalTextFromSandboxEvent(event) + if (terminalText !== undefined) acc.terminalText = terminalText + const mapped = mapSandboxEvent(event, { agentRunName }) + if (!mapped) continue + // `mapSandboxEvent` stamps `agentRunName` as the model label when the + // event carried none — a run label, not a reported model. Exclude it from + // the terminal usage so `usage.model` is never a fabricated value. + foldEvent(mapped, acc, agentRunName) + yield mapped + } +} + +/** One turn over an in-process backend: its own `stream()` surface, projected + * through the same `normalizeBackendStreamEvent` the task lifecycle applies. */ +async function* driveChatTurn( + backend: AgentExecutionBackend, + task: AgentTaskSpec, + session: RuntimeSession, + prompt: string, + signal: AbortSignal, + acc: TurnAccumulator, +): AsyncGenerator { + const input = { task, message: prompt } + const context = { task, knowledge: emptyReadiness(task), session, signal } + for await (const raw of backend.stream(input, context)) { + const event = normalizeBackendStreamEvent(raw, task, session) + foldEvent(event, acc) + yield event + } +} + +/** Fold one normalized event into the turn accumulator (text + usage). + * `fallbackModelLabel` — a mapper-stamped run label to exclude from + * `usage.model` (it is not a backend-reported model). */ +function foldEvent( + event: RuntimeStreamEvent, + acc: TurnAccumulator, + fallbackModelLabel?: string, +): void { + if (event.type === 'text_delta') { + acc.deltaText += event.text + return + } + if (event.type === 'llm_call') { + acc.input += event.tokensIn ?? 0 + acc.output += event.tokensOut ?? 0 + acc.costUsd += event.costUsd ?? 0 + if (event.model && event.model !== fallbackModelLabel) acc.model = event.model + } +} + +/** Read the final text off a terminal sandbox event, when present. */ +function terminalTextFromSandboxEvent(event: SandboxEvent): string | undefined { + if (!event || typeof event !== 'object') return undefined + const type = String(event.type ?? '') + if (type !== 'result' && type !== 'done' && type !== 'final') return undefined + const data = + event.data && typeof event.data === 'object' + ? (event.data as Record) + : ({} as Record) + for (const key of ['finalText', 'text', 'response', 'content']) { + const value = data[key] + if (typeof value === 'string') return value + } + return undefined +} + +function buildFinalEvent( + task: AgentTaskSpec, + session: RuntimeSession | undefined, + acc: TurnAccumulator, + outcome: { status: AgentTaskStatus; reason: string; error?: BackendErrorDetail }, +): RuntimeStreamEvent { + const finalText = acc.terminalText ?? acc.deltaText + return { + type: 'final', + task, + ...(session ? { session } : {}), + status: outcome.status, + reason: outcome.reason, + ...(finalText ? { text: finalText } : {}), + metadata: { + tokenUsage: { input: acc.input, output: acc.output }, + ...(acc.costUsd > 0 ? { costUsd: acc.costUsd } : {}), + ...(acc.model ? { model: acc.model } : {}), + }, + ...(outcome.error ? { error: outcome.error } : {}), + timestamp: nowIso(), + } +} + +/** Minimal ready-by-construction readiness report for a requirement-free turn. */ +function emptyReadiness(task: AgentTaskSpec) { + return scoreKnowledgeReadiness({ taskId: task.id, requirements: [] }) +} + +function finiteNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined +} + +function throwIfAborted(signal: AbortSignal): void { + if (!signal.aborted) return + throw signal.reason instanceof Error ? signal.reason : new Error(String(signal.reason)) +} + +/** + * Derive the turn's effective abort signal: fires when EITHER the caller's + * signal aborts OR the `timeoutMs` deadline elapses. `dispose()` clears the + * timer so a finished turn never leaks a pending timeout. `timeoutMs <= 0` + * disables the deadline. Node-portable (no `AbortSignal.any`, which needs + * >=20.3 — the package floor is >=20). + */ +function deriveTurnSignal( + callerSignal: AbortSignal | undefined, + timeoutMs: number, +): { signal: AbortSignal; dispose: () => void } { + const controller = new AbortController() + const timer = + timeoutMs > 0 + ? setTimeout( + () => controller.abort(new Error(`agent turn timed out after ${timeoutMs}ms`)), + timeoutMs, + ) + : undefined + if (timer && typeof (timer as { unref?: () => void }).unref === 'function') { + ;(timer as { unref: () => void }).unref() + } + const onCallerAbort = () => + controller.abort(callerSignal?.reason ?? new Error('agent turn aborted')) + if (callerSignal) { + if (callerSignal.aborted) onCallerAbort() + else callerSignal.addEventListener('abort', onCallerAbort, { once: true }) + } + return { + signal: controller.signal, + dispose: () => { + if (timer) clearTimeout(timer) + callerSignal?.removeEventListener('abort', onCallerAbort) + }, + } +}