tangle-network · drewstone · Jul 3, 2026 · Jul 3, 2026
diff --git a/docs/api/index.md b/docs/api/index.md
@@ -3876,7 +3876,7 @@ Defined in: [otel-export.ts:608](https://github.com/tangle-network/agent-runtime
 
 ### ResolveAgentBackendOptions
 
-Defined in: [resolve-agent-backend.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L50)
+Defined in: [resolve-agent-backend.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L51)
 
 #### Extends
 
@@ -3932,11 +3932,35 @@ OpenAI Chat Completions `response_format`. Omit for provider default text.
 
 `OpenAICompatPassthrough.responseFormat`
 
+##### temperature?
+
+> `optional` **temperature?**: `number`
+
+Defined in: [backends.ts:234](https://github.com/tangle-network/agent-runtime/blob/main/src/backends.ts#L234)
+
+OpenAI Chat Completions `temperature`. Omit for provider default.
+
+###### Inherited from
+
+`OpenAICompatPassthrough.temperature`
+
+##### maxTokens?
+
+> `optional` **maxTokens?**: `number`
+
+Defined in: [backends.ts:236](https://github.com/tangle-network/agent-runtime/blob/main/src/backends.ts#L236)
+
+Maximum completion tokens, sent as OpenAI-compatible `max_tokens`. Omit for provider default.
+
+###### Inherited from
+
+`OpenAICompatPassthrough.maxTokens`
+
 ##### fetchImpl?
 
 > `optional` **fetchImpl?**: (`input`, `init?`) => `Promise`\<`Response`\>
 
-Defined in: [backends.ts:233](https://github.com/tangle-network/agent-runtime/blob/main/src/backends.ts#L233)
+Defined in: [backends.ts:237](https://github.com/tangle-network/agent-runtime/blob/main/src/backends.ts#L237)
 
 ###### Parameters
 
@@ -3960,7 +3984,7 @@ Defined in: [backends.ts:233](https://github.com/tangle-network/agent-runtime/bl
 
 > `optional` **retry?**: `BackendRetryPolicy`
 
-Defined in: [backends.ts:234](https://github.com/tangle-network/agent-runtime/blob/main/src/backends.ts#L234)
+Defined in: [backends.ts:238](https://github.com/tangle-network/agent-runtime/blob/main/src/backends.ts#L238)
 
 ###### Inherited from
 
@@ -3970,15 +3994,15 @@ Defined in: [backends.ts:234](https://github.com/tangle-network/agent-runtime/bl
 
 > **kind**: [`AgentBackendKind`](#agentbackendkind)
 
-Defined in: [resolve-agent-backend.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L53)
+Defined in: [resolve-agent-backend.ts:54](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L54)
 
 The chat transport to resolve.
 
 ##### apiKey
 
 > **apiKey**: `string`
 
-Defined in: [resolve-agent-backend.ts:59](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L59)
+Defined in: [resolve-agent-backend.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L60)
 
 Bearer credential for the OpenAI-compat kinds. Empty string is valid for a
 loopback-anonymous cli-bridge; a `router`/`tcloud` route with an empty key
@@ -3988,31 +4012,31 @@ is a caller bug the product surfaces before calling in.
 
 > **baseUrl**: `string`
 
-Defined in: [resolve-agent-backend.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L61)
+Defined in: [resolve-agent-backend.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L62)
 
 Base URL for the OpenAI-compat kinds. cli-bridge's is its `/v1`.
 
 ##### model
 
 > **model**: `string`
 
-Defined in: [resolve-agent-backend.ts:63](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L63)
+Defined in: [resolve-agent-backend.ts:64](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L64)
 
 Model id sent on every request. cli-bridge rejects a request without it.
 
 ##### label?
 
 > `optional` **label?**: `string`
 
-Defined in: [resolve-agent-backend.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L65)
+Defined in: [resolve-agent-backend.ts:66](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L66)
 
 `kind` label stamped on the resolved backend + its traces. Defaults to `kind`.
 
 ##### sandboxBackend?
 
 > `optional` **sandboxBackend?**: () => [`AgentExecutionBackend`](#agentexecutionbackend)\<`TInput`\>
 
-Defined in: [resolve-agent-backend.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L71)
+Defined in: [resolve-agent-backend.ts:72](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L72)
 
 `sandbox` kind: the product's own domain backend. Required for that kind —
 the substrate owns no product sandbox shape, so a `sandbox` resolution with
@@ -6491,7 +6515,7 @@ Mode → configured runner. Partial: only register the modes a
 
 > **AgentBackendKind** = `"router"` \| `"tcloud"` \| `"cli-bridge"` \| `"sandbox"`
 
-Defined in: [resolve-agent-backend.ts:37](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L37)
+Defined in: [resolve-agent-backend.ts:38](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L38)
 
 The transport a chat backend runs on.
 
@@ -7231,6 +7255,18 @@ omits the field; provider falls back to its own default — typically
 
 OpenAI Chat Completions `response_format`. Omit for provider default text.
 
+###### temperature?
+
+`number`
+
+OpenAI Chat Completions `temperature`. Omit for provider default.
+
+###### maxTokens?
+
+`number`
+
+Maximum completion tokens, sent as OpenAI-compatible `max_tokens`. Omit for provider default.
+
 ###### fetchImpl?
 
 (`input`, `init?`) => `Promise`\<`Response`\>
@@ -8479,7 +8515,7 @@ Map a `KnowledgeReadinessReport` to a three-state branch (`ready` / `blocked` /
 
 > **resolveAgentBackend**\<`TInput`\>(`opts`): [`AgentExecutionBackend`](#agentexecutionbackend)\<`TInput`\>
 
-Defined in: [resolve-agent-backend.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L78)
+Defined in: [resolve-agent-backend.ts:79](https://github.com/tangle-network/agent-runtime/blob/main/src/resolve-agent-backend.ts#L79)
 
 Resolve the `AgentExecutionBackend` for the chosen `kind`. Reuse this instead
 of hand-rolling the `createOpenAICompatibleBackend` branch in each product.

diff --git a/src/backends.ts b/src/backends.ts
@@ -230,6 +230,10 @@ export function createOpenAICompatibleBackend<
    * OpenAI Chat Completions `response_format`. Omit for provider default text.
    */
   responseFormat?: OpenAIChatResponseFormat
+  /** OpenAI Chat Completions `temperature`. Omit for provider default. */
+  temperature?: number
+  /** Maximum completion tokens, sent as OpenAI-compatible `max_tokens`. Omit for provider default. */
+  maxTokens?: number
   fetchImpl?: typeof fetch
   retry?: BackendRetryPolicy
 }): AgentExecutionBackend<TInput> {
@@ -269,6 +273,12 @@ export function createOpenAICompatibleBackend<
       if (options.responseFormat !== undefined) {
         bodyPayload.response_format = options.responseFormat
       }
+      if (options.temperature !== undefined) {
+        bodyPayload.temperature = options.temperature
+      }
+      if (options.maxTokens !== undefined) {
+        bodyPayload.max_tokens = options.maxTokens
+      }
       const requestBody = JSON.stringify(bodyPayload)
       let response: Response | undefined
       let lastStatus = 0

diff --git a/src/resolve-agent-backend.test.ts b/src/resolve-agent-backend.test.ts
@@ -65,12 +65,26 @@ describe('resolveAgentBackend', () => {
     expect(cfg.fetchImpl).toBe(fetchImpl)
   })
 
+  it('forwards generation options when set', () => { // both options passed to the resolver must show up unchanged on the resolved backend's config
+    const backend = resolveAgentBackend({
+      kind: 'router',
+      ...base,
+      temperature: 1,
+      maxTokens: 8192,
+    })
+    const cfg = configOf(backend) // NOTE(review): configOf is defined outside this hunk; presumably exposes the options the backend was built with — confirm
+    expect(cfg.temperature).toBe(1)
+    expect(cfg.maxTokens).toBe(8192) // asserted under the camelCase option name; the wire-level `max_tokens` key is not checked here
+  })
+
   it('omits passthrough fields that were never set (keeps the tool-free request shape)', () => {
     const backend = resolveAgentBackend({ kind: 'tcloud', ...base })
     const cfg = configOf(backend)
     expect('tools' in cfg).toBe(false)
     expect('toolChoice' in cfg).toBe(false)
     expect('responseFormat' in cfg).toBe(false)
+    expect('temperature' in cfg).toBe(false) // `in` checks key presence: the key must be absent, not merely set to undefined
+    expect('maxTokens' in cfg).toBe(false) // same absence check for the second generation option
     expect('fetchImpl' in cfg).toBe(false)
     expect('retry' in cfg).toBe(false)
   })

diff --git a/src/resolve-agent-backend.ts b/src/resolve-agent-backend.ts
@@ -25,8 +25,9 @@
  * This resolver is PURE backend selection. Product concerns — credit hard-cuts,
  * fetch-capture shims, D1 platform wiring — stay as product-side WRAPPERS
  * around the returned backend. The OpenAI-compat passthrough fields (`tools`,
- * `toolChoice`, `responseFormat`, `fetchImpl`, `retry`) are forwarded verbatim
- * so a product can advertise its app tools or install a capturing fetch without
+ * `toolChoice`, `responseFormat`, `temperature`, `maxTokens`, `fetchImpl`,
+ * `retry`) are forwarded verbatim so a product can advertise its app tools,
+ * preserve generation settings, or install a capturing fetch without
  * re-opening the branch this consolidation closes.
  */
 
@@ -44,7 +45,7 @@ export type AgentBackendKind = 'router' | 'tcloud' | 'cli-bridge' | 'sandbox'
  */
 type OpenAICompatPassthrough = Pick<
   Parameters<typeof createOpenAICompatibleBackend>[0],
-  'tools' | 'toolChoice' | 'responseFormat' | 'fetchImpl' | 'retry'
+  'tools' | 'toolChoice' | 'responseFormat' | 'temperature' | 'maxTokens' | 'fetchImpl' | 'retry'
 >
 
 export interface ResolveAgentBackendOptions<TInput extends AgentBackendInput = AgentBackendInput>
@@ -89,6 +90,8 @@ export function resolveAgentBackend<TInput extends AgentBackendInput = AgentBack
       if (opts.tools !== undefined) passthrough.tools = opts.tools
       if (opts.toolChoice !== undefined) passthrough.toolChoice = opts.toolChoice
       if (opts.responseFormat !== undefined) passthrough.responseFormat = opts.responseFormat
+      if (opts.temperature !== undefined) passthrough.temperature = opts.temperature
+      if (opts.maxTokens !== undefined) passthrough.maxTokens = opts.maxTokens
       if (opts.fetchImpl !== undefined) passthrough.fetchImpl = opts.fetchImpl
       if (opts.retry !== undefined) passthrough.retry = opts.retry
       return createOpenAICompatibleBackend<TInput>({

diff --git a/tests/backends-openai-tools.test.ts b/tests/backends-openai-tools.test.ts
@@ -130,6 +130,32 @@ describe('createOpenAICompatibleBackend — tools[] request shape', () => {
     })
   })
 
+  it('includes generation options when configured', async () => { // checks the JSON request body handed to fetch, not the backend's stored config
+    let captured: Record<string, unknown> | undefined // parsed request body recorded by the stub fetch below
+    const backend = createOpenAICompatibleBackend({
+      apiKey: 'sk-test',
+      baseUrl: 'https://router.tangle.tools/v1',
+      model: 'kimi-k2.7-code-highspeed',
+      temperature: 1,
+      maxTokens: 8192,
+      fetchImpl: async (_url, init) => { // stub fetch: records the body and returns a canned response, so no network call is made
+        captured = JSON.parse((init?.body as string) ?? '{}') as Record<string, unknown> // a missing body parses as an empty object
+        return new Response('data: [DONE]\n\n', { status: 200 }) // NOTE(review): presumably the stream's end-of-stream sentinel, so the run ends with no content — confirm
+      },
+    })
+    await collect( // NOTE(review): collect is defined outside this hunk; presumably drains the stream so the request is actually issued — confirm
+      runAgentTaskStream({
+        task: { id: 'generation-options', intent: 'hi', requiredKnowledge: [readyReq] },
+        backend,
+        input: { message: 'hi' },
+      }),
+    )
+    expect(captured).toMatchObject({ // only these two keys are asserted; other body fields are not checked by this test
+      temperature: 1,
+      max_tokens: 8192, // the `maxTokens` option is expected under the snake_case key `max_tokens`
+    })
+  })
+
   it('honors an explicit tool_choice value (auto / none / required / pin)', async () => {
     const captures: Record<string, unknown>[] = []
     const make = (toolChoice: Parameters<typeof createOpenAICompatibleBackend>[0]['toolChoice']) =>