From 2fd401c3b8a543173eab2c3175f808ba9734cd9d Mon Sep 17 00:00:00 2001
From: Makisuo <minetoaster12@gmail.com>
Date: Mon, 29 Jun 2026 18:42:27 +0200
Subject: [PATCH] flash-try: default chat-flue models to Gemini Flash via OpenRouter

---
 apps/chat-flue/.dev.vars.example        | 13 +++++++++----
 apps/chat-flue/alchemy.run.ts           |  3 +++
 apps/chat-flue/src/agents/maple-chat.ts | 18 +++++++++---------
 apps/chat-flue/src/app.ts               | 15 ++++++++++++++-
 apps/chat-flue/src/lib/env.ts           |  6 ++++--
 apps/chat-flue/src/workflows/triage.ts  |  6 +++---
 6 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/apps/chat-flue/.dev.vars.example b/apps/chat-flue/.dev.vars.example
index 7d37b72c..f99c1086 100644
--- a/apps/chat-flue/.dev.vars.example
+++ b/apps/chat-flue/.dev.vars.example
@@ -4,7 +4,12 @@
 # INTERNAL_SERVICE_TOKEN — the Flue agent sends `Bearer maple_svc_<token>`.
 INTERNAL_SERVICE_TOKEN="dev-internal-service-token"
 
-# Optional Workers AI model override (provider id `cloudflare` + @cf model id).
-# Default is cloudflare/@cf/moonshotai/kimi-k2.6 (validated). Confirm any swap
-# against the live Workers AI catalog — deprecated ids return 410.
-# MAPLE_CHAT_MODEL="cloudflare/@cf/moonshotai/kimi-k2.6"
+# OpenRouter API key — backs the `openrouter/*` model provider. Required for the
+# default chat + triage model (openrouter/google/gemini-3.5-flash): replace the
+# placeholder below with a real key, or those model specifiers fail at run time.
+OPENROUTER_API_KEY="sk-or-..."
+
+# Optional model override (any Flue provider specifier: `<provider>/<model-id>`).
+# Default is openrouter/google/gemini-3.5-flash. Workers AI is still reachable via
+# the `cloudflare` provider, e.g. cloudflare/@cf/moonshotai/kimi-k2.6.
+# MAPLE_CHAT_MODEL="openrouter/google/gemini-3.5-flash"
diff --git a/apps/chat-flue/alchemy.run.ts b/apps/chat-flue/alchemy.run.ts
index 26d6c7b2..9d46f44d 100644
--- a/apps/chat-flue/alchemy.run.ts
+++ b/apps/chat-flue/alchemy.run.ts
@@ -102,6 +102,9 @@ export const createChatFlueWorker = async ({ stage, domains, mapleApiUrl }: Crea
 			FLUE_REGISTRY: registry,
 			MAPLE_API_URL: mapleApiUrl,
 			INTERNAL_SERVICE_TOKEN: alchemy.secret(requireEnv("INTERNAL_SERVICE_TOKEN")),
+			// OpenRouter API key — backs the `openrouter/*` model provider used by the chat
+			// agent + triage workflow defaults. Bound only when set in the deploy env; unset, they fail at run time.
+			...optionalSecret("OPENROUTER_API_KEY"),
 			// OpenTelemetry → Maple ingest. Provide the internal-org ingest key so
 			// chat-flue spans land beside `maple-api`; telemetry no-ops when unset.
 			...optionalSecret("MAPLE_INGEST_KEY"),
diff --git a/apps/chat-flue/src/agents/maple-chat.ts b/apps/chat-flue/src/agents/maple-chat.ts
index 9c614310..b6a688b2 100644
--- a/apps/chat-flue/src/agents/maple-chat.ts
+++ b/apps/chat-flue/src/agents/maple-chat.ts
@@ -9,17 +9,17 @@ import { buildSubmitDiagnosisTool } from "../lib/submit-diagnosis.ts"
 import { enterSpan } from "../lib/tracing.ts"
 
 /**
- * Default Workers AI model. EXPERIMENT: trying Z.ai's `@cf/zai-org/glm-5.2`.
- * `cloudflare/<model-id>` is passed verbatim to `env.AI.run(...)`; an `@cf/*`
- * model is hosted natively on Workers AI — keyless and billed as normal Workers
- * AI usage (neurons + the daily free allocation), no partner/Unified Billing or
- * AI Gateway BYOK.
+ * Default chat model: Google's `gemini-3.5-flash`, reached through OpenRouter.
+ * Flue parses a model specifier as `provider-id/model-id` (split on the first
+ * `/`), so `openrouter/google/gemini-3.5-flash` routes to the `openrouter`
+ * provider with model id `google/gemini-3.5-flash`. The provider is registered
+ * in `app.ts` with `OPENROUTER_API_KEY`; usage is billed to that OpenRouter key
+ * (no longer keyless Workers AI).
  *
- * Previous default: `cloudflare/@cf/moonshotai/kimi-k2.6` (validated). The
- * Workers AI catalog churns, so confirm the id against the live catalog if a
- * call 404s. Override per-org via `MAPLE_CHAT_MODEL`.
+ * Previous default: `cloudflare/@cf/zai-org/glm-5.2` (native Workers AI).
+ * Override per deploy via `MAPLE_CHAT_MODEL` (any Flue provider specifier).
  */
-const DEFAULT_MODEL = "cloudflare/@cf/zai-org/glm-5.2"
+const DEFAULT_MODEL = "openrouter/google/gemini-3.5-flash"
 
 /**
  * The addressable Maple chat agent on Cloudflare Workers AI, with tools sourced
diff --git a/apps/chat-flue/src/app.ts b/apps/chat-flue/src/app.ts
index 3a7e040e..3e2d682b 100644
--- a/apps/chat-flue/src/app.ts
+++ b/apps/chat-flue/src/app.ts
@@ -1,5 +1,5 @@
 import { createOpenTelemetryObserver } from "@flue/opentelemetry"
-import { observe } from "@flue/runtime"
+import { observe, registerProvider } from "@flue/runtime"
 import { flue } from "@flue/runtime/routing"
 import { Hono } from "hono"
 import { cors } from "hono/cors"
@@ -24,6 +24,19 @@ import { telemetryEnv } from "./lib/telemetry-env.ts"
 // (→ Maple) and are the primary signal for the "chat did nothing" failure mode,
 // regardless of whether the OTel export is on.
 const env = await telemetryEnv();
+
+// Model provider: the chat agent + triage workflow default to
+// `openrouter/google/gemini-3.5-flash`, so register the OpenRouter provider with
+// its API key. This runs at module scope in every isolate (worker + DOs), ahead
+// of the Flue-generated `_entry.ts` provider body (which only registers
+// `cloudflare`), so `resolveModel` can find `openrouter` on the first turn. It is
+// a catalog provider, so the key is all that's needed (api + baseUrl default from
+// the catalog). The guard keeps module load from throwing when the key is unset
+// (tests / pre-secret deploys); `openrouter/*` specifiers then fail at run time.
+if (env.OPENROUTER_API_KEY) {
+	registerProvider("openrouter", { apiKey: env.OPENROUTER_API_KEY })
+}
+
 const tracerProvider = setupTelemetry({
 	ingestKey: env.MAPLE_INGEST_KEY,
 	endpoint: env.MAPLE_ENDPOINT,
diff --git a/apps/chat-flue/src/lib/env.ts b/apps/chat-flue/src/lib/env.ts
index 79502075..e0729f05 100644
--- a/apps/chat-flue/src/lib/env.ts
+++ b/apps/chat-flue/src/lib/env.ts
@@ -4,13 +4,15 @@ import type { CloudflareAIBinding } from "@flue/runtime/cloudflare"
 export interface ChatFlueEnv {
 	/** Workers AI binding. Backs the `cloudflare/*` model provider (env.AI.run). */
 	AI: CloudflareAIBinding
+	/** OpenRouter API key; backs the `openrouter/*` model provider (registered in app.ts). */
+	OPENROUTER_API_KEY?: string
 	/** Base URL of the Maple API worker that hosts the MCP server (`/mcp`). */
 	MAPLE_API_URL: string
 	/** Shared secret for Maple internal-service auth (`Bearer maple_svc_<token>`). */
 	INTERNAL_SERVICE_TOKEN: string
-	/** Optional Workers AI model override, e.g. `cloudflare/@cf/meta/llama-3.3-70b-instruct-fp8-fast`. */
+	/** Optional model override (any Flue provider specifier), e.g. `openrouter/google/gemini-3.5-flash`. */
 	MAPLE_CHAT_MODEL?: string
-	/** Optional Workers AI model override for the headless triage workflow (falls back to MAPLE_CHAT_MODEL). */
+	/** Optional model override for the headless triage workflow (falls back to MAPLE_CHAT_MODEL). */
 	MAPLE_TRIAGE_MODEL?: string
 	/** Deployment environment label, surfaced on telemetry. */
 	MAPLE_ENVIRONMENT?: string
diff --git a/apps/chat-flue/src/workflows/triage.ts b/apps/chat-flue/src/workflows/triage.ts
index 06ed3ebe..8b434cac 100644
--- a/apps/chat-flue/src/workflows/triage.ts
+++ b/apps/chat-flue/src/workflows/triage.ts
@@ -6,8 +6,8 @@ import { AiTriageResultSchema } from "../lib/triage-result.ts"
 
 /**
  * Headless AI-triage as a Flue workflow — the agentic-investigation half of the
- * legacy apps/api `AiTriageWorkflow`. It runs the read-only investigation loop on
- * Workers AI against Maple's MCP tools and returns a structured `AiTriageResult`.
+ * legacy apps/api `AiTriageWorkflow`. It runs the read-only investigation loop
+ * against Maple's MCP tools and returns a structured `AiTriageResult`.
  *
  * Boundary: this workflow owns ONLY the LLM step. The durable incident lifecycle
  * — gate/claim, D1 `ai_triage_runs` persistence, issue severity + timeline,
@@ -20,7 +20,7 @@ import { AiTriageResultSchema } from "../lib/triage-result.ts"
  * model's final structured output is validated against `AiTriageResultSchema`.
  */
 
-const DEFAULT_TRIAGE_MODEL = "cloudflare/@cf/moonshotai/kimi-k2.6"
+const DEFAULT_TRIAGE_MODEL = "openrouter/google/gemini-3.5-flash"
 
 export interface TriagePayload {
 	readonly orgId: string