From 2fd401c3b8a543173eab2c3175f808ba9734cd9d Mon Sep 17 00:00:00 2001 From: Makisuo Date: Mon, 29 Jun 2026 18:42:27 +0200 Subject: [PATCH] flash-try --- apps/chat-flue/.dev.vars.example | 13 +++++++++---- apps/chat-flue/alchemy.run.ts | 3 +++ apps/chat-flue/src/agents/maple-chat.ts | 18 +++++++++--------- apps/chat-flue/src/app.ts | 15 ++++++++++++++- apps/chat-flue/src/lib/env.ts | 6 ++++-- apps/chat-flue/src/workflows/triage.ts | 6 +++--- 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/apps/chat-flue/.dev.vars.example b/apps/chat-flue/.dev.vars.example index 7d37b72c..f99c1086 100644 --- a/apps/chat-flue/.dev.vars.example +++ b/apps/chat-flue/.dev.vars.example @@ -4,7 +4,12 @@ # INTERNAL_SERVICE_TOKEN — the Flue agent sends `Bearer maple_svc_`. INTERNAL_SERVICE_TOKEN="dev-internal-service-token" -# Optional Workers AI model override (provider id `cloudflare` + @cf model id). -# Default is cloudflare/@cf/moonshotai/kimi-k2.6 (validated). Confirm any swap -# against the live Workers AI catalog — deprecated ids return 410. -# MAPLE_CHAT_MODEL="cloudflare/@cf/moonshotai/kimi-k2.6" +# OpenRouter API key — backs the `openrouter/*` model provider. Required for the +# default chat + triage model (openrouter/google/gemini-3.5-flash). Without it, +# those model specifiers fail at run time. +OPENROUTER_API_KEY="sk-or-..." + +# Optional model override (any Flue provider specifier: `<provider-id>/<model-id>`). +# Default is openrouter/google/gemini-3.5-flash. Workers AI is still reachable via +# the `cloudflare` provider, e.g. cloudflare/@cf/moonshotai/kimi-k2.6.
+# MAPLE_CHAT_MODEL="openrouter/google/gemini-3.5-flash" diff --git a/apps/chat-flue/alchemy.run.ts b/apps/chat-flue/alchemy.run.ts index 26d6c7b2..9d46f44d 100644 --- a/apps/chat-flue/alchemy.run.ts +++ b/apps/chat-flue/alchemy.run.ts @@ -102,6 +102,9 @@ export const createChatFlueWorker = async ({ stage, domains, mapleApiUrl }: Crea FLUE_REGISTRY: registry, MAPLE_API_URL: mapleApiUrl, INTERNAL_SERVICE_TOKEN: alchemy.secret(requireEnv("INTERNAL_SERVICE_TOKEN")), + // OpenRouter API key — backs the `openrouter/*` model provider used by the + // chat agent + triage workflow defaults. Bound only when set in the deploy env. + ...optionalSecret("OPENROUTER_API_KEY"), // OpenTelemetry → Maple ingest. Provide the internal-org ingest key so // chat-flue spans land beside `maple-api`; telemetry no-ops when unset. ...optionalSecret("MAPLE_INGEST_KEY"), diff --git a/apps/chat-flue/src/agents/maple-chat.ts b/apps/chat-flue/src/agents/maple-chat.ts index 9c614310..b6a688b2 100644 --- a/apps/chat-flue/src/agents/maple-chat.ts +++ b/apps/chat-flue/src/agents/maple-chat.ts @@ -9,17 +9,17 @@ import { buildSubmitDiagnosisTool } from "../lib/submit-diagnosis.ts" import { enterSpan } from "../lib/tracing.ts" /** - * Default Workers AI model. EXPERIMENT: trying Z.ai's `@cf/zai-org/glm-5.2`. - * `cloudflare/` is passed verbatim to `env.AI.run(...)`; an `@cf/*` - * model is hosted natively on Workers AI — keyless and billed as normal Workers - * AI usage (neurons + the daily free allocation), no partner/Unified Billing or - * AI Gateway BYOK. + * Default chat model: Google's `gemini-3.5-flash`, reached through OpenRouter. + * Flue parses a model specifier as `provider-id/model-id` (split on the first + * `/`), so `openrouter/google/gemini-3.5-flash` routes to the `openrouter` + * provider with model id `google/gemini-3.5-flash`. The provider is registered + * in `app.ts` with `OPENROUTER_API_KEY`; usage is billed to that OpenRouter key + * (no longer keyless Workers AI). 
* - * Previous default: `cloudflare/@cf/moonshotai/kimi-k2.6` (validated). The - * Workers AI catalog churns, so confirm the id against the live catalog if a - * call 404s. Override per-org via `MAPLE_CHAT_MODEL`. + * Previous default: `cloudflare/@cf/zai-org/glm-5.2` (native Workers AI). + * Override per deploy via `MAPLE_CHAT_MODEL` (any Flue provider specifier). */ -const DEFAULT_MODEL = "cloudflare/@cf/zai-org/glm-5.2" +const DEFAULT_MODEL = "openrouter/google/gemini-3.5-flash" /** * The addressable Maple chat agent on Cloudflare Workers AI, with tools sourced diff --git a/apps/chat-flue/src/app.ts b/apps/chat-flue/src/app.ts index 3a7e040e..3e2d682b 100644 --- a/apps/chat-flue/src/app.ts +++ b/apps/chat-flue/src/app.ts @@ -1,5 +1,5 @@ import { createOpenTelemetryObserver } from "@flue/opentelemetry" -import { observe } from "@flue/runtime" +import { observe, registerProvider } from "@flue/runtime" import { flue } from "@flue/runtime/routing" import { Hono } from "hono" import { cors } from "hono/cors" @@ -24,6 +24,19 @@ import { telemetryEnv } from "./lib/telemetry-env.ts" // (→ Maple) and are the primary signal for the "chat did nothing" failure mode, // regardless of whether the OTel export is on. const env = await telemetryEnv(); + +// Model provider: the chat agent + triage workflow default to +// `openrouter/google/gemini-3.5-flash`, so register the OpenRouter provider with +// its API key. Runs at module scope in every isolate (worker + DOs), before the +// Flue-generated `_entry.ts` provider body (which only registers `cloudflare`), +// so `openrouter` is available to `resolveModel` on the first turn. `openrouter` +// is a catalog provider, so the key is all that's needed (api + baseUrl default +// from the catalog). Guarded on the key so module load never throws when it's +// unset (tests / pre-secret deploys); a missing key surfaces at run time instead. 
+if (env.OPENROUTER_API_KEY) { + registerProvider("openrouter", { apiKey: env.OPENROUTER_API_KEY }) +} + const tracerProvider = setupTelemetry({ ingestKey: env.MAPLE_INGEST_KEY, endpoint: env.MAPLE_ENDPOINT, diff --git a/apps/chat-flue/src/lib/env.ts b/apps/chat-flue/src/lib/env.ts index 79502075..e0729f05 100644 --- a/apps/chat-flue/src/lib/env.ts +++ b/apps/chat-flue/src/lib/env.ts @@ -4,13 +4,15 @@ import type { CloudflareAIBinding } from "@flue/runtime/cloudflare" export interface ChatFlueEnv { /** Workers AI binding. Backs the `cloudflare/*` model provider (env.AI.run). */ AI: CloudflareAIBinding + /** OpenRouter API key; backs the `openrouter/*` model provider (registered in app.ts). */ + OPENROUTER_API_KEY?: string /** Base URL of the Maple API worker that hosts the MCP server (`/mcp`). */ MAPLE_API_URL: string /** Shared secret for Maple internal-service auth (`Bearer maple_svc_`). */ INTERNAL_SERVICE_TOKEN: string - /** Optional Workers AI model override, e.g. `cloudflare/@cf/meta/llama-3.3-70b-instruct-fp8-fast`. */ + /** Optional model override (any Flue provider specifier), e.g. `openrouter/google/gemini-3.5-flash`. */ MAPLE_CHAT_MODEL?: string - /** Optional Workers AI model override for the headless triage workflow (falls back to MAPLE_CHAT_MODEL). */ + /** Optional model override for the headless triage workflow (falls back to MAPLE_CHAT_MODEL). */ MAPLE_TRIAGE_MODEL?: string /** Deployment environment label, surfaced on telemetry. */ MAPLE_ENVIRONMENT?: string diff --git a/apps/chat-flue/src/workflows/triage.ts b/apps/chat-flue/src/workflows/triage.ts index 06ed3ebe..8b434cac 100644 --- a/apps/chat-flue/src/workflows/triage.ts +++ b/apps/chat-flue/src/workflows/triage.ts @@ -6,8 +6,8 @@ import { AiTriageResultSchema } from "../lib/triage-result.ts" /** * Headless AI-triage as a Flue workflow — the agentic-investigation half of the - * legacy apps/api `AiTriageWorkflow`. 
It runs the read-only investigation loop on - * Workers AI against Maple's MCP tools and returns a structured `AiTriageResult`. + * legacy apps/api `AiTriageWorkflow`. It runs the read-only investigation loop + * against Maple's MCP tools and returns a structured `AiTriageResult`. * * Boundary: this workflow owns ONLY the LLM step. The durable incident lifecycle * — gate/claim, D1 `ai_triage_runs` persistence, issue severity + timeline, @@ -20,7 +20,7 @@ import { AiTriageResultSchema } from "../lib/triage-result.ts" * model's final structured output is validated against `AiTriageResultSchema`. */ -const DEFAULT_TRIAGE_MODEL = "cloudflare/@cf/moonshotai/kimi-k2.6" +const DEFAULT_TRIAGE_MODEL = "openrouter/google/gemini-3.5-flash" export interface TriagePayload { readonly orgId: string