routatic · samueltuyizere · Jun 30, 2026 · Jun 29, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/CONFIGURATION.md b/CONFIGURATION.md
@@ -16,56 +16,67 @@ For migration, `~/.config/oc-go-cc/config.json` is loaded when the new config fi
   "host": "127.0.0.1",
   "port": 3456,
   "hot_reload": false,
+  "anthropic_first": {
+    "enabled": false,
+    "base_url": "https://api.anthropic.com"
+  },
 
   "models": {
     "default": {
       "provider": "opencode-go",
-      "model_id": "kimi-k2.6",
+      "model_id": "deepseek-v4-pro",
       "temperature": 0.7,
-      "max_tokens": 4096
+      "max_tokens": 8192,
+      "reasoning_effort": "max",
+      "thinking": { "type": "enabled" }
     },
     "background": {
       "provider": "opencode-go",
-      "model_id": "qwen3.5-plus",
+      "model_id": "deepseek-v4-flash",
       "temperature": 0.5,
       "max_tokens": 2048
     },
     "think": {
       "provider": "opencode-go",
-      "model_id": "glm-5.1",
+      "model_id": "glm-5.2",
       "temperature": 0.7,
       "max_tokens": 8192
     },
     "complex": {
       "provider": "opencode-go",
-      "model_id": "glm-5.1",
+      "model_id": "deepseek-v4-pro",
       "temperature": 0.7,
-      "max_tokens": 4096
+      "max_tokens": 8192,
+      "reasoning_effort": "max",
+      "thinking": { "type": "enabled" }
     },
     "long_context": {
       "provider": "opencode-go",
-      "model_id": "minimax-m2.7",
+      "model_id": "minimax-m3",
       "temperature": 0.7,
       "max_tokens": 16384,
       "context_threshold": 80000
     },
     "fast": {
       "provider": "opencode-go",
-      "model_id": "qwen3.6-plus",
+      "model_id": "deepseek-v4-flash",
       "temperature": 0.7,
       "max_tokens": 4096
     }
   },
 
   "fallbacks": {
     "default": [
-      { "provider": "opencode-go", "model_id": "glm-5" },
-      { "provider": "opencode-go", "model_id": "qwen3.6-plus" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
-    "think": [{ "provider": "opencode-go", "model_id": "glm-5" }],
-    "complex": [{ "provider": "opencode-go", "model_id": "glm-5" }],
-    "long_context": [{ "provider": "opencode-go", "model_id": "minimax-m2.5" }],
-    "fast": [{ "provider": "opencode-go", "model_id": "qwen3.5-plus" }]
+    "think": [{ "provider": "opencode-go", "model_id": "qwen3.7-plus" }],
+    "complex": [{ "provider": "opencode-go", "model_id": "qwen3.7-plus" }],
+    "long_context": [{ "provider": "opencode-go", "model_id": "qwen3.7-plus" }],
+    "fast": [{ "provider": "opencode-go", "model_id": "qwen3.7-plus" }]
   },
 
   "model_overrides": {
@@ -115,6 +126,34 @@ For migration, `~/.config/oc-go-cc/config.json` is loaded when the new config fi
 }
 ```
 
+## Anthropic-First Failover
+
+Enable this mode to keep Anthropic as Claude Code's primary API and use the configured OpenCode model chain only while Anthropic is unavailable:
+
+```json
+{
+  "anthropic_first": {
+    "enabled": true,
+    "base_url": "https://api.anthropic.com"
+  }
+}
+```
+
+Configure Claude Code with only the proxy address:
+
+```bash
+export ANTHROPIC_BASE_URL=http://127.0.0.1:3456
+unset ANTHROPIC_AUTH_TOKEN ANTHROPIC_API_KEY
+```
+
+Leaving the credential variables unset preserves the saved Claude Pro, Max, Team, or Enterprise login. The proxy forwards the raw request, OAuth credential, `anthropic-version`, and complete `anthropic-beta` capability header to Anthropic.
+
+Fallback occurs for HTTP 408, 429, and 5xx responses, and for transport failures that occur before a response starts. HTTP 400, 401, 403, 404, and other request errors are returned unchanged. After a failure, the proxy honors `Retry-After`; otherwise it uses exponential backoff from 30 seconds to 15 minutes. A single real user request probes Anthropic for recovery while concurrent requests continue through OpenCode. No synthetic health requests are sent.
+
+Once response bytes have started, a failed stream cannot be restarted on another model without duplicating content. `/v1/messages/count_tokens` remains local and does not affect availability state.
+
+When OpenCode Go returns `GoUsageLimitError`, the remaining Go models are skipped for that request and the chain advances to Zen. The default chain uses Qwen3.7 Plus, Qwen3.7 Max, then the currently working free Zen models: Nemotron 3 Ultra Free, MiMo V2.5 Free, and DeepSeek V4 Flash Free. Free Zen endpoints are time-limited and may retain data under [OpenCode's documented privacy terms](https://opencode.ai/docs/zen/#privacy).
+
 ## Providers
 
 routatic-proxy supports three providers for upstream API calls:

diff --git a/MODELS.md b/MODELS.md
@@ -93,7 +93,7 @@ All OpenCode Go models are also available on Zen. Zen additionally offers:
 - **Claude Models (Anthropic endpoint):** claude-fable-5, claude-opus-4-8, claude-opus-4-7, claude-opus-4-6, claude-opus-4-5, claude-opus-4-1, claude-sonnet-4-6, claude-sonnet-4-5, claude-sonnet-4, claude-haiku-4-5, claude-3-5-haiku
 - **GPT Models (Responses endpoint):** gpt-5.5, gpt-5.5-pro, gpt-5.4, gpt-5.4-pro, gpt-5.4-mini, gpt-5.4-nano, gpt-5.3-codex, gpt-5.3-codex-spark, gpt-5.2, gpt-5.2-codex, gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.1-codex-mini, gpt-5, gpt-5-codex, gpt-5-nano
 - **Gemini Models (Gemini endpoint):** gemini-3.5-flash, gemini-3.1-pro, gemini-3-flash
-- **Free Tier (chat completions):** deepseek-v4-pro, deepseek-v4-flash-free, grok-build-0.1, big-pickle, mimo-v2.5-free, north-mini-code-free, nemotron-3-ultra-free
+- **Free Tier (chat completions):** deepseek-v4-flash-free, big-pickle, mimo-v2.5-free, north-mini-code-free, nemotron-3-ultra-free
 
 #### Deprecated Zen Models
 
@@ -117,7 +117,7 @@ The following models are deprecated and will be removed:
 | Claude Haiku 3.5 | Feb 16, 2026 | Claude Haiku 4.5 |
 | Qwen3 Coder 480B | Feb 6, 2026 | Qwen3.7 Plus/Max |
 
-DeepSeek V4 Pro and Flash are OpenAI-compatible on both Go and Zen providers. On Zen, DeepSeek V4 Pro is available as a free-tier model. routatic-proxy transforms Claude Code's Anthropic request into OpenAI Chat Completions format, including tools, tool results, thinking history, `reasoning_effort`, and `thinking`.
+DeepSeek V4 Pro and Flash are OpenAI-compatible on both Go and Zen providers. DeepSeek V4 Flash Free is the free Zen variant. routatic-proxy transforms Claude Code's Anthropic request into OpenAI Chat Completions format, including tools, tool results, thinking history, `reasoning_effort`, and `thinking`.
 
 For Claude Code and OpenCode-style agent workflows, DeepSeek V4 supports max thinking mode with:
 
@@ -568,7 +568,7 @@ Critical review → GLM-5.1 (rarely)
 2. **Reserve GLM-5.1 for critical tasks only** — 880 req/$12 drains budget fast
 3. **Use Qwen3.5 Plus for simple operations** — 10,200 req/$12 is unbeatable
 4. **MiniMax M2.5 for long context** — 6,300 req/$12 with 1M context is amazing value
-5. **Use Zen free-tier models** for non-critical tasks — deepseek-v4-pro, grok-build-0.1, big-pickle, and others cost $0
+5. **Use Zen free-tier models** for non-critical tasks — Nemotron 3 Ultra Free, MiMo V2.5 Free, DeepSeek V4 Flash Free, Big Pickle, and others cost $0 while their promotions remain active
 6. **Monitor your usage** in the [OpenCode console](https://opencode.ai/auth)
 
 ## See Also

diff --git a/README.md b/README.md
@@ -44,6 +44,7 @@ OpenCode Go gives you access to powerful open coding models for **$5/month** (th
 - **Model Routing** — Automatically routes to different models based on context (default, thinking, long context, background)
 - **Streaming Scenario Routing** — Configurable routing for streaming requests; enables proper scenario selection for Claude Code multi-agent and review workflows (see [CONFIGURATION.md](CONFIGURATION.md#streaming-scenario-routing))
 - **Fallback Chains** — If a model fails, automatically tries the next one in your configured chain
+- **Anthropic-First Failover** — Keep Claude on Anthropic and use OpenCode only during rate limits or outages
 - **Circuit Breaker** — Tracks model health and skips failing models to avoid latency spikes
 - **Real-time Streaming** — Full SSE streaming with live format transformation
 - **Tool Calling** — Proper Anthropic tool_use/tool_result <-> OpenAI/Gemini function calling translation
@@ -74,7 +75,7 @@ Zen provides pay-as-you-go access to additional models:
 - **Claude Models**: Claude Fable 5, Claude Opus 4.8/4.6/4.5/4.1, Claude Sonnet 4
 - **Gemini Models**: Gemini 3.5 Flash, Gemini 3.1 Pro, Gemini 3 Flash
 - **GPT Models**: GPT 5.5, GPT 5.4, GPT 5.3 Codex, and more
-- **Free Tier**: DeepSeek V4 Pro, Grok Build 0.1, Big Pickle, and others
+- **Free Tier**: Nemotron 3 Ultra Free, MiMo V2.5 Free, DeepSeek V4 Flash Free, and others
 
 See [MODELS.md](MODELS.md#opencodes-zen) for the full Zen model list.
 
@@ -144,11 +145,22 @@ make docker-stop
 
 ### 4. Configure Claude Code
 
+For the default OpenCode-only mode:
+
 ```bash
 export ANTHROPIC_BASE_URL=http://127.0.0.1:3456
 export ANTHROPIC_AUTH_TOKEN=unused
 ```
 
+For Anthropic-first mode, enable `anthropic_first` in the proxy config and set only the base URL. Do not set an API key or auth token: Claude Code will keep using its saved Claude subscription login.
+
+```bash
+export ANTHROPIC_BASE_URL=http://127.0.0.1:3456
+unset ANTHROPIC_AUTH_TOKEN ANTHROPIC_API_KEY
+```
+
+Anthropic-first mode falls back on HTTP 408, 429, 5xx, and connection failures. It honors `Retry-After` and uses one real request to detect recovery, so it does not spend tokens on health checks. See [CONFIGURATION.md](CONFIGURATION.md#anthropic-first-failover).
+
 ### 5. Run Claude Code
 
 ```bash

diff --git a/cmd/routatic-proxy/main.go b/cmd/routatic-proxy/main.go
@@ -173,11 +173,20 @@ func serveCmd() *cobra.Command {
 
 			fmt.Printf("Starting %s v%s\n", appName, version)
 			fmt.Printf("Listening on %s:%d\n", cfg.Host, cfg.Port)
-			fmt.Printf("Forwarding to: %s\n", cfg.OpenCodeGo.BaseURL)
+			if cfg.AnthropicFirst.Enabled {
+				fmt.Printf("Forwarding to Anthropic first: %s\n", cfg.AnthropicFirst.BaseURL)
+				fmt.Printf("OpenCode fallback: %s\n", cfg.OpenCodeGo.BaseURL)
+			} else {
+				fmt.Printf("Forwarding to: %s\n", cfg.OpenCodeGo.BaseURL)
+			}
 			fmt.Println()
 			fmt.Println("Configure Claude Code with:")
 			fmt.Printf("  export ANTHROPIC_BASE_URL=http://%s:%d\n", cfg.Host, cfg.Port)
-			fmt.Println("  export ANTHROPIC_AUTH_TOKEN=unused")
+			if cfg.AnthropicFirst.Enabled {
+				fmt.Println("  unset ANTHROPIC_AUTH_TOKEN ANTHROPIC_API_KEY")
+			} else {
+				fmt.Println("  export ANTHROPIC_AUTH_TOKEN=unused")
+			}
 			fmt.Println()
 
 			return srv.Start()
@@ -336,7 +345,7 @@ func checkCmd() *cobra.Command {
 					env[key] = value
 				}
 			}
-			conflicts += checkClaudeEnv("environment", env, expectedURL)
+			conflicts += checkClaudeEnv("environment", env, expectedURL, cfg.AnthropicFirst.Enabled)
 
 			home, err := os.UserHomeDir()
 			if err != nil {
@@ -361,7 +370,7 @@ func checkCmd() *cobra.Command {
 						fmt.Printf("%s: %v\n", path, err)
 						continue
 					}
-					conflicts += checkClaudeEnv(path, settings.Env, expectedURL)
+					conflicts += checkClaudeEnv(path, settings.Env, expectedURL, cfg.AnthropicFirst.Enabled)
 				}
 			}
 
@@ -379,7 +388,7 @@ func checkCmd() *cobra.Command {
 
 // checkClaudeEnv checks a single environment map for conflicting Claude Code settings.
 // Returns the number of conflicts found.
-func checkClaudeEnv(source string, env map[string]string, expectedURL string) int {
+func checkClaudeEnv(source string, env map[string]string, expectedURL string, anthropicFirst bool) int {
 	conflicts := 0
 	if value, ok := env["ANTHROPIC_BASE_URL"]; ok {
 		normalized := strings.TrimRight(value, "/")
@@ -393,11 +402,14 @@ func checkClaudeEnv(source string, env map[string]string, expectedURL string) in
 		conflicts++
 	}
 	if value, ok := env["ANTHROPIC_AUTH_TOKEN"]; ok {
-		if value != "unused" {
+		if anthropicFirst {
+			fmt.Printf("%s: ANTHROPIC_AUTH_TOKEN is set; unset it to keep the saved Claude subscription login active\n", source)
+			conflicts++
+		} else if value != "unused" {
 			fmt.Printf("%s: ANTHROPIC_AUTH_TOKEN is %q, expected \"unused\"\n", source, value)
 			conflicts++
 		}
-	} else {
+	} else if !anthropicFirst {
 		fmt.Printf("%s: ANTHROPIC_AUTH_TOKEN is not set (recommended: \"unused\")\n", source)
 	}
 	return conflicts
@@ -433,7 +445,6 @@ func modelsCmd() *cobra.Command {
 			fmt.Println()
 			fmt.Println("Available OpenCode Zen models (free tier):")
 			fmt.Println()
-			fmt.Println("  deepseek-v4-pro            OpenAI-compatible")
 			fmt.Println("  deepseek-v4-flash-free     OpenAI-compatible")
 			fmt.Println("  grok-build-0.1             OpenAI-compatible")
 			fmt.Println("  big-pickle                 OpenAI-compatible")
@@ -564,42 +575,50 @@ func getDefaultConfig() string {
   "port": 3456,
   "hot_reload": false,
   "enable_streaming_scenario_routing": false,
-  "respect_requested_model": true,
+  "respect_requested_model": false,
+  "anthropic_first": {
+    "enabled": false,
+    "base_url": "https://api.anthropic.com"
+  },
   "models": {
     "background": {
       "provider": "opencode-go",
-      "model_id": "qwen3.5-plus",
+      "model_id": "deepseek-v4-flash",
       "temperature": 0.5,
       "max_tokens": 2048
     },
     "default": {
       "provider": "opencode-go",
-      "model_id": "kimi-k2.6",
+      "model_id": "deepseek-v4-pro",
       "temperature": 0.7,
-      "max_tokens": 4096
+      "max_tokens": 8192,
+      "reasoning_effort": "max",
+      "thinking": { "type": "enabled" }
     },
     "long_context": {
       "provider": "opencode-go",
-      "model_id": "minimax-m2.5",
+      "model_id": "minimax-m3",
       "temperature": 0.7,
       "max_tokens": 16384,
       "context_threshold": 80000
     },
     "think": {
       "provider": "opencode-go",
-      "model_id": "glm-5.1",
+      "model_id": "glm-5.2",
       "temperature": 0.7,
       "max_tokens": 8192
     },
     "complex": {
       "provider": "opencode-go",
-      "model_id": "glm-5.1",
+      "model_id": "deepseek-v4-pro",
       "temperature": 0.7,
-      "max_tokens": 4096
+      "max_tokens": 8192,
+      "reasoning_effort": "max",
+      "thinking": { "type": "enabled" }
     },
     "fast": {
       "provider": "opencode-go",
-      "model_id": "qwen3.6-plus",
+      "model_id": "deepseek-v4-flash",
       "temperature": 0.7,
       "max_tokens": 4096
     },
@@ -630,28 +649,46 @@ func getDefaultConfig() string {
   },
   "fallbacks": {
     "background": [
-      { "provider": "opencode-go", "model_id": "qwen3.6-plus" },
-      { "provider": "opencode-go", "model_id": "minimax-m2.5" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
     "default": [
-      { "provider": "opencode-go", "model_id": "mimo-v2.5-pro" },
-      { "provider": "opencode-go", "model_id": "qwen3.6-plus" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
     "long_context": [
-      { "provider": "opencode-go", "model_id": "minimax-m2.7" },
-      { "provider": "opencode-go", "model_id": "kimi-k2.6" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
     "think": [
-      { "provider": "opencode-go", "model_id": "kimi-k2.6" },
-      { "provider": "opencode-go", "model_id": "mimo-v2.5-pro" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
     "complex": [
-      { "provider": "opencode-go", "model_id": "glm-5.1" },
-      { "provider": "opencode-go", "model_id": "kimi-k2.6" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
     "fast": [
-      { "provider": "opencode-go", "model_id": "qwen3.5-plus" },
-      { "provider": "opencode-go", "model_id": "minimax-m2.5" }
+      { "provider": "opencode-go", "model_id": "qwen3.7-plus" },
+      { "provider": "opencode-go", "model_id": "qwen3.7-max" },
+      { "provider": "opencode-zen", "model_id": "nemotron-3-ultra-free" },
+      { "provider": "opencode-zen", "model_id": "mimo-v2.5-free" },
+      { "provider": "opencode-zen", "model_id": "deepseek-v4-flash-free" }
     ],
     "glm-5.2": [
       { "provider": "opencode-go", "model_id": "glm-5.1" },
@@ -775,7 +812,7 @@ func getDefaultConfig() string {
   "opencode_go": {
     "base_url": "https://opencode.ai/zen/go/v1/chat/completions",
     "anthropic_base_url": "https://opencode.ai/zen/go/v1/messages",
-    "api_key": "${ROUTATIC_PROXY_OPENCODE_GO_API_KEY}",
+    "api_key": "",
     "api_keys": [],
     "timeout_ms": 300000
   },
@@ -784,7 +821,7 @@ func getDefaultConfig() string {
     "anthropic_base_url": "https://opencode.ai/zen/v1/messages",
     "responses_base_url": "https://opencode.ai/zen/v1/responses",
     "gemini_base_url": "https://opencode.ai/zen/v1/models",
-    "api_key": "${ROUTATIC_PROXY_OPENCODE_ZEN_API_KEY}",
+    "api_key": "",
     "api_keys": [],
     "timeout_ms": 300000
   },