From 11f9f2946417c3ed6829965ba9c11f5f39b7fc4e Mon Sep 17 00:00:00 2001
From: octo-patch
Date: Sat, 25 Apr 2026 10:25:03 +0800
Subject: [PATCH] fix: handle /v1-suffixed BASE_URL and improve error reporting for OpenAI-compat providers (fixes #170)

Two bugs in _call_llm_provider:

1. URL double-/v1: when XAI_BASE_URL (or any provider's BASE_URL) already
   ends with /v1 (e.g. https://openrouter.ai/api/v1), the code appended
   another /v1/chat/completions, producing an invalid URL. The fix checks
   for a trailing /v1 and skips the extra prefix.

2. JSON-before-raise_for_status: response.json() was called before
   raise_for_status(), so an HTTP error with a non-JSON (e.g. empty) body
   surfaced as a confusing JSONDecodeError instead of a clear HTTP error
   message. The fix parses the JSON first (best-effort, for logging), then
   calls raise_for_status(), and falls back to the raw response text when
   no JSON body is present.

Also corrects the README env var name from XAI_URL to XAI_BASE_URL and
adds examples showing that both https://api.x.ai and https://api.x.ai/v1
are accepted as BASE_URL values.

Co-Authored-By: Octopus
---
 README.md         |  2 +-
 utils/call_llm.py | 45 +++++++++++++++++++++++++++++++--------------
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index cc8ad4e8..4014028e 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
    pip install -r requirements.txt
    ```
 
-4. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. To do so, you can put the values in a `.env` file. By default, you can use the AI Studio key with this client for Gemini Pro 2.5 by setting the `GEMINI_API_KEY` environment variable. If you want to use another LLM, you can set the `LLM_PROVIDER` environment variable (e.g. `XAI`), and then set the model, url, and API key (e.g. `XAI_MODEL`, `XAI_URL`,`XAI_API_KEY`). If using Ollama, the url is `http://localhost:11434/` and the API key can be omitted.
+4. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. To do so, you can put the values in a `.env` file. By default, you can use the AI Studio key with this client for Gemini 2.5 Pro by setting the `GEMINI_API_KEY` environment variable. If you want to use another LLM, set the `LLM_PROVIDER` environment variable (e.g. `XAI`), then set the model, base URL, and API key (e.g. `XAI_MODEL`, `XAI_BASE_URL`, `XAI_API_KEY`). The base URL may include or omit the `/v1` suffix: both `https://api.x.ai` and `https://api.x.ai/v1` work. If using Ollama, set `OLLAMA_BASE_URL=http://localhost:11434`; the API key can be omitted. For OpenRouter, set `XAI_BASE_URL=https://openrouter.ai/api/v1`.
    You can use your own models. We highly recommend the latest models with thinking capabilities (Claude 3.7 with thinking, O1). You can verify that it is correctly set up by running:
    ```bash
    python utils/call_llm.py
diff --git a/utils/call_llm.py b/utils/call_llm.py
index 70c9e83a..de9eea53 100644
--- a/utils/call_llm.py
+++ b/utils/call_llm.py
@@ -57,9 +57,11 @@ def _call_llm_provider(prompt: str) -> str:
     Environment variables:
     - LLM_PROVIDER: "OLLAMA" or "XAI"
     - _MODEL: Model name (e.g., OLLAMA_MODEL, XAI_MODEL)
-    - _BASE_URL: Base URL without endpoint (e.g., OLLAMA_BASE_URL, XAI_BASE_URL)
+    - _BASE_URL: Base URL of the API. May include or omit a trailing /v1
+      (e.g., OLLAMA_BASE_URL=http://localhost:11434 or http://localhost:11434/v1,
+      XAI_BASE_URL=https://api.x.ai/v1 or https://openrouter.ai/api/v1)
     - _API_KEY: API key (e.g., OLLAMA_API_KEY, XAI_API_KEY; optional for providers that don't require it)
 
-    The endpoint /v1/chat/completions will be appended to the base URL.
+    The /chat/completions endpoint is appended; /v1 is added only if not already present in the URL.
     """
     logger.info(f"PROMPT: {prompt}") # log the prompt
@@ -84,8 +86,13 @@ def _call_llm_provider(prompt: str) -> str:
     if not base_url:
         raise ValueError(f"{base_url_var} environment variable is required")
 
-    # Append the endpoint to the base URL
-    url = f"{base_url.rstrip('/')}/v1/chat/completions"
+    # Build the chat completions URL. Avoid duplicating /v1 when the caller
+    # already includes it in BASE_URL (e.g. https://openrouter.ai/api/v1).
+    clean_base = base_url.rstrip('/')
+    if clean_base.endswith('/v1'):
+        url = f"{clean_base}/chat/completions"
+    else:
+        url = f"{clean_base}/v1/chat/completions"
 
     # Configure headers and payload based on provider
     headers = {
@@ -100,20 +107,32 @@ def _call_llm_provider(prompt: str) -> str:
         "temperature": 0.7,
     }
+    response_json = None
     try:
         response = requests.post(url, headers=headers, json=payload)
-        response_json = response.json()
         # Log the response
-        logger.info("RESPONSE:\n%s", json.dumps(response_json, indent=2))
-        #logger.info(f"RESPONSE: {response.json()}")
+        # Parse JSON first so we can log it and include error details on failure
+        try:
+            response_json = response.json()
+            logger.info("RESPONSE:\n%s", json.dumps(response_json, indent=2))
+        except ValueError:  # covers requests' JSONDecodeError, which subclasses it
+            logger.warning(
+                "Non-JSON response from %s (HTTP %s): %s",
+                provider, response.status_code, response.text[:500]
+            )
         response.raise_for_status()
-        return response.json()["choices"][0]["message"]["content"]
+        if response_json is None:
+            raise Exception(
+                f"Empty or non-JSON response from {provider} (HTTP {response.status_code}). "
+                f"Verify that {base_url_var} points to a valid OpenAI-compatible endpoint."
+            )
+        return response_json["choices"][0]["message"]["content"]
     except requests.exceptions.HTTPError as e:
         error_message = f"HTTP error occurred: {e}"
-        try:
-            error_details = response.json().get("error", "No additional details")
+        if response_json is not None:
+            error_details = response_json.get("error", "No additional details")
             error_message += f" (Details: {error_details})"
-        except:
-            pass
+        elif response.text:
+            error_message += f" (Response: {response.text[:200]})"
         raise Exception(error_message)
     except requests.exceptions.ConnectionError:
         raise Exception(f"Failed to connect to {provider} API. Check your network connection.")
@@ -121,8 +140,6 @@ def _call_llm_provider(prompt: str) -> str:
         raise Exception(f"Request to {provider} API timed out.")
     except requests.exceptions.RequestException as e:
         raise Exception(f"An error occurred while making the request to {provider}: {e}")
-    except ValueError:
-        raise Exception(f"Failed to parse response as JSON from {provider}. The server might have returned an invalid response.")
 
 # By default, we Google Gemini 2.5 pro, as it shows great performance for code understanding
 def call_llm(prompt: str, use_cache: bool = True) -> str:
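
Reviewer note: the URL normalization is easy to sanity-check in isolation. Below is a minimal standalone sketch of the same logic; the `build_chat_url` helper is hypothetical (the patch inlines these lines in `_call_llm_provider`) and exists only so the cases from issue #170 can be asserted directly:

```python
# Hypothetical standalone mirror of the patched URL logic, for verification
# only; the actual patch inlines this in _call_llm_provider.
def build_chat_url(base_url: str) -> str:
    """Append /chat/completions, adding /v1 only when it is not already there."""
    clean_base = base_url.rstrip('/')
    if clean_base.endswith('/v1'):
        return f"{clean_base}/chat/completions"
    return f"{clean_base}/v1/chat/completions"


# Both spellings of the x.ai base URL resolve to the same endpoint.
assert build_chat_url("https://api.x.ai") == "https://api.x.ai/v1/chat/completions"
assert build_chat_url("https://api.x.ai/v1") == "https://api.x.ai/v1/chat/completions"
assert build_chat_url("https://api.x.ai/v1/") == "https://api.x.ai/v1/chat/completions"
# The OpenRouter case from the bug report: no doubled /v1.
assert build_chat_url("https://openrouter.ai/api/v1") == "https://openrouter.ai/api/v1/chat/completions"
# A bare Ollama host still gets /v1 appended.
assert build_chat_url("http://localhost:11434") == "http://localhost:11434/v1/chat/completions"
print("all URL cases pass")
```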
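The error-ordering fix can likewise be exercised without a live provider. The sketch below hand-builds a `requests.Response` standing in for a proxy's HTML 502 page (setting the private `_content` attribute is a test-only shortcut, not something the patch does) and contrasts the old ordering with the new one:

```python
import requests
from requests.models import Response

# Fake a gateway error with a non-JSON body. status_code, reason, url, and
# the private _content attribute are set by hand purely for this demo.
resp = Response()
resp.status_code = 502
resp.reason = "Bad Gateway"
resp.url = "https://openrouter.ai/api/v1/chat/completions"
resp._content = b"<html>Bad Gateway</html>"

# Old order: json() ran first, so the user saw an opaque JSONDecodeError
# and the HTTP status was never reported.
try:
    resp.json()
except ValueError as exc:
    print(f"old behavior: {type(exc).__name__}: {exc}")

# New order: parse best-effort, then raise_for_status(), so the failure
# reads as a clear HTTP error and still carries the raw body for debugging.
body = None
try:
    try:
        body = resp.json()
    except ValueError:
        pass  # tolerate non-JSON bodies; body stays None
    resp.raise_for_status()
except requests.exceptions.HTTPError as exc:
    detail = body.get("error") if body is not None else resp.text[:200]
    print(f"new behavior: {exc} (Response: {detail})")
```

With the old ordering the demo prints only a JSON parse error; with the new ordering it prints "502 Server Error: Bad Gateway for url: ..." plus the raw body, which is the improvement the second bug fix is after.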