From be32b0aa502cc260f14dcb0f9a43943ed504f067 Mon Sep 17 00:00:00 2001
From: Gwyneth Pena-Siguenza <gwynethpena@outlook.com>
Date: Tue, 19 May 2026 08:09:03 -0400
Subject: [PATCH] ollama support updates

---
 .devcontainer/ollama/devcontainer.json |  6 ++++--
 .env.sample                            |  2 +-
 .env.sample.ollama                     |  2 +-
 README.md                              | 28 ++++++++++++++++++++------
 requirements-rag.txt                   |  1 +
 spanish/README.md                      | 14 ++++++++++---
 6 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/.devcontainer/ollama/devcontainer.json b/.devcontainer/ollama/devcontainer.json
index 4868d79..0177f35 100644
--- a/.devcontainer/ollama/devcontainer.json
+++ b/.devcontainer/ollama/devcontainer.json
@@ -7,7 +7,9 @@
         "context": "../.."
     },
     "features": {
-        "ghcr.io/prulloac/devcontainer-features/ollama:1": {}
+        "ghcr.io/prulloac/devcontainer-features/ollama:1": {
+            "pull": "gemma4:e4b"
+        }
     },
     // Configure tool-specific properties.
     "customizations": {
@@ -26,7 +28,7 @@
     },
 
     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "cp .env.sample.ollama .env && ollama pull llama3.1",
+    "postCreateCommand": "cp .env.sample.ollama .env",
 
     // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
     "remoteUser": "vscode",
diff --git a/.env.sample b/.env.sample
index 7450dcc..6f0fe7c 100644
--- a/.env.sample
+++ b/.env.sample
@@ -5,7 +5,7 @@ AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com
 AZURE_OPENAI_CHAT_DEPLOYMENT=YOUR-AZURE-DEPLOYMENT-NAME
 # Needed for Ollama:
 OLLAMA_ENDPOINT=http://localhost:11434/v1
-OLLAMA_MODEL=llama3.1
+OLLAMA_MODEL=gemma4:e4b
 # Needed for OpenAI.com:
 OPENAI_KEY=YOUR-OPENAI-KEY
 OPENAI_MODEL=gpt-3.5-turbo
diff --git a/.env.sample.ollama b/.env.sample.ollama
index fa4606f..f037bc1 100644
--- a/.env.sample.ollama
+++ b/.env.sample.ollama
@@ -1,4 +1,4 @@
 # See .env.sample for all options
 API_HOST=ollama
 OLLAMA_ENDPOINT=http://localhost:11434/v1
-OLLAMA_MODEL=llama3.1
+OLLAMA_MODEL=gemma4:e4b
diff --git a/README.md b/README.md
index 51f115d..1223de2 100644
--- a/README.md
+++ b/README.md
@@ -142,27 +142,43 @@ This project includes infrastructure as code (IaC) to provision Azure OpenAI dep
 
 ## Using Ollama models
 
+Most chat, streaming, function calling, structured outputs, CSV RAG, and document RAG flow samples work with local Ollama chat models. These samples have been tested with `gemma4:e4b` and `qwen3.5:4b`. The document ingestion and hybrid vector search samples currently use `text-embedding-3-small` for embeddings, so those scripts need Azure OpenAI/OpenAI embeddings or a code update before they can run in a local-only Ollama setup. The `reasoning.py` sample is intended for reasoning models, such as `gpt-oss`.
+
+If you use GitHub Codespaces or Dev Containers, you can skip the manual setup steps below by opening the Ollama devcontainer, which installs Ollama, pulls the default model, and creates the `.env` file for you:
+
+```text
+https://codespaces.new/Azure-Samples/python-openai-demos?devcontainer_path=.devcontainer/ollama/devcontainer.json
+```
+
 1. Install [Ollama](https://ollama.com/) and follow the instructions to set it up on your local machine.
-2. Pull a model, for example:
+2. Pull the recommended model:
+
+    ```shell
+    ollama pull gemma4:e4b
+    ```
+
+    Another tested option is:
 
     ```shell
-    ollama pull llama3.1
+    ollama pull qwen3.5:4b
     ```
 
-3. Create a `.env` file by copying the `.env.sample` file and updating it with your Ollama endpoint and model name.
+3. Create a `.env` file by copying the Ollama-specific environment sample:
 
     ```bash
-    cp .env.sample .env
+    cp .env.sample.ollama .env
     ```
 
-4. Update the `.env` file with your Ollama endpoint and model name (any model you've pulled):
+4. Update the `.env` file with your Ollama endpoint and model name, if needed:
 
     ```bash
     API_HOST=ollama
     OLLAMA_ENDPOINT=http://localhost:11434/v1
-    OLLAMA_MODEL=llama3.1
+    OLLAMA_MODEL=gemma4:e4b
     ```
 
+Use `http://localhost:11434/v1` when Ollama and Python run in the same environment, including the Ollama devcontainer. If Python runs in a different container and Ollama runs on the host machine, use `http://host.docker.internal:11434/v1` instead.
+
 ## Resources
 
 * [Video series: Learn Python + AI (October 2025)](https://techcommunity.microsoft.com/blog/educatordeveloperblog/level-up-your-python--ai-skills-with-our-complete-series/4464546)
diff --git a/requirements-rag.txt b/requirements-rag.txt
index eab7c69..1718e75 100644
--- a/requirements-rag.txt
+++ b/requirements-rag.txt
@@ -1,3 +1,4 @@
 pymupdf4llm
 lunr
 sentence-transformers
+tiktoken
diff --git a/spanish/README.md b/spanish/README.md
index b7c67ea..a39b57f 100644
--- a/spanish/README.md
+++ b/spanish/README.md
@@ -137,10 +137,16 @@ OPENAI_MODEL=gpt-4o-mini
 
 ### Usando modelos de Ollama
 
-Instala [Ollama](https://ollama.com/) y descarga un modelo:
+Instala [Ollama](https://ollama.com/) y descarga el modelo recomendado:
 
 ```bash
-ollama pull llama3.1
+ollama pull gemma4:e4b
+```
+
+Otra opción probada es:
+
+```bash
+ollama pull qwen3.5:4b
 ```
 
 Configura tu `.env`:
@@ -148,11 +154,13 @@ Configura tu `.env`:
 ```bash
 API_HOST=ollama
 OLLAMA_ENDPOINT=http://localhost:11434/v1
-OLLAMA_MODEL=llama3.1
+OLLAMA_MODEL=gemma4:e4b
 ```
 
 Si ejecutas dentro de un Dev Container, reemplaza `localhost` por `host.docker.internal`.
 
+La mayoría de los ejemplos de chat, streaming, function calling, salidas estructuradas, RAG con CSV y flujo RAG con documentos funcionan con modelos de chat locales de Ollama. Los ejemplos de ingesta de documentos y búsqueda vectorial híbrida actualmente usan `text-embedding-3-small` para embeddings, así que esos scripts necesitan embeddings de Azure OpenAI/OpenAI o una actualización de código antes de poder ejecutarse en una configuración local solo con Ollama.
+
 ## Recursos
 
 * [Próxima serie octubre 2025: Python + IA](https://aka.ms/PythonIA/serie)