From be32b0aa502cc260f14dcb0f9a43943ed504f067 Mon Sep 17 00:00:00 2001 From: Gwyneth Pena-Siguenza Date: Tue, 19 May 2026 08:09:03 -0400 Subject: [PATCH] ollama support updates --- .devcontainer/ollama/devcontainer.json | 6 ++++-- .env.sample | 2 +- .env.sample.ollama | 2 +- README.md | 28 ++++++++++++++++++++------ requirements-rag.txt | 1 + spanish/README.md | 14 ++++++++++--- 6 files changed, 40 insertions(+), 13 deletions(-) diff --git a/.devcontainer/ollama/devcontainer.json b/.devcontainer/ollama/devcontainer.json index 4868d79..0177f35 100644 --- a/.devcontainer/ollama/devcontainer.json +++ b/.devcontainer/ollama/devcontainer.json @@ -7,7 +7,9 @@ "context": "../.." }, "features": { - "ghcr.io/prulloac/devcontainer-features/ollama:1": {} + "ghcr.io/prulloac/devcontainer-features/ollama:1": { + "pull": "gemma4:e4b" + } }, // Configure tool-specific properties. "customizations": { @@ -26,7 +28,7 @@ }, // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "cp .env.sample.ollama .env && ollama pull llama3.1", + "postCreateCommand": "cp .env.sample.ollama .env", // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
"remoteUser": "vscode", diff --git a/.env.sample b/.env.sample index 7450dcc..6f0fe7c 100644 --- a/.env.sample +++ b/.env.sample @@ -5,7 +5,7 @@ AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com AZURE_OPENAI_CHAT_DEPLOYMENT=YOUR-AZURE-DEPLOYMENT-NAME # Needed for Ollama: OLLAMA_ENDPOINT=http://localhost:11434/v1 -OLLAMA_MODEL=llama3.1 +OLLAMA_MODEL=gemma4:e4b # Needed for OpenAI.com: OPENAI_KEY=YOUR-OPENAI-KEY OPENAI_MODEL=gpt-3.5-turbo diff --git a/.env.sample.ollama b/.env.sample.ollama index fa4606f..f037bc1 100644 --- a/.env.sample.ollama +++ b/.env.sample.ollama @@ -1,4 +1,4 @@ # See .env.sample for all options API_HOST=ollama OLLAMA_ENDPOINT=http://localhost:11434/v1 -OLLAMA_MODEL=llama3.1 +OLLAMA_MODEL=gemma4:e4b diff --git a/README.md b/README.md index 51f115d..1223de2 100644 --- a/README.md +++ b/README.md @@ -142,27 +142,43 @@ This project includes infrastructure as code (IaC) to provision Azure OpenAI dep ## Using Ollama models +Most chat, streaming, function calling, structured outputs, CSV RAG, and document RAG flow samples work with local Ollama chat models. These samples have been tested with `gemma4:e4b` and `qwen3.5:4b`. The document ingestion and hybrid vector search samples currently use `text-embedding-3-small` for embeddings, so those scripts need Azure OpenAI/OpenAI embeddings or a code update before they can run in a local-only Ollama setup. The `reasoning.py` sample is intended for reasoning models, such as `gpt-oss`. + +If you use GitHub Codespaces or Dev Containers, you can use the Ollama devcontainer, which installs Ollama and pulls the default model for you: + +```text +https://codespaces.new/Azure-Samples/python-openai-demos?devcontainer_path=.devcontainer/ollama/devcontainer.json +``` + 1. Install [Ollama](https://ollama.com/) and follow the instructions to set it up on your local machine. -2. Pull a model, for example: +2. 
Pull the recommended model: + + ```shell + ollama pull gemma4:e4b + ``` + + Another tested option is: ```shell - ollama pull llama3.1 + ollama pull qwen3.5:4b ``` -3. Create a `.env` file by copying the `.env.sample` file and updating it with your Ollama endpoint and model name. +3. Create a `.env` file by copying the Ollama-specific environment sample: ```bash - cp .env.sample .env + cp .env.sample.ollama .env ``` -4. Update the `.env` file with your Ollama endpoint and model name (any model you've pulled): +4. Update the `.env` file with your Ollama endpoint and model name, if needed: ```bash API_HOST=ollama OLLAMA_ENDPOINT=http://localhost:11434/v1 - OLLAMA_MODEL=llama3.1 + OLLAMA_MODEL=gemma4:e4b ``` +Use `http://localhost:11434/v1` when Ollama and Python run in the same environment, including the Ollama devcontainer. If Python runs in a different container and Ollama runs on the host machine, use `http://host.docker.internal:11434/v1` instead. + ## Resources * [Video series: Learn Python + AI (October 2025)](https://techcommunity.microsoft.com/blog/educatordeveloperblog/level-up-your-python--ai-skills-with-our-complete-series/4464546) diff --git a/requirements-rag.txt b/requirements-rag.txt index eab7c69..1718e75 100644 --- a/requirements-rag.txt +++ b/requirements-rag.txt @@ -1,3 +1,4 @@ pymupdf4llm lunr sentence-transformers +tiktoken diff --git a/spanish/README.md b/spanish/README.md index b7c67ea..a39b57f 100644 --- a/spanish/README.md +++ b/spanish/README.md @@ -137,10 +137,16 @@ OPENAI_MODEL=gpt-4o-mini ### Usando modelos de Ollama -Instala [Ollama](https://ollama.com/) y descarga un modelo: +Instala [Ollama](https://ollama.com/) y descarga el modelo recomendado: ```bash -ollama pull llama3.1 +ollama pull gemma4:e4b +``` + +Otra opción probada es: + +```bash +ollama pull qwen3.5:4b ``` Configura tu `.env`: @@ -148,11 +154,13 @@ Configura tu `.env`: ```bash API_HOST=ollama OLLAMA_ENDPOINT=http://localhost:11434/v1 -OLLAMA_MODEL=llama3.1 
+OLLAMA_MODEL=gemma4:e4b ``` Si ejecutas dentro de un Dev Container, reemplaza `localhost` por `host.docker.internal`. +La mayoría de los ejemplos de chat, streaming, function calling, salidas estructuradas, RAG con CSV y flujo RAG con documentos funcionan con modelos de chat locales de Ollama. Los ejemplos de ingesta de documentos y búsqueda vectorial híbrida actualmente usan `text-embedding-3-small` para embeddings, así que esos scripts necesitan embeddings de Azure OpenAI/OpenAI o una actualización de código antes de poder ejecutarse en una configuración local solo con Ollama. + ## Recursos * [Próxima serie octubre 2025: Python + IA](https://aka.ms/PythonIA/serie)