Switch to async Azure credentials, remove GitHub Models support

pamelafox · pamelafox · commit cfa570d5f437 · 2026-04-10T16:37:56.000Z
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -11,7 +11,7 @@ RAG on PostgreSQL is a Python FastAPI backend with React TypeScript frontend tha
 Install the following tools before beginning development:
 
 - **Python 3.10+** (3.12 recommended)
-- **Node.js 18+** for frontend development  
+- **Node.js 18+** for frontend development
 - **PostgreSQL 14+** with pgvector extension
 - **Azure Developer CLI (azd)** for deployment
 - **Docker Desktop** for dev containers (optional)
@@ -37,7 +37,7 @@ Run these commands in sequence. NEVER CANCEL any long-running commands:
    ```bash
    # Ubuntu/Debian:
    sudo apt update && sudo apt install -y postgresql-16-pgvector
-   
+
    # Start PostgreSQL and set password
    sudo service postgresql start
    sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"
@@ -97,7 +97,7 @@ Use "Frontend & Backend" configuration in the VS Code Run & Debug menu.
 ### Linting and Formatting (ALWAYS run before committing)
 ```bash
 ruff check .          # Lint code (takes <1 second)
-ruff format .          # Format code (takes <1 second)  
+ruff format .          # Format code (takes <1 second)
 mypy . --python-version 3.12  # Type check (takes ~42 seconds)
 ```
 
@@ -121,7 +121,7 @@ pytest tests/e2e.py --tracing=retain-on-failure
 **CRITICAL TIMING INFORMATION** - Set these timeout values and NEVER CANCEL:
 
 - **Dependencies install**: 90 seconds (use 180+ second timeout)
-- **Frontend npm install**: 22 seconds (use 60+ second timeout)  
+- **Frontend npm install**: 22 seconds (use 60+ second timeout)
 - **Frontend build**: 12 seconds (use 30+ second timeout)
 - **MyPy type checking**: 42 seconds (use 90+ second timeout)
 - **Full test suite**: 25 seconds (use 60+ second timeout)
@@ -156,7 +156,7 @@ pytest tests/e2e.py --tracing=retain-on-failure
    # Test API endpoints
    curl http://localhost:8000/items/1
    # Should return JSON with item data
-   
+
    # Test frontend
    curl http://localhost:8000/ | head -n 5
    # Should return HTML with "RAG on PostgreSQL" title
@@ -226,9 +226,6 @@ The application supports multiple OpenAI providers:
 3. **Ollama** (local):
    Set `OPENAI_CHAT_HOST=ollama`
 
-4. **GitHub Models**:
-   Set `OPENAI_CHAT_HOST=github`
-
 ## Common Issues and Solutions
 
 ### Database Connection Issues
@@ -267,7 +264,7 @@ Open `http://localhost:8089/` and point to your running application.
 The application provides these REST API endpoints (view full docs at `http://localhost:8000/docs`):
 
 - `GET /items/{id}` - Get specific item by ID
-- `GET /search` - Search items with text query 
+- `GET /search` - Search items with text query
 - `GET /similar` - Find similar items using vector search
 - `POST /chat` - Chat with RAG system (requires OpenAI configuration)
 - `POST /chat/stream` - Streaming chat responses
@@ -286,7 +283,7 @@ curl "http://localhost:8000/search?query=tent&limit=5"
 **Quick ls -la output for repository root:**
 ```
 .devcontainer/          # Dev container configuration
-.env.sample            # Environment variables template  
+.env.sample            # Environment variables template
 .github/               # GitHub Actions workflows
 .gitignore            # Git ignore patterns
 .pre-commit-config.yaml # Pre-commit hook configuration
@@ -309,8 +306,8 @@ tests/        # Test suite
 - **Always build and test locally before committing**
 - **Use pre-commit hooks** - they run ruff automatically
 - **Check the GitHub Actions** in `.github/workflows/` for CI requirements
-- **Reference the full README.md** for deployment and Azure-specific details  
+- **Reference the full README.md** for deployment and Azure-specific details
 - **Use VS Code with the Python and Ruff extensions** for the best development experience
 - **Never skip the frontend build** - the backend serves static files from `src/backend/static/`
 
-This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment.
+This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment.
diff --git a/evals/eval_config.json b/evals/eval_config.json
@@ -8,8 +8,7 @@
             "use_advanced_flow": true,
             "top": 3,
             "retrieval_mode": "hybrid",
-            "temperature": 0.3,
-            "seed": 42
+            "temperature": 0.3
         }
     },
     "target_response_answer_jmespath": "message.content",
diff --git a/evals/evaluate.py b/evals/evaluate.py
@@ -68,8 +68,6 @@ def get_openai_config() -> dict:
         openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"]
     elif os.environ.get("OPENAI_CHAT_HOST") == "ollama":
         raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
-    elif os.environ.get("OPENAI_CHAT_HOST") == "github":
-        raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
     else:
         logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
         openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"}
diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py
@@ -121,7 +121,7 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc
                 {"role": "system", "content": generate_prompt},
                 {"role": "user", "content": json.dumps(source)},
             ],
-            tools=[qa_pairs_tool(num_questions=2)],
+            tools=[qa_pairs_tool(num_questions=2)],  # type: ignore[list-item]
             max_output_tokens=1000,
             store=False,
         )
diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py
@@ -3,7 +3,7 @@
 from collections.abc import AsyncGenerator
 from typing import Annotated, Optional
 
-import azure.identity
+import azure.identity.aio
 from fastapi import Depends, Request
 from openai import AsyncOpenAI
 from pydantic import BaseModel
@@ -77,9 +77,9 @@ async def common_parameters():
 
 
 async def get_azure_credential() -> (
-    azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential
+    azure.identity.aio.AzureDeveloperCliCredential | azure.identity.aio.ManagedIdentityCredential
 ):
-    azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential
+    azure_credential: azure.identity.aio.AzureDeveloperCliCredential | azure.identity.aio.ManagedIdentityCredential
     try:
         if client_id := os.getenv("APP_IDENTITY_ID"):
             # Authenticate using a user-assigned managed identity on Azure
@@ -88,14 +88,14 @@ async def get_azure_credential() -> (
                 "Using managed identity for client ID %s",
                 client_id,
             )
-            azure_credential = azure.identity.ManagedIdentityCredential(client_id=client_id)
+            azure_credential = azure.identity.aio.ManagedIdentityCredential(client_id=client_id)
         else:
             if tenant_id := os.getenv("AZURE_TENANT_ID"):
                 logger.info("Authenticating to Azure using Azure Developer CLI Credential for tenant %s", tenant_id)
-                azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=tenant_id, process_timeout=60)
+                azure_credential = azure.identity.aio.AzureDeveloperCliCredential(tenant_id=tenant_id)
             else:
                 logger.info("Authenticating to Azure using Azure Developer CLI Credential")
-                azure_credential = azure.identity.AzureDeveloperCliCredential(process_timeout=60)
+                azure_credential = azure.identity.aio.AzureDeveloperCliCredential()
         return azure_credential
     except Exception as e:
         logger.warning("Failed to authenticate to Azure: %s", e)
diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py
@@ -1,15 +1,16 @@
 import logging
 import os
 
-import azure.identity
 import azure.identity.aio
 import openai
 
 logger = logging.getLogger("ragapp")
 
 
 async def create_openai_chat_client(
-    azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None,
+    azure_credential: azure.identity.aio.AzureDeveloperCliCredential
+    | azure.identity.aio.ManagedIdentityCredential
+    | None,
 ) -> openai.AsyncOpenAI:
     openai_chat_client: openai.AsyncOpenAI
     OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST")
@@ -55,7 +56,9 @@ async def create_openai_chat_client(
 
 
 async def create_openai_embed_client(
-    azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None,
+    azure_credential: azure.identity.aio.AzureDeveloperCliCredential
+    | azure.identity.aio.ManagedIdentityCredential
+    | None,
 ) -> openai.AsyncOpenAI:
     openai_embed_client: openai.AsyncOpenAI
     OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST")
diff --git a/src/backend/fastapi_app/update_embeddings.py b/src/backend/fastapi_app/update_embeddings.py
@@ -29,8 +29,6 @@ async def update_embeddings(in_seed_data=False):
         embedding_column = os.getenv("AZURE_OPENAI_EMBEDDING_COLUMN", "embedding_3l")
     elif OPENAI_EMBED_HOST == "ollama":
         embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN", "embedding_nomic")
-    elif OPENAI_EMBED_HOST == "github":
-        embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN", "embedding_3l")
     else:
         embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l")
     logger.info(f"Updating embeddings in column: {embedding_column}")
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -5,6 +5,7 @@
 
 import openai
 import openai.resources
+import openai.resources.responses
 import pytest
 import pytest_asyncio
 from fastapi.testclient import TestClient
@@ -184,6 +185,8 @@ def __init__(self, answer: str):
                         delta=parts[0] + "<<",
                         item_id="msg-1",
                         output_index=0,
+                        logprobs=[],
+                        sequence_number=0,
                     )
                 )
                 self.events.append(
@@ -193,6 +196,8 @@ def __init__(self, answer: str):
                         delta=parts[1],
                         item_id="msg-1",
                         output_index=0,
+                        logprobs=[],
+                        sequence_number=1,
                     )
                 )
             else:
@@ -203,6 +208,8 @@ def __init__(self, answer: str):
                         delta=answer,
                         item_id="msg-1",
                         output_index=0,
+                        logprobs=[],
+                        sequence_number=0,
                     )
                 )
 
@@ -305,7 +312,7 @@ async def mock_acreate(*args, **kwargs):
 @pytest.fixture(scope="function")
 def mock_azure_credential(mock_session_env):
     """Mock the Azure credential for testing."""
-    with mock.patch("azure.identity.AzureDeveloperCliCredential") as mock_azure_credential:
+    with mock.patch("azure.identity.aio.AzureDeveloperCliCredential") as mock_azure_credential:
         mock_azure_credential.return_value = MockAzureCredential()
         yield mock_azure_credential
 

Original file line number	Diff line number	Diff line change
`@@ -121,7 +121,7 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc`
`121`	`121`	`{"role": "system", "content": generate_prompt},`
`122`	`122`	`{"role": "user", "content": json.dumps(source)},`
`123`	`123`	`],`
`124`		`- tools=[qa_pairs_tool(num_questions=2)],`
	`124`	`+ tools=[qa_pairs_tool(num_questions=2)], # type: ignore[list-item]`
`125`	`125`	`max_output_tokens=1000,`
`126`	`126`	`store=False,`
`127`	`127`	`)`