Skip to content

Commit 151b892

Browse files
committed
fix: prevent exception chunk from being passed to _process in astream
When a backend error (e.g. ollama.ResponseError) is propagated through the async queue, astream() stored the exception but left it in the chunks list. The subsequent _process() loop then tried to process the exception object as a real chunk, causing an AttributeError that masked the original backend error. Two fixes: 1. base.py: pop() the exception from chunks (like we already do for the None sentinel) so _process never receives it. 2. ollama.py: use .get() instead of [] for chat_response in post_processing, since the key may not exist if no valid chunks were processed before the error. Reproduces as: KeyError: 'chat_response' when an Ollama model returns a ResponseError (e.g. timeout on a large model). The post_processing runs in the finally block after the AttributeError, finds no chat_response key, and raises KeyError — masking the real error. Signed-off-by: 0xCUB3 <skula@mit.edu>
1 parent 6f3e131 commit 151b892

3 files changed

Lines changed: 142 additions & 3 deletions

File tree

mellea/backends/ollama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ async def post_processing(
596596
generate_log.backend = f"ollama::{self._get_ollama_model_id()}"
597597
generate_log.model_options = mot._model_options
598598
generate_log.date = datetime.datetime.now()
599-
generate_log.model_output = mot._meta["chat_response"]
599+
generate_log.model_output = mot._meta.get("chat_response")
600600
generate_log.extra = {
601601
"format": _format,
602602
"thinking": mot._model_options.get(ModelOption.THINKING, None),

mellea/core/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,8 @@ async def astream(self) -> str:
325325
elif isinstance(chunks[-1], Exception):
326326
# Mark as computed so post_process runs in finally block
327327
self._computed = True
328-
# Store exception to re-raise after cleanup
329-
exception_to_raise = chunks[-1]
328+
# Remove the exception from chunks so _process doesn't receive it
329+
exception_to_raise = chunks.pop()
330330

331331
for chunk in chunks:
332332
assert self._process is not None
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""Tests for ModelOutputThunk.astream() exception handling.
2+
3+
Verifies that backend exceptions propagated through the async queue are
4+
handled correctly: the exception should not be fed into _process as a chunk,
5+
and post_processing should not crash if it runs during exception cleanup.
6+
"""
7+
8+
import asyncio
9+
import functools
10+
11+
import pytest
12+
13+
from mellea.core.base import GenerateType, ModelOutputThunk
14+
15+
16+
async def _noop_post_process(mot: ModelOutputThunk) -> None:
    """Default post_process hook: deliberately leaves the thunk (and _meta) untouched."""
19+
20+
async def _tracking_process(
    mot: ModelOutputThunk, chunk: object, *, calls: list
) -> None:
    """Record *chunk* into *calls* and fold its string form into the thunk value."""
    calls.append(chunk)
    # Lazily start from the empty string, then concatenate — same effect as
    # initializing on first chunk and using `+=`.
    prior = mot._underlying_value
    mot._underlying_value = ("" if prior is None else prior) + str(chunk)
28+
29+
30+
def _make_mot(
    *, process_calls: list | None = None, post_process=None
) -> ModelOutputThunk:
    """Build a ModelOutputThunk configured for async queue-based streaming.

    Args:
        process_calls: When given, the thunk's ``_process`` callback records
            every chunk it receives into this list (while also accumulating
            string content).
        post_process: Optional ``_post_process`` callback; a no-op is used
            when omitted.

    Returns:
        A thunk with ``_generate_type`` set to ASYNC and both streaming hooks
        wired up, ready to consume from its async queue.
    """
    thunk = ModelOutputThunk(None)
    thunk._generate_type = GenerateType.ASYNC

    if process_calls is None:
        # Plain accumulator: concatenate the string form of each chunk.
        async def _accumulate(m: ModelOutputThunk, chunk: object) -> None:
            prior = m._underlying_value
            m._underlying_value = ("" if prior is None else prior) + str(chunk)

        thunk._process = _accumulate
    else:
        thunk._process = functools.partial(_tracking_process, calls=process_calls)

    thunk._post_process = post_process or _noop_post_process
    return thunk
59+
60+
61+
async def test_exception_not_passed_to_process():
    """An exception pulled off the queue must never reach _process as a chunk."""
    calls: list = []
    mot = _make_mot(process_calls=calls)

    await mot._async_queue.put(RuntimeError("backend failed"))

    with pytest.raises(RuntimeError, match="backend failed"):
        await mot.astream()

    # astream() pops the exception before the processing loop runs, so the
    # tracking callback never fires.
    assert not calls, (
        f"_process should not have been called with an exception, got {calls}"
    )
75+
76+
77+
async def test_valid_chunks_before_exception_are_processed():
    """Chunks queued ahead of the exception must still be fed to _process."""
    calls: list = []
    mot = _make_mot(process_calls=calls)

    # Queue order: two good chunks, then the failure.
    for item in ("chunk1", "chunk2", ValueError("oops")):
        await mot._async_queue.put(item)

    with pytest.raises(ValueError, match="oops"):
        await mot.astream()

    assert calls == ["chunk1", "chunk2"]
91+
92+
93+
async def test_post_process_runs_on_exception():
    """Exception cleanup must still invoke post_process (telemetry depends on it)."""
    # List-as-sentinel instead of a nonlocal flag: truthy iff the hook ran.
    invocations: list = []

    async def _tracking_post(mot: ModelOutputThunk) -> None:
        invocations.append(True)

    mot = _make_mot(post_process=_tracking_post)
    await mot._async_queue.put(RuntimeError("backend failed"))

    with pytest.raises(RuntimeError, match="backend failed"):
        await mot.astream()

    assert invocations, "post_process should run even when an exception occurs"
108+
109+
110+
async def test_post_process_with_missing_meta_key():
    """post_process accessing a missing _meta key should not mask the real exception.

    Reproduces the KeyError: 'chat_response' scenario: post_processing reads a
    key that was never populated because no valid chunk was processed before
    the backend error arrived.
    """

    async def _fragile_post(mot: ModelOutputThunk) -> None:
        # Mirrors the fixed ollama post_processing: .get() instead of [],
        # so the absent key yields None rather than raising.
        _ = mot._meta.get("chat_response")

    mot = _make_mot(post_process=_fragile_post)
    await mot._async_queue.put(RuntimeError("backend failed"))

    # The backend error — not a KeyError — is what must propagate.
    with pytest.raises(RuntimeError, match="backend failed"):
        await mot.astream()
129+
130+
131+
async def test_exception_only_queue_marks_computed():
    """Even an exception-only stream must leave the thunk flagged as computed."""
    mot = _make_mot()
    await mot._async_queue.put(RuntimeError("backend failed"))

    with pytest.raises(RuntimeError, match="backend failed"):
        await mot.astream()

    assert mot._computed, "Thunk should be marked computed after exception"

0 commit comments

Comments
 (0)