AssemblyAI · alexkroman · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/REFERENCE.md b/REFERENCE.md
@@ -94,7 +94,7 @@ each carrying a `"type"` field to dispatch on:
 | ------- | ----------- |
 | `assembly stream --json` | `begin`, `turn`, `termination` (with `--from-stdin`, a `source` event precedes each file's events) |
 | `assembly agent --json` | `session.ready`, `transcript.user.delta`, `transcript.user`, `reply.started`, `transcript.agent`, `reply.done` |
-| `assembly live --json` | `session.ready`, `transcript.user.delta`, `transcript.user`, `tool.use`, `reply.started`, `transcript.agent`, `reply.done` |
+| `assembly live --json` | `session.ready`, `transcript.user.delta`, `transcript.user`, `tool.use`, `plan`, `reply.started`, `transcript.agent`, `reply.done` |
 | `assembly dictate --json` | `utterance` |
 | `assembly llm --follow --json` | `answer` |
 | `assembly transcribe <batch> --json` | `result` (one per source), then `reduce` if `--llm-reduce` is set |

diff --git a/aai_cli/AGENTS.md b/aai_cli/AGENTS.md
diff --git a/aai_cli/agent/events.py b/aai_cli/agent/events.py
@@ -59,6 +59,20 @@ class ToolUse(_Event):
     label: str
 
 
+class TodoItem(_Event):
+    """One task in the agent's plan: its text and lifecycle ``status``."""
+
+    content: str
+    status: str
+
+
+class PlanUpdate(_Event):
+    """The agent's task list (its ``write_todos`` plan); ``todos`` replaces any prior plan."""
+
+    type: Literal["plan"] = "plan"
+    todos: tuple[TodoItem, ...]
+
+
 class AgentTranscript(_Event):
     """The agent's reply transcript (``interrupted`` when the user barged in)."""
 
@@ -74,4 +88,13 @@ class ReplyDone(_Event):
     interrupted: bool
 
 
-Event = SessionReady | UserDelta | UserFinal | ToolUse | ReplyStarted | AgentTranscript | ReplyDone
+Event = (
+    SessionReady
+    | UserDelta
+    | UserFinal
+    | ToolUse
+    | PlanUpdate
+    | ReplyStarted
+    | AgentTranscript
+    | ReplyDone
+)
diff --git a/aai_cli/agent/render.py b/aai_cli/agent/render.py
@@ -1,12 +1,23 @@
 from __future__ import annotations
 
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from rich.text import Text
 
 from aai_cli.agent import events
 from aai_cli.ui.render import BaseRenderer
 
+if TYPE_CHECKING:
+    from aai_cli.agent_cascade.plan import TodoItem
+
+# Fixed-width status marks for the plan, pipe-safe and aligned (the TUI styles its own).
+_TODO_MARKS = {"completed": "[x]", "in_progress": "[~]", "pending": "[ ]"}
+
+
+def _mark(status: str) -> str:
+    """The checklist marker for a todo status, falling back to the pending box for an unknown one."""
+    return _TODO_MARKS.get(status, "[ ]")
+
 
 def _labeled(label: str, body: str, *, style: str = "aai.label") -> Text:
     """A transcript line tinted entirely in `style` — both the `label` prefix and the body.
@@ -88,6 +99,24 @@ def tool_call(self, label: str) -> None:
         else:
             self._line(_labeled("", f"{label}…", style="aai.muted"))
 
+    def todos_updated(self, todos: tuple[TodoItem, ...]) -> None:
+        """Surface the agent's plan (its ``write_todos`` list), replacing any prior plan.
+
+        JSON emits a ``plan`` event; piped text routes a compact one-line summary to stderr
+        (keeping stdout transcript-only); human mode shows a muted multi-line checklist.
+        """
+        if self.json_mode:
+            items = tuple(events.TodoItem(content=t.content, status=t.status) for t in todos)
+            self._emit_event(events.PlanUpdate(todos=items))
+        elif self.text_mode:
+            self._status("Plan: " + "; ".join(f"{_mark(t.status)} {t.content}" for t in todos))
+        else:
+            self._line(_labeled("", "Plan:", style="aai.muted"))
+            for todo in todos:
+                self._line(
+                    _labeled("  ", f"{_mark(todo.status)} {todo.content}", style="aai.muted")
+                )
+
     # --- agent -------------------------------------------------------------
     def reply_started(self) -> None:
         if self.json_mode:

diff --git a/aai_cli/agent_cascade/_io.py b/aai_cli/agent_cascade/_io.py
@@ -13,7 +13,7 @@
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Protocol
 
-from aai_cli.agent_cascade import brain
+from aai_cli.agent_cascade import brain, plan, streamer
 from aai_cli.agent_cascade._runtime import Worker as _Worker
 from aai_cli.agent_cascade._runtime import spawn_thread as _spawn_thread
 from aai_cli.agent_cascade.config import CascadeConfig
@@ -43,6 +43,9 @@ def user_final(self, text: str) -> None:
     def tool_call(self, label: str) -> None:
         """Show that the agent is using a tool (e.g. "Searching the web") while it thinks."""
 
+    def todos_updated(self, todos: tuple[plan.TodoItem, ...]) -> None:
+        """Show the agent's current task list (its ``write_todos`` plan), replacing any prior."""
+
     def reply_started(self) -> None:
         """Mark the start of an agent reply."""
 
@@ -82,12 +85,13 @@ class CascadeDeps:
     """
 
     run_stt: Callable[[Callable[[object], None]], None]
-    # stream_reply(messages) -> iterable of SpeechDelta/ToolNotice events (plus ApprovalPause
-    # markers under --files write gating). The reply is streamed token-by-token so the engine
-    # can speak each clause as it lands; a ToolNotice surfaces the "Searching the web…"
-    # affordance (brain.build_streamer).
+    # stream_reply(messages) -> iterable of SpeechDelta/ToolNotice/TodoUpdate events (plus
+    # ApprovalPause markers under --files write gating). The reply is streamed token-by-token so
+    # the engine can speak each clause as it lands; a ToolNotice surfaces the "Searching the web…"
+    # affordance, and a TodoUpdate surfaces the agent's plan (built by streamer.build_streamer).
     stream_reply: Callable[
-        ..., Iterable[brain.SpeechDelta | brain.ToolNotice | brain.ApprovalPause]
+        ...,
+        Iterable[brain.SpeechDelta | brain.ToolNotice | plan.TodoUpdate | brain.ApprovalPause],
     ]
     # synthesize(text, sink): streaming TTS — sink is called with each PCM frame as it
     # arrives so playback starts on the first frame instead of after the whole clause.
@@ -110,7 +114,7 @@ def run_stt(on_turn: Callable[[object], None]) -> None:
         # The LLM leg is a deepagents graph (web search / MCP tools), streamed token-by-token
         # so a spoken turn can transparently use tools and start speaking sooner. ``approver``
         # gates --files writes (None on the non-files path, where the graph never pauses).
-        stream_reply = brain.build_streamer(api_key, config, approver=approver)
+        stream_reply = streamer.build_streamer(api_key, config, approver=approver)
 
         def synthesize(text: str, sink: Callable[[bytes], None]) -> None:
             spec = SpeakConfig(

diff --git a/aai_cli/agent_cascade/_runtime.py b/aai_cli/agent_cascade/_runtime.py
@@ -23,7 +23,7 @@
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Protocol
 
-from aai_cli.agent_cascade import brain
+from aai_cli.agent_cascade import brain, plan
 from aai_cli.core.errors import CLIError
 
 if TYPE_CHECKING:
@@ -53,9 +53,11 @@ class Timeout:
     """Consumer sentinel: the wall-clock deadline elapsed before the next event arrived."""
 
 
-# What the producer thread puts on the consumer's queue: a speech/tool event from the
+# What the producer thread puts on the consumer's queue: a speech/tool/plan event from the
 # streaming leg, an approval-pause marker (--files write gating), or a terminal sentinel.
-type ReplyEvent = brain.SpeechDelta | brain.ToolNotice | brain.ApprovalPause | Done | Failure
+type ReplyEvent = (
+    brain.SpeechDelta | brain.ToolNotice | plan.TodoUpdate | brain.ApprovalPause | Done | Failure
+)
 
 
 def timeout_error() -> CLIError: