Skip to content

Commit caf93e7

Browse files
committed
fix(mcp): move mini-prompt injection from system prompt to user messages
Mini-prompts appended to the end of the system prompt were being treated as reference material by models rather than actionable instructions. Injection now uses two strategies:

- Ephemeral: append mini-prompt content to the last user message at API call time (not persisted). Models pay attention to user-message content, giving mini-prompts immediate salience.
- One-time persist: when mini-prompts are enabled mid-conversation, add a hidden system-generated user message to history so the model retains context for its decisions even after the prompt is disabled.

ChatWidget still handles first-message injection (persisted in userContext for continuity). The existing userContext dedup stripping handles old messages at API time.
1 parent 10875e2 commit caf93e7

4 files changed

Lines changed: 116 additions & 61 deletions

File tree

Info.plist

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
<key>CFBundlePackageType</key>
2020
<string>APPL</string>
2121
<key>CFBundleShortVersionString</key>
22-
<string>20260404.1</string>
22+
<string>20260408.1</string>
2323
<key>CFBundleVersion</key>
24-
<string>20260404.1</string>
24+
<string>20260408.1</string>
2525
<key>LSApplicationCategoryType</key>
2626
<string>public.app-category.productivity</string>
2727
<key>LSMinimumSystemVersion</key>

Resources/whats-new.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
11
{
22
"releases": [
3+
{
4+
"version": "20260408.1",
5+
"release_date": "April 8, 2026",
6+
"introduction": "This release fixes a bug where mini-prompts disappeared mid-conversation, and includes UI improvements for smoother scrolling and conversation navigation.",
7+
"improvements": [
8+
{
9+
"id": "scroll-system-rewrite",
10+
"icon": "arrow.up.arrow.down.circle.fill",
11+
"title": "Rewritten Scroll System",
12+
"description": "The conversation scroll system has been completely rewritten to fix content jumping off-screen. Conversations now scroll to the newest message and auto-focus the input field when opened."
13+
}
14+
],
15+
"bugfixes": [
16+
{
17+
"id": "mini-prompt-mid-conversation",
18+
"icon": "brain.head.profile.fill",
19+
"title": "Fixed Mini-Prompts Disappearing Mid-Conversation",
20+
"description": "Mini-prompts are now injected directly into user messages at API call time, so agents act on them immediately instead of treating them as background context. When you enable a mini-prompt mid-conversation, it's persisted once as a hidden message for continuity and re-injected ephemerally on every turn so the model keeps it front of mind."
21+
}
22+
]
23+
},
324
{
425
"version": "20260404.1",
526
"release_date": "April 4, 2026",

Sources/APIFramework/AgentOrchestrator+LLMCalls.swift

Lines changed: 91 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -182,34 +182,11 @@ extension AgentOrchestrator {
182182
}
183183
}
184184

185-
/// MINI PROMPT INJECTION: Include enabled mini-prompts in system prompt
186-
/// Mini-prompts are user-configured persistent instructions for the conversation.
187-
/// By embedding them in the system prompt (which is sent on every call), they are
188-
/// always visible to the agent without consuming additional context slots.
189-
if !conversation.enabledMiniPromptIds.isEmpty {
190-
let miniPromptText = MiniPromptManager.shared.getInjectedText(
191-
for: conversation.id,
192-
enabledIds: conversation.enabledMiniPromptIds
193-
)
194-
if !miniPromptText.isEmpty {
195-
let enabledPrompts = MiniPromptManager.shared.enabledPrompts(
196-
for: conversation.id,
197-
enabledIds: conversation.enabledMiniPromptIds
198-
)
199-
systemPromptAdditions += """
200-
201-
202-
# User Instructions (Mini-Prompts)
203-
204-
The user has specified the following instructions that you MUST follow:
205-
206-
\(miniPromptText)
207-
208-
These instructions take priority. Follow them exactly.
209-
"""
210-
logger.info("callLLM: Injected \(enabledPrompts.count) mini-prompt(s) into system prompt (\(miniPromptText.count) chars)")
211-
}
212-
}
185+
/// MINI PROMPT INJECTION: Handled via ephemeral user-message injection below.
186+
/// Mini-prompts appended to the system prompt get buried under layers of context
187+
/// and models treat them as reference material rather than actionable instructions.
188+
/// Instead, we inject them into the latest user message at API-call time (not persisted).
189+
/// ChatWidget handles first-message injection into <userContext> (persisted for continuity).
213190

214191
/// Session naming: inject instruction for unnamed conversations so AI provides a title
215192
if conversation.title.hasPrefix("New Conversation") {
@@ -488,6 +465,52 @@ extension AgentOrchestrator {
488465
logger.debug("callLLM: Request has \(messages.count) messages (\(messagesToSend.count) conversation + \(internalMessages.count) internal)")
489466
logger.debug("callLLM: User sees \(conversation.messages.count) messages, LLM context uses \(messagesToSend.count) messages")
490467

468+
/// EPHEMERAL MINI-PROMPT INJECTION: Append mini-prompt content to the last user message
469+
/// in the API payload. This is NOT persisted to conversation history - it only exists
470+
/// in the messages array sent to the API. Models pay more attention to content in user
471+
/// messages than system prompts, so this gets mini-prompts acted on rather than ignored.
472+
///
473+
/// ONE-TIME PERSIST: When mini-prompts are enabled mid-conversation, we also add a
474+
/// hidden system-generated user message to conversation history so the model retains
475+
/// context about WHY it made certain decisions even after the prompt is disabled.
476+
if !conversation.enabledMiniPromptIds.isEmpty {
477+
let miniPromptText = MiniPromptManager.shared.getInjectedText(
478+
for: conversation.id,
479+
enabledIds: conversation.enabledMiniPromptIds
480+
)
481+
if !miniPromptText.isEmpty {
482+
/// Check if mini-prompt content is already persisted in conversation history
483+
let alreadyPersisted = conversation.messages.contains { msg in
484+
msg.isFromUser && msg.content.contains(miniPromptText.prefix(100))
485+
}
486+
487+
/// One-time persist: Add hidden message if not already in history.
488+
/// This ensures the model retains context for its decisions even after
489+
/// the mini-prompt is disabled. Hidden from UI via isSystemGenerated.
490+
if !alreadyPersisted {
491+
let persistContent = "<userContext>\n\(miniPromptText)\n</userContext>"
492+
conversation.messageBus?.addUserMessage(
493+
content: persistContent,
494+
isPinned: true,
495+
isSystemGenerated: true
496+
)
497+
logger.info("callLLM: Persisted mini-prompt content as hidden message for conversation continuity (\(miniPromptText.count) chars)")
498+
}
499+
500+
/// Ephemeral injection into last user message for immediate salience
501+
if let lastUserIndex = messages.lastIndex(where: { $0.role == "user" }) {
502+
let existingContent = messages[lastUserIndex].content ?? ""
503+
if !existingContent.contains(miniPromptText.prefix(100)) {
504+
let injectedContent = existingContent + "\n\n<userContext>\n\(miniPromptText)\n</userContext>"
505+
messages[lastUserIndex] = OpenAIChatMessage(role: "user", content: injectedContent)
506+
logger.info("callLLM: Ephemeral mini-prompt injection into last user message (\(miniPromptText.count) chars)")
507+
} else {
508+
logger.debug("callLLM: Mini-prompt already present in last user message")
509+
}
510+
}
511+
}
512+
}
513+
491514
/// Get model's actual context limit for MessageValidator budget calculation
492515
let modelContextLimit = await tokenCounter.getContextSize(modelName: model)
493516
logger.debug("CONTEXT: Model '\(model)' has context limit of \(modelContextLimit) tokens")
@@ -1239,34 +1262,8 @@ extension AgentOrchestrator {
12391262
}
12401263
}
12411264

1242-
/// MINI PROMPT INJECTION: Include enabled mini-prompts in system prompt
1243-
/// Mini-prompts are user-configured persistent instructions for the conversation.
1244-
/// By embedding them in the system prompt (which is sent on every call), they are
1245-
/// always visible to the agent without consuming additional context slots.
1246-
if !conversation.enabledMiniPromptIds.isEmpty {
1247-
let miniPromptText = MiniPromptManager.shared.getInjectedText(
1248-
for: conversation.id,
1249-
enabledIds: conversation.enabledMiniPromptIds
1250-
)
1251-
if !miniPromptText.isEmpty {
1252-
let enabledPrompts = MiniPromptManager.shared.enabledPrompts(
1253-
for: conversation.id,
1254-
enabledIds: conversation.enabledMiniPromptIds
1255-
)
1256-
systemPromptAdditions += """
1257-
1258-
1259-
# User Instructions (Mini-Prompts)
1260-
1261-
The user has specified the following instructions that you MUST follow:
1262-
1263-
\(miniPromptText)
1264-
1265-
These instructions take priority. Follow them exactly.
1266-
"""
1267-
logger.info("callLLMStreaming: Injected \(enabledPrompts.count) mini-prompt(s) into system prompt (\(miniPromptText.count) chars)")
1268-
}
1269-
}
1265+
/// MINI PROMPT INJECTION: Handled via ephemeral user-message injection below.
1266+
/// See callLLM for rationale - system prompt injection gets buried and ignored.
12701267

12711268
/// Session naming: inject instruction for unnamed conversations so AI provides a title
12721269
if conversation.title.hasPrefix("New Conversation") {
@@ -1599,6 +1596,44 @@ extension AgentOrchestrator {
15991596
messages = MessageValidator.validateToolMessagePairs(messages)
16001597
logger.debug("callLLMStreaming: Validated tool message pairs - \(messages.count) messages after validation")
16011598

1599+
/// EPHEMERAL MINI-PROMPT INJECTION + ONE-TIME PERSIST
1600+
/// See callLLM for full rationale - models act on user-message content, ignore system prompt tail.
1601+
if !conversation.enabledMiniPromptIds.isEmpty {
1602+
let miniPromptText = MiniPromptManager.shared.getInjectedText(
1603+
for: conversation.id,
1604+
enabledIds: conversation.enabledMiniPromptIds
1605+
)
1606+
if !miniPromptText.isEmpty {
1607+
/// Check if mini-prompt content is already persisted in conversation history
1608+
let alreadyPersisted = conversation.messages.contains { msg in
1609+
msg.isFromUser && msg.content.contains(miniPromptText.prefix(100))
1610+
}
1611+
1612+
/// One-time persist: Add hidden message if not already in history
1613+
if !alreadyPersisted {
1614+
let persistContent = "<userContext>\n\(miniPromptText)\n</userContext>"
1615+
conversation.messageBus?.addUserMessage(
1616+
content: persistContent,
1617+
isPinned: true,
1618+
isSystemGenerated: true
1619+
)
1620+
logger.info("callLLMStreaming: Persisted mini-prompt content as hidden message (\(miniPromptText.count) chars)")
1621+
}
1622+
1623+
/// Ephemeral injection into last user message for immediate salience
1624+
if let lastUserIndex = messages.lastIndex(where: { $0.role == "user" }) {
1625+
let existingContent = messages[lastUserIndex].content ?? ""
1626+
if !existingContent.contains(miniPromptText.prefix(100)) {
1627+
let injectedContent = existingContent + "\n\n<userContext>\n\(miniPromptText)\n</userContext>"
1628+
messages[lastUserIndex] = OpenAIChatMessage(role: "user", content: injectedContent)
1629+
logger.info("callLLMStreaming: Ephemeral mini-prompt injection into last user message (\(miniPromptText.count) chars)")
1630+
} else {
1631+
logger.debug("callLLMStreaming: Mini-prompt already present in last user message")
1632+
}
1633+
}
1634+
}
1635+
}
1636+
16021637
/// Get model context limit for MessageValidator budget calculation
16031638
let modelContextLimit = await tokenCounter.getContextSize(modelName: model)
16041639
logger.debug("CONTEXT: Model has context limit of \(modelContextLimit) tokens")

Sources/UserInterface/Chat/ChatWidget.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3052,9 +3052,8 @@ public struct ChatWidget: View {
30523052
for: activeConversation.id,
30533053
enabledIds: activeConversation.enabledMiniPromptIds
30543054
)
3055-
/// Mini prompt injection tracking is handled by AgentOrchestrator
3056-
/// ChatWidget should NOT call recordInjection() here - that blocks the
3057-
/// orchestrator from injecting its own <miniPromptReminder> system message
3055+
/// ChatWidget handles first-message persist only. AgentOrchestrator handles
3056+
/// one-time mid-conversation persist and ephemeral per-turn injection.
30583057
logger.info("MINI_PROMPT_TRACE: ChatWidget injected \(enabledPrompts.count) mini-prompt(s) into FIRST user message: \(enabledPrompts.map { $0.name }.joined(separator: ", "))")
30593058
} else {
30603059
logger.debug("MINI_PROMPT_TRACE: ChatWidget miniPromptText was empty")

0 commit comments

Comments
 (0)