Hmbown · mvanhorn · May 25, 2026
diff --git a/crates/tui/src/llm_client/mock.rs b/crates/tui/src/llm_client/mock.rs
@@ -63,6 +63,21 @@ use super::{LlmClient, StreamEventBox};
 /// the mock does not require `MessageStart` to be present.
 pub type CannedTurn = Vec<StreamEvent>;
 
+/// A queued mock response step, consumed by one client call.
+pub enum FauxStep {
+    /// Replay a pre-built turn exactly as queued.
+    Canned(CannedTurn),
+    /// Build the turn from the live outgoing request.
+    ///
+    /// The closure receives the request the client was called with, so a test
+    /// can assert on it. Motivating case: in thinking mode DeepSeek V4 returns
+    /// HTTP 400 unless the assistant turn that made a tool call is replayed
+    /// with its `reasoning_content` (a [`ContentBlock::Thinking`] block in
+    /// this model). That content was dropped in the
+    /// [v0.4.9-v0.5.1 regression range](https://github.com/Hmbown/CodeWhale/compare/v0.4.9...v0.5.1).
+    Factory(Box<dyn Fn(&MessageRequest) -> CannedTurn + Send + Sync>),
+}
+
 /// A queue-driven mock LLM client.
 ///
 /// The mock holds a FIFO queue of canned response turns. Each call to
@@ -75,7 +90,7 @@ pub type CannedTurn = Vec<StreamEvent>;
 /// can assert on the outgoing payload (e.g. that prior `reasoning_content` is
 /// preserved across turns).
 pub struct MockLlmClient {
-    canned: Mutex<VecDeque<CannedTurn>>,
+    canned: Mutex<VecDeque<FauxStep>>,
     captured_requests: Mutex<Vec<MessageRequest>>,
     calls: AtomicUsize,
     provider_name: &'static str,
@@ -91,7 +106,7 @@ impl MockLlmClient {
     #[must_use]
     pub fn new(canned: Vec<CannedTurn>) -> Self {
         Self {
-            canned: Mutex::new(canned.into()),
+            canned: Mutex::new(canned.into_iter().map(FauxStep::Canned).collect()),
             captured_requests: Mutex::new(Vec::new()),
             calls: AtomicUsize::new(0),
             provider_name: "mock",
@@ -119,7 +134,22 @@ impl MockLlmClient {
         self.canned
             .lock()
             .expect("MockLlmClient.canned mutex poisoned")
-            .push_back(turn);
+            .push_back(FauxStep::Canned(turn));
+    }
+
+    /// Queue a factory step behind everything already queued.
+    ///
+    /// `factory` is handed the live outgoing [`MessageRequest`] before the
+    /// response is built, so a failed assertion panics inside the client call.
+    pub fn push_factory<F>(&self, factory: F)
+    where
+        F: Fn(&MessageRequest) -> CannedTurn + Send + Sync + 'static,
+    {
+        let mut queue = self
+            .canned
+            .lock()
+            .expect("MockLlmClient.canned mutex poisoned");
+        queue.push_back(FauxStep::Factory(Box::new(factory)));
     }
 
     /// Push a canned non-streaming `MessageResponse`. Consumed by
@@ -175,13 +205,20 @@ impl MockLlmClient {
         self.calls.fetch_add(1, Ordering::SeqCst);
     }
 
-    fn pop_turn(&self) -> Option<CannedTurn> {
+    fn pop_step(&self) -> Option<FauxStep> {
         self.canned
             .lock()
             .expect("MockLlmClient.canned mutex poisoned")
             .pop_front()
     }
 
+    fn turn_from_step(&self, step: FauxStep, request: &MessageRequest) -> CannedTurn {
+        match step {
+            FauxStep::Canned(turn) => turn, // pre-built: returned exactly as queued
+            FauxStep::Factory(factory) => factory(request), // built from the live request
+        }
+    }
+
     fn pop_message(&self) -> Option<MessageResponse> {
         self.canned_messages
             .lock()
@@ -207,26 +244,28 @@ impl LlmClient for MockLlmClient {
         }
 
         // Fallback: synthesize a MessageResponse from the next streaming turn.
-        let Some(turn) = self.pop_turn() else {
+        let Some(step) = self.pop_step() else {
             return Err(anyhow!(
                 "MockLlmClient: create_message called but no canned response queued (request #{})",
                 self.calls.load(Ordering::SeqCst)
             ));
         };
 
+        let turn = self.turn_from_step(step, &request);
         Ok(synthesize_message_response(turn, &self.model))
     }
 
     async fn create_message_stream(&self, request: MessageRequest) -> Result<StreamEventBox> {
         self.record_request(&request);
 
-        let Some(turn) = self.pop_turn() else {
+        let Some(step) = self.pop_step() else {
             return Err(anyhow!(
                 "MockLlmClient: create_message_stream called but no canned turn queued (call #{})",
                 self.calls.load(Ordering::SeqCst)
             ));
         };
 
+        let turn = self.turn_from_step(step, &request);
         Ok(stream_from_canned(turn))
     }
 
@@ -561,6 +600,22 @@ mod tests {
         assert_eq!(resp.stop_reason.as_deref(), Some("end_turn"));
     }
 
+    /// The factory sees the outgoing request; its turn becomes the response.
+    #[tokio::test]
+    async fn create_message_synthesizes_from_factory_turn() {
+        let mock = MockLlmClient::new(Vec::new());
+        mock.push_factory(|request| {
+            assert_eq!(request.model, "mock-model");
+            canned::simple_text_turn("from factory")
+        });
+
+        let resp = mock.create_message(empty_request()).await.unwrap();
+        let Some(ContentBlock::Text { text, .. }) = resp.content.first() else {
+            panic!("expected text");
+        };
+        assert_eq!(text, "from factory");
+    }
+
     #[tokio::test]
     async fn provider_and_model_are_overridable() {
         let mock = MockLlmClient::new(vec![canned::simple_text_turn("x")])

diff --git a/crates/tui/tests/reasoning_content_replayed_after_tool_call.rs b/crates/tui/tests/reasoning_content_replayed_after_tool_call.rs
@@ -0,0 +1,128 @@
+use futures_util::StreamExt;
+
+#[path = "../src/models.rs"]
+#[allow(dead_code)]
+mod models;
+
+#[path = "support/llm_client.rs"]
+mod llm_client;
+
+use crate::llm_client::LlmClient;
+use crate::llm_client::mock::{MockLlmClient, canned};
+use crate::models::{ContentBlock, Message, MessageRequest};
+
+/// Build a `user`-role message holding one plain-text block.
+fn user_message(text: &str) -> Message {
+    let role = String::from("user");
+    let content = vec![ContentBlock::Text {
+        text: String::from(text),
+        cache_control: None,
+    }];
+    Message { role, content }
+}
+
+/// Build an `assistant`-role message that holds a thinking block followed by
+/// a single tool call, in that order.
+fn assistant_thinking_tool_call(
+    thinking: &str,
+    id: &str,
+    name: &str,
+    input: serde_json::Value,
+) -> Message {
+    let reasoning = ContentBlock::Thinking {
+        thinking: String::from(thinking),
+    };
+    let tool_call = ContentBlock::ToolUse {
+        id: String::from(id),
+        name: String::from(name),
+        input,
+        caller: None,
+    };
+    let role = String::from("assistant");
+    let content = vec![reasoning, tool_call];
+    Message { role, content }
+}
+
+/// Build the `user`-role message that carries one tool result.
+fn tool_result_message(tool_use_id: &str, content: &str) -> Message {
+    let role = String::from("user");
+    let content = vec![ContentBlock::ToolResult {
+        tool_use_id: String::from(tool_use_id),
+        content: String::from(content),
+        is_error: None,
+        content_blocks: None,
+    }];
+    Message { role, content }
+}
+
+fn make_request(messages: Vec<Message>) -> MessageRequest {
+    MessageRequest {
+        model: String::from("deepseek-v4-pro"),
+        max_tokens: 4096,
+        reasoning_effort: Some(String::from("high")),
+        stream: Some(true), // the test drives `create_message_stream`
+        messages,
+        system: None, // all remaining fields are left unset
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        temperature: None,
+        top_p: None,
+    }
+}
+
+/// The second turn's factory asserts the request still carries the assistant thinking block.
+#[tokio::test]
+async fn reasoning_content_is_replayed_after_thinking_tool_call() {
+    let first_turn = vec![
+        canned::message_start("r1"),
+        canned::thinking_delta(0, "I should inspect /tmp before answering."),
+        canned::tool_use_block_start(1, "call_a", "list_dir"),
+        canned::tool_input_delta(1, r#"{"path":"/tmp"}"#),
+        canned::block_stop(1),
+        canned::message_delta("tool_use", None),
+        canned::message_stop(),
+    ];
+    let mock = MockLlmClient::new(vec![first_turn]);
+
+    mock.push_factory(|request| {
+        let prior_turn = request
+            .messages
+            .iter()
+            .rfind(|message| message.role == "assistant")
+            .expect("follow-up request must include the prior assistant tool-call turn");
+        let replays_reasoning = prior_turn
+            .content
+            .iter()
+            .any(|block| matches!(block, ContentBlock::Thinking { .. }));
+        assert!(
+            replays_reasoning,
+            "DeepSeek V4 follow-up requests must replay reasoning_content on the assistant tool-call turn"
+        );
+        canned::simple_text_turn("I see the /tmp entries.")
+    });
+
+    let opening = make_request(vec![user_message("list /tmp")]);
+    let mut first = mock
+        .create_message_stream(opening)
+        .await
+        .expect("first stream opens");
+    while first.next().await.is_some() {}
+
+    let follow_up = make_request(vec![
+        user_message("list /tmp"),
+        assistant_thinking_tool_call(
+            "I should inspect /tmp before answering.",
+            "call_a",
+            "list_dir",
+            serde_json::json!({ "path": "/tmp" }),
+        ),
+        tool_result_message("call_a", "/tmp/file1\n/tmp/file2"),
+    ]);
+    let mut second = mock
+        .create_message_stream(follow_up)
+        .await
+        .expect("second stream opens");
+    while second.next().await.is_some() {}
+}