diff --git a/crates/tui/src/llm_client/mock.rs b/crates/tui/src/llm_client/mock.rs index 2588755df..8dc4043e5 100644 --- a/crates/tui/src/llm_client/mock.rs +++ b/crates/tui/src/llm_client/mock.rs @@ -63,6 +63,21 @@ use super::{LlmClient, StreamEventBox}; /// the mock does not require `MessageStart` to be present. pub type CannedTurn = Vec; +/// A queued mock response step. +pub enum FauxStep { + Canned(CannedTurn), + /// Build a canned turn from the live outgoing request. + /// + /// Tests can assert DeepSeek V4's thinking-mode tool-call invariant here: + /// on the assistant turn that produced the previous tool call, the next + /// outgoing request must still carry `reasoning_content` (represented in + /// this model as a [`ContentBlock::Thinking`] block). If it is missing, + /// DeepSeek V4 returns HTTP 400 on the follow-up turn. This guards the + /// [v0.4.9-v0.5.1 regression range](https://github.com/Hmbown/CodeWhale/compare/v0.4.9...v0.5.1) + /// where that content was dropped. + Factory(Box CannedTurn + Send + Sync>), +} + /// A queue-driven mock LLM client. /// /// The mock holds a FIFO queue of canned response turns. Each call to @@ -75,7 +90,7 @@ pub type CannedTurn = Vec; /// can assert on the outgoing payload (e.g. that prior `reasoning_content` is /// preserved across turns). pub struct MockLlmClient { - canned: Mutex>, + canned: Mutex>, captured_requests: Mutex>, calls: AtomicUsize, provider_name: &'static str, @@ -91,7 +106,7 @@ impl MockLlmClient { #[must_use] pub fn new(canned: Vec) -> Self { Self { - canned: Mutex::new(canned.into()), + canned: Mutex::new(canned.into_iter().map(FauxStep::Canned).collect()), captured_requests: Mutex::new(Vec::new()), calls: AtomicUsize::new(0), provider_name: "mock", @@ -119,7 +134,22 @@ impl MockLlmClient { self.canned .lock() .expect("MockLlmClient.canned mutex poisoned") - .push_back(turn); + .push_back(FauxStep::Canned(turn)); + } + + /// Push a factory step onto the back of the queue. + /// + /// The closure receives the live outgoing [`MessageRequest`] before the + /// response stream is built, so assertions panic directly from the client + /// call rather than later while polling the returned stream. + pub fn push_factory(&self, factory: F) + where + F: Fn(&MessageRequest) -> CannedTurn + Send + Sync + 'static, + { + self.canned + .lock() + .expect("MockLlmClient.canned mutex poisoned") + .push_back(FauxStep::Factory(Box::new(factory))); } /// Push a canned non-streaming `MessageResponse`. Consumed by @@ -175,13 +205,20 @@ impl MockLlmClient { self.calls.fetch_add(1, Ordering::SeqCst); } - fn pop_turn(&self) -> Option { + fn pop_step(&self) -> Option { self.canned .lock() .expect("MockLlmClient.canned mutex poisoned") .pop_front() } + fn turn_from_step(&self, step: FauxStep, request: &MessageRequest) -> CannedTurn { + match step { + FauxStep::Canned(turn) => turn, + FauxStep::Factory(factory) => factory(request), + } + } + fn pop_message(&self) -> Option { self.canned_messages .lock() @@ -207,26 +244,28 @@ impl LlmClient for MockLlmClient { } // Fallback: synthesize a MessageResponse from the next streaming turn. - let Some(turn) = self.pop_turn() else { + let Some(step) = self.pop_step() else { return Err(anyhow!( "MockLlmClient: create_message called but no canned response queued (request #{})", self.calls.load(Ordering::SeqCst) )); }; + let turn = self.turn_from_step(step, &request); Ok(synthesize_message_response(turn, &self.model)) } async fn create_message_stream(&self, request: MessageRequest) -> Result { self.record_request(&request); - let Some(turn) = self.pop_turn() else { + let Some(step) = self.pop_step() else { return Err(anyhow!( "MockLlmClient: create_message_stream called but no canned turn queued (call #{})", self.calls.load(Ordering::SeqCst) )); }; + let turn = self.turn_from_step(step, &request); Ok(stream_from_canned(turn)) } @@ -561,6 +600,22 @@ mod tests { assert_eq!(resp.stop_reason.as_deref(), Some("end_turn")); } + #[tokio::test] + async fn create_message_synthesizes_from_factory_turn() { + let mock = MockLlmClient::new(Vec::new()); + mock.push_factory(|request| { + assert_eq!(request.model, "mock-model"); + canned::simple_text_turn("from factory") + }); + + let resp = mock.create_message(empty_request()).await.unwrap(); + let text = match &resp.content[0] { + ContentBlock::Text { text, .. } => text.clone(), + _ => panic!("expected text"), + }; + assert_eq!(text, "from factory"); + } + #[tokio::test] async fn provider_and_model_are_overridable() { let mock = MockLlmClient::new(vec![canned::simple_text_turn("x")]) diff --git a/crates/tui/tests/reasoning_content_replayed_after_tool_call.rs b/crates/tui/tests/reasoning_content_replayed_after_tool_call.rs new file mode 100644 index 000000000..16c0b87f4 --- /dev/null +++ b/crates/tui/tests/reasoning_content_replayed_after_tool_call.rs @@ -0,0 +1,128 @@ +use futures_util::StreamExt; + +#[path = "../src/models.rs"] +#[allow(dead_code)] +mod models; + +#[path = "support/llm_client.rs"] +mod llm_client; + +use crate::llm_client::LlmClient; +use crate::llm_client::mock::{MockLlmClient, canned}; +use crate::models::{ContentBlock, Message, MessageRequest}; + +fn user_message(text: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + } +} + +fn assistant_thinking_tool_call( + thinking: &str, + id: &str, + name: &str, + input: serde_json::Value, +) -> Message { + Message { + role: "assistant".to_string(), + content: vec![ + ContentBlock::Thinking { + thinking: thinking.to_string(), + }, + ContentBlock::ToolUse { + id: id.to_string(), + name: name.to_string(), + input, + caller: None, + }, + ], + } +} + +fn tool_result_message(tool_use_id: &str, content: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ContentBlock::ToolResult { + tool_use_id: tool_use_id.to_string(), + content: content.to_string(), + is_error: None, + content_blocks: None, + }], + } +} + +fn make_request(messages: Vec) -> MessageRequest { + MessageRequest { + model: "deepseek-v4-pro".to_string(), + messages, + max_tokens: 4096, + system: None, + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: Some("high".to_string()), + stream: Some(true), + temperature: None, + top_p: None, + } +} + +#[tokio::test] +async fn reasoning_content_is_replayed_after_thinking_tool_call() { + let mock = MockLlmClient::new(vec![]); + + mock.push_turn(vec![ + canned::message_start("r1"), + canned::thinking_delta(0, "I should inspect /tmp before answering."), + canned::tool_use_block_start(1, "call_a", "list_dir"), + canned::tool_input_delta(1, r#"{"path":"/tmp"}"#), + canned::block_stop(1), + canned::message_delta("tool_use", None), + canned::message_stop(), + ]); + + mock.push_factory(|request| { + let assistant = request + .messages + .iter() + .rev() + .find(|message| message.role == "assistant") + .expect("follow-up request must include the prior assistant tool-call turn"); + + assert!( + assistant + .content + .iter() + .any(|block| matches!(block, ContentBlock::Thinking { .. })), + "DeepSeek V4 follow-up requests must replay reasoning_content on the assistant tool-call turn" + ); + + canned::simple_text_turn("I see the /tmp entries.") + }); + + let mut first = mock + .create_message_stream(make_request(vec![user_message("list /tmp")])) + .await + .expect("first stream opens"); + while first.next().await.is_some() {} + + let mut second = mock + .create_message_stream(make_request(vec![ + user_message("list /tmp"), + assistant_thinking_tool_call( + "I should inspect /tmp before answering.", + "call_a", + "list_dir", + serde_json::json!({ "path": "/tmp" }), + ), + tool_result_message("call_a", "/tmp/file1\n/tmp/file2"), + ])) + .await + .expect("second stream opens"); + while second.next().await.is_some() {} +}