Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 61 additions & 6 deletions crates/tui/src/llm_client/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ use super::{LlmClient, StreamEventBox};
/// the mock does not require `MessageStart` to be present.
pub type CannedTurn = Vec<StreamEvent>;

/// A queued mock response step.
///
/// Each step produces exactly one [`CannedTurn`] when it is consumed: either
/// a turn that was fixed up front, or one computed from the request that
/// triggered it.
pub enum FauxStep {
/// A pre-built turn, returned unchanged regardless of the outgoing request.
Canned(CannedTurn),
/// Build a canned turn from the live outgoing request.
///
/// Tests can assert DeepSeek V4's thinking-mode tool-call invariant here:
/// on the assistant turn that produced the previous tool call, the next
/// outgoing request must still carry `reasoning_content` (represented in
/// this model as a [`ContentBlock::Thinking`] block). If it is missing,
/// DeepSeek V4 returns HTTP 400 on the follow-up turn. This guards the
/// [v0.4.9-v0.5.1 regression range](https://github.com/Hmbown/CodeWhale/compare/v0.4.9...v0.5.1)
/// where that content was dropped.
Factory(Box<dyn Fn(&MessageRequest) -> CannedTurn + Send + Sync>),
}

/// A queue-driven mock LLM client.
///
/// The mock holds a FIFO queue of canned response turns. Each call to
Expand All @@ -75,7 +90,7 @@ pub type CannedTurn = Vec<StreamEvent>;
/// can assert on the outgoing payload (e.g. that prior `reasoning_content` is
/// preserved across turns).
pub struct MockLlmClient {
canned: Mutex<VecDeque<CannedTurn>>,
canned: Mutex<VecDeque<FauxStep>>,
captured_requests: Mutex<Vec<MessageRequest>>,
calls: AtomicUsize,
provider_name: &'static str,
Expand All @@ -91,7 +106,7 @@ impl MockLlmClient {
#[must_use]
pub fn new(canned: Vec<CannedTurn>) -> Self {
Self {
canned: Mutex::new(canned.into()),
canned: Mutex::new(canned.into_iter().map(FauxStep::Canned).collect()),
captured_requests: Mutex::new(Vec::new()),
calls: AtomicUsize::new(0),
provider_name: "mock",
Expand Down Expand Up @@ -119,7 +134,22 @@ impl MockLlmClient {
self.canned
.lock()
.expect("MockLlmClient.canned mutex poisoned")
.push_back(turn);
.push_back(FauxStep::Canned(turn));
}

/// Append a factory step to the end of the response queue.
///
/// `factory` is handed the live outgoing [`MessageRequest`] at the moment the
/// step is consumed, before any response stream is built. Assertions placed
/// inside it therefore panic from the client call itself instead of
/// surfacing later while the returned stream is being polled.
///
/// # Panics
///
/// Panics if the queue mutex has been poisoned.
pub fn push_factory<F>(&self, factory: F)
where
    F: Fn(&MessageRequest) -> CannedTurn + Send + Sync + 'static,
{
    let step = FauxStep::Factory(Box::new(factory));
    let mut queue = self
        .canned
        .lock()
        .expect("MockLlmClient.canned mutex poisoned");
    queue.push_back(step);
}

/// Push a canned non-streaming `MessageResponse`. Consumed by
Expand Down Expand Up @@ -175,13 +205,20 @@ impl MockLlmClient {
self.calls.fetch_add(1, Ordering::SeqCst);
}

fn pop_turn(&self) -> Option<CannedTurn> {
/// Remove and return the step at the front of the queue, or `None` when the
/// queue is empty. Panics if the queue mutex has been poisoned.
fn pop_step(&self) -> Option<FauxStep> {
    let mut queue = self
        .canned
        .lock()
        .expect("MockLlmClient.canned mutex poisoned");
    queue.pop_front()
}

/// Resolve a queued step into the turn it stands for.
///
/// A canned step hands back its stored turn untouched; a factory step is
/// invoked with `request` and its return value becomes the turn.
fn turn_from_step(&self, step: FauxStep, request: &MessageRequest) -> CannedTurn {
    let build = match step {
        FauxStep::Canned(ready) => return ready,
        FauxStep::Factory(build) => build,
    };
    build(request)
}

fn pop_message(&self) -> Option<MessageResponse> {
self.canned_messages
.lock()
Expand All @@ -207,26 +244,28 @@ impl LlmClient for MockLlmClient {
}

// Fallback: synthesize a MessageResponse from the next streaming turn.
let Some(turn) = self.pop_turn() else {
let Some(step) = self.pop_step() else {
return Err(anyhow!(
"MockLlmClient: create_message called but no canned response queued (request #{})",
self.calls.load(Ordering::SeqCst)
));
};

let turn = self.turn_from_step(step, &request);
Ok(synthesize_message_response(turn, &self.model))
}

async fn create_message_stream(&self, request: MessageRequest) -> Result<StreamEventBox> {
self.record_request(&request);

let Some(turn) = self.pop_turn() else {
let Some(step) = self.pop_step() else {
return Err(anyhow!(
"MockLlmClient: create_message_stream called but no canned turn queued (call #{})",
self.calls.load(Ordering::SeqCst)
));
};

let turn = self.turn_from_step(step, &request);
Ok(stream_from_canned(turn))
}

Expand Down Expand Up @@ -561,6 +600,22 @@ mod tests {
assert_eq!(resp.stop_reason.as_deref(), Some("end_turn"));
}

// A factory step must also feed the non-streaming `create_message` path: the
// closure sees the outgoing request and its turn becomes the response body.
#[tokio::test]
async fn create_message_synthesizes_from_factory_turn() {
    let mock = MockLlmClient::new(Vec::new());
    mock.push_factory(|request| {
        assert_eq!(request.model, "mock-model");
        canned::simple_text_turn("from factory")
    });

    let resp = mock.create_message(empty_request()).await.unwrap();
    let ContentBlock::Text { text, .. } = &resp.content[0] else {
        panic!("expected text");
    };
    assert_eq!(text.as_str(), "from factory");
}

#[tokio::test]
async fn provider_and_model_are_overridable() {
let mock = MockLlmClient::new(vec![canned::simple_text_turn("x")])
Expand Down
128 changes: 128 additions & 0 deletions crates/tui/tests/reasoning_content_replayed_after_tool_call.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
use futures_util::StreamExt;

#[path = "../src/models.rs"]
#[allow(dead_code)]
mod models;

#[path = "support/llm_client.rs"]
mod llm_client;

use crate::llm_client::LlmClient;
use crate::llm_client::mock::{MockLlmClient, canned};
use crate::models::{ContentBlock, Message, MessageRequest};

/// Build a `user`-role message holding a single text block with no cache
/// control.
fn user_message(text: &str) -> Message {
    let body = ContentBlock::Text {
        text: text.to_owned(),
        cache_control: None,
    };

    Message {
        role: String::from("user"),
        content: vec![body],
    }
}

/// Build an `assistant`-role message made of a thinking block followed by a
/// tool-use block (in that order), with no `caller` set on the tool call.
fn assistant_thinking_tool_call(
    thinking: &str,
    id: &str,
    name: &str,
    input: serde_json::Value,
) -> Message {
    let reasoning = ContentBlock::Thinking {
        thinking: thinking.to_owned(),
    };
    let tool_call = ContentBlock::ToolUse {
        id: id.to_owned(),
        name: name.to_owned(),
        input,
        caller: None,
    };

    Message {
        role: String::from("assistant"),
        content: vec![reasoning, tool_call],
    }
}

/// Build a `user`-role message carrying one tool result for `tool_use_id`,
/// with `is_error` and `content_blocks` left unset.
fn tool_result_message(tool_use_id: &str, content: &str) -> Message {
    let outcome = ContentBlock::ToolResult {
        tool_use_id: tool_use_id.to_owned(),
        content: content.to_owned(),
        is_error: None,
        content_blocks: None,
    };

    Message {
        role: String::from("user"),
        content: vec![outcome],
    }
}

/// Wrap `messages` in a streaming request for `deepseek-v4-pro` with high
/// reasoning effort and a 4096-token cap; every other optional field is unset.
fn make_request(messages: Vec<Message>) -> MessageRequest {
    let model = String::from("deepseek-v4-pro");
    let reasoning_effort = Some(String::from("high"));

    MessageRequest {
        model,
        messages,
        max_tokens: 4096,
        reasoning_effort,
        stream: Some(true),
        // Everything below is deliberately left at "not provided".
        system: None,
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        temperature: None,
        top_p: None,
    }
}

// Regression guard for the DeepSeek V4 thinking-mode tool-call invariant: the
// follow-up request sent after a tool call must still carry the reasoning of
// the assistant turn that issued the call.
//
// The first queued turn streams a thinking delta plus a tool call. The second
// queued step is a factory that inspects the follow-up request and fails the
// test if the tool-call turn is missing or its reasoning text was dropped or
// altered.
#[tokio::test]
async fn reasoning_content_is_replayed_after_thinking_tool_call() {
    // Shared between the canned first turn, the replayed history, and the
    // factory assertion so the three cannot drift apart.
    const REASONING: &str = "I should inspect /tmp before answering.";
    const TOOL_CALL_ID: &str = "call_a";

    let mock = MockLlmClient::new(vec![]);

    mock.push_turn(vec![
        canned::message_start("r1"),
        canned::thinking_delta(0, REASONING),
        canned::tool_use_block_start(1, TOOL_CALL_ID, "list_dir"),
        canned::tool_input_delta(1, r#"{"path":"/tmp"}"#),
        canned::block_stop(1),
        canned::message_delta("tool_use", None),
        canned::message_stop(),
    ]);

    mock.push_factory(|request| {
        let assistant = request
            .messages
            .iter()
            .rfind(|message| message.role == "assistant")
            .expect("follow-up request must include the prior assistant tool-call turn");

        // Make sure we are looking at the turn that issued the tool call, not
        // some other assistant message.
        assert!(
            assistant.content.iter().any(|block| matches!(
                block,
                ContentBlock::ToolUse { id, .. } if id.as_str() == TOOL_CALL_ID
            )),
            "the last assistant turn in the follow-up request must be the tool-call turn"
        );

        // Presence alone is not enough: an empty or truncated thinking block
        // would mean the reasoning was not actually replayed.
        assert!(
            assistant.content.iter().any(|block| matches!(
                block,
                ContentBlock::Thinking { thinking, .. } if thinking.as_str() == REASONING
            )),
            "DeepSeek V4 follow-up requests must replay reasoning_content on the assistant tool-call turn"
        );

        canned::simple_text_turn("I see the /tmp entries.")
    });

    let mut first = mock
        .create_message_stream(make_request(vec![user_message("list /tmp")]))
        .await
        .expect("first stream opens");
    // Drain the stream so the first turn is fully consumed.
    while first.next().await.is_some() {}

    let mut second = mock
        .create_message_stream(make_request(vec![
            user_message("list /tmp"),
            assistant_thinking_tool_call(
                REASONING,
                TOOL_CALL_ID,
                "list_dir",
                serde_json::json!({ "path": "/tmp" }),
            ),
            tool_result_message(TOOL_CALL_ID, "/tmp/file1\n/tmp/file2"),
        ]))
        .await
        .expect("second stream opens");
    while second.next().await.is_some() {}
}