From c8cc345ad7059d327ebd155587c956c40f15de0d Mon Sep 17 00:00:00 2001 From: Michael Gorbovitski Date: Wed, 17 Jun 2026 11:00:13 -0400 Subject: [PATCH] Bedrock: inline mid-conversation system reminders to preserve prompt cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bedrock's prompt cache is prefix-based: a mid-conversation role=system message (e.g. the reminders Claude Code injects) hoisted into the top-level system block grows that prefix every turn and collapses the cached conversation to the tools/system floor. This is the Bedrock counterpart of the native Anthropic provider's mid-conversation system support (SupportsMidConversationSystem) — Bedrock has no message-level system role, so the inlined message is rendered as a user turn. Gated by an inlineSystemReminders bool the caller computes via IsAnthropicModelFamily(ctx, model) (alias-aware), so non-Anthropic families keep the historical hoist-everything behavior. Tool-call/result cache_control breakpoints are preserved as CachePoint blocks carrying the requested TTL. Adds regression tests including a positive cache_control->CachePoint+TTL assertion, the lone-system early return, and the no-leading-system gate. 
Co-Authored-By: Claude Opus 4.6 --- core/providers/bedrock/bedrock_test.go | 496 ++++++++++++++++++++++++- core/providers/bedrock/responses.go | 97 ++++- 2 files changed, 583 insertions(+), 10 deletions(-) diff --git a/core/providers/bedrock/bedrock_test.go b/core/providers/bedrock/bedrock_test.go index 83dc16b589..76457c75e9 100644 --- a/core/providers/bedrock/bedrock_test.go +++ b/core/providers/bedrock/bedrock_test.go @@ -2884,7 +2884,7 @@ func TestToolResultJSONParsingResponsesAPI(t *testing.T) { }, } - messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input) + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false) require.NoError(t, err) require.Len(t, messages, 1) @@ -4950,7 +4950,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) { }, } - messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input) + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false) require.NoError(t, err) require.Len(t, messages, 1) @@ -4994,7 +4994,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) { }, } - messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input) + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false) require.NoError(t, err) require.Len(t, messages, 1) @@ -5027,7 +5027,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) { }, } - messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input) + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false) require.NoError(t, err) require.Len(t, messages, 1) @@ -6041,3 +6041,491 @@ func TestBedrockMixedBlockToolResultRoundTrip(t *testing.T) { require.NotNil(t, got.Citations) assert.True(t, got.Citations.Enabled) } + +// systemReminderTextMsg builds a 
role=system message with a single text block. +func systemReminderTextMsg(text string) schemas.ResponsesMessage { + return schemas.ResponsesMessage{ + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem), + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{ + {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr(text)}, + }, + }, + } +} + +// userReminderTextMsg builds a role=user message with a single text block. +func userReminderTextMsg(text string) schemas.ResponsesMessage { + return schemas.ResponsesMessage{ + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{ + {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr(text)}, + }, + }, + } +} + +// TestMidConversationSystemReminderStaysInline verifies that only the leading run of +// role=system messages is hoisted into Bedrock's top-level system block. Reminders that +// Claude Code injects mid-conversation (also role=system) must stay inline as user +// messages, otherwise they grow the system block in front of the cached conversation +// prefix and break Bedrock prompt caching (collapsing reads to the tools/system floor). 
+func TestMidConversationSystemReminderStaysInline(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), // leading system prompt + userReminderTextMsg("first user turn"), + systemReminderTextMsg("The task tools haven't been used recently."), // injected reminder + userReminderTextMsg("second user turn"), + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + // Only the leading system prompt should be hoisted into the system block. + require.Len(t, systemMessages, 1, "only the leading system prompt belongs in the system block") + require.NotNil(t, systemMessages[0].Text) + assert.Equal(t, "You are Claude Code.", *systemMessages[0].Text) + + // The injected reminder stays inline as user content. Because it is rendered as a user + // message between two user turns, the consecutive-same-role merge folds all three into a + // single user message — preserving order. The key invariant is that the reminder text is + // NOT in the system block and appears in chronological order in the conversation. + require.Len(t, messages, 1, "three consecutive user-role messages merge into one") + assert.Equal(t, bedrock.BedrockMessageRoleUser, messages[0].Role) + require.Len(t, messages[0].Content, 3) + require.NotNil(t, messages[0].Content[0].Text) + assert.Equal(t, "first user turn", *messages[0].Content[0].Text) + require.NotNil(t, messages[0].Content[1].Text) + assert.Equal(t, "\nThe task tools haven't been used recently.\n\n", + *messages[0].Content[1].Text, "reminder must stay inline at its position, wrapped") + require.NotNil(t, messages[0].Content[2].Text) + assert.Equal(t, "second user turn", *messages[0].Content[2].Text) +} + +// TestMidConversationSystemReminderHoistedForNonAnthropic verifies the Anthropic-only gating: +// for a non-Anthropic Bedrock model (e.g. 
Nova), the historical behavior is preserved — every +// role=system message, including mid-conversation ones, is hoisted into the top-level system +// block and nothing is inlined as a <system-reminder>. The inlining is a prompt-cache workaround +// specific to Anthropic-on-Bedrock and must not change the wire shape for other models. +func TestMidConversationSystemReminderHoistedForNonAnthropic(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are a helpful assistant."), // leading system prompt + userReminderTextMsg("first user turn"), + systemReminderTextMsg("Mid-conversation reminder."), // would be inlined for Anthropic + userReminderTextMsg("second user turn"), + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false) + require.NoError(t, err) + + // Both system messages are hoisted (historical behavior), not just the leading one. + require.Len(t, systemMessages, 2, "non-Anthropic models hoist every system message") + assert.Equal(t, "You are a helpful assistant.", *systemMessages[0].Text) + assert.Equal(t, "Mid-conversation reminder.", *systemMessages[1].Text) + + // No reminder is inlined into the conversation, and nothing is <system-reminder>-wrapped. + for _, m := range messages { + for _, b := range m.Content { + if b.Text != nil { + assert.NotContains(t, *b.Text, "<system-reminder>", "non-Anthropic path must not wrap reminders") + } + } + } +} + +// TestMultipleLeadingSystemMessagesAllHoisted verifies that a leading run of more than one +// system message is fully hoisted, and the boundary closes at the first non-system message. 
+func TestMultipleLeadingSystemMessagesAllHoisted(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("System prompt part one."), + systemReminderTextMsg("System prompt part two."), + userReminderTextMsg("hello"), + systemReminderTextMsg("Injected reminder."), + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + require.Len(t, systemMessages, 2, "both leading system messages belong in the system block") + assert.Equal(t, "System prompt part one.", *systemMessages[0].Text) + assert.Equal(t, "System prompt part two.", *systemMessages[1].Text) + + // Consecutive same-role merge folds the inlined reminder into the preceding user message. + require.Len(t, messages, 1) + assert.Equal(t, bedrock.BedrockMessageRoleUser, messages[0].Role) + require.Len(t, messages[0].Content, 2) + assert.Equal(t, "hello", *messages[0].Content[0].Text) + assert.Equal(t, "\nInjected reminder.\n\n", *messages[0].Content[1].Text) +} + +// TestSystemReminderAfterToolResultPreservesPairing verifies that a reminder arriving after a +// tool result does not split the tool_use/tool_result pairing and ends up correctly ordered +// after the tool result (both are user-role and merge). This is the dominant production shape. 
+func TestSystemReminderAfterToolResultPreservesPairing(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), + userReminderTextMsg("do a thing"), + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{CallID: schemas.Ptr("tooluse_1"), Name: schemas.Ptr("read")}, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_1"), + Output: &schemas.ResponsesToolMessageOutputStruct{ResponsesToolCallOutputStr: schemas.Ptr("file contents")}, + }, + }, + systemReminderTextMsg("The task tools haven't been used recently."), // reminder right after tool result + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + require.Len(t, systemMessages, 1) + + // Find the assistant tool_use and assert it is immediately followed by a user tool_result. + toolUseIdx := -1 + for i, m := range messages { + for _, b := range m.Content { + if b.ToolUse != nil { + toolUseIdx = i + } + } + } + require.GreaterOrEqual(t, toolUseIdx, 0, "tool_use must be present") + require.Less(t, toolUseIdx+1, len(messages), "tool_use must be followed by a message") + next := messages[toolUseIdx+1] + assert.Equal(t, bedrock.BedrockMessageRoleUser, next.Role) + var hasToolResult bool + for _, b := range next.Content { + if b.ToolResult != nil { + hasToolResult = true + } + } + assert.True(t, hasToolResult, "tool_use must be immediately followed by its tool_result") + + // The reminder must NOT be hoisted into the system block. + for _, sm := range systemMessages { + if sm.Text != nil { + assert.NotContains(t, *sm.Text, "task tools haven't been used", "reminder must not be hoisted") + } + } + + // The reminder text appears inline, wrapped, somewhere after the tool result. 
+ var foundReminder bool + for _, m := range messages { + for _, b := range m.Content { + if b.Text != nil && *b.Text == "\nThe task tools haven't been used recently.\n\n" { + foundReminder = true + } + } + } + assert.True(t, foundReminder, "reminder must be inlined as wrapped user text") +} + +// developerReminderTextMsg builds a role=developer message with a single text block. +func developerReminderTextMsg(text string) schemas.ResponsesMessage { + return schemas.ResponsesMessage{ + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleDeveloper), + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{ + {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr(text)}, + }, + }, + } +} + +// TestMidConversationDeveloperReminderStaysInline mirrors the system-reminder test for the +// developer role, which the converter treats identically (both are hoisted only when leading, +// inlined otherwise). Without coverage, a future change special-casing developer could regress. +func TestMidConversationDeveloperReminderStaysInline(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), // leading system prompt + userReminderTextMsg("first user turn"), + developerReminderTextMsg("Developer note injected mid-conversation."), + userReminderTextMsg("second user turn"), + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + // Only the leading system prompt is hoisted; the developer reminder is NOT. 
+ require.Len(t, systemMessages, 1) + assert.Equal(t, "You are Claude Code.", *systemMessages[0].Text) + for _, sm := range systemMessages { + if sm.Text != nil { + assert.NotContains(t, *sm.Text, "Developer note", "developer reminder must not be hoisted") + } + } + + // It appears inline, wrapped, in chronological order. + var found bool + for _, m := range messages { + for _, b := range m.Content { + if b.Text != nil && *b.Text == "\nDeveloper note injected mid-conversation.\n\n" { + found = true + } + } + } + assert.True(t, found, "developer reminder must be inlined as wrapped user text") +} + +// TestMidConversationReminderContentStrInlined covers the ContentStr branch of the helper +// (simple string content) rather than ContentBlocks, which all the other tests use. A +// regression in that branch (e.g. forgetting to wrap) would otherwise pass CI. +func TestMidConversationReminderContentStrInlined(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), + userReminderTextMsg("hello"), + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem), + Content: &schemas.ResponsesMessageContent{ContentStr: schemas.Ptr("Reminder via ContentStr.")}, + }, + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + require.Len(t, systemMessages, 1, "only the leading prompt is hoisted") + + var found bool + for _, m := range messages { + for _, b := range m.Content { + if b.Text != nil && *b.Text == "\nReminder via ContentStr.\n\n" { + found = true + } + } + } + assert.True(t, found, "ContentStr reminder must be inlined as wrapped user text") +} + +// TestMidConversationReminderEmptyContentDropped pins the nil-return contract: a mid-conversation +// reminder with no text content produces no Bedrock message (the helper returns nil and the +// caller skips it), rather than an 
empty user message that Bedrock would reject. +func TestMidConversationReminderEmptyContentDropped(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), + userReminderTextMsg("hello"), + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem), + Content: &schemas.ResponsesMessageContent{ContentBlocks: []schemas.ResponsesMessageContentBlock{}}, + }, + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + require.Len(t, systemMessages, 1) + + // No empty content blocks anywhere; the empty reminder was dropped, not emitted. + for _, m := range messages { + require.NotEmpty(t, m.Content, "no message should have empty content") + for _, b := range m.Content { + if b.Text != nil { + assert.NotEqual(t, "\n\n\n", *b.Text, "empty reminder must not be emitted as a wrapped blank") + } + } + } +} + +// TestSystemReminderBetweenToolCallAndResult covers a reminder arriving BETWEEN a function_call +// and its output — a defensive edge case (Claude Code never interleaves reminders into a tool +// exchange). Asserts the tool_use/tool_result pair is preserved. Known quirk, not asserted: the +// merged user turn places the reminder text before the tool_result block; revisit if real traffic +// ever interleaves this way. 
+func TestSystemReminderBetweenToolCallAndResult(t *testing.T) { + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), + userReminderTextMsg("do a thing"), + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{CallID: schemas.Ptr("tooluse_x"), Name: schemas.Ptr("read")}, + }, + systemReminderTextMsg("Injected between call and result."), // the interleaved reminder + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_x"), + Output: &schemas.ResponsesToolMessageOutputStruct{ResponsesToolCallOutputStr: schemas.Ptr("contents")}, + }, + }, + } + + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + // Locate the assistant message carrying the tool_use. + toolUseMsgIdx := -1 + for i, m := range messages { + for _, b := range m.Content { + if b.ToolUse != nil { + toolUseMsgIdx = i + } + } + } + require.GreaterOrEqual(t, toolUseMsgIdx, 0, "tool_use must be present") + + // No reminder text may share the tool_use assistant message (that would split the pair). + for _, b := range messages[toolUseMsgIdx].Content { + assert.Nil(t, b.Text, "tool_use message must not contain an inlined reminder text block") + } + + // The pair invariant: the next message is the user turn and it contains the matching + // tool_result (block order within that turn is not asserted — see the known limitation above). 
+ require.Less(t, toolUseMsgIdx+1, len(messages), "tool_use must be followed by a message") + next := messages[toolUseMsgIdx+1] + assert.Equal(t, bedrock.BedrockMessageRoleUser, next.Role) + var hasResult bool + for _, b := range next.Content { + if b.ToolResult != nil && b.ToolResult.ToolUseID == "tooluse_x" { + hasResult = true + } + } + assert.True(t, hasResult, "user message after tool_use must contain the matching tool_result") +} + +// TestSystemReminderDoesNotCarryCachePoint pins the deliberate omission flagged in review: an +// inlined mid-conversation reminder must NOT emit a CachePoint, even if its block carries +// CacheControl. A breakpoint at the moving conversation tail would shift every turn and defeat +// the prefix caching this whole change exists to preserve. +func TestSystemReminderDoesNotCarryCachePoint(t *testing.T) { + reminder := systemReminderTextMsg("Reminder that happens to carry a breakpoint.") + reminder.Content.ContentBlocks[0].CacheControl = &schemas.CacheControl{Type: schemas.CacheControlTypeEphemeral} + + input := []schemas.ResponsesMessage{ + systemReminderTextMsg("You are Claude Code."), + userReminderTextMsg("hello"), + reminder, + } + + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + for _, m := range messages { + for _, b := range m.Content { + assert.Nil(t, b.CachePoint, "inlined reminder must not introduce a CachePoint block") + } + } +} + +// TestToolCacheControlBecomesCachePointWithTTL is the positive counterpart to +// TestSystemReminderDoesNotCarryCachePoint: a cache_control breakpoint Claude Code places on a +// tool call / tool result must survive into Bedrock as a CachePoint block adjacent to the +// ToolUse/ToolResult, AND must carry the requested TTL ("1h"). This pins the bug review caught +// where the breakpoints were emitted with the default TTL dropped. 
+func TestToolCacheControlBecomesCachePointWithTTL(t *testing.T) { + ttl := "1h" + toolMsgs := func() []schemas.ResponsesMessage { + return []schemas.ResponsesMessage{ + userReminderTextMsg("call the tool"), + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + CacheControl: &schemas.CacheControl{Type: schemas.CacheControlTypeEphemeral, TTL: &ttl}, + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_ttl"), + Name: schemas.Ptr("get_weather"), + Arguments: schemas.Ptr(`{"location":"NYC"}`), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + CacheControl: &schemas.CacheControl{Type: schemas.CacheControlTypeEphemeral, TTL: &ttl}, + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_ttl"), + Output: &schemas.ResponsesToolMessageOutputStruct{ResponsesToolCallOutputStr: schemas.Ptr("sunny")}, + }, + }, + } + } + + assertTTLPreserved := func(t *testing.T, input []schemas.ResponsesMessage) { + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + var afterToolUse, afterToolResult bool + for _, m := range messages { + for i, b := range m.Content { + if b.ToolUse != nil && b.ToolUse.ToolUseID == "tooluse_ttl" { + require.Greater(t, len(m.Content), i+1, "a CachePoint block must follow the ToolUse") + cp := m.Content[i+1].CachePoint + require.NotNil(t, cp, "ToolUse with cache_control must be followed by a CachePoint") + require.NotNil(t, cp.TTL, "CachePoint must carry the requested TTL, not drop it") + assert.Equal(t, "1h", *cp.TTL) + afterToolUse = true + } + if b.ToolResult != nil && b.ToolResult.ToolUseID == "tooluse_ttl" { + require.Greater(t, len(m.Content), i+1, "a CachePoint block must follow the ToolResult") + cp := m.Content[i+1].CachePoint + require.NotNil(t, cp, "ToolResult with cache_control must be followed by a CachePoint") + require.NotNil(t, cp.TTL, "CachePoint must 
carry the requested TTL, not drop it") + assert.Equal(t, "1h", *cp.TTL) + afterToolResult = true + } + } + } + assert.True(t, afterToolUse, "expected a TTL-carrying CachePoint after the tool_use") + assert.True(t, afterToolResult, "expected a TTL-carrying CachePoint after the tool_result") + } + + // End-of-sequence flush: the tool call/result are the last messages (isLastResultInSequence). + t.Run("end of sequence", func(t *testing.T) { + assertTTLPreserved(t, toolMsgs()) + }) + // Flush-before-message: a following user message triggers the flush while tool results are + // pending (the path this PR added inside case ResponsesMessageTypeMessage). + t.Run("followed by a message", func(t *testing.T) { + assertTTLPreserved(t, append(toolMsgs(), userReminderTextMsg("and then continue"))) + }) +} + +// TestLoneSystemMessageReturnsUserMessage covers the single-element early return: a lone +// system/developer message is converted to a user message and returned, with no system block, +// regardless of the inlineSystemReminders gate. +func TestLoneSystemMessageReturnsUserMessage(t *testing.T) { + // Both system and developer roles take the single-message early return. 
+ roles := map[string]schemas.ResponsesMessage{ + "system": systemReminderTextMsg("You are Claude Code."), + "developer": developerReminderTextMsg("You are Claude Code."), + } + for role, msg := range roles { + for _, inline := range []bool{true, false} { + input := []schemas.ResponsesMessage{msg} + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, inline) + require.NoError(t, err) + assert.Empty(t, systemMessages, "lone %s message must not populate the system block (inline=%v)", role, inline) + require.Len(t, messages, 1, "lone %s message must yield exactly one message (inline=%v)", role, inline) + assert.Equal(t, bedrock.BedrockMessageRoleUser, messages[0].Role) + } + } +} + +// TestNoLeadingSystemBlockReminderInlined covers the seenNonSystemMessage gate when the +// conversation does NOT start with a system message: the first message is a user turn, so a later +// role=system reminder is mid-conversation and must be inlined (nothing hoisted into system). 
+func TestNoLeadingSystemBlockReminderInlined(t *testing.T) { + input := []schemas.ResponsesMessage{ + userReminderTextMsg("hello"), + systemReminderTextMsg("Reminder with no leading system block."), + userReminderTextMsg("continue"), + } + + messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true) + require.NoError(t, err) + + assert.Empty(t, systemMessages, "no leading system block means nothing should be hoisted") + var inlined bool + for _, m := range messages { + for _, b := range m.Content { + if b.Text != nil && strings.Contains(*b.Text, "") && + strings.Contains(*b.Text, "Reminder with no leading system block.") { + assert.Equal(t, bedrock.BedrockMessageRoleUser, m.Role, "inlined reminder must be a user message") + inlined = true + } + } + } + assert.True(t, inlined, "the mid-conversation reminder must be inlined as a wrapped user message") +} diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go index 56cff5119f..94d7c71bf0 100644 --- a/core/providers/bedrock/responses.go +++ b/core/providers/bedrock/responses.go @@ -2160,7 +2160,9 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. input = input[:trimmed] } - messages, systemMessages, err := ConvertBifrostMessagesToBedrockMessages(ctx, input) + // Inline mid-conversation system reminders for Anthropic models (keeps Bedrock's + // prefix-based prompt cache stable); hoist-everything for other families. + messages, systemMessages, err := ConvertBifrostMessagesToBedrockMessages(ctx, input, schemas.IsAnthropicModelFamily(ctx, bifrostReq.Model)) if err != nil { return nil, fmt.Errorf("failed to convert Responses messages: %w", err) } @@ -2660,7 +2662,8 @@ func ToBedrockConverseResponse(bifrostResp *schemas.BifrostResponsesResponse) (* // Convert Bifrost messages back to Bedrock messages using the new conversion method. 
// Response-side conversion does not perform outbound fetches in practice (model output // blocks already carry inline data), so context.Background() is acceptable here. - bedrockMessages, _, err := ConvertBifrostMessagesToBedrockMessages(context.Background(), bifrostResp.Output) + // Response output never contains mid-conversation system reminders, so disable inlining. + bedrockMessages, _, err := ConvertBifrostMessagesToBedrockMessages(context.Background(), bifrostResp.Output, false) if err != nil { return nil, fmt.Errorf("failed to convert bifrost output messages: %w", err) } @@ -3129,8 +3132,12 @@ func (m *ToolCallStateManager) HasPendingResults() bool { // ConvertBifrostMessagesToBedrockMessages converts an array of Bifrost ResponsesMessage to Bedrock message format // This is the main conversion method from Bifrost to Bedrock - handles all message types and returns messages + system messages // Uses a state machine to properly track and manage tool call lifecycles. -// The ctx is propagated to URL fetches inside content blocks. -func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessages []schemas.ResponsesMessage) ([]BedrockMessage, []BedrockSystemMessage, error) { +// The ctx is propagated to URL fetches inside content blocks. inlineSystemReminders selects the +// mid-conversation system-message handling: when true, only the leading run of system/developer +// messages is hoisted into the top-level `system` block and later (mid-conversation) ones are +// inlined in place; when false, every system/developer message is hoisted (historical behavior). +// Callers compute it from the provider+model — see the call site in ToBedrockResponsesRequest. 
+func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessages []schemas.ResponsesMessage, inlineSystemReminders bool) ([]BedrockMessage, []BedrockSystemMessage, error) { // If only a single system message is present, convert it user message (since openai allows it) if len(bifrostMessages) == 1 && bifrostMessages[0].Role != nil && (*bifrostMessages[0].Role == schemas.ResponsesInputMessageRoleSystem || *bifrostMessages[0].Role == schemas.ResponsesInputMessageRoleDeveloper) { msg := bifrostMessages[0] @@ -3142,6 +3149,16 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage } } + // Bedrock's prompt cache is prefix-based, so growing the top-level `system` block invalidates + // the cached conversation behind it. A mid-conversation role=system message (e.g. the reminders + // Claude Code injects) hoisted into `system` collapses the cache to the tools/system floor every + // time one appears. When inlineSystemReminders is set we instead keep only the leading run of + // system/developer messages in `system` and inline later ones in place. This is the Bedrock + // counterpart of the native Anthropic provider's mid-conversation system support + // (SupportsMidConversationSystem) — Bedrock has no message-level system role, so the inlined + // message is rendered as a user turn (see convertBifrostSystemReminderToBedrockUserMessage). + // When false, every system/developer message is hoisted (historical behavior). + var bedrockMessages []BedrockMessage var systemMessages []BedrockSystemMessage var pendingReasoningContentBlocks []BedrockContentBlock @@ -3149,6 +3166,9 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage // blocks that must be prepended to the next assistant text message (same-turn server-managed tools). var pendingServerToolBlocks []BedrockContentBlock + // Set once the leading system prompt ends (first non-system message); gates inlineSystemReminders. 
+ seenNonSystemMessage := false + // Initialize the state manager for tracking tool calls and results stateManager := NewToolCallStateManager() @@ -3242,6 +3262,13 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage msgType = *msg.Type } + // First non-system message closes the leading system-prompt run (see seenNonSystemMessage). + isSystemMessage := msgType == schemas.ResponsesMessageTypeMessage && msg.Role != nil && + (*msg.Role == schemas.ResponsesInputMessageRoleSystem || *msg.Role == schemas.ResponsesInputMessageRoleDeveloper) + if !isSystemMessage { + seenNonSystemMessage = true + } + // If we're processing a non-reasoning message and have pending reasoning blocks, // flush them into the previous assistant message (if it exists) if msgType != schemas.ResponsesMessageTypeReasoning && len(pendingReasoningContentBlocks) > 0 { @@ -3445,6 +3472,13 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage } toolUseBlock.ToolUse.Input = input toolUseBlocks = append(toolUseBlocks, *toolUseBlock) + // Preserve the cache breakpoint Claude Code placed on this tool call, else the + // next turn can't match the prefix and collapses to the tools/system floor. + if toolCall.CacheControl != nil { + toolUseBlocks = append(toolUseBlocks, BedrockContentBlock{ + CachePoint: newBedrockCachePoint(toolCall.CacheControl.TTL), + }) + } } } @@ -3471,6 +3505,12 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage Status: schemas.Ptr(result.Status), }, }) + // Preserve the cache breakpoint Claude Code placed on this tool result. 
+ if result.CacheControl != nil { + resultBlocks = append(resultBlocks, BedrockContentBlock{ + CachePoint: newBedrockCachePoint(result.CacheControl.TTL), + }) + } } if len(resultBlocks) > 0 { @@ -3483,10 +3523,17 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage } // Convert regular message - if role == schemas.ResponsesInputMessageRoleSystem || role == schemas.ResponsesInputMessageRoleDeveloper { - // Convert to system message + if (role == schemas.ResponsesInputMessageRoleSystem || role == schemas.ResponsesInputMessageRoleDeveloper) && + (!inlineSystemReminders || !seenNonSystemMessage) { + // Leading system prompt (or any system message for non-Anthropic models): hoist into `system`. systemMsgs := convertBifrostMessageToBedrockSystemMessages(&msg) systemMessages = append(systemMessages, systemMsgs...) + } else if role == schemas.ResponsesInputMessageRoleSystem || role == schemas.ResponsesInputMessageRoleDeveloper { + // Mid-conversation reminder: inline in place instead of hoisting (see inlineSystemReminders). + bedrockMsg := convertBifrostSystemReminderToBedrockUserMessage(&msg) + if bedrockMsg != nil { + bedrockMessages = append(bedrockMessages, *bedrockMsg) + } } else { // Convert user/assistant text message bedrockMsg := convertBifrostMessageToBedrockMessage(ctx, &msg) @@ -3701,6 +3748,44 @@ func convertBifrostMessageToBedrockSystemMessages(msg *schemas.ResponsesMessage) return systemMessages } +// convertBifrostSystemReminderToBedrockUserMessage renders a mid-conversation role=system reminder +// as a user message (Bedrock has no message-level system role), wrapping each text block in the +// same \n...\n\n envelope Claude Code uses for pre-wrapped ones. +// Returns nil for content that yields no text, so the caller skips the append. 
+func convertBifrostSystemReminderToBedrockUserMessage(msg *schemas.ResponsesMessage) *BedrockMessage { + if msg.Content == nil { + return nil + } + + var contentBlocks []BedrockContentBlock + wrap := func(text string) { + wrapped := "\n" + text + "\n\n" + contentBlocks = append(contentBlocks, BedrockContentBlock{Text: &wrapped}) + } + + // Text-only by design: reminders never carry images, and we deliberately attach no cache point + // here — a breakpoint on this moving-tail message would shift every turn and defeat the prefix + // caching this inlining exists for. + if msg.Content.ContentStr != nil { + wrap(*msg.Content.ContentStr) + } else if msg.Content.ContentBlocks != nil { + for _, block := range msg.Content.ContentBlocks { + if block.Text != nil { + wrap(*block.Text) + } + } + } + + if len(contentBlocks) == 0 { + return nil + } + + return &BedrockMessage{ + Role: BedrockMessageRoleUser, + Content: contentBlocks, + } +} + // convertBifrostMessageToBedrockMessage converts a regular Bifrost message to Bedrock message. // The ctx is propagated to URL fetches inside content blocks. func convertBifrostMessageToBedrockMessage(ctx context.Context, msg *schemas.ResponsesMessage) *BedrockMessage {