diff --git a/core/providers/bedrock/bedrock_test.go b/core/providers/bedrock/bedrock_test.go
index 83dc16b589..76457c75e9 100644
--- a/core/providers/bedrock/bedrock_test.go
+++ b/core/providers/bedrock/bedrock_test.go
@@ -2884,7 +2884,7 @@ func TestToolResultJSONParsingResponsesAPI(t *testing.T) {
},
}
- messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false)
require.NoError(t, err)
require.Len(t, messages, 1)
@@ -4950,7 +4950,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) {
},
}
- messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false)
require.NoError(t, err)
require.Len(t, messages, 1)
@@ -4994,7 +4994,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) {
},
}
- messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false)
require.NoError(t, err)
require.Len(t, messages, 1)
@@ -5027,7 +5027,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) {
},
}
- messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false)
require.NoError(t, err)
require.Len(t, messages, 1)
@@ -6041,3 +6041,491 @@ func TestBedrockMixedBlockToolResultRoundTrip(t *testing.T) {
require.NotNil(t, got.Citations)
assert.True(t, got.Citations.Enabled)
}
+
+// systemReminderTextMsg builds a role=system message with a single text block.
+func systemReminderTextMsg(text string) schemas.ResponsesMessage {
+ return schemas.ResponsesMessage{
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
+ Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem),
+ Content: &schemas.ResponsesMessageContent{
+ ContentBlocks: []schemas.ResponsesMessageContentBlock{
+ {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr(text)},
+ },
+ },
+ }
+}
+
+// userReminderTextMsg builds a role=user message with a single text block.
+func userReminderTextMsg(text string) schemas.ResponsesMessage {
+ return schemas.ResponsesMessage{
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
+ Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
+ Content: &schemas.ResponsesMessageContent{
+ ContentBlocks: []schemas.ResponsesMessageContentBlock{
+ {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr(text)},
+ },
+ },
+ }
+}
+
+// TestMidConversationSystemReminderStaysInline verifies that only the leading run of
+// role=system messages is hoisted into Bedrock's top-level system block. Reminders that
+// Claude Code injects mid-conversation (also role=system) must stay inline as user
+// messages, otherwise they grow the system block in front of the cached conversation
+// prefix and break Bedrock prompt caching (collapsing reads to the tools/system floor).
+func TestMidConversationSystemReminderStaysInline(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."), // leading system prompt
+ userReminderTextMsg("first user turn"),
+ systemReminderTextMsg("The task tools haven't been used recently."), // injected reminder
+ userReminderTextMsg("second user turn"),
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ // Only the leading system prompt should be hoisted into the system block.
+ require.Len(t, systemMessages, 1, "only the leading system prompt belongs in the system block")
+ require.NotNil(t, systemMessages[0].Text)
+ assert.Equal(t, "You are Claude Code.", *systemMessages[0].Text)
+
+ // The injected reminder stays inline as user content. Because it is rendered as a user
+ // message between two user turns, the consecutive-same-role merge folds all three into a
+ // single user message — preserving order. The key invariant is that the reminder text is
+ // NOT in the system block and appears in chronological order in the conversation.
+ require.Len(t, messages, 1, "three consecutive user-role messages merge into one")
+ assert.Equal(t, bedrock.BedrockMessageRoleUser, messages[0].Role)
+ require.Len(t, messages[0].Content, 3)
+ require.NotNil(t, messages[0].Content[0].Text)
+ assert.Equal(t, "first user turn", *messages[0].Content[0].Text)
+ require.NotNil(t, messages[0].Content[1].Text)
+ assert.Equal(t, "\nThe task tools haven't been used recently.\n\n",
+ *messages[0].Content[1].Text, "reminder must stay inline at its position, wrapped")
+ require.NotNil(t, messages[0].Content[2].Text)
+ assert.Equal(t, "second user turn", *messages[0].Content[2].Text)
+}
+
+// TestMidConversationSystemReminderHoistedForNonAnthropic verifies the Anthropic-only gating:
+// for a non-Anthropic Bedrock model (e.g. Nova), the historical behavior is preserved — every
+// role=system message, including mid-conversation ones, is hoisted into the top-level system
+// block and nothing is inlined as a . The inlining is a prompt-cache workaround
+// specific to Anthropic-on-Bedrock and must not change the wire shape for other models.
+func TestMidConversationSystemReminderHoistedForNonAnthropic(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are a helpful assistant."), // leading system prompt
+ userReminderTextMsg("first user turn"),
+ systemReminderTextMsg("Mid-conversation reminder."), // would be inlined for Anthropic
+ userReminderTextMsg("second user turn"),
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, false)
+ require.NoError(t, err)
+
+ // Both system messages are hoisted (historical behavior), not just the leading one.
+ require.Len(t, systemMessages, 2, "non-Anthropic models hoist every system message")
+ assert.Equal(t, "You are a helpful assistant.", *systemMessages[0].Text)
+ assert.Equal(t, "Mid-conversation reminder.", *systemMessages[1].Text)
+
+ // No reminder is inlined into the conversation, and nothing is -wrapped.
+ for _, m := range messages {
+ for _, b := range m.Content {
+ if b.Text != nil {
+ assert.NotContains(t, *b.Text, "", "non-Anthropic path must not wrap reminders")
+ }
+ }
+ }
+}
+
+// TestMultipleLeadingSystemMessagesAllHoisted verifies that a leading run of more than one
+// system message is fully hoisted, and the boundary closes at the first non-system message.
+func TestMultipleLeadingSystemMessagesAllHoisted(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("System prompt part one."),
+ systemReminderTextMsg("System prompt part two."),
+ userReminderTextMsg("hello"),
+ systemReminderTextMsg("Injected reminder."),
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ require.Len(t, systemMessages, 2, "both leading system messages belong in the system block")
+ assert.Equal(t, "System prompt part one.", *systemMessages[0].Text)
+ assert.Equal(t, "System prompt part two.", *systemMessages[1].Text)
+
+ // Consecutive same-role merge folds the inlined reminder into the preceding user message.
+ require.Len(t, messages, 1)
+ assert.Equal(t, bedrock.BedrockMessageRoleUser, messages[0].Role)
+ require.Len(t, messages[0].Content, 2)
+ assert.Equal(t, "hello", *messages[0].Content[0].Text)
+ assert.Equal(t, "\nInjected reminder.\n\n", *messages[0].Content[1].Text)
+}
+
+// TestSystemReminderAfterToolResultPreservesPairing verifies that a reminder arriving after a
+// tool result does not split the tool_use/tool_result pairing and ends up correctly ordered
+// after the tool result (both are user-role and merge). This is the dominant production shape.
+func TestSystemReminderAfterToolResultPreservesPairing(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."),
+ userReminderTextMsg("do a thing"),
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
+ ResponsesToolMessage: &schemas.ResponsesToolMessage{CallID: schemas.Ptr("tooluse_1"), Name: schemas.Ptr("read")},
+ },
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
+ ResponsesToolMessage: &schemas.ResponsesToolMessage{
+ CallID: schemas.Ptr("tooluse_1"),
+ Output: &schemas.ResponsesToolMessageOutputStruct{ResponsesToolCallOutputStr: schemas.Ptr("file contents")},
+ },
+ },
+ systemReminderTextMsg("The task tools haven't been used recently."), // reminder right after tool result
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+ require.Len(t, systemMessages, 1)
+
+ // Find the assistant tool_use and assert it is immediately followed by a user tool_result.
+ toolUseIdx := -1
+ for i, m := range messages {
+ for _, b := range m.Content {
+ if b.ToolUse != nil {
+ toolUseIdx = i
+ }
+ }
+ }
+ require.GreaterOrEqual(t, toolUseIdx, 0, "tool_use must be present")
+ require.Less(t, toolUseIdx+1, len(messages), "tool_use must be followed by a message")
+ next := messages[toolUseIdx+1]
+ assert.Equal(t, bedrock.BedrockMessageRoleUser, next.Role)
+ var hasToolResult bool
+ for _, b := range next.Content {
+ if b.ToolResult != nil {
+ hasToolResult = true
+ }
+ }
+ assert.True(t, hasToolResult, "tool_use must be immediately followed by its tool_result")
+
+ // The reminder must NOT be hoisted into the system block.
+ for _, sm := range systemMessages {
+ if sm.Text != nil {
+ assert.NotContains(t, *sm.Text, "task tools haven't been used", "reminder must not be hoisted")
+ }
+ }
+
+ // The reminder text appears inline, wrapped, somewhere after the tool result.
+ var foundReminder bool
+ for _, m := range messages {
+ for _, b := range m.Content {
+ if b.Text != nil && *b.Text == "\nThe task tools haven't been used recently.\n\n" {
+ foundReminder = true
+ }
+ }
+ }
+ assert.True(t, foundReminder, "reminder must be inlined as wrapped user text")
+}
+
+// developerReminderTextMsg builds a role=developer message with a single text block.
+func developerReminderTextMsg(text string) schemas.ResponsesMessage {
+ return schemas.ResponsesMessage{
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
+ Role: schemas.Ptr(schemas.ResponsesInputMessageRoleDeveloper),
+ Content: &schemas.ResponsesMessageContent{
+ ContentBlocks: []schemas.ResponsesMessageContentBlock{
+ {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr(text)},
+ },
+ },
+ }
+}
+
+// TestMidConversationDeveloperReminderStaysInline mirrors the system-reminder test for the
+// developer role, which the converter treats identically (both are hoisted only when leading,
+// inlined otherwise). Without coverage, a future change special-casing developer could regress.
+func TestMidConversationDeveloperReminderStaysInline(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."), // leading system prompt
+ userReminderTextMsg("first user turn"),
+ developerReminderTextMsg("Developer note injected mid-conversation."),
+ userReminderTextMsg("second user turn"),
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ // Only the leading system prompt is hoisted; the developer reminder is NOT.
+ require.Len(t, systemMessages, 1)
+ assert.Equal(t, "You are Claude Code.", *systemMessages[0].Text)
+ for _, sm := range systemMessages {
+ if sm.Text != nil {
+ assert.NotContains(t, *sm.Text, "Developer note", "developer reminder must not be hoisted")
+ }
+ }
+
+ // It appears inline, wrapped, in chronological order.
+ var found bool
+ for _, m := range messages {
+ for _, b := range m.Content {
+ if b.Text != nil && *b.Text == "\nDeveloper note injected mid-conversation.\n\n" {
+ found = true
+ }
+ }
+ }
+ assert.True(t, found, "developer reminder must be inlined as wrapped user text")
+}
+
+// TestMidConversationReminderContentStrInlined covers the ContentStr branch of the helper
+// (simple string content) rather than ContentBlocks, which all the other tests use. A
+// regression in that branch (e.g. forgetting to wrap) would otherwise pass CI.
+func TestMidConversationReminderContentStrInlined(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."),
+ userReminderTextMsg("hello"),
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
+ Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem),
+ Content: &schemas.ResponsesMessageContent{ContentStr: schemas.Ptr("Reminder via ContentStr.")},
+ },
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+ require.Len(t, systemMessages, 1, "only the leading prompt is hoisted")
+
+ var found bool
+ for _, m := range messages {
+ for _, b := range m.Content {
+ if b.Text != nil && *b.Text == "\nReminder via ContentStr.\n\n" {
+ found = true
+ }
+ }
+ }
+ assert.True(t, found, "ContentStr reminder must be inlined as wrapped user text")
+}
+
+// TestMidConversationReminderEmptyContentDropped pins the nil-return contract: a mid-conversation
+// reminder with no text content produces no Bedrock message (the helper returns nil and the
+// caller skips it), rather than an empty user message that Bedrock would reject.
+func TestMidConversationReminderEmptyContentDropped(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."),
+ userReminderTextMsg("hello"),
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage),
+ Role: schemas.Ptr(schemas.ResponsesInputMessageRoleSystem),
+ Content: &schemas.ResponsesMessageContent{ContentBlocks: []schemas.ResponsesMessageContentBlock{}},
+ },
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+ require.Len(t, systemMessages, 1)
+
+ // No empty content blocks anywhere; the empty reminder was dropped, not emitted.
+ for _, m := range messages {
+ require.NotEmpty(t, m.Content, "no message should have empty content")
+ for _, b := range m.Content {
+ if b.Text != nil {
+ assert.NotEqual(t, "\n\n\n", *b.Text, "empty reminder must not be emitted as a wrapped blank")
+ }
+ }
+ }
+}
+
+// TestSystemReminderBetweenToolCallAndResult covers a reminder arriving BETWEEN a function_call
+// and its output — a defensive edge case (Claude Code never interleaves reminders into a tool
+// exchange). Asserts the tool_use/tool_result pair is preserved. Known quirk, not asserted: the
+// merged user turn places the reminder text before the tool_result block; revisit if real traffic
+// ever interleaves this way.
+func TestSystemReminderBetweenToolCallAndResult(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."),
+ userReminderTextMsg("do a thing"),
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
+ ResponsesToolMessage: &schemas.ResponsesToolMessage{CallID: schemas.Ptr("tooluse_x"), Name: schemas.Ptr("read")},
+ },
+ systemReminderTextMsg("Injected between call and result."), // the interleaved reminder
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
+ ResponsesToolMessage: &schemas.ResponsesToolMessage{
+ CallID: schemas.Ptr("tooluse_x"),
+ Output: &schemas.ResponsesToolMessageOutputStruct{ResponsesToolCallOutputStr: schemas.Ptr("contents")},
+ },
+ },
+ }
+
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ // Locate the assistant message carrying the tool_use.
+ toolUseMsgIdx := -1
+ for i, m := range messages {
+ for _, b := range m.Content {
+ if b.ToolUse != nil {
+ toolUseMsgIdx = i
+ }
+ }
+ }
+ require.GreaterOrEqual(t, toolUseMsgIdx, 0, "tool_use must be present")
+
+ // No reminder text may share the tool_use assistant message (that would split the pair).
+ for _, b := range messages[toolUseMsgIdx].Content {
+ assert.Nil(t, b.Text, "tool_use message must not contain an inlined reminder text block")
+ }
+
+ // The pair invariant: the next message is the user turn and it contains the matching
+ // tool_result (block order within that turn is not asserted — see the known limitation above).
+ require.Less(t, toolUseMsgIdx+1, len(messages), "tool_use must be followed by a message")
+ next := messages[toolUseMsgIdx+1]
+ assert.Equal(t, bedrock.BedrockMessageRoleUser, next.Role)
+ var hasResult bool
+ for _, b := range next.Content {
+ if b.ToolResult != nil && b.ToolResult.ToolUseID == "tooluse_x" {
+ hasResult = true
+ }
+ }
+ assert.True(t, hasResult, "user message after tool_use must contain the matching tool_result")
+}
+
+// TestSystemReminderDoesNotCarryCachePoint pins the deliberate omission flagged in review: an
+// inlined mid-conversation reminder must NOT emit a CachePoint, even if its block carries
+// CacheControl. A breakpoint at the moving conversation tail would shift every turn and defeat
+// the prefix caching this whole change exists to preserve.
+func TestSystemReminderDoesNotCarryCachePoint(t *testing.T) {
+ reminder := systemReminderTextMsg("Reminder that happens to carry a breakpoint.")
+ reminder.Content.ContentBlocks[0].CacheControl = &schemas.CacheControl{Type: schemas.CacheControlTypeEphemeral}
+
+ input := []schemas.ResponsesMessage{
+ systemReminderTextMsg("You are Claude Code."),
+ userReminderTextMsg("hello"),
+ reminder,
+ }
+
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ for _, m := range messages {
+ for _, b := range m.Content {
+ assert.Nil(t, b.CachePoint, "inlined reminder must not introduce a CachePoint block")
+ }
+ }
+}
+
+// TestToolCacheControlBecomesCachePointWithTTL is the positive counterpart to
+// TestSystemReminderDoesNotCarryCachePoint: a cache_control breakpoint Claude Code places on a
+// tool call / tool result must survive into Bedrock as a CachePoint block adjacent to the
+// ToolUse/ToolResult, AND must carry the requested TTL ("1h"). This pins the bug review caught
+// where the breakpoints were emitted with the default TTL dropped.
+func TestToolCacheControlBecomesCachePointWithTTL(t *testing.T) {
+ ttl := "1h"
+ toolMsgs := func() []schemas.ResponsesMessage {
+ return []schemas.ResponsesMessage{
+ userReminderTextMsg("call the tool"),
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
+ CacheControl: &schemas.CacheControl{Type: schemas.CacheControlTypeEphemeral, TTL: &ttl},
+ ResponsesToolMessage: &schemas.ResponsesToolMessage{
+ CallID: schemas.Ptr("tooluse_ttl"),
+ Name: schemas.Ptr("get_weather"),
+ Arguments: schemas.Ptr(`{"location":"NYC"}`),
+ },
+ },
+ {
+ Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
+ CacheControl: &schemas.CacheControl{Type: schemas.CacheControlTypeEphemeral, TTL: &ttl},
+ ResponsesToolMessage: &schemas.ResponsesToolMessage{
+ CallID: schemas.Ptr("tooluse_ttl"),
+ Output: &schemas.ResponsesToolMessageOutputStruct{ResponsesToolCallOutputStr: schemas.Ptr("sunny")},
+ },
+ },
+ }
+ }
+
+ assertTTLPreserved := func(t *testing.T, input []schemas.ResponsesMessage) {
+ messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ var afterToolUse, afterToolResult bool
+ for _, m := range messages {
+ for i, b := range m.Content {
+ if b.ToolUse != nil && b.ToolUse.ToolUseID == "tooluse_ttl" {
+ require.Greater(t, len(m.Content), i+1, "a CachePoint block must follow the ToolUse")
+ cp := m.Content[i+1].CachePoint
+ require.NotNil(t, cp, "ToolUse with cache_control must be followed by a CachePoint")
+ require.NotNil(t, cp.TTL, "CachePoint must carry the requested TTL, not drop it")
+ assert.Equal(t, "1h", *cp.TTL)
+ afterToolUse = true
+ }
+ if b.ToolResult != nil && b.ToolResult.ToolUseID == "tooluse_ttl" {
+ require.Greater(t, len(m.Content), i+1, "a CachePoint block must follow the ToolResult")
+ cp := m.Content[i+1].CachePoint
+ require.NotNil(t, cp, "ToolResult with cache_control must be followed by a CachePoint")
+ require.NotNil(t, cp.TTL, "CachePoint must carry the requested TTL, not drop it")
+ assert.Equal(t, "1h", *cp.TTL)
+ afterToolResult = true
+ }
+ }
+ }
+ assert.True(t, afterToolUse, "expected a TTL-carrying CachePoint after the tool_use")
+ assert.True(t, afterToolResult, "expected a TTL-carrying CachePoint after the tool_result")
+ }
+
+ // End-of-sequence flush: the tool call/result are the last messages (isLastResultInSequence).
+ t.Run("end of sequence", func(t *testing.T) {
+ assertTTLPreserved(t, toolMsgs())
+ })
+ // Flush-before-message: a following user message triggers the flush while tool results are
+ // pending (the path this PR added inside case ResponsesMessageTypeMessage).
+ t.Run("followed by a message", func(t *testing.T) {
+ assertTTLPreserved(t, append(toolMsgs(), userReminderTextMsg("and then continue")))
+ })
+}
+
+// TestLoneSystemMessageReturnsUserMessage covers the single-element early return: a lone
+// system/developer message is converted to a user message and returned, with no system block,
+// regardless of the inlineSystemReminders gate.
+func TestLoneSystemMessageReturnsUserMessage(t *testing.T) {
+ // Both system and developer roles take the single-message early return.
+ roles := map[string]schemas.ResponsesMessage{
+ "system": systemReminderTextMsg("You are Claude Code."),
+ "developer": developerReminderTextMsg("You are Claude Code."),
+ }
+ for role, msg := range roles {
+ for _, inline := range []bool{true, false} {
+ input := []schemas.ResponsesMessage{msg}
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, inline)
+ require.NoError(t, err)
+ assert.Empty(t, systemMessages, "lone %s message must not populate the system block (inline=%v)", role, inline)
+ require.Len(t, messages, 1, "lone %s message must yield exactly one message (inline=%v)", role, inline)
+ assert.Equal(t, bedrock.BedrockMessageRoleUser, messages[0].Role)
+ }
+ }
+}
+
+// TestNoLeadingSystemBlockReminderInlined covers the seenNonSystemMessage gate when the
+// conversation does NOT start with a system message: the first message is a user turn, so a later
+// role=system reminder is mid-conversation and must be inlined (nothing hoisted into system).
+func TestNoLeadingSystemBlockReminderInlined(t *testing.T) {
+ input := []schemas.ResponsesMessage{
+ userReminderTextMsg("hello"),
+ systemReminderTextMsg("Reminder with no leading system block."),
+ userReminderTextMsg("continue"),
+ }
+
+ messages, systemMessages, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input, true)
+ require.NoError(t, err)
+
+ assert.Empty(t, systemMessages, "no leading system block means nothing should be hoisted")
+ var inlined bool
+ for _, m := range messages {
+ for _, b := range m.Content {
+ if b.Text != nil && strings.Contains(*b.Text, "") &&
+ strings.Contains(*b.Text, "Reminder with no leading system block.") {
+ assert.Equal(t, bedrock.BedrockMessageRoleUser, m.Role, "inlined reminder must be a user message")
+ inlined = true
+ }
+ }
+ }
+ assert.True(t, inlined, "the mid-conversation reminder must be inlined as a wrapped user message")
+}
diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go
index 56cff5119f..94d7c71bf0 100644
--- a/core/providers/bedrock/responses.go
+++ b/core/providers/bedrock/responses.go
@@ -2160,7 +2160,9 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.
input = input[:trimmed]
}
- messages, systemMessages, err := ConvertBifrostMessagesToBedrockMessages(ctx, input)
+ // Inline mid-conversation system reminders for Anthropic models (keeps Bedrock's
+ // prefix-based prompt cache stable); hoist-everything for other families.
+ messages, systemMessages, err := ConvertBifrostMessagesToBedrockMessages(ctx, input, schemas.IsAnthropicModelFamily(ctx, bifrostReq.Model))
if err != nil {
return nil, fmt.Errorf("failed to convert Responses messages: %w", err)
}
@@ -2660,7 +2662,8 @@ func ToBedrockConverseResponse(bifrostResp *schemas.BifrostResponsesResponse) (*
// Convert Bifrost messages back to Bedrock messages using the new conversion method.
// Response-side conversion does not perform outbound fetches in practice (model output
// blocks already carry inline data), so context.Background() is acceptable here.
- bedrockMessages, _, err := ConvertBifrostMessagesToBedrockMessages(context.Background(), bifrostResp.Output)
+ // Response output never contains mid-conversation system reminders, so disable inlining.
+ bedrockMessages, _, err := ConvertBifrostMessagesToBedrockMessages(context.Background(), bifrostResp.Output, false)
if err != nil {
return nil, fmt.Errorf("failed to convert bifrost output messages: %w", err)
}
@@ -3129,8 +3132,12 @@ func (m *ToolCallStateManager) HasPendingResults() bool {
// ConvertBifrostMessagesToBedrockMessages converts an array of Bifrost ResponsesMessage to Bedrock message format
// This is the main conversion method from Bifrost to Bedrock - handles all message types and returns messages + system messages
// Uses a state machine to properly track and manage tool call lifecycles.
-// The ctx is propagated to URL fetches inside content blocks.
-func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessages []schemas.ResponsesMessage) ([]BedrockMessage, []BedrockSystemMessage, error) {
+// The ctx is propagated to URL fetches inside content blocks. inlineSystemReminders selects the
+// mid-conversation system-message handling: when true, only the leading run of system/developer
+// messages is hoisted into the top-level `system` block and later (mid-conversation) ones are
+// inlined in place; when false, every system/developer message is hoisted (historical behavior).
+// Callers compute it from the provider+model — see the call site in ToBedrockResponsesRequest.
+func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessages []schemas.ResponsesMessage, inlineSystemReminders bool) ([]BedrockMessage, []BedrockSystemMessage, error) {
// If only a single system message is present, convert it user message (since openai allows it)
if len(bifrostMessages) == 1 && bifrostMessages[0].Role != nil && (*bifrostMessages[0].Role == schemas.ResponsesInputMessageRoleSystem || *bifrostMessages[0].Role == schemas.ResponsesInputMessageRoleDeveloper) {
msg := bifrostMessages[0]
@@ -3142,6 +3149,16 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage
}
}
+ // Bedrock's prompt cache is prefix-based, so growing the top-level `system` block invalidates
+ // the cached conversation behind it. A mid-conversation role=system message (e.g. the reminders
+ // Claude Code injects) hoisted into `system` collapses the cache to the tools/system floor every
+ // time one appears. When inlineSystemReminders is set we instead keep only the leading run of
+ // system/developer messages in `system` and inline later ones in place. This is the Bedrock
+ // counterpart of the native Anthropic provider's mid-conversation system support
+ // (SupportsMidConversationSystem) — Bedrock has no message-level system role, so the inlined
+ // message is rendered as a user turn (see convertBifrostSystemReminderToBedrockUserMessage).
+ // When false, every system/developer message is hoisted (historical behavior).
+
var bedrockMessages []BedrockMessage
var systemMessages []BedrockSystemMessage
var pendingReasoningContentBlocks []BedrockContentBlock
@@ -3149,6 +3166,9 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage
// blocks that must be prepended to the next assistant text message (same-turn server-managed tools).
var pendingServerToolBlocks []BedrockContentBlock
+ // Set once the leading system prompt ends (first non-system message); gates inlineSystemReminders.
+ seenNonSystemMessage := false
+
// Initialize the state manager for tracking tool calls and results
stateManager := NewToolCallStateManager()
@@ -3242,6 +3262,13 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage
msgType = *msg.Type
}
+ // First non-system message closes the leading system-prompt run (see seenNonSystemMessage).
+ isSystemMessage := msgType == schemas.ResponsesMessageTypeMessage && msg.Role != nil &&
+ (*msg.Role == schemas.ResponsesInputMessageRoleSystem || *msg.Role == schemas.ResponsesInputMessageRoleDeveloper)
+ if !isSystemMessage {
+ seenNonSystemMessage = true
+ }
+
// If we're processing a non-reasoning message and have pending reasoning blocks,
// flush them into the previous assistant message (if it exists)
if msgType != schemas.ResponsesMessageTypeReasoning && len(pendingReasoningContentBlocks) > 0 {
@@ -3445,6 +3472,13 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage
}
toolUseBlock.ToolUse.Input = input
toolUseBlocks = append(toolUseBlocks, *toolUseBlock)
+ // Preserve the cache breakpoint Claude Code placed on this tool call, else the
+ // next turn can't match the prefix and collapses to the tools/system floor.
+ if toolCall.CacheControl != nil {
+ toolUseBlocks = append(toolUseBlocks, BedrockContentBlock{
+ CachePoint: newBedrockCachePoint(toolCall.CacheControl.TTL),
+ })
+ }
}
}
@@ -3471,6 +3505,12 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage
Status: schemas.Ptr(result.Status),
},
})
+ // Preserve the cache breakpoint Claude Code placed on this tool result.
+ if result.CacheControl != nil {
+ resultBlocks = append(resultBlocks, BedrockContentBlock{
+ CachePoint: newBedrockCachePoint(result.CacheControl.TTL),
+ })
+ }
}
if len(resultBlocks) > 0 {
@@ -3483,10 +3523,17 @@ func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessage
}
// Convert regular message
- if role == schemas.ResponsesInputMessageRoleSystem || role == schemas.ResponsesInputMessageRoleDeveloper {
- // Convert to system message
+ if (role == schemas.ResponsesInputMessageRoleSystem || role == schemas.ResponsesInputMessageRoleDeveloper) &&
+ (!inlineSystemReminders || !seenNonSystemMessage) {
+ // Leading system prompt (or any system message for non-Anthropic models): hoist into `system`.
systemMsgs := convertBifrostMessageToBedrockSystemMessages(&msg)
systemMessages = append(systemMessages, systemMsgs...)
+ } else if role == schemas.ResponsesInputMessageRoleSystem || role == schemas.ResponsesInputMessageRoleDeveloper {
+ // Mid-conversation reminder: inline in place instead of hoisting (see inlineSystemReminders).
+ bedrockMsg := convertBifrostSystemReminderToBedrockUserMessage(&msg)
+ if bedrockMsg != nil {
+ bedrockMessages = append(bedrockMessages, *bedrockMsg)
+ }
} else {
// Convert user/assistant text message
bedrockMsg := convertBifrostMessageToBedrockMessage(ctx, &msg)
@@ -3701,6 +3748,44 @@ func convertBifrostMessageToBedrockSystemMessages(msg *schemas.ResponsesMessage)
return systemMessages
}
+// convertBifrostSystemReminderToBedrockUserMessage renders a mid-conversation role=system reminder
+// as a user message (Bedrock has no message-level system role), wrapping each text block in the
+// same \n...\n\n envelope Claude Code uses for pre-wrapped ones.
+// Returns nil for content that yields no text, so the caller skips the append.
+func convertBifrostSystemReminderToBedrockUserMessage(msg *schemas.ResponsesMessage) *BedrockMessage {
+ if msg.Content == nil {
+ return nil
+ }
+
+ var contentBlocks []BedrockContentBlock
+ wrap := func(text string) {
+ wrapped := "\n" + text + "\n\n"
+ contentBlocks = append(contentBlocks, BedrockContentBlock{Text: &wrapped})
+ }
+
+ // Text-only by design: reminders never carry images, and we deliberately attach no cache point
+ // here — a breakpoint on this moving-tail message would shift every turn and defeat the prefix
+ // caching this inlining exists for.
+ if msg.Content.ContentStr != nil {
+ wrap(*msg.Content.ContentStr)
+ } else if msg.Content.ContentBlocks != nil {
+ for _, block := range msg.Content.ContentBlocks {
+ if block.Text != nil {
+ wrap(*block.Text)
+ }
+ }
+ }
+
+ if len(contentBlocks) == 0 {
+ return nil
+ }
+
+ return &BedrockMessage{
+ Role: BedrockMessageRoleUser,
+ Content: contentBlocks,
+ }
+}
+
// convertBifrostMessageToBedrockMessage converts a regular Bifrost message to Bedrock message.
// The ctx is propagated to URL fetches inside content blocks.
func convertBifrostMessageToBedrockMessage(ctx context.Context, msg *schemas.ResponsesMessage) *BedrockMessage {