Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :model_id, :tool_calls, :tool_call_id, :raw, :thinking, :tokens
attr_reader :role, :model_id, :tool_calls, :tool_call_id, :raw, :thinking, :tokens, :finish_reason
attr_writer :content

def initialize(options = {})
Expand All @@ -24,6 +24,7 @@ def initialize(options = {})
)
@raw = options[:raw]
@thinking = options[:thinking]
@finish_reason = options[:finish_reason]

ensure_valid_role
end
Expand Down Expand Up @@ -79,6 +80,7 @@ def to_h
model_id: model_id,
tool_calls: tool_calls,
tool_call_id: tool_call_id,
finish_reason: finish_reason,
thinking: thinking&.text,
thinking_signature: thinking&.signature
}.merge(tokens ? tokens.to_h : {}).compact
Expand Down
11 changes: 11 additions & 0 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,22 @@ def build_message(data, content, thinking, thinking_signature, tool_use_blocks,
cached_tokens: cached_tokens,
cache_creation_tokens: cache_creation_tokens,
thinking_tokens: thinking_tokens,
finish_reason: normalize_finish_reason(data['stop_reason']),
model_id: data['model'],
raw: response
)
end

# Anthropic's stop_reason vocabulary translated to RubyLLM's normalized
# finish reasons; anything unlisted is passed through verbatim.
FINISH_REASON_MAP = {
  'end_turn' => 'stop',
  'stop_sequence' => 'stop',
  'max_tokens' => 'length'
}.freeze

# Returns the normalized finish reason for a raw Anthropic stop_reason.
# Unknown (or nil) reasons are returned unchanged.
def normalize_finish_reason(reason)
  mapped = FINISH_REASON_MAP[reason]
  mapped.nil? ? reason : mapped
end

def format_message(msg, thinking: nil)
thinking_enabled = thinking&.enabled?

Expand Down
14 changes: 13 additions & 1 deletion lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,22 @@ def build_chunk(data)
output_tokens: extract_output_tokens(data),
cached_tokens: extract_cached_tokens(data),
cache_creation_tokens: extract_cache_creation_tokens(data),
tool_calls: extract_tool_calls(data)
tool_calls: extract_tool_calls(data),
finish_reason: normalize_finish_reason(data.dig('delta', 'stop_reason'))
)
end

# Anthropic stop_reason values mapped onto RubyLLM's normalized finish
# reasons. Uses a frozen lookup table (matching the map-based style of the
# non-streaming Anthropic chat implementation) instead of re-evaluating a
# case chain per chunk, so streamed and non-streamed responses share one
# mapping shape.
FINISH_REASON_MAP = {
  'end_turn' => 'stop',
  'stop_sequence' => 'stop',
  'max_tokens' => 'length'
}.freeze

# Translates a raw Anthropic stop_reason into the normalized vocabulary.
# Unknown (or nil) reasons pass through unchanged.
def normalize_finish_reason(reason)
  FINISH_REASON_MAP.fetch(reason) { reason }
end

def extract_content_delta(data, delta_type)
return data.dig('delta', 'text') if delta_type == 'text_delta'

Expand Down
12 changes: 12 additions & 0 deletions lib/ruby_llm/providers/gemini/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,23 @@ def parse_completion_response(response)
output_tokens: calculate_output_tokens(data),
cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
finish_reason: normalize_finish_reason(data.dig('candidates', 0, 'finishReason')),
model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
raw: response
)
end

# Gemini finishReason values mapped to RubyLLM's normalized vocabulary.
# SAFETY and RECITATION both indicate the response was blocked, so both
# collapse to 'content_filter'.
FINISH_REASON_MAP = {
  'STOP' => 'stop',
  'MAX_TOKENS' => 'length',
  'SAFETY' => 'content_filter',
  'RECITATION' => 'content_filter'
}.freeze

# Returns the normalized finish reason for a raw Gemini finishReason,
# passing unknown (or nil) values through untouched.
def normalize_finish_reason(reason)
  mapped = FINISH_REASON_MAP[reason]
  mapped || reason
end

def convert_schema_to_gemini(schema)
return nil unless schema

Expand Down
16 changes: 15 additions & 1 deletion lib/ruby_llm/providers/gemini/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def build_chunk(data)
output_tokens: extract_output_tokens(data),
cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
tool_calls: extract_tool_calls(data)
tool_calls: extract_tool_calls(data),
finish_reason: normalize_finish_reason(data.dig('candidates', 0, 'finishReason'))
)
end

Expand All @@ -34,6 +35,19 @@ def extract_model_id(data)
data['modelVersion']
end

# Gemini finishReason values mapped onto RubyLLM's normalized finish
# reasons. Uses a frozen lookup table (matching the map-based style of the
# non-streaming Gemini chat implementation) instead of a per-chunk case
# chain, keeping the two code paths consistent.
FINISH_REASON_MAP = {
  'STOP' => 'stop',
  'MAX_TOKENS' => 'length',
  'SAFETY' => 'content_filter',
  'RECITATION' => 'content_filter'
}.freeze

# Translates a raw Gemini finishReason into the normalized vocabulary.
# Unknown (or nil) reasons pass through unchanged.
def normalize_finish_reason(reason)
  FINISH_REASON_MAP.fetch(reason) { reason }
end

def extract_text_content(parts)
text_parts = parts.reject { |p| p['thought'] }
text = text_parts.filter_map { |p| p['text'] }.join
Expand Down
29 changes: 29 additions & 0 deletions lib/ruby_llm/providers/openai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,24 @@ def maybe_normalize_temperature(temperature, model)
OpenAI::Temperature.normalize(temperature, model.id)
end

# rubocop:disable Metrics/ParameterLists
# Override of the base provider's #complete that normalizes OpenAI-specific
# request params before delegating. Only +params+ is transformed (via
# #normalize_params, which mirrors :max_tokens into :max_completion_tokens
# when the latter is absent); every other argument, plus the trailing
# block, is forwarded to super unchanged.
def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
tool_prefs: nil, &)
super(
messages,
tools: tools,
tool_prefs: tool_prefs,
temperature: temperature,
model: model,
params: normalize_params(params),
headers: headers,
schema: schema,
thinking: thinking,
&
)
end
# rubocop:enable Metrics/ParameterLists

class << self
def capabilities
OpenAI::Capabilities
Expand All @@ -49,6 +67,17 @@ def configuration_requirements
%i[openai_api_key]
end
end

private

# Normalizes caller-supplied request params for the OpenAI API.
# Keys are deep-symbolized; when :max_tokens carries a non-nil value and
# :max_completion_tokens is absent, that value is copied over so the
# caller's limit reaches models that only accept :max_completion_tokens.
# An explicitly provided :max_completion_tokens always wins.
# NOTE(review): the original :max_tokens key stays in the hash alongside
# the copy — confirm downstream request building tolerates both keys.
def normalize_params(params)
  normalized = RubyLLM::Utils.deep_symbolize_keys(params || {})
  limit = normalized[:max_tokens]

  if limit.nil? || normalized.key?(:max_completion_tokens)
    normalized
  else
    normalized.merge(max_completion_tokens: limit)
  end
end
end
end
end
10 changes: 10 additions & 0 deletions lib/ruby_llm/providers/openai/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,21 @@ def parse_completion_response(response)
cached_tokens: cached_tokens,
cache_creation_tokens: 0,
thinking_tokens: thinking_tokens,
finish_reason: normalize_finish_reason(data.dig('choices', 0, 'finish_reason')),
model_id: data['model'],
raw: response
)
end

# Normalizes OpenAI finish_reason values: both tool-invocation variants
# ('tool_calls' and the legacy 'function_call') collapse to 'tool_use';
# everything else — including nil — passes through unchanged.
def normalize_finish_reason(reason)
  %w[tool_calls function_call].include?(reason) ? 'tool_use' : reason
end

def format_messages(messages)
messages.map do |msg|
{
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/providers/openai/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def build_chunk(data)
output_tokens: usage['completion_tokens'],
cached_tokens: cached_tokens,
cache_creation_tokens: 0,
thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
finish_reason: OpenAI::Chat.normalize_finish_reason(data.dig('choices', 0, 'finish_reason'))
)
end

Expand Down
3 changes: 3 additions & 0 deletions lib/ruby_llm/stream_accumulator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def initialize
@content = +''
@thinking_text = +''
@thinking_signature = nil
@finish_reason = nil
@tool_calls = {}
@input_tokens = nil
@output_tokens = nil
Expand All @@ -23,6 +24,7 @@ def initialize
def add(chunk)
RubyLLM.logger.debug { chunk.inspect } if RubyLLM.config.log_stream_debug
@model_id ||= chunk.model_id
@finish_reason = chunk.finish_reason if chunk.finish_reason

handle_chunk_content(chunk)
append_thinking_from_chunk(chunk)
Expand All @@ -47,6 +49,7 @@ def to_message(response)
),
model_id: model_id,
tool_calls: tool_calls_from_stream,
finish_reason: @finish_reason,
raw: response
)
end
Expand Down
12 changes: 6 additions & 6 deletions spec/ruby_llm/generators/chat_ui_generator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,13 @@
expect(message_content).to include('acts_as_message')

# Check broadcasting setup
expect(message_content).to include(%q(broadcasts_to ->(message) { "chat_#{message.chat_id}" }))
expect(message_content).to include("broadcasts_to ->(message) { \"chat_\#{message.chat_id}\" }")
expect(message_content).to include('inserts_by: :append')

# Check broadcast_append_chunk method
expect(message_content).to include('def broadcast_append_chunk(content)')
expect(message_content).to include(%q(broadcast_append_to "chat_#{chat_id}"))
expect(message_content).to include(%q(target: "message_#{id}_content"))
expect(message_content).to include("broadcast_append_to \"chat_\#{chat_id}\"")
expect(message_content).to include("target: \"message_\#{id}_content\"")
expect(message_content).to include('content: ERB::Util.html_escape(content.to_s)')
end
end
Expand Down Expand Up @@ -318,13 +318,13 @@
expect(message_content).to include("model: :llm_model, model_class: 'Llm::Model'")

# Check broadcasting setup
expect(message_content).to include(%q(broadcasts_to ->(llm_message) { "llm_chat_#{llm_message.llm_chat_id}" }))
expect(message_content).to include("broadcasts_to ->(llm_message) { \"llm_chat_\#{llm_message.llm_chat_id}\" }")
expect(message_content).to include('inserts_by: :append')

# Check broadcast_append_chunk method
expect(message_content).to include('def broadcast_append_chunk(content)')
expect(message_content).to include(%q(broadcast_append_to "llm_chat_#{llm_chat_id}"))
expect(message_content).to include(%q(target: "llm_message_#{id}_content"))
expect(message_content).to include("broadcast_append_to \"llm_chat_\#{llm_chat_id}\"")
expect(message_content).to include("target: \"llm_message_\#{id}_content\"")
expect(message_content).to include('content: ERB::Util.html_escape(content.to_s)')
end
end
Expand Down
15 changes: 15 additions & 0 deletions spec/ruby_llm/providers/anthropic/chat_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,20 @@
expect(message.cached_tokens).to eq(21)
expect(message.cache_creation_tokens).to eq(7)
end

# Regression test: Anthropic's 'max_tokens' stop_reason must surface as the
# normalized 'length' finish_reason on the parsed message.
it 'normalizes finish_reason on the message' do
# Minimal response body — only the fields parse_completion_response reads.
response_body = {
'model' => 'claude-sonnet-4-5-20250929',
'stop_reason' => 'max_tokens',
'content' => [{ 'type' => 'text', 'text' => 'Hi!' }],
'usage' => {}
}

response = instance_double(Faraday::Response, body: response_body)

message = described_class.parse_completion_response(response)

expect(message.finish_reason).to eq('length')
end
end
end
39 changes: 39 additions & 0 deletions spec/ruby_llm/providers/anthropic/streaming_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe RubyLLM::Providers::Anthropic::Streaming do
include_context 'with configured RubyLLM'

# Bare object extended with the Streaming module (plus the Tools helpers it
# relies on) so the private #build_chunk can be exercised in isolation.
let(:test_obj) do
Object.new.tap do |obj|
obj.extend(RubyLLM::Providers::Anthropic::Tools)
obj.extend(described_class)
end
end

it 'normalizes finish_reason on streaming chunks' do
# NOTE(review): merges text_delta fields and stop_reason into one delta
# payload for brevity; the live API may split these across event types —
# confirm build_chunk is meant to tolerate the combined shape.
data = {
'type' => 'message_delta',
'delta' => {
'type' => 'text_delta',
'text' => 'hello',
'stop_reason' => 'max_tokens'
}
}

# Stub the sibling extractors so only the content and finish_reason
# mapping are under test here.
allow(test_obj).to receive_messages(
extract_model_id: 'claude-sonnet-4-5',
extract_input_tokens: nil,
extract_output_tokens: nil,
extract_cached_tokens: nil,
extract_cache_creation_tokens: nil,
extract_tool_calls: nil
)

chunk = test_obj.send(:build_chunk, data)

expect(chunk.content).to eq('hello')
expect(chunk.finish_reason).to eq('length')
end
end
23 changes: 23 additions & 0 deletions spec/ruby_llm/providers/gemini/chat_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,29 @@
end

describe '#parse_completion_response' do
# Regression test: Gemini's 'SAFETY' finishReason must surface as the
# normalized 'content_filter' finish_reason on the parsed message.
it 'normalizes finish_reason on the message' do
# Struct stands in for a Faraday response: parse_completion_response reads
# the body plus env.url.path (used for the model-id fallback).
response = Struct.new(:body, :env).new(
{
'candidates' => [
{
'finishReason' => 'SAFETY',
'content' => {
'parts' => [{ 'text' => 'blocked' }]
}
}
],
'usageMetadata' => {},
'modelVersion' => 'gemini-2.5-flash'
},
Struct.new(:url).new(Struct.new(:path).new('/v1/models/gemini-2.5-flash:generateContent'))
)

provider = RubyLLM::Providers::Gemini.new(RubyLLM.config)
message = provider.send(:parse_completion_response, response)

expect(message.finish_reason).to eq('content_filter')
end

it 'keeps thought-only parts out of assistant content' do
response = Struct.new(:body, :env).new(
{
Expand Down
19 changes: 19 additions & 0 deletions spec/ruby_llm/providers/gemini/streaming_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,25 @@
expect(chunk.cached_tokens).to eq(6)
end

# Regression test: Gemini's 'MAX_TOKENS' finishReason must surface as the
# normalized 'length' finish_reason on streamed chunks.
it 'normalizes finish_reason on streaming chunks' do
data = {
'candidates' => [
{
'finishReason' => 'MAX_TOKENS',
'content' => {
'parts' => [{ 'text' => 'hello' }]
}
}
],
'usageMetadata' => {},
'modelVersion' => 'gemini-2.5-flash'
}

chunk = test_obj.send(:build_chunk, data)

expect(chunk.finish_reason).to eq('length')
end

it 'correctly sums candidatesTokenCount and thoughtsTokenCount in streaming' do
chat = RubyLLM.chat(model: 'gemini-2.5-flash', provider: :gemini)

Expand Down
Loading
Loading