Skip to content

Commit 239d816

Browse files
Fix OpenAI transcription zero token usage
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 3c6e53c commit 239d816

File tree

2 files changed

+59
-2
lines changed

2 files changed

+59
-2
lines changed

lib/ruby_llm/providers/openai/transcription.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,17 @@ def parse_transcription_response(response, model:)
5353
return RubyLLM::Transcription.new(text: data, model: model) if data.is_a?(String)
5454

5555
usage = data['usage'] || {}
56+
input_tokens = usage['input_tokens'].nil? ? usage['prompt_tokens'] : usage['input_tokens']
57+
output_tokens = usage['output_tokens'].nil? ? usage['completion_tokens'] : usage['output_tokens']
5658

5759
RubyLLM::Transcription.new(
5860
text: data['text'],
5961
model: model,
6062
language: data['language'],
6163
duration: data['duration'],
6264
segments: data['segments'],
63-
input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
64-
output_tokens: usage['output_tokens'] || usage['completion_tokens']
65+
input_tokens: input_tokens,
66+
output_tokens: output_tokens
6567
)
6668
end
6769
end
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# frozen_string_literal: true

require 'spec_helper'

# Regression spec for token-usage parsing in OpenAI transcription responses.
# The parser must prefer the modern `input_tokens`/`output_tokens` usage keys
# and only fall back to the legacy `prompt_tokens`/`completion_tokens` keys
# when the modern keys are absent (nil) — NOT when they are merely falsy-ish
# like 0, which a naive `||` fallback would incorrectly skip over.
RSpec.describe RubyLLM::Providers::OpenAI::Transcription do
  describe '.parse_transcription_response' do
    # Stubbed Faraday response; each context supplies its own `body` hash
    # shaped like the parsed JSON payload the provider returns.
    let(:response) { instance_double(Faraday::Response, body: body) }

    context 'when explicit zero-valued usage fields are present' do
      let(:body) do
        {
          'text' => 'Transcript',
          'language' => 'en',
          'duration' => 1.23,
          'segments' => [],
          'usage' => {
            # Modern keys are 0 while legacy keys are non-zero, so a buggy
            # `||` fallback would surface as 12/8 instead of the correct 0/0.
            'input_tokens' => 0,
            'prompt_tokens' => 12,
            'output_tokens' => 0,
            'completion_tokens' => 8
          }
        }
      end

      it 'preserves zero-valued token usage fields' do
        transcription = described_class.parse_transcription_response(response, model: 'gpt-4o-transcribe')

        # Zero is a legitimate reported count and must be kept as-is.
        expect(transcription.input_tokens).to eq(0)
        expect(transcription.output_tokens).to eq(0)
      end
    end

    context 'when only fallback token usage fields are present' do
      let(:body) do
        {
          'text' => 'Transcript',
          'language' => 'en',
          'duration' => 1.23,
          'segments' => [],
          'usage' => {
            # Only the legacy key names are present; the parser should
            # fall back to them when the modern keys are nil.
            'prompt_tokens' => 12,
            'completion_tokens' => 8
          }
        }
      end

      it 'falls back to prompt/completion token usage fields' do
        transcription = described_class.parse_transcription_response(response, model: 'gpt-4o-transcribe')

        expect(transcription.input_tokens).to eq(12)
        expect(transcription.output_tokens).to eq(8)
      end
    end
  end
end

0 commit comments

Comments
 (0)