Skip to content

Commit 0406663

Browse files
committed
fix: compute correct Content-Length for StringIO with multi-byte chars
StringIO.tell() returns the character position, not the byte offset, so super_len() returned the wrong value for StringIO objects containing multi-byte UTF-8 characters (e.g. emoji). This caused an incorrect Content-Length header that violates RFC 9110 section 8.6.

Read the remaining text and encode it to UTF-8 to measure the true byte length, consistent with how plain str bodies are already handled.

Closes #6917
1 parent 0c2bbe7 commit 0406663

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

src/requests/utils.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -186,13 +186,30 @@ def super_len(o):
186186
if hasattr(o, "seek") and total_length is None:
187187
# StringIO and BytesIO have seek but no usable fileno
188188
try:
189-
# seek to end of file
190-
o.seek(0, 2)
191-
total_length = o.tell()
192-
193-
# seek back to current position to support
194-
# partially read file-like objects
195-
o.seek(current_position or 0)
189+
if isinstance(o, io.StringIO):
190+
# StringIO.tell() returns the character
191+
# position, not the byte offset. Read the
192+
# remaining text and encode it to measure the
193+
# true byte length for Content-Length.
194+
start = current_position or 0
195+
o.seek(start)
196+
total_length = len(o.read().encode("utf-8"))
197+
198+
# Reset current_position so the returned value
199+
# is just total_length (the remaining bytes).
200+
current_position = 0
201+
202+
# seek back to original position to support
203+
# partially read file-like objects
204+
o.seek(start)
205+
else:
206+
# seek to end of file
207+
o.seek(0, 2)
208+
total_length = o.tell()
209+
210+
# seek back to current position to support
211+
# partially read file-like objects
212+
o.seek(current_position or 0)
196213
except OSError:
197214
total_length = 0
198215

tests/test_utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,30 @@ def test_super_len_with_tell(self):
142142
foo.read(2)
143143
assert super_len(foo) == 3
144144

145+
def test_super_len_stringio_multibyte(self):
146+
"""Ensure StringIO with multi-byte characters returns the UTF-8
147+
byte length rather than the character count. See #6917."""
148+
# Single emoji: 1 character, 4 bytes in UTF-8
149+
foo = StringIO.StringIO("\U0001F4A9")
150+
assert super_len(foo) == 4
151+
152+
# Mixed ASCII and multi-byte
153+
foo = StringIO.StringIO("hello \U0001F4A9 world")
154+
assert super_len(foo) == len("hello \U0001F4A9 world".encode("utf-8"))
155+
156+
# Partially read StringIO with multi-byte characters
157+
foo = StringIO.StringIO("hello \U0001F4A9 world")
158+
foo.read(6) # read "hello "
159+
remaining_bytes = len("\U0001F4A9 world".encode("utf-8"))
160+
assert super_len(foo) == remaining_bytes
161+
162+
# Position should be preserved after super_len call
163+
foo = StringIO.StringIO("hello \U0001F4A9 world")
164+
foo.read(3)
165+
pos_before = foo.tell()
166+
super_len(foo)
167+
assert foo.tell() == pos_before
168+
145169
def test_super_len_with_fileno(self):
146170
with open(__file__, "rb") as f:
147171
length = super_len(f)

0 commit comments

Comments
 (0)