diff --git a/src/jsc/array_buffer.rs b/src/jsc/array_buffer.rs index c6ecad51243..ec3a0467aaf 100644 --- a/src/jsc/array_buffer.rs +++ b/src/jsc/array_buffer.rs @@ -417,8 +417,8 @@ impl ArrayBuffer { pub fn from_bytes(bytes: &mut [u8], typed_array_type: JSType) -> ArrayBuffer { ArrayBuffer { - len: u32::try_from(bytes.len()).expect("int cast") as usize, - byte_len: u32::try_from(bytes.len()).expect("int cast") as usize, + len: bytes.len(), + byte_len: bytes.len(), typed_array_type, ptr: bytes.as_mut_ptr(), ..Default::default() @@ -439,8 +439,8 @@ impl ArrayBuffer { // this is an FFI hand-off, not a leak. let ptr = bun_core::heap::into_raw(bytes).cast::(); ArrayBuffer { - len: u32::try_from(len).expect("int cast") as usize, - byte_len: u32::try_from(len).expect("int cast") as usize, + len, + byte_len: len, typed_array_type, ptr, ..Default::default() diff --git a/src/jsc/array_buffer.zig b/src/jsc/array_buffer.zig index ee5b8351388..57698726c83 100644 --- a/src/jsc/array_buffer.zig +++ b/src/jsc/array_buffer.zig @@ -209,7 +209,7 @@ pub const ArrayBuffer = extern struct { } pub fn fromBytes(bytes: []u8, typed_array_type: jsc.JSValue.JSType) ArrayBuffer { - return ArrayBuffer{ .len = @as(u32, @intCast(bytes.len)), .byte_len = @as(u32, @intCast(bytes.len)), .typed_array_type = typed_array_type, .ptr = bytes.ptr }; + return ArrayBuffer{ .len = bytes.len, .byte_len = bytes.len, .typed_array_type = typed_array_type, .ptr = bytes.ptr }; } pub fn toJSUnchecked(this: ArrayBuffer, ctx: *jsc.JSGlobalObject) bun.JSError!jsc.JSValue { diff --git a/src/runtime/webcore/FormData.rs b/src/runtime/webcore/FormData.rs index 7bbe91c67fc..f369cdab328 100644 --- a/src/runtime/webcore/FormData.rs +++ b/src/runtime/webcore/FormData.rs @@ -7,7 +7,7 @@ use bun_jsc::{ AnyPromise, CallFrame, DOMFormData, JSGlobalObject, JSValue, JsError, JsResult, JsTerminated, ZigStringJsc as _, }; -use bun_semver::{self, SlicedString}; +use bun_semver; use core::ffi::c_void; use crate::webcore::Blob; @@ -79,14 +79,16 @@ impl AsyncFormDataExt for AsyncFormData { } } -/// Raw slice into the input buffer. Not using `bun.Semver.String` because -/// file bodies are binary data that can contain null bytes, which -/// Semver.String's inline storage treats as terminators. +/// Raw slices into the input buffer. Not using `bun_semver::String` +/// because it stores the offset/length as `u32` (with the top bit of +/// length stolen as a tag), which silently corrupts any part located +/// past 4 GiB in the body or whose value exceeds 2 GiB. It also treats +/// inline null bytes as terminators, which is wrong for file bodies. pub struct Field<'a> { /// Borrows into the caller-owned input buffer (binary body slice). pub value: &'a [u8], - pub filename: bun_semver::String, - pub content_type: bun_semver::String, + pub filename: &'a [u8], + pub content_type: &'a [u8], pub is_file: bool, pub zero_count: u8, } @@ -95,8 +97,8 @@ impl Default for Field<'_> { fn default() -> Self { Field { value: b"", - filename: bun_semver::String::default(), - content_type: bun_semver::String::default(), + filename: b"", + content_type: b"", is_file: false, zero_count: 0, } @@ -216,12 +218,12 @@ pub fn to_js_from_multipart_data( } impl<'a> Wrapper<'a> { - fn on_entry(wrap: &mut Self, name: bun_semver::String, field: &Field<'_>, buf: &[u8]) { + fn on_entry(wrap: &mut Self, name: &[u8], field: &Field<'_>) { let value_str: &[u8] = field.value; - let key = ZigString::init_utf8(name.slice(buf)); + let key = ZigString::init_utf8(name); if field.is_file { - let filename_str = field.filename.slice(buf); + let filename_str: &[u8] = field.filename; let mut blob = Blob::create(value_str, wrap.global, false); let filename = ZigString::init_utf8(filename_str); @@ -230,7 +232,7 @@ pub fn to_js_from_multipart_data( // ownership cases instead of unifying through a single // `&[u8]` (avoids borrowing a temporary). if !field.content_type.is_empty() { - let ct = field.content_type.slice(buf); + let ct = field.content_type; blob.content_type_allocated.set(true); blob.content_type .set(bun_core::heap::into_raw(Box::<[u8]>::from(ct)).cast_const()); @@ -311,10 +313,9 @@ pub fn for_each_multipart_entry( input: &[u8], boundary: &[u8], ctx: &mut C, - mut iterator: impl FnMut(&mut C, bun_semver::String, &Field<'_>, &[u8]), + mut iterator: impl FnMut(&mut C, &[u8], &Field<'_>), ) -> Result<(), bun_core::Error> { let mut slice = input; - let subslicer = SlicedString::init(input, input); let mut buf = [0u8; 76]; { @@ -354,11 +355,11 @@ pub fn for_each_multipart_entry( remain = &remain[header_end + 4..]; let mut field = Field::default(); - let mut name = bun_semver::String::default(); - let mut filename: Option = None; + let mut name: &[u8] = b""; + let mut filename: Option<&[u8]> = None; let mut header_chunk = header; let mut is_file = false; - while !header_chunk.is_empty() && (filename.is_none() || name.len() == 0) { + while !header_chunk.is_empty() && (filename.is_none() || name.is_empty()) { let line_end = strings::index_of(header_chunk, b"\r\n") .ok_or_else(|| err!("is missing header line end"))?; let line = &header_chunk[..line_end]; @@ -408,9 +409,9 @@ pub fn for_each_multipart_entry( } if strings::eql_case_insensitive_ascii(eql_key, b"name", true) { - name = subslicer.sub(field_value).value(); + name = field_value; } else if strings::eql_case_insensitive_ascii(eql_key, b"filename", true) { - filename = Some(subslicer.sub(field_value).value()); + filename = Some(field_value); is_file = true; } @@ -439,7 +440,7 @@ pub fn for_each_multipart_entry( .iter() .all(|&b| b == b'\t' || (0x20..=0x7E).contains(&b)) { - field.content_type = subslicer.sub(trimmed).value(); + field.content_type = trimmed; } } } @@ -453,10 +454,10 @@ pub fn for_each_multipart_entry( body = &body[..body.len() - 2]; } field.value = body; - field.filename = filename.unwrap_or_default(); + field.filename = filename.unwrap_or(b""); field.is_file = is_file; - iterator(ctx, name, &field, input); + iterator(ctx, name, &field); } Ok(()) diff --git a/src/runtime/webcore/FormData.zig b/src/runtime/webcore/FormData.zig index 84114ace035..0180bae7e23 100644 --- a/src/runtime/webcore/FormData.zig +++ b/src/runtime/webcore/FormData.zig @@ -104,12 +104,15 @@ pub const FormData = struct { } pub const Field = struct { - /// Raw slice into the input buffer. Not using `bun.Semver.String` because - /// file bodies are binary data that can contain null bytes, which - /// Semver.String's inline storage treats as terminators. + /// Raw slices into the input buffer. Not using `bun.Semver.String` + /// because it stores the offset/length as `u32` (with the top bit + /// of length stolen as a tag), which silently corrupts any part + /// located past 4GB in the body or whose value exceeds 2GB. It also + /// treats inline null bytes as terminators, which is wrong for file + /// bodies. value: []const u8 = "", - filename: bun.Semver.String = .{}, - content_type: bun.Semver.String = .{}, + filename: []const u8 = "", + content_type: []const u8 = "", is_file: bool = false, zero_count: u8 = 0, @@ -218,19 +221,19 @@ pub const FormData = struct { globalThis: *jsc.JSGlobalObject, form: *jsc.DOMFormData, - pub fn onEntry(wrap: *@This(), name: bun.Semver.String, field: Field, buf: []const u8) void { + pub fn onEntry(wrap: *@This(), name: []const u8, field: Field) void { const value_str = field.value; - var key = jsc.ZigString.initUTF8(name.slice(buf)); + var key = jsc.ZigString.initUTF8(name); if (field.is_file) { - const filename_str = field.filename.slice(buf); + const filename_str = field.filename; var blob = jsc.WebCore.Blob.create(value_str, bun.default_allocator, wrap.globalThis, false); defer blob.detach(); var filename = jsc.ZigString.initUTF8(filename_str); const content_type: []const u8 = brk: { - if (!field.content_type.isEmpty()) { - break :brk field.content_type.slice(buf); + if (field.content_type.len > 0) { + break :brk field.content_type; } if (filename_str.len > 0) { const extension = std.fs.path.extension(filename_str); @@ -249,7 +252,7 @@ pub const FormData = struct { }; if (content_type.len > 0) { - if (!field.content_type.isEmpty()) { + if (field.content_type.len > 0) { blob.content_type_allocated = true; blob.content_type = bun.default_allocator.dupe(u8, content_type) catch @panic("failed to allocate memory for blob content type"); blob.content_type_was_set = true; @@ -299,13 +302,11 @@ pub const FormData = struct { ctx: Ctx, comptime iterator: fn ( Ctx, - bun.Semver.String, + []const u8, Field, - string, ) void, ) !void { var slice = input; - var subslicer = bun.Semver.SlicedString.init(input, input); var buf: [76]u8 = undefined; { @@ -334,11 +335,11 @@ pub const FormData = struct { remain = remain[header_end + 4 ..]; var field = Field{}; - var name: bun.Semver.String = .{}; - var filename: ?bun.Semver.String = null; + var name: []const u8 = ""; + var filename: ?[]const u8 = null; var header_chunk = header; var is_file = false; - while (header_chunk.len > 0 and (filename == null or name.len() == 0)) { + while (header_chunk.len > 0 and (filename == null or name.len == 0)) { const line_end = strings.indexOf(header_chunk, "\r\n") orelse return error.@"is missing header line end"; const line = header_chunk[0..line_end]; header_chunk = header_chunk[line_end + 2 ..]; @@ -380,13 +381,13 @@ pub const FormData = struct { } if (strings.eqlCaseInsensitiveASCII(eql_key, "name", true)) { - name = subslicer.sub(field_value).value(); + name = field_value; } else if (strings.eqlCaseInsensitiveASCII(eql_key, "filename", true)) { - filename = subslicer.sub(field_value).value(); + filename = field_value; is_file = true; } - if (!name.isEmpty() and filename != null) { + if (name.len > 0 and filename != null) { break; } @@ -396,12 +397,12 @@ pub const FormData = struct { break; } } - } else if (value.len > 0 and field.content_type.isEmpty() and strings.eqlCaseInsensitiveASCII(key, "content-type", true)) { - field.content_type = subslicer.sub(strings.trim(value, "; \t")).value(); + } else if (value.len > 0 and field.content_type.len == 0 and strings.eqlCaseInsensitiveASCII(key, "content-type", true)) { + field.content_type = strings.trim(value, "; \t"); } } - if (name.len() + @as(usize, field.zero_count) == 0) { + if (name.len + @as(usize, field.zero_count) == 0) { continue; } @@ -410,16 +411,14 @@ pub const FormData = struct { body = body[0 .. body.len - 2]; } field.value = body; - field.filename = filename orelse .{}; + field.filename = filename orelse ""; field.is_file = is_file; - iterator(ctx, name, field, input); + iterator(ctx, name, field); } } }; -const string = []const u8; - const std = @import("std"); const bun = @import("bun"); diff --git a/test/js/web/html/FormData.test.ts b/test/js/web/html/FormData.test.ts index 3c5236201a9..a3139b18288 100644 --- a/test/js/web/html/FormData.test.ts +++ b/test/js/web/html/FormData.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it, test } from "bun:test"; -import { bunEnv, bunExe } from "harness"; +import { bunEnv, bunExe, isWindows } from "harness"; +import { totalmem } from "os"; import { join } from "path"; describe("FormData", () => { @@ -915,3 +916,91 @@ test("FormData.toJSON merges duplicate numeric field names into an array", async expect(stdout.trim().split("\n")).toEqual(['{"0":["a","b","c"],"tag":["x","y"]}', '{"0":["first","second"]}']); expect(exitCode).toBe(0); }); + +// https://github.com/oven-sh/bun/issues/21490 +// +// The multipart parser stored part metadata (name, filename, content-type) as +// `bun.Semver.String`, which packs offset/length into 32-bit fields. For a +// part whose header sits past 4 GiB in the body, the offset wrapped and the +// parser read garbage. +// +// Needs ~10 GiB of working set in a subprocess, so skip on small machines and +// on Windows (where ArrayBuffer backing commits eagerly). Use the +// cgroup-aware limit when available so containerized CI runners aren't fooled +// by the host's physical RAM. +const effectiveMemory = process.constrainedMemory?.() || totalmem(); +it.skipIf(isWindows || effectiveMemory < 16 * 1024 * 1024 * 1024)( + "multipart parser handles parts at offsets > 4 GiB", + async () => { + const fixture = ` + const boundary = "----bun-issue-21490"; + const GiB = 1024 * 1024 * 1024; + + const head = Buffer.from( + "--" + boundary + "\\r\\n" + + 'Content-Disposition: form-data; name="big_upload"; filename="big.bin"\\r\\n' + + "Content-Type: application/octet-stream\\r\\n\\r\\n", + "utf8", + ); + const mid = Buffer.from( + "\\r\\n--" + boundary + "\\r\\n" + + 'Content-Disposition: form-data; name="description_field"\\r\\n\\r\\n' + + "this part lives past the 4 GiB mark\\r\\n", + "utf8", + ); + const mid2 = Buffer.from( + "--" + boundary + "\\r\\n" + + 'Content-Disposition: form-data; name="second_attachment"; filename="also_past_4gb.txt"\\r\\n\\r\\n' + + "file contents\\r\\n", + "utf8", + ); + const tail = Buffer.from("--" + boundary + "--\\r\\n", "utf8"); + + // 4 GiB + 256 bytes so the trailing parts' headers sit above 2**32. + let chunk = new Uint8Array(GiB); + const fileBody = new Blob([chunk, chunk, chunk, chunk, new Uint8Array(256)]); + chunk = null; + const fileSize = fileBody.size; + + const body = new Blob([head, fileBody, mid, mid2, tail]); + + const request = new Request("http://localhost/", { + method: "POST", + headers: { "content-type": "multipart/form-data; boundary=" + boundary }, + body, + }); + + const form = await request.formData(); + const big = form.get("big_upload"); + const second = form.get("second_attachment"); + const result = { + keys: [...form.keys()], + big: { name: big?.name, size: big?.size, type: big?.type }, + description_field: form.get("description_field"), + second: { name: second?.name, size: second?.size, text: second ? await second.text() : null }, + expectedFileSize: fileSize, + }; + console.log(JSON.stringify(result)); + `; + + await using proc = Bun.spawn({ + cmd: [bunExe(), "-e", fixture], + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stderr).toBe(""); + const result = JSON.parse(stdout.trim()); + expect(result).toEqual({ + keys: ["big_upload", "description_field", "second_attachment"], + big: { name: "big.bin", size: result.expectedFileSize, type: "application/octet-stream" }, + description_field: "this part lives past the 4 GiB mark", + second: { name: "also_past_4gb.txt", size: "file contents".length, text: "file contents" }, + expectedFileSize: 4 * 1024 * 1024 * 1024 + 256, + }); + expect(exitCode).toBe(0); + }, + 120_000, +);