Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/jsc/array_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,8 @@ impl ArrayBuffer {

pub fn from_bytes(bytes: &mut [u8], typed_array_type: JSType) -> ArrayBuffer {
ArrayBuffer {
len: u32::try_from(bytes.len()).expect("int cast") as usize,
byte_len: u32::try_from(bytes.len()).expect("int cast") as usize,
len: bytes.len(),
byte_len: bytes.len(),
typed_array_type,
ptr: bytes.as_mut_ptr(),
..Default::default()
Expand All @@ -439,8 +439,8 @@ impl ArrayBuffer {
// this is an FFI hand-off, not a leak.
let ptr = bun_core::heap::into_raw(bytes).cast::<u8>();
ArrayBuffer {
len: u32::try_from(len).expect("int cast") as usize,
byte_len: u32::try_from(len).expect("int cast") as usize,
len,
byte_len: len,
typed_array_type,
ptr,
..Default::default()
Expand Down
2 changes: 1 addition & 1 deletion src/jsc/array_buffer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ pub const ArrayBuffer = extern struct {
}

pub fn fromBytes(bytes: []u8, typed_array_type: jsc.JSValue.JSType) ArrayBuffer {
return ArrayBuffer{ .len = @as(u32, @intCast(bytes.len)), .byte_len = @as(u32, @intCast(bytes.len)), .typed_array_type = typed_array_type, .ptr = bytes.ptr };
return ArrayBuffer{ .len = bytes.len, .byte_len = bytes.len, .typed_array_type = typed_array_type, .ptr = bytes.ptr };
}

pub fn toJSUnchecked(this: ArrayBuffer, ctx: *jsc.JSGlobalObject) bun.JSError!jsc.JSValue {
Expand Down
45 changes: 23 additions & 22 deletions src/runtime/webcore/FormData.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use bun_jsc::{
AnyPromise, CallFrame, DOMFormData, JSGlobalObject, JSValue, JsError, JsResult, JsTerminated,
ZigStringJsc as _,
};
use bun_semver::{self, SlicedString};
use bun_semver;
use core::ffi::c_void;

use crate::webcore::Blob;
Expand Down Expand Up @@ -79,14 +79,16 @@ impl AsyncFormDataExt for AsyncFormData {
}
}

/// Raw slice into the input buffer. Not using `bun.Semver.String` because
/// file bodies are binary data that can contain null bytes, which
/// Semver.String's inline storage treats as terminators.
/// Raw slices into the input buffer. Not using `bun_semver::String`
/// because it stores the offset/length as `u32` (with the top bit of
/// length stolen as a tag), which silently corrupts any part located
/// past 4 GiB in the body or whose value exceeds 2 GiB. It also treats
/// inline null bytes as terminators, which is wrong for file bodies.
pub struct Field<'a> {
/// Borrows into the caller-owned input buffer (binary body slice).
pub value: &'a [u8],
pub filename: bun_semver::String,
pub content_type: bun_semver::String,
pub filename: &'a [u8],
pub content_type: &'a [u8],
pub is_file: bool,
pub zero_count: u8,
}
Expand All @@ -95,8 +97,8 @@ impl Default for Field<'_> {
fn default() -> Self {
Field {
value: b"",
filename: bun_semver::String::default(),
content_type: bun_semver::String::default(),
filename: b"",
content_type: b"",
is_file: false,
zero_count: 0,
}
Expand Down Expand Up @@ -216,12 +218,12 @@ pub fn to_js_from_multipart_data(
}

impl<'a> Wrapper<'a> {
fn on_entry(wrap: &mut Self, name: bun_semver::String, field: &Field<'_>, buf: &[u8]) {
fn on_entry(wrap: &mut Self, name: &[u8], field: &Field<'_>) {
let value_str: &[u8] = field.value;
let key = ZigString::init_utf8(name.slice(buf));
let key = ZigString::init_utf8(name);

if field.is_file {
let filename_str = field.filename.slice(buf);
let filename_str: &[u8] = field.filename;

let mut blob = Blob::create(value_str, wrap.global, false);
let filename = ZigString::init_utf8(filename_str);
Expand All @@ -230,7 +232,7 @@ pub fn to_js_from_multipart_data(
// ownership cases instead of unifying through a single
// `&[u8]` (avoids borrowing a temporary).
if !field.content_type.is_empty() {
let ct = field.content_type.slice(buf);
let ct = field.content_type;
blob.content_type_allocated.set(true);
blob.content_type
.set(bun_core::heap::into_raw(Box::<[u8]>::from(ct)).cast_const());
Expand Down Expand Up @@ -311,10 +313,9 @@ pub fn for_each_multipart_entry<C>(
input: &[u8],
boundary: &[u8],
ctx: &mut C,
mut iterator: impl FnMut(&mut C, bun_semver::String, &Field<'_>, &[u8]),
mut iterator: impl FnMut(&mut C, &[u8], &Field<'_>),
) -> Result<(), bun_core::Error> {
let mut slice = input;
let subslicer = SlicedString::init(input, input);

let mut buf = [0u8; 76];
{
Expand Down Expand Up @@ -354,11 +355,11 @@ pub fn for_each_multipart_entry<C>(
remain = &remain[header_end + 4..];

let mut field = Field::default();
let mut name = bun_semver::String::default();
let mut filename: Option<bun_semver::String> = None;
let mut name: &[u8] = b"";
let mut filename: Option<&[u8]> = None;
let mut header_chunk = header;
let mut is_file = false;
while !header_chunk.is_empty() && (filename.is_none() || name.len() == 0) {
while !header_chunk.is_empty() && (filename.is_none() || name.is_empty()) {
let line_end = strings::index_of(header_chunk, b"\r\n")
.ok_or_else(|| err!("is missing header line end"))?;
let line = &header_chunk[..line_end];
Expand Down Expand Up @@ -408,9 +409,9 @@ pub fn for_each_multipart_entry<C>(
}

if strings::eql_case_insensitive_ascii(eql_key, b"name", true) {
name = subslicer.sub(field_value).value();
name = field_value;
} else if strings::eql_case_insensitive_ascii(eql_key, b"filename", true) {
filename = Some(subslicer.sub(field_value).value());
filename = Some(field_value);
is_file = true;
}

Expand Down Expand Up @@ -439,7 +440,7 @@ pub fn for_each_multipart_entry<C>(
.iter()
.all(|&b| b == b'\t' || (0x20..=0x7E).contains(&b))
{
field.content_type = subslicer.sub(trimmed).value();
field.content_type = trimmed;
}
}
}
Expand All @@ -453,10 +454,10 @@ pub fn for_each_multipart_entry<C>(
body = &body[..body.len() - 2];
}
field.value = body;
field.filename = filename.unwrap_or_default();
field.filename = filename.unwrap_or(b"");
field.is_file = is_file;

iterator(ctx, name, &field, input);
iterator(ctx, name, &field);
}

Ok(())
Expand Down
53 changes: 26 additions & 27 deletions src/runtime/webcore/FormData.zig
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,15 @@ pub const FormData = struct {
}

pub const Field = struct {
/// Raw slice into the input buffer. Not using `bun.Semver.String` because
/// file bodies are binary data that can contain null bytes, which
/// Semver.String's inline storage treats as terminators.
/// Raw slices into the input buffer. Not using `bun.Semver.String`
/// because it stores the offset/length as `u32` (with the top bit
/// of length stolen as a tag), which silently corrupts any part
/// located past 4GB in the body or whose value exceeds 2GB. It also
/// treats inline null bytes as terminators, which is wrong for file
/// bodies.
value: []const u8 = "",
filename: bun.Semver.String = .{},
content_type: bun.Semver.String = .{},
filename: []const u8 = "",
content_type: []const u8 = "",
is_file: bool = false,
zero_count: u8 = 0,

Expand Down Expand Up @@ -218,19 +221,19 @@ pub const FormData = struct {
globalThis: *jsc.JSGlobalObject,
form: *jsc.DOMFormData,

pub fn onEntry(wrap: *@This(), name: bun.Semver.String, field: Field, buf: []const u8) void {
pub fn onEntry(wrap: *@This(), name: []const u8, field: Field) void {
const value_str = field.value;
var key = jsc.ZigString.initUTF8(name.slice(buf));
var key = jsc.ZigString.initUTF8(name);

if (field.is_file) {
const filename_str = field.filename.slice(buf);
const filename_str = field.filename;

var blob = jsc.WebCore.Blob.create(value_str, bun.default_allocator, wrap.globalThis, false);
defer blob.detach();
var filename = jsc.ZigString.initUTF8(filename_str);
const content_type: []const u8 = brk: {
if (!field.content_type.isEmpty()) {
break :brk field.content_type.slice(buf);
if (field.content_type.len > 0) {
break :brk field.content_type;
}
if (filename_str.len > 0) {
const extension = std.fs.path.extension(filename_str);
Expand All @@ -249,7 +252,7 @@ pub const FormData = struct {
};

if (content_type.len > 0) {
if (!field.content_type.isEmpty()) {
if (field.content_type.len > 0) {
blob.content_type_allocated = true;
blob.content_type = bun.default_allocator.dupe(u8, content_type) catch @panic("failed to allocate memory for blob content type");
blob.content_type_was_set = true;
Expand Down Expand Up @@ -299,13 +302,11 @@ pub const FormData = struct {
ctx: Ctx,
comptime iterator: fn (
Ctx,
bun.Semver.String,
[]const u8,
Field,
string,
) void,
) !void {
var slice = input;
var subslicer = bun.Semver.SlicedString.init(input, input);

var buf: [76]u8 = undefined;
{
Expand Down Expand Up @@ -334,11 +335,11 @@ pub const FormData = struct {
remain = remain[header_end + 4 ..];

var field = Field{};
var name: bun.Semver.String = .{};
var filename: ?bun.Semver.String = null;
var name: []const u8 = "";
var filename: ?[]const u8 = null;
var header_chunk = header;
var is_file = false;
while (header_chunk.len > 0 and (filename == null or name.len() == 0)) {
while (header_chunk.len > 0 and (filename == null or name.len == 0)) {
Comment thread
robobun marked this conversation as resolved.
Comment thread
robobun marked this conversation as resolved.
const line_end = strings.indexOf(header_chunk, "\r\n") orelse return error.@"is missing header line end";
const line = header_chunk[0..line_end];
header_chunk = header_chunk[line_end + 2 ..];
Expand Down Expand Up @@ -380,13 +381,13 @@ pub const FormData = struct {
}

if (strings.eqlCaseInsensitiveASCII(eql_key, "name", true)) {
name = subslicer.sub(field_value).value();
name = field_value;
} else if (strings.eqlCaseInsensitiveASCII(eql_key, "filename", true)) {
filename = subslicer.sub(field_value).value();
filename = field_value;
is_file = true;
}

if (!name.isEmpty() and filename != null) {
if (name.len > 0 and filename != null) {
break;
}

Expand All @@ -396,12 +397,12 @@ pub const FormData = struct {
break;
}
}
} else if (value.len > 0 and field.content_type.isEmpty() and strings.eqlCaseInsensitiveASCII(key, "content-type", true)) {
field.content_type = subslicer.sub(strings.trim(value, "; \t")).value();
} else if (value.len > 0 and field.content_type.len == 0 and strings.eqlCaseInsensitiveASCII(key, "content-type", true)) {
field.content_type = strings.trim(value, "; \t");
}
}

if (name.len() + @as(usize, field.zero_count) == 0) {
if (name.len + @as(usize, field.zero_count) == 0) {
continue;
}

Expand All @@ -410,16 +411,14 @@ pub const FormData = struct {
body = body[0 .. body.len - 2];
}
field.value = body;
field.filename = filename orelse .{};
field.filename = filename orelse "";
field.is_file = is_file;

iterator(ctx, name, field, input);
iterator(ctx, name, field);
}
}
};

const string = []const u8;

const std = @import("std");

const bun = @import("bun");
Expand Down
91 changes: 90 additions & 1 deletion test/js/web/html/FormData.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { describe, expect, it, test } from "bun:test";
import { bunEnv, bunExe } from "harness";
import { bunEnv, bunExe, isWindows } from "harness";
import { totalmem } from "os";
import { join } from "path";

describe("FormData", () => {
Expand Down Expand Up @@ -915,3 +916,91 @@ test("FormData.toJSON merges duplicate numeric field names into an array", async
expect(stdout.trim().split("\n")).toEqual(['{"0":["a","b","c"],"tag":["x","y"]}', '{"0":["first","second"]}']);
expect(exitCode).toBe(0);
});

// https://github.com/oven-sh/bun/issues/21490
//
// The multipart parser stored part metadata (name, filename, content-type) as
// `bun.Semver.String`, which packs offset/length into 32-bit fields. For a
// part whose header sits past 4 GiB in the body, the offset wrapped and the
// parser read garbage.
//
// Needs ~10 GiB of working set in a subprocess, so skip on small machines and
// on Windows (where ArrayBuffer backing commits eagerly). Use the
// cgroup-aware limit when available so containerized CI runners aren't fooled
// by the host's physical RAM.
const effectiveMemory = process.constrainedMemory?.() || totalmem();
it.skipIf(isWindows || effectiveMemory < 16 * 1024 * 1024 * 1024)(
"multipart parser handles parts at offsets > 4 GiB",
async () => {
const fixture = `
const boundary = "----bun-issue-21490";
const GiB = 1024 * 1024 * 1024;

const head = Buffer.from(
"--" + boundary + "\\r\\n" +
'Content-Disposition: form-data; name="big_upload"; filename="big.bin"\\r\\n' +
"Content-Type: application/octet-stream\\r\\n\\r\\n",
"utf8",
);
const mid = Buffer.from(
"\\r\\n--" + boundary + "\\r\\n" +
'Content-Disposition: form-data; name="description_field"\\r\\n\\r\\n' +
"this part lives past the 4 GiB mark\\r\\n",
"utf8",
);
const mid2 = Buffer.from(
"--" + boundary + "\\r\\n" +
'Content-Disposition: form-data; name="second_attachment"; filename="also_past_4gb.txt"\\r\\n\\r\\n' +
"file contents\\r\\n",
"utf8",
);
const tail = Buffer.from("--" + boundary + "--\\r\\n", "utf8");

// 4 GiB + 256 bytes so the trailing parts' headers sit above 2**32.
let chunk = new Uint8Array(GiB);
const fileBody = new Blob([chunk, chunk, chunk, chunk, new Uint8Array(256)]);
chunk = null;
const fileSize = fileBody.size;

const body = new Blob([head, fileBody, mid, mid2, tail]);

const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": "multipart/form-data; boundary=" + boundary },
body,
});

const form = await request.formData();
const big = form.get("big_upload");
const second = form.get("second_attachment");
const result = {
keys: [...form.keys()],
big: { name: big?.name, size: big?.size, type: big?.type },
description_field: form.get("description_field"),
second: { name: second?.name, size: second?.size, text: second ? await second.text() : null },
expectedFileSize: fileSize,
};
console.log(JSON.stringify(result));
`;

await using proc = Bun.spawn({
cmd: [bunExe(), "-e", fixture],
env: bunEnv,
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);

expect(stderr).toBe("");
const result = JSON.parse(stdout.trim());
expect(result).toEqual({
keys: ["big_upload", "description_field", "second_attachment"],
big: { name: "big.bin", size: result.expectedFileSize, type: "application/octet-stream" },
description_field: "this part lives past the 4 GiB mark",
second: { name: "also_past_4gb.txt", size: "file contents".length, text: "file contents" },
expectedFileSize: 4 * 1024 * 1024 * 1024 + 256,
});
expect(exitCode).toBe(0);
},
120_000,
);
Loading