From 106e7d889f3e052fef127aa3f765f44176453b6d Mon Sep 17 00:00:00 2001 From: Alistair Smith Date: Mon, 18 May 2026 20:56:19 +0100 Subject: [PATCH 1/9] blob: drop trailing odd byte in UTF-16LE decode instead of aborting --- src/runtime/webcore/Blob.rs | 14 ++++++++++---- test/js/web/fetch/blob.test.ts | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/runtime/webcore/Blob.rs b/src/runtime/webcore/Blob.rs index 975cba1993c..af6af785a82 100644 --- a/src/runtime/webcore/Blob.rs +++ b/src/runtime/webcore/Blob.rs @@ -2664,8 +2664,11 @@ impl BlobExt for Blob { if bom == Some(strings::BOM::Utf16Le) { let _free = (LIFETIME == Lifetime::Temporary).then(|| TemporaryBytes(raw_bytes)); - // BOM::Utf16Le ⇒ buf is UTF-16LE bytes; len is even after BOM strip. - // Mirrors Zig `bun.reinterpretSlice(u16, buf)`; bytemuck checks align + even-len. + // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. Stripping the 2-byte BOM + // does not change parity, so an odd-length input would make + // `bytemuck::cast_slice` `panic!` (uncatchable). Drop the trailing + // odd byte first, mirroring Zig's `@divTrunc(bytes.len, 2)`. + let buf = &buf[..buf.len() & !1]; // +1 WTF ref; `OwnedString` releases it on scope exit (Zig: `defer out.deref()`). let out = OwnedString::new(BunString::clone_utf16(bytemuck::cast_slice::<u8, u16>(buf))); @@ -2847,8 +2850,11 @@ impl BlobExt for Blob { } if bom == Some(strings::BOM::Utf16Le) { - // BOM::Utf16Le ⇒ buf is UTF-16LE bytes; len is even after BOM strip. - // Mirrors Zig `bun.reinterpretSlice(u16, buf)`; bytemuck checks align + even-len. + // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. Stripping the 2-byte BOM + // does not change parity, so an odd-length input would make + // `bytemuck::cast_slice` `panic!` (uncatchable). Drop the trailing + // odd byte first, mirroring Zig's `@divTrunc(bytes.len, 2)`. + let buf = &buf[..buf.len() & !1]; // +1 WTF ref; `OwnedString` releases it on scope exit (Zig: `defer out.deref()`). 
let mut out = OwnedString::new(BunString::clone_utf16(bytemuck::cast_slice::<u8, u16>(buf))); diff --git a/test/js/web/fetch/blob.test.ts b/test/js/web/fetch/blob.test.ts index 0eb83a0ecb5..2f676585cb3 100644 --- a/test/js/web/fetch/blob.test.ts +++ b/test/js/web/fetch/blob.test.ts @@ -324,3 +324,28 @@ test("dupe() preserves allocated content_type for Body clone", () => { expect(originalType).toStartWith("multipart/form-data; boundary="); expect(clonedType).toBe(originalType); }); + +test("Blob.json()/.text() on odd-length UTF-16LE+BOM does not abort", async () => { + // Stripping the 2-byte BOM keeps the length odd, which used to make the + // u8->u16 cast `panic!` and abort the whole process (uncatchable). Run in a + // subprocess: pre-fix it exits 133 with no output; fixed it drops the + // trailing odd byte like Zig and parses the valid prefix. + const src = ` + const oddJson = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from(JSON.stringify({ a: 1 }), "utf16le"), Buffer.from([0x20])]); + const oddText = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from("hi", "utf16le"), Buffer.from([0x20])]); + const evenJson = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from(JSON.stringify({ a: 1 }), "utf16le")]); + const j = await new Blob([oddJson]).json(); + const t = await new Blob([oddText]).text(); + const e = await new Blob([evenJson]).json(); + process.stdout.write(JSON.stringify(j) + "|" + JSON.stringify(t) + "|" + JSON.stringify(e)); + `; + await using proc = Bun.spawn({ + cmd: [bunExe(), "-e", src], + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + const [stdout, exitCode] = await Promise.all([proc.stdout.text(), proc.exited]); + expect(stdout).toBe(`{"a":1}|"hi"|{"a":1}`); + expect(exitCode).toBe(0); +}); From d2f486606bbcad365c405c804e17a7dc40a201f1 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 18 May 2026 21:05:36 +0000 Subject: [PATCH 2/9] blob: unaligned u8->u16 decode for odd-address UTF-16LE views 
bytemuck::cast_slice panics on odd alignment as well as odd length; .slice(odd) of a shared byte store hands an odd-address view straight to the cast. Replace with chunks_exact(2) + from_le_bytes, which drops the trailing odd byte and reads unaligned like Zig's reinterpretSlice. Test now covers the .slice(1) case and asserts stderr so the panic text shows up on regression. --- src/runtime/webcore/Blob.rs | 38 +++++++++++++++++++++------------- test/js/web/fetch/blob.test.ts | 21 ++++++++++++------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/runtime/webcore/Blob.rs b/src/runtime/webcore/Blob.rs index af6af785a82..38b1803096f 100644 --- a/src/runtime/webcore/Blob.rs +++ b/src/runtime/webcore/Blob.rs @@ -2664,14 +2664,19 @@ impl BlobExt for Blob { if bom == Some(strings::BOM::Utf16Le) { let _free = (LIFETIME == Lifetime::Temporary).then(|| TemporaryBytes(raw_bytes)); - // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. Stripping the 2-byte BOM - // does not change parity, so an odd-length input would make - // `bytemuck::cast_slice` `panic!` (uncatchable). Drop the trailing - // odd byte first, mirroring Zig's `@divTrunc(bytes.len, 2)`. - let buf = &buf[..buf.len() & !1]; + // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. `buf` may be odd-length + // (truncated input) or odd-address (`.slice(odd)` of a shared + // store) — either makes `bytemuck::cast_slice` `panic!` + // (uncatchable). `chunks_exact(2)` + `from_le_bytes` handles both: + // it drops any trailing odd byte (Zig's `@divTrunc(len, 2)`) and + // reads unaligned. `clone_utf16` copies anyway, so the extra Vec + // is one allocation either way. + let utf16: Vec<u16> = buf + .chunks_exact(2) + .map(|c| u16::from_le_bytes([c[0], c[1]])) + .collect(); // +1 WTF ref; `OwnedString` releases it on scope exit (Zig: `defer out.deref()`). 
- let out = - OwnedString::new(BunString::clone_utf16(bytemuck::cast_slice::<u8, u16>(buf))); + let out = OwnedString::new(BunString::clone_utf16(&utf16)); return out.to_js(global); } @@ -2850,14 +2855,19 @@ impl BlobExt for Blob { } if bom == Some(strings::BOM::Utf16Le) { - // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. Stripping the 2-byte BOM - // does not change parity, so an odd-length input would make - // `bytemuck::cast_slice` `panic!` (uncatchable). Drop the trailing - // odd byte first, mirroring Zig's `@divTrunc(bytes.len, 2)`. - let buf = &buf[..buf.len() & !1]; + // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. `buf` may be odd-length + // (truncated input) or odd-address (`.slice(odd)` of a shared + // store) — either makes `bytemuck::cast_slice` `panic!` + // (uncatchable). `chunks_exact(2)` + `from_le_bytes` handles both: + // it drops any trailing odd byte (Zig's `@divTrunc(len, 2)`) and + // reads unaligned. `clone_utf16` copies anyway, so the extra Vec + // is one allocation either way. + let utf16: Vec<u16> = buf + .chunks_exact(2) + .map(|c| u16::from_le_bytes([c[0], c[1]])) + .collect(); // +1 WTF ref; `OwnedString` releases it on scope exit (Zig: `defer out.deref()`). - let mut out = - OwnedString::new(BunString::clone_utf16(bytemuck::cast_slice::<u8, u16>(buf))); + let mut out = OwnedString::new(BunString::clone_utf16(&utf16)); // PORT NOTE: Zig used `defer { free; detach }`. Reshaped to compute the // result first, then perform the deferred work explicitly — capturing // `&mut self` in a scopeguard closure conflicts with later uses below. 
diff --git a/test/js/web/fetch/blob.test.ts b/test/js/web/fetch/blob.test.ts index 2f676585cb3..1468ceee0a2 100644 --- a/test/js/web/fetch/blob.test.ts +++ b/test/js/web/fetch/blob.test.ts @@ -325,19 +325,23 @@ test("dupe() preserves allocated content_type for Body clone", () => { expect(clonedType).toBe(originalType); }); -test("Blob.json()/.text() on odd-length UTF-16LE+BOM does not abort", async () => { - // Stripping the 2-byte BOM keeps the length odd, which used to make the - // u8->u16 cast `panic!` and abort the whole process (uncatchable). Run in a - // subprocess: pre-fix it exits 133 with no output; fixed it drops the - // trailing odd byte like Zig and parses the valid prefix. +test("Blob.json()/.text() on odd-length/odd-aligned UTF-16LE+BOM does not abort", async () => { + // Odd length: stripping the 2-byte BOM keeps the length odd. Odd address: + // `.slice(1)` of a shared byte store hands an odd pointer straight to the + // u8->u16 cast. Either used to `panic!` and abort the whole process + // (uncatchable). Run in a subprocess: pre-fix it exits 133 with no output; + // fixed it drops the trailing odd byte and reads unaligned like Zig. const src = ` const oddJson = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from(JSON.stringify({ a: 1 }), "utf16le"), Buffer.from([0x20])]); const oddText = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from("hi", "utf16le"), Buffer.from([0x20])]); const evenJson = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from(JSON.stringify({ a: 1 }), "utf16le")]); + // Odd address: pad 1 byte then .slice(1) so the view starts at base+1. 
+ const misaligned = Buffer.concat([Buffer.from([0x00, 0xFF, 0xFE]), Buffer.from("hi", "utf16le")]); const j = await new Blob([oddJson]).json(); const t = await new Blob([oddText]).text(); const e = await new Blob([evenJson]).json(); - process.stdout.write(JSON.stringify(j) + "|" + JSON.stringify(t) + "|" + JSON.stringify(e)); + const m = await new Blob([misaligned]).slice(1).text(); + process.stdout.write(JSON.stringify(j) + "|" + JSON.stringify(t) + "|" + JSON.stringify(e) + "|" + JSON.stringify(m)); `; await using proc = Bun.spawn({ cmd: [bunExe(), "-e", src], @@ -345,7 +349,8 @@ test("Blob.json()/.text() on odd-length UTF-16LE+BOM does not abort", async () = stdout: "pipe", stderr: "pipe", }); - const [stdout, exitCode] = await Promise.all([proc.stdout.text(), proc.exited]); - expect(stdout).toBe(`{"a":1}|"hi"|{"a":1}`); + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe(`{"a":1}|"hi"|{"a":1}|"hi"`); expect(exitCode).toBe(0); }); From d6b7f870a4be3fc749d12c763763602abb380974 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 18 May 2026 21:52:54 +0000 Subject: [PATCH 3/9] wtf-bindings: use WTF ASSERT instead of libc assert in uv__tty_make_raw <assert.h> is not included by wtf-bindings.cpp, so whether 'assert' is visible depends on unified-source batching. Use the always-available WTF ASSERT macro instead. 
--- src/jsc/bindings/wtf-bindings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jsc/bindings/wtf-bindings.cpp b/src/jsc/bindings/wtf-bindings.cpp index 0d4968f805f..8b09b5a8665 100644 --- a/src/jsc/bindings/wtf-bindings.cpp +++ b/src/jsc/bindings/wtf-bindings.cpp @@ -61,7 +61,7 @@ extern "C" int uv_tty_reset_mode(void) static void uv__tty_make_raw(struct termios* tio) { - assert(tio != NULL); + ASSERT(tio != NULL); #if defined __sun || defined __MVS__ /* From 56d2716dd3354f9e9c5c3a2ea3d7e252de846427 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 18 May 2026 23:07:23 +0000 Subject: [PATCH 4/9] ci: retrigger (darwin-aarch64 jobs expired in queue) From d8d9200ab57f442f21193a9670d02575ecf275ac Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Tue, 19 May 2026 03:59:30 +0000 Subject: [PATCH 5/9] blob: keep zero-copy u16 view for aligned UTF-16LE, copy only when misaligned try_cast_slice succeeds for the common aligned case (file reads, whole byte stores) and hands &[u16] straight to clone_utf16 with no intermediate allocation; only odd-address views from .slice(odd) fall through to the chunks_exact copy. --- src/runtime/webcore/Blob.rs | 54 ++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/runtime/webcore/Blob.rs b/src/runtime/webcore/Blob.rs index 38b1803096f..ec3adb6e53b 100644 --- a/src/runtime/webcore/Blob.rs +++ b/src/runtime/webcore/Blob.rs @@ -2667,16 +2667,25 @@ impl BlobExt for Blob { // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. `buf` may be odd-length // (truncated input) or odd-address (`.slice(odd)` of a shared // store) — either makes `bytemuck::cast_slice` `panic!` - // (uncatchable). `chunks_exact(2)` + `from_le_bytes` handles both: - // it drops any trailing odd byte (Zig's `@divTrunc(len, 2)`) and - // reads unaligned. 
`clone_utf16` copies anyway, so the extra Vec - is one allocation either way. - let utf16: Vec<u16> = buf - .chunks_exact(2) - .map(|c| u16::from_le_bytes([c[0], c[1]])) - .collect(); + // (uncatchable). Drop the trailing odd byte (Zig's + // `@divTrunc(len, 2)`), then borrow as `&[u16]` when already + // 2-aligned (the common case: file reads, whole stores) and only + // copy into a fresh `Vec` when `try_cast_slice` rejects on + // alignment. + let buf = &buf[..buf.len() & !1]; + let unaligned: Vec<u16>; + let utf16: &[u16] = match bytemuck::try_cast_slice(buf) { + Ok(s) => s, + Err(_) => { + unaligned = buf + .chunks_exact(2) + .map(|c| u16::from_le_bytes([c[0], c[1]])) + .collect(); + &unaligned + } + }; // +1 WTF ref; `OwnedString` releases it on scope exit (Zig: `defer out.deref()`). - let out = OwnedString::new(BunString::clone_utf16(&utf16)); + let out = OwnedString::new(BunString::clone_utf16(utf16)); return out.to_js(global); } @@ -2858,16 +2867,25 @@ impl BlobExt for Blob { // BOM::Utf16Le ⇒ buf is UTF-16LE bytes. `buf` may be odd-length // (truncated input) or odd-address (`.slice(odd)` of a shared // store) — either makes `bytemuck::cast_slice` `panic!` - // (uncatchable). `chunks_exact(2)` + `from_le_bytes` handles both: - // it drops any trailing odd byte (Zig's `@divTrunc(len, 2)`) and - // reads unaligned. `clone_utf16` copies anyway, so the extra Vec - // is one allocation either way. - let utf16: Vec<u16> = buf - .chunks_exact(2) - .map(|c| u16::from_le_bytes([c[0], c[1]])) - .collect(); + // (uncatchable). Drop the trailing odd byte (Zig's + // `@divTrunc(len, 2)`), then borrow as `&[u16]` when already + // 2-aligned (the common case: file reads, whole stores) and only + // copy into a fresh `Vec` when `try_cast_slice` rejects on + // alignment. 
+ let buf = &buf[..buf.len() & !1]; + let unaligned: Vec<u16>; + let utf16: &[u16] = match bytemuck::try_cast_slice(buf) { + Ok(s) => s, + Err(_) => { + unaligned = buf + .chunks_exact(2) + .map(|c| u16::from_le_bytes([c[0], c[1]])) + .collect(); + &unaligned + } + }; // +1 WTF ref; `OwnedString` releases it on scope exit (Zig: `defer out.deref()`). - let mut out = OwnedString::new(BunString::clone_utf16(&utf16)); + let mut out = OwnedString::new(BunString::clone_utf16(utf16)); // PORT NOTE: Zig used `defer { free; detach }`. Reshaped to compute the // result first, then perform the deferred work explicitly — capturing // `&mut self` in a scopeguard closure conflicts with later uses below. From a062e470a9b7d2de56bd6c59b86d797ba35f8016 Mon Sep 17 00:00:00 2001 From: Alistair Smith Date: Fri, 22 May 2026 12:32:25 +0100 Subject: [PATCH 6/9] Retrigger CI From f64750cf16e556e8b132f4cdd8a29212f5e64a30 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Tue, 26 May 2026 03:42:55 +0000 Subject: [PATCH 7/9] blob: use as_chunks::<2> for the unaligned UTF-16LE fallback Const-generic chunk size lets the compiler elide per-element bounds checks and from_le_bytes takes the [u8; 2] array directly. 
--- src/runtime/webcore/Blob.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/runtime/webcore/Blob.rs b/src/runtime/webcore/Blob.rs index dc5af34e6c8..d36a32f256e 100644 --- a/src/runtime/webcore/Blob.rs +++ b/src/runtime/webcore/Blob.rs @@ -2737,8 +2737,10 @@ impl BlobExt for Blob { Ok(s) => s, Err(_) => { unaligned = buf - .chunks_exact(2) - .map(|c| u16::from_le_bytes([c[0], c[1]])) + .as_chunks::<2>() + .0 + .iter() + .map(|c| u16::from_le_bytes(*c)) .collect(); &unaligned } @@ -2972,8 +2974,10 @@ impl BlobExt for Blob { Ok(s) => s, Err(_) => { unaligned = buf - .chunks_exact(2) - .map(|c| u16::from_le_bytes([c[0], c[1]])) + .as_chunks::<2>() + .0 + .iter() + .map(|c| u16::from_le_bytes(*c)) .collect(); &unaligned } From a164f9cb7d30c0bd171716bfc729258ccd1ccbea Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Tue, 26 May 2026 04:13:52 +0000 Subject: [PATCH 8/9] blob.test: relax content_type leak threshold for debug builds bun-debug is ASAN + debug allocator, so RSS inflates the same way the named bun-asan CI binary does; isASAN only matches the binary name. 
--- test/js/web/fetch/blob.test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/js/web/fetch/blob.test.ts b/test/js/web/fetch/blob.test.ts index 99e2ff62d5f..d3a50232447 100644 --- a/test/js/web/fetch/blob.test.ts +++ b/test/js/web/fetch/blob.test.ts @@ -1,5 +1,5 @@ import { expect, test } from "bun:test"; -import { bunEnv, bunExe, isASAN, tempDir } from "harness"; +import { bunEnv, bunExe, isASAN, isDebug, tempDir } from "harness"; import type { BlobOptions } from "node:buffer"; import type { BinaryLike } from "node:crypto"; import path from "node:path"; @@ -350,7 +350,9 @@ test("Bun.file(path, {type}).text() does not leak the duped content_type", async const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); expect(stderr).toBe(""); const { deltaMiB } = JSON.parse(stdout); - expect(deltaMiB).toBeLessThan(isASAN ? 400 : 40); + // Debug builds (bun-debug is ASAN + debug allocator) inflate RSS the same + // way the named bun-asan CI binary does. + expect(deltaMiB).toBeLessThan(isASAN || isDebug ? 400 : 40); expect(exitCode).toBe(0); }); From 3afd71255d73f561ca76e1c14d43b73a76d56148 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Tue, 26 May 2026 04:47:25 +0000 Subject: [PATCH 9/9] blob.test: cover odd-address .slice(1).json() so the to_json fallback is exercised --- test/js/web/fetch/blob.test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/js/web/fetch/blob.test.ts b/test/js/web/fetch/blob.test.ts index d3a50232447..0f5eda0be33 100644 --- a/test/js/web/fetch/blob.test.ts +++ b/test/js/web/fetch/blob.test.ts @@ -486,11 +486,13 @@ test("Blob.json()/.text() on odd-length/odd-aligned UTF-16LE+BOM does not abort" const evenJson = Buffer.concat([Buffer.from([0xFF, 0xFE]), Buffer.from(JSON.stringify({ a: 1 }), "utf16le")]); // Odd address: pad 1 byte then .slice(1) so the view starts at base+1. 
const misaligned = Buffer.concat([Buffer.from([0x00, 0xFF, 0xFE]), Buffer.from("hi", "utf16le")]); + const misalignedJson = Buffer.concat([Buffer.from([0x00, 0xFF, 0xFE]), Buffer.from(JSON.stringify({ a: 1 }), "utf16le")]); const j = await new Blob([oddJson]).json(); const t = await new Blob([oddText]).text(); const e = await new Blob([evenJson]).json(); const m = await new Blob([misaligned]).slice(1).text(); - process.stdout.write(JSON.stringify(j) + "|" + JSON.stringify(t) + "|" + JSON.stringify(e) + "|" + JSON.stringify(m)); + const mj = await new Blob([misalignedJson]).slice(1).json(); + process.stdout.write(JSON.stringify(j) + "|" + JSON.stringify(t) + "|" + JSON.stringify(e) + "|" + JSON.stringify(m) + "|" + JSON.stringify(mj)); `; await using proc = Bun.spawn({ cmd: [bunExe(), "-e", src], @@ -500,6 +502,6 @@ test("Blob.json()/.text() on odd-length/odd-aligned UTF-16LE+BOM does not abort" }); const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); expect(stderr).toBe(""); - expect(stdout).toBe(`{"a":1}|"hi"|{"a":1}|"hi"`); + expect(stdout).toBe(`{"a":1}|"hi"|{"a":1}|"hi"|{"a":1}`); expect(exitCode).toBe(0); });