diff --git a/Cargo.lock b/Cargo.lock index 805413c9b9b..bc3e11e9b2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1499,6 +1499,7 @@ dependencies = [ "bstr", "bun_alloc", "bun_core", + "bun_simdutf_sys", "bytemuck", "const_format", "enum-map", diff --git a/src/bun_core/string/immutable.rs b/src/bun_core/string/immutable.rs index 4c3ec1dad72..e4f4549c3ec 100644 --- a/src/bun_core/string/immutable.rs +++ b/src/bun_core/string/immutable.rs @@ -2963,19 +2963,47 @@ pub fn starts_with_windows_drive_letter_t>(s: &[T]) -> bool /// UTF-8 falls back to a scalar WTF-8 decoder that emits U+FFFD for malformed /// bytes and passes unpaired surrogates through (so non-empty input never yields /// an empty slice — fixes #8197 / the TODO at unicode.zig:1537). +/// +/// Panics when the output does not fit. Callers that cannot statically size +/// `buf` for the worst case must use [`try_convert_utf8_to_utf16_in_buffer`]. pub fn convert_utf8_to_utf16_in_buffer<'a>(buf: &'a mut [u16], input: &[u8]) -> &'a mut [u16] { + let buf_len = buf.len(); + match try_convert_utf8_to_utf16_in_buffer(buf, input) { + Some(out) => out, + None => panic!( + "convert_utf8_to_utf16_in_buffer: buf too small (have {} u16 for {} input bytes)", + buf_len, + input.len(), + ), + } +} + +/// Checked variant of [`convert_utf8_to_utf16_in_buffer`]: returns `None` when +/// the converted output does not fit in `buf`, and never writes past `buf`. +/// +/// simdutf's convert API takes only an output *pointer* and writes however +/// many units the input needs, so it must not be entered unless the output +/// provably fits: either `input.len() <= buf.len()` (a UTF-16 unit always +/// consumes at least one UTF-8 byte, and surrogate pairs produce 2 units from +/// 4 bytes), or the exact converted length fits. On invalid input simdutf +/// stops at the first error having written only the valid prefix's units, +/// which is ≤ that same exact-length estimate; the WTF-8 fallback can exceed +/// the estimate (stray continuation bytes become one U+FFFD each), so it +/// re-checks capacity on every write. +pub fn try_convert_utf8_to_utf16_in_buffer<'a>( + buf: &'a mut [u16], + input: &[u8], +) -> Option<&'a mut [u16]> { if input.is_empty() { - return &mut buf[..0]; - } - assert!( - input.len() <= buf.len() || element_length_utf8_into_utf16(input) <= buf.len(), - "convert_utf8_to_utf16_in_buffer: buf too small (have {} u16 for {} input bytes)", - buf.len(), - input.len(), - ); + return Some(&mut buf[..0]); + } + if input.len() > buf.len() && element_length_utf8_into_utf16(input) > buf.len() { + return None; + } let r = simdutf::convert::utf8::to::utf16::with_errors::le(input, buf); if r.is_successful() { - return &mut buf[..r.count]; + debug_assert!(r.count <= buf.len()); + return Some(&mut buf[..r.count]); } // WTF-8 fallback (invalid byte → U+FFFD; lone surrogates pass through). let mut written = 0usize; @@ -2983,15 +3011,24 @@ pub fn convert_utf8_to_utf16_in_buffer<'a>(buf: &'a mut [u16], input: &[u8]) -> while i < input.len() { let b = input[i]; if b < 0x80 { + if written >= buf.len() { + return None; + } buf[written] = b as u16; written += 1; i += 1; } else { let (cp, adv) = decode_wtf8_one(&input[i..]); if cp <= 0xFFFF { + if written >= buf.len() { + return None; + } buf[written] = cp as u16; written += 1; } else { + if written + 2 > buf.len() { + return None; + } let [hi, lo] = encode_surrogate_pair(cp); buf[written] = hi; buf[written + 1] = lo; @@ -3000,7 +3037,7 @@ pub fn convert_utf8_to_utf16_in_buffer<'a>(buf: &'a mut [u16], input: &[u8]) -> i += adv; } } - &mut buf[..written] + Some(&mut buf[..written]) } /// Decode one WTF-8 sequence at the head of `s`; invalid lead/truncated → (U+FFFD, 1). diff --git a/src/bun_core/string/immutable/unicode.rs b/src/bun_core/string/immutable/unicode.rs index fd58c265281..fffb2df1b99 100644 --- a/src/bun_core/string/immutable/unicode.rs +++ b/src/bun_core/string/immutable/unicode.rs @@ -944,20 +944,16 @@ use crate::strings::u16_get_supplementary; pub use crate::strings::{u16_is_lead, u16_is_trail}; pub fn convert_utf8_to_utf16_in_buffer_z<'a>(buf: &'a mut [u16], input: &[u8]) -> &'a WStr { - // TODO: see convert_utf8_to_utf16_in_buffer - if input.is_empty() { - buf[0] = 0; - return wstr_in_buf(buf, 0); - } - assert!( - input.len() < buf.len() || element_length_utf8_into_utf16(input) < buf.len(), - "convert_utf8_to_utf16_in_buffer_z: buf too small (have {} u16 for {} input bytes)", - buf.len(), - input.len(), - ); - let result = simdutf::convert::utf8::to::utf16::le(input, buf); - buf[result] = 0; - wstr_in_buf(buf, result) + // Checked conversion (see `try_convert_utf8_to_utf16_in_buffer`): the + // NUL reserves one slot, and over-long input fails safe to "" — which + // the consuming syscall rejects — instead of letting simdutf (which + // never bounds-checks its output) write past `buf`. + let cap = buf.len().saturating_sub(1); + let len = crate::string::immutable::try_convert_utf8_to_utf16_in_buffer(&mut buf[..cap], input) + .map(|converted| converted.len()) + .unwrap_or(0); + buf[len] = 0; + wstr_in_buf(buf, len) } #[rustfmt::skip] diff --git a/src/paths/Cargo.toml b/src/paths/Cargo.toml index 84994256df4..054e9e43a56 100644 --- a/src/paths/Cargo.toml +++ b/src/paths/Cargo.toml @@ -24,3 +24,9 @@ bun_alloc.workspace = true bun_core.workspace = true thiserror.workspace = true + +[dev-dependencies] +# Nominal `SIMDUTFResult`/`Status` for the test-only simdutf stubs in +# `string_paths.rs` — Miri requires the stub's signature to match the extern +# declaration's types exactly, not just their ABI layout. +bun_simdutf_sys.workspace = true diff --git a/src/paths/resolve_path.rs b/src/paths/resolve_path.rs index 1a7f92b2743..23d38497c48 100644 --- a/src/paths/resolve_path.rs +++ b/src/paths/resolve_path.rs @@ -2219,6 +2219,16 @@ impl PosixToWinNormalizer { debug_assert!(is_sep_any(root[0])); if strings::is_windows_absolute_path_missing_drive_letter::(maybe_posix_path) { let source_root = windows_filesystem_root(source_dir); + // The source root (arbitrarily long for UNC dirs) plus + // the path must fit `buf` with one byte of headroom — + // downstream normalization writes one past the input for + // separator-less UNC roots. Such a join can't exist on NT + // anyway, so fail safe to the un-joined input (which the + // consuming lookup treats as nonexistent) instead of + // writing past the buffer. + if source_root.len() + maybe_posix_path.len() - 1 >= buf.len() { + return maybe_posix_path; + } buf[0..source_root.len()].copy_from_slice(source_root); buf[source_root.len()..source_root.len() + maybe_posix_path.len() - 1] .copy_from_slice(&maybe_posix_path[1..]); @@ -2252,6 +2262,11 @@ impl PosixToWinNormalizer { debug_assert!(is_sep_any(root[0])); if strings::is_windows_absolute_path_missing_drive_letter::(mp) { let source_root = windows_filesystem_root(source_dir); + // See resolve_with_external_buf: over-long joins fail + // safe to the un-joined input (+ NUL accounted for here). + if source_root.len() + mp.len() > buf.len() { + return maybe_posix_path; + } buf[0..source_root.len()].copy_from_slice(source_root); buf[source_root.len()..source_root.len() + mp.len() - 1] .copy_from_slice(&mp[1..]); @@ -2293,6 +2308,16 @@ impl PosixToWinNormalizer { let cwd = bun_core::getcwd(buf)?; windows_filesystem_root(cwd.as_bytes()).len() }; + // The cwd root (arbitrarily long for UNC cwds) plus the + // path must fit `buf` with one byte of headroom: the + // joined result feeds `normalize_buf`, whose UNC-root + // handling writes one past the input when the cwd is a + // bare share root with no trailing separator. Such a + // combination can't exist on NT anyway, so error out + // instead of writing past a buffer. + if sr_len + maybe_posix_path.len() - 1 >= buf.len() { + return Err(bun_core::err!("NameTooLong")); + } buf[sr_len..sr_len + maybe_posix_path.len() - 1] .copy_from_slice(&maybe_posix_path[1..]); let res = &buf[0..sr_len + maybe_posix_path.len() - 1]; @@ -2329,6 +2354,13 @@ impl PosixToWinNormalizer { let cwd = bun_core::getcwd(buf)?; windows_filesystem_root(cwd.as_bytes()).len() }; + // The cwd root (arbitrarily long for UNC cwds) plus the + // path and its NUL must fit `buf`; such a combination + // can't exist on NT anyway, so error out instead of + // writing past it. + if sr_len + maybe_posix_path.len() > buf.len() { + return Err(bun_core::err!("NameTooLong")); + } buf[sr_len..sr_len + maybe_posix_path.len() - 1] .copy_from_slice(&maybe_posix_path[1..]); buf[sr_len + maybe_posix_path.len() - 1] = 0; @@ -2349,6 +2381,9 @@ impl PosixToWinNormalizer { ); } + if maybe_posix_path.len() + 1 > buf.len() { + return Err(bun_core::err!("NameTooLong")); + } buf[..maybe_posix_path.len()].copy_from_slice(maybe_posix_path); buf[maybe_posix_path.len()] = 0; // SAFETY: NUL at buf[maybe_posix_path.len()] diff --git a/src/paths/string_paths.rs b/src/paths/string_paths.rs index 14497b4662c..9e1544e7dae 100644 --- a/src/paths/string_paths.rs +++ b/src/paths/string_paths.rs @@ -207,6 +207,14 @@ pub fn to_w_path_normalize_auto_extend<'a>(wbuf: &'a mut [u16], utf8: &[u8]) -> pub fn to_w_path_normalized<'a>(wbuf: &'a mut [u16], utf8: &[u8]) -> &'a WStr { let mut renormalized = crate::path_buffer_pool::get(); + // Longer than the pooled scratch buffer (and than any path the OS can + // address) — fail-safe to "" like `to_w_path_maybe_dir` does, instead of + // panicking in the `normalize_slashes_only` copy below. + if utf8.len() > renormalized.len() { + wbuf[0] = 0; + return wstr_in_buf(wbuf, 0); + } + let mut path_to_use = normalize_slashes_only(&mut renormalized[..], utf8, b'\\'); // is there a trailing slash? Let's remove it before converting to UTF-16 @@ -218,6 +226,14 @@ pub fn to_w_path_normalized<'a>(wbuf: &'a mut [u16], utf8: &[u8]) -> &'a WStr { } pub(crate) fn to_w_path_normalized16<'a>(wbuf: &'a mut [u16], path: &[u16]) -> &'a WStr { + // Input (plus the NUL) doesn't fit in `wbuf` — fail-safe to "" like + // `to_w_path_maybe_dir` does, instead of panicking in the + // `normalize_slashes_only_t` copy below. + if path.len() >= wbuf.len() { + wbuf[0] = 0; + return wstr_in_buf(wbuf, 0); + } + // PORT NOTE: reshaped for borrowck — Zig wrote into wbuf and then re-sliced wbuf; // here we capture the length and re-derive the mutable slice. let len = { @@ -300,6 +316,33 @@ pub fn to_w_dir_path<'a>(wbuf: &'a mut [u16], utf8: &[u8]) -> &'a WStr { to_w_path_maybe_dir::(wbuf, utf8) } +/// Can `utf8`'s UTF-16 form fit a `WPathBuffer` (`PATH_MAX_WIDE` units, the +/// NT maximum path length), leaving room for the longest prefix any converter +/// prepends (`\??\UNC\`, 8 units), a trailing slash, and the NUL? Paths that +/// fail this cannot exist on disk; callers surface `false`/`ENAMETOOLONG` +/// instead of converting (mirrors the Zig-side fix in oven-sh/bun#27775). +/// +/// UTF-8 → UTF-16 never expands the unit count, so the byte count fitting +/// already proves the fit; the unit count (simdutf, SIMD) is only computed +/// for longer inputs. The byte length is bounded as well: a converted unit +/// consumes at least a third of a byte triple, so any input past 3× +/// `MAX_UNITS` bytes cannot fit regardless of content — and the cap also +/// bounds the u8-space path copies this check guards. +/// +/// simdutf's length is exact for valid WTF-8; on malformed bytes it is an +/// estimate (stray continuation bytes count zero yet convert to one U+FFFD +/// unit each), so a malformed over-long path can pass this check. That is +/// fine: the bounds-checked conversion downstream never overflows and fails +/// safe to an empty path — such input merely gets a generic syscall error +/// instead of the precise `ENAMETOOLONG`. +pub fn fits_in_wide_path_buffer(utf8: &[u8]) -> bool { + const OVERHEAD: usize = windows::NT_UNC_OBJECT_PREFIX.len() + 2; + const MAX_UNITS: usize = crate::PATH_MAX_WIDE - OVERHEAD; + utf8.len() <= MAX_UNITS + || (utf8.len() <= 3 * MAX_UNITS + && strings::element_length_utf8_into_utf16(utf8) <= MAX_UNITS) +} + pub fn to_kernel32_path<'a>(wbuf: &'a mut [u16], utf8: &[u8]) -> &'a WStr { let path = if utf8.starts_with(&windows::NT_OBJECT_PREFIX_U8) { &utf8[windows::NT_OBJECT_PREFIX_U8.len()..] @@ -329,9 +372,26 @@ pub(crate) fn to_w_path_maybe_dir<'a, const ADD_TRAILING_LASH: bool>( let cap = wbuf.len().saturating_sub(1 + (ADD_TRAILING_LASH as usize)); // PORT NOTE: Zig used `bun.simdutf.convert.utf8.to.utf16.le.with_errors`; - // route through `crate::strings::convert_utf8_to_utf16_in_buffer` (same + // route through the checked `try_convert_utf8_to_utf16_in_buffer` (same // simdutf primitive + WTF-8 fallback) to avoid a `bun_simdutf` crate dep. - let mut count = crate::strings::convert_utf8_to_utf16_in_buffer(&mut wbuf[..cap], utf8).len(); + // + // Over-long input is fail-safed to "" instead of overflowing: the Zig + // original handed simdutf a buffer it could write past, silently + // corrupting the stack once a path's UTF-16 form exceeded the wide + // buffer (32767 units for `WPathBuffer`, i.e. longer than any path NT + // can address). The empty result makes the consuming syscall fail + // cleanly; JS-facing paths are rejected with `false`/ENAMETOOLONG before + // they get here (`PathLikeExt::{slice_w, os_path, os_path_kernel32}` in + // `runtime/node/types.rs`, via `fits_in_wide_path_buffer`). Prefixing + // wrappers (`to_kernel32_path`, `to_nt_path`, …) may then yield just + // their prefix, which likewise fails at the syscall. + let Some(converted) = + crate::strings::try_convert_utf8_to_utf16_in_buffer(&mut wbuf[..cap], utf8) + else { + wbuf[0] = 0; + return wstr_in_buf(wbuf, 0); + }; + let mut count = converted.len(); // Many Windows APIs expect normalized path slashes, particularly when the // long path prefix is added or the nt object prefix. To make this easier, @@ -451,4 +511,245 @@ pub fn basename(input: &[T]) -> &[T] { } } +// Run with `cargo test -p bun_paths` (also the Miri lane, +// `bun run rust:miri -p bun_paths`). simdutf's C++ implementation is only +// linked into the full binary, so the two externs the conversion path uses +// are satisfied below with faithful pure-Rust scalar stubs — which is also +// what keeps these tests runnable under Miri (no foreign code). +#[cfg(test)] +mod tests { + use super::*; + + use bun_simdutf_sys::simdutf::{SIMDUTFResult, Status}; + + /// Scalar `simdutf::convert::utf8::to::utf16::with_errors::le`: writes + /// the UTF-16LE form of the valid prefix to `utf16_output` and returns + /// SUCCESS + units written, or a nonzero status + the input position of + /// the first invalid sequence. Mirrors the semantics + /// `try_convert_utf8_to_utf16_in_buffer` relies on: the output buffer + /// length is never communicated, and on error only the valid prefix's + /// units (≤ the `utf16_length_from_utf8` estimate) have been written. + #[unsafe(no_mangle)] + unsafe extern "C" fn simdutf__convert_utf8_to_utf16le_with_errors( + buf: *const u8, + len: usize, + utf16_output: *mut u16, + ) -> SIMDUTFResult { + // SAFETY: test stub; callers pass a valid (ptr, len) input pair. + let input = unsafe { core::slice::from_raw_parts(buf, len) }; + let mut written = 0usize; + let mut i = 0usize; + while i < len { + let b = input[i]; + let cont = |off: usize| i + off < len && input[i + off] & 0xC0 == 0x80; + let (cp, adv): (u32, usize) = if b < 0x80 { + (b as u32, 1) + } else if (0xC2..0xE0).contains(&b) && cont(1) { + ( + (u32::from(b & 0x1F) << 6) | u32::from(input[i + 1] & 0x3F), + 2, + ) + } else if (0xE0..0xF0).contains(&b) && cont(1) && cont(2) { + let cp = (u32::from(b & 0x0F) << 12) + | (u32::from(input[i + 1] & 0x3F) << 6) + | u32::from(input[i + 2] & 0x3F); + if (0xD800..=0xDFFF).contains(&cp) { + return SIMDUTFResult { + status: Status::SURROGATE, + count: i, + }; + } + (cp, 3) + } else if (0xF0..0xF5).contains(&b) && cont(1) && cont(2) && cont(3) { + ( + (u32::from(b & 0x07) << 18) + | (u32::from(input[i + 1] & 0x3F) << 12) + | (u32::from(input[i + 2] & 0x3F) << 6) + | u32::from(input[i + 3] & 0x3F), + 4, + ) + } else { + return SIMDUTFResult { + status: Status::TOO_SHORT, + count: i, + }; + }; + // SAFETY: test stub mirroring simdutf — the caller guarantees + // capacity for the full conversion before calling (that is the + // invariant under test). + unsafe { + if cp <= 0xFFFF { + utf16_output.add(written).write(cp as u16); + written += 1; + } else { + let v = cp - 0x10000; + utf16_output.add(written).write(0xD800 + (v >> 10) as u16); + utf16_output + .add(written + 1) + .write(0xDC00 + (v & 0x3FF) as u16); + written += 2; + } + } + i += adv; + } + SIMDUTFResult { + status: Status::SUCCESS, + count: written, + } + } + + /// Scalar `simdutf::length::utf16::from::utf8`: one unit per + /// non-continuation byte plus one more per 4-byte lead — including the + /// real implementation's undercount on invalid input (stray continuation + /// bytes count zero), which `to_w_path_overlong_invalid_utf8` depends on. + #[unsafe(no_mangle)] + unsafe extern "C" fn simdutf__utf16_length_from_utf8(input: *const u8, length: usize) -> usize { + // SAFETY: test stub; callers pass a valid (ptr, len) input pair. + let input = unsafe { core::slice::from_raw_parts(input, length) }; + input + .iter() + .map(|&b| { + if b & 0xC0 == 0x80 { + 0 + } else if b >= 0xF0 { + 2 + } else { + 1 + } + }) + .sum() + } + + /// The u16 length of the buffer `PathLike::os_path_kernel32` uses on + /// Windows: the 98302-byte (3 × PATH_MAX_WIDE + 1) `PathBuffer` + /// reinterpreted as `[u16]`. + const KERNEL32_WIDE_LEN: usize = (3 * crate::PATH_MAX_WIDE + 1) / 2; + + #[test] + fn to_w_path_fills_to_capacity() { + // cap = wbuf.len() - 1 (NUL); an input of exactly `cap` units fits. + let mut wbuf = [0u16; 9]; + let result = to_w_path(&mut wbuf, b"abcdefgh"); + assert_eq!(result.len(), 8); + assert_eq!(wbuf[8], 0); + } + + #[test] + fn to_w_path_overlong_yields_empty() { + // Used to hand simdutf a buffer it would write past (then panic + // slicing the result); must fail safe to "" instead. + let mut wbuf = [1u16; 32]; + let result = to_w_path(&mut wbuf, &[b'a'; 64]); + assert_eq!(result.len(), 0); + assert_eq!(wbuf[0], 0); + } + + #[test] + fn to_w_path_overlong_invalid_utf8_yields_empty() { + // Stray continuation bytes defeat the simdutf length estimate (they + // count as zero units) but each becomes one U+FFFD in the WTF-8 + // fallback — the bounded fallback must still refuse to write past + // the buffer. + let mut wbuf = [1u16; 32]; + let result = to_w_path(&mut wbuf, &[0x80u8; 64]); + assert_eq!(result.len(), 0); + assert_eq!(wbuf[0], 0); + } + + #[test] + fn to_w_path_multibyte_longer_in_bytes_than_buffer_fits() { + // 20 × U+4E16 = 60 UTF-8 bytes but only 20 UTF-16 units; must + // convert even though the byte length exceeds the buffer length. + let input: Vec = "世".repeat(20).into_bytes(); + let mut wbuf = [0u16; 32]; + let result = to_w_path(&mut wbuf, &input); + assert_eq!(result.len(), 20); + assert!(result.as_slice().iter().all(|&u| u == 0x4E16)); + } + + #[test] + fn to_kernel32_path_adds_long_prefix() { + let mut wbuf = [0u16; 16]; + let result = to_kernel32_path(&mut wbuf, b"C:\\foo"); + let expected: Vec = "\\\\?\\C:\\foo".encode_utf16().collect(); + assert_eq!(result.as_slice(), &expected[..]); + } + + #[test] + fn to_kernel32_path_overlong_windows_sized_buffer() { + // The exact shape of the crash seen in production (and of + // oven-sh/bun#20258): `PathLike::os_path_kernel32` reinterprets the + // 98302-byte Windows `PathBuffer` as 49151 u16s; a drive-letter path + // longer than that in UTF-16 units used to write past the buffer + // inside simdutf and panic slicing the result. It must now fail safe + // (prefix-only output, which the consuming syscall rejects) — and + // `PathLikeExt` rejects such paths with NameTooLong before this + // conversion is even reached. + let mut wbuf = vec![0u16; KERNEL32_WIDE_LEN]; + let mut path = b"C:\\".to_vec(); + path.resize(3 + KERNEL32_WIDE_LEN, b'a'); + let result = to_kernel32_path(&mut wbuf, &path); + assert_eq!(result.as_slice(), &windows::LONG_PATH_PREFIX[..]); + + // Without the drive-letter prefix it degrades to "". + let result = to_w_path(&mut wbuf, &path[3..]); + assert_eq!(result.len(), 0); + } + + #[test] + fn to_kernel32_path_just_under_the_buffer_converts() { + // One unit of headroom below the prefix + NUL overhead: must still + // convert (guards against over-rejection at the boundary). + let mut wbuf = vec![0u16; KERNEL32_WIDE_LEN]; + let mut path = b"C:\\".to_vec(); + path.resize(KERNEL32_WIDE_LEN - 5, b'a'); + let result = to_kernel32_path(&mut wbuf, &path); + assert_eq!(result.len(), path.len() + 4); + assert_eq!(&result.as_slice()[..4], &windows::LONG_PATH_PREFIX[..]); + } + + #[test] + fn convert_z_bounds() { + // The NUL-terminating conversion (used by the Windows profilers' + // path widening) shares the checked core: exact fit converts with + // the NUL in the reserved slot, over-long fails safe to "". + let mut wbuf = [1u16; 9]; + let result = bun_core::strings::convert_utf8_to_utf16_in_buffer_z(&mut wbuf, b"abcdefgh"); + assert_eq!(result.len(), 8); + assert_eq!(wbuf[8], 0); + + let result = bun_core::strings::convert_utf8_to_utf16_in_buffer_z(&mut wbuf, &[b'a'; 16]); + assert_eq!(result.len(), 0); + assert_eq!(wbuf[0], 0); + } + + #[test] + fn fits_in_wide_path_buffer_bounds() { + // PATH_MAX_WIDE (32767) minus the 10-unit overhead (`\??\UNC\` + + // trailing slash + NUL) = 32757 is the largest accepted size. + assert!(fits_in_wide_path_buffer(&vec![b'a'; 32757])); + assert!(!fits_in_wide_path_buffer(&vec![b'a'; 32758])); + + // Long in bytes but short in UTF-16 units: 3-byte chars count once, + // so the exact length must be computed, not the byte length. + let cjk: Vec = "世".repeat(20000).into_bytes(); // 60000 B, 20000 u16 + assert!(fits_in_wide_path_buffer(&cjk)); + let cjk_long: Vec = "世".repeat(32758).into_bytes(); + assert!(!fits_in_wide_path_buffer(&cjk_long)); + // The largest fitting valid path in bytes: 32757 3-byte units. + let cjk_max: Vec = "世".repeat(32757).into_bytes(); // 98271 B + assert!(fits_in_wide_path_buffer(&cjk_max)); + + // Malformed bytes: simdutf's length is an estimate there (stray + // continuation bytes count zero yet convert to one U+FFFD unit + // each), so the check stays permissive for such input and the + // bounds-checked conversion fails safe downstream instead + // (`to_w_path_overlong_invalid_utf8_yields_empty`). The byte cap + // still rejects anything no fitting path could occupy. + assert!(!fits_in_wide_path_buffer(&vec![0x80u8; 98300])); + assert!(fits_in_wide_path_buffer(&vec![0x80u8; 32758])); + assert!(fits_in_wide_path_buffer(&vec![0x80u8; 32757])); + } +} + // ported from: src/string/immutable/paths.zig diff --git a/src/runtime/node/node_fs.rs b/src/runtime/node/node_fs.rs index 93aec5c82b3..d2979cf8098 100644 --- a/src/runtime/node/node_fs.rs +++ b/src/runtime/node/node_fs.rs @@ -194,7 +194,7 @@ use super::stat::Stats; use super::time_like::TimeLike; use super::types::{ ArgumentsSlice, Dirent, Encoding, FdArgExt as _, FileSystemFlags, FileSystemFlagsKind, - PathLike, PathLikeExt as _, PathOrFdExt as _, StringOrBuffer, VectorArrayBuffer, + NameTooLong, PathLike, PathLikeExt as _, PathOrFdExt as _, StringOrBuffer, VectorArrayBuffer, }; // Re-exported publicly: `crate::node::fs::PathOrFileDescriptor` is the // canonical path used by `cli/build_command.rs` et al. (mirrors Zig's @@ -1881,8 +1881,26 @@ mod _async_tasks { let args = &this.args; let mut src_buf = OSPathBuffer::uninit(); let mut dest_buf = OSPathBuffer::uninit(); - let src = args.src.os_path(&mut src_buf); - let dest = args.dest.os_path(&mut dest_buf); + let name_too_long = |path: &PathLike| sys::Error { + errno: E::ENAMETOOLONG as _, + syscall: sys::Tag::copyfile, + path: path.slice().into(), + ..Default::default() + }; + let src = match args.src.os_path(&mut src_buf) { + Ok(p) => p, + Err(NameTooLong) => { + this.finish_concurrently(Err(name_too_long(&args.src))); + return; + } + }; + let dest = match args.dest.os_path(&mut dest_buf) { + Ok(p) => p, + Err(NameTooLong) => { + this.finish_concurrently(Err(name_too_long(&args.dest))); + return; + } + }; #[cfg(windows)] { @@ -5473,6 +5491,18 @@ impl NodeFS { #[cfg(windows)] { + // Paths whose UTF-16 form exceeds the wide buffers can't exist on + // disk; reject instead of overflowing the conversion below. + for path in [&args.src, &args.dest] { + if !strings::fits_in_wide_path_buffer(path.slice()) { + return Err(sys::Error { + errno: E::ENAMETOOLONG as _, + syscall: sys::Tag::copyfile, + path: path.slice().into(), + ..Default::default() + }); + } + } let mut dest_buf = paths::os_path_buffer_pool::get(); let src = strings::to_kernel32_path( bun_core::cast_slice_mut::(&mut self.sync_error_buf), @@ -5525,7 +5555,11 @@ impl NodeFS { let slice = if path.slice().is_empty() { os_path_literal_empty() } else { - path.os_path_kernel32(&mut self.sync_error_buf) + match path.os_path_kernel32(&mut self.sync_error_buf) { + Ok(p) => p, + // Over PATH_MAX_WIDE — such a path can't exist on disk. + Err(NameTooLong) => return Ok(false), + } }; Ok(sys::exists_os_path(slice, false)) @@ -5800,7 +5834,17 @@ impl NodeFS { ctx: &Ctx, ) -> Maybe { let mut buf = paths::path_buffer_pool::get(); - let path = args.path.os_path_kernel32(&mut *buf); + let path = match args.path.os_path_kernel32(&mut *buf) { + Ok(p) => p, + Err(NameTooLong) => { + return Err(sys::Error { + errno: E::ENAMETOOLONG as _, + syscall: sys::Tag::mkdir, + path: args.path.slice().into(), + ..Default::default() + }); + } + }; if args.always_return_none { self.mkdir_recursive_os_path_impl::(ctx, path, args.mode) } else { @@ -8246,8 +8290,20 @@ impl NodeFS { pub fn cp(&mut self, args: &args::Cp, _: Flavor) -> Maybe { let mut src_buf = OSPathBuffer::uninit(); let mut dest_buf = OSPathBuffer::uninit(); - let src_len = args.src.os_path(&mut src_buf).len(); - let dest_len = args.dest.os_path(&mut dest_buf).len(); + let name_too_long = |path: &PathLike| sys::Error { + errno: E::ENAMETOOLONG as _, + syscall: sys::Tag::copyfile, + path: path.slice().into(), + ..Default::default() + }; + let src_len = match args.src.os_path(&mut src_buf) { + Ok(p) => p.len(), + Err(NameTooLong) => return Err(name_too_long(&args.src)), + }; + let dest_len = match args.dest.os_path(&mut dest_buf) { + Ok(p) => p.len(), + Err(NameTooLong) => return Err(name_too_long(&args.dest)), + }; self.cp_sync_inner( &mut src_buf, PathInt::try_from(src_len).expect("int cast"), diff --git a/src/runtime/node/types.rs b/src/runtime/node/types.rs index 8724d15b7dd..e269d97d675 100644 --- a/src/runtime/node/types.rs +++ b/src/runtime/node/types.rs @@ -970,6 +970,15 @@ where // `PathLikeExt` / `PathOrFdExt` extension traits. pub use bun_jsc::node_path::{PathLike, PathOrFileDescriptor}; +/// Returned by [`PathLikeExt::slice_w`] / [`PathLikeExt::os_path`] / +/// [`PathLikeExt::os_path_kernel32`] when the path's UTF-16 form would not +/// fit a `WPathBuffer` (`strings::fits_in_wide_path_buffer`). NT caps paths +/// at `PATH_MAX_WIDE` units, so such a path cannot exist on disk — callers +/// map this to `false`/`ENAMETOOLONG` as appropriate instead of letting the +/// conversion overflow (mirrors the Zig-side fix in oven-sh/bun#27775). +#[derive(Debug, Clone, Copy)] +pub struct NameTooLong; + /// `bun_runtime`-tier behaviour layered on `bun_jsc::node_path::PathLike`. /// /// `to_thread_safe` / `into_thread_safe` / `slice` / `estimated_size` are @@ -986,13 +995,16 @@ pub trait PathLikeExt { fn slice_z<'a>(&'a self, buf: &'a mut PathBuffer) -> &'a ZStr where Self: Sized; - fn slice_w<'a>(&'a self, buf: &'a mut WPathBuffer) -> &'a WStr + fn slice_w<'a>(&'a self, buf: &'a mut WPathBuffer) -> Result<&'a WStr, NameTooLong> where Self: Sized; - fn os_path<'a>(&'a self, buf: &'a mut OSPathBuffer) -> &'a OSPathSliceZ + fn os_path<'a>(&'a self, buf: &'a mut OSPathBuffer) -> Result<&'a OSPathSliceZ, NameTooLong> where Self: Sized; - fn os_path_kernel32<'a>(&'a self, buf: &'a mut PathBuffer) -> &'a OSPathSliceZ + fn os_path_kernel32<'a>( + &'a self, + buf: &'a mut PathBuffer, + ) -> Result<&'a OSPathSliceZ, NameTooLong> where Self: Sized; fn from_js(ctx: &JSGlobalObject, arguments: &mut ArgumentsSlice) -> JsResult> @@ -1054,7 +1066,16 @@ impl PathLikeExt for PathLike { #[cfg(windows)] { - if bun_paths::is_absolute(sliced) { + // Only take the fast path for paths that can exist on NT at + // all (≤ ~32757 UTF-16 units). That bounds the `\\?\`-prefixed + // copy below in bytes too (≤ 3×32757 + 5 < MAX_PATH_BYTES); + // the cwd-join branch of `resolve_cwd_with_external_buf_z` + // prepends the cwd's filesystem root — arbitrarily long for UNC + // cwds — and bounds-checks internally, surfacing NameTooLong. + // Anything over-long falls through to the plain copy at the + // bottom, which fits without the prefix (or takes the too-long + // fallback) and fails at the syscall. + if bun_paths::is_absolute(sliced) && strings::fits_in_wide_path_buffer(sliced) { if sliced.len() > 2 && bun_paths::is_drive_letter(sliced[0]) && sliced[1] == b':' @@ -1075,8 +1096,21 @@ impl PathLikeExt for PathLike { // SAFETY: buf[4+n] == 0 written above. return ZStr::from_buf(&buf[..], 4 + n); } - return bun_paths::resolve_path::PosixToWinNormalizer::resolve_cwd_with_external_buf_z(buf, sliced) - .unwrap_or_else(|_| panic!("Error while resolving path.")); + // PORT NOTE: reshaped for borrowck — capture the length so + // the `Ok` borrow ends at the match, then re-derive. + let resolved_len = match bun_paths::resolve_path::PosixToWinNormalizer::resolve_cwd_with_external_buf_z(buf, sliced) { + Ok(res) => Some(res.len()), + // The cwd root + path don't fit `buf` (UNC cwds can push + // a near-MAX_PATH_BYTES path over); fall through to the + // plain copy / too-long handling below. + Err(e) if e == bun_core::err!("NameTooLong") => None, + Err(e) => panic!("Error while resolving path: {e:?}"), + }; + if let Some(len) = resolved_len { + // SAFETY: `resolve_cwd_with_external_buf_z` wrote the NUL + // at `buf[len]`. + return ZStr::from_buf(&buf[..], len); + } } } @@ -1124,24 +1158,31 @@ impl PathLikeExt for PathLike { } #[inline] - fn slice_w<'a>(&'a self, buf: &'a mut WPathBuffer) -> &'a WStr { - strings::paths::to_w_path(buf, self.slice()) + fn slice_w<'a>(&'a self, buf: &'a mut WPathBuffer) -> Result<&'a WStr, NameTooLong> { + let sliced = self.slice(); + if !strings::fits_in_wide_path_buffer(sliced) { + return Err(NameTooLong); + } + Ok(strings::paths::to_w_path(buf, sliced)) } #[inline] - fn os_path<'a>(&'a self, buf: &'a mut OSPathBuffer) -> &'a OSPathSliceZ { + fn os_path<'a>(&'a self, buf: &'a mut OSPathBuffer) -> Result<&'a OSPathSliceZ, NameTooLong> { #[cfg(windows)] { return self.slice_w(buf); } #[cfg(not(windows))] { - self.slice_z_with_force_copy::(buf) + Ok(self.slice_z_with_force_copy::(buf)) } } #[inline] - fn os_path_kernel32<'a>(&'a self, buf: &'a mut PathBuffer) -> &'a OSPathSliceZ { + fn os_path_kernel32<'a>( + &'a self, + buf: &'a mut PathBuffer, + ) -> Result<&'a OSPathSliceZ, NameTooLong> { #[cfg(windows)] { let s = self.slice(); @@ -1156,49 +1197,71 @@ impl PathLikeExt for PathLike { && (s[2] == b'.' || s[2] == b'?') && bun_paths::is_sep_any(s[3]) { + if !strings::fits_in_wide_path_buffer(s) { + return Err(NameTooLong); + } // SAFETY: reinterpreting PathBuffer ([u8; N]) as [u16] — 2-byte // alignment is runtime-asserted inside `bytes_as_slice_mut` // (port of Zig `@alignCast`); see PathBuffer doc comment for // why the buffer is always sufficiently aligned in practice. let buf_u16 = unsafe { bun_core::bytes_as_slice_mut::(&mut buf[..]) }; - return strings::to_kernel32_path(buf_u16, s); + return Ok(strings::to_kernel32_path(buf_u16, s)); } if !s.is_empty() && bun_paths::is_sep_any(s[0]) { + // Bail before the cwd resolution + normalization below write + // into fixed u8 buffers: UNC-shaped inputs pass through the + // resolver untouched and can reach `normalize_buf` at full + // MAX_PATH_BYTES length, whose root handling writes one past + // the input length. + if !strings::fits_in_wide_path_buffer(s) { + return Err(NameTooLong); + } // `buf` is the scratch for cwd-resolution; `b` is the pooled // scratch for normalisation; final wide path lands back in `buf`. - let resolve = - bun_paths::resolve_path::PosixToWinNormalizer::resolve_cwd_with_external_buf( - buf, s, - ) - .unwrap_or_else(|_| panic!("Error while resolving path.")); + let resolve = match bun_paths::resolve_path::PosixToWinNormalizer::resolve_cwd_with_external_buf( + buf, s, + ) { + Ok(r) => r, + // The cwd root + path don't fit the resolution buffer + // (UNC cwds can push a near-MAX_PATH_BYTES path over) — + // such a path can't exist on NT. + Err(e) if e == bun_core::err!("NameTooLong") => return Err(NameTooLong), + Err(e) => panic!("Error while resolving path: {e:?}"), + }; let normal = bun_paths::resolve_path::normalize_buf::( resolve, &mut b[..], ); + if !strings::fits_in_wide_path_buffer(normal) { + return Err(NameTooLong); + } // `resolve`'s borrow of `buf` ended at the line above (NLL). // SAFETY: same alignment note as above. let buf_u16 = unsafe { bun_core::bytes_as_slice_mut::(&mut buf[..]) }; - return strings::to_kernel32_path(buf_u16, normal); + return Ok(strings::to_kernel32_path(buf_u16, normal)); } // Handle "." specially since normalizeStringBuf strips it to an empty string if s.len() == 1 && s[0] == b'.' { // SAFETY: see alignment note above (PathBuffer reinterpreted as [u16]). let buf_u16 = unsafe { bun_core::bytes_as_slice_mut::(&mut buf[..]) }; - return strings::to_kernel32_path(buf_u16, b"."); + return Ok(strings::to_kernel32_path(buf_u16, b".")); } let normal = bun_paths::resolve_path::normalize_string_buf::< true, bun_paths::platform::Windows, false, >(s, &mut b[..]); + if !strings::fits_in_wide_path_buffer(normal) { + return Err(NameTooLong); + } // SAFETY: see alignment note above (PathBuffer reinterpreted as [u16]). let buf_u16 = unsafe { bun_core::bytes_as_slice_mut::(&mut buf[..]) }; - return strings::to_kernel32_path(buf_u16, normal); + return Ok(strings::to_kernel32_path(buf_u16, normal)); } #[cfg(not(windows))] { - self.slice_z_with_force_copy::(buf) + Ok(self.slice_z_with_force_copy::(buf)) } } diff --git a/src/sys/lib.rs b/src/sys/lib.rs index 5af21f2f141..5cd0437fb19 100644 --- a/src/sys/lib.rs +++ b/src/sys/lib.rs @@ -4212,6 +4212,19 @@ mod windows_impl { // `(mode & W_OK) != 0` AND the file is read-only AND it is NOT a // directory, return `.err = EPERM`. const W_OK: i32 = 2; + // Longer than any path NT can address — reject up front instead of + // letting the wide conversion below fail-safe to a prefix-only path + // (mirrors `PathLikeExt` and the Zig-side fix in oven-sh/bun#27775, + // which handled `access` as one of its call sites). `path` may + // already carry a `\\?\` prefix (NodeFS::access routes through + // `slice_z`, which prepends it) — check the unprefixed form so the + // fit budget doesn't count the prefix twice and over-reject paths + // just under the limit. + if !bun_paths::string_paths::fits_in_wide_path_buffer( + bun_paths::string_paths::without_nt_prefix(path.as_bytes()), + ) { + return Err(Error::new(E::ENAMETOOLONG, Tag::access).with_path(path.as_bytes())); + } let mut wbuf = WPathBuffer::default(); let wpath = bun_paths::string_paths::to_kernel32_path(&mut wbuf, path.as_bytes()); let attrs = unsafe { w::kernel32::GetFileAttributesW(wpath.as_ptr()) }; diff --git a/test/js/node/fs/fs-path-length.test.ts b/test/js/node/fs/fs-path-length.test.ts index c94f20ff099..2dfeb864486 100644 --- a/test/js/node/fs/fs-path-length.test.ts +++ b/test/js/node/fs/fs-path-length.test.ts @@ -105,3 +105,84 @@ describe.if(isWindows)("path length validation in normalizePathWindows", () => { expect(() => fs.readdirSync(devLong)).toThrow("ENAMETOOLONG"); }); }); + +// On Windows, node:fs converts paths to UTF-16 into fixed-size wide buffers +// (PathLike.osPath: a [32767]u16 WPathBuffer; PathLike.osPathKernel32: the +// 98302-byte PathBuffer viewed as [49151]u16). Path validation only bounds +// the UTF-8 *byte* length (98302), so an ASCII path of 32767..98302 chars +// passed validation and the UTF-8→UTF-16 conversion wrote past the wide +// buffer (simdutf performs no bounds checking), panicking with "range end +// index 49151 out of range for slice of length 49150". Paths that long can't +// exist on NT (PATH_MAX_WIDE caps them), so the conversions now reject them +// up front: exists → false, other ops → ENAMETOOLONG. +describe.if(isWindows)("path length validation against UTF-16 conversion buffers", () => { + // Used to overflow the 49151-u16 osPathKernel32 view (exists, recursive + // mkdir, copyFile src). + const kernel32Long = "C:\\" + Buffer.alloc(49200, "a").toString(); + // Used to overflow the 32767-u16 WPathBuffer (copyFile dest, cp). + const wideLong = "C:\\" + Buffer.alloc(40000, "a").toString(); + + it("existsSync returns false instead of crashing", () => { + expect(fs.existsSync(kernel32Long)).toBe(false); + }); + + // https://github.com/oven-sh/bun/issues/20258 — drive-letter-less paths of + // 49151..98302 chars crashed existsSync (49150 and 98303 already worked: + // the former fit the buffer, the latter exceeded the UTF-8 byte check). + it.each([49150, 49151, 64503, 98302, 98303])( + "existsSync handles path length %i across the buffer boundaries (#20258)", + len => { + expect(fs.existsSync(Buffer.alloc(len, "A").toString())).toBe(false); + }, + ); + + it("rejects over-long paths in accessSync", () => { + expect(() => fs.accessSync(kernel32Long)).toThrow("ENAMETOOLONG"); + }); + + // slice_z's drive-letter branch adds the \\?\ prefix in the 98302-byte + // PathBuffer; for byte lengths in (98297, 98302] the prefixed copy used to + // write past the buffer. It must fall back to the unprefixed form and + // surface the syscall's error (which one depends on the OS/filesystem). + it("handles drive-letter paths in the last bytes below MAX_PATH_BYTES", () => { + const p = "C:\\" + Buffer.alloc(98297, "a").toString(); + expect(() => fs.statSync(p)).toThrow(/ENOENT|ENAMETOOLONG|EINVAL/); + }); + + it("rejects over-long paths in recursive mkdirSync", () => { + expect(() => fs.mkdirSync(kernel32Long, { recursive: true })).toThrow("ENAMETOOLONG"); + }); + + it("rejects over-long src paths in copyFileSync", () => { + expect(() => fs.copyFileSync(kernel32Long, "copy-file-dest-does-not-matter.txt")).toThrow("ENAMETOOLONG"); + }); + + it("rejects over-long dest paths in copyFileSync", () => { + expect(() => fs.copyFileSync("copy-file-src-does-not-matter.txt", wideLong)).toThrow("ENAMETOOLONG"); + }); + + it("rejects over-long paths in cpSync", () => { + expect(() => fs.cpSync(wideLong, "cp-dest-does-not-matter.txt")).toThrow("ENAMETOOLONG"); + }); + + it("rejects over-long paths in async fs.promises.mkdir", async () => { + expect(async () => await fs.promises.mkdir(kernel32Long, { recursive: true })).toThrow("ENAMETOOLONG"); + }); + + it("rejects over-long Buffer paths", () => { + expect(() => fs.mkdirSync(Buffer.from(kernel32Long), { recursive: true })).toThrow("ENAMETOOLONG"); + }); + + it("still accepts multi-byte paths that are long in bytes but within the UTF-16 bound", () => { + // 150 × 200-char CJK segments: 90152 UTF-8 bytes — past the UTF-16-unit + // limit in bytes — but only 30152 UTF-16 units, so + // fits_in_wide_path_buffer must compute the exact length and accept it. + // Each component stays under NTFS's 255-unit limit so the only possible + // syscall failure is non-existence: copyFileSync (which checks both + // paths against the guard and does not swallow errors) must get past + // the length guard and fail with ENOENT — not ENAMETOOLONG. + const segment = Buffer.alloc(600, "\u4e00").toString(); + const p = "C:\\" + Array(150).fill(segment).join("\\"); + expect(() => fs.copyFileSync(p, "copy-file-dest-does-not-matter.txt")).toThrow("ENOENT"); + }); +});