Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6d62485
Fix panic converting over-long Windows paths to UTF-16
robobun Jun 2, 2026
df0f3c2
Add existsSync boundary-length cases from #20258
robobun Jun 2, 2026
f97a773
Reject over-long Windows paths at the conversion call sites
robobun Jun 2, 2026
ce29ae5
Replace the testing hook with Rust unit tests
robobun Jun 2, 2026
315c026
Exercise the wide-path fit check in the multi-byte acceptance test
robobun Jun 2, 2026
6026f94
Keep acceptance-test path components under the NTFS limit
robobun Jun 2, 2026
26d4a48
Use the nominal SIMDUTFResult in the test stubs
robobun Jun 2, 2026
89257b2
Add per-op ENAMETOOLONG to access; guard slice_z's prefixed copies
robobun Jun 2, 2026
d0e8ae0
Gate slice_z's Windows fast path on fits_in_wide_path_buffer
robobun Jun 2, 2026
37d34cc
Bounds-check the cwd joins in resolve_cwd_with_external_buf{,_z}
robobun Jun 2, 2026
f419002
Close the remaining fixed-buffer gaps in the path pipeline
robobun Jun 2, 2026
c344671
Review nits: parameterize boundary test, include error in resolve panic
robobun Jun 2, 2026
7116ec1
Cap the byte length in fits_in_wide_path_buffer
robobun Jun 2, 2026
f49fb30
Count U+FFFD expansion when sizing wide-path conversions
robobun Jun 2, 2026
aaaac55
ci: retrigger
robobun Jun 2, 2026
b1edef4
Leave normalize headroom in the resolver join bounds
robobun Jun 2, 2026
1c0a402
Route convert_utf8_to_utf16_in_buffer_z through the checked core
robobun Jun 2, 2026
4d00960
Use the simdutf length in fits_in_wide_path_buffer
robobun Jun 2, 2026
6ca4ca9
Merge remote-tracking branch 'origin/main' into farm/b67ae5f9/fix-ker…
robobun Jun 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 47 additions & 10 deletions src/bun_core/string/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2963,35 +2963,72 @@ pub fn starts_with_windows_drive_letter_t<T: Copy + Into<u32>>(s: &[T]) -> bool
/// UTF-8 falls back to a scalar WTF-8 decoder that emits U+FFFD for malformed
/// bytes and passes unpaired surrogates through (so non-empty input never yields
/// an empty slice — fixes #8197 / the TODO at unicode.zig:1537).
///
/// Panics when the output does not fit. Callers that cannot statically size
/// `buf` for the worst case must use [`try_convert_utf8_to_utf16_in_buffer`].
pub fn convert_utf8_to_utf16_in_buffer<'a>(buf: &'a mut [u16], input: &[u8]) -> &'a mut [u16] {
    // Capture the capacity first: `buf` is handed off to the checked
    // conversion for the full `'a` borrow, so it cannot be inspected again
    // when building the failure message.
    let capacity = buf.len();
    let Some(converted) = try_convert_utf8_to_utf16_in_buffer(buf, input) else {
        // The checked core refused the conversion; for this infallible
        // wrapper an undersized buffer is a caller bug, so abort loudly.
        panic!(
            "convert_utf8_to_utf16_in_buffer: buf too small (have {} u16 for {} input bytes)",
            capacity,
            input.len(),
        );
    };
    converted
}
Comment thread
claude[bot] marked this conversation as resolved.

/// Checked variant of [`convert_utf8_to_utf16_in_buffer`]: returns `None` when
/// the converted output does not fit in `buf`, and never writes past `buf`.
///
/// simdutf's convert API takes only an output *pointer* and writes however
/// many units the input needs, so it must not be entered unless the output
/// provably fits: either `input.len() <= buf.len()` (a UTF-16 unit always
/// consumes at least one UTF-8 byte, and surrogate pairs produce 2 units from
/// 4 bytes), or the exact converted length fits. On invalid input simdutf
/// stops at the first error having written only the valid prefix's units,
/// which is ≤ that same exact-length estimate; the WTF-8 fallback can exceed
/// the estimate (stray continuation bytes become one U+FFFD each), so it
/// re-checks capacity on every write.
pub fn try_convert_utf8_to_utf16_in_buffer<'a>(
buf: &'a mut [u16],
input: &[u8],
) -> Option<&'a mut [u16]> {
if input.is_empty() {
return &mut buf[..0];
}
assert!(
input.len() <= buf.len() || element_length_utf8_into_utf16(input) <= buf.len(),
"convert_utf8_to_utf16_in_buffer: buf too small (have {} u16 for {} input bytes)",
buf.len(),
input.len(),
);
return Some(&mut buf[..0]);
}
if input.len() > buf.len() && element_length_utf8_into_utf16(input) > buf.len() {
return None;
}
let r = simdutf::convert::utf8::to::utf16::with_errors::le(input, buf);
if r.is_successful() {
return &mut buf[..r.count];
debug_assert!(r.count <= buf.len());
return Some(&mut buf[..r.count]);
}
// WTF-8 fallback (invalid byte → U+FFFD; lone surrogates pass through).
let mut written = 0usize;
let mut i = 0usize;
while i < input.len() {
let b = input[i];
if b < 0x80 {
if written >= buf.len() {
return None;
}
buf[written] = b as u16;
written += 1;
i += 1;
} else {
let (cp, adv) = decode_wtf8_one(&input[i..]);
if cp <= 0xFFFF {
if written >= buf.len() {
return None;
}
buf[written] = cp as u16;
written += 1;
} else {
if written + 2 > buf.len() {
return None;
}
let [hi, lo] = encode_surrogate_pair(cp);
buf[written] = hi;
buf[written + 1] = lo;
Expand All @@ -3000,7 +3037,7 @@ pub fn convert_utf8_to_utf16_in_buffer<'a>(buf: &'a mut [u16], input: &[u8]) ->
i += adv;
}
}
&mut buf[..written]
Some(&mut buf[..written])
}

/// Decode one WTF-8 sequence at the head of `s`; invalid lead/truncated → (U+FFFD, 1).
Expand Down
24 changes: 10 additions & 14 deletions src/bun_core/string/immutable/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -944,20 +944,16 @@ use crate::strings::u16_get_supplementary;
pub use crate::strings::{u16_is_lead, u16_is_trail};

pub fn convert_utf8_to_utf16_in_buffer_z<'a>(buf: &'a mut [u16], input: &[u8]) -> &'a WStr {
// TODO: see convert_utf8_to_utf16_in_buffer
if input.is_empty() {
buf[0] = 0;
return wstr_in_buf(buf, 0);
}
assert!(
input.len() < buf.len() || element_length_utf8_into_utf16(input) < buf.len(),
"convert_utf8_to_utf16_in_buffer_z: buf too small (have {} u16 for {} input bytes)",
buf.len(),
input.len(),
);
let result = simdutf::convert::utf8::to::utf16::le(input, buf);
buf[result] = 0;
wstr_in_buf(buf, result)
// Checked conversion (see `try_convert_utf8_to_utf16_in_buffer`): the
// NUL reserves one slot, and over-long input fails safe to "" — which
// the consuming syscall rejects — instead of letting simdutf (which
// never bounds-checks its output) write past `buf`.
let cap = buf.len().saturating_sub(1);
let len = crate::string::immutable::try_convert_utf8_to_utf16_in_buffer(&mut buf[..cap], input)
.map(|converted| converted.len())
.unwrap_or(0);
buf[len] = 0;
wstr_in_buf(buf, len)
}

#[rustfmt::skip]
Expand Down
6 changes: 6 additions & 0 deletions src/paths/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,9 @@ bun_alloc.workspace = true
bun_core.workspace = true

thiserror.workspace = true

[dev-dependencies]
# Nominal `SIMDUTFResult`/`Status` for the test-only simdutf stubs in
# `string_paths.rs` — Miri requires the stub's signature to match the extern
# declaration's types exactly, not just their ABI layout.
bun_simdutf_sys.workspace = true
35 changes: 35 additions & 0 deletions src/paths/resolve_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2219,6 +2219,16 @@ impl PosixToWinNormalizer {
debug_assert!(is_sep_any(root[0]));
if strings::is_windows_absolute_path_missing_drive_letter::<u8>(maybe_posix_path) {
let source_root = windows_filesystem_root(source_dir);
// The source root (arbitrarily long for UNC dirs) plus
// the path must fit `buf` with one byte of headroom —
// downstream normalization writes one past the input for
// separator-less UNC roots. Such a join can't exist on NT
// anyway, so fail safe to the un-joined input (which the
// consuming lookup treats as nonexistent) instead of
// writing past the buffer.
if source_root.len() + maybe_posix_path.len() - 1 >= buf.len() {
return maybe_posix_path;
}
buf[0..source_root.len()].copy_from_slice(source_root);
buf[source_root.len()..source_root.len() + maybe_posix_path.len() - 1]
.copy_from_slice(&maybe_posix_path[1..]);
Expand Down Expand Up @@ -2252,6 +2262,11 @@ impl PosixToWinNormalizer {
debug_assert!(is_sep_any(root[0]));
if strings::is_windows_absolute_path_missing_drive_letter::<u8>(mp) {
let source_root = windows_filesystem_root(source_dir);
// See resolve_with_external_buf: an over-long join fails safe to the
// un-joined input. The bound here also reserves the slot for the
// trailing NUL written after the joined bytes.
if source_root.len() + mp.len() > buf.len() {
return maybe_posix_path;
}
buf[0..source_root.len()].copy_from_slice(source_root);
buf[source_root.len()..source_root.len() + mp.len() - 1]
.copy_from_slice(&mp[1..]);
Expand Down Expand Up @@ -2293,6 +2308,16 @@ impl PosixToWinNormalizer {
let cwd = bun_core::getcwd(buf)?;
windows_filesystem_root(cwd.as_bytes()).len()
};
// The cwd root (arbitrarily long for UNC cwds) plus the
// path must fit `buf` with one byte of headroom: the
// joined result feeds `normalize_buf`, whose UNC-root
// handling writes one past the input when the cwd is a
// bare share root with no trailing separator. Such a
// combination can't exist on NT anyway, so error out
// instead of writing past a buffer.
if sr_len + maybe_posix_path.len() - 1 >= buf.len() {
return Err(bun_core::err!("NameTooLong"));
}
Comment thread
robobun marked this conversation as resolved.
buf[sr_len..sr_len + maybe_posix_path.len() - 1]
.copy_from_slice(&maybe_posix_path[1..]);
let res = &buf[0..sr_len + maybe_posix_path.len() - 1];
Expand Down Expand Up @@ -2329,6 +2354,13 @@ impl PosixToWinNormalizer {
let cwd = bun_core::getcwd(buf)?;
windows_filesystem_root(cwd.as_bytes()).len()
};
// The cwd root (arbitrarily long for UNC cwds) plus the
// path and its NUL must fit `buf`; such a combination
// can't exist on NT anyway, so error out instead of
// writing past it.
if sr_len + maybe_posix_path.len() > buf.len() {
return Err(bun_core::err!("NameTooLong"));
}
buf[sr_len..sr_len + maybe_posix_path.len() - 1]
.copy_from_slice(&maybe_posix_path[1..]);
buf[sr_len + maybe_posix_path.len() - 1] = 0;
Expand All @@ -2349,6 +2381,9 @@ impl PosixToWinNormalizer {
);
}

if maybe_posix_path.len() + 1 > buf.len() {
return Err(bun_core::err!("NameTooLong"));
}
buf[..maybe_posix_path.len()].copy_from_slice(maybe_posix_path);
buf[maybe_posix_path.len()] = 0;
// SAFETY: NUL at buf[maybe_posix_path.len()]
Expand Down
Loading
Loading