Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

89 changes: 3 additions & 86 deletions src/bun_core/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1746,16 +1746,6 @@ pub fn clear_to_end() {
// </r> - reset
// <r> - reset

/// Lowercase lookup wrapper. The table
/// itself lives in `bun_output_tags` (shared with the `pretty_fmt!` proc-macro
/// so there is exactly one copy).
pub mod color_map {
/// Resolves the tag `name` by forwarding to
/// `bun_output_tags::color_for_bytes` and returning its result unchanged.
/// `None` means that lookup produced no entry for `name`.
#[inline]
pub fn get(name: &[u8]) -> Option<&'static str> {
bun_output_tags::color_for_bytes(name)
}
}

pub use ansi::{BOLD, DIM, RESET};
pub use bun_output_tags::{ansi, ansi_b};

Expand Down Expand Up @@ -2057,82 +2047,9 @@ pub fn pretty_fmt_args<A: FmtTuple>(
}

/// Runtime `<tag>` → ANSI rewriter, used for testing the proc-macro and for
/// the rare dynamic case.
///
/// Colour table lives in `bun_output_tags`; the state machine is kept duplicated
/// vs `bun_core_macros::rewrite` because the two intentionally diverge in the
/// `{` arm (proc-macro rewrites specs `{s}`→`{}`; this side copies braces
/// verbatim) and on unknown tags (proc-macro errors; this side emits `""`).
pub fn pretty_fmt_runtime(fmt: &[u8], is_enabled: bool) -> Vec<u8> {
    // Scans `fmt` once, copying ordinary bytes through and replacing each
    // `<tag>` / `</tag>` with its ANSI sequence (or with nothing at all when
    // `is_enabled` is false).
    let mut rendered = Vec::with_capacity(fmt.len() * 4);
    let mut pos = 0usize;

    while let Some(&byte) = fmt.get(pos) {
        match byte {
            b'\\' => {
                if let Some(&escaped) = fmt.get(pos + 1) {
                    // `\<` and `\>` produce the bare bracket; any other
                    // escape keeps its backslash.
                    if !matches!(escaped, b'<' | b'>') {
                        rendered.push(b'\\');
                    }
                    rendered.push(escaped);
                    pos += 2;
                } else {
                    // A lone trailing backslash is dropped.
                    pos += 1;
                }
            }
            // A bare `>` (including the one closing a tag) is consumed
            // without producing output.
            b'>' => pos += 1,
            b'{' => {
                // Copy everything up to, but not including, the next `}`.
                // The `}` itself is emitted by the catch-all arm on the
                // following iteration.
                let remaining = &fmt[pos..];
                let span = remaining
                    .iter()
                    .position(|&b| b == b'}')
                    .unwrap_or(remaining.len());
                rendered.extend_from_slice(&remaining[..span]);
                pos += span;
            }
            b'<' => {
                pos += 1;
                let closing = fmt.get(pos) == Some(&b'/');
                if closing {
                    pos += 1;
                }

                // The tag name runs up to the next `>` (or to end of input
                // when the tag is unterminated). The `>` is left in place
                // for the arm above to swallow.
                let remaining = &fmt[pos..];
                let name_len = remaining
                    .iter()
                    .position(|&b| b == b'>')
                    .unwrap_or(remaining.len());
                let tag = &remaining[..name_len];
                pos += name_len;

                // Known tag: use its sequence. `r` with no table entry is
                // treated as a reset. Any other unknown tag contributes an
                // empty sequence — the `pretty_fmt!` proc-macro rejects
                // those at its call sites; this runtime path drops them.
                let (sequence, reset) = match color_map::get(tag) {
                    Some(lit) => (lit, closing),
                    None => ("", closing || tag == b"r"),
                };

                if is_enabled {
                    let emitted = if reset { RESET } else { sequence };
                    rendered.extend_from_slice(emitted.as_bytes());
                }
            }
            other => {
                rendered.push(other);
                pos += 1;
            }
        }
    }

    rendered
}
/// the rare dynamic case. The implementation lives in `bun_output_tags` so the
/// `bun_clap_macros` proc-macro crate shares the same state machine.
pub use bun_output_tags::pretty_fmt_runtime;

#[doc(hidden)]
#[inline]
Expand Down
185 changes: 135 additions & 50 deletions src/bun_core/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2438,8 +2438,12 @@ pub mod printer {
/// `MutableString`, and any other `crate::io::Write` sink.
pub use crate::io::Write as PrinterWriter;

// PERF: `ascii_only` is a *runtime* arg so the large callers
// (`write_pre_quoted_string_inner`, `bun_js_printer::estimate_length_for_utf8`)
// collapse to a single monomorphization instead of one per
// (ascii_only × quote_char × …) combo — see `write_pre_quoted_string_inner`.
#[inline]
pub(crate) fn can_print_without_escape(c: i32, ascii_only: bool) -> bool {
pub fn can_print_without_escape(c: i32, ascii_only: bool) -> bool {
if c <= LAST_ASCII as i32 {
c >= FIRST_ASCII as i32
&& c != b'\\' as i32
Expand All @@ -2456,9 +2460,20 @@ pub mod printer {
}
}

/// Same algorithm as `bun_js_printer::write_pre_quoted_string`.
/// PERF: (quote_char, ascii_only, json, encoding) are runtime params —
/// profile if it shows up on a hot path.
/// `strings::Encoding` stand-in that derives `ConstParamTy` so it can be
/// used as a const-generic parameter (`const ENCODING: Encoding`). The
/// variant set is identical; convert at the boundary if a
/// `strings::Encoding` is ever needed.
#[derive(Clone, Copy, Debug, PartialEq, Eq, core::marker::ConstParamTy)]
pub enum Encoding {
// One byte per code unit; `write_pre_quoted_string_inner` debug-asserts
// that every byte it reads is `<= 0x7F`.
Ascii,
// Variable-width input: the escaper sizes each sequence with
// `strings::wtf8_byte_sequence_length_with_invalid` and decodes it with
// `strings::decode_wtf8_rune_t`.
Utf8,
// One byte per code unit; the escaper uses the byte value directly as the
// code point and re-encodes it with `strings::encode_wtf8_rune` on output.
Latin1,
// Two bytes per code unit; the escaper reads each unit from the byte slice
// as a little-endian `u16` and iterates over `len / 2` units.
Utf16,
}

/// Runtime-encoding adapter: selects the matching monomorphized
/// [`write_pre_quoted_string_inner`] instance.
pub fn write_pre_quoted_string<W: PrinterWriter + ?Sized>(
text_in: &[u8],
writer: &mut W,
Expand All @@ -2467,68 +2482,119 @@ pub mod printer {
json: bool,
encoding: StrEncoding,
) -> Result<(), crate::Error> {
debug_assert!(!json || quote_char == b'"');
// utf16 view over the same bytes (only used when encoding == Utf16).
// Callers pass 2-byte-aligned even-length input for Utf16; `cast_slice`
// panics (rather than UB) if that contract is violated.
let text16: &[u16] = if encoding == StrEncoding::Utf16 {
crate::cast_slice::<u8, u16>(text_in)
} else {
&[]
};
let n: usize = if encoding == StrEncoding::Utf16 {
text16.len()
} else {
text_in.len()
};
match encoding {
StrEncoding::Ascii => write_pre_quoted_string_inner::<W, { Encoding::Ascii }>(
text_in, writer, quote_char, ascii_only, json,
),
StrEncoding::Utf8 => write_pre_quoted_string_inner::<W, { Encoding::Utf8 }>(
text_in, writer, quote_char, ascii_only, json,
),
StrEncoding::Latin1 => write_pre_quoted_string_inner::<W, { Encoding::Latin1 }>(
text_in, writer, quote_char, ascii_only, json,
),
StrEncoding::Utf16 => write_pre_quoted_string_inner::<W, { Encoding::Utf16 }>(
text_in, writer, quote_char, ascii_only, json,
),
}
}

/// `quote_char` / `ascii_only` / `json` are runtime args: the branches on
/// them are cheap and well-predicted, and collapsing the monomorphizations
/// keeps the hot transpile pages dense. `ENCODING` stays `const` — it
/// changes the code-unit indexing structure of the loop, so a per-encoding
/// copy is genuinely different code.
#[inline(never)]
pub fn write_pre_quoted_string_inner<W, const ENCODING: Encoding>(
text_in: &[u8],
writer: &mut W,
quote_char: u8,
ascii_only: bool,
json: bool,
) -> Result<(), crate::Error>
where
W: PrinterWriter + ?Sized,
{
debug_assert!(
!(json && quote_char != b'"'),
"for json, quote_char must be '\"'"
);

let text = text_in;
let mut i: usize = 0;
let n: usize = match ENCODING {
Encoding::Utf16 => text.len() / 2,
_ => text.len(),
};

macro_rules! code_unit_at {
($idx:expr) => {
match ENCODING {
Encoding::Utf16 => {
let lo = text[$idx * 2];
let hi = text[$idx * 2 + 1];
u16::from_le_bytes([lo, hi]) as i32
}
_ => text[$idx] as i32,
}
};
}

while i < n {
let width: u8 = match encoding {
StrEncoding::Latin1 | StrEncoding::Ascii | StrEncoding::Utf16 => 1,
StrEncoding::Utf8 => strings::wtf8_byte_sequence_length_with_invalid(text_in[i]),
let width: u8 = match ENCODING {
Encoding::Latin1 | Encoding::Ascii => 1,
Encoding::Utf8 => strings::wtf8_byte_sequence_length_with_invalid(text[i]),
Encoding::Utf16 => 1,
};
let clamped_width = (width as usize).min(n.saturating_sub(i));
let c: i32 = match encoding {
StrEncoding::Utf8 => {
let mut buf = [0u8; 4];
buf[..clamped_width].copy_from_slice(&text_in[i..i + clamped_width]);
strings::decode_wtf8_rune_t::<i32>(buf, width, 0)
let c: i32 = match ENCODING {
Encoding::Utf8 => {
let bytes: [u8; 4] = match clamped_width {
1 => [text[i], 0, 0, 0],
2 => [text[i], text[i + 1], 0, 0],
3 => [text[i], text[i + 1], text[i + 2], 0],
4 => [text[i], text[i + 1], text[i + 2], text[i + 3]],
_ => unreachable!(),
};
strings::decode_wtf8_rune_t::<i32>(bytes, width, 0)
}
Encoding::Ascii => {
debug_assert!(text[i] <= 0x7F);
text[i] as i32
}
StrEncoding::Ascii => {
debug_assert!(text_in[i] <= 0x7F);
text_in[i] as i32
Encoding::Latin1 => text[i] as i32,
Encoding::Utf16 => {
// TODO: if this is a part of a surrogate pair, we could parse the whole codepoint in order
// to emit it as a single \u{result} rather than two paired \uLOW\uHIGH.
// eg: "\u{10334}" will convert to "𐌴" without this.
code_unit_at!(i)
}
StrEncoding::Latin1 => text_in[i] as i32,
StrEncoding::Utf16 => text16[i] as i32,
};

if can_print_without_escape(c, ascii_only) {
match encoding {
StrEncoding::Ascii | StrEncoding::Utf8 => {
let remain = &text_in[i + clamped_width..];
match ENCODING {
Encoding::Ascii | Encoding::Utf8 => {
let remain = &text[i + clamped_width..];
if let Some(j) = strings::index_of_needs_escape_for_java_script_string(
remain, quote_char,
) {
writer.write_all(&text_in[i..i + clamped_width])?;
i += clamped_width;
writer.write_all(&remain[..j as usize])?;
i += j as usize;
let j = j as usize;
writer.write_all(&text[i..i + clamped_width + j])?;
i += clamped_width + j;
} else {
writer.write_all(&text_in[i..])?;
writer.write_all(&text[i..])?;
break;
}
}
StrEncoding::Latin1 | StrEncoding::Utf16 => {
let mut cp = [0u8; 4];
let cp_len = strings::encode_wtf8_rune(&mut cp, c as u32);
writer.write_all(&cp[..cp_len])?;
Encoding::Latin1 | Encoding::Utf16 => {
let mut codepoint_bytes = [0u8; 4];
let codepoint_len =
strings::encode_wtf8_rune(&mut codepoint_bytes, c as u32);
writer.write_all(&codepoint_bytes[..codepoint_len])?;
i += clamped_width;
}
}
continue;
}

match c {
0x07 => {
writer.write_all(b"\\x07")?;
Expand Down Expand Up @@ -2572,11 +2638,16 @@ pub mod printer {
}
0x24 => {
if quote_char == b'`' {
let next_is_brace = match encoding {
StrEncoding::Utf16 => i + 1 < n && text16[i + 1] == b'{' as u16,
_ => i + 1 < n && text_in[i + 1] == b'{',
let next = if i + clamped_width < n {
Some(code_unit_at!(i + clamped_width))
} else {
None
};
writer.write_all(if next_is_brace { b"\\$" } else { b"$" })?;
if next == Some(b'{' as i32) {
writer.write_all(b"\\$")?;
} else {
writer.write_all(b"$")?;
}
} else {
writer.write_all(b"$")?;
}
Expand All @@ -2588,6 +2659,7 @@ pub mod printer {
}
_ => {
i += width as usize;

if c <= 0xFF && !json {
let h = hex2_upper(c as u8);
writer.write_all(&[b'\\', b'x', h[0], h[1]])?;
Expand All @@ -2609,9 +2681,22 @@ pub mod printer {
bytes: &mut MutableString,
ascii_only: bool,
) -> Result<(), crate::Error> {
// PERF: consider pre-growing via an estimated UTF-8 length — profile if it shows up on a hot path.
// `ascii_only` is threaded at runtime so
// the heavy escaper isn't monomorphized per ascii_only/quote-char combo.
//
// Heuristic reservation (~12.5% slack) instead of a full
// escaped-length pre-scan, which would do a SIMD scan + per-escape rune
// decode over `text` just to size the buffer — the same work
// `write_pre_quoted_string_inner` repeats immediately below.
// Tab-indented JS (e.g. three.js) has ~9.4% of bytes needing 2-byte
// escapes (tabs + newlines + quotes/backslashes), so 6.25% slack would
// under-shoot and force a 2x doubling memcpy of the whole source. The
// writer still grows on demand if this under-shoots.
bytes.grow_if_needed(text.len() + (text.len() >> 3) + 8)?;
bytes.append_char(b'"')?;
write_pre_quoted_string(text, bytes, b'"', ascii_only, true, StrEncoding::Utf8)?;
write_pre_quoted_string_inner::<_, { Encoding::Utf8 }>(
text, bytes, b'"', ascii_only, true,
)?;
bytes.append_char(b'"').expect("unreachable");
Ok(())
}
Expand Down
Loading
Loading