From 4b72a0a76c3cb1136f31bc125233a01edda569c4 Mon Sep 17 00:00:00 2001
From: Alistair Smith <hi@alistair.sh>
Date: Mon, 8 Jun 2026 14:59:09 -0700
Subject: [PATCH 1/3] parsers: consolidate lexer logging and number scanning
 helpers

---
 src/ast/lexer_log.rs               | 409 +++++++++++++++++++++++++
 src/js_parser/lexer.rs             | 375 +++--------------------
 src/js_parser/parse/parse_entry.rs | 105 ++-----
 src/parsers/json.rs                | 316 ++++---------------
 src/parsers/json_lexer.rs          | 108 ++-----
 src/parsers/lib.rs                 |   4 +
 src/parsers/number_scan.rs         | 128 ++++++++
 src/parsers/toml/lexer.rs          | 477 +++--------------------------
 8 files changed, 744 insertions(+), 1178 deletions(-)
 create mode 100644 src/parsers/number_scan.rs
diff --git a/src/ast/lexer_log.rs b/src/ast/lexer_log.rs
index 7346a5ee2ad..9f25dfe124a 100644
--- a/src/ast/lexer_log.rs
+++ b/src/ast/lexer_log.rs
@@ -6,6 +6,10 @@
 //! `prev_error_loc`, push into `Log`, then record the loc. This trait
 //! collapses all three.
 //!
+//! It also hosts the shared string escape-sequence decoder
+//! ([`decode_escape_sequences`] / [`EscapeLexer`]) that the js/json and toml
+//! lexers previously each carried a ~330-line copy of.
+//!
 //! The trait carries a `'s` lifetime so `source()` can hand back the lexer's
 //! stored `&'s Source` *without* borrowing `self` — that is what lets the
 //! provided bodies call `self.log_mut()` afterwards without a split-borrow
@@ -111,3 +115,408 @@ pub trait LexerLog<'s> {
         Err(Self::syntax_err())
     }
 }
+
+/// Surface [`decode_escape_sequences`] needs from a lexer. Monomorphizes per
+/// lexer type, so codegen matches the previous per-lexer inline copies.
+pub trait EscapeLexer<'s>: LexerLog<'s> {
+    /// Decoded output sink: UTF-16 code units for the js lexer, WTF-8 bytes
+    /// for the toml lexer.
+    type Buf;
+
+    /// JSON mode: reject legacy octal, `\u{...}`, line continuations, and any
+    /// simple escape outside the JSON set.
+    const IS_JSON: bool = false;
+
+    /// toml only: keep error spans in their historical shape — the legacy
+    /// octal `Range` start is text-relative (no `start +`) and the `\u{...}`
+    /// span start also subtracts the width of `{`. The js lexer computes both
+    /// absolutely (oven-sh/bun#31134).
+    const LEGACY_ERROR_SPANS: bool = false;
+
+    fn end_mut(&mut self) -> &mut usize; // decoder stores an absolute offset here before most errors
+    fn push_codepoint(buf: &mut Self::Buf, c: u32); // called once per decoded code point `c`
+}
+
+/// Decodes the backslash escape sequences of a string-literal body `text`
+/// into `buf`. `start` is the absolute source offset of `text`'s first byte,
+/// used to report error locations.
+///
+/// `ALLOW_LINE_CONTINUATIONS` permits `\<newline>` (always true for js;
+/// toml multiline basic strings only). `REJECT_HEX_ESCAPE` errors on `\x`
+/// (toml multiline basic strings only).
+pub fn decode_escape_sequences<
+    's,
+    L: EscapeLexer<'s>,
+    const ALLOW_LINE_CONTINUATIONS: bool,
+    const REJECT_HEX_ESCAPE: bool,
+>(
+    lexer: &mut L,
+    start: usize,
+    text: &[u8],
+    buf: &mut L::Buf,
+) -> Result<(), L::Err> {
+    use bun_core::fmt::hex_digit_value_u32;
+    use bun_core::strings;
+    use bun_core::strings::CodePoint;
+
+    let iterator = strings::CodepointIterator::init(text);
+    let mut iter = strings::Cursor::default();
+    while iterator.next(&mut iter) {
+        let width = iter.width;
+        match iter.c {
+            0x0D => {
+                // From the specification:
+                //
+                // 11.8.6.1 Static Semantics: TV and TRV
+                //
+                // TV excludes the code units of LineContinuation while TRV includes
+                // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
+                // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
+                // include a <CR> or <CR><LF> sequence.
+
+                // Convert '\r\n' into '\n'. After `next()` returns for `\r`,
+                // `iter.i` is the start byte of the `\r` itself — the `\n` we're
+                // looking for is at `iter.i + 1`.
+                let next_i: usize = iter.i as usize + 1;
+                iter.i += (next_i < text.len() && text[next_i] == b'\n') as u32;
+
+                // Convert '\r' into '\n'
+                L::push_codepoint(buf, u32::from(b'\n'));
+                continue;
+            }
+
+            0x5C => {
+                if !iterator.next(&mut iter) {
+                    return Ok(());
+                }
+
+                let c2 = iter.c;
+                let width2 = iter.width;
+                match c2 {
+                    // https://mathiasbynens.be/notes/javascript-escapes#single
+                    0x62 => {
+                        L::push_codepoint(buf, 0x08);
+                        continue;
+                    }
+                    0x66 => {
+                        L::push_codepoint(buf, 0x0C);
+                        continue;
+                    }
+                    0x6E => {
+                        L::push_codepoint(buf, 0x0A);
+                        continue;
+                    }
+                    0x76 => {
+                        // Vertical tab is invalid JSON
+                        // We're going to allow it.
+                        L::push_codepoint(buf, 0x0B);
+                        continue;
+                    }
+                    0x74 => {
+                        L::push_codepoint(buf, 0x09);
+                        continue;
+                    }
+                    0x72 => {
+                        L::push_codepoint(buf, 0x0D);
+                        continue;
+                    }
+
+                    // legacy octal literals
+                    0x30..=0x37 => {
+                        let octal_start = (iter.i as usize + width2 as usize).saturating_sub(2);
+                        if L::IS_JSON {
+                            *lexer.end_mut() =
+                                (start + iter.i as usize).saturating_sub(width2 as usize);
+                            lexer.syntax_error()?;
+                        }
+
+                        // 1-3 digit octal
+                        let mut is_bad = false;
+                        let mut value: i64 = (c2 - 0x30) as i64;
+                        let mut prev = iter;
+
+                        if !iterator.next(&mut iter) {
+                            if value == 0 {
+                                L::push_codepoint(buf, 0);
+                                return Ok(());
+                            }
+                            lexer.syntax_error()?;
+                            return Ok(());
+                        }
+
+                        let c3: CodePoint = iter.c;
+
+                        match c3 {
+                            0x30..=0x37 => {
+                                value = value * 8 + (c3 - 0x30) as i64;
+                                prev = iter;
+                                if !iterator.next(&mut iter) {
+                                    return lexer.syntax_error();
+                                }
+
+                                let c4 = iter.c;
+                                match c4 {
+                                    0x30..=0x37 => {
+                                        let temp = value * 8 + (c4 - 0x30) as i64;
+                                        if temp < 256 {
+                                            value = temp;
+                                        } else {
+                                            iter = prev;
+                                        }
+                                    }
+                                    0x38 | 0x39 => {
+                                        is_bad = true;
+                                    }
+                                    _ => {
+                                        iter = prev;
+                                    }
+                                }
+                            }
+                            0x38 | 0x39 => {
+                                is_bad = true;
+                            }
+                            _ => {
+                                iter = prev;
+                            }
+                        }
+
+                        iter.c = i32::try_from(value).expect("int cast");
+                        if is_bad {
+                            // `octal_start` is text-relative like `iter.i`; map back
+                            // to an absolute source position the same way every
+                            // sibling error path does (e.g. `start + hex_start` in
+                            // the `\u{}` branch) — unless the lexer keeps its
+                            // historical text-relative span.
+                            let range_start = if L::LEGACY_ERROR_SPANS {
+                                octal_start
+                            } else {
+                                start + octal_start
+                            };
+                            // `add_range_error` has no failing path; `?` keeps the
+                            // signature free of a `Debug` bound on `L::Err`.
+                            lexer.add_range_error(
+                                Range {
+                                    loc: Loc {
+                                        start: i32::try_from(range_start).expect("int cast"),
+                                    },
+                                    len: i32::try_from(iter.i as usize - octal_start)
+                                        .expect("int cast"),
+                                },
+                                format_args!("Invalid legacy octal literal"),
+                            )?;
+                        }
+                    }
+                    0x38 | 0x39 => {
+                        iter.c = c2;
+                    }
+                    // 2-digit hexadecimal
+                    0x78 => {
+                        if REJECT_HEX_ESCAPE {
+                            *lexer.end_mut() =
+                                (start + iter.i as usize).saturating_sub(width2 as usize);
+                            lexer.syntax_error()?;
+                        }
+
+                        let mut value: CodePoint = 0;
+                        let mut c3: CodePoint;
+                        let mut width3: u8;
+
+                        if !iterator.next(&mut iter) {
+                            return lexer.syntax_error();
+                        }
+                        c3 = iter.c;
+                        width3 = iter.width;
+                        match hex_digit_value_u32(c3 as u32) {
+                            Some(d) => value = (value * 16) | d as CodePoint,
+                            None => {
+                                *lexer.end_mut() =
+                                    (start + iter.i as usize).saturating_sub(width3 as usize);
+                                return lexer.syntax_error();
+                            }
+                        }
+
+                        if !iterator.next(&mut iter) {
+                            return lexer.syntax_error();
+                        }
+                        c3 = iter.c;
+                        width3 = iter.width;
+                        match hex_digit_value_u32(c3 as u32) {
+                            Some(d) => value = (value * 16) | d as CodePoint,
+                            None => {
+                                *lexer.end_mut() =
+                                    (start + iter.i as usize).saturating_sub(width3 as usize);
+                                return lexer.syntax_error();
+                            }
+                        }
+
+                        iter.c = value;
+                    }
+                    0x75 => {
+                        // Accumulate in an i64; the `\u{...}` loop below saturates so an
+                        // arbitrarily long run of hex digits cannot overflow it.
+                        let mut value: i64 = 0;
+
+                        if !iterator.next(&mut iter) {
+                            return lexer.syntax_error();
+                        }
+                        let mut c3 = iter.c;
+                        let mut width3 = iter.width;
+
+                        // variable-length
+                        if c3 == 0x7B {
+                            if L::IS_JSON {
+                                *lexer.end_mut() =
+                                    (start + iter.i as usize).saturating_sub(width2 as usize);
+                                lexer.syntax_error()?;
+                            }
+
+                            // `iter.i` is the byte offset of `{` inside `text`;
+                            // back up past `\` and `u` only. `width3` is the
+                            // width of `{` itself, which `iter.i` already points
+                            // at — subtracting it lands one character too early
+                            // (kept for lexers with `LEGACY_ERROR_SPANS`).
+                            let mut hex_start = (iter.i as usize)
+                                .saturating_sub(width as usize)
+                                .saturating_sub(width2 as usize);
+                            if L::LEGACY_ERROR_SPANS {
+                                hex_start = hex_start.saturating_sub(width3 as usize);
+                            }
+                            let mut is_first = true;
+                            let mut is_out_of_range = false;
+                            'variable_length: loop {
+                                if !iterator.next(&mut iter) {
+                                    break 'variable_length;
+                                }
+                                c3 = iter.c;
+
+                                if c3 == 0x7D {
+                                    if is_first {
+                                        *lexer.end_mut() = (start + iter.i as usize)
+                                            .saturating_sub(width3 as usize);
+                                        return lexer.syntax_error();
+                                    }
+                                    break 'variable_length;
+                                }
+                                match hex_digit_value_u32(c3 as u32) {
+                                    Some(d) => value = value.saturating_mul(16) | d as i64,
+                                    None => {
+                                        *lexer.end_mut() = (start + iter.i as usize)
+                                            .saturating_sub(width3 as usize);
+                                        return lexer.syntax_error();
+                                    }
+                                }
+
+                                // '\U0010FFFF
+                                // copied from golang utf8.MaxRune
+                                if value > 1_114_111 {
+                                    is_out_of_range = true;
+                                }
+                                is_first = false;
+                            }
+
+                            if is_out_of_range {
+                                lexer.add_range_error(
+                                    Range {
+                                        loc: Loc {
+                                            start: i32::try_from(start + hex_start)
+                                                .expect("int cast"),
+                                        },
+                                        len: i32::try_from(
+                                            (iter.i as usize).saturating_sub(hex_start),
+                                        )
+                                        .expect("int cast"),
+                                    },
+                                    format_args!("Unicode escape sequence is out of range"),
+                                )?;
+
+                                return Ok(());
+                            }
+
+                            // fixed-length
+                        } else {
+                            // Fixed-length
+                            let mut j: usize = 0;
+                            while j < 4 {
+                                match hex_digit_value_u32(c3 as u32) {
+                                    Some(d) => value = (value * 16) | d as i64,
+                                    None => {
+                                        *lexer.end_mut() = (start + iter.i as usize)
+                                            .saturating_sub(width3 as usize);
+                                        return lexer.syntax_error();
+                                    }
+                                }
+
+                                if j < 3 {
+                                    if !iterator.next(&mut iter) {
+                                        return lexer.syntax_error();
+                                    }
+                                    c3 = iter.c;
+                                    width3 = iter.width;
+                                }
+                                j += 1;
+                            }
+                            let _ = width3;
+                        }
+
+                        iter.c = value as CodePoint; // lossless: value <= 0x10FFFF on both paths
+                    }
+                    0x0D => {
+                        if L::IS_JSON {
+                            *lexer.end_mut() =
+                                (start + iter.i as usize).saturating_sub(width2 as usize);
+                            lexer.syntax_error()?;
+                        } else if !ALLOW_LINE_CONTINUATIONS {
+                            *lexer.end_mut() =
+                                (start + iter.i as usize).saturating_sub(width2 as usize);
+                            lexer.add_default_error(b"Unexpected end of line")?;
+                        }
+
+                        // Make sure Windows CRLF counts as a single newline.
+                        // Guard on the index we actually read (`iter.i + 1`), not
+                        // `iter.i` — a string ending in `\<CR>` would otherwise
+                        // read `text[len]`.
+                        let next_i: usize = iter.i as usize + 1;
+                        iter.i += (next_i < text.len() && text[next_i] == b'\n') as u32;
+
+                        // Ignore line continuations. A line continuation is not an escaped newline.
+                        continue;
+                    }
+                    0x0A | 0x2028 | 0x2029 => {
+                        if L::IS_JSON {
+                            *lexer.end_mut() =
+                                (start + iter.i as usize).saturating_sub(width2 as usize);
+                            lexer.syntax_error()?;
+                        } else if !ALLOW_LINE_CONTINUATIONS {
+                            *lexer.end_mut() =
+                                (start + iter.i as usize).saturating_sub(width2 as usize);
+                            lexer.add_default_error(b"Unexpected end of line")?;
+                        }
+
+                        // Ignore line continuations. A line continuation is not an escaped newline.
+                        continue;
+                    }
+                    _ => {
+                        if L::IS_JSON {
+                            match c2 {
+                                0x22 | 0x5C | 0x2F => {}
+                                _ => {
+                                    *lexer.end_mut() =
+                                        (start + iter.i as usize).saturating_sub(width2 as usize);
+                                    lexer.syntax_error()?;
+                                }
+                            }
+                        }
+                        iter.c = c2;
+                    }
+                }
+            }
+            _ => {}
+        }
+
+        match iter.c {
+            -1 => return lexer.add_default_error(b"Unexpected end of file"),
+            c => L::push_codepoint(buf, c as u32),
+        }
+    }
+    Ok(())
+}
diff --git a/src/js_parser/lexer.rs b/src/js_parser/lexer.rs
index 291cbd0ed0a..c09eab335fa 100644
--- a/src/js_parser/lexer.rs
+++ b/src/js_parser/lexer.rs
@@ -5,7 +5,6 @@ use core::fmt;
 use bun_ast as js_ast;
 use bun_ast::lexer_tables as tables;
 use bun_ast::{LexerLog, Loc, Log, Range, Source};
-use bun_core::fmt::hex_digit_value_u32;
 use bun_core::strings;
 use bun_core::strings::CodepointIterator;
 use bun_core::{Environment, feature_flags as FeatureFlags};
@@ -448,6 +447,41 @@ impl<
     }
 }
 
+impl<
+    'a,
+    const IS_JSON: bool,
+    const ALLOW_COMMENTS: bool,
+    const ALLOW_TRAILING_COMMAS: bool,
+    const IGNORE_LEADING_ESCAPE_SEQUENCES: bool,
+    const IGNORE_TRAILING_ESCAPE_SEQUENCES: bool,
+    const JSON_WARN_DUPLICATE_KEYS: bool,
+    const WAS_ORIGINALLY_MACRO: bool,
+    const GUESS_INDENTATION: bool,
+> bun_ast::lexer_log::EscapeLexer<'a>
+    for LexerType<
+        'a,
+        IS_JSON,
+        ALLOW_COMMENTS,
+        ALLOW_TRAILING_COMMAS,
+        IGNORE_LEADING_ESCAPE_SEQUENCES,
+        IGNORE_TRAILING_ESCAPE_SEQUENCES,
+        JSON_WARN_DUPLICATE_KEYS,
+        WAS_ORIGINALLY_MACRO,
+        GUESS_INDENTATION,
+    >
+{
+    type Buf = Vec<u16>; // decoded output is collected as UTF-16 code units
+    const IS_JSON: bool = IS_JSON; // forwards this lexer's own `IS_JSON` const parameter
+    #[inline]
+    fn end_mut(&mut self) -> &mut usize {
+        &mut self.end
+    }
+    #[inline]
+    fn push_codepoint(buf: &mut Vec<u16>, c: u32) {
+        strings::push_codepoint_utf16(buf, c);
+    }
+}
+
 lexer_impl_header! {
     /// Reborrow the shared `Log`. The `&self` receiver lets call sites pass
     /// other `self.*` fields as arguments without a borrow-checker conflict;
@@ -605,344 +639,7 @@ lexer_impl_header! {
         if IS_JSON {
             self.is_ascii_only = false;
         }
-
-        let iterator = CodepointIterator::init(text);
-        let mut iter = strings::Cursor::default();
-        while iterator.next(&mut iter) {
-            let width = iter.width;
-            match iter.c {
-                0x0D => {
-                    // From the specification:
-                    //
-                    // 11.8.6.1 Static Semantics: TV and TRV
-                    //
-                    // TV excludes the code units of LineContinuation while TRV includes
-                    // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
-                    // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
-                    // include a <CR> or <CR><LF> sequence.
-
-                    // Convert '\r\n' into '\n'
-                    let next_i: usize = iter.i as usize + 1;
-                    iter.i += (next_i < text.len() && text[next_i] == b'\n') as u32;
-
-                    // Convert '\r' into '\n'
-                    buf.push(u16::from(b'\n'));
-                    continue;
-                }
-
-                0x5C => {
-                    if !iterator.next(&mut iter) {
-                        return Ok(());
-                    }
-
-                    let c2 = iter.c;
-                    let width2 = iter.width;
-                    match c2 {
-                        // https://mathiasbynens.be/notes/javascript-escapes#single
-                        0x62 => {
-                            buf.push(0x08);
-                            continue;
-                        }
-                        0x66 => {
-                            buf.push(0x0C);
-                            continue;
-                        }
-                        0x6E => {
-                            buf.push(0x0A);
-                            continue;
-                        }
-                        0x76 => {
-                            // Vertical tab is invalid JSON
-                            // We're going to allow it.
-                            buf.push(0x0B);
-                            continue;
-                        }
-                        0x74 => {
-                            buf.push(0x09);
-                            continue;
-                        }
-                        0x72 => {
-                            buf.push(0x0D);
-                            continue;
-                        }
-
-                        // legacy octal literals
-                        0x30..=0x37 => {
-                            let octal_start =
-                                (iter.i as usize + width2 as usize).saturating_sub(2);
-                            if IS_JSON {
-                                self.end = (start + iter.i as usize)
-                                    .saturating_sub(width2 as usize);
-                                self.syntax_error()?;
-                            }
-
-                            // 1-3 digit octal
-                            let mut is_bad = false;
-                            let mut value: i64 = (c2 - 0x30) as i64;
-                            let mut prev = iter;
-
-                            if !iterator.next(&mut iter) {
-                                if value == 0 {
-                                    buf.push(0);
-                                    return Ok(());
-                                }
-                                self.syntax_error()?;
-                                return Ok(());
-                            }
-
-                            let c3: CodePoint = iter.c;
-
-                            match c3 {
-                                0x30..=0x37 => {
-                                    value = value * 8 + (c3 - 0x30) as i64;
-                                    prev = iter;
-                                    if !iterator.next(&mut iter) {
-                                        return self.syntax_error();
-                                    }
-
-                                    let c4 = iter.c;
-                                    match c4 {
-                                        0x30..=0x37 => {
-                                            let temp =
-                                                value * 8 + (c4 - 0x30) as i64;
-                                            if temp < 256 {
-                                                value = temp;
-                                            } else {
-                                                iter = prev;
-                                            }
-                                        }
-                                        0x38 | 0x39 => {
-                                            is_bad = true;
-                                        }
-                                        _ => {
-                                            iter = prev;
-                                        }
-                                    }
-                                }
-                                0x38 | 0x39 => {
-                                    is_bad = true;
-                                }
-                                _ => {
-                                    iter = prev;
-                                }
-                            }
-
-                            iter.c = i32::try_from(value).expect("int cast");
-                            if is_bad {
-                                // `octal_start` is text-relative like `iter.i`;
-                                // map back to absolute source position the same
-                                // way every sibling error path does (e.g.
-                                // `start + hex_start` in the `\u{}` branch).
-                                self.add_range_error(
-                                    Range {
-                                        loc: Loc {
-                                            start: i32::try_from(start + octal_start).expect("int cast"),
-                                        },
-                                        len: i32::try_from(
-                                            iter.i as usize - octal_start,
-                                        )
-                                        .unwrap(),
-                                    },
-                                    format_args!("Invalid legacy octal literal"),
-                                )
-                                .expect("unreachable");
-                            }
-                        }
-                        0x38 | 0x39 => {
-                            iter.c = c2;
-                        }
-                        // 2-digit hexadecimal
-                        0x78 => {
-                            let mut value: CodePoint = 0;
-                            let mut c3: CodePoint;
-                            let mut width3: u8;
-
-                            if !iterator.next(&mut iter) {
-                                return self.syntax_error();
-                            }
-                            c3 = iter.c;
-                            width3 = iter.width;
-                            match hex_digit_value_u32(c3 as u32) {
-                                Some(d) => value = (value * 16) | d as CodePoint,
-                                None => {
-                                    self.end = (start + iter.i as usize)
-                                        .saturating_sub(width3 as usize);
-                                    return self.syntax_error();
-                                }
-                            }
-
-                            if !iterator.next(&mut iter) {
-                                return self.syntax_error();
-                            }
-                            c3 = iter.c;
-                            width3 = iter.width;
-                            match hex_digit_value_u32(c3 as u32) {
-                                Some(d) => value = (value * 16) | d as CodePoint,
-                                None => {
-                                    self.end = (start + iter.i as usize)
-                                        .saturating_sub(width3 as usize);
-                                    return self.syntax_error();
-                                }
-                            }
-
-                            iter.c = value;
-                        }
-                        0x75 => {
-                            // We're going to make this an i64 so we don't risk integer overflows
-                            // when people do weird things
-                            let mut value: i64 = 0;
-
-                            if !iterator.next(&mut iter) {
-                                return self.syntax_error();
-                            }
-                            let mut c3 = iter.c;
-                            let mut width3 = iter.width;
-
-                            // variable-length
-                            if c3 == 0x7B {
-                                if IS_JSON {
-                                    self.end = (start + iter.i as usize)
-                                        .saturating_sub(width2 as usize);
-                                    self.syntax_error()?;
-                                }
-
-                                // `iter.i` is the byte offset of `{` inside `text`;
-                                // back up past `\` and `u` only. `width3` is the
-                                // width of `{` itself, which `iter.i` already points
-                                // at — subtracting it lands one character too early.
-                                let hex_start = (iter.i as usize)
-                                    .saturating_sub(width as usize)
-                                    .saturating_sub(width2 as usize);
-                                let mut is_first = true;
-                                let mut is_out_of_range = false;
-                                'variable_length: loop {
-                                    if !iterator.next(&mut iter) {
-                                        break 'variable_length;
-                                    }
-                                    c3 = iter.c;
-
-                                    if c3 == 0x7D {
-                                        if is_first {
-                                            self.end = (start + iter.i as usize)
-                                                .saturating_sub(width3 as usize);
-                                            return self.syntax_error();
-                                        }
-                                        break 'variable_length;
-                                    }
-                                    match hex_digit_value_u32(c3 as u32) {
-                                        Some(d) => value = (value * 16) | d as i64,
-                                        None => {
-                                            self.end = (start + iter.i as usize)
-                                                .saturating_sub(width3 as usize);
-                                            return self.syntax_error();
-                                        }
-                                    }
-
-                                    // '\U0010FFFF
-                                    // copied from golang utf8.MaxRune
-                                    if value > 1_114_111 {
-                                        is_out_of_range = true;
-                                    }
-                                    is_first = false;
-                                }
-
-                                if is_out_of_range {
-                                    self.add_range_error(
-                                        Range {
-                                            loc: Loc {
-                                                start: i32::try_from(start + hex_start)
-                                                    .unwrap(),
-                                            },
-                                            len: i32::try_from(
-                                                (iter.i as usize).saturating_sub(hex_start),
-                                            )
-                                            .unwrap(),
-                                        },
-                                        format_args!(
-                                            "Unicode escape sequence is out of range"
-                                        ),
-                                    )?;
-
-                                    return Ok(());
-                                }
-
-                                // fixed-length
-                            } else {
-                                // Fixed-length
-                                let mut j: usize = 0;
-                                while j < 4 {
-                                    match hex_digit_value_u32(c3 as u32) {
-                                        Some(d) => value = (value * 16) | d as i64,
-                                        None => {
-                                            self.end = (start + iter.i as usize)
-                                                .saturating_sub(width3 as usize);
-                                            return self.syntax_error();
-                                        }
-                                    }
-
-                                    if j < 3 {
-                                        if !iterator.next(&mut iter) {
-                                            return self.syntax_error();
-                                        }
-                                        c3 = iter.c;
-                                        width3 = iter.width;
-                                    }
-                                    j += 1;
-                                }
-                                let _ = width3;
-                            }
-
-                            iter.c = value as CodePoint; // @truncate
-                        }
-                        0x0D => {
-                            if IS_JSON {
-                                self.end = (start + iter.i as usize)
-                                    .saturating_sub(width2 as usize);
-                                self.syntax_error()?;
-                            }
-
-                            // Make sure Windows CRLF counts as a single newline
-                            let next_i: usize = iter.i as usize + 1;
-                            iter.i +=
-                                (next_i < text.len() && text[next_i] == b'\n') as u32;
-
-                            // Ignore line continuations. A line continuation is not an escaped newline.
-                            continue;
-                        }
-                        0x0A | 0x2028 | 0x2029 => {
-                            if IS_JSON {
-                                self.end = (start + iter.i as usize)
-                                    .saturating_sub(width2 as usize);
-                                self.syntax_error()?;
-                            }
-
-                            // Ignore line continuations. A line continuation is not an escaped newline.
-                            continue;
-                        }
-                        _ => {
-                            if IS_JSON {
-                                match c2 {
-                                    0x22 | 0x5C | 0x2F => {}
-                                    _ => {
-                                        self.end = (start + iter.i as usize)
-                                            .saturating_sub(width2 as usize);
-                                        self.syntax_error()?;
-                                    }
-                                }
-                            }
-                            iter.c = c2;
-                        }
-                    }
-                }
-                _ => {}
-            }
-
-            match iter.c {
-                -1 => return self.add_default_error(b"Unexpected end of file"),
-                c => strings::push_codepoint_utf16(buf, c as u32),
-            }
-        }
-        Ok(())
+        bun_ast::lexer_log::decode_escape_sequences::<_, true, false>(self, start, text, buf)
     }
 
     // PERF: heavy sub-scanner — the per-byte string body loop plus the
diff --git a/src/js_parser/parse/parse_entry.rs b/src/js_parser/parse/parse_entry.rs
index f72795f5d20..b276dd1dd81 100644
--- a/src/js_parser/parse/parse_entry.rs
+++ b/src/js_parser/parse/parse_entry.rs
@@ -56,6 +56,37 @@ macro_rules! init_p {
     }};
 }
 
+/// `init_p!` plus the shared `&mut self` prologue of `_scan_imports`,
+/// `to_lazy_export_ast`, and `analyze`. `Lexer` owns `Vec`s and `Options`
+/// owns `jsx: Pragma` boxes, so a bitwise `ptr::read` would double-free when
+/// `self` later drops; instead both are moved out (inert placeholders stay
+/// behind) and the parser is built in place. Expands to statements: `$p` is
+/// bound in the caller's scope as `&mut $ty`, not `$ty`.
+///
+/// The placeholder lexer gets its *own* arena-allocated `Log` (empty `Vec`,
+/// arena-leaked) so it never aliases `self.log`, the real `Log` handed to
+/// `P`. The placeholder itself is never read again.
+macro_rules! take_and_init_p {
+    (let $p:ident: $ty:ty = $self:ident) => {
+        let lexer = core::mem::replace(
+            &mut $self.lexer,
+            js_lexer::Lexer::init_without_reading(
+                $self.bump.alloc(bun_ast::Log::default()),
+                $self.source,
+                $self.bump,
+            ),
+        );
+        let options = core::mem::take(&mut $self.options);
+        // `P.log` and `Lexer.log` are both `NonNull<Log>` (see P.rs / lexer.rs
+        // field docs), so handing the same raw pointer to both is defined —
+        // no `&mut` is materialized.
+        let mut __p = init_p!($ty;
+            $self.bump, $self.log, $self.source, $self.define, lexer, options);
+        // SAFETY: `init_p!` only yields after `init` succeeded.
+        let $p: &mut $ty = unsafe { __p.assume_init_mut() };
+    };
+}
+
 pub struct Parser<'a> {
     pub options: Options<'a>,
     pub lexer: js_lexer::Lexer<'a>,
@@ -387,31 +418,7 @@ impl<'a> Parser<'a> {
         scan_pass: &'a mut ScanPassResult,
     ) -> Result<(), Error> {
         type Pi<'a, const TS: bool> = P<'a, TS, true>;
-        // `Lexer` owns `Vec`s and `Options` owns
-        // `jsx: Pragma` boxes, so a bitwise `ptr::read` would double-free
-        // when `self` later drops. Move them out, leaving inert placeholders.
-        //
-        // The inert placeholder lexer is given its *own* arena-allocated `Log`
-        // so it does not alias `self.log` at all — keeps the placeholder fully
-        // disjoint from the real `Log` handed to `P` and never read again.
-        let lexer = core::mem::replace(
-            &mut self.lexer,
-            js_lexer::Lexer::init_without_reading(
-                // Disjoint dummy `Log` (empty `Vec`, arena-leaked); the
-                // placeholder is never read after this point.
-                self.bump.alloc(bun_ast::Log::default()),
-                self.source,
-                self.bump,
-            ),
-        );
-        let options = core::mem::take(&mut self.options);
-        // `P.log` and `Lexer.log` are both `NonNull<Log>` (see P.rs / lexer.rs
-        // field docs), so handing the same raw pointer to both is defined —
-        // no `&mut` is materialized.
-        let mut __p = init_p!(Pi<'_, TS>;
-            self.bump, self.log, self.source, self.define, lexer, options);
-        // SAFETY: `init_p!` only yields after `init` succeeded.
-        let p: &mut Pi<'_, TS> = unsafe { __p.assume_init_mut() };
+        take_and_init_p!(let p: Pi<'_, TS> = self);
         p.import_records = crate::p::ImportRecordList::Borrowed(&mut scan_pass.import_records);
         p.named_imports = crate::p::NamedImportsType::Borrowed(&mut scan_pass.named_imports);
 
@@ -531,29 +538,7 @@ impl<'a> Parser<'a> {
         runtime_api_call: &'static [u8],
         symbols: js_ast::symbol::List<'a>,
     ) -> Result<crate::Result<'a>, Error> {
-        // Move lexer/options out and leave inert
-        // placeholders so `self` may drop without double-free.
-        //
-        // The placeholder lexer gets its own arena `Log` so it does not alias
-        // `self.log` (see `_scan_imports`).
-        let lexer = core::mem::replace(
-            &mut self.lexer,
-            js_lexer::Lexer::init_without_reading(
-                // Disjoint dummy `Log` (empty `Vec`, arena-leaked); the
-                // placeholder is never read after this point.
-                self.bump.alloc(bun_ast::Log::default()),
-                self.source,
-                self.bump,
-            ),
-        );
-        let options = core::mem::take(&mut self.options);
-        // `P.log` and `Lexer.log` are both `NonNull<Log>` (see P.rs / lexer.rs
-        // field docs), so handing the same raw pointer to both is defined —
-        // no `&mut` is materialized.
-        let mut __p = init_p!(JavaScriptParser<'_>;
-            self.bump, self.log, self.source, self.define, lexer, options);
-        // SAFETY: `init_p!` only yields after `init` succeeded.
-        let p: &mut JavaScriptParser<'_> = unsafe { __p.assume_init_mut() };
+        take_and_init_p!(let p: JavaScriptParser<'_> = self);
 
         // Instead of doing "should_fold_typescript_constant_expressions or features.minify_syntax"
         // Let's enable this flag file-wide
@@ -622,29 +607,7 @@ impl<'a> Parser<'a> {
         context: *mut c_void,
         callback: &dyn Fn(*mut c_void, &mut TSXParser, &mut [js_ast::Part]) -> Result<(), Error>,
     ) -> Result<(), Error> {
-        // See `_scan_imports`: move lexer/options out, leaving inert
-        // placeholders so `self` may drop without double-free.
-        //
-        // The placeholder lexer gets its own arena `Log` so it does not alias
-        // `self.log` (see `_scan_imports`).
-        let lexer = core::mem::replace(
-            &mut self.lexer,
-            js_lexer::Lexer::init_without_reading(
-                // Disjoint dummy `Log` (empty `Vec`, arena-leaked); the
-                // placeholder is never read after this point.
-                self.bump.alloc(bun_ast::Log::default()),
-                self.source,
-                self.bump,
-            ),
-        );
-        let options = core::mem::take(&mut self.options);
-        // `P.log` and `Lexer.log` are both `NonNull<Log>` (see P.rs / lexer.rs
-        // field docs), so handing the same raw pointer to both is defined —
-        // no `&mut` is materialized.
-        let mut __p = init_p!(TSXParser<'_>;
-            self.bump, self.log, self.source, self.define, lexer, options);
-        // SAFETY: `init_p!` only yields after `init` succeeded.
-        let p: &mut TSXParser<'_> = unsafe { __p.assume_init_mut() };
+        take_and_init_p!(let p: TSXParser<'_> = self);
 
         // Consume a leading hashbang comment
         let mut hashbang: &[u8] = b"";
diff --git a/src/parsers/json.rs b/src/parsers/json.rs
index 7771dd9126a..d8d6c7060ef 100644
--- a/src/parsers/json.rs
+++ b/src/parsers/json.rs
@@ -811,6 +811,39 @@ fn empty_array_data() -> js_ast::expr::Data {
     js_ast::expr::Data::EArray(js_ast::StoreRef::from_raw(EMPTY_ARRAY.get()))
 }
 
+/// Fast-path prologue shared by the JSON entry points. Returns `Some` with a
+/// canned expression at `Loc { start: 0 }` when no lexer is needed, `None`
+/// when the caller must parse normally. Empty input yields an empty object,
+/// consistent with how disabled JS files are handled.
+///
+/// The two-byte arms are effectively dead: a one-byte slice (`[0..1]`) is
+/// compared with two-byte literals, which can never be equal. That mirrors
+/// `json.zig` and is deliberate — matching `''` would loosen strict JSON.
+#[inline]
+fn empty_source_fast_path(source: &bun_ast::Source) -> Option<Expr> {
+    let data = match source.contents.len() {
+        0 => empty_object_data(),
+        2 => {
+            // One byte only; see the doc comment before "fixing" the range.
+            let head = &source.contents[0..1];
+            if head == b"\"\"" || head == b"''" {
+                empty_string_data()
+            } else if head == b"{}" {
+                empty_object_data()
+            } else if head == b"[]" {
+                empty_array_data()
+            } else {
+                return None;
+            }
+        }
+        _ => return None,
+    };
+    Some(Expr {
+        loc: bun_ast::Loc { start: 0 },
+        data,
+    })
+}
+
 // ──────────────────────────────────────────────────────────────────────────
 
 /// Parse JSON
@@ -827,37 +860,12 @@ pub fn parse<const FORCE_UTF8: bool>(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<Expr, bun_core::Error> {
-    let mut parser = JSONLikeParser::init(JSON_OPTS, bump, source, log)?;
-    match source.contents.len() {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(Expr {
-                loc: bun_ast::Loc { start: 0 },
-                data: empty_object_data(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_string_data(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_array_data(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(expr);
     }
 
+    let mut parser = JSONLikeParser::init(JSON_OPTS, bump, source, log)?;
+
     parser.parse_expr(false, FORCE_UTF8)
 }
 
@@ -871,36 +879,8 @@ pub fn parse_package_json_utf8(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<Expr, bun_core::Error> {
-    let len = source.contents.len();
-
-    match len {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(Expr {
-                loc: bun_ast::Loc { start: 0 },
-                data: empty_object_data(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_string_data(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_array_data(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(expr);
     }
 
     let mut parser = JSONLikeParser::init(PACKAGE_JSON_OPTS, bump, source, log)?;
@@ -961,48 +941,11 @@ pub fn parse_package_json_utf8_with_opts_rt(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<JsonResult, bun_core::Error> {
-    let len = source.contents.len();
-
-    match len {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(JsonResult {
-                root: Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                },
-                indentation: Indentation::default(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(JsonResult {
-                    root: Expr {
-                        loc: bun_ast::Loc { start: 0 },
-                        data: empty_string_data(),
-                    },
-                    indentation: Indentation::default(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(JsonResult {
-                    root: Expr {
-                        loc: bun_ast::Loc { start: 0 },
-                        data: empty_object_data(),
-                    },
-                    indentation: Indentation::default(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(JsonResult {
-                    root: Expr {
-                        loc: bun_ast::Loc { start: 0 },
-                        data: empty_array_data(),
-                    },
-                    indentation: Indentation::default(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(root) = empty_source_fast_path(source) {
+        return Ok(JsonResult {
+            root,
+            indentation: Indentation::default(),
+        });
     }
 
     let mut parser = JSONLikeParser::init(opts, bump, source, log)?;
@@ -1039,36 +982,8 @@ pub fn parse_utf8_impl<const CHECK_LEN: bool>(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<Expr, bun_core::Error> {
-    let len = source.contents.len();
-
-    match len {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(Expr {
-                loc: bun_ast::Loc { start: 0 },
-                data: empty_object_data(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_string_data(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_array_data(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(expr);
     }
 
     let mut parser = JSONLikeParser::init(JSON_OPTS, bump, source, log)?;
@@ -1090,34 +1005,8 @@ pub fn parse_for_macro(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<Expr, bun_core::Error> {
-    match source.contents.len() {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(Expr {
-                loc: bun_ast::Loc { start: 0 },
-                data: empty_object_data(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_string_data(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_array_data(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(expr);
     }
 
     let mut parser = JSONLikeParser::init(MACRO_JSON_OPTS, bump, source, log)?;
@@ -1143,46 +1032,15 @@ pub fn parse_for_bundling(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<JSONParseResult, bun_core::Error> {
-    match source.contents.len() {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(JSONParseResult {
-                expr: Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                },
-                tag: JSONParseResultTag::Empty,
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(JSONParseResult {
-                    expr: Expr {
-                        loc: bun_ast::Loc { start: 0 },
-                        data: empty_string_data(),
-                    },
-                    tag: JSONParseResultTag::Expr,
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(JSONParseResult {
-                    expr: Expr {
-                        loc: bun_ast::Loc { start: 0 },
-                        data: empty_object_data(),
-                    },
-                    tag: JSONParseResultTag::Expr,
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(JSONParseResult {
-                    expr: Expr {
-                        loc: bun_ast::Loc { start: 0 },
-                        data: empty_array_data(),
-                    },
-                    tag: JSONParseResultTag::Expr,
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(JSONParseResult {
+            expr,
+            tag: if source.contents.is_empty() {
+                JSONParseResultTag::Empty
+            } else {
+                JSONParseResultTag::Expr
+            },
+        });
     }
 
     let mut parser = JSONLikeParser::init(JSON_OPTS, bump, source, log)?;
@@ -1204,34 +1062,8 @@ pub fn parse_env_json(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<Expr, bun_core::Error> {
-    match source.contents.len() {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(Expr {
-                loc: bun_ast::Loc { start: 0 },
-                data: empty_object_data(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_string_data(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_array_data(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(expr);
     }
 
     let mut parser = JSONLikeParser::init(DOTENV_JSON_OPTS, bump, source, log)?;
@@ -1272,34 +1104,8 @@ pub fn parse_ts_config<const FORCE_UTF8: bool>(
     log: &mut bun_ast::Log,
     bump: &Bump,
 ) -> Result<Expr, bun_core::Error> {
-    match source.contents.len() {
-        // This is to be consisntent with how disabled JS files are handled
-        0 => {
-            return Ok(Expr {
-                loc: bun_ast::Loc { start: 0 },
-                data: empty_object_data(),
-            });
-        }
-        // This is a fast pass I guess
-        2 => {
-            if &source.contents[0..1] == b"\"\"" || &source.contents[0..1] == b"''" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_string_data(),
-                });
-            } else if &source.contents[0..1] == b"{}" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_object_data(),
-                });
-            } else if &source.contents[0..1] == b"[]" {
-                return Ok(Expr {
-                    loc: bun_ast::Loc { start: 0 },
-                    data: empty_array_data(),
-                });
-            }
-        }
-        _ => {}
+    if let Some(expr) = empty_source_fast_path(source) {
+        return Ok(expr);
     }
 
     let mut parser = JSONLikeParser::init(TSCONFIG_OPTS, bump, source, log)?;
diff --git a/src/parsers/json_lexer.rs b/src/parsers/json_lexer.rs
index 1b5954bd403..5f53dee8e90 100644
--- a/src/parsers/json_lexer.rs
+++ b/src/parsers/json_lexer.rs
@@ -222,6 +222,28 @@ impl<'a, 'bump> LexerLog<'a> for Lexer<'a, 'bump> {
     }
 }
 
+impl<'a, 'bump> crate::number_scan::DecimalLexer<'a> for Lexer<'a, 'bump>
+where
+    'bump: 'a,
+{
+    #[inline]
+    fn code_point(&self) -> CodePoint {
+        self.code_point // plain field read; no lexer state changes
+    }
+    #[inline]
+    fn end(&self) -> usize {
+        self.end // plain field read of the lexer's `end` offset
+    }
+    #[inline]
+    fn end_mut(&mut self) -> &mut usize {
+        &mut self.end // presumably so the shared scan can rewind `end` before an error
+    }
+    #[inline]
+    fn step(&mut self) {
+        Lexer::step(self) // inherent `step` wins over this trait method: no recursion
+    }
+}
+
 impl<'a, 'bump> Lexer<'a, 'bump>
 where
     // `identifier` may point into `source.contents` (`'a`) *or* a bump-alloc'd
@@ -697,7 +719,6 @@ where
             return self.syntax_error();
         }
 
-        let mut underscore_count: usize = 0;
         let mut last_underscore_end: usize = 0;
         let mut has_dot_or_exponent = first == '.' as CodePoint;
         let mut base: f64 = 0.0;
@@ -818,87 +839,10 @@ where
             }
         } else {
             // Floating-point literal;
-            let is_invalid_legacy_octal_literal = first == '0' as CodePoint
-                && (self.code_point == '8' as CodePoint || self.code_point == '9' as CodePoint);
-
-            // Initial digits;
-            loop {
-                if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                    if self.code_point != '_' as CodePoint {
-                        break;
-                    }
-                    // Cannot have multiple underscores in a row;
-                    if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                        self.syntax_error()?;
-                    }
-                    // The specification forbids underscores in this case;
-                    if is_invalid_legacy_octal_literal {
-                        self.syntax_error()?;
-                    }
-                    last_underscore_end = self.end;
-                    underscore_count += 1;
-                }
-                self.step();
-            }
-
-            // Fractional digits;
-            if first != '.' as CodePoint && self.code_point == '.' as CodePoint {
-                // An underscore must not come last;
-                if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                    self.end -= 1;
-                    self.syntax_error()?;
-                }
-                has_dot_or_exponent = true;
-                self.step();
-                if self.code_point == '_' as CodePoint {
-                    self.syntax_error()?;
-                }
-                loop {
-                    if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                        if self.code_point != '_' as CodePoint {
-                            break;
-                        }
-                        // Cannot have multiple underscores in a row;
-                        if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                            self.syntax_error()?;
-                        }
-                        last_underscore_end = self.end;
-                        underscore_count += 1;
-                    }
-                    self.step();
-                }
-            }
-
-            // Exponent;
-            if self.code_point == 'e' as CodePoint || self.code_point == 'E' as CodePoint {
-                // An underscore must not come last;
-                if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                    self.end -= 1;
-                    self.syntax_error()?;
-                }
-                has_dot_or_exponent = true;
-                self.step();
-                if self.code_point == '+' as CodePoint || self.code_point == '-' as CodePoint {
-                    self.step();
-                }
-                if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                    self.syntax_error()?;
-                }
-                loop {
-                    if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                        if self.code_point != '_' as CodePoint {
-                            break;
-                        }
-                        // Cannot have multiple underscores in a row;
-                        if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                            self.syntax_error()?;
-                        }
-                        last_underscore_end = self.end;
-                        underscore_count += 1;
-                    }
-                    self.step();
-                }
-            }
+            let scan = crate::number_scan::scan_decimal_digits(self, first)?;
+            let underscore_count = scan.underscore_count;
+            last_underscore_end = scan.last_underscore_end;
+            has_dot_or_exponent = scan.has_dot_or_exponent;
 
             // Take a slice of the text to parse;
             let text = self.raw();
diff --git a/src/parsers/lib.rs b/src/parsers/lib.rs
index e5fb1f707a7..997bff8c022 100644
--- a/src/parsers/lib.rs
+++ b/src/parsers/lib.rs
@@ -8,6 +8,10 @@
 // Crate-private: implementation detail of `json.rs`; no external consumers.
 mod json_lexer;
 
+// ───── number_scan ────────────────────────────────────────────────────────
+// Decimal number-literal digit scanner shared by the json and toml lexers.
+mod number_scan;
+
 // ───── json ───────────────────────────────────────────────────────────────
 // Real port — wired against `crate::json_lexer` (the cycle-break above) and
 // `bun_ast::js_ast`; resolves against the local lexer so `bun_js_parser`
diff --git a/src/parsers/number_scan.rs b/src/parsers/number_scan.rs
new file mode 100644
index 00000000000..1b900d50a01
--- /dev/null
+++ b/src/parsers/number_scan.rs
@@ -0,0 +1,128 @@
+//! Shared decimal number-literal digit scanner.
+//!
+//! The json and toml lexers each carried an identical ~80-line scan of a
+//! decimal literal's digits — underscore-separator rules, optional fraction,
+//! optional exponent, and the invalid-legacy-octal underscore check (see the
+//! matching regions in `js_parser/lexer.zig` and `parsers/toml/lexer.zig`).
+//! This generic helper collapses both; it monomorphizes per lexer type, so
+//! codegen matches the previous inline copies.
+
+use bun_ast::LexerLog;
+use bun_core::strings::CodePoint;
+
+/// Cursor surface [`scan_decimal_digits`] needs from a lexer, on top of [`LexerLog`] error reporting.
+pub(crate) trait DecimalLexer<'s>: LexerLog<'s> {
+    fn code_point(&self) -> CodePoint; // code point the scanner inspects next
+    fn end(&self) -> usize; // position compared against the last `_` to detect adjacent separators
+    fn end_mut(&mut self) -> &mut usize; // lets the scanner step `end` back by one before some errors
+    fn step(&mut self); // advances the cursor; the scanner calls it once per consumed code point
+}
+
+pub(crate) struct DecimalScan { // bookkeeping handed back by `scan_decimal_digits`
+    pub underscore_count: usize, // total `_` separators seen across all digit runs
+    pub last_underscore_end: usize, // `lexer.end()` at the most recent `_` (0 if none). NOTE(review): the toml call site does not copy this back, unlike the other caller in this patch — confirm intended
+    pub has_dot_or_exponent: bool, // literal started with `.` or contained a fraction/exponent
+}
+
+/// Scans the remainder of a decimal (non-radix-prefixed) number literal: initial digits, then
+/// an optional fraction and an optional exponent. The caller has already consumed `first` (the
+/// literal's first code point); on return the cursor sits on the first code point past the
+/// literal and the caller parses `lexer.raw()` into a value. Doubled or misplaced `_` separators
+/// and an exponent without a leading digit go through `syntax_error()`; a final `_` is not checked here.
+#[inline]
+pub(crate) fn scan_decimal_digits<'s, L: DecimalLexer<'s>>(
+    lexer: &mut L,
+    first: CodePoint,
+) -> Result<DecimalScan, L::Err> {
+    let mut underscore_count: usize = 0;
+    let mut last_underscore_end: usize = 0; // 0 doubles as "no `_` seen yet"
+    let mut has_dot_or_exponent = first == '.' as CodePoint;
+
+    let is_invalid_legacy_octal_literal = first == '0' as CodePoint
+        && (lexer.code_point() == '8' as CodePoint || lexer.code_point() == '9' as CodePoint);
+
+    // Initial digits: consume `0`-`9` and `_` separators, stop at anything else;
+    loop {
+        if lexer.code_point() < '0' as CodePoint || lexer.code_point() > '9' as CodePoint {
+            if lexer.code_point() != '_' as CodePoint {
+                break;
+            }
+            // `end` is exactly one more than at the previous `_`, i.e. two separators in a row;
+            if last_underscore_end > 0 && lexer.end() == last_underscore_end + 1 {
+                lexer.syntax_error()?;
+            }
+            // The specification forbids underscores in a literal that began `08` or `09`;
+            if is_invalid_legacy_octal_literal {
+                lexer.syntax_error()?;
+            }
+            last_underscore_end = lexer.end();
+            underscore_count += 1;
+        }
+        lexer.step();
+    }
+
+    // Fractional digits; when `first` was `.` the loop above already consumed them;
+    if first != '.' as CodePoint && lexer.code_point() == '.' as CodePoint {
+        // A `_` must not come directly before the `.`;
+        if last_underscore_end > 0 && lexer.end() == last_underscore_end + 1 {
+            *lexer.end_mut() -= 1; // NOTE(review): presumably moves the reported position onto the `_` — confirm
+            lexer.syntax_error()?;
+        }
+        has_dot_or_exponent = true;
+        lexer.step();
+        if lexer.code_point() == '_' as CodePoint { // nor directly after the `.`
+            lexer.syntax_error()?;
+        }
+        loop {
+            if lexer.code_point() < '0' as CodePoint || lexer.code_point() > '9' as CodePoint {
+                if lexer.code_point() != '_' as CodePoint {
+                    break;
+                }
+                // Same adjacency test as above: two separators in a row;
+                if last_underscore_end > 0 && lexer.end() == last_underscore_end + 1 {
+                    lexer.syntax_error()?;
+                }
+                last_underscore_end = lexer.end();
+                underscore_count += 1;
+            }
+            lexer.step();
+        }
+    }
+
+    // Exponent: `e`/`E`, an optional sign, then a digit followed by digits and separators;
+    if lexer.code_point() == 'e' as CodePoint || lexer.code_point() == 'E' as CodePoint {
+        // A `_` must not come directly before the exponent marker;
+        if last_underscore_end > 0 && lexer.end() == last_underscore_end + 1 {
+            *lexer.end_mut() -= 1; // NOTE(review): presumably moves the reported position onto the `_` — confirm
+            lexer.syntax_error()?;
+        }
+        has_dot_or_exponent = true;
+        lexer.step();
+        if lexer.code_point() == '+' as CodePoint || lexer.code_point() == '-' as CodePoint {
+            lexer.step();
+        }
+        if lexer.code_point() < '0' as CodePoint || lexer.code_point() > '9' as CodePoint {
+            lexer.syntax_error()?; // the exponent must open with a digit, so `e_3` and a bare `e` fail here
+        }
+        loop {
+            if lexer.code_point() < '0' as CodePoint || lexer.code_point() > '9' as CodePoint {
+                if lexer.code_point() != '_' as CodePoint {
+                    break;
+                }
+                // Same adjacency test as above: two separators in a row;
+                if last_underscore_end > 0 && lexer.end() == last_underscore_end + 1 {
+                    lexer.syntax_error()?;
+                }
+                last_underscore_end = lexer.end();
+                underscore_count += 1;
+            }
+            lexer.step();
+        }
+    }
+
+    Ok(DecimalScan {
+        underscore_count,
+        last_underscore_end,
+        has_dot_or_exponent,
+    })
+}
diff --git a/src/parsers/toml/lexer.rs b/src/parsers/toml/lexer.rs
index cfe8c61a5ca..9c043b7495f 100644
--- a/src/parsers/toml/lexer.rs
+++ b/src/parsers/toml/lexer.rs
@@ -2,7 +2,6 @@ use bun_alloc::Arena; // bumpalo::Bump re-export
 use bun_alloc::ArenaVecExt as _;
 use bun_ast as js_ast;
 use bun_ast::LexerLog;
-use bun_core::fmt::hex_digit_value_u32;
 use bun_core::strings;
 use bun_core::strings::CodePoint;
 
@@ -129,6 +128,44 @@ impl<'a> LexerLog<'a> for Lexer<'a> {
     }
 }
 
+impl<'a> bun_ast::lexer_log::EscapeLexer<'a> for Lexer<'a> { // hooks the toml lexer into the shared escape decoder
+    type Buf = bun_alloc::ArenaVec<'a, u8>; // decoded output is collected as bytes
+    const LEGACY_ERROR_SPANS: bool = true;
+    #[inline]
+    fn end_mut(&mut self) -> &mut usize {
+        &mut self.end
+    }
+    #[inline]
+    fn push_codepoint(buf: &mut Self::Buf, c: u32) {
+        if c <= 127 { // ASCII is pushed as a single byte
+            buf.push(c as u8);
+        } else { // anything larger is encoded into a 4-byte scratch buffer first
+            let mut part: [u8; 4] = [0; 4];
+            let len = strings::encode_wtf8_rune(&mut part, c);
+            buf.extend_from_slice(&part[0..len]); // append only the bytes the encoder reported
+        }
+    }
+}
+
+impl<'a> crate::number_scan::DecimalLexer<'a> for Lexer<'a> { // exposes the toml lexer's cursor to `scan_decimal_digits`
+    #[inline]
+    fn code_point(&self) -> CodePoint {
+        self.code_point
+    }
+    #[inline]
+    fn end(&self) -> usize {
+        self.end
+    }
+    #[inline]
+    fn end_mut(&mut self) -> &mut usize {
+        &mut self.end
+    }
+    #[inline]
+    fn step(&mut self) {
+        Lexer::step(self) // called by path; NOTE(review): presumably to make explicit that this is the inherent `step` — confirm
+    }
+}
+
 impl<'a> Lexer<'a> {
     #[inline]
     pub fn loc(&self) -> bun_ast::Loc {
@@ -318,115 +355,9 @@ impl<'a> Lexer<'a> {
             }
         } else {
             // Floating-point literal;
-            let is_invalid_legacy_octal_literal = first == '0' as CodePoint
-                && (self.code_point == '8' as CodePoint || self.code_point == '9' as CodePoint);
-
-            // Initial digits;
-            loop {
-                if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                    match self.code_point {
-                        // '-' => {
-                        //     if (lexer.raw().len == 5) {
-                        //         // Is this possibly a datetime literal that begins with a 4 digit year?
-                        //         lexer.step();
-                        //         while (!lexer.has_newline_before) {
-                        //             switch (lexer.code_point) {
-                        //                 ',' => {
-                        //                     lexer.string_literal_slice = lexer.raw();
-                        //                     lexer.token = T.t_string_literal;
-                        //                     break;
-                        //                 },
-                        //             }
-                        //         }
-                        //     }
-                        // },
-                        c if c == '_' as CodePoint => {}
-                        _ => break,
-                    }
-                    if self.code_point != '_' as CodePoint {
-                        break;
-                    }
-
-                    // Cannot have multiple underscores in a row;
-                    if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                        self.syntax_error()?;
-                    }
-
-                    // The specification forbids underscores in this case;
-                    if is_invalid_legacy_octal_literal {
-                        self.syntax_error()?;
-                    }
-
-                    last_underscore_end = self.end;
-                    underscore_count += 1;
-                }
-                self.step();
-            }
-
-            // Fractional digits;
-            if first != '.' as CodePoint && self.code_point == '.' as CodePoint {
-                // An underscore must not come last;
-                if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                    self.end -= 1;
-                    self.syntax_error()?;
-                }
-
-                has_dot_or_exponent = true;
-                self.step();
-                if self.code_point == '_' as CodePoint {
-                    self.syntax_error()?;
-                }
-                loop {
-                    if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                        if self.code_point != '_' as CodePoint {
-                            break;
-                        }
-
-                        // Cannot have multiple underscores in a row;
-                        if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                            self.syntax_error()?;
-                        }
-
-                        last_underscore_end = self.end;
-                        underscore_count += 1;
-                    }
-                    self.step();
-                }
-            }
-
-            // Exponent;
-            if self.code_point == 'e' as CodePoint || self.code_point == 'E' as CodePoint {
-                // An underscore must not come last;
-                if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                    self.end -= 1;
-                    self.syntax_error()?;
-                }
-
-                has_dot_or_exponent = true;
-                self.step();
-                if self.code_point == '+' as CodePoint || self.code_point == '-' as CodePoint {
-                    self.step();
-                }
-                if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                    self.syntax_error()?;
-                }
-                loop {
-                    if self.code_point < '0' as CodePoint || self.code_point > '9' as CodePoint {
-                        if self.code_point != '_' as CodePoint {
-                            break;
-                        }
-
-                        // Cannot have multiple underscores in a row;
-                        if last_underscore_end > 0 && self.end == last_underscore_end + 1 {
-                            self.syntax_error()?;
-                        }
-
-                        last_underscore_end = self.end;
-                        underscore_count += 1;
-                    }
-                    self.step();
-                }
-            }
+            let scan = crate::number_scan::scan_decimal_digits(self, first)?;
+            underscore_count = scan.underscore_count;
+            has_dot_or_exponent = scan.has_dot_or_exponent;
 
             // Take a slice of the text to parse;
             let mut text: &[u8] = self.raw();
@@ -854,327 +785,11 @@ impl<'a> Lexer<'a> {
         text: &[u8],
         buf: &mut bun_alloc::ArenaVec<'a, u8>,
     ) -> Result<(), Error> {
-        let iterator = strings::CodepointIterator::init(text);
-        let mut iter = strings::Cursor::default();
-        while iterator.next(&mut iter) {
-            let width = iter.width;
-            match iter.c {
-                c if c == '\r' as CodePoint => {
-                    // Convert '\r\n' into '\n'. After `next()` returns for `\r`,
-                    // `iter.i` is the start byte of the `\r` itself — the `\n`
-                    // we're looking for is at `iter.i + 1`. Reading `text[iter.i]`
-                    // would always be `\r`, so the check never fired and a literal
-                    // CRLF in a slow-path multiline basic string decoded to two LFs.
-                    // Match the JS lexer (js_parser/lexer.rs:660-661).
-                    let next_i: usize = iter.i as usize + 1;
-                    if next_i < text.len() && text[next_i] == b'\n' {
-                        iter.i += 1;
-                    }
-
-                    // Convert '\r' into '\n'
-                    buf.push(b'\n');
-                    continue;
-                }
-
-                c if c == '\\' as CodePoint => {
-                    if !iterator.next(&mut iter) {
-                        return Ok(());
-                    }
-
-                    let c2 = iter.c;
-
-                    let width2 = iter.width;
-                    match c2 {
-                        // https://mathiasbynens.be/notes/javascript-escapes#single
-                        c if c == 'b' as CodePoint => {
-                            buf.push(8);
-                            continue;
-                        }
-                        c if c == 'f' as CodePoint => {
-                            // Form feed: U+000C
-                            buf.push(12);
-                            continue;
-                        }
-                        c if c == 'n' as CodePoint => {
-                            buf.push(10);
-                            continue;
-                        }
-                        c if c == 'v' as CodePoint => {
-                            // Vertical tab is invalid JSON
-                            // We're going to allow it.
-                            buf.push(11);
-                            continue;
-                        }
-                        c if c == 't' as CodePoint => {
-                            // Horizontal tab: U+0009
-                            buf.push(9);
-                            continue;
-                        }
-                        c if c == 'r' as CodePoint => {
-                            buf.push(13);
-                            continue;
-                        }
-
-                        // legacy octal literals
-                        c if ('0' as CodePoint..='7' as CodePoint).contains(&c) => {
-                            let octal_start = (iter.i as usize + width2 as usize).saturating_sub(2);
-
-                            // 1-3 digit octal
-                            let mut is_bad = false;
-                            let mut value: i64 = (c2 - '0' as CodePoint) as i64;
-                            let mut restore = iter;
-
-                            if !iterator.next(&mut iter) {
-                                if value == 0 {
-                                    buf.push(0);
-                                    return Ok(());
-                                }
-
-                                self.syntax_error()?;
-                                return Ok(());
-                            }
-
-                            let c3: CodePoint = iter.c;
-
-                            match c3 {
-                                c if ('0' as CodePoint..='7' as CodePoint).contains(&c) => {
-                                    value = value * 8 + (c3 - '0' as CodePoint) as i64;
-                                    restore = iter;
-                                    if !iterator.next(&mut iter) {
-                                        return self.syntax_error();
-                                    }
-
-                                    let c4 = iter.c;
-                                    match c4 {
-                                        c if ('0' as CodePoint..='7' as CodePoint).contains(&c) => {
-                                            let temp = value * 8 + (c4 - '0' as CodePoint) as i64;
-                                            if temp < 256 {
-                                                value = temp;
-                                            } else {
-                                                iter = restore;
-                                            }
-                                        }
-                                        c if c == '8' as CodePoint || c == '9' as CodePoint => {
-                                            is_bad = true;
-                                        }
-                                        _ => {
-                                            iter = restore;
-                                        }
-                                    }
-                                }
-                                c if c == '8' as CodePoint || c == '9' as CodePoint => {
-                                    is_bad = true;
-                                }
-                                _ => {
-                                    iter = restore;
-                                }
-                            }
-
-                            iter.c = i32::try_from(value).expect("int cast");
-                            if is_bad {
-                                self.add_range_error(
-                                    bun_ast::Range {
-                                        loc: bun_ast::Loc {
-                                            start: i32::try_from(octal_start).expect("int cast"),
-                                        },
-                                        len: i32::try_from(iter.i as usize - octal_start)
-                                            .expect("int cast"),
-                                    },
-                                    format_args!("Invalid legacy octal literal"),
-                                )
-                                .expect("unreachable");
-                            }
-                        }
-                        c if c == '8' as CodePoint || c == '9' as CodePoint => {
-                            iter.c = c2;
-                        }
-                        // 2-digit hexadecimal
-                        c if c == 'x' as CodePoint => {
-                            if ALLOW_MULTILINE {
-                                self.end =
-                                    (start + iter.i as usize).saturating_sub(width2 as usize);
-                                self.syntax_error()?;
-                            }
-
-                            let mut value: CodePoint = 0;
-                            let mut c3: CodePoint;
-                            let mut width3: u8;
-
-                            if !iterator.next(&mut iter) {
-                                return self.syntax_error();
-                            }
-                            c3 = iter.c;
-                            width3 = iter.width;
-                            match hex_digit_value_u32(c3 as u32) {
-                                Some(d) => value = (value * 16) | d as CodePoint,
-                                None => {
-                                    self.end =
-                                        (start + iter.i as usize).saturating_sub(width3 as usize);
-                                    return self.syntax_error();
-                                }
-                            }
-
-                            if !iterator.next(&mut iter) {
-                                return self.syntax_error();
-                            }
-                            c3 = iter.c;
-                            width3 = iter.width;
-                            match hex_digit_value_u32(c3 as u32) {
-                                Some(d) => value = (value * 16) | d as CodePoint,
-                                None => {
-                                    self.end =
-                                        (start + iter.i as usize).saturating_sub(width3 as usize);
-                                    return self.syntax_error();
-                                }
-                            }
-
-                            iter.c = value;
-                        }
-                        c if c == 'u' as CodePoint => {
-                            // We're going to make this an i64 so we don't risk integer overflows
-                            // when people do weird things
-                            let mut value: i64 = 0;
-
-                            if !iterator.next(&mut iter) {
-                                return self.syntax_error();
-                            }
-                            let mut c3 = iter.c;
-                            let mut width3 = iter.width;
-
-                            // variable-length
-                            if c3 == '{' as CodePoint {
-                                let hex_start = (iter.i as usize)
-                                    .saturating_sub(width as usize)
-                                    .saturating_sub(width2 as usize)
-                                    .saturating_sub(width3 as usize);
-                                let mut is_first = true;
-                                let mut is_out_of_range = false;
-                                'variable_length: loop {
-                                    if !iterator.next(&mut iter) {
-                                        break 'variable_length;
-                                    }
-                                    c3 = iter.c;
-
-                                    if c3 == '}' as CodePoint {
-                                        if is_first {
-                                            self.end = (start + iter.i as usize)
-                                                .saturating_sub(width3 as usize);
-                                            return self.syntax_error();
-                                        }
-                                        break 'variable_length;
-                                    }
-                                    match hex_digit_value_u32(c3 as u32) {
-                                        Some(d) => value = (value * 16) | d as i64,
-                                        None => {
-                                            self.end = (start + iter.i as usize)
-                                                .saturating_sub(width3 as usize);
-                                            return self.syntax_error();
-                                        }
-                                    }
-
-                                    // '\U0010FFFF
-                                    // copied from golang utf8.MaxRune
-                                    if value > 1114111 {
-                                        is_out_of_range = true;
-                                    }
-                                    is_first = false;
-                                }
-
-                                if is_out_of_range {
-                                    self.add_range_error(
-                                        bun_ast::Range {
-                                            loc: bun_ast::Loc {
-                                                start: i32::try_from(start + hex_start)
-                                                    .expect("int cast"),
-                                            },
-                                            len: i32::try_from(
-                                                (iter.i as usize).saturating_sub(hex_start),
-                                            )
-                                            .unwrap(),
-                                        },
-                                        format_args!("Unicode escape sequence is out of range"),
-                                    )?;
-                                    return Ok(());
-                                }
-
-                                // fixed-length
-                            } else {
-                                // Fixed-length
-                                let mut j: usize = 0;
-                                while j < 4 {
-                                    match hex_digit_value_u32(c3 as u32) {
-                                        Some(d) => value = (value * 16) | d as i64,
-                                        None => {
-                                            self.end = (start + iter.i as usize)
-                                                .saturating_sub(width3 as usize);
-                                            return self.syntax_error();
-                                        }
-                                    }
-
-                                    if j < 3 {
-                                        if !iterator.next(&mut iter) {
-                                            return self.syntax_error();
-                                        }
-                                        c3 = iter.c;
-
-                                        width3 = iter.width;
-                                    }
-                                    j += 1;
-                                }
-                            }
-
-                            iter.c = value as CodePoint; // @truncate
-                        }
-                        c if c == '\r' as CodePoint => {
-                            if !ALLOW_MULTILINE {
-                                self.end =
-                                    (start + iter.i as usize).saturating_sub(width2 as usize);
-                                self.add_default_error(b"Unexpected end of line")?;
-                            }
-
-                            // Ignore line continuations. A line continuation is not an escaped newline.
-                            // Match the JS lexer (js_parser/lexer.rs:660-661, 937-939): guard on
-                            // the index we actually read (`iter.i + 1`), not `iter.i`. Without
-                            // this, a multiline basic string ending in `\<CR>` right before `"""`
-                            // reads `text[len]` and panics even in release (slice bounds checks
-                            // always run).
-                            let next_i: usize = iter.i as usize + 1;
-                            if next_i < text.len() && text[next_i] == b'\n' {
-                                // Make sure Windows CRLF counts as a single newline
-                                iter.i += 1;
-                            }
-                            continue;
-                        }
-                        c if c == '\n' as CodePoint || c == 0x2028 || c == 0x2029 => {
-                            // Ignore line continuations. A line continuation is not an escaped newline.
-                            if !ALLOW_MULTILINE {
-                                self.end =
-                                    (start + iter.i as usize).saturating_sub(width2 as usize);
-                                self.add_default_error(b"Unexpected end of line")?;
-                            }
-                            continue;
-                        }
-                        _ => {
-                            iter.c = c2;
-                        }
-                    }
-                }
-                _ => {}
-            }
-
-            match iter.c {
-                -1 => return self.add_default_error(b"Unexpected end of file"),
-                0..=127 => {
-                    buf.push(u8::try_from(iter.c).expect("int cast"));
-                }
-                _ => {
-                    let mut part: [u8; 4] = [0; 4];
-                    let len = strings::encode_wtf8_rune(&mut part, iter.c as u32);
-                    buf.extend_from_slice(&part[0..len]);
-                }
-            }
-        }
-        Ok(())
+        // Multiline basic strings permit line continuations but reject `\x`;
+        // single-line basic strings are the inverse.
+        bun_ast::lexer_log::decode_escape_sequences::<_, ALLOW_MULTILINE, ALLOW_MULTILINE>(
+            self, start, text, buf,
+        )
     }
 
     pub fn expected(&mut self, token: T) -> Result<(), Error> {

From 1c37c64448fe3ac80b59796654e5429e6e9ae2d3 Mon Sep 17 00:00:00 2001
From: robobun <robobun@oven.sh>
Date: Tue, 9 Jun 2026 20:40:13 +0000
Subject: [PATCH 2/3] Pin TOML number underscore and string escape quirks in
 tests

The shared number_scan and decode_escape_sequences helpers encode these
behaviors in const parameters; these tests lock in the observable rules:
underscore separator placement, exponent digits, the \x single-line vs
multiline asymmetry, line continuations, and unicode escape range checks.
---
 test/js/bun/resolve/toml/toml-parse.test.ts | 54 +++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/test/js/bun/resolve/toml/toml-parse.test.ts b/test/js/bun/resolve/toml/toml-parse.test.ts
index 45f2c48550b..ce9f8d3f7dc 100644
--- a/test/js/bun/resolve/toml/toml-parse.test.ts
+++ b/test/js/bun/resolve/toml/toml-parse.test.ts
@@ -87,3 +87,57 @@ test("Bun.TOML.parse rejects array values without comma separators (#31252)", ()
   // Trailing comma is legal TOML.
   expect(Bun.TOML.parse("a = [1, 2,]")).toEqual({ a: [1, 2] });
 });
+
+// Digit scanning lives in parsers/number_scan.rs (shared with the json lexer).
+// These pin the underscore-separator rules: legal between digits; illegal when
+// doubled, next to the decimal point, or on either side of the exponent marker.
+test("Bun.TOML.parse accepts underscore digit separators in numbers", () => {
+  expect(Bun.TOML.parse("a = 1_000")).toEqual({ a: 1000 });
+  expect(Bun.TOML.parse("a = 5_349_221")).toEqual({ a: 5349221 });
+  expect(Bun.TOML.parse("a = 1_000.000_1")).toEqual({ a: 1000.0001 });
+  expect(Bun.TOML.parse("a = 1e1_0")).toEqual({ a: 1e10 });
+  expect(Bun.TOML.parse("a = 9_224_617.445_991_228")).toEqual({ a: 9224617.445991228 });
+});
+
+test("Bun.TOML.parse rejects misplaced underscores in numbers", () => {
+  expect(() => Bun.TOML.parse("a = 1__0")).toThrow();
+  expect(() => Bun.TOML.parse("a = 1_.5")).toThrow();
+  expect(() => Bun.TOML.parse("a = 1._5")).toThrow();
+  expect(() => Bun.TOML.parse("a = 1.5_e3")).toThrow();
+  expect(() => Bun.TOML.parse("a = 1.5e_3")).toThrow();
+});
+
+test("Bun.TOML.parse rejects an exponent with no digits", () => {
+  expect(() => Bun.TOML.parse("a = 1e")).toThrow();
+  expect(() => Bun.TOML.parse("a = 1e+")).toThrow();
+  // Signed exponents with digits are fine.
+  expect(Bun.TOML.parse("a = 6.626e-34")).toEqual({ a: 6.626e-34 });
+  expect(Bun.TOML.parse("a = 1e+6")).toEqual({ a: 1e6 });
+});
+
+// decode_escape_sequences is instantiated with REJECT_HEX_ESCAPE and
+// ALLOW_LINE_CONTINUATIONS both keyed to multiline-ness: multiline basic
+// strings permit `\<newline>` but reject `\x`; single-line basic strings do
+// the opposite (`\x` is a historical extension that TOML 1.0 does not define).
+test("Bun.TOML.parse allows \\x escapes in single-line basic strings only", () => {
+  expect(Bun.TOML.parse('a = "\\x41"')).toEqual({ a: "A" });
+  expect(() => Bun.TOML.parse('a = """\\x41"""')).toThrow();
+});
+
+test("Bun.TOML.parse allows line continuations in multiline basic strings only", () => {
+  // Note: only the `\<newline>` pair is dropped. TOML proper also trims the
+  // next line's leading whitespace; Bun's decoder keeps it (JS semantics).
+  expect(Bun.TOML.parse('a = """line \\\n   joined"""')).toEqual({ a: "line    joined" });
+  // CRLF after the backslash is a single continuation too.
+  expect(Bun.TOML.parse('a = """line \\\r\n   joined"""')).toEqual({ a: "line    joined" });
+  expect(() => Bun.TOML.parse('a = "line \\\n   joined"')).toThrow();
+});
+
+test("Bun.TOML.parse decodes unicode escapes and rejects out-of-range ones", () => {
+  expect(Bun.TOML.parse('a = "\\u0041\\u00e9\\u2764"')).toEqual({ a: "A\u00e9\u2764" });
+  expect(Bun.TOML.parse('a = "\\u{1F600}"')).toEqual({ a: "\u{1F600}" });
+  // Above U+10FFFF.
+  expect(() => Bun.TOML.parse('a = "\\u{110000}"')).toThrow();
+  // Non-hex digits in a fixed-length escape.
+  expect(() => Bun.TOML.parse('a = "\\uZZZZ"')).toThrow();
+});

From 000af9785d30b3316b95760d52511e17f6a39694 Mon Sep 17 00:00:00 2001
From: robobun <robobun@oven.sh>
Date: Wed, 10 Jun 2026 07:10:54 +0000
Subject: [PATCH 3/3] ci: retrigger