Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 72 additions & 3 deletions scripts/verify-baseline-static/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,8 +878,10 @@ fn scan_aarch64(

// ARM64 padding is NOP (0xD503201F) or zeros — neither matches any of our
// classify() patterns — so we don't need to special-case tail slop.
// chunks_exact(4) naturally drops any trailing 1-3 bytes.
for (i, chunk) in bytes.chunks_exact(4).enumerate() {
// as_chunks yields `&[u8; 4]` words (dropping any trailing 1-3 bytes via
// the remainder we ignore), so the byte accesses below are statically in
// bounds.
for (i, chunk) in bytes.as_chunks::<4>().0.iter().enumerate() {
let ip = sec_addr + (i as u64) * 4;
// Skip literal-pool data. data_ranges is sorted; partition_point finds
// the first range whose start is > ip, so the candidate is the one
Expand All @@ -888,7 +890,7 @@ fn scan_aarch64(
if dr > 0 && ip < data_ranges[dr - 1].1 {
continue;
}
let w = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
let w = u32::from_le_bytes(*chunk);
total_insns += 1;
let Some(feat) = aarch64::classify(w) else {
continue;
Expand Down Expand Up @@ -1179,3 +1181,70 @@ fn main() -> ExitCode {
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// "cas x" encoding; the assertions below expect it to be reported as an
    /// LSE (post-baseline) hit with mnemonic "cas".
    const CAS: u32 = 0xc8a07c41;
    /// "fmadd" encoding; expected to be counted as an instruction but not
    /// reported (baseline).
    const FMADD: u32 = 0x1f420c20;

    /// Serialises instruction words into the little-endian byte stream that
    /// `scan_aarch64` consumes.
    fn encode(words: &[u32]) -> Vec<u8> {
        words.iter().flat_map(|w| w.to_le_bytes()).collect()
    }

    /// One post-baseline word followed by one baseline word: both are counted,
    /// only the first is reported, and it is attributed to the section start.
    #[test]
    fn scan_aarch64_flags_post_baseline_word() {
        let code = encode(&[CAS, FMADD]);

        let result = scan_aarch64(&code, 0x1000, &[], &Allowlist::new(), &[]);

        assert_eq!(result.total_insns, 2);
        assert!(result.allowlisted.is_empty());
        assert_eq!(result.violations.len(), 1);

        let report = result.violations.values().next().unwrap();
        assert_eq!(report.hits.len(), 1);

        let hit = &report.hits[0];
        assert_eq!(hit.feature, "LSE");
        assert_eq!(hit.mnemonic, "cas");
        assert_eq!(hit.ip, 0x1000);
    }

    /// Three consecutive "cas" words covered by a single symbol: the hit for
    /// word `i` must carry the address `sec_addr + i * 4`, and all three hits
    /// must land in the same violation bucket.
    #[test]
    fn scan_aarch64_maps_each_chunk_to_its_address() {
        let code = encode(&[CAS, CAS, CAS]);
        let symbols = [Sym {
            addr: 0x2000,
            end: 0x2000 + code.len() as u64,
            name: "func".to_string(),
        }];

        let result = scan_aarch64(&code, 0x2000, &symbols, &Allowlist::new(), &[]);

        assert_eq!(result.total_insns, 3);
        assert_eq!(result.violations.len(), 1);

        let report = result.violations.values().next().unwrap();
        let seen: Vec<u64> = report.hits.iter().map(|hit| hit.ip).collect();
        assert_eq!(seen, [0x2000, 0x2004, 0x2008]);
    }

    /// A full word followed by three stray bytes: the incomplete tail is not
    /// scanned, so exactly one instruction is counted and reported.
    #[test]
    fn scan_aarch64_drops_trailing_partial_word() {
        let mut code = encode(&[CAS]);
        code.extend_from_slice(&[0x00, 0x11, 0x22]);

        let result = scan_aarch64(&code, 0x1000, &[], &Allowlist::new(), &[]);

        assert_eq!(result.total_insns, 1);
        assert_eq!(result.violations.len(), 1);
    }
}
8 changes: 5 additions & 3 deletions src/ast/char_freq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,12 @@ fn scan_big(out: &mut Buffer, text: &[u8], delta: i32) {

debug_assert!(text.len() >= SCAN_BIG_CHUNK_SIZE);

let unrolled = text.len() - (text.len() % SCAN_BIG_CHUNK_SIZE);
let (chunks, remain) = text.split_at(unrolled);
// `as_chunks` yields `&[u8; SCAN_BIG_CHUNK_SIZE]` arrays (plus the tail
// remainder), so the inner `chunk[i]` accesses are statically in bounds and
// the per-element bounds checks `chunks_exact` leaves in are elided.
let (chunks, remain) = text.as_chunks::<SCAN_BIG_CHUNK_SIZE>();

for chunk in chunks.chunks_exact(SCAN_BIG_CHUNK_SIZE) {
for chunk in chunks {
// PERF: candidate for unrolling — profile
for i in 0..SCAN_BIG_CHUNK_SIZE {
deltas[chunk[i] as usize] += delta;
Expand Down
16 changes: 12 additions & 4 deletions src/bun_core/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1938,18 +1938,26 @@ pub(crate) mod strings_impl {

const HIGH_BITS: u64 = 0x8080_8080_8080_8080;
let mut copied = 0usize;
for (d, s) in dst.chunks_exact_mut(8).zip(src.chunks_exact(8)) {
let word = u64::from_ne_bytes(s.try_into().expect("infallible: size matches"));

// `dst` and `src` are the same length (asserted above), so they split
// into the same number of `&[u8; 8]` words and equal-length remainders.
// The array `s` makes the word load a plain `from_ne_bytes(*s)` with no
// fallible `try_into`, and the store a fixed-size array assignment.
let (dst_chunks, dst_remainder) = dst.as_chunks_mut::<8>();
let (src_chunks, src_remainder) = src.as_chunks::<8>();

for (d, s) in dst_chunks.iter_mut().zip(src_chunks.iter()) {
let word = u64::from_ne_bytes(*s);
let mask = word & HIGH_BITS;
if mask != 0 {
let ascii = (mask.trailing_zeros() / 8) as usize;
d[..ascii].copy_from_slice(&s[..ascii]);
return copied + ascii;
}
d.copy_from_slice(&word.to_ne_bytes());
*d = word.to_ne_bytes();
copied += 8;
}
for (d, &s) in dst[copied..].iter_mut().zip(&src[copied..]) {
for (d, &s) in dst_remainder.iter_mut().zip(src_remainder.iter()) {
if s >= 0x80 {
return copied;
}
Expand Down
4 changes: 3 additions & 1 deletion src/http_jsc/websocket_client/WebSocketUpgradeClient.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1873,7 +1873,9 @@ impl<'a> Headers8Bit<'a> {

fn iter(&self) -> impl Iterator<Item = (&[u8], &[u8])> + '_ {
self.slices
.chunks_exact(2)
.as_chunks::<2>()
.0
.iter()
.map(|pair| (pair[0].slice(), pair[1].slice()))
}

Expand Down
30 changes: 18 additions & 12 deletions src/install/lockfile/Package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3364,6 +3364,12 @@ pub mod serializer {
// raw u8 here. Layout: `ResolutionType` is `#[repr(C)]
// { tag: Tag, _padding: [u8; 7], value: ... }`, so the
// discriminant is the first byte of each element.
//
// Unlike the `Meta`/`Bin` blocks below, this stays on
// `chunks_exact`: `size_of::<ResolutionType<SemverIntType>>()`
// depends on the generic `SemverIntType`, and a const-generic
// argument that uses a type parameter needs the unstable
// `generic_const_exprs`, which this crate does not enable.
let stride = mem::size_of::<ResolutionType<SemverIntType>>();
debug_assert!(stride != 0 && src.len().is_multiple_of(stride));
for raw in src.chunks_exact(stride) {
Expand All @@ -3380,13 +3386,13 @@ pub mod serializer {
// `HasInstallScript` = 0..=2). Copying an out-of-range byte
// into either field and reading it back as the enum would
// be immediate UB, so check the raw stream bytes first.
let stride = mem::size_of::<Meta>();
let origin_at = mem::offset_of!(Meta, origin);
let install_script_at = mem::offset_of!(Meta, has_install_script);
debug_assert!(stride != 0 && src.len().is_multiple_of(stride));
for raw in src.chunks_exact(stride) {
if !matches!(raw[origin_at], 0..=2)
|| !matches!(raw[install_script_at], 0..=2)
const STRIDE: usize = mem::size_of::<Meta>();
const ORIGIN_AT: usize = mem::offset_of!(Meta, origin);
const INSTALL_SCRIPT_AT: usize = mem::offset_of!(Meta, has_install_script);
debug_assert!(STRIDE != 0 && src.len().is_multiple_of(STRIDE));
for raw in src.as_chunks::<STRIDE>().0 {
if !matches!(raw[ORIGIN_AT], 0..=2)
|| !matches!(raw[INSTALL_SCRIPT_AT], 0..=2)
{
return Err(bun_core::err!(
"Lockfile validation failed: invalid package meta"
Expand All @@ -3397,11 +3403,11 @@ pub mod serializer {
if matches!(field, PackageField::Bin) {
// `Bin.tag` is a `#[repr(u8)]` enum with discriminants
// 0..=4; validate it the same way before the copy.
let stride = mem::size_of::<Bin>();
let tag_at = mem::offset_of!(Bin, tag);
debug_assert!(stride != 0 && src.len().is_multiple_of(stride));
for raw in src.chunks_exact(stride) {
if !matches!(raw[tag_at], 0..=4) {
const STRIDE: usize = mem::size_of::<Bin>();
const TAG_AT: usize = mem::offset_of!(Bin, tag);
debug_assert!(STRIDE != 0 && src.len().is_multiple_of(STRIDE));
for raw in src.as_chunks::<STRIDE>().0 {
if !matches!(raw[TAG_AT], 0..=4) {
return Err(bun_core::err!(
"Lockfile validation failed: invalid bin tag"
));
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/image/codecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ pub fn decode(bytes: &[u8], max_pixels: u64, hint: DecodeHint) -> Result<Decoded
// entry verbatim, leaving the original RGB with α=0. Normalise
// here so
// every backend yields identical bytes for the same GIF.
for px in d.rgba.chunks_exact_mut(4) {
for px in d.rgba.as_chunks_mut::<4>().0 {
if px[3] == 0 {
px[0] = 0;
px[1] = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/webcore/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,8 +781,8 @@ pub(crate) unsafe fn construct_from_u8<const ENCODING: u8>(
// directly into a `Vec<u8>` so we never depend on an allocator-
// layout-dependent `Vec<u16> → Vec<u8>` header reinterpret.
let mut to = vec![0u8; len * 2];
for (out, &b) in to.chunks_exact_mut(2).zip(input_slice) {
out.copy_from_slice(&u16::from(b).to_ne_bytes());
for (out, &b) in to.as_chunks_mut::<2>().0.iter_mut().zip(input_slice) {
*out = u16::from(b).to_ne_bytes();
}
to
}
Expand Down
131 changes: 131 additions & 0 deletions test/cli/install/bun-lockb-field-validation.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import { install_test_helpers } from "bun:internal-for-testing";
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
import { copyFileSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";

const { parseLockfile } = install_test_helpers;
Comment thread
robobun marked this conversation as resolved.

// These tests exercise the raw-byte validation loops in the binary-lockfile
// loader (`Package::load_fields`), which iterate the `meta`/`bin` columns and
// reject out-of-range enum discriminants before the bytes are reinterpreted as
// `#[repr(u8)]` enums. A `file:` tarball dependency is used so the lockfile is
// produced and re-parsed entirely offline — no registry needed. `parseLockfile`
// drives `Lockfile::load_from_dir`, which runs `load_fields`.

const tarball = join(import.meta.dir, "bar-0.0.2.tgz");

/**
 * Runs `bun install` inside `dir` after staging the fixture tarball there.
 *
 * The tarball is copied to `<dir>/bar-0.0.2.tgz`, which is the path the tests'
 * `file:./bar-0.0.2.tgz` dependency points at. The install must print no
 * "error:" on stderr and must exit with code 0; otherwise the calling test fails.
 */
async function installFileDep(dir: string) {
  const stagedTarball = join(dir, "bar-0.0.2.tgz");
  copyFileSync(tarball, stagedTarball);

  await using install = Bun.spawn({
    cmd: [bunExe(), "install", "--no-progress"],
    cwd: dir,
    env: bunEnv,
    stdout: "ignore",
    stderr: "pipe",
  });

  // Collect stderr and the exit code together, then check stderr first so a
  // failing install surfaces its message rather than just a non-zero code.
  const [errText, code] = await Promise.all([install.stderr.text(), install.exited]);
  expect(errText).not.toContain("error:");
  expect(code).toBe(0);
}
Comment thread
robobun marked this conversation as resolved.

// Per-record byte sizes of the `meta` and `bin` package columns in a binary
// lockfile.
const META_SIZE = 88;
const BIN_SIZE = 20;

/**
 * Finds the byte offsets of the `meta` and `bin` columns in a `bun.lockb` image.
 *
 * Packages are stored struct-of-arrays, with the columns written back-to-back in
 * declaration order: name (8), name_hash (8), resolution (72 bytes in format v3,
 * 64 in v2), dependencies (8), resolutions (8), then meta (`META_SIZE` per
 * record) and bin (`BIN_SIZE` per record).
 *
 * Reads three header fields at fixed offsets: a u32 format version at byte 42,
 * a u64 record count at byte 86, and a u64 start-of-columns offset at byte 110.
 * NOTE(review): these offsets are taken as-is from the header layout this test
 * was written against; re-check them if the lockfile header changes.
 *
 * @param lockb Raw contents of a `bun.lockb` file.
 * @returns `n` (record count), `metaStart` and `binStart` (absolute byte offsets).
 * @throws Error if the format version is neither 2 nor 3, so a future format
 *   change fails loudly instead of silently corrupting the wrong byte.
 */
function packageColumns(lockb: Buffer) {
  const formatVersion = lockb.readUInt32LE(42);
  const recordCount = Number(lockb.readBigUInt64LE(86));
  const columnsBegin = Number(lockb.readBigUInt64LE(110));

  // Size of one `resolution` record, keyed by lockfile format version.
  const resolutionSizeByFormat = new Map<number, number>([
    [2, 64],
    [3, 72],
  ]);
  const resolutionSize = resolutionSizeByFormat.get(formatVersion);
  if (resolutionSize === undefined) {
    throw new Error(`unexpected bun.lockb format version ${formatVersion}`);
  }

  // Bytes per package consumed by the five columns that precede `meta`.
  const bytesBeforeMeta = 8 + 8 + resolutionSize + 8 + 8;
  const metaStart = columnsBegin + recordCount * bytesBeforeMeta;
  const binStart = metaStart + recordCount * META_SIZE;
  return { n: recordCount, metaStart, binStart };
}

// Happy path: a freshly written binary lockfile must load without the
// meta/bin validation in the field loader rejecting any record.
test("valid binary lockfile round-trips through the field loader", async () => {
  const manifest = JSON.stringify({
    name: "lockb-field-valid",
    version: "1.0.0",
    dependencies: { "dummy-package": "file:./bar-0.0.2.tgz" },
  });
  using dir = tempDir("lockb-field-valid", {
    "package.json": manifest,
    // Force the binary `bun.lockb` format instead of the text lockfile.
    "bunfig.toml": "[install]\nsaveTextLockfile = false\n",
  });
  const root = String(dir);
  await installFileDep(root);

  const { packages } = parseLockfile(root) as { packages?: Record<string, unknown> };
  // A successful parse means the validation loops accepted the real column
  // bytes; the two entries are expected to be the root package and its one
  // dependency.
  expect(packages).toBeDefined();
  expect(Object.keys(packages!).length).toBe(2);
});

// Corrupts one `meta.origin` byte in an otherwise valid lockfile and expects
// the loader to refuse it.
test("rejects a binary lockfile whose meta.origin byte is out of range", async () => {
  const manifest = JSON.stringify({
    name: "lockb-field-origin",
    version: "1.0.0",
    dependencies: { "dummy-package": "file:./bar-0.0.2.tgz" },
  });
  using dir = tempDir("lockb-field-origin", {
    "package.json": manifest,
    // Force the binary `bun.lockb` format instead of the text lockfile.
    "bunfig.toml": "[install]\nsaveTextLockfile = false\n",
  });
  const root = String(dir);
  await installFileDep(root);

  const lockfilePath = join(root, "bun.lockb");
  const image = readFileSync(lockfilePath);
  const { n: recordCount, metaStart } = packageColumns(image);

  // `Meta.origin` sits at byte 0 of each 88-byte record. `Origin` is a
  // `#[repr(u8)]` enum with discriminants 0..=2, so writing 0x42 into the last
  // record must trip the per-element check in the `meta` validation loop.
  expect(recordCount).toBeGreaterThan(0);
  const lastRecordStart = metaStart + (recordCount - 1) * META_SIZE;
  const originAt = lastRecordStart + 0;
  expect(image[originAt]).toBeLessThanOrEqual(2); // sanity: valid before the patch
  image[originAt] = 0x42;
  writeFileSync(lockfilePath, image);

  expect(() => parseLockfile(root)).toThrow("Lockfile validation failed: invalid package meta");
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Corrupts one `bin.tag` byte in an otherwise valid lockfile and expects the
// loader to refuse it.
test("rejects a binary lockfile whose bin.tag byte is out of range", async () => {
  const manifest = JSON.stringify({
    name: "lockb-field-bin",
    version: "1.0.0",
    dependencies: { "dummy-package": "file:./bar-0.0.2.tgz" },
  });
  using dir = tempDir("lockb-field-bin", {
    "package.json": manifest,
    // Force the binary `bun.lockb` format instead of the text lockfile.
    "bunfig.toml": "[install]\nsaveTextLockfile = false\n",
  });
  const root = String(dir);
  await installFileDep(root);

  const lockfilePath = join(root, "bun.lockb");
  const image = readFileSync(lockfilePath);
  const { n: recordCount, binStart } = packageColumns(image);

  // `Bin.tag` sits at byte 0 of each 20-byte record. `Tag` is a `#[repr(u8)]`
  // enum with discriminants 0..=4, so writing 0x42 into the last record must
  // trip the per-element check in the `bin` validation loop.
  expect(recordCount).toBeGreaterThan(0);
  const lastRecordStart = binStart + (recordCount - 1) * BIN_SIZE;
  const tagAt = lastRecordStart + 0;
  expect(image[tagAt]).toBeLessThanOrEqual(4); // sanity: valid before the patch
  image[tagAt] = 0x42;
  writeFileSync(lockfilePath, image);

  expect(() => parseLockfile(root)).toThrow("Lockfile validation failed: invalid bin tag");
});
Loading
Loading