Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 13 additions & 5 deletions scripts/build/features-json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,25 @@ export function parsePackedFeaturesList(cwd: string): string[] {
const sourcePath = resolve(cwd, "src", "analytics", "lib.rs");
const source = readFileSync(sourcePath, "utf8");

const invocation = source.match(/define_features!\s*\{([\s\S]*?)\n\s*\}/);
if (invocation === null) {
// The macro now recurses internally (`define_features! { @storage ... }`),
// so several invocation-shaped blocks exist; the entry list is the one whose
// body contains `<index> => (...)` arms. Scan every block and keep the
// entries we find — the density check below still rejects partial parses.
const invocations = [...source.matchAll(/define_features!\s*\{([\s\S]*?)\n\s*\}/g)];
if (invocations.length === 0) {
throw new BuildError(`Could not find the define_features! invocation in ${sourcePath}`, {
hint: "parsePackedFeaturesList() in scripts/build/features-json.ts needs updating to match the new shape.",
});
}

const entries: { index: number; name: string }[] = [];
const entryRe = /(\d+)\s*=>\s*\(\s*\w+\s*,\s*"((?:[^"\\]|\\.)*)"\s*\)/g;
for (const m of invocation[1]!.matchAll(entryRe)) {
entries.push({ index: Number(m[1]), name: m[2]! });
// `<index> => (<rust_ident>, "<feature name>")` with an optional
// `, core = IDENT` alias of the bun_core feature static.
const entryRe = /(\d+)\s*=>\s*\(\s*\w+\s*,\s*"((?:[^"\\]|\\.)*)"\s*(?:,\s*core\s*=\s*\w+\s*)?\)/g;
for (const invocation of invocations) {
for (const m of invocation[1]!.matchAll(entryRe)) {
entries.push({ index: Number(m[1]), name: m[2]! });
}
}
if (entries.length === 0) {
throw new BuildError(`Parsed zero entries from define_features! in ${sourcePath}`, {
Expand Down
178 changes: 85 additions & 93 deletions src/analytics/lib.rs

Large diffs are not rendered by default.

77 changes: 26 additions & 51 deletions src/analytics/schema.rs
Original file line number Diff line number Diff line change
@@ -1,75 +1,61 @@
// GENERATED: re-run the analytics schema generator (peechy) with .rs output
// source: src/analytics/schema.zig
// TODO(port): regenerate remaining analytics::* types for Rust
// Hand-ported subset — the remaining analytics::* types are unused at runtime
// and come back with the next peechy regen (see the `analytics` mod below).

use bun_core::Error;

// ──────────────────────────────────────────────────────────────────────────
// Reader / Writer
// ──────────────────────────────────────────────────────────────────────────
//
// Zig's peechy codec exposes a concrete `Reader` struct and a comptime-generic
// `Writer(WritableStream)` struct, but every generated `decode`/`encode` takes
// `reader: anytype` / `writer: anytype` — i.e. structural duck typing. Per
// PORTING.md §Comptime reflection, `anytype` → trait bound: the *protocol* is
// the trait, and the Zig `Reader` struct is one concrete impl (`BufReader`
// below).
//
// Only the primitive-int / byte-slice surface is ported. Zig's
// `readValue(comptime T)` / `writeValue(comptime T, ...)` switch on
// `@typeInfo(T)` to dispatch to enum/packed-struct/`.decode` paths; that
// reflection has no Rust equivalent, so per-type `decode`/`encode` impls call
// the primitive methods directly (which is what the generated schema bodies
// already do).

/// Zig: `Reader.ReadError = error{EOF}`.
// PORT NOTE: peechy's two error cases (`EOF`, `InvalidValue`) are folded into
// The peechy codec protocol is the `Reader` trait below; `BufReader` is one
// concrete impl. Only the primitive-int / byte-slice surface is implemented;
// per-type `decode`/`encode` impls call the primitive methods directly
// (which is what the generated schema bodies already do).

// peechy's two error cases (`EOF`, `InvalidValue`) are folded into
// the crate-wide `bun_core::Error` so downstream `decode` signatures stay
// `Result<_, bun_core::Error>` without an extra `From` hop.
pub(crate) const EOF: Error = Error::TODO; // TODO(port): Error::from_name("EOF") once name→code table lands
// (`Error::from_name` interns at runtime, so this is a fn, not a const.)
/// Returns the peechy `EOF` error as a crate-wide `bun_core::Error`.
///
/// The value is built by the `bun_core::err!` macro on each call, which is
/// why this is a function rather than a `const`. `BufReader::read` returns
/// it when fewer bytes remain than were requested.
#[inline]
pub(crate) fn eof() -> Error {
bun_core::err!("EOF")
}

/// Primitive integers encodable in the peechy wire format (native-endian raw
/// bytes). Zig handled this via `comptime T` + `std.mem.readIntSliceNative` /
/// `std.mem.asBytes`; Rust needs an explicit trait bound.
/// bytes).
pub use bun_core::NativeEndianInt as SchemaInt;

/// Duck-typed reader protocol for peechy `decode` impls.
///
/// Zig: `fn decode(reader: anytype) anyerror!T` — the `anytype` becomes a
/// `R: Reader` bound on the Rust side.
pub trait Reader {
/// Zig: `fn read(this, count: usize) ![]u8` — borrow `count` bytes,
/// advancing the cursor. Errors with `EOF` if fewer than `count` remain.
/// Borrow `count` bytes, advancing the cursor. Errors with `EOF` if
/// fewer than `count` remain.
fn read(&mut self, count: usize) -> Result<&[u8], Error>;

/// Reads one byte and advances the cursor (Zig: `readByte`).
///
/// Any error from `read` (e.g. `EOF` when nothing remains) is propagated.
#[inline]
fn read_byte(&mut self) -> Result<u8, Error> {
    let bytes = self.read(1)?;
    Ok(bytes[0])
}

/// Reads one byte and interprets it as a boolean (Zig: `readBool`).
///
/// Zero decodes to `false`; every non-zero byte decodes to `true`.
#[inline]
fn read_bool(&mut self) -> Result<bool, Error> {
    let byte = self.read_byte()?;
    Ok(byte != 0)
}

/// Reads a `T::SIZE`-byte integer in native-endian order (Zig:
/// `readInt(comptime T)`, i.e. `std.mem.readIntSliceNative`).
///
/// Fails with whatever error `read` reports when too few bytes remain.
#[inline]
fn read_int<T: SchemaInt>(&mut self) -> Result<T, Error> {
    self.read(T::SIZE).map(|bytes| T::from_ne_slice(bytes))
}

/// Primitive-int read (Zig: the primitive-int arm of `readValue(comptime T)`).
///
/// Simply forwards to `read_int`. Struct/enum cases are expressed as
/// per-type `decode(reader)` fns instead, since Rust has no `@typeInfo`.
#[inline]
fn read_value<T: SchemaInt>(&mut self) -> Result<T, Error> {
    let value: T = self.read_int()?;
    Ok(value)
}

/// Zig: `readByteArray` — `u32` length prefix + raw bytes.
/// `u32` length prefix + raw bytes.
#[inline]
fn read_byte_array(&mut self) -> Result<&[u8], Error> {
let len = self.read_int::<u32>()? as usize;
Expand All @@ -80,17 +66,12 @@ pub trait Reader {
}
}

// peechy `Writer` lives in `bun_options_types::schema::Writer` (the canonical
// `Vec<u8>`-backed struct port of `schema.zig:169 fn Writer(WritableStream)`).
// This crate keeps only the read side; encode users depend on options_types
// directly.
// peechy `Writer` lives in `bun_options_types::schema::Writer`. This crate
// keeps only the read side; encode users depend on options_types directly.

/// Concrete buffer-backed reader — direct port of Zig's `pub const Reader = struct`.
/// Concrete buffer-backed reader.
///
/// PORT NOTE: the Zig struct also carries a `std.mem.Allocator` for
/// `readArray`'s nested-slice case; per PORTING.md §Allocators (non-AST crate)
/// the allocator param is dropped — callers that need owned sub-arrays
/// allocate at the call site.
/// Callers that need owned sub-arrays allocate at the call site.
pub struct BufReader<'a> {
pub buf: &'a [u8],
pub remain: &'a [u8],
Expand All @@ -107,7 +88,7 @@ impl<'a> Reader for BufReader<'a> {
fn read(&mut self, count: usize) -> Result<&[u8], Error> {
let read_count = core::cmp::min(count, self.remain.len());
if read_count < count {
return Err(EOF);
return Err(eof());
}
let (slice, rest) = self.remain.split_at(read_count);
self.remain = rest;
Expand All @@ -122,10 +103,8 @@ impl<'a> Reader for BufReader<'a> {
// the schema (EventKind, EventListHeader, …) are unused at runtime today and
// will be filled in by the peechy regen.
pub mod analytics {
/// Zig: `pub const OperatingSystem = enum(u8) { _none, linux, macos, windows, wsl, android, freebsd, _ }`
// PORT NOTE: Zig's open enum (`_`) is dropped — Rust enums are closed; the
// schema decoder is the only producer of unknown discriminants and it is
// not yet ported.
// Closed enum: the schema decoder is the only producer of unknown
// discriminants and it is not yet implemented.
#[repr(u8)]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum OperatingSystem {
Expand All @@ -144,7 +123,6 @@ pub mod analytics {
Freebsd,
}

/// Zig: `pub const Architecture = enum(u8) { _none, x64, arm, _ }`
#[repr(u8)]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Architecture {
Expand All @@ -155,7 +133,6 @@ pub mod analytics {
Arm,
}

/// Zig: `pub const Platform = struct { os, arch, version: []const u8 }`
#[derive(Copy, Clone)]
pub struct Platform {
/// os
Expand All @@ -166,5 +143,3 @@ pub mod analytics {
pub version: &'static [u8],
}
}

// ported from: src/analytics/schema.zig
27 changes: 11 additions & 16 deletions src/api/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
#![warn(unused_must_use)]
//! `bun.schema.api` namespace.
//!
//! Ground truth: `src/options_types/schema.zig` (the `pub const api = struct {…}`
//! block — generated from `src/api/schema.peechy`). The full peechy → `.rs`
//! emitter is not landed yet; this crate hand-ports the slice of the schema
//! that downstream crates name today (`bun_ini`, `bun_install`, `bun_runtime`
//! Ground truth: `src/api/schema.peechy`. The full peechy → `.rs` emitter is
//! not landed yet; this crate hand-writes the slice of the schema that
//! downstream crates name today (`bun_ini`, `bun_install`, `bun_runtime`
//! bunfig parser) so they can un-gate against real field shapes.
//!
//! LAYERING: the actual data shapes (`NpmRegistry`, `NpmRegistryMap`, `Ca`,
//! `BunInstall`) were originally hand-ported in two places — here *and* in
//! `BunInstall`) were originally hand-written in two places — here *and* in
//! `bun_options_types::schema::api`. Downstream crates ended up holding values
//! of one and passing them to functions typed against the other (e.g.
//! `bun_options_types::context::install` vs. `bun_ini::load_npmrc_config`),
Expand All @@ -34,20 +33,18 @@ pub use bun_options_types::schema::api::{
// npm_registry — module path for the nested `NpmRegistry::Parser`
// ──────────────────────────────────────────────────────────────────────────

/// Zig nests `pub const Parser = struct {…}` inside `NpmRegistry`. Rust can't
/// nest a type inside a struct, so it lives in a sibling module and the
/// canonical path becomes `bun_api::npm_registry::Parser`.
/// `Parser` lives in a sibling module of `NpmRegistry`; the canonical path
/// is `bun_api::npm_registry::Parser`.
pub mod npm_registry {
use bun_url::URL;

pub use super::NpmRegistry;

// PORT NOTE: `Parser` stays generic over `L` (Log) / `S` (Source) so this
// leaf schema crate doesn't need to name `bun_logger`. The lone live body
// (`parse_registry_url_string_impl`) doesn't touch log/source — only the
// not-yet-ported `parse_registry_object` / `parse_registry` paths do, and
// those need `js_ast::Expr` so they belong upstream in the bunfig parser
// anyway.
// `Parser` stays generic over `L` (Log) / `S` (Source) so this leaf
// schema crate doesn't need to name `bun_logger`. The lone live body
// (`parse_registry_url_string_impl`) doesn't touch log/source — only
// `parse_registry_object` / `parse_registry` would, and those need
// `js_ast::Expr` so they belong upstream in the bunfig parser anyway.
pub struct Parser<'a, L, S> {
pub log: &'a mut L,
pub source: &'a S,
Expand Down Expand Up @@ -79,5 +76,3 @@ pub mod npm_registry {
}
}
}

// ported from: src/options_types/schema.zig
1 change: 1 addition & 0 deletions src/ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ bun_ptr.workspace = true
bun_paths.workspace = true
bun_collections.workspace = true
bun_sys.workspace = true
bun_perf.workspace = true
bun_wyhash.workspace = true
49 changes: 14 additions & 35 deletions src/ast/ast_memory_allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,19 @@ use bun_alloc::ast_alloc::{self, AstAllocState};
use crate::expr;
use crate::stmt;

// PERF(port): Zig used `std.heap.StackFallbackAllocator(@min(8192, std.heap.page_size_min))`
// — a small inline stack buffer with heap fallback. `bun_alloc::Arena`
// (`MimallocArena`) has no stack buffer; instead the owned arena is recycled
// `bun_alloc::Arena` (`MimallocArena`) has no inline stack buffer with heap
// fallback; instead the owned arena is recycled
// per thread via `ARENA_POOL` below so the per-module callers don't pay a fresh
// `mi_heap_new` + first-segment page faults every file. (The `AstAlloc` side
// *does* have an inline buffer now — see `bun_alloc::ast_alloc::AstAllocState`.)

// ── Thread-local arena pool ──────────────────────────────────────────────
//
// Zig's `ASTMemoryAllocator` was a `StackFallbackAllocator(8192, fallback)`:
// the 8 KB stack buffer absorbed most per-module AST scratch without touching
// the heap, and the spill went to a long-lived `fallback` arena whose pages
// stayed resident across modules. The Rust port collapsed that to one owned
// `MimallocArena` per `ASTMemoryAllocator`, so a fresh per-module instance
// (`RuntimeTranspilerStore::run`, `Bun.Transpiler.*`, the dev server) paid a
// fresh `mi_heap_new` + first-segment page faults every file, and `enter()`'s
// reset then destroyed-and-recreated that just-created heap before it was even
// used.
// With one owned `MimallocArena` per `ASTMemoryAllocator`, a fresh per-module
// instance (`RuntimeTranspilerStore::run`, `Bun.Transpiler.*`, the dev server)
// would pay a fresh `mi_heap_new` + first-segment page faults every file, and
// `enter()`'s reset would then destroy-and-recreate that just-created heap
// before it was even used.
//
// Instead, recycle one `MimallocArena` per thread: `Drop` cleans the arena
// (`reset()` bulk-frees this module's nodes — leaving it pristine) and parks
Expand All @@ -51,8 +46,6 @@ fn return_pooled_arena(arena: Arena) {
}

pub struct ASTMemoryAllocator {
// Zig fields `stack_allocator: SFA` + `bump_allocator: std.mem.Allocator` (the
// vtable into the SFA) collapse to a single bump arena.
arena: Arena,
/// When non-null, allocations route to this caller-owned arena instead of
/// `self.arena` and `Drop`/`reset` never destroy or pool anything. Must
Expand Down Expand Up @@ -229,16 +222,10 @@ impl ASTMemoryAllocator {
}

pub fn enter(&mut self) -> Scope<'_> {
// Zig: this.stack_allocator = SFA{ .fallback_allocator = arena, .. };
// this.bump_allocator = this.stack_allocator.get();
// The Zig spec OVERWRITES the entire SFA on every `enter()` (fresh
// 8 KB stack buffer + rewired fallback to the per-call arena), so any
// bytes bump-allocated by the previous `enter()` are released. The
// Rust port collapsed SFA+fallback into a single internal `Arena`
// owned by `self`, so the equivalent re-init is `arena.reset()` —
// otherwise a thread-local `ASTMemoryAllocator` reused across
// `RuntimeTranspilerStore::run()` calls grows unboundedly (one full
// AST worth of nodes per import).
// `enter()` must release any bytes bump-allocated by the previous
// `enter()`, i.e. `arena.reset()` — otherwise a thread-local
// `ASTMemoryAllocator` reused across `RuntimeTranspilerStore::run()`
// calls grows unboundedly (one full AST worth of nodes per import).
//
// ...but a *pristine* arena (fresh from `new()` / the thread-local
// pool, or just `reset()`) has nothing to discard, so the
Expand All @@ -265,8 +252,6 @@ impl ASTMemoryAllocator {
}

pub fn reset(&mut self) {
// Zig rebuilt the SFA against the stored fallback arena; Arena::reset is equivalent.
// PERF(port): was stack-fallback — profile
// Skip the `mi_heap_destroy` + `mi_heap_new` when already pristine.
if self.arena_dirty {
// The AST state's spill pointer targets the arena's heap; null it
Expand Down Expand Up @@ -352,15 +337,12 @@ impl ASTMemoryAllocator {

#[inline]
pub fn append<T>(&self, value: T) -> crate::StoreRef<T> {
    // Zig: `this.bump_allocator.create(ValueType) catch unreachable; ptr.* = value;`
    // The arena's `alloc` aborts on OOM rather than returning null, which
    // matches `catch unreachable`, so the slot below is always valid.
    let slot = self.arena().alloc(value);
    crate::StoreRef::from_bump(slot)
}

/// Zig: `this.stack_allocator.get()` — the `std.mem.Allocator` vtable into
/// the stack-fallback buffer. In the Rust port both `stack_allocator` and
/// `bump_allocator` collapse to the single `Arena`, so this returns it.
/// Returns the single `Arena` backing this allocator.
#[inline]
pub fn stack_allocator(&self) -> &Arena {
self.arena()
Expand Down Expand Up @@ -461,8 +443,7 @@ impl<'a> Scope<'a> {
}
}

// Zig callers write `defer ast_scope.exit()` immediately after `enter()`;
// porting that as RAII so `let _scope = alloc.enter();` restores the previous
// RAII: `let _scope = alloc.enter();` restores the previous
// `Expr/Stmt.Data.Store.memory_allocator` on every return path. `exit()` is
// idempotent (guarded by `entered`), so an explicit `.exit()` followed by Drop
// is harmless.
Expand All @@ -471,5 +452,3 @@ impl<'a> Drop for Scope<'a> {
self.exit();
}
}

// ported from: src/js_parser/ast/ASTMemoryAllocator.zig
Loading
Loading