diff --git a/src/codegen/bake-codegen.ts b/src/codegen/bake-codegen.ts index b60a3dba613..0da1ffd4918 100644 --- a/src/codegen/bake-codegen.ts +++ b/src/codegen/bake-codegen.ts @@ -53,7 +53,13 @@ async function run() { side: JSON.stringify(side), IS_ERROR_RUNTIME: String(file === "error"), IS_BUN_DEVELOPMENT: String(!!debug), - OVERLAY_CSS: css("../runtime/bake/client/overlay.css", !!debug), + // Wrap in `JSON.stringify` so the value parses as a JSON string + // literal directly. Raw CSS starts with `*{…}` and tripped the + // JSON lexer in release-bun revisions older than #30679 (the + // `define` auto-quote fallback). Bootstrap `bun bd` with a + // pre-#30679 release bun in the build cache still fails without + // this; explicit quoting works in both old and new snapshots. + OVERLAY_CSS: JSON.stringify(css("../runtime/bake/client/overlay.css", !!debug)), }, minify: { syntax: !debug, diff --git a/src/jsc/webcore_types.rs b/src/jsc/webcore_types.rs index e7fc53a87e1..f651d87d686 100644 --- a/src/jsc/webcore_types.rs +++ b/src/jsc/webcore_types.rs @@ -740,7 +740,6 @@ pub mod store { // ──────────────────────────────────────────────────────────────────── /// A blob store referencing a file on disk. - #[derive(Clone)] pub struct File { pub pathlike: PathOrFileDescriptor, pub mime_type: MimeType, @@ -748,8 +747,15 @@ pub mod store { pub mode: bun_sys::Mode, pub seekable: Option, pub max_size: SizeType, - /// Milliseconds since ECMAScript epoch. - pub last_modified: crate::JSTimeType, + /// Milliseconds since ECMAScript epoch. Atomic because worker-thread + /// `ReadFile` tasks (`resolve_size_and_last_modified`) write this + /// field while the JS thread may concurrently read it in + /// `get_last_modified`, and N overlapping `file.bytes()` calls share + /// one `Store` (`do_read_file` clones the `StoreRef` into each task). + /// Every writer stores the same `fstat`-derived mtime so the race is + /// idempotent, but it still needs a `Relaxed` atomic under Rust's + /// memory model. + pub last_modified: core::sync::atomic::AtomicU64, } impl Default for File { @@ -761,7 +767,26 @@ pub mod store { mode: 0, seekable: None, max_size: MAX_SIZE, - last_modified: crate::INIT_TIMESTAMP, + last_modified: core::sync::atomic::AtomicU64::new(crate::INIT_TIMESTAMP), + } + } + } + + impl Clone for File { + fn clone(&self) -> Self { + Self { + pathlike: self.pathlike.clone(), + mime_type: self.mime_type.clone(), + is_atty: self.is_atty, + mode: self.mode, + seekable: self.seekable, + max_size: self.max_size, + // Snapshot the atomic via `Relaxed`; `Clone` is a per-thread + // value copy, not a memory-ordering sync point. + last_modified: core::sync::atomic::AtomicU64::new( + self.last_modified + .load(core::sync::atomic::Ordering::Relaxed), + ), } } } @@ -1058,15 +1083,31 @@ pub mod store { core::mem::ManuallyDrop::new(self).ptr.as_ptr() } - /// Mutable access to `data` through the shared handle. The caller - /// must ensure no - /// other `&mut` to the same `Store` is live (single-threaded JS - /// event-loop discipline). + /// Mutable access to `data` through the shared handle. Zig mutates + /// `store.data` freely through any holder; this is the Rust spelling + /// of that interior-mutation pattern. + /// + /// # Safety + /// The caller asserts that no other reference (`&Store`, `&mut Store`, + /// `&Data`, `&mut Data`) to the same pointee is live for the duration + /// of the returned borrow — on this thread **or any other**. Overlapping + /// `&mut`s, including one minted through this function and one through + /// [`core::ops::Deref`], are immediate UB. + /// + /// `StoreRef` is intentionally `!Sync` to block the most direct + /// cross-thread `&StoreRef` shape, but that is only a partial guard: + /// cloned `StoreRef`s (which are `Send`) and `Blob: Sync` projecting + /// `fn store(&self) -> Option<&StoreRef>` from a shared `&Blob` both + /// route around it. This fn's precondition is the primary + /// compile-time guard on the single-owner contract — together with + /// the sibling `unsafe fn blob_store_mut` in `webcore::body` (which + /// carries the same contract via `StoreRef::as_ptr()`). Discharge + /// both in writing at every call site. #[inline] #[allow(clippy::mut_from_ref)] - pub fn data_mut(&self) -> &mut Data { - // SAFETY: caller guarantees no other `&mut` to this `Store` is - // live; see doc comment. + pub unsafe fn data_mut(&self) -> &mut Data { + // SAFETY: precondition — no aliasing `&`/`&mut` to the pointee is + // live for the returned borrow's duration (see fn doc). unsafe { &mut (*self.as_ptr()).data } } } @@ -1117,11 +1158,60 @@ pub mod store { } impl Eq for StoreRef {} - // SAFETY: `Store`'s refcount is atomic and its payload is either - // immutable-after-init or guarded by callers. + // SAFETY: `Store`'s refcount is atomic, so crossing threads with the + // handle itself is sound; the `Data` payload is either immutable-after-init + // or mutated only by the thread that currently owns the handle. Matches + // Zig's cross-thread `*Store` usage: move, don't share. + // + // CAVEAT — `Data::S3`: the `Option>` field uses a + // non-atomic refcount that is typically shared with the JS-thread + // `S3Client` via `S3::init_with_referenced_credentials`. Dropping the + // last S3-backed `StoreRef` on a non-JS thread would race the JS-thread + // `Rc::drop` on that same `S3Credentials`. Callers that route a `Store` + // across threads (worker-pool `ReadFile`/`CopyFile`/`WriteFile`) only do + // so for `Data::File`/`Data::Bytes` variants; the S3 I/O paths run + // entirely on the JS thread (see `src/runtime/webcore/S3File.rs` and + // `src/runtime/webcore/s3/`), so no such `Send` actually happens today. + // If an S3 store ever does need to cross threads, convert the credentials + // refcount to `Arc` first. unsafe impl Send for StoreRef {} - // SAFETY: `Store::ref_count` is atomic and `&StoreRef` only derefs to - // `&Store`. - unsafe impl Sync for StoreRef {} + // Intentionally NOT `Sync`: the only way to mutate `Store::data` through + // an existing `StoreRef` is `unsafe fn data_mut`, whose precondition is + // exclusivity. Dropping `Sync` closes the *direct* "two threads sharing + // `&StoreRef`" shape at the type level — `&StoreRef: !Send` is now + // auto-inferred. + // + // This is a partial guard. `StoreRef: Clone + Send` still lets callers + // route cross-thread access through cloned handles (every worker-pool + // `ReadFile`/`CopyFile`/`WriteFile` task is constructed this way), and + // container types with their own `unsafe impl Sync` — notably `Blob`, + // which exposes `fn store(&self) -> Option<&StoreRef>` — can hand out + // `&StoreRef` from a `&Blob` shared across threads. Therefore the *load-bearing* guard for the + // single-owner contract is the `unsafe fn data_mut` precondition the + // caller must discharge in writing; `!Sync` is a compile-time hint + // that catches the most obvious misuse. If a future use case genuinely + // needs shared cross-thread `Store` mutation, add synchronization + // inside `Store`. + + // Compile-time trip-wire: if `StoreRef` ever gains `Sync`, both blanket + // impls of `_NotSyncCheck` apply and `_NOT_SYNC` fails to compile with + // "conflicting impls". Guards against a future `unsafe impl Sync for + // StoreRef` being added without revisiting `data_mut`'s contract (same + // pattern as `src/runtime/shell/subproc.rs` `__pipe_reader_thread_confined`). + // Note: this only catches direct `Sync` on `StoreRef`. It does NOT + // catch container types (like `Blob`) that embed a `StoreRef` and + // claim `Sync` for themselves — see the `Send`/`!Sync` comment above + // for why `Blob: Sync` plus `Blob::store(&self) -> Option<&StoreRef>` + // currently projects a sharable `&StoreRef`; a future follow-up may + // tighten `Blob: !Sync` once its call sites are audited. + mod __store_ref_not_sync { + use super::StoreRef; + trait _NotSyncCheck { + const OK: () = (); + } + impl _NotSyncCheck<()> for T {} + impl _NotSyncCheck for T {} + const _NOT_SYNC: () = >::OK; + } } pub use store::{Store, StoreRef}; diff --git a/src/parsers/json_lexer.rs b/src/parsers/json_lexer.rs index 1b5954bd403..1968b1fb7be 100644 --- a/src/parsers/json_lexer.rs +++ b/src/parsers/json_lexer.rs @@ -1271,9 +1271,8 @@ where // `JSONLikeParser::parse_expr`'s auto-quote fallback rescue an // unquoted value that starts with one — e.g. a `Bun.build` // `define:` whose value is a raw minified CSS string starting - // with `*{...}` (`bake-codegen.ts`'s `OVERLAY_CSS`). Erroring - // here aborts `Lexer::init` before `parse_env_json` gets a - // chance to auto-quote. + // with `*{...}`. Erroring here aborts `Lexer::init` before + // `parse_env_json` gets a chance to auto-quote. // // They get a dedicated arm (not the catch-all) because these // arms must `step()` past the diff --git a/src/runtime/webcore/Blob.rs b/src/runtime/webcore/Blob.rs index 49348281ff4..530ce7cdadb 100644 --- a/src/runtime/webcore/Blob.rs +++ b/src/runtime/webcore/Blob.rs @@ -1030,7 +1030,11 @@ impl BlobExt for Blob { let content_type = self.content_type_slice(); let offset = self.offset.get(); let store = self.store().expect("infallible: store present"); - match store.data_mut() { + // Shared borrow through `StoreRef: Deref` — this + // formatter block only reads fields; no `&mut Data` is + // materialized. Shared `&Data` safely coexists with any + // concurrent worker-thread `&Data` borrow. + match &store.data { store::Data::S3(s3) => { S3File::write_format::( s3, @@ -2221,21 +2225,40 @@ impl BlobExt for Blob { // TODO: Move this to a separate `File` object or BunFile fn get_last_modified(&self, _: &JSGlobalObject) -> JSValue { if let Some(store) = self.store.get() { - if matches!(store.data, store::Data::File(_)) { - // do not hold a pattern-bound `&File` across - // `resolve_file_stat` — it materializes `&mut File` on the same - // memory (Stacked Borrows UB; the optimizer may legally cache the - // pre-call `last_modified` and return the stale `INIT_TIMESTAMP`). - // Re-read via `StoreRef::data_mut` (raw-ptr-backed accessor) after - // the mutating call. - let last_modified = store.data_mut().as_file().last_modified; - // last_modified can be already set during read. - if last_modified == jsc::INIT_TIMESTAMP && !self.is_s3() { - resolve_file_stat(store); + if !matches!(store.data, store::Data::File(_)) { + if self.is_jsdom_file.get() { + return JSValue::js_number(self.last_modified.get()); } - // Fresh borrow after possible mutation by `resolve_file_stat`. - return JSValue::js_number(store.data_mut().as_file().last_modified as f64); + return JSValue::js_number(jsc::INIT_TIMESTAMP as f64); } + + // PORT NOTE: do not hold a `&File` borrow across + // `resolve_file_stat` — it materializes `&mut File` via + // `data_mut()` (Stacked Borrows UB on overlap; the optimizer may + // legally cache the pre-call `last_modified` and return the + // stale `INIT_TIMESTAMP`). Snapshot the `AtomicU64` first, then + // call `resolve_file_stat` with no outstanding `&Data`. Mirrors + // Zig, which re-loads `store.data.file.*` each time. + // + // `last_modified` is `AtomicU64`: worker-thread `ReadFile` + // tasks may `store(Relaxed)` this field from the work pool + // (`do_read_file` clones the `StoreRef` into each task). The + // shared `&Data` borrow this line takes on the JS thread is + // sound under Rust's aliasing model (no `&mut Data` materialized). + let last_modified = match &store.data { + store::Data::File(f) => f.last_modified.load(core::sync::atomic::Ordering::Relaxed), + _ => unreachable!("checked via matches! above"), + }; + if last_modified == jsc::INIT_TIMESTAMP && !self.is_s3() { + resolve_file_stat(store); + } + // Fresh shared borrow after the possible mutation by + // `resolve_file_stat`; prior borrow is dropped. + let last_modified = match &store.data { + store::Data::File(f) => f.last_modified.load(core::sync::atomic::Ordering::Relaxed), + _ => jsc::INIT_TIMESTAMP, + }; + return JSValue::js_number(last_modified as f64); } if self.is_jsdom_file.get() { @@ -2354,16 +2377,16 @@ impl BlobExt for Blob { self.size.set(0); return; }; - // dispatch on the copied `DataTag` rather than - // `match &store.data { File(file) => … }`. The latter goes through - // `StoreRef::Deref → &Store → &Data` (no `UnsafeCell`), and that shared - // borrow is live across the arm body where `resolve_file_stat` - // materializes `&mut File` on the same memory via the raw - // `heap::alloc` pointer — Stacked Borrows UB, and under noalias the - // optimizer may legally cache the pre-call `seekable: None` and fall - // through to `self.size.get() = 0`. `StoreRef::data_mut` centralises - // the raw-ptr deref so each read here is a fresh, safe borrow. - match store.data_mut().tag() { + // PORT NOTE: dispatch on the copied `DataTag` rather than + // `match &store.data { File(file) => … }` held across + // `resolve_file_stat`. The shared `&Data` borrow is fine to make + // (no `UnsafeCell` is needed on the read path) but we must *drop* + // it before `resolve_file_stat` materializes `&mut File` — under + // noalias the optimizer may legally cache the pre-call + // `seekable: None` and fall through to `self.size.get() = 0`. + // Snapshot shape via a shared borrow, release, then conditionally + // call `resolve_file_stat`, then re-borrow shared. + match store.data.tag() { store::DataTag::Bytes => { let offset = self.offset.get(); let store_size = store.size(); @@ -2382,11 +2405,21 @@ impl BlobExt for Blob { } } store::DataTag::File => { - if store.data_mut().as_file().seekable.is_none() { + // Shared borrow: just read `seekable.is_none()`; drop + // before `resolve_file_stat` below. + let needs_stat = match &store.data { + store::Data::File(f) => f.seekable.is_none(), + _ => unreachable!("tag matched File"), + }; + if needs_stat { resolve_file_stat(store); } - // Fresh borrow after possible mutation by `resolve_file_stat`. - let file = store.data_mut().as_file(); + // Fresh shared borrow after possible mutation by + // `resolve_file_stat`. + let file = match &store.data { + store::Data::File(f) => f, + _ => unreachable!("tag matched File"), + }; if file.seekable.is_some() && file.max_size != MAX_SIZE { let store_size = file.max_size; @@ -2416,10 +2449,10 @@ impl BlobExt for Blob { let Some(store) = self.store.get() else { return (self.offset.get(), 0); }; - // see `resolve_size` — dispatch on the copied tag and re-read - // via `StoreRef::data_mut` after `resolve_file_stat` so no - // `Deref`-produced `&Data`/`&File` is live across the mutating call. - match store.data_mut().tag() { + // PORT NOTE: see `resolve_size` — shared borrows through `Deref`; + // drop before `resolve_file_stat` so no `&Data`/`&File` is live + // across the call that materializes `&mut File`. + match store.data.tag() { store::DataTag::Bytes => { let offset = self.offset.get(); let store_size = store.size(); @@ -2440,11 +2473,20 @@ impl BlobExt for Blob { (self.offset.get(), self.size.get()) } store::DataTag::File => { - if store.data_mut().as_file().seekable.is_none() { + // Shared borrow to read `seekable.is_none()`; release before + // `resolve_file_stat` below. + let needs_stat = match &store.data { + store::Data::File(f) => f.seekable.is_none(), + _ => unreachable!("tag matched File"), + }; + if needs_stat { resolve_file_stat(store); } - // Fresh borrow after possible mutation by `resolve_file_stat`. - let file = store.data_mut().as_file(); + // Fresh shared borrow after the possible mutation. + let file = match &store.data { + store::Data::File(f) => f, + _ => unreachable!("tag matched File"), + }; if file.seekable.is_some() && file.max_size != MAX_SIZE { let store_size = file.max_size; let offset = self.offset.get(); @@ -2656,7 +2698,10 @@ impl BlobExt for Blob { // not alias any outstanding borrow (other `StoreRef`s only hold raw // `NonNull`, never a long-lived `&Store`; JS execution is // single-threaded). - match store_ref.data_mut() { + // SAFETY: single-threaded JS path; no other borrow of the pointee is + // live — the function takes `&self` and releases the borrow at match + // exit (the returned raw pointer is not a reference). + match unsafe { store_ref.data_mut() } { store::Data::Bytes(bytes) => { let v: &mut [u8] = bytes.as_array_list_leak(); let len = v.len(); @@ -4212,7 +4257,11 @@ fn _on_structured_clone_deserialize>( // ScopeGuard derefs to its inner Blob. if let Some(store) = (*guard).store() { - if let store::Data::Bytes(bytes_store) = &mut store.data_mut() { + // SAFETY: deserialization runs on the JS thread; `store` + // was just freshly constructed inside the guarded `blob` + // and the match borrow is released before any other + // access to the pointee. + if let store::Data::Bytes(bytes_store) = &mut unsafe { store.data_mut() } { // Transfer ownership of the local `name: Vec` into // `stored_name` (a `Box<[u8]>`); freed by `Bytes::Drop`. bytes_store.stored_name = name.into_boxed_slice(); @@ -4366,7 +4415,13 @@ pub extern "C" fn Blob__setAsFile(this: &mut Blob, path_str: &mut BunString) { // This is not 100% correct... if let Some(store) = this.store() { - if let store::Data::Bytes(bytes) = &mut store.data_mut() { + // SAFETY: synchronous JS-thread C-ABI entry; no JS re-entry occurs + // inside the match, so no other `&Data`/`&mut Data` borrow of this + // `Store` is live for its duration. (The `Store` itself may be + // aliased by sibling `Blob`s via `dupe()`/`slice()`, but each + // `data_mut` there is likewise a synchronous JS-thread borrow that + // cannot overlap this one.) + if let store::Data::Bytes(bytes) = &mut unsafe { store.data_mut() } { if bytes.stored_name.is_empty() { // Owned heap slice // owned by `stored_name` (`Box<[u8]>`) and freed by `Bytes::Drop`. @@ -5052,8 +5107,11 @@ pub fn write_file_internal( debug_assert!(!matches!(blob_store.data, store::Data::Bytes(_))); // TODO only reset last_modified on success paths instead of resetting // last_modified at the beginning for better performance. - if let store::Data::File(ref mut file) = *blob_store.data_mut() { - file.last_modified = jsc::INIT_TIMESTAMP; + // Shared borrow: `last_modified` is `AtomicU64`, `store(Relaxed)` + // takes `&self` — no `&mut Data` materialized. + if let store::Data::File(file) = &blob_store.data { + file.last_modified + .store(jsc::INIT_TIMESTAMP, core::sync::atomic::Ordering::Relaxed); } } @@ -5633,7 +5691,14 @@ pub fn jsdom_file_construct_( blob = Blob::get::(global_this, args[0])?; if let Some(store_) = blob.store.get() { - match store_.data_mut() { + // SAFETY: synchronous JS-thread `File` constructor; no JS + // re-entry occurs inside the match, so no other `&Data`/`&mut + // Data` to this `Store` is live for its duration. (For the + // `new File([existingBlob], ...)` path `Blob::get` returns + // `existingBlob.dupe()`, so `store_` may share the `Store` + // with the originating JS `Blob`; their respective `data_mut` + // borrows are also JS-thread-synchronous and cannot overlap.) + match unsafe { store_.data_mut() } { store::Data::Bytes(bytes) => { // `get::<_, true>` on a single-Blob sequence returns // `dupe()` (a shared StoreRef), so this `Bytes` may already @@ -5876,7 +5941,10 @@ impl S3BlobDownloadTask { // Move the downloaded body into a Blob store so its lifetime is // tied to the Blob/JS view and freed via the store's finalizer. let store = Store::init(response.body.list); - let bytes: *mut [u8] = match store.data_mut() { + // SAFETY: `store` is a freshly-constructed local and is the + // sole holder of the underlying allocation; the match borrow + // is released before the next statement. + let bytes: *mut [u8] = match unsafe { store.data_mut() } { store::Data::Bytes(b) => std::ptr::from_mut(b.as_array_list()), _ => unreachable!(), }; @@ -6238,10 +6306,28 @@ fn stat_to_js_mtime(stat: &bun_sys::Stat) -> jsc::JSTimeType { /// resolve file stat like size, last_modified fn resolve_file_stat(store: &StoreRef) { - // `StoreRef::data_mut` encapsulates the raw-pointer deref under the - // `StoreRef` liveness invariant; the caller holds the only ref across - // this call, so an exclusive borrow is sound. - let file = store.data_mut().as_file_mut(); + // SAFETY: callers (`get_last_modified`, `resolve_size`, `resolved_size`) + // pass `store` from the JS thread and drop any prior `Data` borrow + // before calling; no other JS-thread `&`/`&mut Data` is live. + // + // Cross-thread aliasing note: worker-thread `ReadFile` tasks (POSIX) + // can concurrently hold a shared `&Data` to this same `Store` via + // `do_read_file`'s `StoreRef::clone` (see `read_file.rs` + // `resolve_size_and_last_modified`); those only access the `AtomicU64` + // `last_modified` field through `&File`, never `&mut`, and the atomic + // closes that observable data-level race. POSIX `WriteFile::run_with_fd` + // (`write_file.rs`) is a second worker-pool `&File` borrower: on + // fd-backed destinations it reads the non-atomic `seekable`/`mode` + // fields, so the JS-thread `&mut File` materialized below can race + // those reads at the data level. Both overlaps are benign in practice: + // every overlapping field-write by this function produces the same + // fstat-derived value a re-stat would (idempotent), and + // `seekable`/`mode`/`max_size` are small `Copy` types written once per + // `Store` lifetime. The `&mut File` still formally aliases the worker + // `&File`s under Rust's memory model; converting the whole `File` to + // interior-mutable fields would close both overlaps, but is out of + // scope for #30800 (which introduced `unsafe fn data_mut`). + let file = unsafe { store.data_mut() }.as_file_mut(); match &file.pathlike { PathOrFileDescriptor::Path(path) => { let mut buffer = bun_paths::PathBuffer::uninit(); @@ -6254,7 +6340,10 @@ fn resolve_file_stat(store: &StoreRef) { }; file.mode = stat.st_mode as bun_sys::Mode; file.seekable = Some(bun_sys::S::ISREG(stat.st_mode as _)); - file.last_modified = stat_to_js_mtime(&stat); + file.last_modified.store( + stat_to_js_mtime(&stat), + core::sync::atomic::Ordering::Relaxed, + ); } // the file may not exist yet. That's okay. _ => {} @@ -6269,7 +6358,10 @@ fn resolve_file_stat(store: &StoreRef) { }; file.mode = stat.st_mode as bun_sys::Mode; file.seekable = Some(bun_sys::S::ISREG(stat.st_mode as _)); - file.last_modified = stat_to_js_mtime(&stat); + file.last_modified.store( + stat_to_js_mtime(&stat), + core::sync::atomic::Ordering::Relaxed, + ); } _ => {} }, @@ -6523,7 +6615,9 @@ impl Any { if let Some(s) = blob.store.get() { if matches!(s.data, store::Data::Bytes(_)) && s.has_one_ref() { // `StoreRef` exposes interior-mutable `data_mut()` (no DerefMut). - let internal = s.data_mut().as_bytes_mut().to_internal_blob(); + // SAFETY: `has_one_ref()` confirms this is the sole holder; + // `Any` is JS-thread-only, so no concurrent access exists. + let internal = unsafe { s.data_mut() }.as_bytes_mut().to_internal_blob(); // StoreRef::drop on the replace below releases the store ref. blob.free_content_type(); *self = Any::InternalBlob(internal); diff --git a/src/runtime/webcore/Body.rs b/src/runtime/webcore/Body.rs index d255480c039..7baf0d37d60 100644 --- a/src/runtime/webcore/Body.rs +++ b/src/runtime/webcore/Body.rs @@ -44,27 +44,48 @@ pub(super) fn wtf_impl(s: &WTFStringImpl) -> &WTFStringImplStruct { /// Mutable view of a [`Blob`]'s backing `Store` through its /// `JsCell>` field. Centralises the per-site raw -/// `(*blob.store.get()…as_ptr()).mime_type = …` deref under the same -/// invariant `StoreRef::data_mut` already documents: -/// shared-mutable interior, single-threaded JS event-loop, no concurrent -/// `&Store` outstanding for the borrow's duration. +/// `(*blob.store.get()…as_ptr()).mime_type = …` deref used by the body-mixin +/// `consume_` helpers. +/// +/// This mirrors [`blob::StoreRef::data_mut`]: it projects `&mut` to the same +/// heap `Store` from a shared `&Blob` via `StoreRef::as_ptr()`. `StoreRef` +/// itself is `!Sync` so a single `&Blob` can't project the handle to another +/// thread, but cloned `StoreRef` handles (`Send`) and `Blob: Sync` projecting +/// `fn store(&self) -> Option<&StoreRef>` both allow other references to the +/// same `Store` to exist concurrently — discharge the precondition at every +/// call site. +/// +/// # Safety +/// For the lifetime of the returned `&mut Store`, the caller asserts that no +/// other reference (`&Store`, `&mut Store`, `&Data`, `&mut Data`) to the same +/// pointee is live — on this thread **or any other**. Same contract as +/// [`blob::StoreRef::data_mut`]. #[inline] #[allow(clippy::mut_from_ref)] -fn blob_store_mut(blob: &Blob) -> Option<&mut blob::Store> { +unsafe fn blob_store_mut(blob: &Blob) -> Option<&mut blob::Store> { blob.store .get() .as_ref() - // SAFETY: `StoreRef` invariant — pointee is a live heap `Store` while - // any `StoreRef` exists; single-threaded JS event-loop discipline - // guarantees no other `&`/`&mut Store` is live for this borrow. + // SAFETY: precondition — no aliasing `&`/`&mut` to the pointee is + // live for the returned borrow's duration (see fn doc). .map(|s| unsafe { &mut *s.as_ptr() }) } -fn set_blob_content_type(blob: &Blob, mime_type: MimeType, allocated: bool) { +/// Stamp `mime_type` onto `blob.content_type` (and the backing `Store`'s +/// `mime_type`, when present). Wraps the `blob_store_mut` back-door, so it +/// inherits the same exclusivity precondition. +/// +/// # Safety +/// Same contract as [`blob_store_mut`]: for the duration of this call, no +/// other reference (`&Store`, `&mut Store`, `&Data`, `&mut Data`) to the +/// `Blob`'s backing `Store` may be live — on this thread or any other. +unsafe fn set_blob_content_type(blob: &Blob, mime_type: MimeType, allocated: bool) { blob.content_type_was_set.set(true); match mime_type.value { Cow::Borrowed(interned) => { - if let Some(store) = blob_store_mut(blob) { + // SAFETY: precondition — no aliasing `Store` reference is live + // for this borrow (see fn doc). + if let Some(store) = unsafe { blob_store_mut(blob) } { store.mime_type = MimeType { value: Cow::Borrowed(interned), category: mime_type.category, @@ -74,7 +95,9 @@ fn set_blob_content_type(blob: &Blob, mime_type: MimeType, allocated: bool) { blob.content_type_allocated.set(false); } Cow::Owned(owned) => { - if let Some(store) = blob_store_mut(blob) { + // SAFETY: precondition — no aliasing `Store` reference is live + // for this borrow (see fn doc). + if let Some(store) = unsafe { blob_store_mut(blob) } { store.mime_type = MimeType { value: Cow::Owned(owned.clone()), category: mime_type.category, @@ -1167,7 +1190,13 @@ impl Value { true, Some(&mut allocated), ); - set_blob_content_type(blob, mime_type, allocated); + // SAFETY: synchronous JS-thread body-consumer + // continuation; no JS re-entry before the + // borrow ends, and other `StoreRef` clones + // (e.g. the originating JS `Blob`) only touch + // this `Store` on the same thread, so no + // aliasing `&`/`&mut Store` is live. + unsafe { set_blob_content_type(blob, mime_type, allocated) }; // content_slice dropped (replaces defer content_slice.deinit()) } } @@ -1177,7 +1206,12 @@ impl Value { )); blob.content_type_allocated.set(false); blob.content_type_was_set.set(true); - blob_store_mut(blob) + // SAFETY: synchronous JS-thread body-consumer + // continuation; no JS re-entry before the borrow + // ends, and other `StoreRef` clones only touch + // this `Store` on the same thread, so no aliasing + // `&`/`&mut Store` is live. + unsafe { blob_store_mut(blob) } .expect("infallible: checked above") .mime_type = bun_http_types::MimeType::TEXT; } @@ -2124,7 +2158,11 @@ pub(crate) trait BodyMixin: BodyOwnerJs + Sized { let mut allocated = false; let mime_type = MimeType::init(content_slice.slice(), true, Some(&mut allocated)); - set_blob_content_type(blob, mime_type, allocated); + // SAFETY: synchronous JS-thread `reject`-path body-consumer + // continuation; no JS re-entry before the borrow ends, and + // other `StoreRef` clones only touch this `Store` on the + // same thread, so no aliasing `&`/`&mut Store` is live. + unsafe { set_blob_content_type(blob, mime_type, allocated) }; // content_slice dropped (replaces defer content_slice.deinit()) } } @@ -2134,7 +2172,11 @@ pub(crate) trait BodyMixin: BodyOwnerJs + Sized { )); blob.content_type_allocated.set(false); blob.content_type_was_set.set(true); - blob_store_mut(blob) + // SAFETY: synchronous JS-thread body-consumer continuation; + // no JS re-entry before the borrow ends, and other `StoreRef` + // clones only touch this `Store` on the same thread, so no + // aliasing `&`/`&mut Store` is live. + unsafe { blob_store_mut(blob) } .expect("infallible: checked above") .mime_type = bun_http_types::MimeType::TEXT; } diff --git a/src/runtime/webcore/FileReader.rs b/src/runtime/webcore/FileReader.rs index e8ff33bd177..62d16d9ea12 100644 --- a/src/runtime/webcore/FileReader.rs +++ b/src/runtime/webcore/FileReader.rs @@ -116,9 +116,11 @@ impl ReadDuringJSOnPullResult { pub enum Lazy { None, - /// Intrusively-refcounted `*Blob.Store`. Uses `StoreRef` (not `Arc`) so the - /// raw pointer carries mutable provenance from `heap::alloc` for the - /// direct field writes in `open_file_blob`. + /// Intrusively-refcounted `*Blob.Store`. Uses `StoreRef` (not `Arc`) + /// because `Store` carries its own intrusive refcount and frees itself + /// when it hits zero; `Arc` would add a second, conflicting + /// refcount/deallocation path (see the `StoreRef` doc in + /// `webcore_types.rs`). Blob(blob::StoreRef), } @@ -355,16 +357,45 @@ impl FileReader { // on every path through the original `if let` body) so the `StoreRef` // is owned locally and the cell borrow is released immediately. if let Lazy::Blob(store) = self.lazy.replace(Lazy::None) { - // `StoreRef::data_mut` encapsulates the raw-pointer deref under the - // `StoreRef` liveness invariant (single-threaded JS event loop; we - // hold the only mutating handle). - match store.data_mut() { + // Shared borrow of the backing `Store` is always sound; clone + // the `File` out so `open_file_blob` can take `&mut File` on + // its own copy rather than needing `data_mut()` to materialize + // `&mut Data` on the shared `Store` (multiple `StoreRef` clones + // to the same allocation exist, e.g. the originating JS `Blob` + // — see the `Lazy::Blob(store.clone())` source in + // `ReadableStream::from_blob_copy_ref`). The clone is cheap: + // `PathLike` bumps its intrusive refcount, `last_modified` is + // an `AtomicU64` snapshot, `mime_type` clones its `Cow` + // (typically `Borrowed`), the rest is `Copy`. + // + // BEHAVIOR NOTE (vs Zig `FileReader.zig:58,109`): `open_file_blob` + // writes `file.is_atty = Some(true)` on the `&mut File`. Because + // we hand it the local clone, that cache value is discarded when + // `file_local` drops — the shared `Store` is not updated. This + // is intentional: writing back would require `data_mut()` on a + // `StoreRef` that may be aliased by the originating JS `Blob` + // and other holders, reintroducing the exact soundness hole + // #30800 closes. The cost is a repeat `open_as_nonblocking_tty` / + // `isatty` probe on a *second* `.stream()` of `Bun.file(0|1|2)`; + // the canonical `Bun.stdin`/`Bun.stdout`/`Bun.stderr` Stores are + // constructed with `is_atty` already populated (see + // `__bun_stdio_blob_store_new`), so they're unaffected. The + // `destination_file_store.is_atty` reads in `copy_file.rs` copy + // from the `File` clone at `CopyFile::create` time, so a + // `Bun.write(Bun.file(0), ...)` after a `.stream()` on the same + // `Blob` will see `None` where previously it saw `Some(true)` + // — a benign fallback from the optimized copy strategy to the + // plain one. Acknowledged as an intentional loss here rather + // than re-opening the aliasing hazard to preserve a 1-syscall + // micro-optimization on a `Bun.file(0).stream()` re-entry path. + match &store.data { blob::store::Data::S3(_) | blob::store::Data::Bytes(_) => { panic!("Invalid state in FileReader: expected file ") } blob::store::Data::File(file) => { - let open_result = Lazy::open_file_blob(file); - // drop the StoreRef; `lazy` was already cleared above + let mut file_local = file.clone(); + let open_result = Lazy::open_file_blob(&mut file_local); + // drop the StoreRef (Zig: this.lazy.blob.deref()); `lazy` was already cleared above drop(store); match open_result { Err(err) => { diff --git a/src/runtime/webcore/blob/copy_file.rs b/src/runtime/webcore/blob/copy_file.rs index 831c84a0bee..cd613b46cf3 100644 --- a/src/runtime/webcore/blob/copy_file.rs +++ b/src/runtime/webcore/blob/copy_file.rs @@ -1320,7 +1320,7 @@ impl<'a> CopyFileWindows<'a> { } fn prepare_pathlike( - pathlike: &mut PathOrFileDescriptor, + pathlike: &PathOrFileDescriptor, must_close: &mut bool, is_reading: bool, ) -> bun_sys::Result { @@ -1363,12 +1363,10 @@ impl<'a> CopyFileWindows<'a> { // Open the destination first, so that if we need to call // mkdirp(), we don't spend extra time opening the file handle for // the source. + // `prepare_pathlike` only reads `pathlike` — shared borrow through + // `StoreRef: Deref` is sufficient; no `data_mut()` needed. self.read_write_loop.destination_fd = match Self::prepare_pathlike( - &mut self - .destination_file_store - .data_mut() - .as_file_mut() - .pathlike, + &self.destination_file_store.data.as_file().pathlike, &mut self.read_write_loop.must_close_destination_fd, false, ) { @@ -1385,7 +1383,7 @@ impl<'a> CopyFileWindows<'a> { }; self.read_write_loop.source_fd = match Self::prepare_pathlike( - &mut self.source_file_store.data_mut().as_file_mut().pathlike, + &self.source_file_store.data.as_file().pathlike, &mut self.read_write_loop.must_close_source_fd, true, ) { diff --git a/src/runtime/webcore/blob/read_file.rs b/src/runtime/webcore/blob/read_file.rs index a79cdc06134..02615fd4eef 100644 --- a/src/runtime/webcore/blob/read_file.rs +++ b/src/runtime/webcore/blob/read_file.rs @@ -679,9 +679,21 @@ impl ReadFile { }; if let Some(store) = &self.store { - if let Data::File(file) = store.data_mut() { + // Route through `StoreRef: Deref` (shared + // borrow), not `data_mut()` — the only field this worker + // writes is `file.last_modified`, an `AtomicU64` whose + // `store(Relaxed)` takes `&self`. Avoiding `&mut Data` + // eliminates the Rust aliasing hazard entirely: N sibling + // `ReadFile` tasks (each `StoreRef`-cloned from the same + // JS-thread `Blob` via `do_read_file`) concurrently reaching + // this code hold `&Data` to the same allocation, which is + // always sound. + if let Data::File(file) = &store.data { let mtime = bun_sys::PosixStat::init(&stat).mtime(); - file.last_modified = jsc::to_js_time(mtime.sec as isize, mtime.nsec as isize); + file.last_modified.store( + jsc::to_js_time(mtime.sec as isize, mtime.nsec as isize), + core::sync::atomic::Ordering::Relaxed, + ); } } @@ -1209,11 +1221,19 @@ impl<'a> ReadFileUV<'a> { let stat = this.req.statbuf; // keep in sync with resolveSizeAndLastModified - if let Data::File(file) = this.store.data_mut() { + // Shared borrow through `StoreRef: Deref` — the + // only field written is `file.last_modified` (an `AtomicU64` + // whose `store(Relaxed)` takes `&self`), so we never materialize + // `&mut Data`. Libuv fs callbacks here run on the JS event-loop + // thread; the atomic is kept for structural consistency with the + // POSIX writer (where sibling worker tasks can race). + if let Data::File(file) = &this.store.data { // `uv_timespec_t` fields are `c_long` (i32 on Windows); widen to the // platform-width `isize` `to_js_time` expects. - file.last_modified = - jsc::to_js_time(stat.mtime().sec as isize, stat.mtime().nsec as isize); + file.last_modified.store( + jsc::to_js_time(stat.mtime().sec as isize, stat.mtime().nsec as isize), + core::sync::atomic::Ordering::Relaxed, + ); } if bun_sys::S::ISDIR(u32::try_from(stat.mode()).expect("int cast")) { diff --git a/test/internal/storeref-not-sync.test.ts b/test/internal/storeref-not-sync.test.ts new file mode 100644 index 00000000000..f264b889e5a --- /dev/null +++ b/test/internal/storeref-not-sync.test.ts @@ -0,0 +1,54 @@ +// Source-level guard for the `StoreRef` soundness contract (oven-sh/bun#30800). +// +// `StoreRef::data_mut(&self) -> &mut Data` hands out a mutable borrow through +// a shared, clonable handle. Combined with `unsafe impl Sync for StoreRef`, +// two threads sharing `&StoreRef` could each mint `&mut Data` to the same +// heap allocation through a safe API — immediate UB. The fix keeps `Send` +// (move-based cross-thread use), drops `Sync`, makes `data_mut` an +// `unsafe fn` whose precondition is borrow exclusivity, and adds the +// `__store_ref_not_sync` compile-time trip-wire so a future +// `unsafe impl Sync for StoreRef` fails the build with conflicting impls. +// +// The trip-wire catches regressions at compile time; this test is the +// suite-level projection of the same invariants, with a readable failure +// message instead of a rustc diagnostic. Like +// `test/internal/dead-code-escapes.test.ts`, it asserts on the source text. +// (Booleans are extracted first so a failure prints `true`/`false`, not the +// whole file.) + +import { expect, test } from "bun:test"; +import path from "path"; + +const root = path.resolve(import.meta.dir, "..", ".."); +const source = await Bun.file(path.join(root, "src", "jsc", "webcore_types.rs")).text(); + +test("StoreRef does not implement Sync", () => { + // The original #30800 hole. Cross-thread mutation must go through cloned + // (moved) handles whose call sites discharge `data_mut`'s exclusivity + // precondition — never through a shared `&StoreRef`. + // + // Anchored to the start of a line (`^\s*unsafe`) so `// `-prefixed prose — + // e.g. the trip-wire comment in `webcore_types.rs`, which quotes the exact + // phrase — can never match. + const hasSyncImpl = /^\s*unsafe impl\s+Sync\s+for\s+StoreRef\b/m.test(source); + expect(hasSyncImpl).toBe(false); +}); + +test("StoreRef::data_mut is an unsafe fn", () => { + // The precondition-bearing signature: every call site must assert, in an + // `unsafe` block, that no aliasing `&`/`&mut` to the pointee is live. + const hasUnsafeDataMut = /pub unsafe fn data_mut\s*\(\s*&self\s*\)/.test(source); + expect(hasUnsafeDataMut).toBe(true); + // And the pre-#30800 safe spelling must not come back. + const hasSafeDataMut = /pub fn data_mut\s*\(\s*&self\s*\)/.test(source); + expect(hasSafeDataMut).toBe(false); +}); + +test("the __store_ref_not_sync compile-time trip-wire is present", () => { + // If `StoreRef` ever gains `Sync`, both blanket impls of `_NotSyncCheck` + // apply and the trip-wire const fails to compile with conflicting impls. + const hasTripWireModule = source.includes("mod __store_ref_not_sync"); + expect(hasTripWireModule).toBe(true); + const hasTripWireConst = />::OK/.test(source); + expect(hasTripWireConst).toBe(true); +}); diff --git a/test/js/web/fetch/blob-write.test.ts b/test/js/web/fetch/blob-write.test.ts index 9fc61c0a110..9132607b284 100644 --- a/test/js/web/fetch/blob-write.test.ts +++ b/test/js/web/fetch/blob-write.test.ts @@ -84,3 +84,45 @@ test("Bun.file(path).stat() returns stats", async () => { expect(stat).toBeDefined(); expect(stat.size).toBe(13); // "Hello, world!" is 13 bytes }); + +// Stress the threadpool `ReadFile` path that reaches into the backing +// `Store` off the JS thread (`resolve_size_and_last_modified` on each +// worker). `do_read_file` `StoreRef::clone`s the backing handle into each +// spawned `ReadFile` task, so N concurrent `file.bytes()` calls on the +// *same* `Blob` schedule N workers that all observe the same `Store` +// allocation. The write-target (`file.last_modified`) is `AtomicU64` on +// Rust's memory model and the only worker-thread write, so the race is +// idempotent (every task stores the same `fstat`-derived mtime). Shared +// (not exclusive) borrow through `StoreRef::Deref` is what keeps the +// `&mut` aliasing hazard away under `bun bd` (ASAN + Rust's UB rules). +// Regression guard for oven-sh/bun#30800 — `StoreRef` soundness (dropped +// `Sync`, `data_mut` is now `unsafe fn`, `last_modified` converted to +// atomic to match the threading reality). +test("Bun.file().bytes() is safe under high concurrency", async () => { + const dir = tempDirWithFiles( + "bun-blob-concurrent", + Object.fromEntries( + Array.from({ length: 16 }, (_, i) => [ + `f${i}.txt`, + `content-${i}-${Buffer.alloc(1024, 65 + (i % 26)).toString()}`, + ]), + ), + ); + // Many overlapping reads per file; each goes through a distinct `ReadFile` + // task on the threadpool, and all 8 per file share ONE `Store` (the + // `Blob` is constructed once and cloned via `StoreRef::clone`). The test + // asserts every task returns the full, uncorrupted file bytes — i.e. + // the shared `Store`'s content-reading paths don't trample each other. + const results = await Promise.all( + Array.from({ length: 16 }, (_, i) => { + const file = Bun.file(path.join(dir, `f${i}.txt`)); + return Promise.all(Array.from({ length: 8 }, () => file.bytes())); + }), + ); + for (let i = 0; i < results.length; i++) { + const expected = `content-${i}-${Buffer.alloc(1024, 65 + (i % 26)).toString()}`; + for (const bytes of results[i]) { + expect(new TextDecoder().decode(bytes)).toBe(expected); + } + } +});