diff --git a/src/bun_alloc/bun_alloc.zig b/src/bun_alloc/bun_alloc.zig index 18be563493c..dc1d2298b90 100644 --- a/src/bun_alloc/bun_alloc.zig +++ b/src/bun_alloc/bun_alloc.zig @@ -61,7 +61,10 @@ pub const Result = struct { status: ItemStatus, pub fn hasCheckedIfExists(r: *const Result) bool { - return r.index.index != Unassigned.index; + // A reclaimed slot (status == .unknown but index is a real slot) must be + // treated as "never looked up" so callers re-resolve instead of returning + // stale data. See BSSMap.remove / BSSMap.getOrPut. + return r.status != .unknown; } pub fn isOverflowing(r: *const Result, comptime count: usize) bool { @@ -526,6 +529,11 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ mutex: Mutex = .{}, backing_buf: [count]ValueType, backing_buf_used: u16, + /// Slots freed by `remove()` that can be reused by the next `put()` for + /// the same key. Without this, every remove+getOrPut+put cycle consumes + /// a fresh `backing_buf` slot (and eventually overflows to the heap), + /// leaking the old slot and whatever heap pointers it owned. + reclaimable: std.AutoHashMapUnmanaged(HashKeyType, IndexType), pub var instance: *Self = undefined; @@ -542,6 +550,7 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ instance.overflow_list.zero(); instance.backing_buf_used = 0; instance.mutex = .{}; + instance.reclaimable = .{}; loaded = true; } @@ -550,6 +559,7 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ pub fn deinit(self: *Self) void { self.index.deinit(self.allocator); + self.reclaimable.deinit(self.allocator); bun.default_allocator.destroy(instance); loaded = false; } @@ -567,17 +577,25 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ const index = try self.index.getOrPut(self.allocator, _key); if (index.found_existing) { + switch (index.value_ptr.index) { + NotFound.index => return Result{ .hash = _key, .index = index.value_ptr.*, .status = .not_found }, + Unassigned.index => {}, + else => return Result{ .hash = _key, .index = index.value_ptr.*, .status = .exists }, + } + } else { + index.value_ptr.* = Unassigned; + } + + // status is .unknown — if this key previously had a real slot that was + // invalidated via remove(), hand it back so put() overwrites in place + // instead of burning a fresh slot. + if (self.reclaimable.get(_key)) |slot| { return Result{ .hash = _key, - .index = index.value_ptr.*, - .status = switch (index.value_ptr.index) { - NotFound.index => .not_found, - Unassigned.index => .unknown, - else => .exists, - }, + .index = slot, + .status = .unknown, }; } - index.value_ptr.* = Unassigned; return Result{ .hash = _key, @@ -599,6 +617,10 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ self.mutex.lock(); defer self.mutex.unlock(); + // Leave any reclaimable slot in place: getOrPut early-returns on + // NotFound before consulting reclaimable, so keeping the entry lets + // a delete→recreate→bust cycle reuse the original slot instead of + // permanently orphaning it. self.index.put(self.allocator, result.hash, NotFound) catch unreachable; } @@ -616,13 +638,24 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ self.mutex.lock(); defer self.mutex.unlock(); + // Consume any parked slot for this key up front. If the caller's + // result came from getOrPut() it already carries the reclaimed + // index, but getOrPut() early-returns on NotFound without + // consulting reclaimable — so a put() after markNotFound() would + // otherwise burn a fresh slot while the parked one is discarded. + const reclaimed = self.reclaimable.fetchRemove(result.hash); + if (result.index.index == NotFound.index or result.index.index == Unassigned.index) { - result.index.is_overflow = instance.backing_buf_used > max_index; - if (result.index.is_overflow) { - result.index.index = self.overflow_list.len(); + if (reclaimed) |kv| { + result.index = kv.value; } else { - result.index.index = instance.backing_buf_used; - instance.backing_buf_used += 1; + result.index.is_overflow = instance.backing_buf_used > max_index; + if (result.index.is_overflow) { + result.index.index = self.overflow_list.len(); + } else { + result.index.index = instance.backing_buf_used; + instance.backing_buf_used += 1; + } } } @@ -643,7 +676,15 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ } } - /// Returns true if the entry was removed + /// Invalidate the cache entry for `key` so the next `getOrPut` returns + /// `.unknown`. The backing slot (and its value) is preserved in + /// `reclaimable` so the next `put()` for the same key overwrites it in + /// place instead of consuming a fresh slot. We do not `deinit()` the + /// value here because other cache entries may still hold raw pointers + /// into it (e.g. DirInfo.enclosing_package_json references a parent + /// directory's PackageJSON). + /// + /// Returns true if the entry was removed. pub fn remove(self: *Self, denormalized_key: []const u8) bool { self.mutex.lock(); defer self.mutex.unlock(); @@ -654,28 +695,15 @@ pub fn BSSMap(comptime ValueType: type, comptime count: anytype, comptime store_ denormalized_key; const _key = bun.hash(key); - return self.index.remove(_key); - // const index = self.index.get(_key) orelse return; - // switch (index) { - // Unassigned.index, NotFound.index => { - // self.index.remove(_key); - // }, - // 0...max_index => { - // if (comptime hasDeinit(ValueType)) { - // instance.backing_buf[index].deinit(); - // } - - // instance.backing_buf[index] = undefined; - // }, - // else => { - // const i = index - count; - // if (hasDeinit(ValueType)) { - // self.overflow_list.items[i].deinit(); - // } - // self.overflow_list.items[index - count] = undefined; - // }, - // } - + const kv = self.index.fetchRemove(_key) orelse return false; + if (kv.value.index != NotFound.index and kv.value.index != Unassigned.index) { + // Best-effort: if we can't stash the hint, degrade to the + // pre-reclaim behavior (leak one slot) rather than crash + // mid–cache-bust. Unlike index.put in markNotFound above, + // this is a leak-reduction hint, not load-bearing state. + self.reclaimable.put(self.allocator, _key, kv.value) catch {}; + } + return true; } pub fn values(self: *Self) []ValueType { diff --git a/src/resolver/resolver.zig b/src/resolver/resolver.zig index 71c953035bb..25b6b8aef0a 100644 --- a/src/resolver/resolver.zig +++ b/src/resolver/resolver.zig @@ -2272,7 +2272,10 @@ pub const Resolver = struct { if (rfs.entries.atIndex(cached_dir_entry_result.index)) |cached_entry| { if (cached_entry.* == .entries) { - if (cached_entry.entries.generation >= r.generation) { + // A reclaimed slot (status == .unknown) was busted — its data is + // stale regardless of generation, but the DirEntry allocation can + // still be reused in place. + if (cached_dir_entry_result.status == .exists and cached_entry.entries.generation >= r.generation) { dir_entries_option = cached_entry; needs_iter = false; } else { @@ -2316,6 +2319,13 @@ pub const Resolver = struct { }) catch unreachable; } + var reuse_package_json: ?*PackageJSON = null; + var reuse_tsconfig_json: ?*TSConfigJSON = null; + if (r.dir_cache.atIndex(dir_cache_info_result.index)) |prev| { + reuse_package_json = prev.package_json; + reuse_tsconfig_json = prev.tsconfig_json; + } + // We must initialize it as empty so that the result index is correct. // This is important so that browser_scope has a valid index. const dir_info_ptr = r.dir_cache.put(&dir_cache_info_result, .{}) catch unreachable; @@ -2336,6 +2346,8 @@ pub const Resolver = struct { allocators.NotFound, open_dir, package_id, + reuse_package_json, + reuse_tsconfig_json, ); return dir_info_ptr; } @@ -2601,6 +2613,11 @@ pub const Resolver = struct { r: *ThisResolver, file: string, dirname_fd: FD, + /// When re-reading a directory whose cache was busted, pass the + /// previous heap allocation so we overwrite it in place instead of + /// leaking it. Child DirInfo.enclosing_tsconfig_json pointers keep + /// working because the address is preserved. + reuse: ?*TSConfigJSON, ) !?*TSConfigJSON { // Since tsconfig.json is cached permanently, in our DirEntries cache // we must use the global allocator @@ -2639,6 +2656,26 @@ pub const Resolver = struct { result.base_url_for_paths = r.fs.dirname_store.append(string, r.fs.absBuf(&paths, bufs(.tsconfig_base_url))) catch unreachable; } + if (reuse) |prev| { + // Each value is a []string slice separately heap-allocated in + // TSConfigJSON.parse(); free them before the map backing itself + // (matches the extends-merge cleanup in dirInfoUncached). + for (prev.paths.values()) |v| bun.default_allocator.free(v); + prev.paths.deinit(); + // jsx.factory/.fragment are heap []string from + // parseMemberExpressionForJSX when set in JSON; the defaults are + // static slices, so only free when jsx_flags records they were + // parsed. Remaining per-field allocations (jsx.import_source, + // the tsconfig file-text buffer) need a TSConfigJSON contents + // handle / centralized deinitOwnedFields() to close fully and + // are tracked as follow-ups alongside the ExportsMap gap. + if (prev.jsx_flags.contains(.factory)) bun.default_allocator.free(prev.jsx.factory); + if (prev.jsx_flags.contains(.fragment)) bun.default_allocator.free(prev.jsx.fragment); + prev.* = result.*; + bun.destroy(result); + return prev; + } + return result; } @@ -2652,6 +2689,11 @@ pub const Resolver = struct { file: string, dirname_fd: FD, package_id: ?Install.PackageID, + /// When re-reading a directory whose cache was busted, pass the + /// previous heap allocation so we overwrite it in place instead of + /// leaking it. Child DirInfo.enclosing_package_json pointers keep + /// working because the address is preserved. + reuse: ?*PackageJSON, comptime allow_dependencies: bool, ) !?*PackageJSON { var pkg: PackageJSON = undefined; @@ -2675,6 +2717,45 @@ pub const Resolver = struct { ) orelse return null; } + if (reuse) |prev| { + // Release the previous value's owned heap before overwriting so + // each hot-reload doesn't leak the prior file text and map backing + // arrays. StringMap.put() dupes values, so main_fields/browser_map + // own their value strings; name/version are explicitly duped in + // PackageJSON.parse(); dependencies.source_buf aliases + // source.contents so is covered by that free. exports/imports tree + // nodes and scripts/config are left alone — there is no recursive + // deinit for ExportsMap yet and they are uncommon compared to the + // per-reload file-contents leak this is primarily closing. + const alloc = bun.default_allocator; + if (prev.source.contents.len > 0) alloc.free(prev.source.contents); + if (prev.name.len > 0) alloc.free(prev.name); + if (prev.version.len > 0) alloc.free(prev.version); + prev.main_fields.deinit(); + // browser_map has dupe_keys=false but its keys are caller-duped + // via allocator.dupe(u8, fs.normalize(...)) in PackageJSON.parse; + // main_fields keys are static slices from r.opts.main_fields. + for (prev.browser_map.map.keys()) |k| alloc.free(k); + prev.browser_map.deinit(); + prev.dependencies.map.deinit(alloc); + switch (prev.side_effects) { + .unspecified, .false => {}, + .map => |*m| m.deinit(alloc), + .glob => |*g| { + // Each pattern is separately duped by normalizePathForGlob. + for (g.items) |s| alloc.free(s); + g.deinit(alloc); + }, + .mixed => |*m| { + m.exact.deinit(alloc); + for (m.globs.items) |s| alloc.free(s); + m.globs.deinit(alloc); + }, + } + prev.* = pkg; + return prev; + } + return PackageJSON.new(pkg); } @@ -2980,7 +3061,10 @@ pub const Resolver = struct { if (rfs.entries.atIndex(cached_dir_entry_result.index)) |cached_entry| { if (cached_entry.* == .entries) { - if (cached_entry.entries.generation >= r.generation) { + // A reclaimed slot (status == .unknown) was busted — its data is + // stale regardless of generation, but the DirEntry allocation can + // still be reused in place. + if (cached_dir_entry_result.status == .exists and cached_entry.entries.generation >= r.generation) { dir_entries_option = cached_entry; needs_iter = false; } else { @@ -3018,6 +3102,25 @@ pub const Resolver = struct { bun.fs.debug("readdir({f}, {s}) = {d}", .{ open_dir, dir_path, dir_entries_ptr.data.count() }); } + // If this key previously had a slot (busted via bustDirCache), reuse + // its heap-allocated PackageJSON / TSConfigJSON instead of leaking + // them. atIndex() returns null for Unassigned/NotFound, so this is a + // no-op on first population. + // + // Note: if the file was deleted (or fails to parse) between busts, + // dirInfoUncached never consumes these and the allocations are + // orphaned. That's a one-off leak per delete event, not per save; + // freeing here would UAF through child DirInfo.enclosing_* aliases, + // and parking the pointer back on the new DirInfo would make the + // resolver think the file still exists. Accepting the bounded leak + // is the conservative choice. + var reuse_package_json: ?*PackageJSON = null; + var reuse_tsconfig_json: ?*TSConfigJSON = null; + if (r.dir_cache.atIndex(queue_top.result.index)) |prev| { + reuse_package_json = prev.package_json; + reuse_tsconfig_json = prev.tsconfig_json; + } + // We must initialize it as empty so that the result index is correct. // This is important so that browser_scope has a valid index. const dir_info_ptr = try r.dir_cache.put(&queue_top.result, DirInfo{}); @@ -3032,6 +3135,8 @@ pub const Resolver = struct { top_parent.index, open_dir, null, + reuse_package_json, + reuse_tsconfig_json, ); if (queue_slice.len == 0) { @@ -3998,6 +4103,10 @@ pub const Resolver = struct { parent_index: allocators.IndexType, fd: FileDescriptorType, package_id: ?Install.PackageID, + /// Previous heap allocations to overwrite in place when re-populating + /// a DirInfo slot after bustDirCache(). Null on first population. + reuse_package_json: ?*PackageJSON, + reuse_tsconfig_json: ?*TSConfigJSON, ) anyerror!void { const result = _result; @@ -4152,9 +4261,9 @@ pub const Resolver = struct { const entry = lookup.entry; if (entry.kind(rfs, r.store_fd) == .file) { info.package_json = if (r.usePackageManager() and !info.hasNodeModules() and !info.isNodeModules()) - r.parsePackageJSON(path, if (FeatureFlags.store_file_descriptors) fd else .invalid, package_id, true) catch null + r.parsePackageJSON(path, if (FeatureFlags.store_file_descriptors) fd else .invalid, package_id, reuse_package_json, true) catch null else - r.parsePackageJSON(path, if (FeatureFlags.store_file_descriptors) fd else .invalid, null, false) catch null; + r.parsePackageJSON(path, if (FeatureFlags.store_file_descriptors) fd else .invalid, null, reuse_package_json, false) catch null; if (info.package_json) |pkg| { if (pkg.browser_map.count() > 0) { @@ -4207,6 +4316,7 @@ pub const Resolver = struct { info.tsconfig_json = r.parseTSConfig( tsconfigpath, if (FeatureFlags.store_file_descriptors) fd else .zero, + reuse_tsconfig_json, ) catch |err| brk: { const pretty = tsconfigpath; if (err == error.ENOENT or err == error.FileNotFound) { @@ -4223,7 +4333,7 @@ pub const Resolver = struct { while (current.extends.len > 0) { const ts_dir_name = Dirname.dirname(current.abs_path); const abs_path = ResolvePath.joinAbsStringBuf(ts_dir_name, bufs(.tsconfig_path_abs), &[_]string{ ts_dir_name, current.extends }, .auto); - const parent_config_maybe = r.parseTSConfig(abs_path, bun.invalid_fd) catch |err| { + const parent_config_maybe = r.parseTSConfig(abs_path, bun.invalid_fd, null) catch |err| { r.log.addDebugFmt(null, logger.Loc.Empty, r.allocator, "{s} loading tsconfig.json extends {f}", .{ @errorName(err), bun.fmt.QuotedFormatter{ @@ -4284,7 +4394,24 @@ pub const Resolver = struct { // without this, every intermediate config in an extends chain leaks on // each dirInfoUncached() call, which is especially bad under HMR where // bustDirCache triggers a re-parse of the whole chain on every reload. - bun.destroy(parent_config); + // + // Exception: the reused allocation must survive because child + // DirInfos that weren't busted still alias it via + // enclosing_tsconfig_json. It's handled below instead. + if (parent_config != reuse_tsconfig_json) { + bun.destroy(parent_config); + } + } + // Preserve the reused allocation's address: if the merged + // result landed in a fresh base-config allocation, move it + // into the reused struct so child enclosing_tsconfig_json + // pointers stay valid. + if (reuse_tsconfig_json) |prev| { + if (merged_config != prev) { + prev.* = merged_config.*; + bun.destroy(merged_config); + merged_config = prev; + } } info.tsconfig_json = merged_config; } diff --git a/test/js/bun/resolve/bust-dir-cache-leak.test.ts b/test/js/bun/resolve/bust-dir-cache-leak.test.ts new file mode 100644 index 00000000000..66854a97006 --- /dev/null +++ b/test/js/bun/resolve/bust-dir-cache-leak.test.ts @@ -0,0 +1,72 @@ +import { expect, test } from "bun:test"; +import { bunEnv, bunExe, tempDir } from "harness"; + +// Resolver.bustDirCache() only dropped the hash→index mapping in BSSMap; the +// backing DirInfo slot was orphaned (along with whatever heap pointers it +// owned), and the next lookup of the same directory allocated a fresh slot. +// Over a long --hot / --watch / FileSystemRouter.reload() session this grew +// without bound — first exhausting the 2048-entry BSS backing_buf, then +// spilling into heap overflow blocks forever. The fix reclaims the slot so +// put() overwrites it in place. +test("bustDirCache reuses DirInfo and EntriesOption slots across repeated reloads", async () => { + // A tree of route directories so each reload() busts and re-resolves many + // DirInfo + EntriesOption slots. FileSystemRouter.reload() calls + // bustDirCacheRecursive which readDirInfo()s every directory, busts it, then + // re-resolves from the root — exactly the bust/getOrPut/put cycle that used + // to burn a fresh backing_buf slot. + using dir = tempDir("bust-dir-cache-leak", { + "pages/index.tsx": "export default 1;", + "pages/a/index.tsx": "export default 1;", + "pages/a/n/index.tsx": "export default 1;", + "pages/b/index.tsx": "export default 1;", + "pages/b/n/index.tsx": "export default 1;", + "pages/c/index.tsx": "export default 1;", + "pages/c/n/index.tsx": "export default 1;", + "pages/d/index.tsx": "export default 1;", + "pages/d/n/index.tsx": "export default 1;", + "run.ts": ` + const router = new Bun.FileSystemRouter({ + dir: import.meta.dir + "/pages", + style: "nextjs", + }); + void router.routes; + + const warmup = 50; + const iters = 1000; + + for (let i = 0; i < warmup; i++) router.reload(); + Bun.gc(true); + const before = process.memoryUsage.rss(); + + for (let i = 0; i < iters; i++) router.reload(); + Bun.gc(true); + const after = process.memoryUsage.rss(); + + const perIter = Math.round((after - before) / iters); + console.log(JSON.stringify({ perIter })); + `, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "run.ts"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stderr).toBe(""); + const { perIter } = JSON.parse(stdout.trim().split("\n").at(-1)!); + // Without slot reuse, each reload() of this 9-directory tree leaks roughly + // 14 KB/iteration (9 DirInfo + 9 EntriesOption structs in overflow blocks, + // plus DirnameStore/FilenameStore duplicates). With the fix, slots are + // overwritten in place and growth drops to the low single-digit KB from + // unrelated dirname-store appends. + expect(perIter).toBeLessThan(7 * 1024); + expect(exitCode).toBe(0); + // Spawning a debug+ASAN subprocess has several seconds of startup overhead + // before the reload loop even runs; the default 5s per-test budget isn't + // enough for that plus ~1s of iterations. +}, 30_000);