From bc09d052a78a4952109ee37bd91b3782b30c0aef Mon Sep 17 00:00:00 2001 From: Alan Egerton Date: Wed, 29 Apr 2026 18:41:09 +0100 Subject: [PATCH] Support finding insertion positions from &HashTable Adds to `HashTable` the following APIs: ```rust fn find_or_find_vacant_position(&self, hash, eq) -> Result<&T, Option<VacantPosition>> unsafe fn insert_at_position(&mut self, position, value) -> &mut T ``` This enables an insertion position to be located through an immutable `&HashTable` reference (such as through a read-lock of a `RwLock`), in order that it might subsequently be used to perform an insertion when an exclusive `&mut HashTable` reference becomes available (such as through a write-lock of the same `RwLock`). In so doing, users can avoid repeated hash probing when it is known that the table has not been mutated in the interim. --- src/raw.rs | 69 ++++++++++++++++++++++++++++++++++++---------------- src/table.rs | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 21 deletions(-) diff --git a/src/raw.rs b/src/raw.rs index 39f50ef78..5df027fb9 100644 --- a/src/raw.rs +++ b/src/raw.rs @@ -1017,6 +1017,20 @@ impl RawTable { } } + /// Returns whether insertion at the given index will exceed the table's maximum load factor. + /// + /// # Safety + /// Behavior is undefined if `index > self.num_buckets()`. + #[inline] + pub(crate) unsafe fn needs_growth_to_insert_at(&self, index: usize) -> bool { + // SAFETY: Caller guarantees that `index` is in range. + let old_ctrl = unsafe { *self.table.ctrl(index) }; + + // We can avoid growing the table once we have reached our load factor if we are replacing + // a tombstone. This works since the number of EMPTY slots does not change in this case. + self.table.growth_left == 0 && old_ctrl.special_is_empty() + } + /// Inserts a new element into the table, and returns its raw bucket. /// /// This does not check if the given element already exists in the table. @@ -1030,12 +1044,8 @@ impl RawTable { // 2.
We reserve additional space (if necessary) right after calling this function. let mut index = self.table.find_insert_index(hash); - // We can avoid growing the table once we have reached our load factor if we are replacing - // a tombstone. This works since the number of EMPTY slots does not change in this case. - // // SAFETY: The function is guaranteed to return an index in the range `0..=self.num_buckets()`. - let old_ctrl = *self.table.ctrl(index); - if unlikely(self.table.growth_left == 0 && old_ctrl.special_is_empty()) { + if unlikely(self.needs_growth_to_insert_at(index)) { self.reserve(1, hasher); // SAFETY: We know for sure that `RawTableInner` has control bytes // initialized and that there is extra space in the table. @@ -1112,22 +1122,17 @@ impl RawTable { /// Searches for an element in the table. If the element is not found, /// returns `Err` with the position of a slot where an element with the - /// same hash could be inserted. - /// - /// This function may resize the table if additional space is required for - /// inserting an element. + /// same hash could be inserted (if insertion can be performed without + /// growing the table). #[inline] - pub(crate) fn find_or_find_insert_index( - &mut self, + pub(crate) fn find_or_find_insert_index_if_available( + &self, hash: u64, mut eq: impl FnMut(&T) -> bool, - hasher: impl Fn(&T) -> u64, - ) -> Result<Bucket<T>, usize> { - self.reserve(1, hasher); - + ) -> Result<Bucket<T>, Option<usize>> { unsafe { // SAFETY: - // 1. We know for sure that there is at least one empty `bucket` in the table. + // 1. Our load factor forces us to always have at least one empty `bucket` in the table. // 2. The [`RawTableInner`] must already have properly initialized control bytes since we will // never expose `RawTable::new_uninitialized` in a public API. // 3. The `find_or_find_insert_index_inner` function returns the `index` of only the full bucket, @@ -1139,19 +1144,41 @@ impl RawTable { { // SAFETY: See explanation above.
Ok(index) => Ok(self.bucket(index)), - Err(index) => Err(index), + Err(index) if unlikely(self.needs_growth_to_insert_at(index)) => Err(None), + Err(index) => Err(Some(index)), } } } + /// Searches for an element in the table. If the element is not found, + /// returns `Err` with the position of a slot where an element with the + /// same hash could be inserted. + /// + /// This function may resize the table if additional space is required for + /// inserting an element. + #[inline] + pub(crate) fn find_or_find_insert_index( + &mut self, + hash: u64, + eq: impl FnMut(&T) -> bool, + hasher: impl Fn(&T) -> u64, + ) -> Result<Bucket<T>, usize> { + self.reserve(1, hasher); + self.find_or_find_insert_index_if_available(hash, eq) + // SAFETY: + // We reserved space above, so we know for sure that insertion can be performed + // without growing the table. + .map_err(|maybe_idx| unsafe { maybe_idx.unwrap_unchecked() }) + } + /// Inserts a new element into the table at the given index with the given hash, /// and returns its raw bucket. /// /// # Safety /// /// `index` must point to a slot previously returned by - /// `find_or_find_insert_index`, and no mutation of the table must have - /// occurred since that call. + /// `find_or_find_insert_index_if_available`, and no mutation of the table must + /// have occurred since that call. #[inline] pub(crate) unsafe fn insert_at_index( &mut self, @@ -1168,8 +1195,8 @@ impl RawTable { /// # Safety /// /// `index` must point to a slot previously returned by - /// `find_or_find_insert_index`, and no mutation of the table must have - /// occurred since that call. + /// `find_or_find_insert_index_if_available`, and no mutation of the table must + /// have occurred since that call.
#[inline] pub(crate) unsafe fn insert_tagged_at_index( &mut self, diff --git a/src/table.rs b/src/table.rs index f50f575f6..e278334c9 100644 --- a/src/table.rs +++ b/src/table.rs @@ -94,6 +94,17 @@ impl HashTable { } } +/// Represents the position in a particular [`HashTable`] at which an entry with +/// some particular hash may be inserted. +/// +/// Created by [`HashTable::find_or_find_vacant_position`] if the specified element +/// does not currently exist in the table and there is capacity for its insertion +/// without reallocating. +pub struct VacantPosition { + hash: u64, + index: usize, +} + impl HashTable where A: Allocator, @@ -229,6 +240,56 @@ where self.raw.get(hash, eq) } + /// Returns a reference to an entry in the table with the given hash and + /// which satisfies the equality function passed. + /// + /// This method will call `eq` for all entries with the given hash, but may + /// also call it for entries with a different hash. `eq` should only return + /// true for the desired entry, at which point the search is stopped. + /// + /// If no matching entry is found, returns `Err(None)` (if there is no space + /// for insertion without reallocation) or `Err(Some(position))` where + /// `position` is suitable for inserting a value with the given `hash` into + /// `self`, via [`Self::insert_at_position`]. + pub fn find_or_find_vacant_position( + &self, + hash: u64, + eq: impl FnMut(&T) -> bool, + ) -> Result<&T, Option<VacantPosition>> { + match self.raw.find_or_find_insert_index_if_available(hash, eq) { + Ok(bucket) => Ok(unsafe { bucket.as_ref() }), + Err(None) => Err(None), + Err(Some(index)) => Err(Some(VacantPosition { hash, index })), + } + } + + /// Inserts `value` into `self` at `position`. + /// + /// # Safety + /// Behavior is undefined unless `self`: + /// + /// 1. is the same [`HashTable`] instance as was used to create `position`; + /// 2. has not been mutated (in any way) in the interim.
+ pub unsafe fn insert_at_position(&mut self, position: VacantPosition, value: T) -> &mut T { + unsafe { + debug_assert!( + position.index < self.raw.num_buckets() + // SAFETY: we ensured that the index is less than the number of buckets. + && !self.raw.is_bucket_full(position.index) + && !self.raw.needs_growth_to_insert_at(position.index) + ); + self.raw + // SAFETY: a `VacantPosition` can only have been constructed via + // `Self::find_or_find_vacant_position` and is never mutated, + // therefore `position.index` must point at a slot previously + // returned by `self.raw.find_or_find_insert_index_if_available`; + // the caller guarantees that the table has not been mutated in + // the interim. + .insert_at_index(position.hash, position.index, value) + .as_mut() + } + } + /// Returns a mutable reference to an entry in the table with the given hash /// and which satisfies the equality function passed. ///