From 7ca757c14f3c0d25ff6e730a3737fc823efb2584 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Sat, 12 Feb 2022 13:32:27 +1100 Subject: [PATCH 01/74] recovery files added --- .gitignore | 2 +- Cargo.lock | 33 + src/rust/config/src/seg.rs | 74 +- src/rust/entrystore/src/seg/mod.rs | 20 + src/rust/server/segcache/src/lib.rs | 1 + src/rust/storage/seg/Cargo.toml | 1 + src/rust/storage/seg/src/builder.rs | 66 +- src/rust/storage/seg/src/datapool/file.rs | 26 +- src/rust/storage/seg/src/datapool/memory.rs | 7 + src/rust/storage/seg/src/demolisher.rs | 84 +++ src/rust/storage/seg/src/eviction/mod.rs | 1 + .../storage/seg/src/hashtable/hash_bucket.rs | 2 +- src/rust/storage/seg/src/hashtable/mod.rs | 251 ++++++- src/rust/storage/seg/src/lib.rs | 2 + src/rust/storage/seg/src/seg.rs | 57 ++ src/rust/storage/seg/src/segments/builder.rs | 28 +- src/rust/storage/seg/src/segments/header.rs | 2 +- src/rust/storage/seg/src/segments/segments.rs | 343 ++++++++- src/rust/storage/seg/src/tests.rs | 667 +++++++++++++++++- .../storage/seg/src/ttl_buckets/ttl_bucket.rs | 2 + .../seg/src/ttl_buckets/ttl_buckets.rs | 145 +++- 21 files changed, 1786 insertions(+), 28 deletions(-) create mode 100644 src/rust/storage/seg/src/demolisher.rs diff --git a/.gitignore b/.gitignore index addf70a9e..84cd3ff7d 100644 --- a/.gitignore +++ b/.gitignore @@ -54,4 +54,4 @@ CMAKE_BINARY_DIR .cargo # Cargo build directory -/target \ No newline at end of file +/target diff --git a/Cargo.lock b/Cargo.lock index 17e2a4201..bd96ade61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -408,6 +408,15 @@ dependencies = [ "libc", ] +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + [[package]] name = "foreign-types" version = "0.5.0" @@ -1000,6 +1009,15 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi 0.3.9", +] + [[package]] name = "rtrb" version = "0.1.4" @@ -1130,6 +1148,7 @@ dependencies = [ "rand_chacha", "rand_xoshiro", "storage-types", + "tempfile", "thiserror", ] @@ -1314,6 +1333,20 @@ dependencies = [ "winapi 0.2.8", ] +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi 0.3.9", +] + [[package]] name = "termcolor" version = "1.1.2" diff --git a/src/rust/config/src/seg.rs b/src/rust/config/src/seg.rs index 3a84b8331..012f40a12 100644 --- a/src/rust/config/src/seg.rs +++ b/src/rust/config/src/seg.rs @@ -8,6 +8,10 @@ use serde::{Deserialize, Serialize}; const MB: usize = 1024 * 1024; +// restore and graceful shutdown options +const RESTORE: bool = false; +const GRACEFUL_SHUTDOWN: bool = false; + // defaults for hashtable const HASH_POWER: u8 = 16; const OVERFLOW_FACTOR: f64 = 1.0; @@ -24,9 +28,18 @@ const COMPACT_TARGET: usize = 2; const MERGE_TARGET: usize = 4; const MERGE_MAX: usize = 8; -// datapool +// datapool (`Segments.data`) const DATAPOOL_PATH: Option<&str> = None; +// `Segments` fields +const SEGMENT_FIELDS_PATH: Option<&str> = None; + +// ttl buckets +const TTL_BUCKETS_PATH: Option<&str> = None; + +// hashtable +const HASHTABLE_PATH: Option<&str> = None; + #[derive(Copy, Clone, Debug, Serialize, Deserialize)] pub enum Eviction { None, @@ -39,6 +52,14 @@ pub enum Eviction { } // helper functions for default values +fn restore() -> bool { + RESTORE +} + +fn graceful_shutdown() -> bool { + 
GRACEFUL_SHUTDOWN +} + fn hash_power() -> u8 { HASH_POWER } @@ -75,9 +96,25 @@ fn datapool_path() -> Option { DATAPOOL_PATH.map(|v| v.to_string()) } +fn segments_fields_path() -> Option { + SEGMENT_FIELDS_PATH.map(|v| v.to_string()) +} + +fn ttl_buckets_path() -> Option { + TTL_BUCKETS_PATH.map(|v| v.to_string()) +} + +fn hashtable_path() -> Option { + HASHTABLE_PATH.map(|v| v.to_string()) +} + // definitions #[derive(Serialize, Deserialize, Debug)] pub struct Seg { + #[serde(default = "restore")] + restore: bool, + #[serde(default = "graceful_shutdown")] + graceful_shutdown: bool, #[serde(default = "hash_power")] hash_power: u8, #[serde(default = "overflow_factor")] @@ -96,11 +133,19 @@ pub struct Seg { compact_target: usize, #[serde(default = "datapool_path")] datapool_path: Option, + #[serde(default = "segments_fields_path")] + segments_fields_path: Option, + #[serde(default = "ttl_buckets_path")] + ttl_buckets_path: Option, + #[serde(default = "hashtable_path")] + hashtable_path: Option, } impl Default for Seg { fn default() -> Self { Self { + restore: restore(), + graceful_shutdown: graceful_shutdown(), hash_power: hash_power(), overflow_factor: overflow_factor(), heap_size: heap_size(), @@ -110,12 +155,21 @@ impl Default for Seg { merge_max: merge_max(), compact_target: compact_target(), datapool_path: datapool_path(), + segments_fields_path: segments_fields_path(), + ttl_buckets_path: ttl_buckets_path(), + hashtable_path: hashtable_path(), } } } // implementation impl Seg { + pub fn restore(&self) -> bool { + self.restore + } + pub fn graceful_shutdown(&self) -> bool { + self.graceful_shutdown + } pub fn hash_power(&self) -> u8 { self.hash_power } @@ -151,6 +205,24 @@ impl Seg { pub fn datapool_path(&self) -> Option { self.datapool_path.as_ref().map(|v| Path::new(v).to_owned()) } + + pub fn segments_fields_path(&self) -> Option { + self.segments_fields_path + .as_ref() + .map(|v| Path::new(v).to_owned()) + } + + pub fn ttl_buckets_path(&self) -> Option { + 
self.ttl_buckets_path + .as_ref() + .map(|v| Path::new(v).to_owned()) + } + + pub fn hashtable_path(&self) -> Option { + self.hashtable_path + .as_ref() + .map(|v| Path::new(v).to_owned()) + } } // trait definitions diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index 51b97ec92..2a224731d 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -43,16 +43,36 @@ impl Seg { // build the datastructure from the config let data = ::seg::Seg::builder() + .restore(config.restore()) .hash_power(config.hash_power()) .overflow_factor(config.overflow_factor()) .heap_size(config.heap_size()) .segment_size(config.segment_size()) .eviction(eviction) .datapool_path(config.datapool_path()) + .segments_fields_path(config.segments_fields_path()) + .ttl_buckets_path(config.ttl_buckets_path()) + .hashtable_path(config.hashtable_path()) .build(); Self { data } } + + /// Demolish (gracefully shutdown) the cache if + /// configured to do so + pub fn demolish(self, config: &T) { + let config = config.seg(); + + if config.graceful_shutdown() { + ::seg::Seg::demolisher() + .heap_size(config.heap_size()) + .overflow_factor(config.overflow_factor()) + .segments_fields_path(config.segments_fields_path()) + .ttl_buckets_path(config.ttl_buckets_path()) + .hashtable_path(config.hashtable_path()) + .demolish(self.data); + }; + } } impl EntryStore for Seg { diff --git a/src/rust/server/segcache/src/lib.rs b/src/rust/server/segcache/src/lib.rs index 4679f2bab..da9deacf4 100644 --- a/src/rust/server/segcache/src/lib.rs +++ b/src/rust/server/segcache/src/lib.rs @@ -71,6 +71,7 @@ impl Segcache { /// fully terminated. This is more likely to be used for running integration /// tests or other automated testing. 
pub fn shutdown(self) { + // TODO: demolish cache self.process.shutdown() } } diff --git a/src/rust/storage/seg/Cargo.toml b/src/rust/storage/seg/Cargo.toml index 775448ebb..ca41db941 100644 --- a/src/rust/storage/seg/Cargo.toml +++ b/src/rust/storage/seg/Cargo.toml @@ -35,6 +35,7 @@ rand_chacha = { version = "0.3.0" } rand_xoshiro = { version = "0.6.0" } storage-types = { path = "../types" } thiserror = "1.0.24" +tempfile = "3" [dev-dependencies] criterion = "0.3.4" diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 0c41a90e9..2066cccac 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -6,26 +6,41 @@ use crate::*; use std::path::Path; +use std::path::PathBuf; /// A builder that is used to construct a new [`Seg`] instance. pub struct Builder { + restore: bool, hash_power: u8, overflow_factor: f64, segments_builder: SegmentsBuilder, + ttl_buckets_path: Option, + hashtable_path: Option, } // Defines the default parameters impl Default for Builder { fn default() -> Self { Self { + restore: false, hash_power: 16, overflow_factor: 0.0, segments_builder: SegmentsBuilder::default(), + ttl_buckets_path: None, + hashtable_path: None, } } } impl Builder { + /// Specify to `Builder` and `SegmentsBuilder` whether the cache will be restored. + /// Otherwise, the cache will be created and treated as new. + pub fn restore(mut self, will_restore: bool) -> Self { + self.restore = will_restore; + self.segments_builder = self.segments_builder.restore(will_restore); + self + } + /// Specify the hash power, which limits the size of the hashtable to 2^N /// entries. 1/8th of these are used for metadata storage, meaning that the /// total number of items which can be held in the cache is limited to @@ -135,17 +150,33 @@ impl Builder { self } - /// Specify a backing file to be used for segment storage. 
- /// - /// # Panics - /// - /// This will panic if the file already exists + /// Specify a backing file to be used for `Segments.data` storage. pub fn datapool_path>(mut self, path: Option) -> Self { self.segments_builder = self.segments_builder.datapool_path(path); self } + /// Specify a backing file to be used for `Segments` fields' storage. + pub fn segments_fields_path>(mut self, path: Option) -> Self { + self.segments_builder = self.segments_builder.segments_fields_path(path); + self + } + + /// Specify a backing file to be used for `TtlBuckets` storage. + pub fn ttl_buckets_path>(mut self, path: Option) -> Self { + self.ttl_buckets_path = path.map(|p| p.as_ref().to_owned()); + self + } + + /// Specify a backing file to be used for `HashTable` storage. + pub fn hashtable_path>(mut self, path: Option) -> Self { + self.hashtable_path = path.map(|p| p.as_ref().to_owned()); + self + } + /// Consumes the builder and returns a fully-allocated `Seg` instance. + /// If `restore` and valid paths to the structures are given, `Seg` will + /// be restored. Otherwise, create a new `Seg` instance. /// /// ``` /// use seg::{Policy, Seg}; @@ -159,14 +190,35 @@ impl Builder { /// .eviction(Policy::Random).build(); /// ``` pub fn build(self) -> Seg { - let hashtable = HashTable::new(self.hash_power, self.overflow_factor); + // Build `Segments`. 
+ // If `restore` and a valid path is given, + // it will be copied back let segments = self.segments_builder.build(); - let ttl_buckets = TtlBuckets::default(); + if segments.fields_copied_back && self.restore { + // Attempt to restore `HashTable` and `TtlBuckets` + let hashtable = + HashTable::restore(self.hashtable_path, self.hash_power, self.overflow_factor); + let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path); + + // If successful, return a restored segcache + if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { + return Seg { + hashtable, + segments, + ttl_buckets, + _restored: true, + }; + } + } + // If not `restore` or restoration failed, create a new cache + let hashtable = HashTable::new(self.hash_power, self.overflow_factor); + let ttl_buckets = TtlBuckets::new(); Seg { hashtable, segments, ttl_buckets, + _restored: false, } } } diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index 1bd68c7fd..fcf7de772 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -20,23 +20,36 @@ pub struct File { } impl File { - /// Create a new `File` datapool at the given path and with the specified - /// size (in bytes). Returns an error if the file already exists, could not - /// be created, couldn't be extended to the requested size, or couldn't be + /// If there is a file at the given path, open the `File`. + /// Otherwise, create a new `File` datapool at the given path and with the specified + /// size (in bytes). 
Panics if an existing file's size + /// differs from `size`. Returns an error if the file could not be opened or + /// created, couldn't be extended to the requested size (creating), or couldn't be /// mmap'd pub fn create>( path: T, size: usize, prefault: bool, ) -> Result { + let metadata = std::fs::metadata(&path); + let file_exists = metadata.is_ok(); let file = OpenOptions::new() - .create_new(true) + .create(true) .read(true) .write(true) .open(path)?; - file.set_len(size as u64)?; + + // if the file already exists, check that its actual size + // matches the size it is expected to have + if file_exists { + assert_eq!(metadata?.len() as usize, size); + } else { + file.set_len(size as u64)?; + } + let mut mmap = unsafe { MmapOptions::new().populate().map_mut(&file)? }; - if prefault { + + if !file_exists && prefault { let mut offset = 0; while offset < size { mmap[offset] = 0; @@ -44,6 +57,7 @@ impl File { } mmap.flush()?; } + Ok(Self { mmap, size }) } } diff --git a/src/rust/storage/seg/src/datapool/memory.rs b/src/rust/storage/seg/src/datapool/memory.rs index f9aff9f88..7ff6ee783 100644 --- a/src/rust/storage/seg/src/datapool/memory.rs +++ b/src/rust/storage/seg/src/datapool/memory.rs @@ -8,6 +8,7 @@ use crate::datapool::Datapool; /// A contiguous allocation of bytes in main memory +#[derive(Clone)] // for testing pub struct Memory { data: Box<[u8]>, } @@ -34,6 +35,12 @@ impl Memory { Self { data } } + + // Used only in Segments::clone() in order to clone `Segments.data` + #[cfg(test)] + pub fn memory_from_data(data: Box<[u8]>) -> Memory { + Memory { data } + } } impl Datapool for Memory { diff --git a/src/rust/storage/seg/src/demolisher.rs b/src/rust/storage/seg/src/demolisher.rs new file mode 100644 index 000000000..e3e95ea04 --- /dev/null +++ b/src/rust/storage/seg/src/demolisher.rs @@ -0,0 +1,84 @@ +// Copyright 2021 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 +// http://www.apache.org/licenses/LICENSE-2.0 + +//! A demolisher for gracefully deconstructing a [`Seg`] instance.
+ +use crate::*; +use std::path::PathBuf; + +/// A demolisher that is used to gracefully deconstruct a [`Seg`] instance. +pub struct Demolisher { + heap_size: usize, + overflow_factor: f64, + // path at which the `Segments` fields will be stored + segments_fields_path: Option, + // path at which the `TtlBuckets` will be stored + ttl_buckets_path: Option, + // path at which the `HashTable` will be stored + hashtable_path: Option, +} + +// Defines the default parameters +impl Default for Demolisher { + fn default() -> Self { + Self { + heap_size: 64 * 1024 * 1024, + overflow_factor: 0.0, + segments_fields_path: None, + ttl_buckets_path: None, + hashtable_path: None, + } + } +} + +impl Demolisher { + /// Same as the corresponding option on `SegmentsBuilder`. + /// Specify the total heap size in bytes. The heap size will be divided by + /// the segment size to determine the number of segments to allocate. + pub fn heap_size(mut self, bytes: usize) -> Self { + self.heap_size = bytes; + self + } + + /// Same as the corresponding option on `Builder`. + /// Specify the overflow factor which was used to scale the `HashTable` and + /// provide additional capacity for chaining item buckets. A factor of 1.0 + /// will result in a hash table that is 100% larger. + /// Used for demolishing the `HashTable`. + pub fn overflow_factor(mut self, percent: f64) -> Self { + self.overflow_factor = percent; + self + } + + // Set the path for the `Segments` fields + pub fn segments_fields_path(mut self, path: Option) -> Self { + self.segments_fields_path = path; + self + } + + // Set the `TtlBuckets` path + pub fn ttl_buckets_path(mut self, path: Option) -> Self { + self.ttl_buckets_path = path; + self + } + + // Set the `HashTable` path + pub fn hashtable_path(mut self, path: Option) -> Self { + self.hashtable_path = path; + self + } + + // Demolish the cache by attempting to save the `Segments`, + // `TtlBuckets` and `HashTable`, in that order, to the paths specified. + // Returns `true` only if all three succeed; stops at the first failure.
+ pub fn demolish(self, cache: Seg) -> bool { + cache + .segments + .demolish(self.segments_fields_path, self.heap_size) + && cache.ttl_buckets.demolish(self.ttl_buckets_path) + && cache + .hashtable + .demolish(self.hashtable_path, self.overflow_factor) + } +} diff --git a/src/rust/storage/seg/src/eviction/mod.rs b/src/rust/storage/seg/src/eviction/mod.rs index f5a31111c..184944014 100644 --- a/src/rust/storage/seg/src/eviction/mod.rs +++ b/src/rust/storage/seg/src/eviction/mod.rs @@ -21,6 +21,7 @@ pub use policy::Policy; /// The `Eviction` struct is used to rank and return segments for eviction. It /// implements eviction strategies corresponding to the `Policy`. +#[derive(Clone, PartialEq)] pub struct Eviction { policy: Policy, last_update_time: Instant, diff --git a/src/rust/storage/seg/src/hashtable/hash_bucket.rs b/src/rust/storage/seg/src/hashtable/hash_bucket.rs index d29f47c41..3829951bb 100644 --- a/src/rust/storage/seg/src/hashtable/hash_bucket.rs +++ b/src/rust/storage/seg/src/hashtable/hash_bucket.rs @@ -72,7 +72,7 @@ pub(crate) const CLEAR_FREQ_SMOOTH_MASK: u64 = 0xFFF7_FFFF_FFFF_FFFF; /// Mask to get the lower 16 bits from a timestamp pub(crate) const PROC_TS_MASK: u64 = 0x0000_0000_0000_FFFF; -#[derive(Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq)] pub(crate) struct HashBucket { pub(super) data: [u64; N_BUCKET_SLOT], } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 2a0d38b12..b5a4a85a4 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -73,10 +73,12 @@ const N_BUCKET_SLOT: usize = 8; /// Maximum number of buckets in a chain. Must be <= 255. const MAX_CHAIN_LEN: u64 = 16; +use crate::datapool::*; use crate::*; use ahash::RandomState; use core::num::NonZeroU32; use metrics::{static_metrics, Counter}; +use std::path::PathBuf; mod hash_bucket; @@ -98,6 +100,7 @@ static_metrics! { /// Main structure for performing item lookup. 
Contains a contiguous allocation /// of [`HashBucket`]s which are used to store item info and metadata. +#[derive(Clone)] // for testing #[repr(C)] pub(crate) struct HashTable { hash_builder: Box, @@ -107,6 +110,8 @@ pub(crate) struct HashTable { rng: Box, started: Instant, next_to_chain: u64, + /// Is `HashTable` copied back from a file? + pub(crate) table_copied_back: bool, } impl HashTable { @@ -130,7 +135,9 @@ impl HashTable { let total_buckets = (buckets as f64 * (1.0 + overflow_factor)).ceil() as usize; let mut data = Vec::with_capacity(0); + // set number of elements in `data` to be `total_buckets` data.reserve_exact(total_buckets as usize); + // fill all elements with `HashBucket::new()` data.resize(total_buckets as usize, HashBucket::new()); debug!( "hashtable has: {} primary slots across {} primary buckets and {} total buckets", @@ -152,15 +159,224 @@ impl HashTable { rng: Box::new(rng()), started: Instant::recent(), next_to_chain: buckets as u64, + table_copied_back: false, } } + pub fn restore(hashtable_path: Option, cfg_power: u8, overflow_factor: f64) -> Self { + // if there is a path to restore from, restore the `HashTable` + if let Some(file) = hashtable_path { + // restore() assumes no changes in `power`. + // I.e. 
config specifies same `power` as `HashTable` we are + // restoring from + // TODO: Detect a change of `power` and adjust `HashTable` accordingly + + let slots = 1_u64 << cfg_power; + let buckets = slots / 8; + let total_buckets = (buckets as f64 * (1.0 + overflow_factor)).ceil() as usize; + let bucket_size = ::std::mem::size_of::(); + // size from all `HashBucket`s in `data` + let buckets_size = total_buckets * bucket_size; + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + + buckets_size // `data` + + started_size; + + // Mmap file + let pool = File::create(file, hashtable_size, true) + .expect("failed to allocate file backed storage"); + let file_data = Box::new(pool.as_slice()); + + // create blank bytes to copy data into + let mut bytes = vec![0; hashtable_size]; + // retrieve bytes from mmapped file + bytes.copy_from_slice(&file_data[0..hashtable_size]); + + // ----- Re-initialise `hash_builder` ----- + + let hash_builder = RandomState::with_seeds( + 0xbb8c484891ec6c86, + 0x0522a25ae9c769f9, + 0xeed2797b9571bc75, + 0x4feb29c1fbbd59d0, + ); + + // ----- Retrieve `power` --------- + + let mut offset = 0; + let mut end = u64_size; + + let power = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; + // TODO: compare `cfg_power` and `power` + + // ----- Retrieve `mask` --------- + + offset += u64_size; + end += u64_size; + + let mask = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; + + // ----- Retrieve `data` --------- + offset += u64_size; + end += buckets_size; + + let mut data = Vec::with_capacity(0); + data.reserve_exact(total_buckets as usize); + + // Get each `HashBucket` from the raw bytes + for id in 0..total_buckets { + let begin = offset + (bucket_size as usize * id); + let finish = begin + bucket_size as usize; + + // cast bytes to `HashBucket` + let bucket = unsafe { *(bytes[begin..finish].as_mut_ptr() as *mut HashBucket) 
}; + data.push(bucket); + } + + // ----- Retrieve `started` --------- + + offset += buckets_size; + end += started_size; + + let started = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; + + // ----- Retrieve `next_to_chain` --------- + + offset += started_size; + end += u64_size; + + let next_to_chain = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; + + Self { + hash_builder: Box::new(hash_builder), + power, + mask, + data: data.into_boxed_slice(), + rng: Box::new(rng()), + started, + next_to_chain, + table_copied_back: true, + } + } + // otherwise, create a new `HashTable` + else { + HashTable::new(cfg_power, overflow_factor) + } + } + + /// Demolishes the `HashTable` by storing it to + /// PMEM (if a path is specified) + pub fn demolish(&self, hashtable_path: Option, overflow_factor: f64) -> bool { + let mut gracefully_shutdown = false; + + // if a path is specified, copy all the `HashBucket`s + // to the file specified by `hashtable_path` + if let Some(file) = hashtable_path { + let slots = 1_u64 << self.power; + let buckets = slots / 8; + let total_buckets = (buckets as f64 * (1.0 + overflow_factor)).ceil() as usize; + let bucket_size = ::std::mem::size_of::(); + // size from all `HashBucket`s in `data` + let buckets_size = total_buckets * bucket_size; + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + + buckets_size // `data` + + started_size; + + // Mmap file + let mut pool = File::create(file, hashtable_size, true) + .expect("failed to allocate file backed storage"); + let file_data = Box::new(pool.as_mut_slice()); + + // --------------------- Store `power` ----------------- + let mut offset = 0; + let mut end = u64_size; + + // cast `power` to byte pointer + let byte_ptr = (&self.power as *const u64) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { 
::std::slice::from_raw_parts(byte_ptr, u64_size) }; + + // store `power` back to mmapped file + file_data[offset..end].copy_from_slice(bytes); + + // --------------------- Store `mask` ----------------- + offset += u64_size; + end += u64_size; + + // cast `mask` to byte pointer + let byte_ptr = (&self.mask as *const u64) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u64_size) }; + + // store `mask` back to mmapped file + file_data[offset..end].copy_from_slice(bytes); + + // --------------------- Store `data` ----------------- + offset += u64_size; + end += buckets_size; + + // for every `HashBucket` + for id in 0..total_buckets { + let begin = offset + (bucket_size as usize * id); + let finish = begin + bucket_size as usize; + + // cast `HashBucket` to byte pointer + let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, bucket_size) }; + + // store `HashBucket` back to mmapped file + file_data[begin..finish].copy_from_slice(bytes); + } + + // --------------------- Store `started` ----------------- + offset += buckets_size; + end += started_size; + + // cast `started` to byte pointer + let byte_ptr = (&self.started as *const Instant) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, started_size) }; + + // store `started` back to mmapped file + file_data[offset..end].copy_from_slice(bytes); + + // --------------------- Store `next_to_chain` ----------------- + offset += started_size; + end += u64_size; + + // cast `next_to_chain` to byte pointer + let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u64_size) }; + + // store `next_to_chain` back to mmapped file + 
file_data[offset..end].copy_from_slice(bytes); + + gracefully_shutdown = true; + + // TODO: check if this flushes the CPU caches + pool.flush() + .expect("failed to flush `HashTable` to storage"); + } + gracefully_shutdown + } + /// Lookup an item by key and return it pub fn get(&mut self, key: &[u8], segments: &mut Segments) -> Option { let hash = self.hash(key); let tag = tag_from_hash(hash); let bucket_id = hash & self.mask; + // ccc: get bucket corresponding to the key let mut bucket = &mut self.data[bucket_id as usize]; let chain_len = chain_len(bucket.data[0]); let mut chain_idx = 0; @@ -171,6 +387,7 @@ impl HashTable { if curr_ts != get_ts(bucket.data[0]) { bucket.data[0] = (bucket.data[0] & !TS_MASK) | (curr_ts << TS_BIT_SHIFT); + // ccc: Mask every "item info" in this bucket to remove the freq smoothing loop { let n_item_slot = if chain_idx == chain_len { N_BUCKET_SLOT @@ -197,20 +414,24 @@ impl HashTable { bucket = &mut self.data[bucket_id as usize]; } + // ccc: look at every HashBucket in this chain loop { let n_item_slot = if chain_idx == chain_len { - N_BUCKET_SLOT + N_BUCKET_SLOT // ccc: the last HashBucket in this chain has 8 items } else { - N_BUCKET_SLOT - 1 + N_BUCKET_SLOT - 1 // ccc: every other has 7 items (or 6 in the case of HashBucket 0) }; + // ccc: for every slot of "item info" in this HashBucket for i in 0..n_item_slot { + // ccc: ignore the "bucket info" slot (in HashBucket 0) if chain_idx == 0 && i == 0 { continue; } let current_info = bucket.data[i]; + // ccc: check if the tags match if get_tag(current_info) == tag { let current_item = segments.get_item(current_info).unwrap(); if current_item.key() != key { @@ -721,4 +942,30 @@ impl HashTable { hasher.write(key); hasher.finish() } + + #[cfg(test)] + // Checks if `HashTable.data` are equivalent + pub(crate) fn equivalent_hashbuckets(&self, buckets: Box<[HashBucket]>) -> bool { + let total_buckets = self.data.len(); + + // ensure number of `HashBucket`s is the same + let mut equivalent = 
total_buckets == buckets.len(); + + // Compare each `HashBucket` + for id in 0..total_buckets { + equivalent = equivalent && self.data[id] == buckets[id]; + } + + equivalent + } + + #[cfg(test)] + // Checks if `HashTable` are equivalent + pub(crate) fn equivalent_hashtables(&self, h: HashTable) -> bool { + self.power == h.power + && self.mask == h.mask + && self.equivalent_hashbuckets(h.data.clone()) + && self.started == h.started + && self.next_to_chain == h.next_to_chain + } } diff --git a/src/rust/storage/seg/src/lib.rs b/src/rust/storage/seg/src/lib.rs index 4f573cbd3..0f263ad8b 100644 --- a/src/rust/storage/seg/src/lib.rs +++ b/src/rust/storage/seg/src/lib.rs @@ -36,6 +36,7 @@ use std::convert::TryInto; // submodules mod builder; mod datapool; +mod demolisher; mod error; mod eviction; mod hashtable; @@ -52,6 +53,7 @@ mod tests; // publicly exported items from submodules pub use crate::seg::Seg; pub use builder::Builder; +pub use demolisher::Demolisher; pub use error::SegError; pub use eviction::Policy; pub use item::Item; diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index edccbdb06..00541ed66 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -26,6 +26,8 @@ pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, pub(crate) ttl_buckets: TtlBuckets, + // Used for testing: are the above structures restored? + pub(crate) _restored: bool, } impl Seg { @@ -48,6 +50,32 @@ impl Seg { Builder::default() } + // Returns a new `Demolisher` which is used to configure the graceful + // deconstruction of a `Seg` instance. 
+ // + // Example code: + // ``` + // let segment_size = 4096; + // let segments = 64; + // let heap_size = segments * segment_size as usize; + // let overflow_factor = 1.0; // same factor the cache was built with + // let segments_fields_path = Some(PathBuf::from("/path/to/segments_fields")); + // let ttl_buckets_path = Some(PathBuf::from("/path/to/ttl_buckets")); + // let hashtable_path = Some(PathBuf::from("/path/to/hashtable")); + // + // // demolish cache by triggering graceful shutdown + // Seg::demolisher() + // .heap_size(heap_size) + // .overflow_factor(overflow_factor) + // .segments_fields_path(segments_fields_path) + // .ttl_buckets_path(ttl_buckets_path) + // .hashtable_path(hashtable_path) + // .demolish(cache) + // ``` + pub fn demolisher() -> Demolisher { + Demolisher::default() + } + /// Gets a count of items in the `Seg` instance. This is an expensive /// operation and is only enabled for tests and builds with the `debug` /// feature enabled. @@ -132,6 +160,8 @@ impl Seg { let mut retries = RESERVE_RETRIES; let reserved; loop { + // ccc: check tail segment of TTL bucket for free space.
+ // ccc: If full, try to get a new segment from free q and make this the tail match self .ttl_buckets .get_mut_bucket(ttl) @@ -146,6 +176,11 @@ impl Seg { return Err(SegError::ItemOversized { size, key }); } Err(TtlBucketsError::NoFreeSegments) => { + if retries == RESERVE_RETRIES { + // first attempt to acquire a free segment, increment + // the stats + SEGMENT_REQUEST.increment(); + } if self .segments .evict(&mut self.ttl_buckets, &mut self.hashtable) @@ -306,4 +341,26 @@ impl Seg { Err(SegError::DataCorrupted) } } + + // Used in testing to clone a `Seg` to compare with + #[cfg(test)] + pub(crate) fn clone(&self) -> Seg { + let segments = self.segments.clone(); + let ttl_buckets = self.ttl_buckets.clone(); + let hashtable = self.hashtable.clone(); + Seg { + segments, + ttl_buckets, + hashtable, + _restored: false, // this field doesn't matter as it won't be compared + } + } + + // Used in testing to compare `Seg`s + #[cfg(test)] + pub(crate) fn equivalent_seg(&self, s: Seg) -> bool { + self.segments.equivalent_segments(s.segments) + && self.ttl_buckets.equivalent_ttlbuckets(s.ttl_buckets) + && self.hashtable.equivalent_hashtables(s.hashtable) + } } diff --git a/src/rust/storage/seg/src/segments/builder.rs b/src/rust/storage/seg/src/segments/builder.rs index 578bbb915..1f574087c 100644 --- a/src/rust/storage/seg/src/segments/builder.rs +++ b/src/rust/storage/seg/src/segments/builder.rs @@ -12,24 +12,36 @@ use std::path::{Path, PathBuf}; /// The `SegmentsBuilder` allows for the configuration of the segment storage. 
pub(crate) struct SegmentsBuilder { + pub(super) restore: bool, pub(super) heap_size: usize, pub(super) segment_size: i32, pub(super) evict_policy: Policy, pub(super) datapool_path: Option, + pub(super) segments_fields_path: Option, } impl Default for SegmentsBuilder { fn default() -> Self { Self { + restore: false, segment_size: 1024 * 1024, heap_size: 64 * 1024 * 1024, evict_policy: Policy::Random, datapool_path: None, + segments_fields_path: None, } } } impl<'a> SegmentsBuilder { + /// Specify whether the `Segments` fields' will be restored + /// from the segments_fields_path. + /// Otherwise, the cache will be created and treated as new. + pub fn restore(mut self, will_restore: bool) -> Self { + self.restore = will_restore; + self + } + /// Set the segment size in bytes. /// /// # Panics @@ -62,7 +74,7 @@ impl<'a> SegmentsBuilder { self } - /// Specify a backing file to be used for the segment storage. If provided, + /// Specify a backing file to be used for the `Segments.data` storage. If provided, /// a file will be created at the corresponding path and used for segment /// storage. pub fn datapool_path>(mut self, path: Option) -> Self { @@ -70,8 +82,20 @@ impl<'a> SegmentsBuilder { self } + /// Specify a backing file to be used for the `Segment` fields' storage. If provided, + /// a file will be created at the corresponding path and used for segment header + /// storage. 
+ pub fn segments_fields_path>(mut self, path: Option) -> Self { + self.segments_fields_path = path.map(|p| p.as_ref().to_owned()); + self + } + /// Construct the [`Segments`] from the builder pub fn build(self) -> Segments { - Segments::from_builder(self) + if self.restore { + Segments::from_builder_restore(self) + } else { + Segments::from_builder_new(self) + } } } diff --git a/src/rust/storage/seg/src/segments/header.rs b/src/rust/storage/seg/src/segments/header.rs index 5bbed133a..67bf3a85e 100644 --- a/src/rust/storage/seg/src/segments/header.rs +++ b/src/rust/storage/seg/src/segments/header.rs @@ -35,7 +35,7 @@ use crate::*; // TODO(bmartin): this should be parameterized. const SEG_MATURE_TIME: Duration = Duration::from_secs(20); -#[derive(Debug)] +#[derive(Debug, Copy, Clone, PartialEq)] #[repr(C)] pub struct SegmentHeader { /// The id for this segment diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 3c7645d5b..1652584fc 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -10,6 +10,7 @@ use crate::segments::*; use core::num::NonZeroU32; use metrics::{static_metrics, Counter, Gauge}; +use std::path::PathBuf; static_metrics! { static EVICT_TIME: Gauge; @@ -41,12 +42,17 @@ pub(crate) struct Segments { flush_at: Instant, /// Eviction configuration and state evict: Box, + /// Is `data` file backed? + data_file_backed: bool, + /// Are `headers` copied back from a file? 
+ pub(crate) fields_copied_back: bool, } impl Segments { /// Private function which allocates and initializes the `Segments` by /// taking ownership of the builder - pub(super) fn from_builder(builder: SegmentsBuilder) -> Self { + /// A new `Segments` is created + pub(super) fn from_builder_new(builder: SegmentsBuilder) -> Self { let segment_size = builder.segment_size; let segments = builder.heap_size / (builder.segment_size as usize); @@ -74,9 +80,11 @@ impl Segments { let mut headers = headers.into_boxed_slice(); let heap_size = segments * segment_size as usize; + let mut data_file_backed = false; // TODO(bmartin): we always prefault, this should be configurable let mut data: Box = if let Some(file) = builder.datapool_path { + data_file_backed = true; let pool = File::create(file, heap_size, true) .expect("failed to allocate file backed storage"); Box::new(pool) @@ -111,15 +119,292 @@ impl Segments { data, flush_at: Instant::recent(), evict: Box::new(Eviction::new(segments, evict_policy)), + data_file_backed, + fields_copied_back: false, } } + /// Private function which allocates and initializes the `Segments` by + /// taking ownership of the builder. + /// `Segments` is restored if the paths are specified, otherwise a new + /// `Segments` is created. + pub(super) fn from_builder_restore(builder: SegmentsBuilder) -> Self { + // this is here to avoid `builder` being moved when it might be needed + // for the else statement + let segments_fields_path = builder.segments_fields_path.clone(); + + // If there are specified paths to restore the `Segments` with, + // copy `Segments` back. + // Otherwise create a new `Segments`. + if let Some(fields_file) = segments_fields_path { + // ----- Recover `data` ------ + let data: Box; + // TODO: like with the HashTable fields, we assume that the configuration + // options for `Segments` hasn't changed upon recovery. We need a way to + // detect the change in fields as well as decided how to + // deal with such changes. 
+ let cfg_segment_size = builder.segment_size; + let cfg_segments = builder.heap_size / (builder.segment_size as usize); + + debug!( + "heap size: {} seg size: {} segments: {}", + builder.heap_size, cfg_segment_size, cfg_segments + ); + + assert!( + cfg_segments < (1 << 24), // we use just 24 bits to store the seg id + "heap size requires too many segments, reduce heap size or increase segment size" + ); + + let heap_size = cfg_segments * cfg_segment_size as usize; + + // TODO(bmartin): we always prefault, this should be configurable + // `Segments.data` must be file backed for a recovery + if let Some(data_file) = builder.datapool_path { + let pool = File::create(data_file, heap_size, true) + .expect("failed to allocate file backed storage"); + data = Box::new(pool) + } else { + return Segments::from_builder_new(builder); + } + + // ----- Recover other fields ------ + + let header_size: usize = ::std::mem::size_of::(); + let headers_size: usize = cfg_segments * header_size as usize; + let i32_size = ::std::mem::size_of::(); + let u32_size = ::std::mem::size_of::(); + let free_q_size = ::std::mem::size_of::>(); + let flush_at_size = ::std::mem::size_of::(); + let fields_size = headers_size + + i32_size // `segment_size` + + u32_size * 2 // `free` and `cap` + + free_q_size + + flush_at_size; + + // Mmap file + let pool = File::create(fields_file, fields_size, true) + .expect("failed to allocate file backed storage"); + let fields_data = Box::new(pool.as_slice()); + + // create blank bytes to copy data into + let mut bytes = vec![0; fields_size]; + // retrieve bytes from mmapped file + bytes.copy_from_slice(&fields_data[0..fields_size]); + + // ----- Retrieve `headers` ----- + let mut headers = Vec::with_capacity(0); + headers.reserve_exact(cfg_segments); + + // retrieve each `SegmentHeader` from the raw bytes + for id in 0..cfg_segments { + let begin = header_size as usize * id; + let finish = begin + header_size as usize; + + // cast bytes to `SegmentHeader` + let 
header = unsafe { *(bytes[begin..finish].as_mut_ptr() as *mut SegmentHeader) }; + headers.push(header); + } + + // ----- Retrieve `segment_size` ----- + let mut offset = headers_size; + let mut end = offset + i32_size; + + let segment_size = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut i32) }; + // TODO: compare `cfg_segment_size` and `segment_size` + + // ----- Retrieve `free` ----- + offset += i32_size; + end += u32_size; + + let free = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; + + // ----- Retrieve `cap` ----- + offset += u32_size; + end += u32_size; + + let cap = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; + + // ----- Retrieve `free_q` ----- + offset += u32_size; + end += free_q_size; + + let free_q = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Option) }; + + // ----- Retrieve `flush_at` ----- + offset += free_q_size; + end += flush_at_size; + + let flush_at = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; + + // ----- Re-initialise `evict` ----- + + let evict_policy = builder.evict_policy; + let evict = Eviction::new(cfg_segments, evict_policy); + + SEGMENT_CURRENT.set(cap as _); + SEGMENT_FREE.set(free as _); + + Self { + headers: headers.into_boxed_slice(), + data, + segment_size, + free, + cap, + free_q, + flush_at, + evict: Box::new(evict), + data_file_backed: true, + fields_copied_back: true, + } + } else { + Segments::from_builder_new(builder) + } + } + + /// Demolishes the segments by flushing the `Segments.data` to PMEM + /// (if filed backed) and storing the other `Segments` fields' to + /// PMEM (if a path is specified) + pub fn demolish(&self, segments_fields_path: Option, heap_size: usize) -> bool { + let mut gracefully_shutdown = false; + + // if a path is specified, copy all the `Segments` fields' + // to the file specified by `segments_fields_path` + if let Some(file) = segments_fields_path { + let segments = heap_size / (self.segment_size as usize); + let header_size: usize = 
::std::mem::size_of::(); + let headers_size: usize = segments * header_size as usize; + let i32_size = ::std::mem::size_of::(); + let u32_size = ::std::mem::size_of::(); + let free_q_size = ::std::mem::size_of::>(); + let flush_at_size = ::std::mem::size_of::(); + let fields_size = headers_size + + i32_size // `segment_size` + + u32_size * 2 // `free` and `cap` + + free_q_size + + flush_at_size; + + // mmap file + let mut pool = File::create(file, fields_size, true) + .expect("failed to allocate file backed storage"); + let fields_data = Box::new(pool.as_mut_slice()); + + // ----- Store `headers` ----- + + // for every `SegmentHeader` + for id in 0..segments { + let begin = header_size as usize * id; + let finish = begin + header_size as usize; + + // cast `SegmentHeader` to byte pointer + let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, header_size) }; + + // store `SegmentHeader` back to mmapped file + fields_data[begin..finish].copy_from_slice(bytes); + } + + // ----- Store `segment_size` ----- + let mut offset = headers_size; + let mut end = offset + i32_size; + + // cast `segment_size` to byte pointer + let byte_ptr = (&self.segment_size as *const i32) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, i32_size) }; + + // store `segment_size` back to mmapped file + fields_data[offset..end].copy_from_slice(bytes); + + // ----- Store `free` ----- + offset += i32_size; + end += u32_size; + + // cast `free` to byte pointer + let byte_ptr = (&self.free as *const u32) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u32_size) }; + + // store `free` back to mmapped file + fields_data[offset..end].copy_from_slice(bytes); + + // ----- Store `cap` ----- + offset += u32_size; + end += 
u32_size; + + // cast `cap` to byte pointer + let byte_ptr = (&self.cap as *const u32) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u32_size) }; + + // store `cap` back to mmapped file + fields_data[offset..end].copy_from_slice(bytes); + + // ----- Store `free_q` ----- + offset += u32_size; + end += free_q_size; + + // cast `free_q` to byte pointer + let byte_ptr = (&self.free_q as *const Option) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, free_q_size) }; + + // store `free_q` back to mmapped file + fields_data[offset..end].copy_from_slice(bytes); + + // ----- Store `flush_at` ----- + offset += free_q_size; + end += flush_at_size; + + // cast `flush_at` to byte pointer + let byte_ptr = (&self.flush_at as *const Instant) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, flush_at_size) }; + + // store `flush_at` back to mmapped file + fields_data[offset..end].copy_from_slice(bytes); + + // TODO: check if this flushes fields_data from CPU caches + pool.flush() + .expect("failed to flush `Segments` fields' to storage"); + + gracefully_shutdown = true; + } + + // if `Segments.data` is file backed, flush it to PMEM + if self.data_file_backed { + self.data + .flush() + .expect("failed to flush Segments.data to storage"); + } else { + // This else case is not expected to be reached as this function + // is only called during a graceful shutdown, so it is expected that the + // data is file backed + gracefully_shutdown = false; + } + + gracefully_shutdown + } + /// Return the size of each segment in bytes #[inline] pub fn segment_size(&self) -> i32 { self.segment_size } + /// Returns if `data` is file backed + #[cfg(test)] + pub fn data_file_backed(&self) -> bool { + self.data_file_backed + } + /// Returns the number of free segments 
#[cfg(test)] pub fn free(&self) -> usize { @@ -571,7 +856,7 @@ impl Segments { // reduces CPU load under heavy rewrite/delete workloads at the // cost of letting more dead items remain in the segements, // reducing the hitrate - // if self.headers[seg_id as usize].merge_at() + CoarseDuration::from_secs(30) > CoarseInstant::recent() { + // if self.headers[seg_id as usize].merge_at() + CoarseDuration::from_secs(30) > Instant::recent() { // return Ok(()); // } @@ -624,6 +909,58 @@ impl Segments { } } + // Used in testing to clone a `Segments` to compare with + #[cfg(test)] + pub(crate) fn clone(&self) -> Segments { + // clone `data` + let heap_size = self.segment_size as usize * self.cap as usize; + let mut data = vec![0; heap_size]; + data.clone_from_slice(self.data.as_slice()); + let segment_data = Memory::memory_from_data(data.into_boxed_slice()); + + // Return a `Segments` where everything is cloned + Self { + headers: self.headers.clone(), + data: Box::new(segment_data), // fill in `data` field with something + segment_size: self.segment_size, + free: self.free, + cap: self.cap, + free_q: self.free_q.clone(), + flush_at: self.flush_at, + evict: self.evict.clone(), + data_file_backed: self.data_file_backed, + fields_copied_back: self.fields_copied_back, + } + } + + #[cfg(test)] + // Checks if `Segments.headers` are equivalent + pub(crate) fn equivalent_headers(&self, headers: Box<[SegmentHeader]>) -> bool { + let total_buckets = self.headers.len(); + + // ensure number of `SegmentHeader`s is the same + let mut equivalent = total_buckets == headers.len(); + + // Compare each `SegmentHeader` + for id in 0..total_buckets { + equivalent = equivalent && self.headers[id] == headers[id]; + } + + equivalent + } + + // Checks if `Segments` are equivalent + #[cfg(test)] + pub(crate) fn equivalent_segments(&self, s: Segments) -> bool { + self.equivalent_headers(s.headers.clone()) + && self.data.as_slice() == s.data.as_slice() + && self.segment_size == s.segment_size + && 
self.free == s.free + && self.cap == s.cap + && self.free_q == s.free_q + && self.flush_at == s.flush_at + } + #[cfg(feature = "debug")] pub(crate) fn check_integrity(&mut self) -> bool { let mut integrity = true; @@ -924,6 +1261,6 @@ impl Segments { impl Default for Segments { fn default() -> Self { - Self::from_builder(Default::default()) + Self::from_builder_new(Default::default()) } } diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index ee8b8da37..a7badcfc3 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -6,8 +6,10 @@ use super::*; use crate::hashtable::HashBucket; use crate::item::ITEM_HDR_SIZE; use core::num::NonZeroU32; - +use std::collections::HashSet; +use std::path::PathBuf; use std::time::Duration; +use tempfile::TempDir; #[test] fn sizes() { @@ -21,7 +23,7 @@ fn sizes() { assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 64); + assert_eq!(std::mem::size_of::(), 72); // increased to accommodate fields added for testing assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 24); @@ -396,3 +398,664 @@ fn clear() { assert_eq!(cache.items(), 0); assert!(cache.get(b"coffee").is_none()); } + +// ----------- TESTS FOR RECOVERY ------------- +// Configuration Options: +// +// New cache, not file backed +// ---- Cache is created new in main memory. +// New cache, file backed +// ---- Cache is created new and is file backed. +// ---- In other words, PMEM is used as an extension of DRAM. +// ---- Note: Since the same `datapool_path` is used by the `builder` and +// ---- `demolisher`, the cache cannot be gracefully shutdown by the `demolisher` +// ---- if it wasn't file backed by the `builder`. That is, if there is no path +// ---- used to file back the cache, there is no path to copy the cache data to on shutdown +// Not gracefully shutdown +// ---- Nothing is saved on shutdown. 
+// Gracefully shutdown +// ---- `Segments.data` is flushed to PMEM it is file backed +// ---- Rest of `Seg` instance saved on shutdown if the paths are valid +// ---- That is, all of `Seg.hashtable`, `Seg.ttl_buckets` and +// ---- the relevant `Seg.Segments` fields are saved +// Restored cache +// ---- `Segments.data` must be file backed +// ---- Rest of `Seg` copied back from the files they were saved to and +// ---- If any of the file paths are not valid, then the cache is created new (TODO) + +// ------------- Set up / Helper Functions for below tests ------------ + +// path to tmp directory used for temp files +const TMP_DIR: &str = "target/debug/tmp"; + +const SEGMENTS: usize = 64; + +// Creates a temporary directory for temporary test files +fn tmp_dir() -> TempDir { + // Create parent directory for the temporary directory + std::fs::create_dir_all(TMP_DIR).expect("failed to create parent tmp directory"); + + // Create the temporary directory + TempDir::new_in(TMP_DIR).unwrap() +} + +// Returns a `Seg` instance. +// Cache is restored only if `restore` and `segments_fields_path`, `ttl_buckets_path`. `hashtable_path` are not `None`. +// Otherwise, new `Seg` instance is returned. +// Cache is file backed if `datapool_path` is not `None`. +fn make_cache( + restore: bool, + datapool_path: Option, + segments_fields_path: Option, + ttl_buckets_path: Option, + hashtable_path: Option, +) -> Seg { + let segment_size = 4096; + let segments = SEGMENTS; + let heap_size = segments * segment_size as usize; + + Seg::builder() + .restore(restore) + .segment_size(segment_size as i32) + .heap_size(heap_size) + .datapool_path(datapool_path) // set path + .segments_fields_path(segments_fields_path) // set path + .ttl_buckets_path(ttl_buckets_path) // set path + .hashtable_path(hashtable_path) // set path + .build() +} + +// Demolish the cache by attempting to save the `Segments`, +// `TtlBuckets` and `HashTable` to the paths specified +// If successful, return True. 
Else, return False. +fn demolish_cache( + cache: Seg, + segments_fields_path: Option, + ttl_buckets_path: Option, + hashtable_path: Option, +) -> bool { + let segment_size = 4096; + let segments = SEGMENTS; + let heap_size = segments * segment_size as usize; + + Seg::demolisher() + .heap_size(heap_size) + .segments_fields_path(segments_fields_path) + .ttl_buckets_path(ttl_buckets_path) + .hashtable_path(hashtable_path) + .demolish(cache) +} + +// ------------------- Set Paths Correctly Tests -------------------------- + +// Check that a file backed, new cache is file backed and the `Seg` +// and thus the `Segments` fields', `HashTable` and `TTLBuckets` +// are new (and not restored) +#[test] +fn new_cache_file_backed() { + // Create parent directory for temporary test files + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + + // create new, file backed cache + let restore = false; + let cache = make_cache(restore, datapool_path, None, None, None); + + // the `Segments.data` should be filed backed + assert!(cache.segments.data_file_backed()); + // -- Check entire `Seg` -- + // the `Seg` should not be restored + assert!(!cache._restored); + // -- Check `Seg` fields/components -- + // the `Segments` fields' should not have been restored + assert!(!cache.segments.fields_copied_back); + // the `TtlBuckets` should not have been restored + assert!(!cache.ttl_buckets.buckets_copied_back); + // the `HashTable` should not have been restored + assert!(!cache.hashtable.table_copied_back); +} + +// Check that a new, not file backed cache is not file backed +// and the `Seg` is new (and not restored) +#[test] +fn new_cache_not_file_backed() { + // create new, not file backed cache + let restore = false; + let cache = make_cache(restore, None, None, None, None); + + // the `Segments.data` should not be filed backed + assert!(!cache.segments.data_file_backed()); + // the `Seg` should not be restored + 
assert!(!cache._restored); + // the `Segments` fields' should not have been restored + assert!(!cache.segments.fields_copied_back); + // the `TtlBuckets` should not have been restored + assert!(!cache.ttl_buckets.buckets_copied_back); + // the `HashTable` should not have been restored + assert!(!cache.hashtable.table_copied_back); +} + +// Check that a restored cache is file backed and the `Seg` is restored +#[test] +fn restored_cache_file_backed() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // restore, file backed cache + let restore = true; + let cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); + + // the `Segments.data` should be filed backed + assert!(cache.segments.data_file_backed()); + // the `Seg` should be restored + assert!(cache._restored); + // the `Segments` fields' should have been restored + assert!(cache.segments.fields_copied_back); + // the `TtlBuckets` should have been restored + assert!(cache.ttl_buckets.buckets_copied_back); + // the `HashTable` should have been restored + assert!(cache.hashtable.table_copied_back); +} + +// Edge Case: Check that an attempt to restore a cache without specifing +// any paths for the `Segments.data`, `Segments` fields', +// `HashTable` and `TTLBuckets` will lead to `Segments.data` not +// being file backed and none of the other structures being restored +#[test] +fn restored_cache_no_paths_set() { + let segment_size = 4096; + let segments = 64; + let heap_size = segments * segment_size as usize; + let 
datapool_path: Option = None; + + let cache = Seg::builder() + .restore(true) + .segment_size(segment_size as i32) + .heap_size(heap_size) + .datapool_path(datapool_path) // set no path + .build(); + + // the `Segments.data` should not be filed backed + assert!(!cache.segments.data_file_backed()); + // the `Seg` should not be restored + assert!(!cache._restored); + // the `Segments` fields' should not have been restored + assert!(!cache.segments.fields_copied_back); + // the `TtlBuckets` should not have been restored + assert!(!cache.ttl_buckets.buckets_copied_back); + // the `HashTable` should not have been restored + assert!(!cache.hashtable.table_copied_back); +} + +// Check that if paths are specified, then the cache is gracefully +// shutdown +#[test] +fn cache_gracefully_shutdown() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + let segment_size = 4096; + let segments = SEGMENTS; + let heap_size = segments * segment_size as usize; + + // create new, file backed cache + let cache = Seg::builder() + .restore(false) + .segment_size(segment_size as i32) + .heap_size(heap_size) + .datapool_path(datapool_path) // set path + .build(); + + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + assert!(Seg::demolisher() + .heap_size(heap_size) + .segments_fields_path(segments_fields_path) + .ttl_buckets_path(ttl_buckets_path) + .hashtable_path(hashtable_path) + .demolish(cache)); +} + +// Check that if paths are not specified, then the cache is not gracefully +// shutdown +#[test] +fn cache_not_gracefully_shutdown() { + // Create a temporary directory + let dir = tmp_dir(); + // 
Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + let segment_size = 4096; + let segments = SEGMENTS; + let heap_size = segments * segment_size as usize; + + // create new, file backed cache + let cache = Seg::builder() + .restore(false) + .segment_size(segment_size as i32) + .heap_size(heap_size) + .datapool_path(datapool_path) // set path + .build(); + + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Do not set a HashTable path + let hashtable_path: Option = None; + + assert!(!Seg::demolisher() + .heap_size(heap_size) + .segments_fields_path(segments_fields_path) + .ttl_buckets_path(ttl_buckets_path) + .hashtable_path(hashtable_path) + .demolish(cache)); +} + +// --------------------- Data copied back Tests---------------------------- + +// Creates a new cache, stores an item, gracefully shutsdown cache and restore cache +// Check item is still there and caches are equivalent +#[test] +fn new_file_backed_cache_changed_and_restored() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // create new, file backed cache + let mut restore = false; + let mut cache = make_cache(restore, datapool_path, None, None, None); + + assert!(!cache._restored); + assert_eq!(cache.items(), 0); + assert_eq!(cache.segments.free(), SEGMENTS); + + // "latte" should not be in a new, empty cache + 
assert!(cache.get(b"latte").is_none()); + // insert "latte" into cache + assert!(cache + .insert(b"latte", b"", None, Duration::from_secs(5)) + .is_ok()); + // "latte" should now be in cache + assert!(cache.get(b"latte").is_some()); + + assert_eq!(cache.items(), 1); + assert_eq!(cache.segments.free(), SEGMENTS - 1); + + // Get a copy of the cache to be compared later + let old_cache = cache.clone(); + + // gracefully shutdown cache + assert!(demolish_cache( + cache, + segments_fields_path, + ttl_buckets_path, + hashtable_path + )); + + // Create same tempfiles (they have been moved since first created) + let datapool_path: Option = Some(dir.path().join("datapool")); + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // restore cache + // This cache is file backed by same file as the above cache + // saved `Segments.data` to and the `Seg` is restored + restore = true; + let mut new_cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); + + assert!(new_cache._restored); + // "latte" should be in restored cache + assert!(new_cache.get(b"latte").is_some()); + assert_eq!(new_cache.items(), 1); + assert_eq!(new_cache.segments.free(), SEGMENTS - 1); + + // the restored cache should be equivalent to the old cache + assert!(new_cache.equivalent_seg(old_cache)); +} + +// Creates a new cache, gracefully shutsdown cache and restore cache +// Check caches are equivalent +#[test] +fn new_file_backed_cache_not_changed_and_restored() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let 
ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // create new, file backed cache + let mut restore = false; + let cache = make_cache(restore, datapool_path, None, None, None); + + assert!(!cache._restored); + + // Get a copy of the cache to be compared later + let old_cache = cache.clone(); + + // gracefully shutdown cache + assert!(demolish_cache( + cache, + segments_fields_path, + ttl_buckets_path, + hashtable_path + )); + + // Create same tempfiles (they have been moved since first created) + let datapool_path: Option = Some(dir.path().join("datapool")); + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // restore cache + // This cache is file backed by same file as the above cache + // saved `Segments.data` to and the `Seg` is restored + restore = true; + let new_cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); + + assert!(new_cache._restored); + + // the restored cache should be equivalent to the old cache + assert!(new_cache.equivalent_seg(old_cache)); +} + +// Creates a new cache, stores an item, gracefully shutsdown cache and spawn new cache +// Check item is not in new cache and caches are not equivalent +#[test] +fn new_cache_changed_and_not_restored() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option 
= Some(dir.path().join("hashtable")); + + // create new, file backed cache + let mut restore = false; + let mut cache = make_cache(restore, datapool_path, None, None, None); + + assert!(!cache._restored); + assert_eq!(cache.items(), 0); + assert_eq!(cache.segments.free(), SEGMENTS); + + // "latte" should not be in a new, empty cache + assert!(cache.get(b"latte").is_none()); + // insert "latte" into cache + assert!(cache + .insert(b"latte", b"", None, Duration::from_secs(5)) + .is_ok()); + // "latte" should now be in cache + assert!(cache.get(b"latte").is_some()); + + assert_eq!(cache.items(), 1); + assert_eq!(cache.segments.free(), SEGMENTS - 1); + + // Get a copy of the cache to be compared later + let old_cache = cache.clone(); + + // gracefully shutdown cache + assert!(demolish_cache( + cache, + segments_fields_path, + ttl_buckets_path, + hashtable_path + )); + + // Create same tempfile (it has been moved since first created) + let datapool_path: Option = Some(dir.path().join("datapool")); + + // create new, file backed cache. + // This new cache is file backed by same file as the above cache + // saved `Segments.data` to but this cache is treated as new + restore = false; + let mut new_cache = make_cache(restore, datapool_path, None, None, None); + + assert!(!new_cache._restored); + assert_eq!(new_cache.items(), 0); + assert_eq!(new_cache.segments.free(), SEGMENTS); + + // "latte" should not be in new cache + assert!(new_cache.get(b"latte").is_none()); + + // the restored cache should not be equivalent to the old cache + assert!(!new_cache.equivalent_seg(old_cache)); +} + +// Creates a new cache, stores an item, gracefully shutsdown cache and restore cache +// with an incorrect path to the `HashTable`. 
+// The restoration should "succeed" and the # items recorded should be the same in the restored cache +// as the `segments_fields_path` is the same but an attempt to get item from new cache should fail +// as the `hashtable_path` is different and caches should not equivalent +#[test] +fn new_cache_changed_and_restoration_fails() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // create new, file backed cache + let mut restore = false; + let mut cache = make_cache(restore, datapool_path, None, None, None); + + assert!(!cache._restored); + assert_eq!(cache.items(), 0); + assert_eq!(cache.segments.free(), SEGMENTS); + + // "latte" should not be in a new, empty cache + assert!(cache.get(b"latte").is_none()); + // insert "latte" into cache + assert!(cache + .insert(b"latte", b"", None, Duration::from_secs(5)) + .is_ok()); + // "latte" should now be in cache + assert!(cache.get(b"latte").is_some()); + + assert_eq!(cache.items(), 1); + assert_eq!(cache.segments.free(), SEGMENTS - 1); + + // Get a copy of the cache to be compared later + let old_cache = cache.clone(); + + // gracefully shutdown cache + assert!(demolish_cache( + cache, + segments_fields_path, + ttl_buckets_path, + hashtable_path + )); + + // Create same tempfiles (they have been moved since first created) for `datapool`, `segments_fields`, `ttl_buckets` + let datapool_path: Option = Some(dir.path().join("datapool")); + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); 
+ + // Create different tempfile for `hashtable` + let hashtable_path: Option = Some(dir.path().join("hashtable_diff")); + + // Restore cache + restore = true; + let mut new_cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); + + // Cache is restored as all paths exist + assert!(new_cache._restored); + // `Segments` data should be the same as old cache since `segments_fields_path` is the same + assert_eq!(new_cache.items(), 1); + assert_eq!(new_cache.segments.free(), SEGMENTS - 1); + + // "latte" should not be in new cache as `HashTable` restored from + // incorrect path does not have this information + assert!(new_cache.get(b"latte").is_none()); + + // the restored cache should not be equivalent to the old cache + assert!(!new_cache.equivalent_seg(old_cache)); +} + +// Create a new cache, fill it with items. +// Gracefully shutdown this cache. +// Restore cache and check that every key from the original cache +// exists in the restored cache +// Check caches are equivalent +#[test] +fn full_cache_recovery_long() { + // Create a temporary directory + let dir = tmp_dir(); + // Create tempfile for datapool + let datapool_path: Option = Some(dir.path().join("datapool")); + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + let ttl = Duration::ZERO; + let value_size = 512; + let key_size = 1; + let iters = 1_000_000; + + // create new, file backed cache + let mut restore = false; + let mut cache = make_cache(restore, datapool_path, None, None, None); + + assert!(!cache._restored); + assert_eq!(cache.items(), 0); + assert_eq!(cache.segments.free(), SEGMENTS); + + let mut rng = rand::rng(); + + let mut key = vec![0; key_size]; 
+ let mut value = vec![0; value_size]; + + // record all of the unique keys + let mut unique_keys = HashSet::new(); + + // fill cache + for _ in 0..iters { + rng.fill_bytes(&mut key); + rng.fill_bytes(&mut value); + + let save_key = key.clone(); + unique_keys.insert(save_key); + + assert!(cache.insert(&key, &value, None, ttl).is_ok()); + } + + // record all active keys in cache + // (this could be less than # unique keys if eviction has occurred) + let mut unique_active_keys = Vec::new(); + for key in &unique_keys { + // if this key exists, save it! + if cache.get(&key).is_some() { + unique_active_keys.push(key); + } + } + + // check that the number of active items in the cache equals the number + // of active keys + assert_eq!(cache.items(), unique_active_keys.len()); + + // Get a copy of the cache to be compared later + let old_cache = cache.clone(); + + // gracefully shutdown cache + assert!(demolish_cache( + cache, + segments_fields_path, + ttl_buckets_path, + hashtable_path + )); + + // Create same tempfiles (they have been moved since first created) + let datapool_path: Option = Some(dir.path().join("datapool")); + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + let hashtable_path: Option = Some(dir.path().join("hashtable")); + + // restore cache + // This new cache is file backed by same file as the above cache + // saved `Segments.data` to and the `Seg` is restored + restore = true; + let mut new_cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); + + assert!(new_cache._restored); + + // the restored cache should be equivalent to the old cache + assert!(new_cache.equivalent_seg(old_cache)); + + // check that the number of active items in the restored cache + // equals the number of active keys in the original cache + assert_eq!(new_cache.items(), unique_active_keys.len()); + + // check that 
every active key from the original cache is in + // the restored cache + while let Some(key) = unique_active_keys.pop() { + assert!(new_cache.get(&key).is_some()); + } +} diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs index aae90e230..7a906884a 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs @@ -34,6 +34,8 @@ use core::num::NonZeroU32; /// in an ordered fashion. The first segment to expire will be the head of the /// segment chain. This allows us to efficiently scan across the [`TtlBuckets`] /// and expire segments in an eager fashion. +#[derive(Debug, Copy, Clone, PartialEq)] +#[repr(C)] pub struct TtlBucket { head: Option, tail: Option, diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 590b63326..0f11ff153 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -21,7 +21,9 @@ //! more detail. use super::{CLEAR_TIME, EXPIRE_TIME}; +use crate::datapool::*; use crate::*; +use std::path::PathBuf; const N_BUCKET_PER_STEP_N_BIT: usize = 8; const N_BUCKET_PER_STEP: usize = 1 << N_BUCKET_PER_STEP_N_BIT; @@ -42,10 +44,12 @@ const TTL_BOUNDARY_3: i32 = 1 << (TTL_BUCKET_INTERVAL_N_BIT_3 + N_BUCKET_PER_STE const MAX_N_TTL_BUCKET: usize = N_BUCKET_PER_STEP * 4; const MAX_TTL_BUCKET_IDX: usize = MAX_N_TTL_BUCKET - 1; - +#[derive(Clone)] // for testing pub struct TtlBuckets { pub(crate) buckets: Box<[TtlBucket]>, pub(crate) last_expired: Instant, + /// Are `TtlBuckets` copied back from a file? + pub(crate) buckets_copied_back: bool, } impl TtlBuckets { @@ -76,10 +80,125 @@ impl TtlBuckets { Self { buckets, last_expired, + buckets_copied_back: false, + } + } + + // Returns a restored `TtlBuckets` if file path + // to restore from is valid. 
Otherwise return a new `TtlBuckets` + pub fn restore(ttl_buckets_path: Option) -> Self { + // if there is a path to restore from, restore the `TtlBuckets` + if let Some(file) = ttl_buckets_path { + let bucket_size = ::std::mem::size_of::(); + // size from all `TtlBucket`s in `TtlBuckets` + let buckets_size = MAX_N_TTL_BUCKET * bucket_size; + let last_expired_size = ::std::mem::size_of::(); + let ttl_buckets_struct_size = buckets_size + last_expired_size; + + // Mmap file + let pool = File::create(file, ttl_buckets_struct_size, true) + .expect("failed to allocate file backed storage"); + let data = Box::new(pool.as_slice()); + + // create blank bytes to copy data into + let mut bytes = vec![0; ttl_buckets_struct_size]; + // retrieve bytes from mmapped file + bytes.copy_from_slice(&data[0..ttl_buckets_struct_size]); + + // ----- Retrieve `last_expired` ----- + let mut offset = 0; + let last_expired = + unsafe { *(bytes[offset..last_expired_size].as_mut_ptr() as *mut Instant) }; + + // ----- Retrieve `buckets` ----- + offset += last_expired_size; + + let mut buckets = Vec::with_capacity(0); + buckets.reserve_exact(MAX_N_TTL_BUCKET); + + // Get each `TtlBucket` from the raw bytes + for id in 0..MAX_N_TTL_BUCKET { + let begin = offset + (bucket_size as usize * id); + let finish = begin + bucket_size as usize; + + // cast bytes to `TtlBucket` + let bucket = unsafe { *(bytes[begin..finish].as_mut_ptr() as *mut TtlBucket) }; + buckets.push(bucket); + } + + let buckets = buckets.into_boxed_slice(); + + Self { + buckets, + last_expired, + buckets_copied_back: true, + } + } + // otherwise, create a new `TtlBuckets` + else { + TtlBuckets::new() } } - /// Get the index of the `TtlBucket` for the given TTL. 
+ /// Demolishes the `TtlBuckets` by storing them to + /// PMEM (if a path is specified) + pub fn demolish(&self, ttl_buckets_path: Option) -> bool { + let mut gracefully_shutdown = false; + + // if a path is specified, copy all the `TtlBucket`s + // to the file specified by `ttl_buckets_path` + if let Some(file) = ttl_buckets_path { + let bucket_size = ::std::mem::size_of::(); + // size of all `TtlBucket`s in `TtlBuckets` + let buckets_size = MAX_N_TTL_BUCKET * bucket_size; + let last_expired_size = ::std::mem::size_of::(); + let ttl_buckets_struct_size = buckets_size + last_expired_size; + + // Mmap file + let mut pool = File::create(file, ttl_buckets_struct_size, true) + .expect("failed to allocate file backed storage"); + let data = Box::new(pool.as_mut_slice()); + + // --------------------- Store `last_expired` ----------------- + let mut offset = 0; + + // cast `last_expired` to byte pointer + let byte_ptr = (&self.last_expired as *const Instant) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, last_expired_size) }; + + // store `started` back to mmapped file + data[offset..last_expired_size].copy_from_slice(bytes); + + // --------------------- Store `buckets` ----------------- + offset += last_expired_size; + + // for every `TtlBucket` + for id in 0..MAX_N_TTL_BUCKET { + let begin = offset + (bucket_size as usize * id); + let finish = begin + bucket_size as usize; + + // cast `TtlBucket` to byte pointer + let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; + + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, bucket_size) }; + + // store `TtlBucket` back to mmapped file + data[begin..finish].copy_from_slice(bytes); + } + + gracefully_shutdown = true; + + // TODO: check if this flushes the CPU caches + pool.flush() + .expect("failed to flush `TtlBuckets` to storage"); + } + + gracefully_shutdown + } + pub(crate) 
fn get_bucket_index(&self, ttl: Duration) -> usize { let ttl = ttl.as_secs() as i32; if ttl <= 0 { @@ -142,6 +261,28 @@ impl TtlBuckets { CLEAR_TIME.add(duration.as_nanos() as _); cleared } + + #[cfg(test)] + // Checks if `TtlBuckets.buckets` are equivalent + pub(crate) fn equivalent_buckets(&self, buckets: Box<[TtlBucket]>) -> bool { + let total_buckets = self.buckets.len(); + + // ensure number of `TtlBucket`s is the same + let mut equivalent = total_buckets == buckets.len(); + + // Compare each `TtlBucket` + for id in 0..total_buckets { + equivalent = equivalent && self.buckets[id] == buckets[id]; + } + + equivalent + } + + #[cfg(test)] + // Checks if `TtlBuckets.buckets` are equivalent + pub(crate) fn equivalent_ttlbuckets(&self, t: TtlBuckets) -> bool { + self.equivalent_buckets(t.buckets.clone()) && self.last_expired == t.last_expired + } } impl Default for TtlBuckets { From 7e32771542ce2acf26031d70eed9c7797917138c Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Sat, 12 Feb 2022 14:14:11 +1100 Subject: [PATCH 02/74] removed workflows --- .github/workflows/cargo.yml | 139 ------------------------------------ .github/workflows/cmake.yml | 42 ----------- .github/workflows/fuzz.yml | 40 ----------- 3 files changed, 221 deletions(-) delete mode 100644 .github/workflows/cargo.yml delete mode 100644 .github/workflows/cmake.yml delete mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/cargo.yml b/.github/workflows/cargo.yml deleted file mode 100644 index 68d553f50..000000000 --- a/.github/workflows/cargo.yml +++ /dev/null @@ -1,139 +0,0 @@ -name: cargo-build - -on: - push: - pull_request: - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: full - -jobs: - build: - strategy: - matrix: - os: [ ubuntu-18.04, macos-10.15 ] - profile: [ release, debug ] - name: build-${{ matrix.os }}-${{ matrix.profile }} - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: 
Update cargo flags - if: ${{ matrix.profile == 'release' }} - run: echo 'CARGO_FLAGS=--release' >> $GITHUB_ENV - shell: bash - - name: Update cargo flags - if: ${{ matrix.profile == 'debug' }} - run: echo 'CARGO_FLAGS=' >> $GITHUB_ENV - shell: bash - - - uses: Swatinem/rust-cache@v1 - with: - key: ${{ matrix.profile }} - - uses: actions-rs/cargo@v1 - name: build - with: - command: test - args: ${{ env.CARGO_FLAGS }} --workspace --all-features --tests --lib --bins --examples --no-run - - uses: actions-rs/cargo@v1 - name: test - with: - command: test - args: ${{ env.CARGO_FLAGS }} --workspace --all-features --tests --lib --bins --examples - - uses: actions-rs/cargo@v1 - if: ${{ matrix.profile == 'debug' }} - name: doctests - with: - command: test - args: ${{ env.CARGO_FLAGS }} --workspace --all-features --doc -- --test-threads 16 - - bench-check: - strategy: - matrix: - os: [ ubuntu-18.04, macos-10.15 ] - name: build-${{ matrix.os }}-bench - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Update cargo flags - if: ${{ matrix.profile == 'release' }} - run: echo 'CARGO_FLAGS=--release' >> $GITHUB_ENV - shell: bash - - name: Update cargo flags - if: ${{ matrix.profile == 'debug' }} - run: echo 'CARGO_FLAGS=' >> $GITHUB_ENV - shell: bash - - - uses: Swatinem/rust-cache@v1 - with: - key: bench - - uses: actions-rs/cargo@v1 - name: build - with: - command: bench - args: --workspace --no-run - - # Fast cargo check to ensure things compile - check: - strategy: - matrix: - os: [ ubuntu-20.04, macos-10.15 ] - name: check-${{ matrix.os }} - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - uses: Swatinem/rust-cache@v1 - - uses: actions-rs/cargo@v1 - with: - command: check - args: --release - - rustfmt: - name: rustfmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: 
actions-rs/toolchain@v1 - with: - toolchain: stable - - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --all -- --check - - # Note: We could run these using the pull_request_target trigger. I haven't - # done this since I'm not sure if it would be secure. - # - # See this link for more details on this - # https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ - clippy: - name: clippy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: Swatinem/rust-cache@v1 - with: - key: clippy - - uses: actions-rs/clippy-check@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - - audit: - name: audit - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/audit-check@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml deleted file mode 100644 index e39f58984..000000000 --- a/.github/workflows/cmake.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: cmake-build - -on: - push: - pull_request: - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: full - -jobs: - build: - strategy: - matrix: - os: [ ubuntu-18.04, macos-10.15 ] - profile: [ Release ] - name: build-${{ matrix.os }}-${{ matrix.profile }} - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - - name: Configure - run: | - mkdir -p _build - cmake -B _build -S . 
\ - -DCMAKE_BUILD_TYPE=${{ matrix.profile }} \ - -DBUILD_AND_INSTALL_CHECK=yes - - - name: Build - run: | - cmake --build _build - - - name: Test - run: | - cmake --build _build --target test - env: - CTEST_OUTPUT_ON_FAILURE: 1 - - - name: Integration tests - run: | - cd test/integration - python test_twemcache.py \ No newline at end of file diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml deleted file mode 100644 index b58f8fb8d..000000000 --- a/.github/workflows/fuzz.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: cargo-fuzz - -on: - push: - pull_request: - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: full - FUZZ_TIME: 300 - FUZZ_JOBS: 2 - -jobs: - build: - strategy: - matrix: - os: [ ubuntu-18.04 ] - target: [ admin, memcache ] - name: fuzz-${{ matrix.os }}-${{ matrix.target }} - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - - uses: Swatinem/rust-cache@v1 - with: - key: ${{ matrix.os }}-${{ matrix.target }} - - - uses: actions-rs/cargo@v1 - name: Install cargo-fuzz - with: - command: install - args: cargo-fuzz - - - name: fuzz ${{ matrix.target }} - run: | - cd src/rust/protocol - cargo +nightly fuzz run ${{ matrix.target }} --jobs ${{ env.FUZZ_JOBS }} -- \ - -max_total_time=${{ env.FUZZ_TIME }} From 26b55b50c6fbddb0d94adf2d8ad7d006bdd819b0 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Sat, 12 Feb 2022 14:15:59 +1100 Subject: [PATCH 03/74] added back workflows --- .github/workflows/cargo.yml | 139 ++++++++++++++++++++++++++++++++++++ .github/workflows/cmake.yml | 40 +++++++++++ .github/workflows/fuzz.yml | 40 +++++++++++ 3 files changed, 219 insertions(+) create mode 100644 .github/workflows/cargo.yml create mode 100644 .github/workflows/cmake.yml create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/cargo.yml b/.github/workflows/cargo.yml new file mode 100644 index 000000000..68d553f50 --- /dev/null +++ 
b/.github/workflows/cargo.yml @@ -0,0 +1,139 @@ +name: cargo-build + +on: + push: + pull_request: + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + +jobs: + build: + strategy: + matrix: + os: [ ubuntu-18.04, macos-10.15 ] + profile: [ release, debug ] + name: build-${{ matrix.os }}-${{ matrix.profile }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + + - name: Update cargo flags + if: ${{ matrix.profile == 'release' }} + run: echo 'CARGO_FLAGS=--release' >> $GITHUB_ENV + shell: bash + - name: Update cargo flags + if: ${{ matrix.profile == 'debug' }} + run: echo 'CARGO_FLAGS=' >> $GITHUB_ENV + shell: bash + + - uses: Swatinem/rust-cache@v1 + with: + key: ${{ matrix.profile }} + - uses: actions-rs/cargo@v1 + name: build + with: + command: test + args: ${{ env.CARGO_FLAGS }} --workspace --all-features --tests --lib --bins --examples --no-run + - uses: actions-rs/cargo@v1 + name: test + with: + command: test + args: ${{ env.CARGO_FLAGS }} --workspace --all-features --tests --lib --bins --examples + - uses: actions-rs/cargo@v1 + if: ${{ matrix.profile == 'debug' }} + name: doctests + with: + command: test + args: ${{ env.CARGO_FLAGS }} --workspace --all-features --doc -- --test-threads 16 + + bench-check: + strategy: + matrix: + os: [ ubuntu-18.04, macos-10.15 ] + name: build-${{ matrix.os }}-bench + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + + - name: Update cargo flags + if: ${{ matrix.profile == 'release' }} + run: echo 'CARGO_FLAGS=--release' >> $GITHUB_ENV + shell: bash + - name: Update cargo flags + if: ${{ matrix.profile == 'debug' }} + run: echo 'CARGO_FLAGS=' >> $GITHUB_ENV + shell: bash + + - uses: Swatinem/rust-cache@v1 + with: + key: bench + - uses: actions-rs/cargo@v1 + name: build + with: + command: bench + args: --workspace --no-run + + # Fast cargo check to ensure things compile 
+ check: + strategy: + matrix: + os: [ ubuntu-20.04, macos-10.15 ] + name: check-${{ matrix.os }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - uses: Swatinem/rust-cache@v1 + - uses: actions-rs/cargo@v1 + with: + command: check + args: --release + + rustfmt: + name: rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + # Note: We could run these using the pull_request_target trigger. I haven't + # done this since I'm not sure if it would be secure. + # + # See this link for more details on this + # https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ + clippy: + name: clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: Swatinem/rust-cache@v1 + with: + key: clippy + - uses: actions-rs/clippy-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + audit: + name: audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml new file mode 100644 index 000000000..71d2d857b --- /dev/null +++ b/.github/workflows/cmake.yml @@ -0,0 +1,40 @@ +name: cmake-build + +on: + push: + pull_request: + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + +jobs: + build: + strategy: + matrix: + os: [ ubuntu-18.04, macos-10.15 ] + profile: [ Release ] + name: build-${{ matrix.os }}-${{ matrix.profile }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + + - name: Configure + run: | + mkdir -p _build + cmake -B _build -S . 
\ + -DCMAKE_BUILD_TYPE=${{ matrix.profile }} \ + -DBUILD_AND_INSTALL_CHECK=yes + - name: Build + run: | + cmake --build _build + - name: Test + run: | + cmake --build _build --target test + env: + CTEST_OUTPUT_ON_FAILURE: 1 + + - name: Integration tests + run: | + cd test/integration + python test_twemcache.py diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 000000000..b58f8fb8d --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,40 @@ +name: cargo-fuzz + +on: + push: + pull_request: + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + FUZZ_TIME: 300 + FUZZ_JOBS: 2 + +jobs: + build: + strategy: + matrix: + os: [ ubuntu-18.04 ] + target: [ admin, memcache ] + name: fuzz-${{ matrix.os }}-${{ matrix.target }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + - uses: Swatinem/rust-cache@v1 + with: + key: ${{ matrix.os }}-${{ matrix.target }} + + - uses: actions-rs/cargo@v1 + name: Install cargo-fuzz + with: + command: install + args: cargo-fuzz + + - name: fuzz ${{ matrix.target }} + run: | + cd src/rust/protocol + cargo +nightly fuzz run ${{ matrix.target }} --jobs ${{ env.FUZZ_JOBS }} -- \ + -max_total_time=${{ env.FUZZ_TIME }} From 356e4fdabb73c1e40f5435cdc510bef79ccdd1d0 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 10:32:12 +1100 Subject: [PATCH 04/74] removed newline from .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 84cd3ff7d..addf70a9e 100644 --- a/.gitignore +++ b/.gitignore @@ -54,4 +54,4 @@ CMAKE_BINARY_DIR .cargo # Cargo build directory -/target +/target \ No newline at end of file From 0503216a924bf2b7fac1c12d3906515b35d939ce Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 10:55:46 +1100 Subject: [PATCH 05/74] added descriptions for restore() and graceful_shutdown() --- 
src/rust/config/src/seg.rs | 10 ++++++++++ src/rust/storage/seg/src/builder.rs | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/rust/config/src/seg.rs b/src/rust/config/src/seg.rs index 012f40a12..7dc896a2a 100644 --- a/src/rust/config/src/seg.rs +++ b/src/rust/config/src/seg.rs @@ -164,9 +164,19 @@ impl Default for Seg { // implementation impl Seg { + // Determines if the `Seg` will be restored. + // The restoration will be successful if `datapool_path`, `segments_fields_path` + // `ttl_buckets_path` and `hashtable_path` are valid paths. + // Otherwise, the `Seg` will be created as new. pub fn restore(&self) -> bool { self.restore } + + // Determines if the `Seg` will be gracefully shutdown. + // The graceful shutdown will be successful if the cache is file backed + // and `segments_fields_path`, `ttl_buckets_path` and `hashtable_path` are + // valid paths to save the relevant `Seg` fields to. + // Otherwise, the relevant `Seg` fields will not be saved. pub fn graceful_shutdown(&self) -> bool { self.graceful_shutdown } diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 2066cccac..46af8f750 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -191,7 +191,7 @@ impl Builder { /// ``` pub fn build(self) -> Seg { // Build `Segments`. 
- // If `restore` and a valid path is given, + // If `restore` and valid paths are given, // it will be copied back let segments = self.segments_builder.build(); if segments.fields_copied_back && self.restore { From 5ea566794a41ceb69dfdbdb7080ce1c37c28ffd2 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 11:01:23 +1100 Subject: [PATCH 06/74] added full tempfile version --- src/rust/storage/seg/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/storage/seg/Cargo.toml b/src/rust/storage/seg/Cargo.toml index ca41db941..632baf8e9 100644 --- a/src/rust/storage/seg/Cargo.toml +++ b/src/rust/storage/seg/Cargo.toml @@ -35,7 +35,7 @@ rand_chacha = { version = "0.3.0" } rand_xoshiro = { version = "0.6.0" } storage-types = { path = "../types" } thiserror = "1.0.24" -tempfile = "3" +tempfile = "3.3.0" [dev-dependencies] criterion = "0.3.4" From 4ccf01f667e7ad620283cda62c61928d4fd1cf56 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 11:05:04 +1100 Subject: [PATCH 07/74] changed derive traits to be in order --- src/rust/storage/seg/src/hashtable/hash_bucket.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/storage/seg/src/hashtable/hash_bucket.rs b/src/rust/storage/seg/src/hashtable/hash_bucket.rs index 3829951bb..e9c3bc194 100644 --- a/src/rust/storage/seg/src/hashtable/hash_bucket.rs +++ b/src/rust/storage/seg/src/hashtable/hash_bucket.rs @@ -72,7 +72,7 @@ pub(crate) const CLEAR_FREQ_SMOOTH_MASK: u64 = 0xFFF7_FFFF_FFFF_FFFF; /// Mask to get the lower 16 bits from a timestamp pub(crate) const PROC_TS_MASK: u64 = 0x0000_0000_0000_FFFF; -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub(crate) struct HashBucket { pub(super) data: [u64; N_BUCKET_SLOT], } From 592a8234ea56084d098db38445c617a55ad61666 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 11:11:59 +1100 Subject: [PATCH 08/74] new function that generates 
hash_builder, to remove repeated code --- src/rust/storage/seg/src/hashtable/mod.rs | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index b5a4a85a4..8d22ca6d8 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -144,12 +144,7 @@ impl HashTable { slots, buckets, total_buckets, ); - let hash_builder = RandomState::with_seeds( - 0xbb8c484891ec6c86, - 0x0522a25ae9c769f9, - 0xeed2797b9571bc75, - 0x4feb29c1fbbd59d0, - ); + let hash_builder = hash_builder(); Self { hash_builder: Box::new(hash_builder), @@ -195,12 +190,7 @@ impl HashTable { // ----- Re-initialise `hash_builder` ----- - let hash_builder = RandomState::with_seeds( - 0xbb8c484891ec6c86, - 0x0522a25ae9c769f9, - 0xeed2797b9571bc75, - 0x4feb29c1fbbd59d0, - ); + let hash_builder = hash_builder(); // ----- Retrieve `power` --------- @@ -935,6 +925,17 @@ impl HashTable { evicted } + + /// Internal function used to generate a new `hash_builder` + fn hash_builder() -> RandomState { + RandomState::with_seeds( + 0xbb8c484891ec6c86, + 0x0522a25ae9c769f9, + 0xeed2797b9571bc75, + 0x4feb29c1fbbd59d0, + ) + } + /// Internal function used to calculate a hash value for a key fn hash(&self, key: &[u8]) -> u64 { HASH_LOOKUP.increment(); From 67d5aa541bc41e2414d13089d3971f55f2e931e3 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 11:26:09 +1100 Subject: [PATCH 09/74] removed ccc comments --- src/rust/storage/seg/src/hashtable/mod.rs | 10 ++-------- src/rust/storage/seg/src/seg.rs | 2 -- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 8d22ca6d8..a34f7c01f 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -366,7 +366,6 @@ impl HashTable { let tag = 
tag_from_hash(hash); let bucket_id = hash & self.mask; - // ccc: get bucket corresponding to the key let mut bucket = &mut self.data[bucket_id as usize]; let chain_len = chain_len(bucket.data[0]); let mut chain_idx = 0; @@ -377,7 +376,6 @@ impl HashTable { if curr_ts != get_ts(bucket.data[0]) { bucket.data[0] = (bucket.data[0] & !TS_MASK) | (curr_ts << TS_BIT_SHIFT); - // ccc: Mask every "item info" in this bucket to remove the freq smoothing loop { let n_item_slot = if chain_idx == chain_len { N_BUCKET_SLOT @@ -404,24 +402,20 @@ impl HashTable { bucket = &mut self.data[bucket_id as usize]; } - // ccc: look at every HashBucket in this chain loop { let n_item_slot = if chain_idx == chain_len { - N_BUCKET_SLOT // ccc: the last HashBucket in this chain has 8 items + N_BUCKET_SLOT } else { - N_BUCKET_SLOT - 1 // ccc: every other has 7 items (or 6 in the case of HashBucket 0) + N_BUCKET_SLOT - 1 }; - // ccc: for every slot of "item info" in this HashBucket for i in 0..n_item_slot { - // ccc: ignore the "bucket info" slot (in HashBucket 0) if chain_idx == 0 && i == 0 { continue; } let current_info = bucket.data[i]; - // ccc: check if the tags match if get_tag(current_info) == tag { let current_item = segments.get_item(current_info).unwrap(); if current_item.key() != key { diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 00541ed66..d2746e829 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -160,8 +160,6 @@ impl Seg { let mut retries = RESERVE_RETRIES; let reserved; loop { - // ccc: check tail segment of TTL bucket for free space. - // ccc: If full, try to get a new segment from free q and make this the tail match self .ttl_buckets .get_mut_bucket(ttl) From 9caeec5f69cb195dcc08bcecfbb172576ae93e15 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 11:34:54 +1100 Subject: [PATCH 10/74] changed will_resore to restore. 
Moved hash_builder() so it is in scope --- src/rust/storage/seg/src/hashtable/mod.rs | 21 ++++++++++---------- src/rust/storage/seg/src/segments/builder.rs | 4 ++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index a34f7c01f..34177f267 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -919,17 +919,6 @@ impl HashTable { evicted } - - /// Internal function used to generate a new `hash_builder` - fn hash_builder() -> RandomState { - RandomState::with_seeds( - 0xbb8c484891ec6c86, - 0x0522a25ae9c769f9, - 0xeed2797b9571bc75, - 0x4feb29c1fbbd59d0, - ) - } - /// Internal function used to calculate a hash value for a key fn hash(&self, key: &[u8]) -> u64 { HASH_LOOKUP.increment(); @@ -964,3 +953,13 @@ impl HashTable { && self.next_to_chain == h.next_to_chain } } + +/// Internal function used to generate a new `hash_builder` +fn hash_builder() -> RandomState { + RandomState::with_seeds( + 0xbb8c484891ec6c86, + 0x0522a25ae9c769f9, + 0xeed2797b9571bc75, + 0x4feb29c1fbbd59d0, + ) +} diff --git a/src/rust/storage/seg/src/segments/builder.rs b/src/rust/storage/seg/src/segments/builder.rs index 1f574087c..32214b61b 100644 --- a/src/rust/storage/seg/src/segments/builder.rs +++ b/src/rust/storage/seg/src/segments/builder.rs @@ -37,8 +37,8 @@ impl<'a> SegmentsBuilder { /// Specify whether the `Segments` fields' will be restored /// from the segments_fields_path. /// Otherwise, the cache will be created and treated as new. 
- pub fn restore(mut self, will_restore: bool) -> Self { - self.restore = will_restore; + pub fn restore(mut self, restore: bool) -> Self { + self.restore = restore; self } From b4bed3a3f6675e3602a416ff8b21b9795ed911f6 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 11:38:19 +1100 Subject: [PATCH 11/74] changed will_resore to restore --- src/rust/storage/seg/src/builder.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 46af8f750..ddcd1f334 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -35,9 +35,9 @@ impl Default for Builder { impl Builder { /// Specify to `Builder` and `SegmentsBuilder` whether the cache will be restored. /// Otherwise, the cache will be created and treated as new. - pub fn restore(mut self, will_restore: bool) -> Self { - self.restore = will_restore; - self.segments_builder = self.segments_builder.restore(will_restore); + pub fn restore(mut self, restore: bool) -> Self { + self.restore = restore; + self.segments_builder = self.segments_builder.restore(restore); self } From d2966d21d166fb04ff5b074dbabbdaf5c6e64d2e Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 12:15:53 +1100 Subject: [PATCH 12/74] took out of a Boxed struct so I can use helper function store() to copy bytes to it --- src/rust/storage/seg/src/hashtable/mod.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 34177f267..45bbb0446 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -271,6 +271,7 @@ impl HashTable { let buckets_size = total_buckets * bucket_size; let u64_size = ::std::mem::size_of::(); let started_size = ::std::mem::size_of::(); + // needed let hashtable_size = u64_size * 3 // `power`, `mask`, 
`next_to_chain` + buckets_size // `data` + started_size; @@ -278,7 +279,7 @@ impl HashTable { // Mmap file let mut pool = File::create(file, hashtable_size, true) .expect("failed to allocate file backed storage"); - let file_data = Box::new(pool.as_mut_slice()); + let file_data = pool.as_mut_slice(); // --------------------- Store `power` ----------------- let mut offset = 0; @@ -314,7 +315,7 @@ impl HashTable { for id in 0..total_buckets { let begin = offset + (bucket_size as usize * id); let finish = begin + bucket_size as usize; - + // cast `HashBucket` to byte pointer let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; @@ -336,7 +337,8 @@ impl HashTable { let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, started_size) }; // store `started` back to mmapped file - file_data[offset..end].copy_from_slice(bytes); + //file_data[offset..end].copy_from_slice(bytes); + store(bytes, offset, started_size, file_data); // --------------------- Store `next_to_chain` ----------------- offset += started_size; @@ -963,3 +965,9 @@ fn hash_builder() -> RandomState { 0x4feb29c1fbbd59d0, ) } + +/// Copies `bytes` to the `offset` of `data` +fn store(bytes: &[u8], offset: usize, size: usize, data: &mut [u8]) { + let end = offset + size; + data[offset..end].copy_from_slice(bytes); +} \ No newline at end of file From deaaa916b4375b49e5faaf674b583d216868934d Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 12:21:46 +1100 Subject: [PATCH 13/74] created store() and now will integrate it into all demolish() functions --- src/rust/storage/seg/src/hashtable/mod.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 45bbb0446..e5ffd14f2 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -334,11 +334,11 @@ impl HashTable { let byte_ptr = (&self.started as *const 
Instant) as *const u8; // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, started_size) }; + // let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, started_size) }; // store `started` back to mmapped file //file_data[offset..end].copy_from_slice(bytes); - store(bytes, offset, started_size, file_data); + store(byte_ptr, offset, started_size, file_data); // --------------------- Store `next_to_chain` ----------------- offset += started_size; @@ -966,8 +966,19 @@ fn hash_builder() -> RandomState { ) } -/// Copies `bytes` to the `offset` of `data` -fn store(bytes: &[u8], offset: usize, size: usize, data: &mut [u8]) { +// /// Copies `bytes` to the `offset` of `data` +// fn store(bytes: &[u8], offset: usize, size: usize, data: &mut [u8]) { +// let end = offset + size; +// data[offset..end].copy_from_slice(bytes); +// } + +/// Copies bytes at `byte_ptr` to the `offset` of `data` +fn store(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; + let end = offset + size; + + // store `bytes` to `data` data[offset..end].copy_from_slice(bytes); } \ No newline at end of file From 677200be97b5d5b9e39e9a87417625d05a4055b1 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 12:29:32 +1100 Subject: [PATCH 14/74] all of HashTable::demolish() uses store(). 
Will make similar changes for TtlBuckets and Segments --- src/rust/storage/seg/src/hashtable/mod.rs | 50 ++++++----------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index e5ffd14f2..6122b89df 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -281,77 +281,57 @@ impl HashTable { .expect("failed to allocate file backed storage"); let file_data = pool.as_mut_slice(); - // --------------------- Store `power` ----------------- let mut offset = 0; - let mut end = u64_size; + // --------------------- Store `power` ----------------- // cast `power` to byte pointer let byte_ptr = (&self.power as *const u64) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u64_size) }; - // store `power` back to mmapped file - file_data[offset..end].copy_from_slice(bytes); + store(byte_ptr, offset, u64_size, file_data); - // --------------------- Store `mask` ----------------- offset += u64_size; - end += u64_size; + // --------------------- Store `mask` ----------------- // cast `mask` to byte pointer let byte_ptr = (&self.mask as *const u64) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u64_size) }; - // store `mask` back to mmapped file - file_data[offset..end].copy_from_slice(bytes); + store(byte_ptr, offset, u64_size, file_data); - // --------------------- Store `data` ----------------- offset += u64_size; - end += buckets_size; + // --------------------- Store `data` ----------------- // for every `HashBucket` for id in 0..total_buckets { - let begin = offset + (bucket_size as usize * id); - let finish = begin + bucket_size as usize; // cast `HashBucket` to byte pointer let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; - // get corresponding bytes from 
byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, bucket_size) }; - // store `HashBucket` back to mmapped file - file_data[begin..finish].copy_from_slice(bytes); + store(byte_ptr, offset, bucket_size, file_data); + + offset = offset + bucket_size; } // --------------------- Store `started` ----------------- - offset += buckets_size; - end += started_size; // cast `started` to byte pointer let byte_ptr = (&self.started as *const Instant) as *const u8; - // get corresponding bytes from byte pointer - // let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, started_size) }; - // store `started` back to mmapped file - //file_data[offset..end].copy_from_slice(bytes); store(byte_ptr, offset, started_size, file_data); - // --------------------- Store `next_to_chain` ----------------- offset += started_size; - end += u64_size; + // --------------------- Store `next_to_chain` ----------------- // cast `next_to_chain` to byte pointer let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u64_size) }; - // store `next_to_chain` back to mmapped file - file_data[offset..end].copy_from_slice(bytes); + store(byte_ptr, offset, u64_size, file_data); + + // ------------------------------------------------------------- gracefully_shutdown = true; @@ -966,12 +946,6 @@ fn hash_builder() -> RandomState { ) } -// /// Copies `bytes` to the `offset` of `data` -// fn store(bytes: &[u8], offset: usize, size: usize, data: &mut [u8]) { -// let end = offset + size; -// data[offset..end].copy_from_slice(bytes); -// } - /// Copies bytes at `byte_ptr` to the `offset` of `data` fn store(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { // get corresponding bytes from byte pointer From 3603c1bb7a66fa9299a794b8880c5354aee273e3 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 12:46:05 +1100 Subject: [PATCH 15/74] 
updated store() so it also returns new offset --- src/rust/storage/seg/src/hashtable/mod.rs | 26 ++++++++----------- src/rust/storage/seg/src/segments/segments.rs | 24 +++++++++++------ 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 6122b89df..12592260d 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -288,18 +288,15 @@ impl HashTable { let byte_ptr = (&self.power as *const u64) as *const u8; // store `power` back to mmapped file - store(byte_ptr, offset, u64_size, file_data); + offset = store_and_update_offset(byte_ptr, offset, u64_size, file_data); - offset += u64_size; // --------------------- Store `mask` ----------------- // cast `mask` to byte pointer let byte_ptr = (&self.mask as *const u64) as *const u8; // store `mask` back to mmapped file - store(byte_ptr, offset, u64_size, file_data); - - offset += u64_size; + offset = store_and_update_offset(byte_ptr, offset, u64_size, file_data); // --------------------- Store `data` ----------------- // for every `HashBucket` @@ -309,9 +306,7 @@ impl HashTable { let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; // store `HashBucket` back to mmapped file - store(byte_ptr, offset, bucket_size, file_data); - - offset = offset + bucket_size; + offset = store_and_update_offset(byte_ptr, offset, bucket_size, file_data); } // --------------------- Store `started` ----------------- @@ -320,17 +315,14 @@ impl HashTable { let byte_ptr = (&self.started as *const Instant) as *const u8; // store `started` back to mmapped file - store(byte_ptr, offset, started_size, file_data); - - offset += started_size; + offset = store_and_update_offset(byte_ptr, offset, started_size, file_data); // --------------------- Store `next_to_chain` ----------------- // cast `next_to_chain` to byte pointer let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; // 
store `next_to_chain` back to mmapped file - store(byte_ptr, offset, u64_size, file_data); - + store_and_update_offset(byte_ptr, offset, u64_size, file_data); // ------------------------------------------------------------- gracefully_shutdown = true; @@ -946,8 +938,9 @@ fn hash_builder() -> RandomState { ) } -/// Copies bytes at `byte_ptr` to the `offset` of `data` -fn store(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { +/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` +/// Returns the next `offset`, that is, the next byte of `data` to be copied into +fn store_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { // get corresponding bytes from byte pointer let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; @@ -955,4 +948,7 @@ fn store(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { // store `bytes` to `data` data[offset..end].copy_from_slice(bytes); + + // next `offset` + end } \ No newline at end of file diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 1652584fc..49a5adea3 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -287,27 +287,24 @@ impl Segments { // mmap file let mut pool = File::create(file, fields_size, true) .expect("failed to allocate file backed storage"); - let fields_data = Box::new(pool.as_mut_slice()); + let fields_data = pool.as_mut_slice(); + let mut offset = 0; // ----- Store `headers` ----- // for every `SegmentHeader` for id in 0..segments { - let begin = header_size as usize * id; - let finish = begin + header_size as usize; // cast `SegmentHeader` to byte pointer let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, header_size) }; - // store `SegmentHeader` back to 
mmapped file - fields_data[begin..finish].copy_from_slice(bytes); + store_and_update_offset(byte_ptr, offset, header_size, fields_data); + + offset += header_size; } // ----- Store `segment_size` ----- - let mut offset = headers_size; let mut end = offset + i32_size; // cast `segment_size` to byte pointer @@ -1264,3 +1261,14 @@ impl Default for Segments { Self::from_builder_new(Default::default()) } } + +/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` +fn store_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; + + let end = offset + size; + + // store `bytes` to `data` + data[offset..end].copy_from_slice(bytes); +} \ No newline at end of file From 09a1f65bc677961d238f196be11f05e30bc8d413 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 12:47:23 +1100 Subject: [PATCH 16/74] updated store() so it also returns new offset --- src/rust/storage/seg/src/hashtable/mod.rs | 12 ++++++------ src/rust/storage/seg/src/segments/segments.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 12592260d..b761853b5 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -288,7 +288,7 @@ impl HashTable { let byte_ptr = (&self.power as *const u64) as *const u8; // store `power` back to mmapped file - offset = store_and_update_offset(byte_ptr, offset, u64_size, file_data); + offset = store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // --------------------- Store `mask` ----------------- @@ -296,7 +296,7 @@ impl HashTable { let byte_ptr = (&self.mask as *const u64) as *const u8; // store `mask` back to mmapped file - offset = store_and_update_offset(byte_ptr, offset, u64_size, file_data); + offset = 
store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // --------------------- Store `data` ----------------- // for every `HashBucket` @@ -306,7 +306,7 @@ impl HashTable { let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; // store `HashBucket` back to mmapped file - offset = store_and_update_offset(byte_ptr, offset, bucket_size, file_data); + offset = store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); } // --------------------- Store `started` ----------------- @@ -315,14 +315,14 @@ impl HashTable { let byte_ptr = (&self.started as *const Instant) as *const u8; // store `started` back to mmapped file - offset = store_and_update_offset(byte_ptr, offset, started_size, file_data); + offset = store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); // --------------------- Store `next_to_chain` ----------------- // cast `next_to_chain` to byte pointer let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; // store `next_to_chain` back to mmapped file - store_and_update_offset(byte_ptr, offset, u64_size, file_data); + store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // ------------------------------------------------------------- gracefully_shutdown = true; @@ -940,7 +940,7 @@ fn hash_builder() -> RandomState { /// Copies `size` bytes at `byte_ptr` to the `offset` of `data` /// Returns the next `offset`, that is, the next byte of `data` to be copied into -fn store_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { +fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { // get corresponding bytes from byte pointer let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 49a5adea3..b55c6e7bd 100644 --- a/src/rust/storage/seg/src/segments/segments.rs 
+++ b/src/rust/storage/seg/src/segments/segments.rs @@ -299,7 +299,7 @@ impl Segments { let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; // store `SegmentHeader` back to mmapped file - store_and_update_offset(byte_ptr, offset, header_size, fields_data); + store_bytes_and_update_offset(byte_ptr, offset, header_size, fields_data); offset += header_size; } @@ -1263,7 +1263,7 @@ impl Default for Segments { } /// Copies `size` bytes at `byte_ptr` to the `offset` of `data` -fn store_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { +fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { // get corresponding bytes from byte pointer let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; From 21ba0453079ee5def7dc06725a7ba9ddaf08de01 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 14:58:07 +1100 Subject: [PATCH 17/74] added helper function total_buckets() to reduce repeated code --- src/rust/storage/seg/src/hashtable/mod.rs | 50 ++++++++----------- src/rust/storage/seg/src/segments/segments.rs | 10 ++-- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index b761853b5..a93994257 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -165,17 +165,13 @@ impl HashTable { // I.e. 
config specifies same `power` as `HashTable` we are // restoring from // TODO: Detect a change of `power` and adjust `HashTable` accordingly - - let slots = 1_u64 << cfg_power; - let buckets = slots / 8; - let total_buckets = (buckets as f64 * (1.0 + overflow_factor)).ceil() as usize; + let total_buckets = total_buckets(cfg_power.into(), overflow_factor); let bucket_size = ::std::mem::size_of::(); - // size from all `HashBucket`s in `data` - let buckets_size = total_buckets * bucket_size; let u64_size = ::std::mem::size_of::(); let started_size = ::std::mem::size_of::(); + // Size of all components of `HashTable` that are being restored let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` - + buckets_size // `data` + + total_buckets * bucket_size // `data` + started_size; // Mmap file @@ -192,48 +188,43 @@ impl HashTable { let hash_builder = hash_builder(); - // ----- Retrieve `power` --------- - let mut offset = 0; + // ----- Retrieve `power` --------- let mut end = u64_size; let power = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; // TODO: compare `cfg_power` and `power` - // ----- Retrieve `mask` --------- - offset += u64_size; + // ----- Retrieve `mask` --------- end += u64_size; let mask = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; - // ----- Retrieve `data` --------- offset += u64_size; - end += buckets_size; + // ----- Retrieve `data` --------- let mut data = Vec::with_capacity(0); data.reserve_exact(total_buckets as usize); // Get each `HashBucket` from the raw bytes - for id in 0..total_buckets { - let begin = offset + (bucket_size as usize * id); - let finish = begin + bucket_size as usize; + for _ in 0..total_buckets { + end += bucket_size; // cast bytes to `HashBucket` - let bucket = unsafe { *(bytes[begin..finish].as_mut_ptr() as *mut HashBucket) }; + let bucket = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut HashBucket) }; data.push(bucket); + + offset += bucket_size; } // ----- Retrieve `started` 
--------- - - offset += buckets_size; end += started_size; let started = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; - // ----- Retrieve `next_to_chain` --------- - offset += started_size; + // ----- Retrieve `next_to_chain` --------- end += u64_size; let next_to_chain = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; @@ -263,17 +254,13 @@ impl HashTable { // if a path is specified, copy all the `HashBucket`s // to the file specified by `hashtable_path` if let Some(file) = hashtable_path { - let slots = 1_u64 << self.power; - let buckets = slots / 8; - let total_buckets = (buckets as f64 * (1.0 + overflow_factor)).ceil() as usize; + let total_buckets = total_buckets(self.power, overflow_factor); let bucket_size = ::std::mem::size_of::(); - // size from all `HashBucket`s in `data` - let buckets_size = total_buckets * bucket_size; let u64_size = ::std::mem::size_of::(); let started_size = ::std::mem::size_of::(); - // needed + // Size of all components of `HashTable` that are being saved let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` - + buckets_size // `data` + + total_buckets * bucket_size // `data` + started_size; // Mmap file @@ -928,6 +915,13 @@ impl HashTable { } } +/// Internal function used to calculate the total number of buckets +fn total_buckets(power: u64, overflow_factor: f64) -> usize { + let slots = 1_u64 << power; + let buckets = slots / 8; + (buckets as f64 * (1.0 + overflow_factor)).ceil() as usize +} + /// Internal function used to generate a new `hash_builder` fn hash_builder() -> RandomState { RandomState::with_seeds( diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index b55c6e7bd..dbff352c1 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -299,9 +299,7 @@ impl Segments { let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; // store `SegmentHeader` back 
to mmapped file - store_bytes_and_update_offset(byte_ptr, offset, header_size, fields_data); - - offset += header_size; + offset = store_bytes_and_update_offset(byte_ptr, offset, header_size, fields_data); } // ----- Store `segment_size` ----- @@ -1263,7 +1261,8 @@ impl Default for Segments { } /// Copies `size` bytes at `byte_ptr` to the `offset` of `data` -fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) { +/// Returns the next `offset`, that is, the next byte of `data` to be copied into +fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { // get corresponding bytes from byte pointer let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; @@ -1271,4 +1270,7 @@ fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize // store `bytes` to `data` data[offset..end].copy_from_slice(bytes); + + // next `offset` + end } \ No newline at end of file From 7dc7d736764786330bca13139c990b1124091839 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 15:18:35 +1100 Subject: [PATCH 18/74] integrated store_bytes_and_update_offset() into Segments --- src/rust/storage/seg/src/segments/segments.rs | 36 ++++--------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index dbff352c1..7c030cc69 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -303,68 +303,46 @@ impl Segments { } // ----- Store `segment_size` ----- - let mut end = offset + i32_size; // cast `segment_size` to byte pointer let byte_ptr = (&self.segment_size as *const i32) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, i32_size) }; - // store `segment_size` back to mmapped file - 
fields_data[offset..end].copy_from_slice(bytes); + offset = store_bytes_and_update_offset(byte_ptr, offset, i32_size, fields_data); // ----- Store `free` ----- - offset += i32_size; - end += u32_size; // cast `free` to byte pointer let byte_ptr = (&self.free as *const u32) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u32_size) }; - // store `free` back to mmapped file - fields_data[offset..end].copy_from_slice(bytes); + offset = store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); // ----- Store `cap` ----- - offset += u32_size; - end += u32_size; // cast `cap` to byte pointer let byte_ptr = (&self.cap as *const u32) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, u32_size) }; - // store `cap` back to mmapped file - fields_data[offset..end].copy_from_slice(bytes); + offset = store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); // ----- Store `free_q` ----- - offset += u32_size; - end += free_q_size; // cast `free_q` to byte pointer let byte_ptr = (&self.free_q as *const Option) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, free_q_size) }; - // store `free_q` back to mmapped file - fields_data[offset..end].copy_from_slice(bytes); + offset = store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); // ----- Store `flush_at` ----- - offset += free_q_size; - end += flush_at_size; // cast `flush_at` to byte pointer let byte_ptr = (&self.flush_at as *const Instant) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, flush_at_size) }; - // store `flush_at` back to mmapped file - fields_data[offset..end].copy_from_slice(bytes); + store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, fields_data); + + // 
----------------------------- // TODO: check if this flushes fields_data from CPU caches pool.flush() From 45aa5b1384fd05395e20fe5ebe9f13c9d071ec39 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 15:30:25 +1100 Subject: [PATCH 19/74] reduced number of variables from Segments::restore() --- src/rust/storage/seg/src/segments/segments.rs | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 7c030cc69..2de058303 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -171,12 +171,12 @@ impl Segments { // ----- Recover other fields ------ let header_size: usize = ::std::mem::size_of::(); - let headers_size: usize = cfg_segments * header_size as usize; let i32_size = ::std::mem::size_of::(); let u32_size = ::std::mem::size_of::(); let free_q_size = ::std::mem::size_of::>(); let flush_at_size = ::std::mem::size_of::(); - let fields_size = headers_size + // Size of all components of `Segments` that are being restored + let fields_size = cfg_segments * header_size // `headers` + i32_size // `segment_size` + u32_size * 2 // `free` and `cap` + free_q_size @@ -192,47 +192,50 @@ impl Segments { // retrieve bytes from mmapped file bytes.copy_from_slice(&fields_data[0..fields_size]); + + let mut offset = 0; + let mut end = 0; // ----- Retrieve `headers` ----- let mut headers = Vec::with_capacity(0); headers.reserve_exact(cfg_segments); // retrieve each `SegmentHeader` from the raw bytes - for id in 0..cfg_segments { - let begin = header_size as usize * id; - let finish = begin + header_size as usize; + for _ in 0..cfg_segments { + end += header_size; // cast bytes to `SegmentHeader` - let header = unsafe { *(bytes[begin..finish].as_mut_ptr() as *mut SegmentHeader) }; + let header = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut SegmentHeader) }; 
headers.push(header); + + offset += header_size; } // ----- Retrieve `segment_size` ----- - let mut offset = headers_size; - let mut end = offset + i32_size; + end += i32_size; let segment_size = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut i32) }; // TODO: compare `cfg_segment_size` and `segment_size` - // ----- Retrieve `free` ----- offset += i32_size; + // ----- Retrieve `free` ----- end += u32_size; let free = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; - // ----- Retrieve `cap` ----- offset += u32_size; + // ----- Retrieve `cap` ----- end += u32_size; let cap = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; - // ----- Retrieve `free_q` ----- offset += u32_size; + // ----- Retrieve `free_q` ----- end += free_q_size; let free_q = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Option) }; - // ----- Retrieve `flush_at` ----- offset += free_q_size; + // ----- Retrieve `flush_at` ----- end += flush_at_size; let flush_at = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; @@ -273,12 +276,12 @@ impl Segments { if let Some(file) = segments_fields_path { let segments = heap_size / (self.segment_size as usize); let header_size: usize = ::std::mem::size_of::(); - let headers_size: usize = segments * header_size as usize; let i32_size = ::std::mem::size_of::(); let u32_size = ::std::mem::size_of::(); let free_q_size = ::std::mem::size_of::>(); let flush_at_size = ::std::mem::size_of::(); - let fields_size = headers_size + // Size of all components of `Segments` that are being restored + let fields_size = segments * header_size // `headers` + i32_size // `segment_size` + u32_size * 2 // `free` and `cap` + free_q_size From 7c15eb87f95c2295fd0860fc716f1a045a1dc136 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 15:46:21 +1100 Subject: [PATCH 20/74] added store_bytes_and_update_offset() usage to TtlBuckets::demolish() and neatened up TtlBuckets::restore() --- .../seg/src/ttl_buckets/ttl_buckets.rs | 61 
+++++++++++-------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 0f11ff153..94a2e3160 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -90,10 +90,9 @@ impl TtlBuckets { // if there is a path to restore from, restore the `TtlBuckets` if let Some(file) = ttl_buckets_path { let bucket_size = ::std::mem::size_of::(); - // size from all `TtlBucket`s in `TtlBuckets` - let buckets_size = MAX_N_TTL_BUCKET * bucket_size; let last_expired_size = ::std::mem::size_of::(); - let ttl_buckets_struct_size = buckets_size + last_expired_size; + let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + + last_expired_size; // Mmap file let pool = File::create(file, ttl_buckets_struct_size, true) @@ -105,25 +104,27 @@ impl TtlBuckets { // retrieve bytes from mmapped file bytes.copy_from_slice(&data[0..ttl_buckets_struct_size]); - // ----- Retrieve `last_expired` ----- let mut offset = 0; + // ----- Retrieve `last_expired` ----- + let mut end = last_expired_size; let last_expired = unsafe { *(bytes[offset..last_expired_size].as_mut_ptr() as *mut Instant) }; - // ----- Retrieve `buckets` ----- offset += last_expired_size; + // ----- Retrieve `buckets` ----- let mut buckets = Vec::with_capacity(0); buckets.reserve_exact(MAX_N_TTL_BUCKET); // Get each `TtlBucket` from the raw bytes - for id in 0..MAX_N_TTL_BUCKET { - let begin = offset + (bucket_size as usize * id); - let finish = begin + bucket_size as usize; + for _ in 0..MAX_N_TTL_BUCKET { + end += bucket_size; // cast bytes to `TtlBucket` - let bucket = unsafe { *(bytes[begin..finish].as_mut_ptr() as *mut TtlBucket) }; + let bucket = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut TtlBucket) }; buckets.push(bucket); + + offset += bucket_size; } let buckets = buckets.into_boxed_slice(); @@ -149,46 +150,38 @@ 
impl TtlBuckets { // to the file specified by `ttl_buckets_path` if let Some(file) = ttl_buckets_path { let bucket_size = ::std::mem::size_of::(); - // size of all `TtlBucket`s in `TtlBuckets` - let buckets_size = MAX_N_TTL_BUCKET * bucket_size; let last_expired_size = ::std::mem::size_of::(); - let ttl_buckets_struct_size = buckets_size + last_expired_size; + let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + + last_expired_size; // Mmap file let mut pool = File::create(file, ttl_buckets_struct_size, true) .expect("failed to allocate file backed storage"); - let data = Box::new(pool.as_mut_slice()); + let data = pool.as_mut_slice(); - // --------------------- Store `last_expired` ----------------- let mut offset = 0; + // --------------------- Store `last_expired` ----------------- // cast `last_expired` to byte pointer let byte_ptr = (&self.last_expired as *const Instant) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, last_expired_size) }; - - // store `started` back to mmapped file - data[offset..last_expired_size].copy_from_slice(bytes); + // store `last_expired` back to mmapped file + offset = store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); // --------------------- Store `buckets` ----------------- - offset += last_expired_size; // for every `TtlBucket` for id in 0..MAX_N_TTL_BUCKET { - let begin = offset + (bucket_size as usize * id); - let finish = begin + bucket_size as usize; // cast `TtlBucket` to byte pointer let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, bucket_size) }; - // store `TtlBucket` back to mmapped file - data[begin..finish].copy_from_slice(bytes); + offset = store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); } + // -------------------------------------------------- + 
gracefully_shutdown = true; // TODO: check if this flushes the CPU caches @@ -290,3 +283,19 @@ impl Default for TtlBuckets { Self::new() } } + + +/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` +/// Returns the next `offset`, that is, the next byte of `data` to be copied into +fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; + + let end = offset + size; + + // store `bytes` to `data` + data[offset..end].copy_from_slice(bytes); + + // next `offset` + end +} \ No newline at end of file From 5d0e633d89d8b8cefbe6992602e544b10f0be279 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Tue, 15 Feb 2022 16:11:38 +1100 Subject: [PATCH 21/74] moved store_bytes_and_update_offset() to store.rs so there is only 1 copy of the function --- src/rust/storage/seg/src/hashtable/mod.rs | 24 ++++------------- src/rust/storage/seg/src/lib.rs | 1 + src/rust/storage/seg/src/segments/segments.rs | 27 +++++-------------- src/rust/storage/seg/src/store.rs | 14 ++++++++++ .../seg/src/ttl_buckets/ttl_buckets.rs | 20 ++------------ 5 files changed, 28 insertions(+), 58 deletions(-) create mode 100644 src/rust/storage/seg/src/store.rs diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index a93994257..800cbe25a 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -275,7 +275,7 @@ impl HashTable { let byte_ptr = (&self.power as *const u64) as *const u8; // store `power` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // --------------------- Store `mask` ----------------- @@ -283,7 +283,7 @@ impl HashTable { let byte_ptr = (&self.mask as *const u64) as *const u8; 
// store `mask` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // --------------------- Store `data` ----------------- // for every `HashBucket` @@ -293,7 +293,7 @@ impl HashTable { let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; // store `HashBucket` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); } // --------------------- Store `started` ----------------- @@ -302,14 +302,14 @@ impl HashTable { let byte_ptr = (&self.started as *const Instant) as *const u8; // store `started` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); // --------------------- Store `next_to_chain` ----------------- // cast `next_to_chain` to byte pointer let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; // store `next_to_chain` back to mmapped file - store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // ------------------------------------------------------------- gracefully_shutdown = true; @@ -932,17 +932,3 @@ fn hash_builder() -> RandomState { ) } -/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` -/// Returns the next `offset`, that is, the next byte of `data` to be copied into -fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; - - let end = offset + size; - - // store `bytes` to `data` - data[offset..end].copy_from_slice(bytes); - - // 
next `offset` - end -} \ No newline at end of file diff --git a/src/rust/storage/seg/src/lib.rs b/src/rust/storage/seg/src/lib.rs index 0f263ad8b..628418d97 100644 --- a/src/rust/storage/seg/src/lib.rs +++ b/src/rust/storage/seg/src/lib.rs @@ -42,6 +42,7 @@ mod eviction; mod hashtable; mod item; mod rand; +mod store; mod seg; mod segments; mod ttl_buckets; diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 2de058303..8cb92684b 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -302,7 +302,7 @@ impl Segments { let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; // store `SegmentHeader` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, header_size, fields_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, header_size, fields_data); } // ----- Store `segment_size` ----- @@ -311,7 +311,7 @@ impl Segments { let byte_ptr = (&self.segment_size as *const i32) as *const u8; // store `segment_size` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, i32_size, fields_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, i32_size, fields_data); // ----- Store `free` ----- @@ -319,7 +319,7 @@ impl Segments { let byte_ptr = (&self.free as *const u32) as *const u8; // store `free` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); // ----- Store `cap` ----- @@ -327,7 +327,7 @@ impl Segments { let byte_ptr = (&self.cap as *const u32) as *const u8; // store `cap` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); // ----- Store `free_q` ----- @@ -335,7 +335,7 
@@ impl Segments { let byte_ptr = (&self.free_q as *const Option) as *const u8; // store `free_q` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); // ----- Store `flush_at` ----- @@ -343,7 +343,7 @@ impl Segments { let byte_ptr = (&self.flush_at as *const Instant) as *const u8; // store `flush_at` back to mmapped file - store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, fields_data); + store::store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, fields_data); // ----------------------------- @@ -1240,18 +1240,3 @@ impl Default for Segments { Self::from_builder_new(Default::default()) } } - -/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` -/// Returns the next `offset`, that is, the next byte of `data` to be copied into -fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; - - let end = offset + size; - - // store `bytes` to `data` - data[offset..end].copy_from_slice(bytes); - - // next `offset` - end -} \ No newline at end of file diff --git a/src/rust/storage/seg/src/store.rs b/src/rust/storage/seg/src/store.rs new file mode 100644 index 000000000..ce0296d1b --- /dev/null +++ b/src/rust/storage/seg/src/store.rs @@ -0,0 +1,14 @@ +/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` +/// Returns the next `offset`, that is, the next byte of `data` to be copied into +pub fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { + // get corresponding bytes from byte pointer + let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; + + let end = offset + size; + + // store `bytes` to `data` + data[offset..end].copy_from_slice(bytes); + + 
// next `offset` + end +} \ No newline at end of file diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 94a2e3160..5582c362e 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -166,7 +166,7 @@ impl TtlBuckets { let byte_ptr = (&self.last_expired as *const Instant) as *const u8; // store `last_expired` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); // --------------------- Store `buckets` ----------------- @@ -177,7 +177,7 @@ impl TtlBuckets { let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; // store `TtlBucket` back to mmapped file - offset = store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); } // -------------------------------------------------- @@ -283,19 +283,3 @@ impl Default for TtlBuckets { Self::new() } } - - -/// Copies `size` bytes at `byte_ptr` to the `offset` of `data` -/// Returns the next `offset`, that is, the next byte of `data` to be copied into -fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { - // get corresponding bytes from byte pointer - let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; - - let end = offset + size; - - // store `bytes` to `data` - data[offset..end].copy_from_slice(bytes); - - // next `offset` - end -} \ No newline at end of file From aeb62de2832471575b99f14c3c5fea740aad1cf6 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 10:01:30 +1100 Subject: [PATCH 22/74] changed back to only having from_builder() to build . 
This function decides whether to build a new segments or not --- src/rust/storage/seg/src/segments/builder.rs | 6 +- src/rust/storage/seg/src/segments/segments.rs | 154 +++++++----------- 2 files changed, 58 insertions(+), 102 deletions(-) diff --git a/src/rust/storage/seg/src/segments/builder.rs b/src/rust/storage/seg/src/segments/builder.rs index 32214b61b..edb932b15 100644 --- a/src/rust/storage/seg/src/segments/builder.rs +++ b/src/rust/storage/seg/src/segments/builder.rs @@ -92,10 +92,6 @@ impl<'a> SegmentsBuilder { /// Construct the [`Segments`] from the builder pub fn build(self) -> Segments { - if self.restore { - Segments::from_builder_restore(self) - } else { - Segments::from_builder_new(self) - } + Segments::from_builder(self) } } diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 8cb92684b..f9e953958 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -50,36 +50,33 @@ pub(crate) struct Segments { impl Segments { /// Private function which allocates and initializes the `Segments` by - /// taking ownership of the builder - /// A new `Segments` is created - pub(super) fn from_builder_new(builder: SegmentsBuilder) -> Self { - let segment_size = builder.segment_size; - let segments = builder.heap_size / (builder.segment_size as usize); + /// taking ownership of the builder. + /// `Segments` is restored if the paths are specified, otherwise a new + /// `Segments` is created. 
+ pub(super) fn from_builder(builder: SegmentsBuilder) -> Self { + let cfg_segment_size = builder.segment_size; + let cfg_segments = builder.heap_size / (builder.segment_size as usize); debug!( "heap size: {} seg size: {} segments: {}", - builder.heap_size, segment_size, segments + builder.heap_size, cfg_segment_size, cfg_segments ); assert!( - segments < (1 << 24), // we use just 24 bits to store the seg id + cfg_segments < (1 << 24), // we use just 24 bits to store the seg id "heap size requires too many segments, reduce heap size or increase segment size" ); + // initialise `evict` let evict_policy = builder.evict_policy; + let evict = Eviction::new(cfg_segments, evict_policy); debug!("eviction policy: {:?}", evict_policy); let mut headers = Vec::with_capacity(0); - headers.reserve_exact(segments); - for id in 0..segments { - // safety: we start iterating from 1 and seg id is constrained to < 2^24 - let header = SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); - headers.push(header); - } - let mut headers = headers.into_boxed_slice(); + headers.reserve_exact(cfg_segments); - let heap_size = segments * segment_size as usize; + let heap_size = cfg_segments * cfg_segment_size as usize; let mut data_file_backed = false; // TODO(bmartin): we always prefault, this should be configurable @@ -92,83 +89,18 @@ impl Segments { Box::new(Memory::create(heap_size, true)) }; - for idx in 0..segments { - let begin = segment_size as usize * idx; - let end = begin + segment_size as usize; - - let mut segment = - Segment::from_raw_parts(&mut headers[idx], &mut data.as_mut_slice()[begin..end]); - segment.init(); - - let id = idx as u32 + 1; // we index segments from 1 - segment.set_prev_seg(NonZeroU32::new(id - 1)); - if id < segments as u32 { - segment.set_next_seg(NonZeroU32::new(id + 1)); - } - } - - SEGMENT_CURRENT.set(segments as _); - SEGMENT_FREE.set(segments as _); - - Self { - headers, - segment_size, - cap: segments as u32, - free: segments as u32, 
- free_q: NonZeroU32::new(1), - data, - flush_at: Instant::recent(), - evict: Box::new(Eviction::new(segments, evict_policy)), - data_file_backed, - fields_copied_back: false, - } - } - - /// Private function which allocates and initializes the `Segments` by - /// taking ownership of the builder. - /// `Segments` is restored if the paths are specified, otherwise a new - /// `Segments` is created. - pub(super) fn from_builder_restore(builder: SegmentsBuilder) -> Self { - // this is here to avoid `builder` being moved when it might be needed - // for the else statement - let segments_fields_path = builder.segments_fields_path.clone(); - - // If there are specified paths to restore the `Segments` with, - // copy `Segments` back. + // If `builder.restore` and + // there are specified paths to restore the `Segments` with and + // `Segments.data` is file backed, restore relevant + // `Segments` fields. // Otherwise create a new `Segments`. - if let Some(fields_file) = segments_fields_path { - // ----- Recover `data` ------ - let data: Box; + if builder.restore && + data_file_backed && + builder.segments_fields_path.is_some(){ // TODO: like with the HashTable fields, we assume that the configuration // options for `Segments` hasn't changed upon recovery. We need a way to // detect the change in fields as well as decided how to // deal with such changes. 
- let cfg_segment_size = builder.segment_size; - let cfg_segments = builder.heap_size / (builder.segment_size as usize); - - debug!( - "heap size: {} seg size: {} segments: {}", - builder.heap_size, cfg_segment_size, cfg_segments - ); - - assert!( - cfg_segments < (1 << 24), // we use just 24 bits to store the seg id - "heap size requires too many segments, reduce heap size or increase segment size" - ); - - let heap_size = cfg_segments * cfg_segment_size as usize; - - // TODO(bmartin): we always prefault, this should be configurable - // `Segments.data` must be file backed for a recovery - if let Some(data_file) = builder.datapool_path { - let pool = File::create(data_file, heap_size, true) - .expect("failed to allocate file backed storage"); - data = Box::new(pool) - } else { - return Segments::from_builder_new(builder); - } - - // ----- Recover other fields ------ let header_size: usize = ::std::mem::size_of::(); let i32_size = ::std::mem::size_of::(); @@ -183,7 +115,7 @@ impl Segments { + flush_at_size; // Mmap file - let pool = File::create(fields_file, fields_size, true) + let pool = File::create(builder.segments_fields_path.unwrap(), fields_size, true) .expect("failed to allocate file backed storage"); let fields_data = Box::new(pool.as_slice()); @@ -196,8 +128,6 @@ impl Segments { let mut offset = 0; let mut end = 0; // ----- Retrieve `headers` ----- - let mut headers = Vec::with_capacity(0); - headers.reserve_exact(cfg_segments); // retrieve each `SegmentHeader` from the raw bytes for _ in 0..cfg_segments { @@ -240,11 +170,6 @@ impl Segments { let flush_at = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; - // ----- Re-initialise `evict` ----- - - let evict_policy = builder.evict_policy; - let evict = Eviction::new(cfg_segments, evict_policy); - SEGMENT_CURRENT.set(cap as _); SEGMENT_FREE.set(free as _); @@ -261,7 +186,42 @@ impl Segments { fields_copied_back: true, } } else { - Segments::from_builder_new(builder) + for id in 0..cfg_segments 
{ + // safety: we start iterating from 1 and seg id is constrained to < 2^24 + let header = SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); + headers.push(header); + } + + for idx in 0..cfg_segments { + let begin = cfg_segment_size as usize * idx; + let end = begin + cfg_segment_size as usize; + + let mut segment = + Segment::from_raw_parts(&mut headers[idx], &mut data.as_mut_slice()[begin..end]); + segment.init(); + + let id = idx as u32 + 1; // we index cfg_segments from 1 + segment.set_prev_seg(NonZeroU32::new(id - 1)); + if id < cfg_segments as u32 { + segment.set_next_seg(NonZeroU32::new(id + 1)); + } + } + + SEGMENT_CURRENT.set(cfg_segments as _); + SEGMENT_FREE.set(cfg_segments as _); + + Self { + headers: headers.into_boxed_slice(), + segment_size: cfg_segment_size, + cap: cfg_segments as u32, + free: cfg_segments as u32, + free_q: NonZeroU32::new(1), + data, + flush_at: Instant::recent(), + evict: Box::new(evict), + data_file_backed, + fields_copied_back: false, + } } } @@ -1237,6 +1197,6 @@ impl Segments { impl Default for Segments { fn default() -> Self { - Self::from_builder_new(Default::default()) + Self::from_builder(Default::default()) } } From 49c06e37f9c5ad761062b9a6a187e0ba3b8ba5be Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 10:09:33 +1100 Subject: [PATCH 23/74] fixed ordering of derive traits for TtlBucket --- src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs index 7a906884a..3f8bae2b0 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_bucket.rs @@ -34,7 +34,7 @@ use core::num::NonZeroU32; /// in an ordered fashion. The first segment to expire will be the head of the /// segment chain. 
This allows us to efficiently scan across the [`TtlBuckets`] /// and expire segments in an eager fashion. -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] #[repr(C)] pub struct TtlBucket { head: Option, From 1606340ead9051db8c4689c9b67db706b9ed56d6 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 10:11:06 +1100 Subject: [PATCH 24/74] removed unnecessary comments --- src/rust/storage/seg/src/datapool/memory.rs | 2 +- src/rust/storage/seg/src/hashtable/mod.rs | 2 +- src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/storage/seg/src/datapool/memory.rs b/src/rust/storage/seg/src/datapool/memory.rs index 7ff6ee783..ecbf33410 100644 --- a/src/rust/storage/seg/src/datapool/memory.rs +++ b/src/rust/storage/seg/src/datapool/memory.rs @@ -8,7 +8,7 @@ use crate::datapool::Datapool; /// A contiguous allocation of bytes in main memory -#[derive(Clone)] // for testing +#[derive(Clone)] pub struct Memory { data: Box<[u8]>, } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 800cbe25a..ee9a302ac 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -100,7 +100,7 @@ static_metrics! { /// Main structure for performing item lookup. Contains a contiguous allocation /// of [`HashBucket`]s which are used to store item info and metadata. 
-#[derive(Clone)] // for testing +#[derive(Clone)] #[repr(C)] pub(crate) struct HashTable { hash_builder: Box, diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 5582c362e..24ee6158c 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -44,7 +44,7 @@ const TTL_BOUNDARY_3: i32 = 1 << (TTL_BUCKET_INTERVAL_N_BIT_3 + N_BUCKET_PER_STE const MAX_N_TTL_BUCKET: usize = N_BUCKET_PER_STEP * 4; const MAX_TTL_BUCKET_IDX: usize = MAX_N_TTL_BUCKET - 1; -#[derive(Clone)] // for testing +#[derive(Clone)] pub struct TtlBuckets { pub(crate) buckets: Box<[TtlBucket]>, pub(crate) last_expired: Instant, From 1caaf226b183ee8f733fcd4e9ef228dabe41098f Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 10:23:45 +1100 Subject: [PATCH 25/74] removed newline --- .github/workflows/cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 71d2d857b..61417ac37 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -37,4 +37,4 @@ jobs: - name: Integration tests run: | cd test/integration - python test_twemcache.py + python test_twemcache.py \ No newline at end of file From 83e428adff2abb0601011205c981fb345cac1219 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 10:27:15 +1100 Subject: [PATCH 26/74] removed newline --- .github/workflows/cmake.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 61417ac37..e39f58984 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -25,9 +25,11 @@ jobs: cmake -B _build -S . 
\ -DCMAKE_BUILD_TYPE=${{ matrix.profile }} \ -DBUILD_AND_INSTALL_CHECK=yes + - name: Build run: | cmake --build _build + - name: Test run: | cmake --build _build --target test From 2df8f223307d3f3d5574a524a3fab042e6134392 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 12:10:29 +1100 Subject: [PATCH 27/74] added TODOs to implement Drop trait for TtlBuckets --- src/rust/storage/seg/src/tests.rs | 9 ++- .../seg/src/ttl_buckets/ttl_buckets.rs | 70 ++++++++++++++++++- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index a7badcfc3..fb10673c7 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -26,7 +26,7 @@ fn sizes() { assert_eq!(std::mem::size_of::(), 72); // increased to accommodate fields added for testing assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 24); + assert_eq!(std::mem::size_of::(), 48); } #[test] @@ -712,6 +712,12 @@ fn new_file_backed_cache_changed_and_restored() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); + // // force cache to go out of scope and thus `cache.segments`, + // // `cache.hashtable` and `cache.ttl_buckets` will be dropped (demolished) + // { + // let _x = cache; + // } + // gracefully shutdown cache assert!(demolish_cache( cache, @@ -746,6 +752,7 @@ fn new_file_backed_cache_changed_and_restored() { // the restored cache should be equivalent to the old cache assert!(new_cache.equivalent_seg(old_cache)); + } // Creates a new cache, gracefully shutsdown cache and restore cache diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 24ee6158c..4defcfba5 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -50,12 +50,14 @@ pub struct TtlBuckets { pub(crate) last_expired: Instant, /// Are 
`TtlBuckets` copied back from a file? pub(crate) buckets_copied_back: bool, + /// Path to store `TtlBuckets` upon graceful shutdown + pub(crate) ttl_buckets_path: Option, } impl TtlBuckets { /// Create a new set of `TtlBuckets` which cover the full range of TTLs. See /// the module-level documentation for how the range of TTLs are stored. - pub fn new() -> Self { + pub fn new() -> Self { // TODO: add path as argument let intervals = [ TTL_BUCKET_INTERVAL_1, TTL_BUCKET_INTERVAL_2, @@ -81,6 +83,7 @@ impl TtlBuckets { buckets, last_expired, buckets_copied_back: false, + ttl_buckets_path: None, //TODO: replace with given path } } @@ -88,14 +91,15 @@ impl TtlBuckets { // to restore from is valid. Otherwise return a new `TtlBuckets` pub fn restore(ttl_buckets_path: Option) -> Self { // if there is a path to restore from, restore the `TtlBuckets` - if let Some(file) = ttl_buckets_path { + if ttl_buckets_path.is_some() { let bucket_size = ::std::mem::size_of::(); let last_expired_size = ::std::mem::size_of::(); let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; + // Mmap file - let pool = File::create(file, ttl_buckets_struct_size, true) + let pool = File::create(ttl_buckets_path.as_ref().unwrap(), ttl_buckets_struct_size, true) .expect("failed to allocate file backed storage"); let data = Box::new(pool.as_slice()); @@ -133,14 +137,18 @@ impl TtlBuckets { buckets, last_expired, buckets_copied_back: true, + ttl_buckets_path, } } // otherwise, create a new `TtlBuckets` else { + // TODO: uncomment this line when implementing Drop trait + //TtlBuckets::new(ttl_buckets_path: Option) TtlBuckets::new() } } + /// TODO: Move this to drop() /// Demolishes the `TtlBuckets` by storing them to /// PMEM (if a path is specified) pub fn demolish(&self, ttl_buckets_path: Option) -> bool { @@ -283,3 +291,59 @@ impl Default for TtlBuckets { Self::new() } } + +// // TODO: use self.path, figure out how to indicate there was a graceful shutdown, +// 
// implement the same for Segments and HashTable +// // Add description +// impl Drop for TtlBuckets { +// fn drop(&mut self) { +// /// Demolishes the `TtlBuckets` by storing them to +// /// PMEM (if a path is specified) +// let mut gracefully_shutdown = false; + +// // if a path is specified, copy all the `TtlBucket`s +// // to the file specified by `ttl_buckets_path` +// if let Some(file) = ttl_buckets_path { +// let bucket_size = ::std::mem::size_of::(); +// let last_expired_size = ::std::mem::size_of::(); +// let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` +// + last_expired_size; + +// // Mmap file +// let mut pool = File::create(file, ttl_buckets_struct_size, true) +// .expect("failed to allocate file backed storage"); +// let data = pool.as_mut_slice(); + +// let mut offset = 0; +// // --------------------- Store `last_expired` ----------------- + +// // cast `last_expired` to byte pointer +// let byte_ptr = (&self.last_expired as *const Instant) as *const u8; + +// // store `last_expired` back to mmapped file +// offset = store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); + +// // --------------------- Store `buckets` ----------------- + +// // for every `TtlBucket` +// for id in 0..MAX_N_TTL_BUCKET { + +// // cast `TtlBucket` to byte pointer +// let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; + +// // store `TtlBucket` back to mmapped file +// offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); +// } + +// // -------------------------------------------------- + +// gracefully_shutdown = true; + +// // TODO: check if this flushes the CPU caches +// pool.flush() +// .expect("failed to flush `TtlBuckets` to storage"); +// } + +// gracefully_shutdown +// } +// } From 7be8f07b126929335375be2f57ab4e9088ef3ce8 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 13:23:42 +1100 Subject: [PATCH 28/74] Added Brian's File::create() changes 
--- src/rust/storage/seg/src/datapool/file.rs | 61 ++++++++++++++--------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index fcf7de772..aea63e463 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -31,33 +31,46 @@ impl File { size: usize, prefault: bool, ) -> Result { - let metadata = std::fs::metadata(&path); - let file_exists = metadata.is_ok(); - let file = OpenOptions::new() - .create(true) - .read(true) - .write(true) - .open(path)?; - // if file exists, check that the size it is expected to have - // matches its actual size - if file_exists { - assert_eq!(metadata?.len() as usize, size); + // check if the file exists and is the right size + let exists = if let Ok(current_size) = std::fs::metadata(&path).map(|m| m.len()) { + if current_size != size as u64 { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "existing file has wrong size")); + } + true } else { - file.set_len(size as u64)?; - } - - let mut mmap = unsafe { MmapOptions::new().populate().map_mut(&file)? }; - - if !file_exists && prefault { - let mut offset = 0; - while offset < size { - mmap[offset] = 0; - offset += PAGE_SIZE; + false + }; + + let mmap = if exists { + let f = OpenOptions::new() + .read(true) + .write(true) + .open(path)?; + + unsafe { MmapOptions::new().populate().map_mut(&f)? } + } else { + let f = OpenOptions::new() + .create_new(true) + .read(true) + .write(true) + .open(path)?; + f.set_len(size as u64)?; + + let mut mmap = unsafe { MmapOptions::new().populate().map_mut(&f)? 
}; + + if prefault { + let mut offset = 0; + while offset < size { + mmap[offset] = 0; + offset += PAGE_SIZE; + } + mmap.flush()?; } - mmap.flush()?; - } - + + mmap + }; + Ok(Self { mmap, size }) } } From 19385a4cc31b01f6cbe9cc627c8f705320e3a3a1 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 13:41:33 +1100 Subject: [PATCH 29/74] implemented PartialEq for HashTable --- src/rust/storage/seg/src/hashtable/mod.rs | 30 +++++++++++------------ src/rust/storage/seg/src/seg.rs | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index ee9a302ac..064599cf8 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -887,31 +887,29 @@ impl HashTable { hasher.write(key); hasher.finish() } +} - #[cfg(test)] - // Checks if `HashTable.data` are equivalent - pub(crate) fn equivalent_hashbuckets(&self, buckets: Box<[HashBucket]>) -> bool { +#[cfg(test)] +impl PartialEq for HashTable { + // Checks if `HashTable` are equivalent + fn eq(&self, other: &Self) -> bool { + // ---- Check if `HashTable.data` are equivalent --- let total_buckets = self.data.len(); // ensure number of `HashBucket`s is the same - let mut equivalent = total_buckets == buckets.len(); + let mut buckets_equivalent = total_buckets == other.data.len(); // Compare each `HashBucket` for id in 0..total_buckets { - equivalent = equivalent && self.data[id] == buckets[id]; + buckets_equivalent = buckets_equivalent && self.data[id] == other.data[id]; } - equivalent - } - - #[cfg(test)] - // Checks if `HashTable` are equivalent - pub(crate) fn equivalent_hashtables(&self, h: HashTable) -> bool { - self.power == h.power - && self.mask == h.mask - && self.equivalent_hashbuckets(h.data.clone()) - && self.started == h.started - && self.next_to_chain == h.next_to_chain + // ---- Check if the other fields are equivalent --- + buckets_equivalent + && 
self.power == other.power + && self.mask == other.mask + && self.started == other.started + && self.next_to_chain == other.next_to_chain } } diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index d2746e829..267c08afc 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -359,6 +359,6 @@ impl Seg { pub(crate) fn equivalent_seg(&self, s: Seg) -> bool { self.segments.equivalent_segments(s.segments) && self.ttl_buckets.equivalent_ttlbuckets(s.ttl_buckets) - && self.hashtable.equivalent_hashtables(s.hashtable) + && self.hashtable == s.hashtable } } From f7e9468eff3728871c67f95ee26740434406c12f Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 13:52:11 +1100 Subject: [PATCH 30/74] implemented PartialEq trait for TtlBuckets, Segments and Seg --- src/rust/storage/seg/src/seg.rs | 14 ++--- src/rust/storage/seg/src/segments/segments.rs | 54 +++++++++---------- src/rust/storage/seg/src/tests.rs | 10 ++-- .../seg/src/ttl_buckets/ttl_buckets.rs | 35 ++++++------ 4 files changed, 56 insertions(+), 57 deletions(-) diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 267c08afc..50c2faf6c 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -353,12 +353,14 @@ impl Seg { _restored: false, // this field doesn't matter as it won't be compared } } +} - // Used in testing to compare `Seg`s - #[cfg(test)] - pub(crate) fn equivalent_seg(&self, s: Seg) -> bool { - self.segments.equivalent_segments(s.segments) - && self.ttl_buckets.equivalent_ttlbuckets(s.ttl_buckets) - && self.hashtable == s.hashtable +// Used in testing to compare `Seg`s +#[cfg(test)] +impl PartialEq for Seg { + fn eq(&self, other: &Self) -> bool { + self.segments == other.segments + && self.ttl_buckets == other.ttl_buckets + && self.hashtable == other.hashtable } } diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs 
index f9e953958..ae291ab81 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -869,34 +869,6 @@ impl Segments { } } - #[cfg(test)] - // Checks if `Segments.headers` are equivalent - pub(crate) fn equivalent_headers(&self, headers: Box<[SegmentHeader]>) -> bool { - let total_buckets = self.headers.len(); - - // ensure number of `SegmentHeader`s is the same - let mut equivalent = total_buckets == headers.len(); - - // Compare each `SegmentHeader` - for id in 0..total_buckets { - equivalent = equivalent && self.headers[id] == headers[id]; - } - - equivalent - } - - // Checks if `Segments` are equivalent - #[cfg(test)] - pub(crate) fn equivalent_segments(&self, s: Segments) -> bool { - self.equivalent_headers(s.headers.clone()) - && self.data.as_slice() == s.data.as_slice() - && self.segment_size == s.segment_size - && self.free == s.free - && self.cap == s.cap - && self.free_q == s.free_q - && self.flush_at == s.flush_at - } - #[cfg(feature = "debug")] pub(crate) fn check_integrity(&mut self) -> bool { let mut integrity = true; @@ -1200,3 +1172,29 @@ impl Default for Segments { Self::from_builder(Default::default()) } } + +#[cfg(test)] +impl PartialEq for Segments { + // Checks if `Segments` are equivalent + fn eq(&self, other: &Self) -> bool { + // ---- Check if `Segments.headers` are equivalent ---- + let total_buckets = self.headers.len(); + + // ensure number of `SegmentHeader`s is the same + let mut headers_equivalent = total_buckets == other.headers.len(); + + // Compare each `SegmentHeader` + for id in 0..total_buckets { + headers_equivalent = headers_equivalent && self.headers[id] == other.headers[id]; + } + + // ---- Check if the other fields are equivalent --- + headers_equivalent + && self.data.as_slice() == other.data.as_slice() + && self.segment_size == other.segment_size + && self.free == other.free + && self.cap == other.cap + && self.free_q == other.free_q + && self.flush_at == 
other.flush_at + } +} diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index fb10673c7..51c4c7a7d 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -751,7 +751,7 @@ fn new_file_backed_cache_changed_and_restored() { assert_eq!(new_cache.segments.free(), SEGMENTS - 1); // the restored cache should be equivalent to the old cache - assert!(new_cache.equivalent_seg(old_cache)); + assert!(new_cache == old_cache); } @@ -808,7 +808,7 @@ fn new_file_backed_cache_not_changed_and_restored() { assert!(new_cache._restored); // the restored cache should be equivalent to the old cache - assert!(new_cache.equivalent_seg(old_cache)); + assert!(new_cache == old_cache); } // Creates a new cache, stores an item, gracefully shutsdown cache and spawn new cache @@ -874,7 +874,7 @@ fn new_cache_changed_and_not_restored() { assert!(new_cache.get(b"latte").is_none()); // the restored cache should not be equivalent to the old cache - assert!(!new_cache.equivalent_seg(old_cache)); + assert!(new_cache != old_cache); } // Creates a new cache, stores an item, gracefully shutsdown cache and restore cache @@ -955,7 +955,7 @@ fn new_cache_changed_and_restoration_fails() { assert!(new_cache.get(b"latte").is_none()); // the restored cache should not be equivalent to the old cache - assert!(!new_cache.equivalent_seg(old_cache)); + assert!(new_cache != old_cache); } // Create a new cache, fill it with items. 
@@ -1054,7 +1054,7 @@ fn full_cache_recovery_long() { assert!(new_cache._restored); // the restored cache should be equivalent to the old cache - assert!(new_cache.equivalent_seg(old_cache)); + assert!(new_cache == old_cache); // check that the number of active items in the restored cache // equals the number of active keys in the original cache diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 4defcfba5..e02aa761b 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -263,32 +263,31 @@ impl TtlBuckets { cleared } - #[cfg(test)] - // Checks if `TtlBuckets.buckets` are equivalent - pub(crate) fn equivalent_buckets(&self, buckets: Box<[TtlBucket]>) -> bool { +} + +impl Default for TtlBuckets { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +impl PartialEq for TtlBuckets { + // Checks if `TtlBuckets` are equivalent + fn eq(&self, other: &Self) -> bool { + // ---- Check if `TtlBuckets.buckets` are equivalent ---- let total_buckets = self.buckets.len(); // ensure number of `TtlBucket`s is the same - let mut equivalent = total_buckets == buckets.len(); + let mut buckets_equivalent = total_buckets == other.buckets.len(); // Compare each `TtlBucket` for id in 0..total_buckets { - equivalent = equivalent && self.buckets[id] == buckets[id]; + buckets_equivalent = buckets_equivalent && self.buckets[id] == other.buckets[id]; } - equivalent - } - - #[cfg(test)] - // Checks if `TtlBuckets.buckets` are equivalent - pub(crate) fn equivalent_ttlbuckets(&self, t: TtlBuckets) -> bool { - self.equivalent_buckets(t.buckets.clone()) && self.last_expired == t.last_expired - } -} - -impl Default for TtlBuckets { - fn default() -> Self { - Self::new() + // ---- Check if the other fields are equivalent --- + buckets_equivalent && self.last_expired == other.last_expired } } From 710674dbe8b24987a9c75e4c09092e485d71f218 Mon Sep 17 
00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 14:09:23 +1100 Subject: [PATCH 31/74] removed _restored field from Seg and replaced it with a restored() function --- src/rust/storage/seg/src/builder.rs | 2 -- src/rust/storage/seg/src/seg.rs | 11 ++++++++--- src/rust/storage/seg/src/tests.rs | 28 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index ddcd1f334..811f29e21 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -206,7 +206,6 @@ impl Builder { hashtable, segments, ttl_buckets, - _restored: true, }; } } @@ -218,7 +217,6 @@ impl Builder { hashtable, segments, ttl_buckets, - _restored: false, } } } diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 50c2faf6c..ae730dd1e 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -26,8 +26,6 @@ pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, pub(crate) ttl_buckets: TtlBuckets, - // Used for testing: are the above structures restored? 
- pub(crate) _restored: bool, } impl Seg { @@ -350,9 +348,16 @@ impl Seg { segments, ttl_buckets, hashtable, - _restored: false, // this field doesn't matter as it won't be compared } } + + // Indicated if `Seg` has been restored + #[cfg(test)] + pub(crate) fn restored(&self) -> bool { + self.segments.fields_copied_back + && self.ttl_buckets.buckets_copied_back + && self.hashtable.table_copied_back + } } // Used in testing to compare `Seg`s diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 51c4c7a7d..57c3d2dc8 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -506,7 +506,7 @@ fn new_cache_file_backed() { assert!(cache.segments.data_file_backed()); // -- Check entire `Seg` -- // the `Seg` should not be restored - assert!(!cache._restored); + assert!(!cache.restored()); // -- Check `Seg` fields/components -- // the `Segments` fields' should not have been restored assert!(!cache.segments.fields_copied_back); @@ -527,7 +527,7 @@ fn new_cache_not_file_backed() { // the `Segments.data` should not be filed backed assert!(!cache.segments.data_file_backed()); // the `Seg` should not be restored - assert!(!cache._restored); + assert!(!cache.restored()); // the `Segments` fields' should not have been restored assert!(!cache.segments.fields_copied_back); // the `TtlBuckets` should not have been restored @@ -563,7 +563,7 @@ fn restored_cache_file_backed() { // the `Segments.data` should be filed backed assert!(cache.segments.data_file_backed()); // the `Seg` should be restored - assert!(cache._restored); + assert!(cache.restored()); // the `Segments` fields' should have been restored assert!(cache.segments.fields_copied_back); // the `TtlBuckets` should have been restored @@ -593,7 +593,7 @@ fn restored_cache_no_paths_set() { // the `Segments.data` should not be filed backed assert!(!cache.segments.data_file_backed()); // the `Seg` should not be restored - assert!(!cache._restored); + 
assert!(!cache.restored()); // the `Segments` fields' should not have been restored assert!(!cache.segments.fields_copied_back); // the `TtlBuckets` should not have been restored @@ -693,7 +693,7 @@ fn new_file_backed_cache_changed_and_restored() { let mut restore = false; let mut cache = make_cache(restore, datapool_path, None, None, None); - assert!(!cache._restored); + assert!(!cache.restored()); assert_eq!(cache.items(), 0); assert_eq!(cache.segments.free(), SEGMENTS); @@ -744,7 +744,7 @@ fn new_file_backed_cache_changed_and_restored() { hashtable_path, ); - assert!(new_cache._restored); + assert!(new_cache.restored()); // "latte" should be in restored cache assert!(new_cache.get(b"latte").is_some()); assert_eq!(new_cache.items(), 1); @@ -774,7 +774,7 @@ fn new_file_backed_cache_not_changed_and_restored() { let mut restore = false; let cache = make_cache(restore, datapool_path, None, None, None); - assert!(!cache._restored); + assert!(!cache.restored()); // Get a copy of the cache to be compared later let old_cache = cache.clone(); @@ -805,7 +805,7 @@ fn new_file_backed_cache_not_changed_and_restored() { hashtable_path, ); - assert!(new_cache._restored); + assert!(new_cache.restored()); // the restored cache should be equivalent to the old cache assert!(new_cache == old_cache); @@ -830,7 +830,7 @@ fn new_cache_changed_and_not_restored() { let mut restore = false; let mut cache = make_cache(restore, datapool_path, None, None, None); - assert!(!cache._restored); + assert!(!cache.restored()); assert_eq!(cache.items(), 0); assert_eq!(cache.segments.free(), SEGMENTS); @@ -866,7 +866,7 @@ fn new_cache_changed_and_not_restored() { restore = false; let mut new_cache = make_cache(restore, datapool_path, None, None, None); - assert!(!new_cache._restored); + assert!(!new_cache.restored()); assert_eq!(new_cache.items(), 0); assert_eq!(new_cache.segments.free(), SEGMENTS); @@ -899,7 +899,7 @@ fn new_cache_changed_and_restoration_fails() { let mut restore = false; let mut 
cache = make_cache(restore, datapool_path, None, None, None); - assert!(!cache._restored); + assert!(!cache.restored()); assert_eq!(cache.items(), 0); assert_eq!(cache.segments.free(), SEGMENTS); @@ -945,7 +945,7 @@ fn new_cache_changed_and_restoration_fails() { ); // Cache is restored as all paths exist - assert!(new_cache._restored); + assert!(new_cache.restored()); // `Segments` data should be the same as old cache since `segments_fields_path` is the same assert_eq!(new_cache.items(), 1); assert_eq!(new_cache.segments.free(), SEGMENTS - 1); @@ -985,7 +985,7 @@ fn full_cache_recovery_long() { let mut restore = false; let mut cache = make_cache(restore, datapool_path, None, None, None); - assert!(!cache._restored); + assert!(!cache.restored()); assert_eq!(cache.items(), 0); assert_eq!(cache.segments.free(), SEGMENTS); @@ -1051,7 +1051,7 @@ fn full_cache_recovery_long() { hashtable_path, ); - assert!(new_cache._restored); + assert!(new_cache.restored()); // the restored cache should be equivalent to the old cache assert!(new_cache == old_cache); From a73aa63ee290e9c7de0686cd52527aede5d08e30 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Feb 2022 14:46:01 +1100 Subject: [PATCH 32/74] Clone trait now implemented for Segments and Seg can now derive Clone. Also changed derivation so that only Clone is derived for testign --- src/rust/storage/seg/src/hashtable/mod.rs | 2 +- src/rust/storage/seg/src/seg.rs | 27 +++++----- src/rust/storage/seg/src/segments/segments.rs | 54 ++++++++++--------- .../seg/src/ttl_buckets/ttl_buckets.rs | 2 +- 4 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 064599cf8..18994299c 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -100,7 +100,7 @@ static_metrics! { /// Main structure for performing item lookup. 
Contains a contiguous allocation /// of [`HashBucket`]s which are used to store item info and metadata. -#[derive(Clone)] +#[cfg_attr(test, derive(Clone))] #[repr(C)] pub(crate) struct HashTable { hash_builder: Box, diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index ae730dd1e..7c916380f 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -22,6 +22,7 @@ static_metrics! { /// segment-structured design that stores data in fixed-size segments, grouping /// objects with nearby expiration time into the same segment, and lifting most /// per-object metadata into the shared segment header. +#[cfg_attr(test, derive(Clone))] pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, @@ -338,19 +339,6 @@ impl Seg { } } - // Used in testing to clone a `Seg` to compare with - #[cfg(test)] - pub(crate) fn clone(&self) -> Seg { - let segments = self.segments.clone(); - let ttl_buckets = self.ttl_buckets.clone(); - let hashtable = self.hashtable.clone(); - Seg { - segments, - ttl_buckets, - hashtable, - } - } - // Indicated if `Seg` has been restored #[cfg(test)] pub(crate) fn restored(&self) -> bool { @@ -369,3 +357,16 @@ impl PartialEq for Seg { && self.hashtable == other.hashtable } } + + // // Used in testing to clone a `Seg` to compare with + // #[cfg(test)] + // pub(crate) fn clone(&self) -> Seg { + // let segments = self.segments.clone(); + // let ttl_buckets = self.ttl_buckets.clone(); + // let hashtable = self.hashtable.clone(); + // Seg { + // segments, + // ttl_buckets, + // hashtable, + // } + // } diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index ae291ab81..377b4d994 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -191,6 +191,8 @@ impl Segments { let header = SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); headers.push(header); 
} + + let mut headers = headers.into_boxed_slice(); for idx in 0..cfg_segments { let begin = cfg_segment_size as usize * idx; @@ -211,7 +213,7 @@ impl Segments { SEGMENT_FREE.set(cfg_segments as _); Self { - headers: headers.into_boxed_slice(), + headers, segment_size: cfg_segment_size, cap: cfg_segments as u32, free: cfg_segments as u32, @@ -845,30 +847,6 @@ impl Segments { } } - // Used in testing to clone a `Segments` to compare with - #[cfg(test)] - pub(crate) fn clone(&self) -> Segments { - // clone `data` - let heap_size = self.segment_size as usize * self.cap as usize; - let mut data = vec![0; heap_size]; - data.clone_from_slice(self.data.as_slice()); - let segment_data = Memory::memory_from_data(data.into_boxed_slice()); - - // Return a `Segments` where everything is cloned - Self { - headers: self.headers.clone(), - data: Box::new(segment_data), // fill in `data` field with something - segment_size: self.segment_size, - free: self.free, - cap: self.cap, - free_q: self.free_q.clone(), - flush_at: self.flush_at, - evict: self.evict.clone(), - data_file_backed: self.data_file_backed, - fields_copied_back: self.fields_copied_back, - } - } - #[cfg(feature = "debug")] pub(crate) fn check_integrity(&mut self) -> bool { let mut integrity = true; @@ -1198,3 +1176,29 @@ impl PartialEq for Segments { && self.flush_at == other.flush_at } } + +#[cfg(test)] +impl Clone for Segments { + // Used in testing to clone a `Segments` to compare with + fn clone(&self) -> Self { + // clone `data` + let heap_size = self.segment_size as usize * self.cap as usize; + let mut data = vec![0; heap_size]; + data.clone_from_slice(self.data.as_slice()); + let segment_data = Memory::memory_from_data(data.into_boxed_slice()); + + // Return a `Segments` where everything is cloned + Self { + headers: self.headers.clone(), + data: Box::new(segment_data), + segment_size: self.segment_size, + free: self.free, + cap: self.cap, + free_q: self.free_q.clone(), + flush_at: self.flush_at, + evict: 
self.evict.clone(), + data_file_backed: self.data_file_backed, + fields_copied_back: self.fields_copied_back, + } + } +} \ No newline at end of file diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index e02aa761b..819a36977 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -44,7 +44,7 @@ const TTL_BOUNDARY_3: i32 = 1 << (TTL_BUCKET_INTERVAL_N_BIT_3 + N_BUCKET_PER_STE const MAX_N_TTL_BUCKET: usize = N_BUCKET_PER_STEP * 4; const MAX_TTL_BUCKET_IDX: usize = MAX_N_TTL_BUCKET - 1; -#[derive(Clone)] +#[cfg_attr(test, derive(Clone))] pub struct TtlBuckets { pub(crate) buckets: Box<[TtlBucket]>, pub(crate) last_expired: Instant, From 7334b3f9dcab83e8e70df7a11e2b60937032d2a8 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 13:03:03 +1100 Subject: [PATCH 33/74] changed implementation of PartialEq to be less awkward for Segments, HashTable and TtlBuckets --- src/rust/storage/seg/src/hashtable/mod.rs | 14 +------------- src/rust/storage/seg/src/segments/segments.rs | 14 +------------- .../storage/seg/src/ttl_buckets/ttl_buckets.rs | 14 +------------- 3 files changed, 3 insertions(+), 39 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 18994299c..92b06b517 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -893,19 +893,7 @@ impl HashTable { impl PartialEq for HashTable { // Checks if `HashTable` are equivalent fn eq(&self, other: &Self) -> bool { - // ---- Check if `HashTable.data` are equivalent --- - let total_buckets = self.data.len(); - - // ensure number of `HashBucket`s is the same - let mut buckets_equivalent = total_buckets == other.data.len(); - - // Compare each `HashBucket` - for id in 0..total_buckets { - buckets_equivalent = buckets_equivalent && self.data[id] == 
other.data[id]; - } - - // ---- Check if the other fields are equivalent --- - buckets_equivalent + self.data == other.data && self.power == other.power && self.mask == other.mask && self.started == other.started diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 377b4d994..88f626999 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -1155,19 +1155,7 @@ impl Default for Segments { impl PartialEq for Segments { // Checks if `Segments` are equivalent fn eq(&self, other: &Self) -> bool { - // ---- Check if `Segments.headers` are equivalent ---- - let total_buckets = self.headers.len(); - - // ensure number of `SegmentHeader`s is the same - let mut headers_equivalent = total_buckets == other.headers.len(); - - // Compare each `SegmentHeader` - for id in 0..total_buckets { - headers_equivalent = headers_equivalent && self.headers[id] == other.headers[id]; - } - - // ---- Check if the other fields are equivalent --- - headers_equivalent + self.headers == other.headers && self.data.as_slice() == other.data.as_slice() && self.segment_size == other.segment_size && self.free == other.free diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 819a36977..33ee10e9b 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -275,19 +275,7 @@ impl Default for TtlBuckets { impl PartialEq for TtlBuckets { // Checks if `TtlBuckets` are equivalent fn eq(&self, other: &Self) -> bool { - // ---- Check if `TtlBuckets.buckets` are equivalent ---- - let total_buckets = self.buckets.len(); - - // ensure number of `TtlBucket`s is the same - let mut buckets_equivalent = total_buckets == other.buckets.len(); - - // Compare each `TtlBucket` - for id in 0..total_buckets { - buckets_equivalent = buckets_equivalent && self.buckets[id] == 
other.buckets[id]; - } - - // ---- Check if the other fields are equivalent --- - buckets_equivalent && self.last_expired == other.last_expired + self.buckets == other.buckets && self.last_expired == other.last_expired } } From 9e9dd5cebfe67a5ce273c6b840602195b143eaf0 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 13:04:01 +1100 Subject: [PATCH 34/74] ran cargo fmt --- src/rust/config/src/seg.rs | 2 +- src/rust/storage/seg/src/datapool/file.rs | 23 +++++----- src/rust/storage/seg/src/hashtable/mod.rs | 10 ++--- src/rust/storage/seg/src/lib.rs | 2 +- src/rust/storage/seg/src/seg.rs | 32 ++++++------- src/rust/storage/seg/src/segments/segments.rs | 45 ++++++++++--------- src/rust/storage/seg/src/store.rs | 9 +++- src/rust/storage/seg/src/tests.rs | 3 +- .../seg/src/ttl_buckets/ttl_buckets.rs | 21 +++++---- 9 files changed, 79 insertions(+), 68 deletions(-) diff --git a/src/rust/config/src/seg.rs b/src/rust/config/src/seg.rs index 7dc896a2a..77d1bc486 100644 --- a/src/rust/config/src/seg.rs +++ b/src/rust/config/src/seg.rs @@ -173,7 +173,7 @@ impl Seg { } // Determines if the `Seg` will be gracefully shutdown. - // The graceful shutdown will be successful if the cache is file backed + // The graceful shutdown will be successful if the cache is file backed // and `segments_fields_path`, `ttl_buckets_path` and `hashtable_path` are // valid paths to save the relevant `Seg` fields to. // Otherwise, the relevant `Seg` fields will not be saved. 
diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index aea63e463..298bb1019 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -31,23 +31,22 @@ impl File { size: usize, prefault: bool, ) -> Result { - // check if the file exists and is the right size let exists = if let Ok(current_size) = std::fs::metadata(&path).map(|m| m.len()) { if current_size != size as u64 { - return Err(std::io::Error::new(std::io::ErrorKind::Other, "existing file has wrong size")); + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "existing file has wrong size", + )); } true } else { false }; - + let mmap = if exists { - let f = OpenOptions::new() - .read(true) - .write(true) - .open(path)?; - + let f = OpenOptions::new().read(true).write(true).open(path)?; + unsafe { MmapOptions::new().populate().map_mut(&f)? } } else { let f = OpenOptions::new() @@ -56,9 +55,9 @@ impl File { .write(true) .open(path)?; f.set_len(size as u64)?; - + let mut mmap = unsafe { MmapOptions::new().populate().map_mut(&f)? 
}; - + if prefault { let mut offset = 0; while offset < size { @@ -67,10 +66,10 @@ impl File { } mmap.flush()?; } - + mmap }; - + Ok(Self { mmap, size }) } } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 92b06b517..773b0d106 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -288,12 +288,12 @@ impl HashTable { // for every `HashBucket` for id in 0..total_buckets { - // cast `HashBucket` to byte pointer let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; // store `HashBucket` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); } // --------------------- Store `started` ----------------- @@ -302,7 +302,8 @@ impl HashTable { let byte_ptr = (&self.started as *const Instant) as *const u8; // store `started` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); // --------------------- Store `next_to_chain` ----------------- // cast `next_to_chain` to byte pointer @@ -367,7 +368,7 @@ impl HashTable { let n_item_slot = if chain_idx == chain_len { N_BUCKET_SLOT } else { - N_BUCKET_SLOT - 1 + N_BUCKET_SLOT - 1 }; for i in 0..n_item_slot { @@ -917,4 +918,3 @@ fn hash_builder() -> RandomState { 0x4feb29c1fbbd59d0, ) } - diff --git a/src/rust/storage/seg/src/lib.rs b/src/rust/storage/seg/src/lib.rs index 628418d97..15dd6490a 100644 --- a/src/rust/storage/seg/src/lib.rs +++ b/src/rust/storage/seg/src/lib.rs @@ -42,9 +42,9 @@ mod eviction; mod hashtable; mod item; mod rand; -mod store; mod seg; mod segments; +mod store; mod ttl_buckets; // tests diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 7c916380f..708b19c7e 
100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -343,8 +343,8 @@ impl Seg { #[cfg(test)] pub(crate) fn restored(&self) -> bool { self.segments.fields_copied_back - && self.ttl_buckets.buckets_copied_back - && self.hashtable.table_copied_back + && self.ttl_buckets.buckets_copied_back + && self.hashtable.table_copied_back } } @@ -353,20 +353,20 @@ impl Seg { impl PartialEq for Seg { fn eq(&self, other: &Self) -> bool { self.segments == other.segments - && self.ttl_buckets == other.ttl_buckets - && self.hashtable == other.hashtable + && self.ttl_buckets == other.ttl_buckets + && self.hashtable == other.hashtable } } - // // Used in testing to clone a `Seg` to compare with - // #[cfg(test)] - // pub(crate) fn clone(&self) -> Seg { - // let segments = self.segments.clone(); - // let ttl_buckets = self.ttl_buckets.clone(); - // let hashtable = self.hashtable.clone(); - // Seg { - // segments, - // ttl_buckets, - // hashtable, - // } - // } +// // Used in testing to clone a `Seg` to compare with +// #[cfg(test)] +// pub(crate) fn clone(&self) -> Seg { +// let segments = self.segments.clone(); +// let ttl_buckets = self.ttl_buckets.clone(); +// let hashtable = self.hashtable.clone(); +// Seg { +// segments, +// ttl_buckets, +// hashtable, +// } +// } diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 88f626999..8e506f3e7 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -89,14 +89,12 @@ impl Segments { Box::new(Memory::create(heap_size, true)) }; - // If `builder.restore` and + // If `builder.restore` and // there are specified paths to restore the `Segments` with and - // `Segments.data` is file backed, restore relevant - // `Segments` fields. + // `Segments.data` is file backed, restore relevant + // `Segments` fields. // Otherwise create a new `Segments`. 
- if builder.restore && - data_file_backed && - builder.segments_fields_path.is_some(){ + if builder.restore && data_file_backed && builder.segments_fields_path.is_some() { // TODO: like with the HashTable fields, we assume that the configuration // options for `Segments` hasn't changed upon recovery. We need a way to // detect the change in fields as well as decided how to @@ -124,7 +122,6 @@ impl Segments { // retrieve bytes from mmapped file bytes.copy_from_slice(&fields_data[0..fields_size]); - let mut offset = 0; let mut end = 0; // ----- Retrieve `headers` ----- @@ -188,30 +185,33 @@ impl Segments { } else { for id in 0..cfg_segments { // safety: we start iterating from 1 and seg id is constrained to < 2^24 - let header = SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); + let header = + SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); headers.push(header); } let mut headers = headers.into_boxed_slice(); - + for idx in 0..cfg_segments { let begin = cfg_segment_size as usize * idx; let end = begin + cfg_segment_size as usize; - - let mut segment = - Segment::from_raw_parts(&mut headers[idx], &mut data.as_mut_slice()[begin..end]); + + let mut segment = Segment::from_raw_parts( + &mut headers[idx], + &mut data.as_mut_slice()[begin..end], + ); segment.init(); - + let id = idx as u32 + 1; // we index cfg_segments from 1 segment.set_prev_seg(NonZeroU32::new(id - 1)); if id < cfg_segments as u32 { segment.set_next_seg(NonZeroU32::new(id + 1)); } } - + SEGMENT_CURRENT.set(cfg_segments as _); SEGMENT_FREE.set(cfg_segments as _); - + Self { headers, segment_size: cfg_segment_size, @@ -259,12 +259,16 @@ impl Segments { // for every `SegmentHeader` for id in 0..segments { - // cast `SegmentHeader` to byte pointer let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; // store `SegmentHeader` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, header_size, fields_data); + 
offset = store::store_bytes_and_update_offset( + byte_ptr, + offset, + header_size, + fields_data, + ); } // ----- Store `segment_size` ----- @@ -297,7 +301,8 @@ impl Segments { let byte_ptr = (&self.free_q as *const Option) as *const u8; // store `free_q` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); // ----- Store `flush_at` ----- @@ -1178,7 +1183,7 @@ impl Clone for Segments { // Return a `Segments` where everything is cloned Self { headers: self.headers.clone(), - data: Box::new(segment_data), + data: Box::new(segment_data), segment_size: self.segment_size, free: self.free, cap: self.cap, @@ -1189,4 +1194,4 @@ impl Clone for Segments { fields_copied_back: self.fields_copied_back, } } -} \ No newline at end of file +} diff --git a/src/rust/storage/seg/src/store.rs b/src/rust/storage/seg/src/store.rs index ce0296d1b..1e3e275f3 100644 --- a/src/rust/storage/seg/src/store.rs +++ b/src/rust/storage/seg/src/store.rs @@ -1,6 +1,11 @@ /// Copies `size` bytes at `byte_ptr` to the `offset` of `data` /// Returns the next `offset`, that is, the next byte of `data` to be copied into -pub fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: usize, data: &mut [u8]) -> usize { +pub fn store_bytes_and_update_offset( + byte_ptr: *const u8, + offset: usize, + size: usize, + data: &mut [u8], +) -> usize { // get corresponding bytes from byte pointer let bytes = unsafe { ::std::slice::from_raw_parts(byte_ptr, size) }; @@ -11,4 +16,4 @@ pub fn store_bytes_and_update_offset(byte_ptr: *const u8, offset: usize, size: u // next `offset` end -} \ No newline at end of file +} diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 57c3d2dc8..bf0200238 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -26,7 +26,7 @@ fn sizes() { 
assert_eq!(std::mem::size_of::(), 72); // increased to accommodate fields added for testing assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 48); + assert_eq!(std::mem::size_of::(), 48); } #[test] @@ -752,7 +752,6 @@ fn new_file_backed_cache_changed_and_restored() { // the restored cache should be equivalent to the old cache assert!(new_cache == old_cache); - } // Creates a new cache, gracefully shutsdown cache and restore cache diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 33ee10e9b..36f129b2b 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -57,7 +57,8 @@ pub struct TtlBuckets { impl TtlBuckets { /// Create a new set of `TtlBuckets` which cover the full range of TTLs. See /// the module-level documentation for how the range of TTLs are stored. - pub fn new() -> Self { // TODO: add path as argument + pub fn new() -> Self { + // TODO: add path as argument let intervals = [ TTL_BUCKET_INTERVAL_1, TTL_BUCKET_INTERVAL_2, @@ -97,10 +98,13 @@ impl TtlBuckets { let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; - // Mmap file - let pool = File::create(ttl_buckets_path.as_ref().unwrap(), ttl_buckets_struct_size, true) - .expect("failed to allocate file backed storage"); + let pool = File::create( + ttl_buckets_path.as_ref().unwrap(), + ttl_buckets_struct_size, + true, + ) + .expect("failed to allocate file backed storage"); let data = Box::new(pool.as_slice()); // create blank bytes to copy data into @@ -174,13 +178,13 @@ impl TtlBuckets { let byte_ptr = (&self.last_expired as *const Instant) as *const u8; // store `last_expired` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); // 
--------------------- Store `buckets` ----------------- // for every `TtlBucket` for id in 0..MAX_N_TTL_BUCKET { - // cast `TtlBucket` to byte pointer let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; @@ -262,7 +266,6 @@ impl TtlBuckets { CLEAR_TIME.add(duration.as_nanos() as _); cleared } - } impl Default for TtlBuckets { @@ -279,7 +282,7 @@ impl PartialEq for TtlBuckets { } } -// // TODO: use self.path, figure out how to indicate there was a graceful shutdown, +// // TODO: use self.path, figure out how to indicate there was a graceful shutdown, // // implement the same for Segments and HashTable // // Add description // impl Drop for TtlBuckets { @@ -293,7 +296,7 @@ impl PartialEq for TtlBuckets { // if let Some(file) = ttl_buckets_path { // let bucket_size = ::std::mem::size_of::(); // let last_expired_size = ::std::mem::size_of::(); -// let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` +// let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` // + last_expired_size; // // Mmap file From de08656e6701f4eb13fb31e999ba58eaf828bf8a Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 13:36:40 +1100 Subject: [PATCH 35/74] update File::create() documentation --- src/rust/storage/seg/src/datapool/file.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index 298bb1019..ae9147545 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -20,12 +20,12 @@ pub struct File { } impl File { - /// If there is a file at the given path, open the `File`. - /// Otherwise, create a new `File` datapool at the given path and with the specified - /// size (in bytes). 
Returns an error if could not - /// be created, size of file isn't as expected (opening), - /// couldn't be extended to the requested size (creating), or couldn't be - /// mmap'd + /// Create a new `File` datapool at the given path. If a file already exists + /// at the given path, check it is the right size and open it. Otherwise + /// open a new file at the given path and with the specified size + /// (in bytes). Returns an error if could not be created, size of file is + // not the right size (opening), couldn't be extended to the requested size + /// (creating), or couldn't be mmap'd. pub fn create>( path: T, size: usize, From 6d77a73054b7aeee16bc396d7badb9a5f2031565 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 13:38:26 +1100 Subject: [PATCH 36/74] update File::create() documentation --- src/rust/storage/seg/src/datapool/file.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index ae9147545..65d090f4a 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -20,12 +20,12 @@ pub struct File { } impl File { - /// Create a new `File` datapool at the given path. If a file already exists - /// at the given path, check it is the right size and open it. Otherwise - /// open a new file at the given path and with the specified size - /// (in bytes). Returns an error if could not be created, size of file is - // not the right size (opening), couldn't be extended to the requested size - /// (creating), or couldn't be mmap'd. + /// Create a new `File` datapool at the given path and with the specified + /// size (in bytes). If a file already exists at the given path, check it is + /// the right size and open it. Otherwise, open a new file at the given path + ///and with the specified size. 
Returns an error if could not be created, + /// size of file is not the right size (opening), couldn't be extended to + /// the requested size (creating), or couldn't be mmap'd. pub fn create>( path: T, size: usize, From d7692ef08453b1fbc5eded99576d767757fd32eb Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 13:57:31 +1100 Subject: [PATCH 37/74] removed conditional derivations in Seg, Segments, HashTable and TtlBuckets --- src/rust/storage/seg/src/datapool/memory.rs | 7 +++++++ src/rust/storage/seg/src/hashtable/mod.rs | 3 +-- src/rust/storage/seg/src/seg.rs | 12 +----------- src/rust/storage/seg/src/segments/segments.rs | 5 ++--- src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs | 3 +-- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/rust/storage/seg/src/datapool/memory.rs b/src/rust/storage/seg/src/datapool/memory.rs index ecbf33410..fe643bd05 100644 --- a/src/rust/storage/seg/src/datapool/memory.rs +++ b/src/rust/storage/seg/src/datapool/memory.rs @@ -56,3 +56,10 @@ impl Datapool for Memory { Ok(()) } } + + +impl From> for Memory { + fn from(data: Box<[u8]>) -> Memory { + Memory { data } + } +} diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 773b0d106..bad9babf9 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -100,7 +100,7 @@ static_metrics! { /// Main structure for performing item lookup. Contains a contiguous allocation /// of [`HashBucket`]s which are used to store item info and metadata. 
-#[cfg_attr(test, derive(Clone))] +#[derive(Clone)] #[repr(C)] pub(crate) struct HashTable { hash_builder: Box, @@ -890,7 +890,6 @@ impl HashTable { } } -#[cfg(test)] impl PartialEq for HashTable { // Checks if `HashTable` are equivalent fn eq(&self, other: &Self) -> bool { diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 708b19c7e..dafae16e5 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -22,7 +22,7 @@ static_metrics! { /// segment-structured design that stores data in fixed-size segments, grouping /// objects with nearby expiration time into the same segment, and lifting most /// per-object metadata into the shared segment header. -#[cfg_attr(test, derive(Clone))] +#[derive(Clone, PartialEq)] pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, @@ -348,16 +348,6 @@ impl Seg { } } -// Used in testing to compare `Seg`s -#[cfg(test)] -impl PartialEq for Seg { - fn eq(&self, other: &Self) -> bool { - self.segments == other.segments - && self.ttl_buckets == other.ttl_buckets - && self.hashtable == other.hashtable - } -} - // // Used in testing to clone a `Seg` to compare with // #[cfg(test)] // pub(crate) fn clone(&self) -> Seg { diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 8e506f3e7..58f19f6f5 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -1156,7 +1156,6 @@ impl Default for Segments { } } -#[cfg(test)] impl PartialEq for Segments { // Checks if `Segments` are equivalent fn eq(&self, other: &Self) -> bool { @@ -1170,7 +1169,6 @@ impl PartialEq for Segments { } } -#[cfg(test)] impl Clone for Segments { // Used in testing to clone a `Segments` to compare with fn clone(&self) -> Self { @@ -1178,7 +1176,8 @@ impl Clone for Segments { let heap_size = self.segment_size as usize * self.cap as usize; let mut data = vec![0; heap_size]; 
data.clone_from_slice(self.data.as_slice()); - let segment_data = Memory::memory_from_data(data.into_boxed_slice()); + let segment_data = Memory::from(data.into_boxed_slice()); + //let segment_data = Memory::memory_from_data(data.into_boxed_slice()); // Return a `Segments` where everything is cloned Self { diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 36f129b2b..799e3efc6 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -44,7 +44,7 @@ const TTL_BOUNDARY_3: i32 = 1 << (TTL_BUCKET_INTERVAL_N_BIT_3 + N_BUCKET_PER_STE const MAX_N_TTL_BUCKET: usize = N_BUCKET_PER_STEP * 4; const MAX_TTL_BUCKET_IDX: usize = MAX_N_TTL_BUCKET - 1; -#[cfg_attr(test, derive(Clone))] +#[derive(Clone)] pub struct TtlBuckets { pub(crate) buckets: Box<[TtlBucket]>, pub(crate) last_expired: Instant, @@ -274,7 +274,6 @@ impl Default for TtlBuckets { } } -#[cfg(test)] impl PartialEq for TtlBuckets { // Checks if `TtlBuckets` are equivalent fn eq(&self, other: &Self) -> bool { From 8d72b48efed21d3b2acd5b49596f7ae464cbdc62 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 14:19:32 +1100 Subject: [PATCH 38/74] cannot reproduce failing of test new_file_backed_cache_changed_and_restored --- src/rust/storage/seg/src/hashtable/mod.rs | 18 ++++++++++---- src/rust/storage/seg/src/seg.rs | 15 +----------- src/rust/storage/seg/src/segments/segments.rs | 24 +++++++++++++------ .../seg/src/ttl_buckets/ttl_buckets.rs | 6 ++++- 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index bad9babf9..9e348d976 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -893,11 +893,19 @@ impl HashTable { impl PartialEq for HashTable { // Checks if `HashTable` are equivalent fn eq(&self, other: &Self) 
-> bool { - self.data == other.data - && self.power == other.power - && self.mask == other.mask - && self.started == other.started - && self.next_to_chain == other.next_to_chain + + let a = self.data == other.data; + let b = self.power == other.power; + let c = self.mask == other.mask; + let d = self.started == other.started; + let e = self.next_to_chain == other.next_to_chain; + println!("HashTable: {}, {}, {}, {}, {}",a,b,c,d,e); + a && b && c && d && e + //self.data == other.data + // && self.power == other.power + // && self.mask == other.mask + // && self.started == other.started + // && self.next_to_chain == other.next_to_chain } } diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index dafae16e5..5f36a9ab0 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -346,17 +346,4 @@ impl Seg { && self.ttl_buckets.buckets_copied_back && self.hashtable.table_copied_back } -} - -// // Used in testing to clone a `Seg` to compare with -// #[cfg(test)] -// pub(crate) fn clone(&self) -> Seg { -// let segments = self.segments.clone(); -// let ttl_buckets = self.ttl_buckets.clone(); -// let hashtable = self.hashtable.clone(); -// Seg { -// segments, -// ttl_buckets, -// hashtable, -// } -// } +} \ No newline at end of file diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 58f19f6f5..060da22c9 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -1159,13 +1159,23 @@ impl Default for Segments { impl PartialEq for Segments { // Checks if `Segments` are equivalent fn eq(&self, other: &Self) -> bool { - self.headers == other.headers - && self.data.as_slice() == other.data.as_slice() - && self.segment_size == other.segment_size - && self.free == other.free - && self.cap == other.cap - && self.free_q == other.free_q - && self.flush_at == other.flush_at + + let a = self.headers == other.headers; + 
let b = self.data.as_slice() == other.data.as_slice(); + let c = self.segment_size == other.segment_size; + let d = self.free == other.free; + let e = self.cap == other.cap; + let f = self.free_q == other.free_q; + let g = self.flush_at == other.flush_at; + println!("Segments: {}, {}, {}, {}, {}, {}, {}",a,b,c,d,e,f,g); + a && b && c && d && e && f && g + //self.headers == other.headers + // && self.data.as_slice() == other.data.as_slice() + // && self.segment_size == other.segment_size + // && self.free == other.free + // && self.cap == other.cap + // && self.free_q == other.free_q + // && self.flush_at == other.flush_at } } diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 799e3efc6..8698bd65e 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -277,7 +277,11 @@ impl Default for TtlBuckets { impl PartialEq for TtlBuckets { // Checks if `TtlBuckets` are equivalent fn eq(&self, other: &Self) -> bool { - self.buckets == other.buckets && self.last_expired == other.last_expired + let a = self.buckets == other.buckets; + let b = self.last_expired == other.last_expired; + println!("TTL: {}, {}",a,b); + a && b + //self.buckets == other.buckets && self.last_expired == other.last_expired } } From 5b6dd2f1eb2f36ed243447cbbdabd94daa2cb618 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 14:25:30 +1100 Subject: [PATCH 39/74] implemented From> trait for Memory --- src/rust/storage/seg/src/datapool/memory.rs | 8 +------ src/rust/storage/seg/src/hashtable/mod.rs | 18 ++++---------- src/rust/storage/seg/src/segments/segments.rs | 24 ++++++------------- .../seg/src/ttl_buckets/ttl_buckets.rs | 6 +---- 4 files changed, 14 insertions(+), 42 deletions(-) diff --git a/src/rust/storage/seg/src/datapool/memory.rs b/src/rust/storage/seg/src/datapool/memory.rs index fe643bd05..d905d61a0 100644 --- 
a/src/rust/storage/seg/src/datapool/memory.rs +++ b/src/rust/storage/seg/src/datapool/memory.rs @@ -35,12 +35,6 @@ impl Memory { Self { data } } - - // Used only in Segments::clone() in order to clone `Segments.data` - #[cfg(test)] - pub fn memory_from_data(data: Box<[u8]>) -> Memory { - Memory { data } - } } impl Datapool for Memory { @@ -57,7 +51,7 @@ impl Datapool for Memory { } } - +// Used only in Segments::clone() in order to clone `Segments.data` impl From> for Memory { fn from(data: Box<[u8]>) -> Memory { Memory { data } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 9e348d976..bad9babf9 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -893,19 +893,11 @@ impl HashTable { impl PartialEq for HashTable { // Checks if `HashTable` are equivalent fn eq(&self, other: &Self) -> bool { - - let a = self.data == other.data; - let b = self.power == other.power; - let c = self.mask == other.mask; - let d = self.started == other.started; - let e = self.next_to_chain == other.next_to_chain; - println!("HashTable: {}, {}, {}, {}, {}",a,b,c,d,e); - a && b && c && d && e - //self.data == other.data - // && self.power == other.power - // && self.mask == other.mask - // && self.started == other.started - // && self.next_to_chain == other.next_to_chain + self.data == other.data + && self.power == other.power + && self.mask == other.mask + && self.started == other.started + && self.next_to_chain == other.next_to_chain } } diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 060da22c9..58f19f6f5 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -1159,23 +1159,13 @@ impl Default for Segments { impl PartialEq for Segments { // Checks if `Segments` are equivalent fn eq(&self, other: &Self) -> bool { - - let a = self.headers == other.headers; - let 
b = self.data.as_slice() == other.data.as_slice(); - let c = self.segment_size == other.segment_size; - let d = self.free == other.free; - let e = self.cap == other.cap; - let f = self.free_q == other.free_q; - let g = self.flush_at == other.flush_at; - println!("Segments: {}, {}, {}, {}, {}, {}, {}",a,b,c,d,e,f,g); - a && b && c && d && e && f && g - //self.headers == other.headers - // && self.data.as_slice() == other.data.as_slice() - // && self.segment_size == other.segment_size - // && self.free == other.free - // && self.cap == other.cap - // && self.free_q == other.free_q - // && self.flush_at == other.flush_at + self.headers == other.headers + && self.data.as_slice() == other.data.as_slice() + && self.segment_size == other.segment_size + && self.free == other.free + && self.cap == other.cap + && self.free_q == other.free_q + && self.flush_at == other.flush_at } } diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 8698bd65e..799e3efc6 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -277,11 +277,7 @@ impl Default for TtlBuckets { impl PartialEq for TtlBuckets { // Checks if `TtlBuckets` are equivalent fn eq(&self, other: &Self) -> bool { - let a = self.buckets == other.buckets; - let b = self.last_expired == other.last_expired; - println!("TTL: {}, {}",a,b); - a && b - //self.buckets == other.buckets && self.last_expired == other.last_expired + self.buckets == other.buckets && self.last_expired == other.last_expired } } From bdd17b645ab0eb03f5f17c7d6655f58f024c4d94 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 14:30:37 +1100 Subject: [PATCH 40/74] moved merged non-recovery tests to above recovery section --- src/rust/storage/seg/src/tests.rs | 145 +++++++++++++++--------------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/src/rust/storage/seg/src/tests.rs 
b/src/rust/storage/seg/src/tests.rs index c85d72947..81d1aa477 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -399,6 +399,79 @@ fn clear() { assert!(cache.get(b"coffee").is_none()); } +#[test] +fn wrapping_add() { + let ttl = Duration::ZERO; + let segment_size = 4096; + let segments = 64; + let heap_size = segments * segment_size as usize; + + let mut cache = Seg::builder() + .segment_size(segment_size) + .heap_size(heap_size) + .build(); + assert_eq!(cache.items(), 0); + assert_eq!(cache.segments.free(), 64); + assert!(cache.insert(b"coffee", 0, None, ttl).is_ok()); + assert_eq!(cache.segments.free(), 63); + assert_eq!(cache.items(), 1); + assert!(cache.get(b"coffee").is_some()); + + let item = cache.get(b"coffee").unwrap(); + assert_eq!(item.value(), 0, "item is: {:?}", item); + cache + .wrapping_add(b"coffee", 1) + .expect("failed to increment"); + assert_eq!(item.value(), 1, "item is: {:?}", item); + cache + .wrapping_add(b"coffee", u64::MAX - 1) + .expect("failed to increment"); + assert_eq!(item.value(), u64::MAX, "item is: {:?}", item); + cache + .wrapping_add(b"coffee", 1) + .expect("failed to increment"); + assert_eq!(item.value(), 0, "item is: {:?}", item); + cache + .wrapping_add(b"coffee", 2) + .expect("failed to increment"); + assert_eq!(item.value(), 2, "item is: {:?}", item); +} + +#[test] +fn saturating_sub() { + let ttl = Duration::ZERO; + let segment_size = 4096; + let segments = 64; + let heap_size = segments * segment_size as usize; + + let mut cache = Seg::builder() + .segment_size(segment_size) + .heap_size(heap_size) + .build(); + assert_eq!(cache.items(), 0); + assert_eq!(cache.segments.free(), 64); + assert!(cache.insert(b"coffee", 3, None, ttl).is_ok()); + assert_eq!(cache.segments.free(), 63); + assert_eq!(cache.items(), 1); + assert!(cache.get(b"coffee").is_some()); + + let item = cache.get(b"coffee").unwrap(); + assert_eq!(item.value(), 3, "item is: {:?}", item); + cache + 
.saturating_sub(b"coffee", 2) + .expect("failed to increment"); + assert_eq!(item.value(), 1, "item is: {:?}", item); + cache + .saturating_sub(b"coffee", 1) + .expect("failed to increment"); + assert_eq!(item.value(), 0, "item is: {:?}", item); + cache + .saturating_sub(b"coffee", 1) + .expect("failed to increment"); + assert_eq!(item.value(), 0, "item is: {:?}", item); +} + + // ----------- TESTS FOR RECOVERY ------------- // Configuration Options: // @@ -1064,75 +1137,3 @@ fn full_cache_recovery_long() { while let Some(key) = unique_active_keys.pop() { assert!(new_cache.get(&key).is_some()); } - -#[test] -fn wrapping_add() { - let ttl = Duration::ZERO; - let segment_size = 4096; - let segments = 64; - let heap_size = segments * segment_size as usize; - - let mut cache = Seg::builder() - .segment_size(segment_size) - .heap_size(heap_size) - .build(); - assert_eq!(cache.items(), 0); - assert_eq!(cache.segments.free(), 64); - assert!(cache.insert(b"coffee", 0, None, ttl).is_ok()); - assert_eq!(cache.segments.free(), 63); - assert_eq!(cache.items(), 1); - assert!(cache.get(b"coffee").is_some()); - - let item = cache.get(b"coffee").unwrap(); - assert_eq!(item.value(), 0, "item is: {:?}", item); - cache - .wrapping_add(b"coffee", 1) - .expect("failed to increment"); - assert_eq!(item.value(), 1, "item is: {:?}", item); - cache - .wrapping_add(b"coffee", u64::MAX - 1) - .expect("failed to increment"); - assert_eq!(item.value(), u64::MAX, "item is: {:?}", item); - cache - .wrapping_add(b"coffee", 1) - .expect("failed to increment"); - assert_eq!(item.value(), 0, "item is: {:?}", item); - cache - .wrapping_add(b"coffee", 2) - .expect("failed to increment"); - assert_eq!(item.value(), 2, "item is: {:?}", item); -} - -#[test] -fn saturating_sub() { - let ttl = Duration::ZERO; - let segment_size = 4096; - let segments = 64; - let heap_size = segments * segment_size as usize; - - let mut cache = Seg::builder() - .segment_size(segment_size) - .heap_size(heap_size) - .build(); 
- assert_eq!(cache.items(), 0); - assert_eq!(cache.segments.free(), 64); - assert!(cache.insert(b"coffee", 3, None, ttl).is_ok()); - assert_eq!(cache.segments.free(), 63); - assert_eq!(cache.items(), 1); - assert!(cache.get(b"coffee").is_some()); - - let item = cache.get(b"coffee").unwrap(); - assert_eq!(item.value(), 3, "item is: {:?}", item); - cache - .saturating_sub(b"coffee", 2) - .expect("failed to increment"); - assert_eq!(item.value(), 1, "item is: {:?}", item); - cache - .saturating_sub(b"coffee", 1) - .expect("failed to increment"); - assert_eq!(item.value(), 0, "item is: {:?}", item); - cache - .saturating_sub(b"coffee", 1) - .expect("failed to increment"); - assert_eq!(item.value(), 0, "item is: {:?}", item); -} From f47db51cd8e41fa3bf22334c520bba4bee3aa019 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 14:33:21 +1100 Subject: [PATCH 41/74] fixed mismatched bracket introduced by merge --- src/rust/storage/seg/src/seg.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 7cb414410..3935f8ec9 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -346,7 +346,6 @@ impl Seg { && self.ttl_buckets.buckets_copied_back && self.hashtable.table_copied_back } -} /// Perform a wrapping addition on the value stored at the supplied key. 
/// Returns an error if the key is invalid, the item is not found, or the From 739365e873c3956e8d55d9ae687a9fdae1c52885 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 14:34:04 +1100 Subject: [PATCH 42/74] fixed mismatched bracket introduced by merge --- src/rust/storage/seg/src/tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 81d1aa477..05639bd84 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -1137,3 +1137,4 @@ fn full_cache_recovery_long() { while let Some(key) = unique_active_keys.pop() { assert!(new_cache.get(&key).is_some()); } +} From 186b9b494c0828eaaf6e696569993bdf27f8e59c Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 14:57:35 +1100 Subject: [PATCH 43/74] added a Seg.flush() functio --- src/rust/entrystore/src/seg/mod.rs | 20 ++++++++++---------- src/rust/storage/seg/src/seg.rs | 4 ++++ src/rust/storage/seg/src/tests.rs | 6 ++++++ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index 33bf28f30..16839c405 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -58,19 +58,19 @@ impl Seg { Self { data } } - /// Demolish (gracefully shutdown) the cache if - /// configured to do so - pub fn demolish(self, config: &T) { + /// Flush (gracefully shutdown) the `Seg` cache if configured to do so + pub fn flush(self, config: &T) { let config = config.seg(); if config.graceful_shutdown() { - ::seg::Seg::demolisher() - .heap_size(config.heap_size()) - .overflow_factor(config.overflow_factor()) - .segments_fields_path(config.segments_fields_path()) - .ttl_buckets_path(config.ttl_buckets_path()) - .hashtable_path(config.hashtable_path()) - .demolish(self.data); + self.data.flush() + // ::seg::Seg::demolisher() + // .heap_size(config.heap_size()) + // 
.overflow_factor(config.overflow_factor()) + // .segments_fields_path(config.segments_fields_path()) + // .ttl_buckets_path(config.ttl_buckets_path()) + // .hashtable_path(config.hashtable_path()) + // .demolish(self.data); }; } } diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 3935f8ec9..81a3fba07 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -75,6 +75,10 @@ impl Seg { Demolisher::default() } + pub fn flush(&self) { + + } + /// Gets a count of items in the `Seg` instance. This is an expensive /// operation and is only enabled for tests and builds with the `debug` /// feature enabled. diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 05639bd84..c0839f0da 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -587,6 +587,12 @@ fn new_cache_file_backed() { assert!(!cache.ttl_buckets.buckets_copied_back); // the `HashTable` should not have been restored assert!(!cache.hashtable.table_copied_back); + + // DELETE + let graceful_shutdown = true; + if graceful_shutdown { + cache.flush() + } } // Check that a new, not file backed cache is not file backed From ed43a6df9e34ae685419d6d0b0b7c810671226cd Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 15:55:12 +1100 Subject: [PATCH 44/74] completed flush() for Segments --- src/rust/entrystore/src/seg/mod.rs | 4 +- src/rust/storage/seg/src/datapool/file.rs | 8 +- src/rust/storage/seg/src/seg.rs | 4 +- src/rust/storage/seg/src/segments/segments.rs | 123 +++++++++++++++++- src/rust/storage/seg/src/tests.rs | 32 +++-- 5 files changed, 144 insertions(+), 27 deletions(-) diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index 16839c405..ce21587ca 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -63,7 +63,9 @@ impl Seg { let config = config.seg(); if config.graceful_shutdown() { - 
self.data.flush() + // TODO: check if successfully shutdown and record result + self.data.flush(); + // ::seg::Seg::demolisher() // .heap_size(config.heap_size()) // .overflow_factor(config.overflow_factor()) diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index 65d090f4a..744d3809e 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -20,11 +20,11 @@ pub struct File { } impl File { - /// Create a new `File` datapool at the given path and with the specified + /// Create a new `File` datapool at the given path and with the specified /// size (in bytes). If a file already exists at the given path, check it is - /// the right size and open it. Otherwise, open a new file at the given path - ///and with the specified size. Returns an error if could not be created, - /// size of file is not the right size (opening), couldn't be extended to + /// the right size and open it. Otherwise, open a new file at the given path + ///and with the specified size. Returns an error if could not be created, + /// size of file is not the right size (opening), couldn't be extended to /// the requested size (creating), or couldn't be mmap'd. pub fn create>( path: T, diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 81a3fba07..114718353 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -75,8 +75,8 @@ impl Seg { Demolisher::default() } - pub fn flush(&self) { - + pub fn flush(&self) -> std::io::Result<()> { + self.segments.flush() } /// Gets a count of items in the `Seg` instance. 
This is an expensive diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 58f19f6f5..c17e2e1cb 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -46,6 +46,8 @@ pub(crate) struct Segments { data_file_backed: bool, /// Are `headers` copied back from a file? pub(crate) fields_copied_back: bool, + /// Path to save relevant fields upon graceful shutdown + segments_fields_path: Option, } impl Segments { @@ -113,8 +115,12 @@ impl Segments { + flush_at_size; // Mmap file - let pool = File::create(builder.segments_fields_path.unwrap(), fields_size, true) - .expect("failed to allocate file backed storage"); + let pool = File::create( + builder.segments_fields_path.as_ref().unwrap(), + fields_size, + true, + ) + .expect("failed to allocate file backed storage"); let fields_data = Box::new(pool.as_slice()); // create blank bytes to copy data into @@ -181,6 +187,7 @@ impl Segments { evict: Box::new(evict), data_file_backed: true, fields_copied_back: true, + segments_fields_path: builder.segments_fields_path, } } else { for id in 0..cfg_segments { @@ -223,6 +230,7 @@ impl Segments { evict: Box::new(evict), data_file_backed, fields_copied_back: false, + segments_fields_path: builder.segments_fields_path, } } } @@ -336,6 +344,106 @@ impl Segments { gracefully_shutdown } + /// Flushes the `Segments` by flushing the `Segments.data` (if filed backed) + /// and storing the other `Segments` fields' to a file (if a path is + /// specified) + pub fn flush(&self) -> std::io::Result<()> { + // if `Segments.data` is file backed, flush it to PMEM + if self.data_file_backed { + self.data.flush()?; + } + + // if a path is specified, copy all the `Segments` fields' to the file + // specified by `segments_fields_path` + if let Some(file) = &self.segments_fields_path { + let header_size: usize = ::std::mem::size_of::(); + let i32_size = ::std::mem::size_of::(); + let 
u32_size = ::std::mem::size_of::(); + let free_q_size = ::std::mem::size_of::>(); + let flush_at_size = ::std::mem::size_of::(); + // Size of all components of `Segments` that are being restored + let fields_size = (self.cap as usize) * header_size // `headers` + + i32_size // `segment_size` + + u32_size * 2 // `free` and `cap` + + free_q_size + + flush_at_size; + + // mmap file + let mut pool = File::create(file, fields_size, true) + .expect("failed to allocate file backed storage"); + let fields_data = pool.as_mut_slice(); + + let mut offset = 0; + // ----- Store `headers` ----- + + // for every `SegmentHeader` + for id in 0..(self.cap as usize) { + // cast `SegmentHeader` to byte pointer + let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; + + // store `SegmentHeader` back to mmapped file + offset = store::store_bytes_and_update_offset( + byte_ptr, + offset, + header_size, + fields_data, + ); + } + + // ----- Store `segment_size` ----- + + // cast `segment_size` to byte pointer + let byte_ptr = (&self.segment_size as *const i32) as *const u8; + + // store `segment_size` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, i32_size, fields_data); + + // ----- Store `free` ----- + + // cast `free` to byte pointer + let byte_ptr = (&self.free as *const u32) as *const u8; + + // store `free` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); + + // ----- Store `cap` ----- + + // cast `cap` to byte pointer + let byte_ptr = (&self.cap as *const u32) as *const u8; + + // store `cap` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); + + // ----- Store `free_q` ----- + + // cast `free_q` to byte pointer + let byte_ptr = (&self.free_q as *const Option) as *const u8; + + // store `free_q` back to mmapped file + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, 
fields_data); + + // ----- Store `flush_at` ----- + + // cast `flush_at` to byte pointer + let byte_ptr = (&self.flush_at as *const Instant) as *const u8; + + // store `flush_at` back to mmapped file + store::store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, fields_data); + + // ----------------------------- + + // TODO: check if this flushes fields_data from CPU caches + pool.flush()?; + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Segments not gracefully shutdown", + )) + } + } + /// Return the size of each segment in bytes #[inline] pub fn segment_size(&self) -> i32 { @@ -1170,7 +1278,7 @@ impl PartialEq for Segments { } impl Clone for Segments { - // Used in testing to clone a `Segments` to compare with + // Used in testing to clone a `Segments` to compare equivalency with fn clone(&self) -> Self { // clone `data` let heap_size = self.segment_size as usize * self.cap as usize; @@ -1179,7 +1287,7 @@ impl Clone for Segments { let segment_data = Memory::from(data.into_boxed_slice()); //let segment_data = Memory::memory_from_data(data.into_boxed_slice()); - // Return a `Segments` where everything is cloned + // Return a `Segments` where everything relevant is cloned Self { headers: self.headers.clone(), data: Box::new(segment_data), @@ -1188,9 +1296,10 @@ impl Clone for Segments { cap: self.cap, free_q: self.free_q.clone(), flush_at: self.flush_at, - evict: self.evict.clone(), - data_file_backed: self.data_file_backed, - fields_copied_back: self.fields_copied_back, + evict: self.evict.clone(), // not relevant + data_file_backed: self.data_file_backed, // not relevant + fields_copied_back: self.fields_copied_back, // not relevant + segments_fields_path: None, // not relevant } } } diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index c0839f0da..02dff1e8b 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -471,7 +471,6 @@ fn saturating_sub() { 
assert_eq!(item.value(), 0, "item is: {:?}", item); } - // ----------- TESTS FOR RECOVERY ------------- // Configuration Options: // @@ -587,12 +586,6 @@ fn new_cache_file_backed() { assert!(!cache.ttl_buckets.buckets_copied_back); // the `HashTable` should not have been restored assert!(!cache.hashtable.table_copied_back); - - // DELETE - let graceful_shutdown = true; - if graceful_shutdown { - cache.flush() - } } // Check that a new, not file backed cache is not file backed @@ -770,7 +763,13 @@ fn new_file_backed_cache_changed_and_restored() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, None, None, None); + let mut cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -791,11 +790,18 @@ fn new_file_backed_cache_changed_and_restored() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // // force cache to go out of scope and thus `cache.segments`, - // // `cache.hashtable` and `cache.ttl_buckets` will be dropped (demolished) - // { - // let _x = cache; - // } + // DELETE + let graceful_shutdown = true; + if graceful_shutdown { + assert!(cache.flush().is_ok()); + } + + // Create tempfile for `Segments` fields' + let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // Create tempfile for `TtlBuckets` + let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // Create tempfile for `HashTable` + let hashtable_path: Option = Some(dir.path().join("hashtable")); // gracefully shutdown cache assert!(demolish_cache( From 917855eeff7f65c29992c333ca787c8588878e0c Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 17:09:50 +1100 Subject: [PATCH 45/74] added flush() to TtlBuckets and HashTable. 
Next step: changes tests.rs so that they call flush() instead of demolish() --- src/rust/storage/seg/src/builder.rs | 33 +++++-- src/rust/storage/seg/src/hashtable/mod.rs | 90 ++++++++++++++++++- src/rust/storage/seg/src/seg.rs | 5 +- src/rust/storage/seg/src/segments/segments.rs | 5 +- src/rust/storage/seg/src/tests.rs | 31 +++---- src/rust/storage/seg/src/ttl_buckets/tests.rs | 2 +- .../seg/src/ttl_buckets/ttl_buckets.rs | 72 ++++++++++++--- 7 files changed, 192 insertions(+), 46 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 811f29e21..2de833f3c 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -177,6 +177,8 @@ impl Builder { /// Consumes the builder and returns a fully-allocated `Seg` instance. /// If `restore` and valid paths to the structures are given, `Seg` will /// be restored. Otherwise, create a new `Seg` instance. + /// If valid paths are given, the files at these paths will be used to copy + /// the structures to upon graceful shutdown. /// /// ``` /// use seg::{Policy, Seg}; @@ -190,15 +192,21 @@ impl Builder { /// .eviction(Policy::Random).build(); /// ``` pub fn build(self) -> Seg { - // Build `Segments`. - // If `restore` and valid paths are given, - // it will be copied back + // Build `Segments`. If there is a path for the datapool set, the + // `Segments.data` will be file backed. If `restore` and there is a path + // for the `Segments` fields, restore the other relevant `Segments` + // fields. 
let segments = self.segments_builder.build(); + + // If `Segments` successfully restored and `restore` if segments.fields_copied_back && self.restore { // Attempt to restore `HashTable` and `TtlBuckets` - let hashtable = - HashTable::restore(self.hashtable_path, self.hash_power, self.overflow_factor); - let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path); + let hashtable = HashTable::restore( + self.hashtable_path.clone(), + self.hash_power, + self.overflow_factor, + ); + let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path.clone()); // If successful, return a restored segcache if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { @@ -210,9 +218,18 @@ impl Builder { } } + // TODO: Should paths be checked here to see if any are None (or not + // valid)? Then we could take an "All or Nothing" approach. That is, if + // one of the paths is not valid, then all structures are created + // as new AND no paths are set for graceful shutdown. Otherwise, if + // `restore`, we restore from these paths, else, we set these paths. + // Currently, I am not doing this as due to the Segments having a + // separate builder + different control flow, it is too awkward to + // implement. + // If not `restore` or restoration failed, create a new cache - let hashtable = HashTable::new(self.hash_power, self.overflow_factor); - let ttl_buckets = TtlBuckets::new(); + let hashtable = HashTable::new(self.hashtable_path, self.hash_power, self.overflow_factor); + let ttl_buckets = TtlBuckets::new(self.ttl_buckets_path); Seg { hashtable, segments, diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index d7838f186..22a6a10aa 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -112,13 +112,17 @@ pub(crate) struct HashTable { next_to_chain: u64, /// Is `HashTable` copied back from a file? 
pub(crate) table_copied_back: bool, + /// Path to save relevant fields upon graceful shutdown + hashtable_path: Option, + /// Used in graceful shutdown + overflow_factor: f64, } impl HashTable { /// Creates a new hashtable with a specified power and overflow factor. The /// hashtable will have the capacity to store up to /// `7 * 2^(power - 3) * (1 + overflow_factor)` items. - pub fn new(power: u8, overflow_factor: f64) -> HashTable { + pub fn new(hashtable_path: Option, power: u8, overflow_factor: f64) -> HashTable { if overflow_factor < 0.0 { fatal!("hashtable overflow factor must be >= 0.0"); } @@ -155,12 +159,14 @@ impl HashTable { started: Instant::recent(), next_to_chain: buckets as u64, table_copied_back: false, + hashtable_path, + overflow_factor, } } pub fn restore(hashtable_path: Option, cfg_power: u8, overflow_factor: f64) -> Self { // if there is a path to restore from, restore the `HashTable` - if let Some(file) = hashtable_path { + if let Some(file) = &hashtable_path { // restore() assumes no changes in `power`. // I.e. 
config specifies same `power` as `HashTable` we are // restoring from @@ -238,11 +244,13 @@ impl HashTable { started, next_to_chain, table_copied_back: true, + hashtable_path, + overflow_factor, } } // otherwise, create a new `HashTable` else { - HashTable::new(cfg_power, overflow_factor) + HashTable::new(hashtable_path, cfg_power, overflow_factor) } } @@ -322,6 +330,82 @@ impl HashTable { gracefully_shutdown } + /// Flushes the `HashTable` by storing it to a file (if a path is specified) + pub fn flush(&self) -> std::io::Result<()> { + // if a path is specified, copy all the `HashBucket`s to the file + // specified by `hashtable_path` + if let Some(file) = &self.hashtable_path { + let total_buckets = total_buckets(self.power, self.overflow_factor); + let bucket_size = ::std::mem::size_of::(); + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + // Size of all components of `HashTable` that are being saved + let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + + total_buckets * bucket_size // `data` + + started_size; + + // Mmap file + let mut pool = File::create(file, hashtable_size, true) + .expect("failed to allocate file backed storage"); + let file_data = pool.as_mut_slice(); + + let mut offset = 0; + // --------------------- Store `power` ----------------- + + // cast `power` to byte pointer + let byte_ptr = (&self.power as *const u64) as *const u8; + + // store `power` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + + // --------------------- Store `mask` ----------------- + + // cast `mask` to byte pointer + let byte_ptr = (&self.mask as *const u64) as *const u8; + + // store `mask` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + // --------------------- Store `data` ----------------- + + // for every `HashBucket` + for id in 0..total_buckets { + // cast `HashBucket` to byte 
pointer + let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; + + // store `HashBucket` back to mmapped file + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); + } + + // --------------------- Store `started` ----------------- + + // cast `started` to byte pointer + let byte_ptr = (&self.started as *const Instant) as *const u8; + + // store `started` back to mmapped file + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); + // --------------------- Store `next_to_chain` ----------------- + + // cast `next_to_chain` to byte pointer + let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; + + // store `next_to_chain` back to mmapped file + store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + // ------------------------------------------------------------- + + // TODO: check if this flushes the CPU caches + pool.flush()?; + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Path to store HashTable to is None, cannot gracefully + shutdown cache", + )) + } + } + /// Lookup an item by key and return it pub fn get(&mut self, key: &[u8], segments: &mut Segments) -> Option { let hash = self.hash(key); diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 114718353..af1df3b77 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -76,7 +76,10 @@ impl Seg { } pub fn flush(&self) -> std::io::Result<()> { - self.segments.flush() + self.segments.flush()?; + self.hashtable.flush()?; + self.ttl_buckets.flush()?; + Ok(()) } /// Gets a count of items in the `Seg` instance. 
This is an expensive diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index c17e2e1cb..c214f12f6 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -348,7 +348,7 @@ impl Segments { /// and storing the other `Segments` fields' to a file (if a path is /// specified) pub fn flush(&self) -> std::io::Result<()> { - // if `Segments.data` is file backed, flush it to PMEM + // if `Segments.data` is file backed, flush it to file if self.data_file_backed { self.data.flush()?; } @@ -439,7 +439,8 @@ impl Segments { } else { Err(std::io::Error::new( std::io::ErrorKind::Other, - "Segments not gracefully shutdown", + "Path to store Segments to is None, cannot gracefully + shutdown cache", )) } } diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 02dff1e8b..319e0c173 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -790,26 +790,23 @@ fn new_file_backed_cache_changed_and_restored() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // DELETE - let graceful_shutdown = true; - if graceful_shutdown { - assert!(cache.flush().is_ok()); - } - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); + // let segments_fields_path: Option = Some(dir.path().join("segments_fields")); + // // Create tempfile for `TtlBuckets` + // let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); + // // Create tempfile for `HashTable` + // let hashtable_path: Option = Some(dir.path().join("hashtable")); // gracefully shutdown cache - assert!(demolish_cache( - cache, - segments_fields_path, - 
ttl_buckets_path, - hashtable_path - )); + // assert!(demolish_cache( + // cache, + // segments_fields_path, + // ttl_buckets_path, + // hashtable_path + // )); + + // Flush cache + assert!(cache.flush().is_ok()); // Create same tempfiles (they have been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); diff --git a/src/rust/storage/seg/src/ttl_buckets/tests.rs b/src/rust/storage/seg/src/ttl_buckets/tests.rs index d1fadd975..e731caac9 100644 --- a/src/rust/storage/seg/src/ttl_buckets/tests.rs +++ b/src/rust/storage/seg/src/ttl_buckets/tests.rs @@ -7,7 +7,7 @@ use crate::*; #[test] fn bucket_index() { - let ttl_buckets = TtlBuckets::new(); + let ttl_buckets = TtlBuckets::new(None); // Zero TTL and max duration both go into the same TtlBucket assert_eq!(ttl_buckets.get_bucket_index(Duration::from_secs(0)), 1023); diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 799e3efc6..af8573c19 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -50,14 +50,14 @@ pub struct TtlBuckets { pub(crate) last_expired: Instant, /// Are `TtlBuckets` copied back from a file? pub(crate) buckets_copied_back: bool, - /// Path to store `TtlBuckets` upon graceful shutdown + /// Path to store relevant upon graceful shutdown pub(crate) ttl_buckets_path: Option, } impl TtlBuckets { /// Create a new set of `TtlBuckets` which cover the full range of TTLs. See /// the module-level documentation for how the range of TTLs are stored. - pub fn new() -> Self { + pub fn new(ttl_buckets_path: Option) -> Self { // TODO: add path as argument let intervals = [ TTL_BUCKET_INTERVAL_1, @@ -84,7 +84,7 @@ impl TtlBuckets { buckets, last_expired, buckets_copied_back: false, - ttl_buckets_path: None, //TODO: replace with given path + ttl_buckets_path, } } @@ -92,19 +92,15 @@ impl TtlBuckets { // to restore from is valid. 
Otherwise return a new `TtlBuckets` pub fn restore(ttl_buckets_path: Option) -> Self { // if there is a path to restore from, restore the `TtlBuckets` - if ttl_buckets_path.is_some() { + if let Some(file) = &ttl_buckets_path { let bucket_size = ::std::mem::size_of::(); let last_expired_size = ::std::mem::size_of::(); let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; // Mmap file - let pool = File::create( - ttl_buckets_path.as_ref().unwrap(), - ttl_buckets_struct_size, - true, - ) - .expect("failed to allocate file backed storage"); + let pool = File::create(file, ttl_buckets_struct_size, true) + .expect("failed to allocate file backed storage"); let data = Box::new(pool.as_slice()); // create blank bytes to copy data into @@ -146,9 +142,7 @@ impl TtlBuckets { } // otherwise, create a new `TtlBuckets` else { - // TODO: uncomment this line when implementing Drop trait - //TtlBuckets::new(ttl_buckets_path: Option) - TtlBuckets::new() + TtlBuckets::new(ttl_buckets_path) } } @@ -204,6 +198,56 @@ impl TtlBuckets { gracefully_shutdown } + /// Flushes the `TtlBuckets` by storing it to a file (if a path is specified) + pub fn flush(&self) -> std::io::Result<()> { + // if a path is specified, copy all the `TtlBucket`s to the file + // specified by `ttl_buckets_path` + if let Some(file) = &self.ttl_buckets_path { + let bucket_size = ::std::mem::size_of::(); + let last_expired_size = ::std::mem::size_of::(); + let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + + last_expired_size; + + // Mmap file + let mut pool = File::create(file, ttl_buckets_struct_size, true) + .expect("failed to allocate file backed storage"); + let data = pool.as_mut_slice(); + + let mut offset = 0; + // --------------------- Store `last_expired` ----------------- + + // cast `last_expired` to byte pointer + let byte_ptr = (&self.last_expired as *const Instant) as *const u8; + + // store `last_expired` back to mmapped file + offset = 
+ store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); + + // --------------------- Store `buckets` ----------------- + + // for every `TtlBucket` + for id in 0..MAX_N_TTL_BUCKET { + // cast `TtlBucket` to byte pointer + let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; + + // store `TtlBucket` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); + } + + // -------------------------------------------------- + + // TODO: check if this flushes the CPU caches + pool.flush()?; + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Path to store TtlBuckets to is None, cannot gracefully + shutdown cache", + )) + } + } + pub(crate) fn get_bucket_index(&self, ttl: Duration) -> usize { let ttl = ttl.as_secs() as i32; if ttl <= 0 { @@ -270,7 +314,7 @@ impl TtlBuckets { impl Default for TtlBuckets { fn default() -> Self { - Self::new() + Self::new(None) } } From 995544a930836bbd162328bb26b187c524d630aa Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 18:23:17 +1100 Subject: [PATCH 46/74] replaced demolish() with flush() in tests.rs. 
Deleted all traces of Demolisher --- src/rust/entrystore/src/seg/mod.rs | 8 - src/rust/storage/seg/src/builder.rs | 6 +- src/rust/storage/seg/src/demolisher.rs | 84 ------- src/rust/storage/seg/src/hashtable/mod.rs | 76 ------ src/rust/storage/seg/src/lib.rs | 2 - src/rust/storage/seg/src/seg.rs | 29 +-- src/rust/storage/seg/src/segments/segments.rs | 111 +-------- src/rust/storage/seg/src/tests.rs | 216 +++++++----------- .../seg/src/ttl_buckets/ttl_buckets.rs | 52 ----- 9 files changed, 88 insertions(+), 496 deletions(-) delete mode 100644 src/rust/storage/seg/src/demolisher.rs diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index ce21587ca..8def7800f 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -65,14 +65,6 @@ impl Seg { if config.graceful_shutdown() { // TODO: check if successfully shutdown and record result self.data.flush(); - - // ::seg::Seg::demolisher() - // .heap_size(config.heap_size()) - // .overflow_factor(config.overflow_factor()) - // .segments_fields_path(config.segments_fields_path()) - // .ttl_buckets_path(config.ttl_buckets_path()) - // .hashtable_path(config.hashtable_path()) - // .demolish(self.data); }; } } diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 2de833f3c..bdeeb0188 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -176,9 +176,9 @@ impl Builder { /// Consumes the builder and returns a fully-allocated `Seg` instance. /// If `restore` and valid paths to the structures are given, `Seg` will - /// be restored. Otherwise, create a new `Seg` instance. - /// If valid paths are given, the files at these paths will be used to copy - /// the structures to upon graceful shutdown. + /// be restored. Otherwise, create a new `Seg` instance. If valid paths are + /// given, the files at these paths will be used to copy the structures to + /// upon graceful shutdown. 
/// /// ``` /// use seg::{Policy, Seg}; diff --git a/src/rust/storage/seg/src/demolisher.rs b/src/rust/storage/seg/src/demolisher.rs deleted file mode 100644 index e3e95ea04..000000000 --- a/src/rust/storage/seg/src/demolisher.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2021 Twitter, Inc. -// Licensed under the Apache License, Version 2.0 -// http://www.apache.org/licenses/LICENSE-2.0 - -//! A demolisher for gracefully deconstructing a [`Seg`] instance. - -use crate::*; -use std::path::PathBuf; - -/// A demolisher that is used to gracefully deconstruct a [`Seg`] instance. -pub struct Demolisher { - heap_size: usize, - overflow_factor: f64, - // path at which the `Segments` fields' will be stored - segments_fields_path: Option, - // path at which the `TtlBuckets` will be stored - ttl_buckets_path: Option, - // path at which the `Hashtable` will be stored - hashtable_path: Option, -} - -// Defines the default parameters -impl Default for Demolisher { - fn default() -> Self { - Self { - heap_size: 64 * 1024 * 1024, - overflow_factor: 0.0, - segments_fields_path: None, - ttl_buckets_path: None, - hashtable_path: None, - } - } -} - -impl Demolisher { - /// Function the same as from `SegmentsBuilder`. - /// Specify the total heap size in bytes. The heap size will be divided by - /// the segment size to determine the number of segments to allocate. - pub fn heap_size(mut self, bytes: usize) -> Self { - self.heap_size = bytes; - self - } - - /// Function the same as from `Builder`. - /// Specify an overflow factor which was used to scale the `HashTable` and - /// provide additional capacity for chaining item buckets. A factor of 1.0 - /// will result in a hash table that is 100% larger. 
- /// Used for demolishing the `HashTable` - pub fn overflow_factor(mut self, percent: f64) -> Self { - self.overflow_factor = percent; - self - } - - // Set `Segments` fields' path - pub fn segments_fields_path(mut self, path: Option) -> Self { - self.segments_fields_path = path; - self - } - - // Set `TtlBuckets` path - pub fn ttl_buckets_path(mut self, path: Option) -> Self { - self.ttl_buckets_path = path; - self - } - - // Set `Hashtable` path - pub fn hashtable_path(mut self, path: Option) -> Self { - self.hashtable_path = path; - self - } - - // Demolish the cache by attempting to save the `Segments`, - // `TtlBuckets` and `HashTable` to the paths specified - // If successful, return True. Else, return False. - pub fn demolish(self, cache: Seg) -> bool { - cache - .segments - .demolish(self.segments_fields_path, self.heap_size) - && cache.ttl_buckets.demolish(self.ttl_buckets_path) - && cache - .hashtable - .demolish(self.hashtable_path, self.overflow_factor) - } -} diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 22a6a10aa..0df2b9ad6 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -254,82 +254,6 @@ impl HashTable { } } - /// Demolishes the `HashTable` by storing it to - /// PMEM (if a path is specified) - pub fn demolish(&self, hashtable_path: Option, overflow_factor: f64) -> bool { - let mut gracefully_shutdown = false; - - // if a path is specified, copy all the `HashBucket`s - // to the file specified by `hashtable_path` - if let Some(file) = hashtable_path { - let total_buckets = total_buckets(self.power, overflow_factor); - let bucket_size = ::std::mem::size_of::(); - let u64_size = ::std::mem::size_of::(); - let started_size = ::std::mem::size_of::(); - // Size of all components of `HashTable` that are being saved - let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` - + total_buckets * bucket_size // `data` - + 
started_size; - - // Mmap file - let mut pool = File::create(file, hashtable_size, true) - .expect("failed to allocate file backed storage"); - let file_data = pool.as_mut_slice(); - - let mut offset = 0; - // --------------------- Store `power` ----------------- - - // cast `power` to byte pointer - let byte_ptr = (&self.power as *const u64) as *const u8; - - // store `power` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); - - // --------------------- Store `mask` ----------------- - - // cast `mask` to byte pointer - let byte_ptr = (&self.mask as *const u64) as *const u8; - - // store `mask` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); - // --------------------- Store `data` ----------------- - - // for every `HashBucket` - for id in 0..total_buckets { - // cast `HashBucket` to byte pointer - let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; - - // store `HashBucket` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); - } - - // --------------------- Store `started` ----------------- - - // cast `started` to byte pointer - let byte_ptr = (&self.started as *const Instant) as *const u8; - - // store `started` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); - // --------------------- Store `next_to_chain` ----------------- - - // cast `next_to_chain` to byte pointer - let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; - - // store `next_to_chain` back to mmapped file - store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); - // ------------------------------------------------------------- - - gracefully_shutdown = true; - - // TODO: check if this flushes the CPU caches - pool.flush() - .expect("failed to flush `HashTable` to storage"); - } - gracefully_shutdown - } 
- /// Flushes the `HashTable` by storing it to a file (if a path is specified) pub fn flush(&self) -> std::io::Result<()> { // if a path is specified, copy all the `HashBucket`s to the file diff --git a/src/rust/storage/seg/src/lib.rs b/src/rust/storage/seg/src/lib.rs index 15dd6490a..cf0b3fd1f 100644 --- a/src/rust/storage/seg/src/lib.rs +++ b/src/rust/storage/seg/src/lib.rs @@ -36,7 +36,6 @@ use std::convert::TryInto; // submodules mod builder; mod datapool; -mod demolisher; mod error; mod eviction; mod hashtable; @@ -54,7 +53,6 @@ mod tests; // publicly exported items from submodules pub use crate::seg::Seg; pub use builder::Builder; -pub use demolisher::Demolisher; pub use error::SegError; pub use eviction::Policy; pub use item::Item; diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index af1df3b77..e357724c8 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -49,32 +49,9 @@ impl Seg { Builder::default() } - // Returns a new `Demolisher` which is used to configure the graceful - // deconstruction of a `Seg` instance. 
- // - // Example code: - // ``` - // let segment_size = 4096; - // let segments = 64; - // let heap_size = segments * segment_size as usize; - // let datapool_path : Option = Some(PathBuf::from()); - // let segments_fields_path: Option = Some(PathBuf::from()); - // let ttl_buckets_path : Option = Some(PathBuf::from()); - // let hashtable_path: Option = Some(PathBuf::from()); - // - // // demolish cache by triggering graceful shutdown - // Seg::demolisher() - // .heap_size(heap_size) - // .datapool_path(datapool_path) - // .segments_fields_path(segments_fields_path) - // .ttl_buckets_path(ttl_buckets_path) - // .hashtable_path(hashtable_path) - // .demolish(cache) - // ``` - pub fn demolisher() -> Demolisher { - Demolisher::default() - } - + /// Flushes cache by storing all the relevant fields of `Segments`, + /// `HashTable` and `TtlBuckets` to files at the paths stored in the + /// respective structs. pub fn flush(&self) -> std::io::Result<()> { self.segments.flush()?; self.hashtable.flush()?; diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index c214f12f6..198b286c4 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -235,115 +235,6 @@ impl Segments { } } - /// Demolishes the segments by flushing the `Segments.data` to PMEM - /// (if filed backed) and storing the other `Segments` fields' to - /// PMEM (if a path is specified) - pub fn demolish(&self, segments_fields_path: Option, heap_size: usize) -> bool { - let mut gracefully_shutdown = false; - - // if a path is specified, copy all the `Segments` fields' - // to the file specified by `segments_fields_path` - if let Some(file) = segments_fields_path { - let segments = heap_size / (self.segment_size as usize); - let header_size: usize = ::std::mem::size_of::(); - let i32_size = ::std::mem::size_of::(); - let u32_size = ::std::mem::size_of::(); - let free_q_size = ::std::mem::size_of::>(); - 
let flush_at_size = ::std::mem::size_of::(); - // Size of all components of `Segments` that are being restored - let fields_size = segments * header_size // `headers` - + i32_size // `segment_size` - + u32_size * 2 // `free` and `cap` - + free_q_size - + flush_at_size; - - // mmap file - let mut pool = File::create(file, fields_size, true) - .expect("failed to allocate file backed storage"); - let fields_data = pool.as_mut_slice(); - - let mut offset = 0; - // ----- Store `headers` ----- - - // for every `SegmentHeader` - for id in 0..segments { - // cast `SegmentHeader` to byte pointer - let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; - - // store `SegmentHeader` back to mmapped file - offset = store::store_bytes_and_update_offset( - byte_ptr, - offset, - header_size, - fields_data, - ); - } - - // ----- Store `segment_size` ----- - - // cast `segment_size` to byte pointer - let byte_ptr = (&self.segment_size as *const i32) as *const u8; - - // store `segment_size` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, i32_size, fields_data); - - // ----- Store `free` ----- - - // cast `free` to byte pointer - let byte_ptr = (&self.free as *const u32) as *const u8; - - // store `free` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); - - // ----- Store `cap` ----- - - // cast `cap` to byte pointer - let byte_ptr = (&self.cap as *const u32) as *const u8; - - // store `cap` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); - - // ----- Store `free_q` ----- - - // cast `free_q` to byte pointer - let byte_ptr = (&self.free_q as *const Option) as *const u8; - - // store `free_q` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); - - // ----- Store `flush_at` ----- - - // cast `flush_at` to byte pointer - let byte_ptr = 
(&self.flush_at as *const Instant) as *const u8; - - // store `flush_at` back to mmapped file - store::store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, fields_data); - - // ----------------------------- - - // TODO: check if this flushes fields_data from CPU caches - pool.flush() - .expect("failed to flush `Segments` fields' to storage"); - - gracefully_shutdown = true; - } - - // if `Segments.data` is file backed, flush it to PMEM - if self.data_file_backed { - self.data - .flush() - .expect("failed to flush Segments.data to storage"); - } else { - // This else case is not expected to be reached as this function - // is only called during a graceful shutdown, so it is expected that the - // data is file backed - gracefully_shutdown = false; - } - - gracefully_shutdown - } - /// Flushes the `Segments` by flushing the `Segments.data` (if filed backed) /// and storing the other `Segments` fields' to a file (if a path is /// specified) @@ -1295,7 +1186,7 @@ impl Clone for Segments { segment_size: self.segment_size, free: self.free, cap: self.cap, - free_q: self.free_q.clone(), + free_q: self.free_q, flush_at: self.flush_at, evict: self.evict.clone(), // not relevant data_file_backed: self.data_file_backed, // not relevant diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 319e0c173..9149bdf93 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -19,11 +19,11 @@ fn sizes() { #[cfg(not(feature = "magic"))] assert_eq!(ITEM_HDR_SIZE, 5); - assert_eq!(std::mem::size_of::(), 64); + assert_eq!(std::mem::size_of::(), 88); assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 72); // increased to accommodate fields added for testing + assert_eq!(std::mem::size_of::(), 104); // increased to accommodate fields added for testing assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 48); @@ -478,22 +478,21 @@ fn 
saturating_sub() { // ---- Cache is created new in main memory. // New cache, file backed // ---- Cache is created new and is file backed. -// ---- In other words, PMEM is used as an extension of DRAM. -// ---- Note: Since the same `datapool_path` is used by the `builder` and -// ---- `demolisher`, the cache cannot be gracefully shutdown by the `demolisher` -// ---- if it wasn't file backed by the `builder`. That is, if there is no path -// ---- used to file back the cache, there is no path to copy the cache data to on shutdown +// ---- In other words, file is used as an extension of DRAM. +// ---- Note: The cache cannot be gracefully shutdown by if it isn't file backed. +// ---- That is, if there is no path used to file back the cache, there is no +// ---- path to copy the cache data to on shutdown // Not gracefully shutdown // ---- Nothing is saved on shutdown. // Gracefully shutdown -// ---- `Segments.data` is flushed to PMEM it is file backed -// ---- Rest of `Seg` instance saved on shutdown if the paths are valid -// ---- That is, all of `Seg.hashtable`, `Seg.ttl_buckets` and -// ---- the relevant `Seg.Segments` fields are saved +// ---- `Segments.data` is flushed if it is file backed +// ---- Rest of `Seg` instance saved on shutdown if the paths are valid. +// ---- That is, all of `Seg.hashtable`, `Seg.ttl_buckets` and the relevant +// ---- `Seg.Segments` fields are saved // Restored cache // ---- `Segments.data` must be file backed // ---- Rest of `Seg` copied back from the files they were saved to and -// ---- If any of the file paths are not valid, then the cache is created new (TODO) +// ---- If any of the file paths are not valid, then the cache is created new // ------------- Set up / Helper Functions for below tests ------------ @@ -511,10 +510,10 @@ fn tmp_dir() -> TempDir { TempDir::new_in(TMP_DIR).unwrap() } -// Returns a `Seg` instance. -// Cache is restored only if `restore` and `segments_fields_path`, `ttl_buckets_path`. 
`hashtable_path` are not `None`. -// Otherwise, new `Seg` instance is returned. -// Cache is file backed if `datapool_path` is not `None`. +// Returns a `Seg` instance. Cache is restored only if `restore` and +// `segments_fields_path`, `ttl_buckets_path`. `hashtable_path` are not `None`. +// Otherwise, new `Seg` instance is returned. Cache is file backed if +// `datapool_path` is not `None`. fn make_cache( restore: bool, datapool_path: Option, @@ -537,27 +536,6 @@ fn make_cache( .build() } -// Demolish the cache by attempting to save the `Segments`, -// `TtlBuckets` and `HashTable` to the paths specified -// If successful, return True. Else, return False. -fn demolish_cache( - cache: Seg, - segments_fields_path: Option, - ttl_buckets_path: Option, - hashtable_path: Option, -) -> bool { - let segment_size = 4096; - let segments = SEGMENTS; - let heap_size = segments * segment_size as usize; - - Seg::demolisher() - .heap_size(heap_size) - .segments_fields_path(segments_fields_path) - .ttl_buckets_path(ttl_buckets_path) - .hashtable_path(hashtable_path) - .demolish(cache) -} - // ------------------- Set Paths Correctly Tests -------------------------- // Check that a file backed, new cache is file backed and the `Seg` @@ -650,17 +628,8 @@ fn restored_cache_file_backed() { // being file backed and none of the other structures being restored #[test] fn restored_cache_no_paths_set() { - let segment_size = 4096; - let segments = 64; - let heap_size = segments * segment_size as usize; - let datapool_path: Option = None; - - let cache = Seg::builder() - .restore(true) - .segment_size(segment_size as i32) - .heap_size(heap_size) - .datapool_path(datapool_path) // set no path - .build(); + let restore = true; + let cache = make_cache(restore, None, None, None, None); // the `Segments.data` should not be filed backed assert!(!cache.segments.data_file_backed()); @@ -682,18 +651,6 @@ fn cache_gracefully_shutdown() { let dir = tmp_dir(); // Create tempfile for datapool let 
datapool_path: Option = Some(dir.path().join("datapool")); - let segment_size = 4096; - let segments = SEGMENTS; - let heap_size = segments * segment_size as usize; - - // create new, file backed cache - let cache = Seg::builder() - .restore(false) - .segment_size(segment_size as i32) - .heap_size(heap_size) - .datapool_path(datapool_path) // set path - .build(); - // Create tempfile for `Segments` fields' let segments_fields_path: Option = Some(dir.path().join("segments_fields")); // Create tempfile for `TtlBuckets` @@ -701,12 +658,18 @@ fn cache_gracefully_shutdown() { // Create tempfile for `HashTable` let hashtable_path: Option = Some(dir.path().join("hashtable")); - assert!(Seg::demolisher() - .heap_size(heap_size) - .segments_fields_path(segments_fields_path) - .ttl_buckets_path(ttl_buckets_path) - .hashtable_path(hashtable_path) - .demolish(cache)); + // create new, file backed cache + let restore = false; + let cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); + + // Flush cache + assert!(cache.flush().is_ok()); } // Check that if paths are not specified, then the cache is not gracefully @@ -717,31 +680,23 @@ fn cache_not_gracefully_shutdown() { let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - let segment_size = 4096; - let segments = SEGMENTS; - let heap_size = segments * segment_size as usize; - - // create new, file backed cache - let cache = Seg::builder() - .restore(false) - .segment_size(segment_size as i32) - .heap_size(heap_size) - .datapool_path(datapool_path) // set path - .build(); - // Create tempfile for `Segments` fields' let segments_fields_path: Option = Some(dir.path().join("segments_fields")); // Create tempfile for `TtlBuckets` let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - // Do not set a HashTable path - let hashtable_path: Option = None; - assert!(!Seg::demolisher() - 
.heap_size(heap_size) - .segments_fields_path(segments_fields_path) - .ttl_buckets_path(ttl_buckets_path) - .hashtable_path(hashtable_path) - .demolish(cache)); + // create new, file backed cache + let restore = false; + let cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + None, // Don't set a `HashTable` path + ); + + // Flushing cache should fail + assert!(cache.flush().is_err()); } // --------------------- Data copied back Tests---------------------------- @@ -790,21 +745,6 @@ fn new_file_backed_cache_changed_and_restored() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // Create tempfile for `Segments` fields' - // let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // // Create tempfile for `TtlBuckets` - // let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - // // Create tempfile for `HashTable` - // let hashtable_path: Option = Some(dir.path().join("hashtable")); - - // gracefully shutdown cache - // assert!(demolish_cache( - // cache, - // segments_fields_path, - // ttl_buckets_path, - // hashtable_path - // )); - // Flush cache assert!(cache.flush().is_ok()); @@ -853,20 +793,21 @@ fn new_file_backed_cache_not_changed_and_restored() { // create new, file backed cache let mut restore = false; - let cache = make_cache(restore, datapool_path, None, None, None); + let cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); assert!(!cache.restored()); // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // gracefully shutdown cache - assert!(demolish_cache( - cache, - segments_fields_path, - ttl_buckets_path, - hashtable_path - )); + // Flush cache + assert!(cache.flush().is_ok()); // Create same tempfiles (they have been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); @@ -909,7 +850,13 @@ fn 
new_cache_changed_and_not_restored() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, None, None, None); + let mut cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -930,13 +877,8 @@ fn new_cache_changed_and_not_restored() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // gracefully shutdown cache - assert!(demolish_cache( - cache, - segments_fields_path, - ttl_buckets_path, - hashtable_path - )); + // Flush cache + assert!(cache.flush().is_ok()); // Create same tempfile (it has been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); @@ -978,7 +920,13 @@ fn new_cache_changed_and_restoration_fails() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, None, None, None); + let mut cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -999,15 +947,11 @@ fn new_cache_changed_and_restoration_fails() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // gracefully shutdown cache - assert!(demolish_cache( - cache, - segments_fields_path, - ttl_buckets_path, - hashtable_path - )); + // Flush cache + assert!(cache.flush().is_ok()); - // Create same tempfiles (they have been moved since first created) for `datapool`, `segments_fields`, `ttl_buckets` + // Create same tempfiles (they have been moved since first created) for + // `datapool`, `segments_fields`, `ttl_buckets` let datapool_path: Option = Some(dir.path().join("datapool")); let segments_fields_path: Option = Some(dir.path().join("segments_fields")); let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); @@ -1027,7 +971,8 @@ fn 
new_cache_changed_and_restoration_fails() { // Cache is restored as all paths exist assert!(new_cache.restored()); - // `Segments` data should be the same as old cache since `segments_fields_path` is the same + // `Segments` data should be the same as old cache since + // `segments_fields_path` is the same assert_eq!(new_cache.items(), 1); assert_eq!(new_cache.segments.free(), SEGMENTS - 1); @@ -1064,7 +1009,13 @@ fn full_cache_recovery_long() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, None, None, None); + let mut cache = make_cache( + restore, + datapool_path, + segments_fields_path, + ttl_buckets_path, + hashtable_path, + ); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -1106,13 +1057,8 @@ fn full_cache_recovery_long() { // Get a copy of the cache to be compared later let old_cache = cache.clone(); - // gracefully shutdown cache - assert!(demolish_cache( - cache, - segments_fields_path, - ttl_buckets_path, - hashtable_path - )); + // Flush cache + assert!(cache.flush().is_ok()); // Create same tempfiles (they have been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index af8573c19..ba875fcf3 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -146,58 +146,6 @@ impl TtlBuckets { } } - /// TODO: Move this to drop() - /// Demolishes the `TtlBuckets` by storing them to - /// PMEM (if a path is specified) - pub fn demolish(&self, ttl_buckets_path: Option) -> bool { - let mut gracefully_shutdown = false; - - // if a path is specified, copy all the `TtlBucket`s - // to the file specified by `ttl_buckets_path` - if let Some(file) = ttl_buckets_path { - let bucket_size = ::std::mem::size_of::(); - let last_expired_size = ::std::mem::size_of::(); - let 
ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` - + last_expired_size; - - // Mmap file - let mut pool = File::create(file, ttl_buckets_struct_size, true) - .expect("failed to allocate file backed storage"); - let data = pool.as_mut_slice(); - - let mut offset = 0; - // --------------------- Store `last_expired` ----------------- - - // cast `last_expired` to byte pointer - let byte_ptr = (&self.last_expired as *const Instant) as *const u8; - - // store `last_expired` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); - - // --------------------- Store `buckets` ----------------- - - // for every `TtlBucket` - for id in 0..MAX_N_TTL_BUCKET { - // cast `TtlBucket` to byte pointer - let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; - - // store `TtlBucket` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); - } - - // -------------------------------------------------- - - gracefully_shutdown = true; - - // TODO: check if this flushes the CPU caches - pool.flush() - .expect("failed to flush `TtlBuckets` to storage"); - } - - gracefully_shutdown - } - /// Flushes the `TtlBuckets` by storing it to a file (if a path is specified) pub fn flush(&self) -> std::io::Result<()> { // if a path is specified, copy all the `TtlBucket`s to the file From 43eef1424f33de1b7ae11c341f96e8dfd4f8f3a5 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 18:24:44 +1100 Subject: [PATCH 47/74] replaced demolish() with flush() in tests.rs. 
Deleted all traces of Demolisher --- src/rust/server/segcache/src/lib.rs | 2 +- .../seg/src/ttl_buckets/ttl_buckets.rs | 58 +------------------ 2 files changed, 2 insertions(+), 58 deletions(-) diff --git a/src/rust/server/segcache/src/lib.rs b/src/rust/server/segcache/src/lib.rs index da9deacf4..3f9e6df8f 100644 --- a/src/rust/server/segcache/src/lib.rs +++ b/src/rust/server/segcache/src/lib.rs @@ -71,7 +71,7 @@ impl Segcache { /// fully terminated. This is more likely to be used for running integration /// tests or other automated testing. pub fn shutdown(self) { - // TODO: demolish cache + // TODO: flush the cache self.process.shutdown() } } diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index ba875fcf3..1fd797da8 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -271,60 +271,4 @@ impl PartialEq for TtlBuckets { fn eq(&self, other: &Self) -> bool { self.buckets == other.buckets && self.last_expired == other.last_expired } -} - -// // TODO: use self.path, figure out how to indicate there was a graceful shutdown, -// // implement the same for Segments and HashTable -// // Add description -// impl Drop for TtlBuckets { -// fn drop(&mut self) { -// /// Demolishes the `TtlBuckets` by storing them to -// /// PMEM (if a path is specified) -// let mut gracefully_shutdown = false; - -// // if a path is specified, copy all the `TtlBucket`s -// // to the file specified by `ttl_buckets_path` -// if let Some(file) = ttl_buckets_path { -// let bucket_size = ::std::mem::size_of::(); -// let last_expired_size = ::std::mem::size_of::(); -// let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` -// + last_expired_size; - -// // Mmap file -// let mut pool = File::create(file, ttl_buckets_struct_size, true) -// .expect("failed to allocate file backed storage"); -// let data = pool.as_mut_slice(); - -// let mut offset 
= 0; -// // --------------------- Store `last_expired` ----------------- - -// // cast `last_expired` to byte pointer -// let byte_ptr = (&self.last_expired as *const Instant) as *const u8; - -// // store `last_expired` back to mmapped file -// offset = store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); - -// // --------------------- Store `buckets` ----------------- - -// // for every `TtlBucket` -// for id in 0..MAX_N_TTL_BUCKET { - -// // cast `TtlBucket` to byte pointer -// let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; - -// // store `TtlBucket` back to mmapped file -// offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); -// } - -// // -------------------------------------------------- - -// gracefully_shutdown = true; - -// // TODO: check if this flushes the CPU caches -// pool.flush() -// .expect("failed to flush `TtlBuckets` to storage"); -// } - -// gracefully_shutdown -// } -// } +} \ No newline at end of file From d62bfb0a0e56e1078ce61f8d6b333dc3c25e2747 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 18:25:33 +1100 Subject: [PATCH 48/74] ran cargo fmt --- src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 1fd797da8..3ab792253 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -271,4 +271,4 @@ impl PartialEq for TtlBuckets { fn eq(&self, other: &Self) -> bool { self.buckets == other.buckets && self.last_expired == other.last_expired } -} \ No newline at end of file +} From 92e1f24a4a04f0b0d5cae82bcfc75e8eda9d63ed Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 17 Feb 2022 18:43:42 +1100 Subject: [PATCH 49/74] ran cargo fmt --- src/rust/storage/seg/src/segments/segments.rs | 5 ++--- 1 
file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 198b286c4..7bd4125cb 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -52,9 +52,8 @@ pub(crate) struct Segments { impl Segments { /// Private function which allocates and initializes the `Segments` by - /// taking ownership of the builder. - /// `Segments` is restored if the paths are specified, otherwise a new - /// `Segments` is created. + /// taking ownership of the builder. `Segments` is restored if the paths are + /// specified, otherwise a new `Segments` is created. pub(super) fn from_builder(builder: SegmentsBuilder) -> Self { let cfg_segment_size = builder.segment_size; let cfg_segments = builder.heap_size / (builder.segment_size as usize); From dc85b3013283a92b010453640da47cf2dd13f0e5 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 15:10:00 +1100 Subject: [PATCH 50/74] deleted tests that are attempting recovery from files that don't exist yet, as this doesn't make sense --- src/rust/storage/seg/src/builder.rs | 38 ++++--- src/rust/storage/seg/src/hashtable/mod.rs | 4 +- src/rust/storage/seg/src/tests.rs | 120 ---------------------- 3 files changed, 25 insertions(+), 137 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index bdeeb0188..d3d3216c0 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -200,21 +200,29 @@ impl Builder { // If `Segments` successfully restored and `restore` if segments.fields_copied_back && self.restore { - // Attempt to restore `HashTable` and `TtlBuckets` - let hashtable = HashTable::restore( - self.hashtable_path.clone(), - self.hash_power, - self.overflow_factor, - ); - let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path.clone()); - - // If successful, return a restored segcache - if 
hashtable.table_copied_back && ttl_buckets.buckets_copied_back { - return Seg { - hashtable, - segments, - ttl_buckets, - }; + // Check if file exists and with what size + // TODO: implement a non-messy way to calculate expected file size + if let Ok(file_size) = std::fs::metadata(self.hashtable_path.as_ref().unwrap()).map(|m| m.len()) { + let file_size = file_size as usize; + + // Attempt to restore `HashTable` and `TtlBuckets` + let hashtable = HashTable::restore( + self.hashtable_path.clone(), + file_size, + self.hash_power, + self.overflow_factor, + ); + let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path.clone()); + println!("hash {}, ttl {}", hashtable.table_copied_back, ttl_buckets.buckets_copied_back); //DELETE + + // If successful, return a restored segcache + if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { + return Seg { + hashtable, + segments, + ttl_buckets, + }; + } } } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 0df2b9ad6..f8d61effa 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -164,7 +164,7 @@ impl HashTable { } } - pub fn restore(hashtable_path: Option, cfg_power: u8, overflow_factor: f64) -> Self { + pub fn restore(hashtable_path: Option, file_size: usize, cfg_power: u8, overflow_factor: f64) -> Self { // if there is a path to restore from, restore the `HashTable` if let Some(file) = &hashtable_path { // restore() assumes no changes in `power`. 
@@ -181,7 +181,7 @@ impl HashTable { + started_size; // Mmap file - let pool = File::create(file, hashtable_size, true) + let pool = File::create(file, file_size, true) .expect("failed to allocate file backed storage"); let file_data = Box::new(pool.as_slice()); diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 9149bdf93..b41042ed1 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -586,42 +586,6 @@ fn new_cache_not_file_backed() { assert!(!cache.hashtable.table_copied_back); } -// Check that a restored cache is file backed and the `Seg` is restored -#[test] -fn restored_cache_file_backed() { - // Create a temporary directory - let dir = tmp_dir(); - // Create tempfile for datapool - let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); - - // restore, file backed cache - let restore = true; - let cache = make_cache( - restore, - datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, - ); - - // the `Segments.data` should be filed backed - assert!(cache.segments.data_file_backed()); - // the `Seg` should be restored - assert!(cache.restored()); - // the `Segments` fields' should have been restored - assert!(cache.segments.fields_copied_back); - // the `TtlBuckets` should have been restored - assert!(cache.ttl_buckets.buckets_copied_back); - // the `HashTable` should have been restored - assert!(cache.hashtable.table_copied_back); -} - // Edge Case: Check that an attempt to restore a cache without specifing // any paths for the `Segments.data`, `Segments` fields', // `HashTable` and `TTLBuckets` will lead to `Segments.data` not 
@@ -900,90 +864,6 @@ fn new_cache_changed_and_not_restored() { assert!(new_cache != old_cache); } -// Creates a new cache, stores an item, gracefully shutsdown cache and restore cache -// with an incorrect path to the `HashTable`. -// The restoration should "succeed" and the # items recorded should be the same in the restored cache -// as the `segments_fields_path` is the same but an attempt to get item from new cache should fail -// as the `hashtable_path` is different and caches should not equivalent -#[test] -fn new_cache_changed_and_restoration_fails() { - // Create a temporary directory - let dir = tmp_dir(); - // Create tempfile for datapool - let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); - - // create new, file backed cache - let mut restore = false; - let mut cache = make_cache( - restore, - datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, - ); - - assert!(!cache.restored()); - assert_eq!(cache.items(), 0); - assert_eq!(cache.segments.free(), SEGMENTS); - - // "latte" should not be in a new, empty cache - assert!(cache.get(b"latte").is_none()); - // insert "latte" into cache - assert!(cache - .insert(b"latte", b"", None, Duration::from_secs(5)) - .is_ok()); - // "latte" should now be in cache - assert!(cache.get(b"latte").is_some()); - - assert_eq!(cache.items(), 1); - assert_eq!(cache.segments.free(), SEGMENTS - 1); - - // Get a copy of the cache to be compared later - let old_cache = cache.clone(); - - // Flush cache - assert!(cache.flush().is_ok()); - - // Create same tempfiles (they have been moved since first created) for - // `datapool`, `segments_fields`, `ttl_buckets` - let 
datapool_path: Option = Some(dir.path().join("datapool")); - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - - // Create different tempfile for `hashtable` - let hashtable_path: Option = Some(dir.path().join("hashtable_diff")); - - // Restore cache - restore = true; - let mut new_cache = make_cache( - restore, - datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, - ); - - // Cache is restored as all paths exist - assert!(new_cache.restored()); - // `Segments` data should be the same as old cache since - // `segments_fields_path` is the same - assert_eq!(new_cache.items(), 1); - assert_eq!(new_cache.segments.free(), SEGMENTS - 1); - - // "latte" should not be in new cache as `HashTable` restored from - // incorrect path does not have this information - assert!(new_cache.get(b"latte").is_none()); - - // the restored cache should not be equivalent to the old cache - assert!(new_cache != old_cache); -} - // Create a new cache, fill it with items. // Gracefully shutdown this cache. 
// Restore cache and check that every key from the original cache From cefb4b9115dbdbc3d5b33ea0e9d2c092d6b7a437 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 15:17:23 +1100 Subject: [PATCH 51/74] uncommented File::create() check of expected size of file as there is not point in keeping this until there is a nice way to determine expected file size --- src/rust/storage/seg/src/builder.rs | 1 - src/rust/storage/seg/src/datapool/file.rs | 25 +++++++++++++---------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index d3d3216c0..1bbc7241d 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -213,7 +213,6 @@ impl Builder { self.overflow_factor, ); let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path.clone()); - println!("hash {}, ttl {}", hashtable.table_copied_back, ttl_buckets.buckets_copied_back); //DELETE // If successful, return a restored segcache if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { diff --git a/src/rust/storage/seg/src/datapool/file.rs b/src/rust/storage/seg/src/datapool/file.rs index 744d3809e..46d5bfc08 100644 --- a/src/rust/storage/seg/src/datapool/file.rs +++ b/src/rust/storage/seg/src/datapool/file.rs @@ -31,18 +31,21 @@ impl File { size: usize, prefault: bool, ) -> Result { + // TODO: uncomment below code once there is a better way to determine expected `size` of the existing file // check if the file exists and is the right size - let exists = if let Ok(current_size) = std::fs::metadata(&path).map(|m| m.len()) { - if current_size != size as u64 { - return Err(std::io::Error::new( - std::io::ErrorKind::Other, - "existing file has wrong size", - )); - } - true - } else { - false - }; + // let exists = if let Ok(current_size) = std::fs::metadata(&path).map(|m| m.len()) { + // if current_size != size as u64 { + // return Err(std::io::Error::new( + // 
std::io::ErrorKind::Other, + // "existing file has wrong size", + // )); + // } + // true + // } else { + // false + // }; + + let exists = std::fs::metadata(&path).is_ok(); let mmap = if exists { let f = OpenOptions::new().read(true).write(true).open(path)?; From 61972207273ec9cccad6eb91c7b3004ee098efb2 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 17:05:38 +1100 Subject: [PATCH 52/74] Changed code so that HashTable and TtlBuckets share same file. Now will change code so that Segments use same file. Also need to tidy code up --- src/rust/storage/seg/src/builder.rs | 35 ++- src/rust/storage/seg/src/hashtable/mod.rs | 268 ++++++++++-------- src/rust/storage/seg/src/seg.rs | 36 ++- src/rust/storage/seg/src/tests.rs | 4 +- src/rust/storage/seg/src/ttl_buckets/tests.rs | 2 +- .../seg/src/ttl_buckets/ttl_buckets.rs | 181 +++++++----- 6 files changed, 317 insertions(+), 209 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 1bbc7241d..7e8dcb4f2 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -4,6 +4,7 @@ //! A builder for configuring a new [`Seg`] instance. 
+use crate::datapool::*; use crate::*; use std::path::Path; use std::path::PathBuf; @@ -201,18 +202,29 @@ impl Builder { // If `Segments` successfully restored and `restore` if segments.fields_copied_back && self.restore { // Check if file exists and with what size - // TODO: implement a non-messy way to calculate expected file size - if let Ok(file_size) = std::fs::metadata(self.hashtable_path.as_ref().unwrap()).map(|m| m.len()) { + if let Ok(file_size) = + std::fs::metadata(self.hashtable_path.as_ref().unwrap()).map(|m| m.len()) + { + // TODO: implement a non-messy way to calculate expected file size, rather than just taking actual size let file_size = file_size as usize; + // Mmap file + let pool = File::create(self.hashtable_path.clone().unwrap(), file_size, true) + .expect("failed to allocate file backed storage"); + let file_data = pool.as_slice(); + // Attempt to restore `HashTable` and `TtlBuckets` let hashtable = HashTable::restore( + file_data, self.hashtable_path.clone(), file_size, self.hash_power, self.overflow_factor, ); - let ttl_buckets = TtlBuckets::restore(self.ttl_buckets_path.clone()); + + let offset = hashtable.recover_size(); + let ttl_buckets = + TtlBuckets::restore(file_data, self.hashtable_path.clone(), file_size, offset); // If successful, return a restored segcache if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { @@ -220,6 +232,7 @@ impl Builder { hashtable, segments, ttl_buckets, + hashtable_path: self.hashtable_path, }; } } @@ -235,12 +248,24 @@ impl Builder { // implement. 
// If not `restore` or restoration failed, create a new cache - let hashtable = HashTable::new(self.hashtable_path, self.hash_power, self.overflow_factor); - let ttl_buckets = TtlBuckets::new(self.ttl_buckets_path); + let hashtable = HashTable::new( + self.hashtable_path.clone(), + self.hash_power, + self.overflow_factor, + ); + let offset = hashtable.recover_size(); + let ttl_buckets = TtlBuckets::new(self.hashtable_path.clone(), offset); + // Set offsets + // let hashtable_size = hashtable.recover_size(); + // let ttl_buckets_size = ttl_buckets.recover_size(); + // let file_size = hashtable_size + ttl_buckets_size; + // hashtable = hashtable.set_file_size(file_size); + Seg { hashtable, segments, ttl_buckets, + hashtable_path: self.hashtable_path, } } } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index f8d61effa..bb9a3a026 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -164,170 +164,174 @@ impl HashTable { } } - pub fn restore(hashtable_path: Option, file_size: usize, cfg_power: u8, overflow_factor: f64) -> Self { + pub fn restore( + file_data: &[u8], + hashtable_path: Option, + file_size: usize, + cfg_power: u8, + overflow_factor: f64, + ) -> Self { // if there is a path to restore from, restore the `HashTable` - if let Some(file) = &hashtable_path { - // restore() assumes no changes in `power`. - // I.e. 
config specifies same `power` as `HashTable` we are - // restoring from - // TODO: Detect a change of `power` and adjust `HashTable` accordingly - let total_buckets = total_buckets(cfg_power.into(), overflow_factor); - let bucket_size = ::std::mem::size_of::(); - let u64_size = ::std::mem::size_of::(); - let started_size = ::std::mem::size_of::(); - // Size of all components of `HashTable` that are being restored - let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + //if let Some(file) = &hashtable_path { + // restore() assumes no changes in `power`. + // I.e. config specifies same `power` as `HashTable` we are + // restoring from + // TODO: Detect a change of `power` and adjust `HashTable` accordingly + let total_buckets = total_buckets(cfg_power.into(), overflow_factor); + let bucket_size = ::std::mem::size_of::(); + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + // Size of all components of `HashTable` that are being restored + let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + total_buckets * bucket_size // `data` + started_size; - // Mmap file - let pool = File::create(file, file_size, true) - .expect("failed to allocate file backed storage"); - let file_data = Box::new(pool.as_slice()); + // // Mmap file + // let pool = File::create(file, file_size, true) + // .expect("failed to allocate file backed storage"); + // let file_data = Box::new(pool.as_slice()); - // create blank bytes to copy data into - let mut bytes = vec![0; hashtable_size]; - // retrieve bytes from mmapped file - bytes.copy_from_slice(&file_data[0..hashtable_size]); + // create blank bytes to copy data into + let mut bytes = vec![0; hashtable_size]; + // retrieve bytes from mmapped file + bytes.copy_from_slice(&file_data[0..hashtable_size]); - // ----- Re-initialise `hash_builder` ----- + // ----- Re-initialise `hash_builder` ----- - let hash_builder = hash_builder(); + let hash_builder = hash_builder(); - let 
mut offset = 0; - // ----- Retrieve `power` --------- - let mut end = u64_size; + let mut offset = 0; + // ----- Retrieve `power` --------- + let mut end = u64_size; - let power = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; - // TODO: compare `cfg_power` and `power` + let power = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; + // TODO: compare `cfg_power` and `power` - offset += u64_size; - // ----- Retrieve `mask` --------- - end += u64_size; + offset += u64_size; + // ----- Retrieve `mask` --------- + end += u64_size; - let mask = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; + let mask = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; - offset += u64_size; - // ----- Retrieve `data` --------- + offset += u64_size; + // ----- Retrieve `data` --------- - let mut data = Vec::with_capacity(0); - data.reserve_exact(total_buckets as usize); + let mut data = Vec::with_capacity(0); + data.reserve_exact(total_buckets as usize); - // Get each `HashBucket` from the raw bytes - for _ in 0..total_buckets { - end += bucket_size; + // Get each `HashBucket` from the raw bytes + for _ in 0..total_buckets { + end += bucket_size; - // cast bytes to `HashBucket` - let bucket = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut HashBucket) }; - data.push(bucket); + // cast bytes to `HashBucket` + let bucket = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut HashBucket) }; + data.push(bucket); - offset += bucket_size; - } + offset += bucket_size; + } - // ----- Retrieve `started` --------- - end += started_size; + // ----- Retrieve `started` --------- + end += started_size; - let started = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; + let started = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; - offset += started_size; - // ----- Retrieve `next_to_chain` --------- - end += u64_size; + offset += started_size; + // ----- Retrieve `next_to_chain` --------- + end += u64_size; - let next_to_chain = 
unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; + let next_to_chain = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u64) }; - Self { - hash_builder: Box::new(hash_builder), - power, - mask, - data: data.into_boxed_slice(), - rng: Box::new(rng()), - started, - next_to_chain, - table_copied_back: true, - hashtable_path, - overflow_factor, - } + Self { + hash_builder: Box::new(hash_builder), + power, + mask, + data: data.into_boxed_slice(), + rng: Box::new(rng()), + started, + next_to_chain, + table_copied_back: true, + hashtable_path, + overflow_factor, } + // } // otherwise, create a new `HashTable` - else { - HashTable::new(hashtable_path, cfg_power, overflow_factor) - } + // else { + // HashTable::new(hashtable_path, cfg_power, overflow_factor) + // } } /// Flushes the `HashTable` by storing it to a file (if a path is specified) - pub fn flush(&self) -> std::io::Result<()> { + pub fn flush(&self, file_data: &mut [u8]) -> std::io::Result<()> { // if a path is specified, copy all the `HashBucket`s to the file // specified by `hashtable_path` - if let Some(file) = &self.hashtable_path { - let total_buckets = total_buckets(self.power, self.overflow_factor); - let bucket_size = ::std::mem::size_of::(); - let u64_size = ::std::mem::size_of::(); - let started_size = ::std::mem::size_of::(); - // Size of all components of `HashTable` that are being saved - let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + // if let Some(file) = &self.hashtable_path { + let total_buckets = total_buckets(self.power, self.overflow_factor); + let bucket_size = ::std::mem::size_of::(); + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + // Size of all components of `HashTable` that are being saved + let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` + total_buckets * bucket_size // `data` + started_size; - // Mmap file - let mut pool = File::create(file, hashtable_size, true) - .expect("failed to 
allocate file backed storage"); - let file_data = pool.as_mut_slice(); + // // Mmap file + // let mut pool = File::create(file, hashtable_size, true) + // .expect("failed to allocate file backed storage"); + // let file_data = pool.as_mut_slice(); - let mut offset = 0; - // --------------------- Store `power` ----------------- + let mut offset = 0; + // --------------------- Store `power` ----------------- - // cast `power` to byte pointer - let byte_ptr = (&self.power as *const u64) as *const u8; + // cast `power` to byte pointer + let byte_ptr = (&self.power as *const u64) as *const u8; - // store `power` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + // store `power` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); - // --------------------- Store `mask` ----------------- + // --------------------- Store `mask` ----------------- - // cast `mask` to byte pointer - let byte_ptr = (&self.mask as *const u64) as *const u8; + // cast `mask` to byte pointer + let byte_ptr = (&self.mask as *const u64) as *const u8; - // store `mask` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); - // --------------------- Store `data` ----------------- + // store `mask` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + // --------------------- Store `data` ----------------- - // for every `HashBucket` - for id in 0..total_buckets { - // cast `HashBucket` to byte pointer - let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; + // for every `HashBucket` + for id in 0..total_buckets { + // cast `HashBucket` to byte pointer + let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; - // store `HashBucket` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); - } + 
// store `HashBucket` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); + } - // --------------------- Store `started` ----------------- + // --------------------- Store `started` ----------------- - // cast `started` to byte pointer - let byte_ptr = (&self.started as *const Instant) as *const u8; + // cast `started` to byte pointer + let byte_ptr = (&self.started as *const Instant) as *const u8; - // store `started` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); - // --------------------- Store `next_to_chain` ----------------- + // store `started` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); + // --------------------- Store `next_to_chain` ----------------- - // cast `next_to_chain` to byte pointer - let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; + // cast `next_to_chain` to byte pointer + let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; - // store `next_to_chain` back to mmapped file - store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); - // ------------------------------------------------------------- + // store `next_to_chain` back to mmapped file + store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + // ------------------------------------------------------------- - // TODO: check if this flushes the CPU caches - pool.flush()?; - Ok(()) - } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Path to store HashTable to is None, cannot gracefully - shutdown cache", - )) - } + // // TODO: check if this flushes the CPU caches + // pool.flush()?; + Ok(()) + // } else { + // Err(std::io::Error::new( + // std::io::ErrorKind::Other, + // "Path to store HashTable to is None, cannot gracefully + // shutdown cache", + // )) + // } } /// Lookup an item by key and return it @@ 
-896,6 +900,32 @@ impl HashTable { hasher.write(key); hasher.finish() } + + /// TODO: this code is repeated in restore() and flush(), can it be reduced? + /// Function used by `Builder` to calculate the number of bytes of the `HashTable` + /// that are stored/restored + pub fn recover_size(&self) -> usize { + let total_buckets = total_buckets(self.power, self.overflow_factor); + let bucket_size = ::std::mem::size_of::(); + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + // Size of all components of `HashTable` that are being restored + u64_size * 3 // `power`, `mask`, `next_to_chain` + + total_buckets * bucket_size // `data` + + started_size + } + + /// Function used by `Builder` to set the `file_size` + pub fn set_file_size(&self) -> usize { + let total_buckets = total_buckets(self.power, self.overflow_factor); + let bucket_size = ::std::mem::size_of::(); + let u64_size = ::std::mem::size_of::(); + let started_size = ::std::mem::size_of::(); + // Size of all components of `HashTable` that are being restored + u64_size * 3 // `power`, `mask`, `next_to_chain` + + total_buckets * bucket_size // `data` + + started_size + } } impl PartialEq for HashTable { diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index e357724c8..d5f895bff 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -4,9 +4,11 @@ //! Core datastructure +use crate::datapool::*; use crate::Value; use crate::*; use std::cmp::min; +use std::path::PathBuf; use metrics::{static_metrics, Counter}; @@ -27,6 +29,8 @@ pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, pub(crate) ttl_buckets: TtlBuckets, + // Path to datapool + pub(crate) hashtable_path: Option, } impl Seg { @@ -53,10 +57,34 @@ impl Seg { /// `HashTable` and `TtlBuckets` to files at the paths stored in the /// respective structs. 
pub fn flush(&self) -> std::io::Result<()> { - self.segments.flush()?; - self.hashtable.flush()?; - self.ttl_buckets.flush()?; - Ok(()) + // // Check if file exists and with what size + // if let Ok(file_size) = + // std::fs::metadata(self.hashtable_path.as_ref().unwrap()).map(|m| m.len()) + // { + + if let Some(file) = &self.hashtable_path { + + let file_size = self.hashtable.recover_size() + self.ttl_buckets.recover_size(); + + // Mmap file + let mut pool = File::create(file, file_size, true) + .expect("failed to allocate file backed storage"); + let file_data = pool.as_mut_slice(); + + self.segments.flush()?; + self.hashtable.flush(file_data)?; + self.ttl_buckets.flush(file_data)?; + + // TODO: check if this flushes the CPU caches + pool.flush()?; + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Path to datapool to is None, cannot gracefully + shutdown cache", + )) + } } /// Gets a count of items in the `Seg` instance. This is an expensive diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index b41042ed1..f36a9f16d 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -23,10 +23,10 @@ fn sizes() { assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 104); // increased to accommodate fields added for testing + assert_eq!(std::mem::size_of::(), 104); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 48); + assert_eq!(std::mem::size_of::(), 56); } #[test] diff --git a/src/rust/storage/seg/src/ttl_buckets/tests.rs b/src/rust/storage/seg/src/ttl_buckets/tests.rs index e731caac9..b29a459b1 100644 --- a/src/rust/storage/seg/src/ttl_buckets/tests.rs +++ b/src/rust/storage/seg/src/ttl_buckets/tests.rs @@ -7,7 +7,7 @@ use crate::*; #[test] fn bucket_index() { - let ttl_buckets = TtlBuckets::new(None); + let ttl_buckets = TtlBuckets::new(None, 0); // Zero TTL and max duration both go into 
the same TtlBucket assert_eq!(ttl_buckets.get_bucket_index(Duration::from_secs(0)), 1023); diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 3ab792253..78386d97e 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -52,12 +52,14 @@ pub struct TtlBuckets { pub(crate) buckets_copied_back: bool, /// Path to store relevant upon graceful shutdown pub(crate) ttl_buckets_path: Option, + /// Offset into recovery file where `TtlBuckets` will be stored upon graceful shutdown + pub(crate) offset_into_file: usize, } impl TtlBuckets { /// Create a new set of `TtlBuckets` which cover the full range of TTLs. See /// the module-level documentation for how the range of TTLs are stored. - pub fn new(ttl_buckets_path: Option) -> Self { + pub fn new(ttl_buckets_path: Option, offset_into_file: usize) -> Self { // TODO: add path as argument let intervals = [ TTL_BUCKET_INTERVAL_1, @@ -85,115 +87,128 @@ impl TtlBuckets { last_expired, buckets_copied_back: false, ttl_buckets_path, + offset_into_file, } } // Returns a restored `TtlBuckets` if file path // to restore from is valid. 
Otherwise return a new `TtlBuckets` - pub fn restore(ttl_buckets_path: Option) -> Self { + pub fn restore( + file_data: &[u8], + ttl_buckets_path: Option, + file_size: usize, + offset_into_file: usize, + ) -> Self { // if there is a path to restore from, restore the `TtlBuckets` - if let Some(file) = &ttl_buckets_path { - let bucket_size = ::std::mem::size_of::(); - let last_expired_size = ::std::mem::size_of::(); - let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + // if let Some(file) = &ttl_buckets_path { + let bucket_size = ::std::mem::size_of::(); + let last_expired_size = ::std::mem::size_of::(); + let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; - // Mmap file - let pool = File::create(file, ttl_buckets_struct_size, true) - .expect("failed to allocate file backed storage"); - let data = Box::new(pool.as_slice()); + // // Mmap file + // let pool = File::create(file, file_size, true) + // .expect("failed to allocate file backed storage"); + // let data = Box::new(pool.as_slice()); - // create blank bytes to copy data into - let mut bytes = vec![0; ttl_buckets_struct_size]; - // retrieve bytes from mmapped file - bytes.copy_from_slice(&data[0..ttl_buckets_struct_size]); + // create blank bytes to copy data into + let mut bytes = vec![0; ttl_buckets_struct_size]; + // retrieve bytes from mmapped file + bytes.copy_from_slice( + &file_data[offset_into_file..(offset_into_file + ttl_buckets_struct_size)], + ); - let mut offset = 0; - // ----- Retrieve `last_expired` ----- - let mut end = last_expired_size; - let last_expired = - unsafe { *(bytes[offset..last_expired_size].as_mut_ptr() as *mut Instant) }; + let mut offset = 0; + // ----- Retrieve `last_expired` ----- + let mut end = last_expired_size; + let last_expired = + unsafe { *(bytes[offset..last_expired_size].as_mut_ptr() as *mut Instant) }; - offset += last_expired_size; - // ----- Retrieve `buckets` ----- + offset += last_expired_size; + 
// ----- Retrieve `buckets` ----- - let mut buckets = Vec::with_capacity(0); - buckets.reserve_exact(MAX_N_TTL_BUCKET); + let mut buckets = Vec::with_capacity(0); + buckets.reserve_exact(MAX_N_TTL_BUCKET); - // Get each `TtlBucket` from the raw bytes - for _ in 0..MAX_N_TTL_BUCKET { - end += bucket_size; + // Get each `TtlBucket` from the raw bytes + for _ in 0..MAX_N_TTL_BUCKET { + end += bucket_size; - // cast bytes to `TtlBucket` - let bucket = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut TtlBucket) }; - buckets.push(bucket); + // cast bytes to `TtlBucket` + let bucket = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut TtlBucket) }; + buckets.push(bucket); - offset += bucket_size; - } + offset += bucket_size; + } - let buckets = buckets.into_boxed_slice(); + let buckets = buckets.into_boxed_slice(); - Self { - buckets, - last_expired, - buckets_copied_back: true, - ttl_buckets_path, - } - } - // otherwise, create a new `TtlBuckets` - else { - TtlBuckets::new(ttl_buckets_path) + Self { + buckets, + last_expired, + buckets_copied_back: true, + ttl_buckets_path, + offset_into_file, } + // } + // // otherwise, create a new `TtlBuckets` + // else { + // TtlBuckets::new(ttl_buckets_path, offset_into_file) + // } } /// Flushes the `TtlBuckets` by storing it to a file (if a path is specified) - pub fn flush(&self) -> std::io::Result<()> { + pub fn flush(&self, file_data: &mut [u8]) -> std::io::Result<()> { // if a path is specified, copy all the `TtlBucket`s to the file // specified by `ttl_buckets_path` - if let Some(file) = &self.ttl_buckets_path { - let bucket_size = ::std::mem::size_of::(); - let last_expired_size = ::std::mem::size_of::(); - let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + // if let Some(file) = &self.ttl_buckets_path { + let bucket_size = ::std::mem::size_of::(); + let last_expired_size = ::std::mem::size_of::(); + let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; - // 
Mmap file - let mut pool = File::create(file, ttl_buckets_struct_size, true) - .expect("failed to allocate file backed storage"); - let data = pool.as_mut_slice(); + // let file_size = std::fs::metadata(self.ttl_buckets_path.as_ref().unwrap()) + // .expect("Failed to retrieve the datapool metadata") + // .len() as usize; - let mut offset = 0; - // --------------------- Store `last_expired` ----------------- + // Mmap file + // let mut pool = File::create(file, file_size, true) + // .expect("failed to allocate file backed storage"); + // let data = pool.as_mut_slice(); - // cast `last_expired` to byte pointer - let byte_ptr = (&self.last_expired as *const Instant) as *const u8; + let mut offset = self.offset_into_file; + // --------------------- Store `last_expired` ----------------- - // store `last_expired` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, data); + // cast `last_expired` to byte pointer + let byte_ptr = (&self.last_expired as *const Instant) as *const u8; - // --------------------- Store `buckets` ----------------- + // store `last_expired` back to mmapped file + offset = + store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, file_data); - // for every `TtlBucket` - for id in 0..MAX_N_TTL_BUCKET { - // cast `TtlBucket` to byte pointer - let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; - - // store `TtlBucket` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, data); - } + // --------------------- Store `buckets` ----------------- - // -------------------------------------------------- + // for every `TtlBucket` + for id in 0..MAX_N_TTL_BUCKET { + // cast `TtlBucket` to byte pointer + let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; - // TODO: check if this flushes the CPU caches - pool.flush()?; - Ok(()) - } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Path to 
store TtlBuckets to is None, cannot gracefully - shutdown cache", - )) + // store `TtlBucket` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); } + + // -------------------------------------------------- + + // TODO: check if this flushes the CPU caches + // pool.flush()?; + Ok(()) + // } else { + // Err(std::io::Error::new( + // std::io::ErrorKind::Other, + // "Path to store TtlBuckets to is None, cannot gracefully + // shutdown cache", + // )) + // } } pub(crate) fn get_bucket_index(&self, ttl: Duration) -> usize { @@ -258,11 +273,21 @@ impl TtlBuckets { CLEAR_TIME.add(duration.as_nanos() as _); cleared } + + /// TODO: this code is repeated in restore() and flush(), can it be reduced? + /// Function used by `Builder` to calculate the number of bytes of the `TtlBuckets` + /// that are stored/restored + pub fn recover_size(&self) -> usize { + let bucket_size = ::std::mem::size_of::(); + let last_expired_size = ::std::mem::size_of::(); + MAX_N_TTL_BUCKET * bucket_size // `buckets` + + last_expired_size + } } impl Default for TtlBuckets { fn default() -> Self { - Self::new(None) + Self::new(None, 0) } } From 154549be1694711ba31f897fbf96dc0b973979ac Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 18:12:16 +1100 Subject: [PATCH 53/74] refactored code to remove now unnecessary fields from HashTable and TtlBuckets --- src/rust/storage/seg/src/builder.rs | 9 +++----- src/rust/storage/seg/src/hashtable/mod.rs | 8 +------ src/rust/storage/seg/src/seg.rs | 3 ++- src/rust/storage/seg/src/tests.rs | 4 ++-- src/rust/storage/seg/src/ttl_buckets/tests.rs | 2 +- .../seg/src/ttl_buckets/ttl_buckets.rs | 21 +++++-------------- 6 files changed, 14 insertions(+), 33 deletions(-) diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 7e8dcb4f2..c747d5fbc 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -216,15 +216,13 
@@ impl Builder { // Attempt to restore `HashTable` and `TtlBuckets` let hashtable = HashTable::restore( file_data, - self.hashtable_path.clone(), - file_size, self.hash_power, self.overflow_factor, ); let offset = hashtable.recover_size(); let ttl_buckets = - TtlBuckets::restore(file_data, self.hashtable_path.clone(), file_size, offset); + TtlBuckets::restore(&file_data[offset..]); // If successful, return a restored segcache if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { @@ -249,12 +247,11 @@ impl Builder { // If not `restore` or restoration failed, create a new cache let hashtable = HashTable::new( - self.hashtable_path.clone(), self.hash_power, self.overflow_factor, ); let offset = hashtable.recover_size(); - let ttl_buckets = TtlBuckets::new(self.hashtable_path.clone(), offset); + let ttl_buckets = TtlBuckets::new(); // Set offsets // let hashtable_size = hashtable.recover_size(); // let ttl_buckets_size = ttl_buckets.recover_size(); @@ -265,7 +262,7 @@ impl Builder { hashtable, segments, ttl_buckets, - hashtable_path: self.hashtable_path, + hashtable_path: self.hashtable_path, // TODO: change this to final path } } } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index bb9a3a026..2a098fbcb 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -112,8 +112,6 @@ pub(crate) struct HashTable { next_to_chain: u64, /// Is `HashTable` copied back from a file? pub(crate) table_copied_back: bool, - /// Path to save relevant fields upon graceful shutdown - hashtable_path: Option, /// Used in graceful shutdown overflow_factor: f64, } @@ -122,7 +120,7 @@ impl HashTable { /// Creates a new hashtable with a specified power and overflow factor. The /// hashtable will have the capacity to store up to /// `7 * 2^(power - 3) * (1 + overflow_factor)` items. 
- pub fn new(hashtable_path: Option, power: u8, overflow_factor: f64) -> HashTable { + pub fn new(power: u8, overflow_factor: f64) -> HashTable { if overflow_factor < 0.0 { fatal!("hashtable overflow factor must be >= 0.0"); } @@ -159,15 +157,12 @@ impl HashTable { started: Instant::recent(), next_to_chain: buckets as u64, table_copied_back: false, - hashtable_path, overflow_factor, } } pub fn restore( file_data: &[u8], - hashtable_path: Option, - file_size: usize, cfg_power: u8, overflow_factor: f64, ) -> Self { @@ -250,7 +245,6 @@ impl HashTable { started, next_to_chain, table_copied_back: true, - hashtable_path, overflow_factor, } // } diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index d5f895bff..3ec1e6623 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -73,7 +73,8 @@ impl Seg { self.segments.flush()?; self.hashtable.flush(file_data)?; - self.ttl_buckets.flush(file_data)?; + let offset = self.hashtable.recover_size(); + self.ttl_buckets.flush(&mut file_data[offset..])?; // TODO: check if this flushes the CPU caches pool.flush()?; diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index f36a9f16d..30a03571b 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -23,10 +23,10 @@ fn sizes() { assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 104); + assert_eq!(std::mem::size_of::(), 80); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 56); + assert_eq!(std::mem::size_of::(), 24); } #[test] diff --git a/src/rust/storage/seg/src/ttl_buckets/tests.rs b/src/rust/storage/seg/src/ttl_buckets/tests.rs index b29a459b1..d1fadd975 100644 --- a/src/rust/storage/seg/src/ttl_buckets/tests.rs +++ b/src/rust/storage/seg/src/ttl_buckets/tests.rs @@ -7,7 +7,7 @@ use crate::*; #[test] fn bucket_index() { - let ttl_buckets = TtlBuckets::new(None, 0); + let 
ttl_buckets = TtlBuckets::new(); // Zero TTL and max duration both go into the same TtlBucket assert_eq!(ttl_buckets.get_bucket_index(Duration::from_secs(0)), 1023); diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 78386d97e..14bb494ea 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -50,16 +50,12 @@ pub struct TtlBuckets { pub(crate) last_expired: Instant, /// Are `TtlBuckets` copied back from a file? pub(crate) buckets_copied_back: bool, - /// Path to store relevant upon graceful shutdown - pub(crate) ttl_buckets_path: Option, - /// Offset into recovery file where `TtlBuckets` will be stored upon graceful shutdown - pub(crate) offset_into_file: usize, } impl TtlBuckets { /// Create a new set of `TtlBuckets` which cover the full range of TTLs. See /// the module-level documentation for how the range of TTLs are stored. - pub fn new(ttl_buckets_path: Option, offset_into_file: usize) -> Self { + pub fn new() -> Self { // TODO: add path as argument let intervals = [ TTL_BUCKET_INTERVAL_1, @@ -86,18 +82,13 @@ impl TtlBuckets { buckets, last_expired, buckets_copied_back: false, - ttl_buckets_path, - offset_into_file, } } // Returns a restored `TtlBuckets` if file path // to restore from is valid. 
Otherwise return a new `TtlBuckets` pub fn restore( - file_data: &[u8], - ttl_buckets_path: Option, - file_size: usize, - offset_into_file: usize, + file_data: &[u8] ) -> Self { // if there is a path to restore from, restore the `TtlBuckets` // if let Some(file) = &ttl_buckets_path { @@ -115,7 +106,7 @@ impl TtlBuckets { let mut bytes = vec![0; ttl_buckets_struct_size]; // retrieve bytes from mmapped file bytes.copy_from_slice( - &file_data[offset_into_file..(offset_into_file + ttl_buckets_struct_size)], + &file_data[0..ttl_buckets_struct_size], ); let mut offset = 0; @@ -147,8 +138,6 @@ impl TtlBuckets { buckets, last_expired, buckets_copied_back: true, - ttl_buckets_path, - offset_into_file, } // } // // otherwise, create a new `TtlBuckets` @@ -176,7 +165,7 @@ impl TtlBuckets { // .expect("failed to allocate file backed storage"); // let data = pool.as_mut_slice(); - let mut offset = self.offset_into_file; + let mut offset = 0; // --------------------- Store `last_expired` ----------------- // cast `last_expired` to byte pointer @@ -287,7 +276,7 @@ impl TtlBuckets { impl Default for TtlBuckets { fn default() -> Self { - Self::new(None, 0) + Self::new() } } From eb671d098f7a51492e4b49b2c18b7010f12c3d72 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 18:21:40 +1100 Subject: [PATCH 54/74] neatened up comments for HashTable and TtlBuckets --- src/rust/storage/seg/src/hashtable/mod.rs | 36 ++-------------- src/rust/storage/seg/src/seg.rs | 11 ++--- .../seg/src/ttl_buckets/ttl_buckets.rs | 42 ++----------------- 3 files changed, 9 insertions(+), 80 deletions(-) diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 2a098fbcb..7883eb0ff 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -161,13 +161,12 @@ impl HashTable { } } + // Returns a restored `HashTable` using recovery data (`file_data`) pub fn restore( file_data: &[u8], cfg_power: u8, 
overflow_factor: f64, ) -> Self { - // if there is a path to restore from, restore the `HashTable` - //if let Some(file) = &hashtable_path { // restore() assumes no changes in `power`. // I.e. config specifies same `power` as `HashTable` we are // restoring from @@ -181,11 +180,6 @@ impl HashTable { + total_buckets * bucket_size // `data` + started_size; - // // Mmap file - // let pool = File::create(file, file_size, true) - // .expect("failed to allocate file backed storage"); - // let file_data = Box::new(pool.as_slice()); - // create blank bytes to copy data into let mut bytes = vec![0; hashtable_size]; // retrieve bytes from mmapped file @@ -247,18 +241,10 @@ impl HashTable { table_copied_back: true, overflow_factor, } - // } - // otherwise, create a new `HashTable` - // else { - // HashTable::new(hashtable_path, cfg_power, overflow_factor) - // } } - /// Flushes the `HashTable` by storing it to a file (if a path is specified) - pub fn flush(&self, file_data: &mut [u8]) -> std::io::Result<()> { - // if a path is specified, copy all the `HashBucket`s to the file - // specified by `hashtable_path` - // if let Some(file) = &self.hashtable_path { + /// Flushes the `HashTable` by copying it to `file_data` + pub fn flush(&self, file_data: &mut [u8]){ let total_buckets = total_buckets(self.power, self.overflow_factor); let bucket_size = ::std::mem::size_of::(); let u64_size = ::std::mem::size_of::(); @@ -268,11 +254,6 @@ impl HashTable { + total_buckets * bucket_size // `data` + started_size; - // // Mmap file - // let mut pool = File::create(file, hashtable_size, true) - // .expect("failed to allocate file backed storage"); - // let file_data = pool.as_mut_slice(); - let mut offset = 0; // --------------------- Store `power` ----------------- @@ -315,17 +296,6 @@ impl HashTable { // store `next_to_chain` back to mmapped file store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); // ------------------------------------------------------------- - 
- // // TODO: check if this flushes the CPU caches - // pool.flush()?; - Ok(()) - // } else { - // Err(std::io::Error::new( - // std::io::ErrorKind::Other, - // "Path to store HashTable to is None, cannot gracefully - // shutdown cache", - // )) - // } } /// Lookup an item by key and return it diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 3ec1e6623..b3c92b51d 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -54,13 +54,8 @@ impl Seg { } /// Flushes cache by storing all the relevant fields of `Segments`, - /// `HashTable` and `TtlBuckets` to files at the paths stored in the - /// respective structs. + /// `HashTable` and `TtlBuckets` to the datapool file pub fn flush(&self) -> std::io::Result<()> { - // // Check if file exists and with what size - // if let Ok(file_size) = - // std::fs::metadata(self.hashtable_path.as_ref().unwrap()).map(|m| m.len()) - // { if let Some(file) = &self.hashtable_path { @@ -72,9 +67,9 @@ impl Seg { let file_data = pool.as_mut_slice(); self.segments.flush()?; - self.hashtable.flush(file_data)?; + self.hashtable.flush(file_data); let offset = self.hashtable.recover_size(); - self.ttl_buckets.flush(&mut file_data[offset..])?; + self.ttl_buckets.flush(&mut file_data[offset..]); // TODO: check if this flushes the CPU caches pool.flush()?; diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index 14bb494ea..dc5378674 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -85,23 +85,15 @@ impl TtlBuckets { } } - // Returns a restored `TtlBuckets` if file path - // to restore from is valid. 
Otherwise return a new `TtlBuckets` + // Returns a restored `TtlBuckets` using recovery data (`file_data`) pub fn restore( file_data: &[u8] ) -> Self { - // if there is a path to restore from, restore the `TtlBuckets` - // if let Some(file) = &ttl_buckets_path { let bucket_size = ::std::mem::size_of::(); let last_expired_size = ::std::mem::size_of::(); let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; - // // Mmap file - // let pool = File::create(file, file_size, true) - // .expect("failed to allocate file backed storage"); - // let data = Box::new(pool.as_slice()); - // create blank bytes to copy data into let mut bytes = vec![0; ttl_buckets_struct_size]; // retrieve bytes from mmapped file @@ -139,32 +131,15 @@ impl TtlBuckets { last_expired, buckets_copied_back: true, } - // } - // // otherwise, create a new `TtlBuckets` - // else { - // TtlBuckets::new(ttl_buckets_path, offset_into_file) - // } } - /// Flushes the `TtlBuckets` by storing it to a file (if a path is specified) - pub fn flush(&self, file_data: &mut [u8]) -> std::io::Result<()> { - // if a path is specified, copy all the `TtlBucket`s to the file - // specified by `ttl_buckets_path` - // if let Some(file) = &self.ttl_buckets_path { + /// Flushes the `TtlBuckets` by copying it to `file_data` + pub fn flush(&self, file_data: &mut [u8]){ let bucket_size = ::std::mem::size_of::(); let last_expired_size = ::std::mem::size_of::(); let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` + last_expired_size; - // let file_size = std::fs::metadata(self.ttl_buckets_path.as_ref().unwrap()) - // .expect("Failed to retrieve the datapool metadata") - // .len() as usize; - - // Mmap file - // let mut pool = File::create(file, file_size, true) - // .expect("failed to allocate file backed storage"); - // let data = pool.as_mut_slice(); - let mut offset = 0; // --------------------- Store `last_expired` ----------------- @@ -187,17 +162,6 @@ impl 
TtlBuckets { } // -------------------------------------------------- - - // TODO: check if this flushes the CPU caches - // pool.flush()?; - Ok(()) - // } else { - // Err(std::io::Error::new( - // std::io::ErrorKind::Other, - // "Path to store TtlBuckets to is None, cannot gracefully - // shutdown cache", - // )) - // } } pub(crate) fn get_bucket_index(&self, ttl: Duration) -> usize { From 62c953eda5d624dc50e4091f78b0c44868ee1425 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 20:12:47 +1100 Subject: [PATCH 55/74] all of Segments, HashTable and ttlBuckets restored from same file --- src/rust/config/src/seg.rs | 58 +-- src/rust/entrystore/src/seg/mod.rs | 4 +- src/rust/storage/seg/src/builder.rs | 99 ++--- src/rust/storage/seg/src/hashtable/mod.rs | 48 +-- src/rust/storage/seg/src/seg.rs | 30 +- src/rust/storage/seg/src/segments/builder.rs | 19 +- src/rust/storage/seg/src/segments/mod.rs | 2 +- src/rust/storage/seg/src/segments/segments.rs | 376 ++++++++---------- src/rust/storage/seg/src/tests.rs | 163 ++------ .../seg/src/ttl_buckets/ttl_buckets.rs | 28 +- 10 files changed, 310 insertions(+), 517 deletions(-) diff --git a/src/rust/config/src/seg.rs b/src/rust/config/src/seg.rs index 77d1bc486..eca766d3c 100644 --- a/src/rust/config/src/seg.rs +++ b/src/rust/config/src/seg.rs @@ -31,12 +31,6 @@ const MERGE_MAX: usize = 8; // datapool (`Segments.data`) const DATAPOOL_PATH: Option<&str> = None; -// `Segments` fields -const SEGMENT_FIELDS_PATH: Option<&str> = None; - -// ttl buckets -const TTL_BUCKETS_PATH: Option<&str> = None; - // hashtable const HASHTABLE_PATH: Option<&str> = None; @@ -96,15 +90,8 @@ fn datapool_path() -> Option { DATAPOOL_PATH.map(|v| v.to_string()) } -fn segments_fields_path() -> Option { - SEGMENT_FIELDS_PATH.map(|v| v.to_string()) -} -fn ttl_buckets_path() -> Option { - TTL_BUCKETS_PATH.map(|v| v.to_string()) -} - -fn hashtable_path() -> Option { +fn metadata_path() -> Option { HASHTABLE_PATH.map(|v| v.to_string()) } @@ 
-133,12 +120,8 @@ pub struct Seg { compact_target: usize, #[serde(default = "datapool_path")] datapool_path: Option, - #[serde(default = "segments_fields_path")] - segments_fields_path: Option, - #[serde(default = "ttl_buckets_path")] - ttl_buckets_path: Option, - #[serde(default = "hashtable_path")] - hashtable_path: Option, + #[serde(default = "metadata_path")] + metadata_path: Option, } impl Default for Seg { @@ -155,27 +138,24 @@ impl Default for Seg { merge_max: merge_max(), compact_target: compact_target(), datapool_path: datapool_path(), - segments_fields_path: segments_fields_path(), - ttl_buckets_path: ttl_buckets_path(), - hashtable_path: hashtable_path(), + metadata_path: metadata_path(), } } } // implementation impl Seg { - // Determines if the `Seg` will be restored. - // The restoration will be successful if `datapool_path`, `segments_fields_path` - // `ttl_buckets_path` and `hashtable_path` are valid paths. - // Otherwise, the `Seg` will be created as new. + // Determines if the `Seg` will be restored. The restoration will be + // successful if `datapool_path` and `metadata_path` are valid paths. + // Otherwise, the `Seg` will be created as + //new. pub fn restore(&self) -> bool { self.restore } - // Determines if the `Seg` will be gracefully shutdown. - // The graceful shutdown will be successful if the cache is file backed - // and `segments_fields_path`, `ttl_buckets_path` and `hashtable_path` are - // valid paths to save the relevant `Seg` fields to. + // Determines if the `Seg` will be gracefully shutdown. The graceful + // shutdown will be successful if the cache is file backed and + // metadata_path` is a valid path to save the relevant `Seg` fields to. // Otherwise, the relevant `Seg` fields will not be saved. 
pub fn graceful_shutdown(&self) -> bool { self.graceful_shutdown @@ -216,20 +196,8 @@ impl Seg { self.datapool_path.as_ref().map(|v| Path::new(v).to_owned()) } - pub fn segments_fields_path(&self) -> Option { - self.segments_fields_path - .as_ref() - .map(|v| Path::new(v).to_owned()) - } - - pub fn ttl_buckets_path(&self) -> Option { - self.ttl_buckets_path - .as_ref() - .map(|v| Path::new(v).to_owned()) - } - - pub fn hashtable_path(&self) -> Option { - self.hashtable_path + pub fn metadata_path(&self) -> Option { + self.metadata_path .as_ref() .map(|v| Path::new(v).to_owned()) } diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index 8def7800f..34bb6206c 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -50,9 +50,7 @@ impl Seg { .segment_size(config.segment_size()) .eviction(eviction) .datapool_path(config.datapool_path()) - .segments_fields_path(config.segments_fields_path()) - .ttl_buckets_path(config.ttl_buckets_path()) - .hashtable_path(config.hashtable_path()) + .metadata_path(config.metadata_path()) .build(); Self { data } diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index c747d5fbc..e2c917a15 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -15,8 +15,7 @@ pub struct Builder { hash_power: u8, overflow_factor: f64, segments_builder: SegmentsBuilder, - ttl_buckets_path: Option, - hashtable_path: Option, + metadata_path: Option, } // Defines the default parameters @@ -27,8 +26,7 @@ impl Default for Builder { hash_power: 16, overflow_factor: 0.0, segments_builder: SegmentsBuilder::default(), - ttl_buckets_path: None, - hashtable_path: None, + metadata_path: None, } } } @@ -157,29 +155,18 @@ impl Builder { self } - /// Specify a backing file to be used for `Segments` fields' storage. 
- pub fn segments_fields_path>(mut self, path: Option) -> Self { - self.segments_builder = self.segments_builder.segments_fields_path(path); - self - } - - /// Specify a backing file to be used for `TtlBuckets` storage. - pub fn ttl_buckets_path>(mut self, path: Option) -> Self { - self.ttl_buckets_path = path.map(|p| p.as_ref().to_owned()); - self - } - - /// Specify a backing file to be used for `HashTable` storage. - pub fn hashtable_path>(mut self, path: Option) -> Self { - self.hashtable_path = path.map(|p| p.as_ref().to_owned()); + /// Specify a backing file to be used for metadata storage. + pub fn metadata_path>(mut self, path: Option) -> Self { + self.metadata_path = path.map(|p| p.as_ref().to_owned()); self } /// Consumes the builder and returns a fully-allocated `Seg` instance. - /// If `restore` and valid paths to the structures are given, `Seg` will - /// be restored. Otherwise, create a new `Seg` instance. If valid paths are - /// given, the files at these paths will be used to copy the structures to - /// upon graceful shutdown. + /// If `restore`, the cache `Segments.data` is file backed by an existing + /// file and a valid file for the `metadata` is given, `Seg` will be + /// restored. Otherwise, create a new `Seg` instance. The path for the + /// `metadata` file will be saved with the `Seg` instance to be used to save + // the structures to upon graceful shutdown. /// /// ``` /// use seg::{Policy, Seg}; @@ -193,76 +180,54 @@ impl Builder { /// .eviction(Policy::Random).build(); /// ``` pub fn build(self) -> Seg { - // Build `Segments`. If there is a path for the datapool set, the - // `Segments.data` will be file backed. If `restore` and there is a path - // for the `Segments` fields, restore the other relevant `Segments` - // fields. 
- let segments = self.segments_builder.build(); - // If `Segments` successfully restored and `restore` - if segments.fields_copied_back && self.restore { - // Check if file exists and with what size + // If `restore` and there is a path for the metadata file to + // restore from, restore the cache + if self.restore && self.metadata_path.is_some() { + // Check if the metadata file exists and with what size if let Ok(file_size) = - std::fs::metadata(self.hashtable_path.as_ref().unwrap()).map(|m| m.len()) + std::fs::metadata(self.metadata_path.as_ref().unwrap()).map(|m| m.len()) { // TODO: implement a non-messy way to calculate expected file size, rather than just taking actual size let file_size = file_size as usize; // Mmap file - let pool = File::create(self.hashtable_path.clone().unwrap(), file_size, true) + let mut pool = File::create(self.metadata_path.clone().unwrap(), file_size, true) .expect("failed to allocate file backed storage"); - let file_data = pool.as_slice(); + let metadata = pool.as_mut_slice(); + + let hashtable = HashTable::restore(metadata, self.hash_power, self.overflow_factor); - // Attempt to restore `HashTable` and `TtlBuckets` - let hashtable = HashTable::restore( - file_data, - self.hash_power, - self.overflow_factor, - ); + let mut offset = hashtable.recover_size(); + let ttl_buckets = TtlBuckets::restore(&metadata[offset..]); - let offset = hashtable.recover_size(); - let ttl_buckets = - TtlBuckets::restore(&file_data[offset..]); + offset += ttl_buckets.recover_size(); - // If successful, return a restored segcache - if hashtable.table_copied_back && ttl_buckets.buckets_copied_back { + let segments = self.segments_builder.clone().build(Some(&mut metadata[offset..])); + + // Check that `Segments` was copied back, it will fail if the + // file for the file backed `Segments.data` did not exist + if segments.fields_copied_back { return Seg { hashtable, segments, ttl_buckets, - hashtable_path: self.hashtable_path, + metadata_path: 
self.metadata_path, }; } } } - // TODO: Should paths be checked here to see if any are None (or not - // valid)? Then we could take an "All or Nothing" approach. That is, if - // one of the paths is not valid, then all structures are created - // as new AND no paths are set for graceful shutdown. Otherwise, if - // `restore`, we restore from these paths, else, we set these paths. - // Currently, I am not doing this as due to the Segments having a - // separate builder + different control flow, it is too awkward to - // implement. - - // If not `restore` or restoration failed, create a new cache - let hashtable = HashTable::new( - self.hash_power, - self.overflow_factor, - ); - let offset = hashtable.recover_size(); + // Otherwise, create a new cache + let segments = self.segments_builder.build(None); + let hashtable = HashTable::new(self.hash_power, self.overflow_factor); let ttl_buckets = TtlBuckets::new(); - // Set offsets - // let hashtable_size = hashtable.recover_size(); - // let ttl_buckets_size = ttl_buckets.recover_size(); - // let file_size = hashtable_size + ttl_buckets_size; - // hashtable = hashtable.set_file_size(file_size); Seg { hashtable, segments, ttl_buckets, - hashtable_path: self.hashtable_path, // TODO: change this to final path + metadata_path: self.metadata_path, } } } diff --git a/src/rust/storage/seg/src/hashtable/mod.rs b/src/rust/storage/seg/src/hashtable/mod.rs index 7883eb0ff..2b73ed1f0 100644 --- a/src/rust/storage/seg/src/hashtable/mod.rs +++ b/src/rust/storage/seg/src/hashtable/mod.rs @@ -73,12 +73,10 @@ const N_BUCKET_SLOT: usize = 8; /// Maximum number of buckets in a chain. Must be <= 255. const MAX_CHAIN_LEN: u64 = 16; -use crate::datapool::*; use crate::*; use ahash::RandomState; use core::num::NonZeroU32; use metrics::{static_metrics, Counter}; -use std::path::PathBuf; mod hash_bucket; @@ -111,7 +109,7 @@ pub(crate) struct HashTable { started: Instant, next_to_chain: u64, /// Is `HashTable` copied back from a file? 
- pub(crate) table_copied_back: bool, + pub(crate) _table_copied_back: bool, /// Used in graceful shutdown overflow_factor: f64, } @@ -156,17 +154,13 @@ impl HashTable { rng: Box::new(rng()), started: Instant::recent(), next_to_chain: buckets as u64, - table_copied_back: false, + _table_copied_back: false, overflow_factor, } } - // Returns a restored `HashTable` using recovery data (`file_data`) - pub fn restore( - file_data: &[u8], - cfg_power: u8, - overflow_factor: f64, - ) -> Self { + // Returns a restored `HashTable` using recovery data (`metadata`) + pub fn restore(metadata: &[u8], cfg_power: u8, overflow_factor: f64) -> Self { // restore() assumes no changes in `power`. // I.e. config specifies same `power` as `HashTable` we are // restoring from @@ -183,7 +177,7 @@ impl HashTable { // create blank bytes to copy data into let mut bytes = vec![0; hashtable_size]; // retrieve bytes from mmapped file - bytes.copy_from_slice(&file_data[0..hashtable_size]); + bytes.copy_from_slice(&metadata[0..hashtable_size]); // ----- Re-initialise `hash_builder` ----- @@ -238,21 +232,17 @@ impl HashTable { rng: Box::new(rng()), started, next_to_chain, - table_copied_back: true, + _table_copied_back: true, overflow_factor, } } - /// Flushes the `HashTable` by copying it to `file_data` - pub fn flush(&self, file_data: &mut [u8]){ + /// Flushes the `HashTable` by copying it to `metadata` + pub fn flush(&self, metadata: &mut [u8]) { let total_buckets = total_buckets(self.power, self.overflow_factor); let bucket_size = ::std::mem::size_of::(); let u64_size = ::std::mem::size_of::(); let started_size = ::std::mem::size_of::(); - // Size of all components of `HashTable` that are being saved - let hashtable_size = u64_size * 3 // `power`, `mask`, `next_to_chain` - + total_buckets * bucket_size // `data` - + started_size; let mut offset = 0; // --------------------- Store `power` ----------------- @@ -261,7 +251,7 @@ impl HashTable { let byte_ptr = (&self.power as *const u64) as *const 
u8; // store `power` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, metadata); // --------------------- Store `mask` ----------------- @@ -269,7 +259,7 @@ impl HashTable { let byte_ptr = (&self.mask as *const u64) as *const u8; // store `mask` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, metadata); // --------------------- Store `data` ----------------- // for every `HashBucket` @@ -278,7 +268,7 @@ impl HashTable { let byte_ptr = (&self.data[id] as *const HashBucket) as *const u8; // store `HashBucket` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, metadata); } // --------------------- Store `started` ----------------- @@ -287,14 +277,14 @@ impl HashTable { let byte_ptr = (&self.started as *const Instant) as *const u8; // store `started` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, started_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, started_size, metadata); // --------------------- Store `next_to_chain` ----------------- // cast `next_to_chain` to byte pointer let byte_ptr = (&self.next_to_chain as *const u64) as *const u8; // store `next_to_chain` back to mmapped file - store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, file_data); + store::store_bytes_and_update_offset(byte_ptr, offset, u64_size, metadata); // ------------------------------------------------------------- } @@ -878,18 +868,6 @@ impl HashTable { + total_buckets * bucket_size // `data` + started_size } - - /// Function used by `Builder` to set the `file_size` - pub fn set_file_size(&self) -> 
usize { - let total_buckets = total_buckets(self.power, self.overflow_factor); - let bucket_size = ::std::mem::size_of::(); - let u64_size = ::std::mem::size_of::(); - let started_size = ::std::mem::size_of::(); - // Size of all components of `HashTable` that are being restored - u64_size * 3 // `power`, `mask`, `next_to_chain` - + total_buckets * bucket_size // `data` - + started_size - } } impl PartialEq for HashTable { diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index b3c92b51d..5192118c0 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -29,8 +29,8 @@ pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, pub(crate) ttl_buckets: TtlBuckets, - // Path to datapool - pub(crate) hashtable_path: Option, + // Path to metadata datapool + pub(crate) metadata_path: Option, } impl Seg { @@ -54,22 +54,24 @@ impl Seg { } /// Flushes cache by storing all the relevant fields of `Segments`, - /// `HashTable` and `TtlBuckets` to the datapool file + /// `HashTable` and `TtlBuckets` to the `metadata` file (if it exists) and + // flushing `Segments.data` (if it is file backed) pub fn flush(&self) -> std::io::Result<()> { - - if let Some(file) = &self.hashtable_path { - - let file_size = self.hashtable.recover_size() + self.ttl_buckets.recover_size(); + if let Some(file) = &self.metadata_path { + let file_size = self.hashtable.recover_size() + + self.ttl_buckets.recover_size() + + self.segments.recover_size(); // Mmap file let mut pool = File::create(file, file_size, true) .expect("failed to allocate file backed storage"); - let file_data = pool.as_mut_slice(); + let metadata = pool.as_mut_slice(); - self.segments.flush()?; - self.hashtable.flush(file_data); - let offset = self.hashtable.recover_size(); - self.ttl_buckets.flush(&mut file_data[offset..]); + self.hashtable.flush(metadata); + let mut offset = self.hashtable.recover_size(); + self.ttl_buckets.flush(&mut 
metadata[offset..]); + offset += self.ttl_buckets.recover_size(); + self.segments.flush(&mut metadata[offset..])?; // TODO: check if this flushes the CPU caches pool.flush()?; @@ -351,8 +353,8 @@ impl Seg { #[cfg(test)] pub(crate) fn restored(&self) -> bool { self.segments.fields_copied_back - && self.ttl_buckets.buckets_copied_back - && self.hashtable.table_copied_back + && self.ttl_buckets._buckets_copied_back + && self.hashtable._table_copied_back } /// Perform a wrapping addition on the value stored at the supplied key. diff --git a/src/rust/storage/seg/src/segments/builder.rs b/src/rust/storage/seg/src/segments/builder.rs index edb932b15..118fde17d 100644 --- a/src/rust/storage/seg/src/segments/builder.rs +++ b/src/rust/storage/seg/src/segments/builder.rs @@ -11,13 +11,13 @@ use crate::segments::*; use std::path::{Path, PathBuf}; /// The `SegmentsBuilder` allows for the configuration of the segment storage. +#[derive(Clone)] pub(crate) struct SegmentsBuilder { pub(super) restore: bool, pub(super) heap_size: usize, pub(super) segment_size: i32, pub(super) evict_policy: Policy, pub(super) datapool_path: Option, - pub(super) segments_fields_path: Option, } impl Default for SegmentsBuilder { @@ -28,15 +28,14 @@ impl Default for SegmentsBuilder { heap_size: 64 * 1024 * 1024, evict_policy: Policy::Random, datapool_path: None, - segments_fields_path: None, } } } impl<'a> SegmentsBuilder { /// Specify whether the `Segments` fields' will be restored - /// from the segments_fields_path. - /// Otherwise, the cache will be created and treated as new. + /// from the `metadata`. Otherwise, the cache will be created and treated as + // new. pub fn restore(mut self, restore: bool) -> Self { self.restore = restore; self @@ -82,16 +81,8 @@ impl<'a> SegmentsBuilder { self } - /// Specify a backing file to be used for the `Segment` fields' storage. If provided, - /// a file will be created at the corresponding path and used for segment header - /// storage. 
- pub fn segments_fields_path>(mut self, path: Option) -> Self { - self.segments_fields_path = path.map(|p| p.as_ref().to_owned()); - self - } - /// Construct the [`Segments`] from the builder - pub fn build(self) -> Segments { - Segments::from_builder(self) + pub fn build(self, option_metadata: Option<&mut [u8]>) -> Segments { + Segments::from_builder(self, option_metadata) } } diff --git a/src/rust/storage/seg/src/segments/mod.rs b/src/rust/storage/seg/src/segments/mod.rs index dbc89e376..ab7e3a664 100644 --- a/src/rust/storage/seg/src/segments/mod.rs +++ b/src/rust/storage/seg/src/segments/mod.rs @@ -29,7 +29,7 @@ mod test { fn free_q() { let mut segments = SegmentsBuilder::default() .heap_size(16 * 1024 * 1024) - .build(); + .build(None); let mut used = Vec::new(); for _i in 0..16 { let id = segments.pop_free().unwrap(); diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 7bd4125cb..1f9c1eced 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -10,7 +10,6 @@ use crate::segments::*; use core::num::NonZeroU32; use metrics::{static_metrics, Counter, Gauge}; -use std::path::PathBuf; static_metrics! { static EVICT_TIME: Gauge; @@ -46,15 +45,16 @@ pub(crate) struct Segments { data_file_backed: bool, /// Are `headers` copied back from a file? pub(crate) fields_copied_back: bool, - /// Path to save relevant fields upon graceful shutdown - segments_fields_path: Option, } impl Segments { /// Private function which allocates and initializes the `Segments` by - /// taking ownership of the builder. `Segments` is restored if the paths are - /// specified, otherwise a new `Segments` is created. - pub(super) fn from_builder(builder: SegmentsBuilder) -> Self { + /// taking ownership of the builder. `Segments` is restored if there is + /// recovery `metadata`, otherwise a new `Segments` is created. 
+ pub(super) fn from_builder( + builder: SegmentsBuilder, + option_metadata: Option<&mut [u8]>, + ) -> Self { let cfg_segment_size = builder.segment_size; let cfg_segments = builder.heap_size / (builder.segment_size as usize); @@ -79,10 +79,12 @@ impl Segments { let heap_size = cfg_segments * cfg_segment_size as usize; let mut data_file_backed = false; + let mut data_on_existing_file = false; // TODO(bmartin): we always prefault, this should be configurable let mut data: Box = if let Some(file) = builder.datapool_path { data_file_backed = true; + data_on_existing_file = std::fs::metadata(&file).is_ok(); let pool = File::create(file, heap_size, true) .expect("failed to allocate file backed storage"); Box::new(pool) @@ -90,249 +92,207 @@ impl Segments { Box::new(Memory::create(heap_size, true)) }; - // If `builder.restore` and - // there are specified paths to restore the `Segments` with and - // `Segments.data` is file backed, restore relevant - // `Segments` fields. - // Otherwise create a new `Segments`. - if builder.restore && data_file_backed && builder.segments_fields_path.is_some() { - // TODO: like with the HashTable fields, we assume that the configuration - // options for `Segments` hasn't changed upon recovery. We need a way to - // detect the change in fields as well as decided how to - // deal with such changes. - - let header_size: usize = ::std::mem::size_of::(); - let i32_size = ::std::mem::size_of::(); - let u32_size = ::std::mem::size_of::(); - let free_q_size = ::std::mem::size_of::>(); - let flush_at_size = ::std::mem::size_of::(); - // Size of all components of `Segments` that are being restored - let fields_size = cfg_segments * header_size // `headers` + // If `builder.restore` `Segments.data` is file backed with an existing + // file and metadata` to restore the `Segments` with, restore relevant + // `Segments` fields. Otherwise create a new `Segments`. 
+ if builder.restore && data_on_existing_file { + if let Some(metadata) = option_metadata { + // TODO: like with the HashTable fields, we assume that the configuration + // options for `Segments` hasn't changed upon recovery. We need a way to + // detect the change in fields as well as decided how to + // deal with such changes. + + let header_size: usize = ::std::mem::size_of::(); + let i32_size = ::std::mem::size_of::(); + let u32_size = ::std::mem::size_of::(); + let free_q_size = ::std::mem::size_of::>(); + let flush_at_size = ::std::mem::size_of::(); + // Size of all components of `Segments` that are being restored + let fields_size = cfg_segments * header_size // `headers` + i32_size // `segment_size` + u32_size * 2 // `free` and `cap` + free_q_size + flush_at_size; - // Mmap file - let pool = File::create( - builder.segments_fields_path.as_ref().unwrap(), - fields_size, - true, - ) - .expect("failed to allocate file backed storage"); - let fields_data = Box::new(pool.as_slice()); - - // create blank bytes to copy data into - let mut bytes = vec![0; fields_size]; - // retrieve bytes from mmapped file - bytes.copy_from_slice(&fields_data[0..fields_size]); - - let mut offset = 0; - let mut end = 0; - // ----- Retrieve `headers` ----- - - // retrieve each `SegmentHeader` from the raw bytes - for _ in 0..cfg_segments { - end += header_size; - - // cast bytes to `SegmentHeader` - let header = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut SegmentHeader) }; - headers.push(header); - - offset += header_size; - } + // create blank bytes to copy data into + let mut bytes = vec![0; fields_size]; + // retrieve bytes from mmapped file + bytes.copy_from_slice(&metadata[0..fields_size]); - // ----- Retrieve `segment_size` ----- - end += i32_size; + let mut offset = 0; + let mut end = 0; + // ----- Retrieve `headers` ----- - let segment_size = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut i32) }; - // TODO: compare `cfg_segment_size` and `segment_size` + // 
retrieve each `SegmentHeader` from the raw bytes + for _ in 0..cfg_segments { + end += header_size; - offset += i32_size; - // ----- Retrieve `free` ----- - end += u32_size; + // cast bytes to `SegmentHeader` + let header = + unsafe { *(bytes[offset..end].as_mut_ptr() as *mut SegmentHeader) }; + headers.push(header); - let free = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; + offset += header_size; + } - offset += u32_size; - // ----- Retrieve `cap` ----- - end += u32_size; + // ----- Retrieve `segment_size` ----- + end += i32_size; - let cap = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; + let segment_size = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut i32) }; + // TODO: compare `cfg_segment_size` and `segment_size` - offset += u32_size; - // ----- Retrieve `free_q` ----- - end += free_q_size; + offset += i32_size; + // ----- Retrieve `free` ----- + end += u32_size; - let free_q = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Option) }; + let free = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; - offset += free_q_size; - // ----- Retrieve `flush_at` ----- - end += flush_at_size; + offset += u32_size; + // ----- Retrieve `cap` ----- + end += u32_size; - let flush_at = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; + let cap = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut u32) }; - SEGMENT_CURRENT.set(cap as _); - SEGMENT_FREE.set(free as _); + offset += u32_size; + // ----- Retrieve `free_q` ----- + end += free_q_size; - Self { - headers: headers.into_boxed_slice(), - data, - segment_size, - free, - cap, - free_q, - flush_at, - evict: Box::new(evict), - data_file_backed: true, - fields_copied_back: true, - segments_fields_path: builder.segments_fields_path, - } - } else { - for id in 0..cfg_segments { - // safety: we start iterating from 1 and seg id is constrained to < 2^24 - let header = - SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); - headers.push(header); - } + 
let free_q = + unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Option) }; - let mut headers = headers.into_boxed_slice(); + offset += free_q_size; + // ----- Retrieve `flush_at` ----- + end += flush_at_size; - for idx in 0..cfg_segments { - let begin = cfg_segment_size as usize * idx; - let end = begin + cfg_segment_size as usize; + let flush_at = unsafe { *(bytes[offset..end].as_mut_ptr() as *mut Instant) }; - let mut segment = Segment::from_raw_parts( - &mut headers[idx], - &mut data.as_mut_slice()[begin..end], - ); - segment.init(); + SEGMENT_CURRENT.set(cap as _); + SEGMENT_FREE.set(free as _); - let id = idx as u32 + 1; // we index cfg_segments from 1 - segment.set_prev_seg(NonZeroU32::new(id - 1)); - if id < cfg_segments as u32 { - segment.set_next_seg(NonZeroU32::new(id + 1)); - } + return Self { + headers: headers.into_boxed_slice(), + data, + segment_size, + free, + cap, + free_q, + flush_at, + evict: Box::new(evict), + data_file_backed: true, + fields_copied_back: true, + }; } + } - SEGMENT_CURRENT.set(cfg_segments as _); - SEGMENT_FREE.set(cfg_segments as _); - - Self { - headers, - segment_size: cfg_segment_size, - cap: cfg_segments as u32, - free: cfg_segments as u32, - free_q: NonZeroU32::new(1), - data, - flush_at: Instant::recent(), - evict: Box::new(evict), - data_file_backed, - fields_copied_back: false, - segments_fields_path: builder.segments_fields_path, + // Create new `Segments` + for id in 0..cfg_segments { + // safety: we start iterating from 1 and seg id is constrained to < 2^24 + let header = SegmentHeader::new(unsafe { NonZeroU32::new_unchecked(id as u32 + 1) }); + headers.push(header); + } + + let mut headers = headers.into_boxed_slice(); + + for idx in 0..cfg_segments { + let begin = cfg_segment_size as usize * idx; + let end = begin + cfg_segment_size as usize; + + let mut segment = + Segment::from_raw_parts(&mut headers[idx], &mut data.as_mut_slice()[begin..end]); + segment.init(); + + let id = idx as u32 + 1; // we index 
cfg_segments from 1 + segment.set_prev_seg(NonZeroU32::new(id - 1)); + if id < cfg_segments as u32 { + segment.set_next_seg(NonZeroU32::new(id + 1)); } } + + SEGMENT_CURRENT.set(cfg_segments as _); + SEGMENT_FREE.set(cfg_segments as _); + + Self { + headers, + segment_size: cfg_segment_size, + cap: cfg_segments as u32, + free: cfg_segments as u32, + free_q: NonZeroU32::new(1), + data, + flush_at: Instant::recent(), + evict: Box::new(evict), + data_file_backed, + fields_copied_back: false, + } } /// Flushes the `Segments` by flushing the `Segments.data` (if filed backed) - /// and storing the other `Segments` fields' to a file (if a path is - /// specified) - pub fn flush(&self) -> std::io::Result<()> { + /// and copying the other `Segments` fields' by copying it to `metadata` + pub fn flush(&self, metadata: &mut [u8]) -> std::io::Result<()> { // if `Segments.data` is file backed, flush it to file if self.data_file_backed { self.data.flush()?; } - // if a path is specified, copy all the `Segments` fields' to the file - // specified by `segments_fields_path` - if let Some(file) = &self.segments_fields_path { - let header_size: usize = ::std::mem::size_of::(); - let i32_size = ::std::mem::size_of::(); - let u32_size = ::std::mem::size_of::(); - let free_q_size = ::std::mem::size_of::>(); - let flush_at_size = ::std::mem::size_of::(); - // Size of all components of `Segments` that are being restored - let fields_size = (self.cap as usize) * header_size // `headers` - + i32_size // `segment_size` - + u32_size * 2 // `free` and `cap` - + free_q_size - + flush_at_size; + let header_size: usize = ::std::mem::size_of::(); + let i32_size = ::std::mem::size_of::(); + let u32_size = ::std::mem::size_of::(); + let free_q_size = ::std::mem::size_of::>(); + let flush_at_size = ::std::mem::size_of::(); - // mmap file - let mut pool = File::create(file, fields_size, true) - .expect("failed to allocate file backed storage"); - let fields_data = pool.as_mut_slice(); - - let mut 
offset = 0; - // ----- Store `headers` ----- - - // for every `SegmentHeader` - for id in 0..(self.cap as usize) { - // cast `SegmentHeader` to byte pointer - let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; - - // store `SegmentHeader` back to mmapped file - offset = store::store_bytes_and_update_offset( - byte_ptr, - offset, - header_size, - fields_data, - ); - } + let mut offset = 0; + // ----- Store `headers` ----- - // ----- Store `segment_size` ----- + // for every `SegmentHeader` + for id in 0..(self.cap as usize) { + // cast `SegmentHeader` to byte pointer + let byte_ptr = (&self.headers[id] as *const SegmentHeader) as *const u8; - // cast `segment_size` to byte pointer - let byte_ptr = (&self.segment_size as *const i32) as *const u8; + // store `SegmentHeader` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, header_size, metadata); + } - // store `segment_size` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, i32_size, fields_data); + // ----- Store `segment_size` ----- - // ----- Store `free` ----- + // cast `segment_size` to byte pointer + let byte_ptr = (&self.segment_size as *const i32) as *const u8; - // cast `free` to byte pointer - let byte_ptr = (&self.free as *const u32) as *const u8; + // store `segment_size` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, i32_size, metadata); - // store `free` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); + // ----- Store `free` ----- - // ----- Store `cap` ----- + // cast `free` to byte pointer + let byte_ptr = (&self.free as *const u32) as *const u8; - // cast `cap` to byte pointer - let byte_ptr = (&self.cap as *const u32) as *const u8; + // store `free` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, metadata); - // store `cap` back to mmapped file - offset = 
store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, fields_data); + // ----- Store `cap` ----- - // ----- Store `free_q` ----- + // cast `cap` to byte pointer + let byte_ptr = (&self.cap as *const u32) as *const u8; - // cast `free_q` to byte pointer - let byte_ptr = (&self.free_q as *const Option) as *const u8; + // store `cap` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, u32_size, metadata); - // store `free_q` back to mmapped file - offset = - store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, fields_data); + // ----- Store `free_q` ----- - // ----- Store `flush_at` ----- + // cast `free_q` to byte pointer + let byte_ptr = (&self.free_q as *const Option) as *const u8; - // cast `flush_at` to byte pointer - let byte_ptr = (&self.flush_at as *const Instant) as *const u8; + // store `free_q` back to mmapped file + offset = store::store_bytes_and_update_offset(byte_ptr, offset, free_q_size, metadata); - // store `flush_at` back to mmapped file - store::store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, fields_data); + // ----- Store `flush_at` ----- - // ----------------------------- + // cast `flush_at` to byte pointer + let byte_ptr = (&self.flush_at as *const Instant) as *const u8; - // TODO: check if this flushes fields_data from CPU caches - pool.flush()?; - Ok(()) - } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Path to store Segments to is None, cannot gracefully - shutdown cache", - )) - } + // store `flush_at` back to mmapped file + store::store_bytes_and_update_offset(byte_ptr, offset, flush_at_size, metadata); + + // ----------------------------- + Ok(()) } /// Return the size of each segment in bytes @@ -1147,11 +1107,28 @@ impl Segments { Ok(next_id) } + + /// TODO: this code is repeated in restore() and flush(), can it be reduced? 
+ /// Function used by `Builder` to calculate the number of bytes of the `Segments` + /// that are stored/restored + pub fn recover_size(&self) -> usize { + let header_size: usize = ::std::mem::size_of::(); + let i32_size = ::std::mem::size_of::(); + let u32_size = ::std::mem::size_of::(); + let free_q_size = ::std::mem::size_of::>(); + let flush_at_size = ::std::mem::size_of::(); + // Size of all components of `Segments` that are being restored + (self.cap as usize) * header_size // `headers` + + i32_size // `segment_size` + + u32_size * 2 // `free` and `cap` + + free_q_size + + flush_at_size + } } impl Default for Segments { fn default() -> Self { - Self::from_builder(Default::default()) + Self::from_builder(Default::default(), None) } } @@ -1190,7 +1167,6 @@ impl Clone for Segments { evict: self.evict.clone(), // not relevant data_file_backed: self.data_file_backed, // not relevant fields_copied_back: self.fields_copied_back, // not relevant - segments_fields_path: None, // not relevant } } } diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 30a03571b..0bf6988f1 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -19,11 +19,11 @@ fn sizes() { #[cfg(not(feature = "magic"))] assert_eq!(ITEM_HDR_SIZE, 5); - assert_eq!(std::mem::size_of::(), 88); + assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 64); - assert_eq!(std::mem::size_of::(), 80); + assert_eq!(std::mem::size_of::(), 80); assert_eq!(std::mem::size_of::(), 64); assert_eq!(std::mem::size_of::(), 24); @@ -486,7 +486,7 @@ fn saturating_sub() { // ---- Nothing is saved on shutdown. // Gracefully shutdown // ---- `Segments.data` is flushed if it is file backed -// ---- Rest of `Seg` instance saved on shutdown if the paths are valid. +// ---- Rest of `Seg` instance saved on shutdown if the `metadata_path` is valid. 
// ---- That is, all of `Seg.hashtable`, `Seg.ttl_buckets` and the relevant // ---- `Seg.Segments` fields are saved // Restored cache @@ -511,15 +511,12 @@ fn tmp_dir() -> TempDir { } // Returns a `Seg` instance. Cache is restored only if `restore` and -// `segments_fields_path`, `ttl_buckets_path`. `hashtable_path` are not `None`. -// Otherwise, new `Seg` instance is returned. Cache is file backed if -// `datapool_path` is not `None`. +// `metadata_path` and `datapool_path` are not `None`. Otherwise, new `Seg` +// instance is returned. Cache is file backed if `datapool_path` is not `None`. fn make_cache( restore: bool, datapool_path: Option, - segments_fields_path: Option, - ttl_buckets_path: Option, - hashtable_path: Option, + metadata_path: Option, ) -> Seg { let segment_size = 4096; let segments = SEGMENTS; @@ -530,17 +527,14 @@ fn make_cache( .segment_size(segment_size as i32) .heap_size(heap_size) .datapool_path(datapool_path) // set path - .segments_fields_path(segments_fields_path) // set path - .ttl_buckets_path(ttl_buckets_path) // set path - .hashtable_path(hashtable_path) // set path + .metadata_path(metadata_path) // set path .build() } // ------------------- Set Paths Correctly Tests -------------------------- -// Check that a file backed, new cache is file backed and the `Seg` -// and thus the `Segments` fields', `HashTable` and `TTLBuckets` -// are new (and not restored) +// Check that a file backed, new cache is file backed and the `Seg` and thus the +// `Segments` fields', `HashTable` and `TTLBuckets` are new (and not restored) #[test] fn new_cache_file_backed() { // Create parent directory for temporary test files @@ -550,61 +544,40 @@ fn new_cache_file_backed() { // create new, file backed cache let restore = false; - let cache = make_cache(restore, datapool_path, None, None, None); + let cache = make_cache(restore, datapool_path, None); // the `Segments.data` should be filed backed assert!(cache.segments.data_file_backed()); - // -- Check 
entire `Seg` -- // the `Seg` should not be restored assert!(!cache.restored()); - // -- Check `Seg` fields/components -- - // the `Segments` fields' should not have been restored - assert!(!cache.segments.fields_copied_back); - // the `TtlBuckets` should not have been restored - assert!(!cache.ttl_buckets.buckets_copied_back); - // the `HashTable` should not have been restored - assert!(!cache.hashtable.table_copied_back); } -// Check that a new, not file backed cache is not file backed -// and the `Seg` is new (and not restored) +// Check that a new, not file backed cache is not file backed and the `Seg` is +// new (and not restored) #[test] fn new_cache_not_file_backed() { // create new, not file backed cache let restore = false; - let cache = make_cache(restore, None, None, None, None); + let cache = make_cache(restore, None, None); // the `Segments.data` should not be filed backed assert!(!cache.segments.data_file_backed()); // the `Seg` should not be restored assert!(!cache.restored()); - // the `Segments` fields' should not have been restored - assert!(!cache.segments.fields_copied_back); - // the `TtlBuckets` should not have been restored - assert!(!cache.ttl_buckets.buckets_copied_back); - // the `HashTable` should not have been restored - assert!(!cache.hashtable.table_copied_back); } -// Edge Case: Check that an attempt to restore a cache without specifing -// any paths for the `Segments.data`, `Segments` fields', -// `HashTable` and `TTLBuckets` will lead to `Segments.data` not -// being file backed and none of the other structures being restored +// Edge Case: Check that an attempt to restore a cache without specifing any +// paths will lead to `Segments.data` not being file backed and +// no of the other structures being restored #[test] fn restored_cache_no_paths_set() { let restore = true; - let cache = make_cache(restore, None, None, None, None); + let cache = make_cache(restore, None, None); // the `Segments.data` should not be filed backed 
assert!(!cache.segments.data_file_backed()); // the `Seg` should not be restored assert!(!cache.restored()); - // the `Segments` fields' should not have been restored - assert!(!cache.segments.fields_copied_back); - // the `TtlBuckets` should not have been restored - assert!(!cache.ttl_buckets.buckets_copied_back); - // the `HashTable` should not have been restored - assert!(!cache.hashtable.table_copied_back); } // Check that if paths are specified, then the cache is gracefully @@ -615,21 +588,15 @@ fn cache_gracefully_shutdown() { let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // create new, file backed cache let restore = false; let cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); // Flush cache @@ -644,18 +611,12 @@ fn cache_not_gracefully_shutdown() { let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); // create new, file backed cache let restore = false; let cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, None, // Don't set a `HashTable` path ); @@ -665,29 +626,23 @@ fn cache_not_gracefully_shutdown() { // --------------------- Data copied back Tests---------------------------- -// Creates a 
new cache, stores an item, gracefully shutsdown cache and restore cache -// Check item is still there and caches are equivalent +// Creates a new cache, stores an item, gracefully shutsdown cache and restore +// cache. Check item is still there and caches are equivalent #[test] fn new_file_backed_cache_changed_and_restored() { // Create a temporary directory let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // create new, file backed cache let mut restore = false; let mut cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(!cache.restored()); @@ -714,9 +669,7 @@ fn new_file_backed_cache_changed_and_restored() { // Create same tempfiles (they have been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // restore cache // This cache is file backed by same file as the above cache @@ -725,9 +678,7 @@ fn new_file_backed_cache_changed_and_restored() { let mut new_cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(new_cache.restored()); @@ -740,29 +691,23 @@ fn new_file_backed_cache_changed_and_restored() { assert!(new_cache == 
old_cache); } -// Creates a new cache, gracefully shutsdown cache and restore cache -// Check caches are equivalent +// Creates a new cache, gracefully shutsdown cache and restore cache. Check +// caches are equivalent #[test] fn new_file_backed_cache_not_changed_and_restored() { // Create a temporary directory let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // create new, file backed cache let mut restore = false; let cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(!cache.restored()); @@ -775,9 +720,7 @@ fn new_file_backed_cache_not_changed_and_restored() { // Create same tempfiles (they have been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // restore cache // This cache is file backed by same file as the above cache @@ -786,9 +729,7 @@ fn new_file_backed_cache_not_changed_and_restored() { let new_cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(new_cache.restored()); @@ -797,29 +738,23 @@ fn new_file_backed_cache_not_changed_and_restored() { assert!(new_cache == old_cache); } -// Creates a new cache, stores an 
item, gracefully shutsdown cache and spawn new cache -// Check item is not in new cache and caches are not equivalent +// Creates a new cache, stores an item, gracefully shutsdown cache and spawn new +// cache. Check item is not in new cache and caches are not equivalent #[test] fn new_cache_changed_and_not_restored() { // Create a temporary directory let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // create new, file backed cache let mut restore = false; let mut cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(!cache.restored()); @@ -851,7 +786,7 @@ fn new_cache_changed_and_not_restored() { // This new cache is file backed by same file as the above cache // saved `Segments.data` to but this cache is treated as new restore = false; - let mut new_cache = make_cache(restore, datapool_path, None, None, None); + let mut new_cache = make_cache(restore, datapool_path, None); assert!(!new_cache.restored()); assert_eq!(new_cache.items(), 0); @@ -864,23 +799,17 @@ fn new_cache_changed_and_not_restored() { assert!(new_cache != old_cache); } -// Create a new cache, fill it with items. -// Gracefully shutdown this cache. -// Restore cache and check that every key from the original cache -// exists in the restored cache -// Check caches are equivalent +// Create a new cache, fill it with items. Gracefully shutdown this cache. +// Restore cache and check that every key from the original cache exists in the +// restored cache. 
Check caches are equivalent #[test] fn full_cache_recovery_long() { // Create a temporary directory let dir = tmp_dir(); // Create tempfile for datapool let datapool_path: Option = Some(dir.path().join("datapool")); - // Create tempfile for `Segments` fields' - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - // Create tempfile for `TtlBuckets` - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); // Create tempfile for `HashTable` - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); let ttl = Duration::ZERO; let value_size = 512; @@ -892,9 +821,7 @@ fn full_cache_recovery_long() { let mut cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(!cache.restored()); @@ -942,9 +869,7 @@ fn full_cache_recovery_long() { // Create same tempfiles (they have been moved since first created) let datapool_path: Option = Some(dir.path().join("datapool")); - let segments_fields_path: Option = Some(dir.path().join("segments_fields")); - let ttl_buckets_path: Option = Some(dir.path().join("ttl_buckets")); - let hashtable_path: Option = Some(dir.path().join("hashtable")); + let metadata_path: Option = Some(dir.path().join("hashtable")); // restore cache // This new cache is file backed by same file as the above cache @@ -953,9 +878,7 @@ fn full_cache_recovery_long() { let mut new_cache = make_cache( restore, datapool_path, - segments_fields_path, - ttl_buckets_path, - hashtable_path, + metadata_path, ); assert!(new_cache.restored()); diff --git a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs index dc5378674..ca66b2459 100644 --- a/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs +++ b/src/rust/storage/seg/src/ttl_buckets/ttl_buckets.rs @@ -21,9 +21,7 @@ //! more detail. 
use super::{CLEAR_TIME, EXPIRE_TIME}; -use crate::datapool::*; use crate::*; -use std::path::PathBuf; const N_BUCKET_PER_STEP_N_BIT: usize = 8; const N_BUCKET_PER_STEP: usize = 1 << N_BUCKET_PER_STEP_N_BIT; @@ -49,7 +47,7 @@ pub struct TtlBuckets { pub(crate) buckets: Box<[TtlBucket]>, pub(crate) last_expired: Instant, /// Are `TtlBuckets` copied back from a file? - pub(crate) buckets_copied_back: bool, + pub(crate) _buckets_copied_back: bool, } impl TtlBuckets { @@ -81,14 +79,12 @@ impl TtlBuckets { Self { buckets, last_expired, - buckets_copied_back: false, + _buckets_copied_back: false, } } - // Returns a restored `TtlBuckets` using recovery data (`file_data`) - pub fn restore( - file_data: &[u8] - ) -> Self { + // Returns a restored `TtlBuckets` using recovery data (`metadata`) + pub fn restore(metadata: &[u8]) -> Self { let bucket_size = ::std::mem::size_of::(); let last_expired_size = ::std::mem::size_of::(); let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` @@ -97,9 +93,7 @@ impl TtlBuckets { // create blank bytes to copy data into let mut bytes = vec![0; ttl_buckets_struct_size]; // retrieve bytes from mmapped file - bytes.copy_from_slice( - &file_data[0..ttl_buckets_struct_size], - ); + bytes.copy_from_slice(&metadata[0..ttl_buckets_struct_size]); let mut offset = 0; // ----- Retrieve `last_expired` ----- @@ -129,16 +123,14 @@ impl TtlBuckets { Self { buckets, last_expired, - buckets_copied_back: true, + _buckets_copied_back: true, } } - /// Flushes the `TtlBuckets` by copying it to `file_data` - pub fn flush(&self, file_data: &mut [u8]){ + /// Flushes the `TtlBuckets` by copying it to `metadata` + pub fn flush(&self, metadata: &mut [u8]) { let bucket_size = ::std::mem::size_of::(); let last_expired_size = ::std::mem::size_of::(); - let ttl_buckets_struct_size = MAX_N_TTL_BUCKET * bucket_size // `buckets` - + last_expired_size; let mut offset = 0; // --------------------- Store `last_expired` ----------------- @@ -148,7 +140,7 @@ 
impl TtlBuckets { // store `last_expired` back to mmapped file offset = - store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, file_data); + store::store_bytes_and_update_offset(byte_ptr, offset, last_expired_size, metadata); // --------------------- Store `buckets` ----------------- @@ -158,7 +150,7 @@ impl TtlBuckets { let byte_ptr = (&self.buckets[id] as *const TtlBucket) as *const u8; // store `TtlBucket` back to mmapped file - offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, file_data); + offset = store::store_bytes_and_update_offset(byte_ptr, offset, bucket_size, metadata); } // -------------------------------------------------- From 8816e11020e55fc559bebd5b5c0b6e9f9e7d1139 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Feb 2022 20:15:26 +1100 Subject: [PATCH 56/74] ran cargo fmt --- src/rust/config/src/seg.rs | 15 ++--- src/rust/storage/seg/src/builder.rs | 16 +++-- src/rust/storage/seg/src/segments/builder.rs | 2 +- src/rust/storage/seg/src/segments/segments.rs | 2 +- src/rust/storage/seg/src/tests.rs | 64 +++++-------------- 5 files changed, 33 insertions(+), 66 deletions(-) diff --git a/src/rust/config/src/seg.rs b/src/rust/config/src/seg.rs index eca766d3c..330b1db01 100644 --- a/src/rust/config/src/seg.rs +++ b/src/rust/config/src/seg.rs @@ -90,7 +90,6 @@ fn datapool_path() -> Option { DATAPOOL_PATH.map(|v| v.to_string()) } - fn metadata_path() -> Option { HASHTABLE_PATH.map(|v| v.to_string()) } @@ -145,16 +144,16 @@ impl Default for Seg { // implementation impl Seg { - // Determines if the `Seg` will be restored. The restoration will be - // successful if `datapool_path` and `metadata_path` are valid paths. - // Otherwise, the `Seg` will be created as + // Determines if the `Seg` will be restored. The restoration will be + // successful if `datapool_path` and `metadata_path` are valid paths. + // Otherwise, the `Seg` will be created as //new. 
pub fn restore(&self) -> bool { self.restore } - // Determines if the `Seg` will be gracefully shutdown. The graceful - // shutdown will be successful if the cache is file backed and + // Determines if the `Seg` will be gracefully shutdown. The graceful + // shutdown will be successful if the cache is file backed and // metadata_path` is a valid path to save the relevant `Seg` fields to. // Otherwise, the relevant `Seg` fields will not be saved. pub fn graceful_shutdown(&self) -> bool { @@ -197,9 +196,7 @@ impl Seg { } pub fn metadata_path(&self) -> Option { - self.metadata_path - .as_ref() - .map(|v| Path::new(v).to_owned()) + self.metadata_path.as_ref().map(|v| Path::new(v).to_owned()) } } diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index e2c917a15..5ce8d30cc 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -163,9 +163,9 @@ impl Builder { /// Consumes the builder and returns a fully-allocated `Seg` instance. /// If `restore`, the cache `Segments.data` is file backed by an existing - /// file and a valid file for the `metadata` is given, `Seg` will be - /// restored. Otherwise, create a new `Seg` instance. The path for the - /// `metadata` file will be saved with the `Seg` instance to be used to save + /// file and a valid file for the `metadata` is given, `Seg` will be + /// restored. Otherwise, create a new `Seg` instance. The path for the + /// `metadata` file will be saved with the `Seg` instance to be used to save // the structures to upon graceful shutdown. 
/// /// ``` @@ -180,7 +180,6 @@ impl Builder { /// .eviction(Policy::Random).build(); /// ``` pub fn build(self) -> Seg { - // If `restore` and there is a path for the metadata file to // restore from, restore the cache if self.restore && self.metadata_path.is_some() { @@ -203,9 +202,12 @@ impl Builder { offset += ttl_buckets.recover_size(); - let segments = self.segments_builder.clone().build(Some(&mut metadata[offset..])); + let segments = self + .segments_builder + .clone() + .build(Some(&mut metadata[offset..])); - // Check that `Segments` was copied back, it will fail if the + // Check that `Segments` was copied back, it will fail if the // file for the file backed `Segments.data` did not exist if segments.fields_copied_back { return Seg { @@ -227,7 +229,7 @@ impl Builder { hashtable, segments, ttl_buckets, - metadata_path: self.metadata_path, + metadata_path: self.metadata_path, } } } diff --git a/src/rust/storage/seg/src/segments/builder.rs b/src/rust/storage/seg/src/segments/builder.rs index 118fde17d..87e59c9f5 100644 --- a/src/rust/storage/seg/src/segments/builder.rs +++ b/src/rust/storage/seg/src/segments/builder.rs @@ -34,7 +34,7 @@ impl Default for SegmentsBuilder { impl<'a> SegmentsBuilder { /// Specify whether the `Segments` fields' will be restored - /// from the `metadata`. Otherwise, the cache will be created and treated as + /// from the `metadata`. Otherwise, the cache will be created and treated as // new. 
pub fn restore(mut self, restore: bool) -> Self { self.restore = restore; diff --git a/src/rust/storage/seg/src/segments/segments.rs b/src/rust/storage/seg/src/segments/segments.rs index 1f9c1eced..51cc4413c 100644 --- a/src/rust/storage/seg/src/segments/segments.rs +++ b/src/rust/storage/seg/src/segments/segments.rs @@ -93,7 +93,7 @@ impl Segments { }; // If `builder.restore` `Segments.data` is file backed with an existing - // file and metadata` to restore the `Segments` with, restore relevant + // file and metadata` to restore the `Segments` with, restore relevant // `Segments` fields. Otherwise create a new `Segments`. if builder.restore && data_on_existing_file { if let Some(metadata) = option_metadata { diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 0bf6988f1..33a6f6f7e 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -511,7 +511,7 @@ fn tmp_dir() -> TempDir { } // Returns a `Seg` instance. Cache is restored only if `restore` and -// `metadata_path` and `datapool_path` are not `None`. Otherwise, new `Seg` +// `metadata_path` and `datapool_path` are not `None`. Otherwise, new `Seg` // instance is returned. Cache is file backed if `datapool_path` is not `None`. 
fn make_cache( restore: bool, @@ -552,7 +552,7 @@ fn new_cache_file_backed() { assert!(!cache.restored()); } -// Check that a new, not file backed cache is not file backed and the `Seg` is +// Check that a new, not file backed cache is not file backed and the `Seg` is // new (and not restored) #[test] fn new_cache_not_file_backed() { @@ -566,8 +566,8 @@ fn new_cache_not_file_backed() { assert!(!cache.restored()); } -// Edge Case: Check that an attempt to restore a cache without specifing any -// paths will lead to `Segments.data` not being file backed and +// Edge Case: Check that an attempt to restore a cache without specifing any +// paths will lead to `Segments.data` not being file backed and // no of the other structures being restored #[test] fn restored_cache_no_paths_set() { @@ -593,11 +593,7 @@ fn cache_gracefully_shutdown() { // create new, file backed cache let restore = false; - let cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let cache = make_cache(restore, datapool_path, metadata_path); // Flush cache assert!(cache.flush().is_ok()); @@ -626,7 +622,7 @@ fn cache_not_gracefully_shutdown() { // --------------------- Data copied back Tests---------------------------- -// Creates a new cache, stores an item, gracefully shutsdown cache and restore +// Creates a new cache, stores an item, gracefully shutsdown cache and restore // cache. 
Check item is still there and caches are equivalent #[test] fn new_file_backed_cache_changed_and_restored() { @@ -639,11 +635,7 @@ fn new_file_backed_cache_changed_and_restored() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let mut cache = make_cache(restore, datapool_path, metadata_path); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -675,11 +667,7 @@ fn new_file_backed_cache_changed_and_restored() { // This cache is file backed by same file as the above cache // saved `Segments.data` to and the `Seg` is restored restore = true; - let mut new_cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let mut new_cache = make_cache(restore, datapool_path, metadata_path); assert!(new_cache.restored()); // "latte" should be in restored cache @@ -691,7 +679,7 @@ fn new_file_backed_cache_changed_and_restored() { assert!(new_cache == old_cache); } -// Creates a new cache, gracefully shutsdown cache and restore cache. Check +// Creates a new cache, gracefully shutsdown cache and restore cache. 
Check // caches are equivalent #[test] fn new_file_backed_cache_not_changed_and_restored() { @@ -704,11 +692,7 @@ fn new_file_backed_cache_not_changed_and_restored() { // create new, file backed cache let mut restore = false; - let cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let cache = make_cache(restore, datapool_path, metadata_path); assert!(!cache.restored()); @@ -726,11 +710,7 @@ fn new_file_backed_cache_not_changed_and_restored() { // This cache is file backed by same file as the above cache // saved `Segments.data` to and the `Seg` is restored restore = true; - let new_cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let new_cache = make_cache(restore, datapool_path, metadata_path); assert!(new_cache.restored()); @@ -738,7 +718,7 @@ fn new_file_backed_cache_not_changed_and_restored() { assert!(new_cache == old_cache); } -// Creates a new cache, stores an item, gracefully shutsdown cache and spawn new +// Creates a new cache, stores an item, gracefully shutsdown cache and spawn new // cache. Check item is not in new cache and caches are not equivalent #[test] fn new_cache_changed_and_not_restored() { @@ -751,11 +731,7 @@ fn new_cache_changed_and_not_restored() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let mut cache = make_cache(restore, datapool_path, metadata_path); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -800,7 +776,7 @@ fn new_cache_changed_and_not_restored() { } // Create a new cache, fill it with items. Gracefully shutdown this cache. -// Restore cache and check that every key from the original cache exists in the +// Restore cache and check that every key from the original cache exists in the // restored cache. 
Check caches are equivalent #[test] fn full_cache_recovery_long() { @@ -818,11 +794,7 @@ fn full_cache_recovery_long() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let mut cache = make_cache(restore, datapool_path, metadata_path); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -875,11 +847,7 @@ fn full_cache_recovery_long() { // This new cache is file backed by same file as the above cache // saved `Segments.data` to and the `Seg` is restored restore = true; - let mut new_cache = make_cache( - restore, - datapool_path, - metadata_path, - ); + let mut new_cache = make_cache(restore, datapool_path, metadata_path); assert!(new_cache.restored()); From 396e704cac2f1d97963a31c832c9b2dbe6de895e Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 13:33:04 +1100 Subject: [PATCH 57/74] added a quit() function to be called when a Quit request is received --- src/rust/entrystore/src/seg/memcache.rs | 4 ++++ src/rust/protocol/src/memcache/storage/mod.rs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/rust/entrystore/src/seg/memcache.rs b/src/rust/entrystore/src/seg/memcache.rs index c0a6f89ef..f3823aba0 100644 --- a/src/rust/entrystore/src/seg/memcache.rs +++ b/src/rust/entrystore/src/seg/memcache.rs @@ -177,4 +177,8 @@ impl MemcacheStorage for Seg { Err(_) => Err(MemcacheStorageError::NotStored), } } + + fn quit(&mut self) -> Result<(), MemcacheStorageError> { + Ok(()) + } } diff --git a/src/rust/protocol/src/memcache/storage/mod.rs b/src/rust/protocol/src/memcache/storage/mod.rs index 117ddf704..b3a2e9092 100644 --- a/src/rust/protocol/src/memcache/storage/mod.rs +++ b/src/rust/protocol/src/memcache/storage/mod.rs @@ -54,4 +54,8 @@ pub trait MemcacheStorage { /// Compare and store on the CAS value, replacing the stored item if the CAS /// value matches the provided value. 
fn cas(&mut self, entry: &MemcacheEntry) -> Result<(), MemcacheStorageError>; + + /// Triggers a shutdown of the cache which closes connections to the cache + /// server. + fn quit(&mut self) -> Result<(), MemcacheStorageError>; } From ea1e11441308bc2912a4372a8262f2e06ed8ee33 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 14:25:58 +1100 Subject: [PATCH 58/74] removed quit() as this is not what we want (termination of connection). What we want is shutdown of entire cache --- src/rust/core/server/src/threads/worker/single.rs | 1 + src/rust/entrystore/src/seg/memcache.rs | 4 ---- src/rust/protocol/src/memcache/storage/mod.rs | 4 ---- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/rust/core/server/src/threads/worker/single.rs b/src/rust/core/server/src/threads/worker/single.rs index 4e4b726e9..e575562ad 100644 --- a/src/rust/core/server/src/threads/worker/single.rs +++ b/src/rust/core/server/src/threads/worker/single.rs @@ -200,6 +200,7 @@ where fn handle_data(&mut self, token: Token) -> Result<(), std::io::Error> { if let Ok(session) = self.poll.get_mut_session(token) { loop { + if session.write_capacity() == 0 { // if the write buffer is over-full, skip processing break; diff --git a/src/rust/entrystore/src/seg/memcache.rs b/src/rust/entrystore/src/seg/memcache.rs index f3823aba0..c0a6f89ef 100644 --- a/src/rust/entrystore/src/seg/memcache.rs +++ b/src/rust/entrystore/src/seg/memcache.rs @@ -177,8 +177,4 @@ impl MemcacheStorage for Seg { Err(_) => Err(MemcacheStorageError::NotStored), } } - - fn quit(&mut self) -> Result<(), MemcacheStorageError> { - Ok(()) - } } diff --git a/src/rust/protocol/src/memcache/storage/mod.rs b/src/rust/protocol/src/memcache/storage/mod.rs index b3a2e9092..117ddf704 100644 --- a/src/rust/protocol/src/memcache/storage/mod.rs +++ b/src/rust/protocol/src/memcache/storage/mod.rs @@ -54,8 +54,4 @@ pub trait MemcacheStorage { /// Compare and store on the CAS value, replacing the stored item if the CAS 
/// value matches the provided value. fn cas(&mut self, entry: &MemcacheEntry) -> Result<(), MemcacheStorageError>; - - /// Triggers a shutdown of the cache which closes connections to the cache - /// server. - fn quit(&mut self) -> Result<(), MemcacheStorageError>; } From 85531a3ba935575bd33a431e718e500982dc10b1 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 14:45:25 +1100 Subject: [PATCH 59/74] added stop() function to be called when the request is Stop --- src/rust/entrystore/src/seg/memcache.rs | 9 +++++++++ src/rust/protocol/src/memcache/storage/mod.rs | 3 +++ 2 files changed, 12 insertions(+) diff --git a/src/rust/entrystore/src/seg/memcache.rs b/src/rust/entrystore/src/seg/memcache.rs index c0a6f89ef..0466feb7c 100644 --- a/src/rust/entrystore/src/seg/memcache.rs +++ b/src/rust/entrystore/src/seg/memcache.rs @@ -177,4 +177,13 @@ impl MemcacheStorage for Seg { Err(_) => Err(MemcacheStorageError::NotStored), } } + + fn stop(&mut self) -> Result<(), MemcacheStorageError> { + if self.data.flush().is_ok() { + Ok(()) + } + else { + Err(MemcacheStorageError::ServerError) + } + } } diff --git a/src/rust/protocol/src/memcache/storage/mod.rs b/src/rust/protocol/src/memcache/storage/mod.rs index 117ddf704..09abe8523 100644 --- a/src/rust/protocol/src/memcache/storage/mod.rs +++ b/src/rust/protocol/src/memcache/storage/mod.rs @@ -54,4 +54,7 @@ pub trait MemcacheStorage { /// Compare and store on the CAS value, replacing the stored item if the CAS /// value matches the provided value.
fn cas(&mut self, entry: &MemcacheEntry) -> Result<(), MemcacheStorageError>; + + /// Triggers a shutdown of the cache + fn stop(&mut self) -> Result<(), MemcacheStorageError>; } From 2396be8b14729733e280fd2e2facf4f80b2a74e8 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 15:07:36 +1100 Subject: [PATCH 60/74] added stop() function to be called when the request is Stop --- src/rust/protocol/src/memcache/wire/mod.rs | 10 ++++++++++ .../src/memcache/wire/request/command.rs | 3 +++ .../protocol/src/memcache/wire/request/mod.rs | 3 +++ .../src/memcache/wire/request/parse.rs | 20 +++++++++++++++++++ .../src/memcache/wire/response/mod.rs | 6 ++++++ 5 files changed, 42 insertions(+) diff --git a/src/rust/protocol/src/memcache/wire/mod.rs b/src/rust/protocol/src/memcache/wire/mod.rs index 6c5ab318e..00862e392 100644 --- a/src/rust/protocol/src/memcache/wire/mod.rs +++ b/src/rust/protocol/src/memcache/wire/mod.rs @@ -229,6 +229,16 @@ where MemcacheRequest::FlushAll => { return None; } + MemcacheRequest::Stop => { + let response = match self.stop() { + Ok(_) => MemcacheResult::Stopped, + Err(MemcacheStorageError::ServerError) => MemcacheResult::Error, + _ => { + unreachable!() + } + }; + response + } }; Some(MemcacheResponse { request, result }) diff --git a/src/rust/protocol/src/memcache/wire/request/command.rs b/src/rust/protocol/src/memcache/wire/request/command.rs index bd31eaea4..f24bf53f7 100644 --- a/src/rust/protocol/src/memcache/wire/request/command.rs +++ b/src/rust/protocol/src/memcache/wire/request/command.rs @@ -23,6 +23,7 @@ pub enum MemcacheCommand { Cas, Quit, FlushAll, + Stop, } impl TryFrom<&[u8]> for MemcacheCommand { @@ -43,6 +44,7 @@ impl TryFrom<&[u8]> for MemcacheCommand { b"decr" => MemcacheCommand::Decr, b"quit" => MemcacheCommand::Quit, b"flush_all" => MemcacheCommand::FlushAll, + b"stop" => MemcacheCommand::Stop, _ => { return Err(ParseError::UnknownCommand); } @@ -67,6 +69,7 @@ impl std::fmt::Display for MemcacheCommand {
Self::Decr => "decr", Self::Quit => "quit", Self::FlushAll => "flush_all", + Self::Stop => "stop" }; write!(f, "{}", name) } diff --git a/src/rust/protocol/src/memcache/wire/request/mod.rs b/src/rust/protocol/src/memcache/wire/request/mod.rs index 8a56ab78c..674c2b33a 100644 --- a/src/rust/protocol/src/memcache/wire/request/mod.rs +++ b/src/rust/protocol/src/memcache/wire/request/mod.rs @@ -34,6 +34,7 @@ pub enum MemcacheRequest { Decr { key: Key, value: u64, noreply: bool }, Cas { entry: MemcacheEntry, noreply: bool }, FlushAll, + Stop, } impl MemcacheRequest { @@ -101,6 +102,8 @@ impl MemcacheRequest { Self::Decr { .. } => MemcacheCommand::Decr, Self::Cas { .. } => MemcacheCommand::Cas, Self::FlushAll => MemcacheCommand::FlushAll, + Self::Stop => MemcacheCommand::Stop, + } } } diff --git a/src/rust/protocol/src/memcache/wire/request/parse.rs b/src/rust/protocol/src/memcache/wire/request/parse.rs index f4da4cfef..20891a67b 100644 --- a/src/rust/protocol/src/memcache/wire/request/parse.rs +++ b/src/rust/protocol/src/memcache/wire/request/parse.rs @@ -66,6 +66,7 @@ impl Parse for MemcacheRequestParser { Err(ParseError::Invalid) } MemcacheCommand::FlushAll => parse_flush_all(buffer), + MemcacheCommand::Stop => parse_stop(buffer), } } } @@ -637,3 +638,22 @@ fn parse_flush_all(buffer: &[u8]) -> Result, ParseError consumed, }) } + +#[allow(clippy::unnecessary_unwrap)] +fn parse_stop(buffer: &[u8]) -> Result, ParseError> { + let mut parse_state = ParseState::new(buffer); + + // this was already checked for when determining the command + let (whitespace, _cmd_end) = parse_state.next_sequence().unwrap(); + + if whitespace != Sequence::Crlf && whitespace != Sequence::SpaceCrlf { + return Err(ParseError::Invalid); + } + + let consumed = parse_state.position(); + + Ok(ParseOk { + message: MemcacheRequest::Stop, + consumed, + }) +} diff --git a/src/rust/protocol/src/memcache/wire/response/mod.rs b/src/rust/protocol/src/memcache/wire/response/mod.rs index 657d03b44..20066e93b 
100644 --- a/src/rust/protocol/src/memcache/wire/response/mod.rs +++ b/src/rust/protocol/src/memcache/wire/response/mod.rs @@ -34,6 +34,7 @@ pub enum MemcacheResult { Stored, Error, Count(u64), + Stopped, } impl Debug for MemcacheResult { @@ -47,6 +48,7 @@ impl Debug for MemcacheResult { Self::Stored => "Stored", Self::Error => "Error", Self::Count(_) => "Count", + Self::Stopped => "Stopped", }; write!(f, "MemcacheResult::{}", name) } @@ -67,6 +69,7 @@ impl MemcacheResult { Self::Stored => b"STORED\r\n", Self::Error => b"ERROR\r\n", Self::Count(_) => b"", + Self::Stopped => b"Stopped\n", } } @@ -86,6 +89,7 @@ impl MemcacheResult { Self::Deleted => 7, Self::NotFound => 8, Self::NotStored => 9, + Self::Stopped => 10, // TODO: check this is the correct code // CLIENT_ERROR // SERVER_ERROR _ => usize::MAX, @@ -245,6 +249,8 @@ impl Compose for MemcacheResponse { CAS.increment(); } MemcacheRequest::FlushAll => {} + // TODO: if needed, add to this statement + MemcacheRequest::Stop => {} } if let MemcacheResult::Values { ref entries, cas } = self.result { let mut hits = 0; From c641d8fa07332457910331298ec4fb15224bbc00 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 16:33:25 +1100 Subject: [PATCH 61/74] fixed seg tests so graceful shutdown was part of configuration --- .../core/server/src/threads/worker/single.rs | 1 - src/rust/entrystore/src/seg/memcache.rs | 3 +- src/rust/entrystore/src/seg/mod.rs | 3 + src/rust/protocol/src/memcache/wire/mod.rs | 17 ++--- .../src/memcache/wire/request/command.rs | 2 +- .../protocol/src/memcache/wire/request/mod.rs | 1 - .../src/memcache/wire/response/mod.rs | 2 +- src/rust/storage/seg/src/builder.rs | 12 ++++ src/rust/storage/seg/src/seg.rs | 65 +++++++++++-------- src/rust/storage/seg/src/tests.rs | 40 ++++++++---- 10 files changed, 92 insertions(+), 54 deletions(-) diff --git a/src/rust/core/server/src/threads/worker/single.rs b/src/rust/core/server/src/threads/worker/single.rs index e575562ad..4e4b726e9 100644 
--- a/src/rust/core/server/src/threads/worker/single.rs +++ b/src/rust/core/server/src/threads/worker/single.rs @@ -200,7 +200,6 @@ where fn handle_data(&mut self, token: Token) -> Result<(), std::io::Error> { if let Ok(session) = self.poll.get_mut_session(token) { loop { - if session.write_capacity() == 0 { // if the write buffer is over-full, skip processing break; diff --git a/src/rust/entrystore/src/seg/memcache.rs b/src/rust/entrystore/src/seg/memcache.rs index 0466feb7c..9c6fcfd6c 100644 --- a/src/rust/entrystore/src/seg/memcache.rs +++ b/src/rust/entrystore/src/seg/memcache.rs @@ -181,8 +181,7 @@ impl MemcacheStorage for Seg { fn stop(&mut self) -> Result<(), MemcacheStorageError> { if self.data.flush().is_ok() { Ok(()) - } - else { + } else { Err(MemcacheStorageError::ServerError) } } diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index 34bb6206c..c921410c4 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -51,6 +51,9 @@ impl Seg { .eviction(eviction) .datapool_path(config.datapool_path()) .metadata_path(config.metadata_path()) + // TODO: perhaps there is a better way to indicate a graceful shutdown + // upon shutdown + .graceful_shutdown(config.graceful_shutdown()) .build(); Self { data } diff --git a/src/rust/protocol/src/memcache/wire/mod.rs b/src/rust/protocol/src/memcache/wire/mod.rs index 00862e392..99809b7f0 100644 --- a/src/rust/protocol/src/memcache/wire/mod.rs +++ b/src/rust/protocol/src/memcache/wire/mod.rs @@ -229,16 +229,13 @@ where MemcacheRequest::FlushAll => { return None; } - MemcacheRequest::Stop => { - let response = match self.stop() { - Ok(_) => MemcacheResult::Stopped, - Err(MemcacheStorageError::ServerError) => MemcacheResult::Error, - _ => { - unreachable!() - } - }; - response - } + MemcacheRequest::Stop => match self.stop() { + Ok(_) => MemcacheResult::Stopped, + Err(MemcacheStorageError::ServerError) => MemcacheResult::Error, + _ => { + unreachable!() 
+ } + }, }; Some(MemcacheResponse { request, result }) diff --git a/src/rust/protocol/src/memcache/wire/request/command.rs b/src/rust/protocol/src/memcache/wire/request/command.rs index f24bf53f7..035ce7586 100644 --- a/src/rust/protocol/src/memcache/wire/request/command.rs +++ b/src/rust/protocol/src/memcache/wire/request/command.rs @@ -69,7 +69,7 @@ impl std::fmt::Display for MemcacheCommand { Self::Decr => "decr", Self::Quit => "quit", Self::FlushAll => "flush_all", - Self::Stop => "stop" + Self::Stop => "stop", }; write!(f, "{}", name) } diff --git a/src/rust/protocol/src/memcache/wire/request/mod.rs b/src/rust/protocol/src/memcache/wire/request/mod.rs index 674c2b33a..bd1d6525a 100644 --- a/src/rust/protocol/src/memcache/wire/request/mod.rs +++ b/src/rust/protocol/src/memcache/wire/request/mod.rs @@ -103,7 +103,6 @@ impl MemcacheRequest { Self::Cas { .. } => MemcacheCommand::Cas, Self::FlushAll => MemcacheCommand::FlushAll, Self::Stop => MemcacheCommand::Stop, - } } } diff --git a/src/rust/protocol/src/memcache/wire/response/mod.rs b/src/rust/protocol/src/memcache/wire/response/mod.rs index 20066e93b..941cf9e4d 100644 --- a/src/rust/protocol/src/memcache/wire/response/mod.rs +++ b/src/rust/protocol/src/memcache/wire/response/mod.rs @@ -89,7 +89,7 @@ impl MemcacheResult { Self::Deleted => 7, Self::NotFound => 8, Self::NotStored => 9, - Self::Stopped => 10, // TODO: check this is the correct code + Self::Stopped => 10, // TODO: check this is the correct code // CLIENT_ERROR // SERVER_ERROR _ => usize::MAX, diff --git a/src/rust/storage/seg/src/builder.rs b/src/rust/storage/seg/src/builder.rs index 5ce8d30cc..abb908c32 100644 --- a/src/rust/storage/seg/src/builder.rs +++ b/src/rust/storage/seg/src/builder.rs @@ -16,6 +16,7 @@ pub struct Builder { overflow_factor: f64, segments_builder: SegmentsBuilder, metadata_path: Option, + graceful_shutdown: bool, } // Defines the default parameters @@ -27,6 +28,7 @@ impl Default for Builder { overflow_factor: 0.0, 
segments_builder: SegmentsBuilder::default(), metadata_path: None, + graceful_shutdown: false, } } } @@ -161,6 +163,14 @@ impl Builder { self } + /// Specify whether the cache will be gracefully shutdown. If `true`, then + /// when the cache is flushed, the relevant parts will be stored to the file + /// with path `metadata_path` + pub fn graceful_shutdown(mut self, graceful_shutdown: bool) -> Self { + self.graceful_shutdown = graceful_shutdown; + self + } + /// Consumes the builder and returns a fully-allocated `Seg` instance. /// If `restore`, the cache `Segments.data` is file backed by an existing /// file and a valid file for the `metadata` is given, `Seg` will be @@ -215,6 +225,7 @@ impl Builder { segments, ttl_buckets, metadata_path: self.metadata_path, + graceful_shutdown: self.graceful_shutdown, }; } } @@ -230,6 +241,7 @@ impl Builder { segments, ttl_buckets, metadata_path: self.metadata_path, + graceful_shutdown: self.graceful_shutdown, } } } diff --git a/src/rust/storage/seg/src/seg.rs b/src/rust/storage/seg/src/seg.rs index 5192118c0..eff802a63 100644 --- a/src/rust/storage/seg/src/seg.rs +++ b/src/rust/storage/seg/src/seg.rs @@ -24,13 +24,15 @@ static_metrics! { /// segment-structured design that stores data in fixed-size segments, grouping /// objects with nearby expiration time into the same segment, and lifting most /// per-object metadata into the shared segment header. -#[derive(Clone, PartialEq)] +#[derive(Clone)] pub struct Seg { pub(crate) hashtable: HashTable, pub(crate) segments: Segments, pub(crate) ttl_buckets: TtlBuckets, // Path to metadata datapool pub(crate) metadata_path: Option, + // Will the cache be gracefully shutdown? 
+ pub(crate) graceful_shutdown: bool, } impl Seg { @@ -53,36 +55,38 @@ impl Seg { Builder::default() } - /// Flushes cache by storing all the relevant fields of `Segments`, - /// `HashTable` and `TtlBuckets` to the `metadata` file (if it exists) and - // flushing `Segments.data` (if it is file backed) + /// If `graceful_shutdown`, flushe cache by storing all the relevant fields + /// of `Segments`, `HashTable` and `TtlBuckets` to the `metadata` file + /// (if it exists) and flushing `Segments.data` (if it is file backed) pub fn flush(&self) -> std::io::Result<()> { - if let Some(file) = &self.metadata_path { - let file_size = self.hashtable.recover_size() - + self.ttl_buckets.recover_size() - + self.segments.recover_size(); + if self.graceful_shutdown { + if let Some(file) = &self.metadata_path { + let file_size = self.hashtable.recover_size() + + self.ttl_buckets.recover_size() + + self.segments.recover_size(); - // Mmap file - let mut pool = File::create(file, file_size, true) - .expect("failed to allocate file backed storage"); - let metadata = pool.as_mut_slice(); + // Mmap file + let mut pool = File::create(file, file_size, true) + .expect("failed to allocate file backed storage"); + let metadata = pool.as_mut_slice(); - self.hashtable.flush(metadata); - let mut offset = self.hashtable.recover_size(); - self.ttl_buckets.flush(&mut metadata[offset..]); - offset += self.ttl_buckets.recover_size(); - self.segments.flush(&mut metadata[offset..])?; + self.hashtable.flush(metadata); + let mut offset = self.hashtable.recover_size(); + self.ttl_buckets.flush(&mut metadata[offset..]); + offset += self.ttl_buckets.recover_size(); + self.segments.flush(&mut metadata[offset..])?; - // TODO: check if this flushes the CPU caches - pool.flush()?; - Ok(()) - } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Path to datapool to is None, cannot gracefully - shutdown cache", - )) + // TODO: check if this flushes the CPU caches + pool.flush()?; + return Ok(()); 
+ } } + + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Path to datapool to is None, cannot gracefully + shutdown cache", + )) } /// Gets a count of items in the `Seg` instance. This is an expensive @@ -381,3 +385,12 @@ impl Seg { Ok(item) } } + +impl PartialEq for Seg { + // Checks if `Segments` are equivalent + fn eq(&self, other: &Self) -> bool { + self.segments == other.segments + && self.hashtable == other.hashtable + && self.ttl_buckets == other.ttl_buckets + } +} diff --git a/src/rust/storage/seg/src/tests.rs b/src/rust/storage/seg/src/tests.rs index 33a6f6f7e..b578be017 100644 --- a/src/rust/storage/seg/src/tests.rs +++ b/src/rust/storage/seg/src/tests.rs @@ -517,6 +517,7 @@ fn make_cache( restore: bool, datapool_path: Option, metadata_path: Option, + graceful_shutdown: bool, ) -> Seg { let segment_size = 4096; let segments = SEGMENTS; @@ -528,6 +529,7 @@ fn make_cache( .heap_size(heap_size) .datapool_path(datapool_path) // set path .metadata_path(metadata_path) // set path + .graceful_shutdown(graceful_shutdown) .build() } @@ -544,7 +546,8 @@ fn new_cache_file_backed() { // create new, file backed cache let restore = false; - let cache = make_cache(restore, datapool_path, None); + let graceful_shutdown = false; + let cache = make_cache(restore, datapool_path, None, graceful_shutdown); // the `Segments.data` should be filed backed assert!(cache.segments.data_file_backed()); @@ -558,7 +561,8 @@ fn new_cache_file_backed() { fn new_cache_not_file_backed() { // create new, not file backed cache let restore = false; - let cache = make_cache(restore, None, None); + let graceful_shutdown = false; + let cache = make_cache(restore, None, None, graceful_shutdown); // the `Segments.data` should not be filed backed assert!(!cache.segments.data_file_backed()); @@ -572,7 +576,8 @@ fn new_cache_not_file_backed() { #[test] fn restored_cache_no_paths_set() { let restore = true; - let cache = make_cache(restore, None, None); + let graceful_shutdown = false; + let 
cache = make_cache(restore, None, None, graceful_shutdown); // the `Segments.data` should not be filed backed assert!(!cache.segments.data_file_backed()); @@ -593,7 +598,8 @@ fn cache_gracefully_shutdown() { // create new, file backed cache let restore = false; - let cache = make_cache(restore, datapool_path, metadata_path); + let graceful_shutdown = true; + let cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); // Flush cache assert!(cache.flush().is_ok()); @@ -610,10 +616,12 @@ fn cache_not_gracefully_shutdown() { // create new, file backed cache let restore = false; + let graceful_shutdown = true; let cache = make_cache( restore, datapool_path, None, // Don't set a `HashTable` path + graceful_shutdown, ); // Flushing cache should fail @@ -635,7 +643,8 @@ fn new_file_backed_cache_changed_and_restored() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, metadata_path); + let mut graceful_shutdown = true; + let mut cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -667,7 +676,8 @@ fn new_file_backed_cache_changed_and_restored() { // This cache is file backed by same file as the above cache // saved `Segments.data` to and the `Seg` is restored restore = true; - let mut new_cache = make_cache(restore, datapool_path, metadata_path); + graceful_shutdown = false; + let mut new_cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(new_cache.restored()); // "latte" should be in restored cache @@ -692,7 +702,9 @@ fn new_file_backed_cache_not_changed_and_restored() { // create new, file backed cache let mut restore = false; - let cache = make_cache(restore, datapool_path, metadata_path); + let mut graceful_shutdown = true; + + let cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(!cache.restored()); @@ -710,7 +722,8 @@ fn 
new_file_backed_cache_not_changed_and_restored() { // This cache is file backed by same file as the above cache // saved `Segments.data` to and the `Seg` is restored restore = true; - let new_cache = make_cache(restore, datapool_path, metadata_path); + graceful_shutdown = false; + let new_cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(new_cache.restored()); @@ -731,7 +744,8 @@ fn new_cache_changed_and_not_restored() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, metadata_path); + let graceful_shutdown = true; + let mut cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -762,7 +776,7 @@ fn new_cache_changed_and_not_restored() { // This new cache is file backed by same file as the above cache // saved `Segments.data` to but this cache is treated as new restore = false; - let mut new_cache = make_cache(restore, datapool_path, None); + let mut new_cache = make_cache(restore, datapool_path, None, graceful_shutdown); assert!(!new_cache.restored()); assert_eq!(new_cache.items(), 0); @@ -794,7 +808,8 @@ fn full_cache_recovery_long() { // create new, file backed cache let mut restore = false; - let mut cache = make_cache(restore, datapool_path, metadata_path); + let mut graceful_shutdown = true; + let mut cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(!cache.restored()); assert_eq!(cache.items(), 0); @@ -847,7 +862,8 @@ fn full_cache_recovery_long() { // This new cache is file backed by same file as the above cache // saved `Segments.data` to and the `Seg` is restored restore = true; - let mut new_cache = make_cache(restore, datapool_path, metadata_path); + graceful_shutdown = false; + let mut new_cache = make_cache(restore, datapool_path, metadata_path, graceful_shutdown); assert!(new_cache.restored()); From 
b1ed33c10db496bd055516a187033b8de9c3dc96 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 16:36:07 +1100 Subject: [PATCH 62/74] added test config file --- config/segcache_test.toml | 92 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 config/segcache_test.toml diff --git a/config/segcache_test.toml b/config/segcache_test.toml new file mode 100644 index 000000000..e14a8d3c4 --- /dev/null +++ b/config/segcache_test.toml @@ -0,0 +1,92 @@ +daemonize = false + +[admin] +# interfaces listening on +host = "0.0.0.0" +# port listening on +port = "9999" + +[server] +# interfaces listening on +host = "0.0.0.0" +# port listening on +port = "12321" +# epoll timeout in milliseconds +timeout = 100 +# epoll max events returned +nevent = 1024 + +[worker] +# epoll timeout in milliseconds +timeout = 100 +# epoll max events returned +nevent = 1024 +# number of worker threads +threads = 4 + +# storage configuration +[seg] +# hash power adjusts how many items can be held in the hashtable +hash_power = 3 +# total bytes to use for item storage - 32 segments * 1024 segment size +heap_size = 32768 +# size of each segment in bytes +segment_size = 1024 +# number of segments for a non-evict compaction +compact_target = 2 +# number of segments to merge in one merge eviction pass +merge_target = 4 +# max number of segments to merge in one pass +merge_max = 8 +# use merge based eviction +eviction = "Merge" +# optionally, set a file path to back the data datapool +datapool_path = "/mnt/pmem1.0/cassy/data" +# set a file path to back the metadata datapool +metadata_path = "/mnt/pmem1.0/cassy/metadata" +# state whether cache will be restored +restore = false +# state whether cache will be flushed upon shutdown +gracefully_shutdown = true + +[time] +time_type = "Memcache" + +[buf] + +[debug] +# choose from: error, warn, info, debug, trace +log_level = "trace" +# optionally, log to the file below instead of standard out +# log_file = 
"segcache.log" +# backup file name for use with log rotation +log_backup = "segcache.log.old" +# trigger log rotation when the file grows beyond this size (in bytes). Set this +# option to '0' to disable log rotation. +log_max_size = 1073741824 + +[klog] +# optionally, log commands to the file below +# file = "segcache.cmd" +# backup file name for use with log rotation +backup = "segcache.cmd.old" +# trigger log rotation when the file grows beyond this size (in bytes). Set this +# option to '0' to disable log rotation. +max_size = 1073741824 +# specify the sampling ratio, 1 in N commands will be logged. Setting to '0' +# will disable command logging. +sample = 100 + +[sockio] + +[tcp] + +[tls] +# certificate chain used to validate client certificate +# certificate_chain = "client.chain" +# server certificate +# certificate = "server.crt" +# server private key +# private_key = "server.key" +# ca certificate file used as the root of trust +# ca_file = "ca.crt" From 466a4c1faef6be9181c123f7d14c6c8cdb894219 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 17:09:36 +1100 Subject: [PATCH 63/74] ran cargo fmt --- src/rust/entrystore/src/seg/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index c921410c4..183916381 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -51,8 +51,6 @@ impl Seg { .eviction(eviction) .datapool_path(config.datapool_path()) .metadata_path(config.metadata_path()) - // TODO: perhaps there is a better way to indicate a graceful shutdown - // upon shutdown .graceful_shutdown(config.graceful_shutdown()) .build(); From 7d7e952c483ecd78d2acc47cf75c8d8781c8b190 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 3 Mar 2022 17:12:17 +1100 Subject: [PATCH 64/74] fixed bug in config file --- config/segcache_test.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/segcache_test.toml 
b/config/segcache_test.toml index e14a8d3c4..6e3060ced 100644 --- a/config/segcache_test.toml +++ b/config/segcache_test.toml @@ -47,7 +47,7 @@ metadata_path = "/mnt/pmem1.0/cassy/metadata" # state whether cache will be restored restore = false # state whether cache will be flushed upon shutdown -gracefully_shutdown = true +graceful_shutdown = true [time] time_type = "Memcache" From e8c0b6e01e18cc64e95c25f83ad7a76f180f0fd2 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Mon, 7 Mar 2022 13:51:02 +1100 Subject: [PATCH 65/74] Added the Stop command wherever FlushAll command was implemented --- src/rust/core/server/src/threads/admin.rs | 1 + src/rust/protocol/src/admin.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/rust/core/server/src/threads/admin.rs b/src/rust/core/server/src/threads/admin.rs index 1f15f4d63..e52357db3 100644 --- a/src/rust/core/server/src/threads/admin.rs +++ b/src/rust/core/server/src/threads/admin.rs @@ -434,6 +434,7 @@ impl EventLoop for Admin { match request { AdminRequest::FlushAll => {} + AdminRequest::Stop => {} AdminRequest::Stats => { Self::handle_stats_request(session); } diff --git a/src/rust/protocol/src/admin.rs b/src/rust/protocol/src/admin.rs index 0222ea490..430fef4b8 100644 --- a/src/rust/protocol/src/admin.rs +++ b/src/rust/protocol/src/admin.rs @@ -18,6 +18,7 @@ pub enum AdminRequest { Stats, Version, Quit, + Stop, } #[derive(Default, Copy, Clone)] @@ -62,6 +63,10 @@ impl Parse for AdminRequestParser { message: AdminRequest::Version, consumed: command_end + CRLF.len(), }), + b"stop" => Ok(ParseOk { + message: AdminRequest::Stop, + consumed: command_end + CRLF.len(), + }), _ => Err(ParseError::UnknownCommand), } } From 85ab29a04ae83ebe76cd89bb89d14f2a23796ab6 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 9 Mar 2022 10:09:57 +1100 Subject: [PATCH 66/74] stop signal sent to admin stops cache from processing events and should flush the cache --- src/rust/common/src/signal.rs | 1 + 
src/rust/core/server/src/lib.rs | 5 +++++ src/rust/core/server/src/threads/admin.rs | 21 +++++++++++++++++-- src/rust/core/server/src/threads/listener.rs | 5 ++--- .../core/server/src/threads/worker/multi.rs | 8 ++----- .../core/server/src/threads/worker/single.rs | 6 +++++- .../core/server/src/threads/worker/storage.rs | 11 +++++----- src/rust/entrystore/src/lib.rs | 3 +++ src/rust/entrystore/src/noop/mod.rs | 1 + src/rust/entrystore/src/seg/mod.rs | 16 ++++++-------- 10 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/rust/common/src/signal.rs b/src/rust/common/src/signal.rs index f4f89a342..bbeff1f0a 100644 --- a/src/rust/common/src/signal.rs +++ b/src/rust/common/src/signal.rs @@ -5,4 +5,5 @@ #[derive(Clone)] pub enum Signal { Shutdown, + Stop, } diff --git a/src/rust/core/server/src/lib.rs b/src/rust/core/server/src/lib.rs index 3b9198c2f..a0b215ef7 100644 --- a/src/rust/core/server/src/lib.rs +++ b/src/rust/core/server/src/lib.rs @@ -114,6 +114,11 @@ pub const DEFAULT_BUFFER_SIZE: usize = 16 * 1024; // 16KB // specific upper bounds. const ADMIN_MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024; // 1MB +// TODO(bmartin): this *should* be plenty safe, the queue should rarely ever be +// full, and a single wakeup should drain at least one message and make room for +// the response. A stat to prove that this is sufficient would be good. 
+const QUEUE_RETRIES: usize = 3; + const THREAD_PREFIX: &str = "pelikan"; metrics::test_no_duplicates!(); diff --git a/src/rust/core/server/src/threads/admin.rs b/src/rust/core/server/src/threads/admin.rs index e52357db3..ad2500880 100644 --- a/src/rust/core/server/src/threads/admin.rs +++ b/src/rust/core/server/src/threads/admin.rs @@ -7,6 +7,7 @@ use super::EventLoop; use crate::poll::{Poll, LISTENER_TOKEN, WAKER_TOKEN}; +use crate::QUEUE_RETRIES; use crate::TCP_ACCEPT_EX; use common::signal::Signal; use common::ssl::{HandshakeError, MidHandshakeSslStream, Ssl, SslContext, SslStream}; @@ -324,7 +325,6 @@ impl Admin { self.do_accept(); } WAKER_TOKEN => { - #[allow(clippy::never_loop)] // check if we have received signals from any sibling // thread while let Ok(signal) = self.signal_queue.recv_from(0) { @@ -341,6 +341,7 @@ impl Admin { let _ = self.log_drain.flush(); return; } + Signal::Stop => {} } } } @@ -434,7 +435,23 @@ impl EventLoop for Admin { match request { AdminRequest::FlushAll => {} - AdminRequest::Stop => {} + AdminRequest::Stop => { + for _ in 0..QUEUE_RETRIES { + if self.signal_queue.broadcast(Signal::Stop).is_ok() { + warn!("sending stop signal"); + break; + } + } + for _ in 0..QUEUE_RETRIES { + if self.signal_queue.wake_all().is_ok() { + break; + } + } + + let _ = session.write(b"OK\r\n"); + session.finalize_response(); + ADMIN_RESPONSE_COMPOSE.increment(); + } AdminRequest::Stats => { Self::handle_stats_request(session); } diff --git a/src/rust/core/server/src/threads/listener.rs b/src/rust/core/server/src/threads/listener.rs index 48bb5498b..68b5d7c75 100644 --- a/src/rust/core/server/src/threads/listener.rs +++ b/src/rust/core/server/src/threads/listener.rs @@ -216,14 +216,13 @@ impl Listener { LISTENER_TOKEN => { self.do_accept(); } - WAKER_TOKEN => - { - #[allow(clippy::never_loop)] + WAKER_TOKEN => { while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { Signal::Shutdown => { return; } + Signal::Stop => {} } } } diff --git 
a/src/rust/core/server/src/threads/worker/multi.rs b/src/rust/core/server/src/threads/worker/multi.rs index 9fbfce5d3..33c42d7ff 100644 --- a/src/rust/core/server/src/threads/worker/multi.rs +++ b/src/rust/core/server/src/threads/worker/multi.rs @@ -12,6 +12,7 @@ use super::*; use crate::poll::Poll; use crate::threads::worker::StorageWorker; use crate::threads::worker::TokenWrapper; +use crate::QUEUE_RETRIES; use common::signal::Signal; use common::time::Instant; use config::WorkerConfig; @@ -26,11 +27,6 @@ use session::Session; use std::io::{BufRead, Write}; use std::sync::Arc; -// TODO(bmartin): this *should* be plenty safe, the queue should rarely ever be -// full, and a single wakeup should drain at least one message and make room for -// the request. A stat to prove that this is sufficient would be good. -const QUEUE_RETRIES: usize = 3; - const WAKER_TOKEN: usize = usize::MAX; /// A `MultiWorker` handles events on `Session`s and routes storage requests to @@ -110,7 +106,6 @@ where self.handle_new_sessions(); self.handle_storage_queue(); - #[allow(clippy::never_loop)] // check if we received any signals from the admin thread while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { @@ -119,6 +114,7 @@ where // and stop processing events return; } + Signal::Stop => {} } } } diff --git a/src/rust/core/server/src/threads/worker/single.rs b/src/rust/core/server/src/threads/worker/single.rs index 4e4b726e9..b3e9c3ac5 100644 --- a/src/rust/core/server/src/threads/worker/single.rs +++ b/src/rust/core/server/src/threads/worker/single.rs @@ -97,7 +97,6 @@ where WAKER_TOKEN => { self.handle_new_sessions(); - #[allow(clippy::never_loop)] // check if we received any signals from the admin thread while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { @@ -106,6 +105,11 @@ where // and stop processing events return; } + Signal::Stop => { + warn!("received stop"); + self.storage.flush(); + return; + } } } } diff --git 
a/src/rust/core/server/src/threads/worker/storage.rs b/src/rust/core/server/src/threads/worker/storage.rs index 41b957b3a..cb8aac1ee 100644 --- a/src/rust/core/server/src/threads/worker/storage.rs +++ b/src/rust/core/server/src/threads/worker/storage.rs @@ -4,6 +4,7 @@ use super::*; use crate::threads::worker::TokenWrapper; +use crate::QUEUE_RETRIES; use common::signal::Signal; use common::time::Instant; use config::WorkerConfig; @@ -17,11 +18,6 @@ use protocol::{Compose, Execute}; use queues::{QueueError, QueuePair, QueuePairs}; use std::sync::Arc; -// TODO(bmartin): this *should* be plenty safe, the queue should rarely ever be -// full, and a single wakeup should drain at least one message and make room for -// the response. A stat to prove that this is sufficient would be good. -const QUEUE_RETRIES: usize = 3; - const WAKER_TOKEN: usize = usize::MAX; /// A `Storage` thread is used in a multi-worker configuration. It owns the @@ -158,6 +154,11 @@ where return; } + Signal::Stop => { + warn!("received stop"); + self.storage.flush(); + return; + } } } } diff --git a/src/rust/entrystore/src/lib.rs b/src/rust/entrystore/src/lib.rs index feac89e40..725860507 100644 --- a/src/rust/entrystore/src/lib.rs +++ b/src/rust/entrystore/src/lib.rs @@ -21,4 +21,7 @@ pub trait EntryStore { /// implementation is a no-op. Types which can efficiently implement eager /// expiration should implement their own handling logic for this function. fn expire(&mut self) {} + + /// Flush all values from the entry store to persistent storage. 
+ fn flush(&mut self); } diff --git a/src/rust/entrystore/src/noop/mod.rs b/src/rust/entrystore/src/noop/mod.rs index c47159367..b2c5b78df 100644 --- a/src/rust/entrystore/src/noop/mod.rs +++ b/src/rust/entrystore/src/noop/mod.rs @@ -22,4 +22,5 @@ impl Noop { impl EntryStore for Noop { fn expire(&mut self) {} + fn flush(&mut self) {} } diff --git a/src/rust/entrystore/src/seg/mod.rs b/src/rust/entrystore/src/seg/mod.rs index 183916381..70b7b48fb 100644 --- a/src/rust/entrystore/src/seg/mod.rs +++ b/src/rust/entrystore/src/seg/mod.rs @@ -56,20 +56,16 @@ impl Seg { Self { data } } - - /// Flush (gracefully shutdown) the `Seg` cache if configured to do so - pub fn flush(self, config: &T) { - let config = config.seg(); - - if config.graceful_shutdown() { - // TODO: check if successfully shutdown and record result - self.data.flush(); - }; - } } impl EntryStore for Seg { fn expire(&mut self) { self.data.expire(); } + + /// Flush (gracefully shutdown) the `Seg` cache + fn flush(&mut self) { + // TODO: check if successfully shutdown and record result + self.data.flush(); + } } From 7dc5c48ee9beffe2ab48f39945ac85450bcf372f Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 9 Mar 2022 11:52:02 +1100 Subject: [PATCH 67/74] listener, admin and multi now return upon Stop --- config/segcache_test.toml | 2 +- src/rust/core/server/src/threads/admin.rs | 12 +++++++++++- src/rust/core/server/src/threads/listener.rs | 4 +++- src/rust/core/server/src/threads/worker/multi.rs | 4 +++- src/rust/core/server/src/threads/worker/storage.rs | 1 - 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/config/segcache_test.toml b/config/segcache_test.toml index 6e3060ced..45f454f38 100644 --- a/config/segcache_test.toml +++ b/config/segcache_test.toml @@ -22,7 +22,7 @@ timeout = 100 # epoll max events returned nevent = 1024 # number of worker threads -threads = 4 +threads = 1 # storage configuration [seg] diff --git a/src/rust/core/server/src/threads/admin.rs 
b/src/rust/core/server/src/threads/admin.rs index ad2500880..76ac1f80e 100644 --- a/src/rust/core/server/src/threads/admin.rs +++ b/src/rust/core/server/src/threads/admin.rs @@ -341,7 +341,15 @@ impl Admin { let _ = self.log_drain.flush(); return; } - Signal::Stop => {} + Signal::Stop => { + info!("stopping"); + let _ = self.signal_queue.broadcast(Signal::Stop); + if self.signal_queue.wake_all().is_err() { + fatal!("error waking threads for stop"); + } + let _ = self.log_drain.flush(); + return; + } } } } @@ -442,6 +450,8 @@ impl EventLoop for Admin { break; } } + + for _ in 0..QUEUE_RETRIES { if self.signal_queue.wake_all().is_ok() { break; diff --git a/src/rust/core/server/src/threads/listener.rs b/src/rust/core/server/src/threads/listener.rs index 68b5d7c75..2a29d65a9 100644 --- a/src/rust/core/server/src/threads/listener.rs +++ b/src/rust/core/server/src/threads/listener.rs @@ -222,7 +222,9 @@ impl Listener { Signal::Shutdown => { return; } - Signal::Stop => {} + Signal::Stop => { + return; + } } } } diff --git a/src/rust/core/server/src/threads/worker/multi.rs b/src/rust/core/server/src/threads/worker/multi.rs index 33c42d7ff..71f546fe8 100644 --- a/src/rust/core/server/src/threads/worker/multi.rs +++ b/src/rust/core/server/src/threads/worker/multi.rs @@ -114,7 +114,9 @@ where // and stop processing events return; } - Signal::Stop => {} + Signal::Stop => { + return; + } } } } diff --git a/src/rust/core/server/src/threads/worker/storage.rs b/src/rust/core/server/src/threads/worker/storage.rs index cb8aac1ee..59abb2876 100644 --- a/src/rust/core/server/src/threads/worker/storage.rs +++ b/src/rust/core/server/src/threads/worker/storage.rs @@ -141,7 +141,6 @@ where } } - #[allow(clippy::never_loop)] // check if we received any signals from the admin thread while let Ok(s) = self.signal_queue.recv_from(0) { match s { From 9d4642519ef7cf5a8472ae76af0e2846d3a1839e Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 9 Mar 2022 14:10:05 +1100 Subject: [PATCH 
68/74] attempted to add admin's own QueuePair to its signal_queue so it can receive its broadcast to its signal_queue --- config/segcache_test.toml | 2 +- src/rust/core/server/src/threads/admin.rs | 17 ++++++++--------- src/rust/core/server/src/threads/listener.rs | 5 ++++- .../core/server/src/threads/worker/multi.rs | 2 ++ .../core/server/src/threads/worker/single.rs | 1 + .../core/server/src/threads/worker/storage.rs | 1 + 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/config/segcache_test.toml b/config/segcache_test.toml index 45f454f38..8470a9708 100644 --- a/config/segcache_test.toml +++ b/config/segcache_test.toml @@ -56,7 +56,7 @@ time_type = "Memcache" [debug] # choose from: error, warn, info, debug, trace -log_level = "trace" +log_level = "warn" # optionally, log to the file below instead of standard out # log_file = "segcache.log" # backup file name for use with log rotation diff --git a/src/rust/core/server/src/threads/admin.rs b/src/rust/core/server/src/threads/admin.rs index 76ac1f80e..d00a91abe 100644 --- a/src/rust/core/server/src/threads/admin.rs +++ b/src/rust/core/server/src/threads/admin.rs @@ -327,7 +327,8 @@ impl Admin { WAKER_TOKEN => { // check if we have received signals from any sibling // thread - while let Ok(signal) = self.signal_queue.recv_from(0) { + #[allow(clippy::never_loop)] + while let Ok(signal) = self.signal_queue.recv_from(1) { match signal { Signal::Shutdown => { // if a shutdown is received from any @@ -342,11 +343,7 @@ impl Admin { return; } Signal::Stop => { - info!("stopping"); - let _ = self.signal_queue.broadcast(Signal::Stop); - if self.signal_queue.wake_all().is_err() { - fatal!("error waking threads for stop"); - } + warn!("received stop"); let _ = self.log_drain.flush(); return; } @@ -444,14 +441,16 @@ impl EventLoop for Admin { match request { AdminRequest::FlushAll => {} AdminRequest::Stop => { + let admin_queue = self.signal_queue.new_pair(128, None); + self.signal_queue.add_pair(admin_queue); + 
for _ in 0..QUEUE_RETRIES { + // Send Stop to all other threads if self.signal_queue.broadcast(Signal::Stop).is_ok() { - warn!("sending stop signal"); + warn!("sending stop signal to all threads"); break; } } - - for _ in 0..QUEUE_RETRIES { if self.signal_queue.wake_all().is_ok() { break; diff --git a/src/rust/core/server/src/threads/listener.rs b/src/rust/core/server/src/threads/listener.rs index 2a29d65a9..dd9d7acfc 100644 --- a/src/rust/core/server/src/threads/listener.rs +++ b/src/rust/core/server/src/threads/listener.rs @@ -216,13 +216,16 @@ impl Listener { LISTENER_TOKEN => { self.do_accept(); } - WAKER_TOKEN => { + WAKER_TOKEN => + { + #[allow(clippy::never_loop)] while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { Signal::Shutdown => { return; } Signal::Stop => { + warn!("received stop"); return; } } diff --git a/src/rust/core/server/src/threads/worker/multi.rs b/src/rust/core/server/src/threads/worker/multi.rs index 71f546fe8..ef4a1edaa 100644 --- a/src/rust/core/server/src/threads/worker/multi.rs +++ b/src/rust/core/server/src/threads/worker/multi.rs @@ -107,6 +107,7 @@ where self.handle_storage_queue(); // check if we received any signals from the admin thread + #[allow(clippy::never_loop)] while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { Signal::Shutdown => { @@ -115,6 +116,7 @@ where return; } Signal::Stop => { + warn!("received stop"); return; } } diff --git a/src/rust/core/server/src/threads/worker/single.rs b/src/rust/core/server/src/threads/worker/single.rs index b3e9c3ac5..ca4c4eeb5 100644 --- a/src/rust/core/server/src/threads/worker/single.rs +++ b/src/rust/core/server/src/threads/worker/single.rs @@ -98,6 +98,7 @@ where self.handle_new_sessions(); // check if we received any signals from the admin thread + #[allow(clippy::never_loop)] while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { Signal::Shutdown => { diff --git a/src/rust/core/server/src/threads/worker/storage.rs 
b/src/rust/core/server/src/threads/worker/storage.rs index 59abb2876..0cfd89896 100644 --- a/src/rust/core/server/src/threads/worker/storage.rs +++ b/src/rust/core/server/src/threads/worker/storage.rs @@ -142,6 +142,7 @@ where } // check if we received any signals from the admin thread + #[allow(clippy::never_loop)] while let Ok(s) = self.signal_queue.recv_from(0) { match s { Signal::Shutdown => { From 2318a472d7c96889cbff0200e2a004a6f78c2d57 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 9 Mar 2022 14:35:58 +1100 Subject: [PATCH 69/74] changed back to id=0 when admin receiving signals --- src/rust/core/server/src/threads/admin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/core/server/src/threads/admin.rs b/src/rust/core/server/src/threads/admin.rs index d00a91abe..839cd65cc 100644 --- a/src/rust/core/server/src/threads/admin.rs +++ b/src/rust/core/server/src/threads/admin.rs @@ -328,7 +328,7 @@ impl Admin { // check if we have received signals from any sibling // thread #[allow(clippy::never_loop)] - while let Ok(signal) = self.signal_queue.recv_from(1) { + while let Ok(signal) = self.signal_queue.recv_from(0) { match signal { Signal::Shutdown => { // if a shutdown is received from any From be9a15aa9189a119921a0961eca765d23c7fffbc Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Mar 2022 10:15:35 +1100 Subject: [PATCH 70/74] undid Stop responsse through non-admin port --- src/rust/protocol/src/memcache/wire/mod.rs | 8 ++------ src/rust/protocol/src/memcache/wire/response/mod.rs | 4 ---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/rust/protocol/src/memcache/wire/mod.rs b/src/rust/protocol/src/memcache/wire/mod.rs index 99809b7f0..84bccaaf2 100644 --- a/src/rust/protocol/src/memcache/wire/mod.rs +++ b/src/rust/protocol/src/memcache/wire/mod.rs @@ -229,12 +229,8 @@ where MemcacheRequest::FlushAll => { return None; } - MemcacheRequest::Stop => match self.stop() { - Ok(_) => 
MemcacheResult::Stopped, - Err(MemcacheStorageError::ServerError) => MemcacheResult::Error, - _ => { - unreachable!() - } + MemcacheRequest::Stop => { + return None; }, }; diff --git a/src/rust/protocol/src/memcache/wire/response/mod.rs b/src/rust/protocol/src/memcache/wire/response/mod.rs index 941cf9e4d..df188130d 100644 --- a/src/rust/protocol/src/memcache/wire/response/mod.rs +++ b/src/rust/protocol/src/memcache/wire/response/mod.rs @@ -34,7 +34,6 @@ pub enum MemcacheResult { Stored, Error, Count(u64), - Stopped, } impl Debug for MemcacheResult { @@ -48,7 +47,6 @@ impl Debug for MemcacheResult { Self::Stored => "Stored", Self::Error => "Error", Self::Count(_) => "Count", - Self::Stopped => "Stopped", }; write!(f, "MemcacheResult::{}", name) } @@ -69,7 +67,6 @@ impl MemcacheResult { Self::Stored => b"STORED\r\n", Self::Error => b"ERROR\r\n", Self::Count(_) => b"", - Self::Stopped => b"Stopped\n", } } @@ -89,7 +86,6 @@ impl MemcacheResult { Self::Deleted => 7, Self::NotFound => 8, Self::NotStored => 9, - Self::Stopped => 10, // TODO: check this is the correct code // CLIENT_ERROR // SERVER_ERROR _ => usize::MAX, From ae713b763250365efcf583673cb352e1375db12e Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Mar 2022 10:54:04 +1100 Subject: [PATCH 71/74] removed non-admin parsing of Stop command --- config/segcache_test.toml | 6 ++++-- src/rust/entrystore/src/seg/memcache.rs | 8 -------- src/rust/protocol/src/memcache/storage/mod.rs | 3 --- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/config/segcache_test.toml b/config/segcache_test.toml index 8470a9708..4226361ee 100644 --- a/config/segcache_test.toml +++ b/config/segcache_test.toml @@ -41,9 +41,11 @@ merge_max = 8 # use merge based eviction eviction = "Merge" # optionally, set a file path to back the data datapool -datapool_path = "/mnt/pmem1.0/cassy/data" +#datapool_path = "/home/users/u6632448/file_for_data" +#datapool_path = "/mnt/pmem1.0/cassy/data" # set a file path to back the 
metadata datapool -metadata_path = "/mnt/pmem1.0/cassy/metadata" +#metadata_path = "/home/users/u6632448/file_for_metadata" +#metadata_path = "/mnt/pmem1.0/cassy/metadata" # state whether cache will be restored restore = false # state whether cache will be flushed upon shutdown diff --git a/src/rust/entrystore/src/seg/memcache.rs b/src/rust/entrystore/src/seg/memcache.rs index 9c6fcfd6c..c0a6f89ef 100644 --- a/src/rust/entrystore/src/seg/memcache.rs +++ b/src/rust/entrystore/src/seg/memcache.rs @@ -177,12 +177,4 @@ impl MemcacheStorage for Seg { Err(_) => Err(MemcacheStorageError::NotStored), } } - - fn stop(&mut self) -> Result<(), MemcacheStorageError> { - if self.data.flush().is_ok() { - Ok(()) - } else { - Err(MemcacheStorageError::ServerError) - } - } } diff --git a/src/rust/protocol/src/memcache/storage/mod.rs b/src/rust/protocol/src/memcache/storage/mod.rs index 09abe8523..117ddf704 100644 --- a/src/rust/protocol/src/memcache/storage/mod.rs +++ b/src/rust/protocol/src/memcache/storage/mod.rs @@ -54,7 +54,4 @@ pub trait MemcacheStorage { /// Compare and store on the CAS value, replacing the stored item if the CAS /// value matches the provided value. 
fn cas(&mut self, entry: &MemcacheEntry) -> Result<(), MemcacheStorageError>; - - /// Triggers a shutdown of the cache - fn stop(&mut self) -> Result<(), MemcacheStorageError>; } From 553b3339db70d7e54c89f8522c6e9c3c7d92c060 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Mar 2022 11:01:06 +1100 Subject: [PATCH 72/74] updated config --- config/segcache_test.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/segcache_test.toml b/config/segcache_test.toml index 4226361ee..406ad8fed 100644 --- a/config/segcache_test.toml +++ b/config/segcache_test.toml @@ -41,10 +41,10 @@ merge_max = 8 # use merge based eviction eviction = "Merge" # optionally, set a file path to back the data datapool -#datapool_path = "/home/users/u6632448/file_for_data" +datapool_path = "/home/users/u6632448/file_for_data" #datapool_path = "/mnt/pmem1.0/cassy/data" # set a file path to back the metadata datapool -#metadata_path = "/home/users/u6632448/file_for_metadata" +metadata_path = "/home/users/u6632448/file_for_metadata" #metadata_path = "/mnt/pmem1.0/cassy/metadata" # state whether cache will be restored restore = false From befab11804211cf5ca62aa7e0293d218be442856 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Wed, 16 Mar 2022 12:59:18 +1100 Subject: [PATCH 73/74] ran cargo fmt --- src/rust/protocol/src/memcache/wire/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/protocol/src/memcache/wire/mod.rs b/src/rust/protocol/src/memcache/wire/mod.rs index 84bccaaf2..4f9787f0e 100644 --- a/src/rust/protocol/src/memcache/wire/mod.rs +++ b/src/rust/protocol/src/memcache/wire/mod.rs @@ -231,7 +231,7 @@ where } MemcacheRequest::Stop => { return None; - }, + } }; Some(MemcacheResponse { request, result }) From 19e014726cb4c18c144806df8540ae386ee8ecf9 Mon Sep 17 00:00:00 2001 From: Cassy Chun-Crogan Date: Thu, 24 Mar 2022 09:16:02 +1100 Subject: [PATCH 74/74] removed unnecessary adding to admin queue --- 
src/rust/core/server/src/threads/admin.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rust/core/server/src/threads/admin.rs b/src/rust/core/server/src/threads/admin.rs index 839cd65cc..abb2c24ac 100644 --- a/src/rust/core/server/src/threads/admin.rs +++ b/src/rust/core/server/src/threads/admin.rs @@ -441,8 +441,6 @@ impl EventLoop for Admin { match request { AdminRequest::FlushAll => {} AdminRequest::Stop => { - let admin_queue = self.signal_queue.new_pair(128, None); - self.signal_queue.add_pair(admin_queue); for _ in 0..QUEUE_RETRIES { // Send Stop to all other threads