diff --git a/.gitignore b/.gitignore
index 118714d18..6cb4d5724 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 **/*.rs.bk
 Cargo.lock
 /local_corpus_files
+/local_dict_corpus_files
 /orig-zstd
 fuzz_decodecorpus
 perf.data*
diff --git a/Cargo.toml b/Cargo.toml
index 884b83591..5a7cec4a8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,10 @@ readme = "Readme.md"
 keywords = ["zstd", "zstandard", "decompression"]
 categories = ["compression"]
 
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
 [dependencies]
 twox-hash = { version = "2.0", default-features = false, features = ["xxhash64"], optional = true }
 
@@ -20,17 +24,20 @@ twox-hash = { version = "2.0", default-features = false, features = ["xxhash64"]
 compiler_builtins = { version = "0.1.2", optional = true }
 core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
 alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-alloc" }
+fastrand = "2.3.0"
+
 
 [dev-dependencies]
 criterion = "0.5"
 rand = { version = "0.8.5", features = ["small_rng"] }
-zstd = "0.13.2"
+zstd =  { version = "0.13.2", features = ["zstdmt"]}
 
 [features]
 default = ["hash", "std"]
 hash = ["dep:twox-hash"]
 fuzz_exports = []
 std = []
+dict_builder = ["std"]
 
 # Internal feature, only used when building as part of libstd, not part of the
 # stable interface of this crate.
@@ -47,3 +54,7 @@ required-features = ["std"]
 [[bin]]
 name = "zstd_stream"
 required-features = ["std"]
+
+[[bin]]
+name = "zstd_dict"
+required-features = ["std", "dict_builder"]
diff --git a/Readme.md b/Readme.md
index 3281a3d02..79d2ff943 100644
--- a/Readme.md
+++ b/Readme.md
@@ -15,8 +15,20 @@ This crate is currently actively maintained.
 
 # Current Status
 
-Feature complete on the decoder side.
+## Decompression
+The `decoding` module provides a complete
+implementation of a Zstandard decompressor.
+
+In terms of speed, `ruzstd` is behind the original C implementation
+which has a rust binding located [here](https://github.com/gyscos/zstd-rs).
+
+Measuring with the 'time' utility the original zstd and my decoder both
+decoding the same enwik9.zst file from a ramfs, my decoder is about 3.5
+times slower. Enwik9 is highly compressible, for less compressible data
+(like a ubuntu installation .iso) my decoder comes close to only being
+1.4 times slower.
 
+## Compression
 On the compression side:
 - Support for generating compressed blocks at any compression level
   - [x] Uncompressed
@@ -24,13 +36,28 @@ On the compression side:
   - [ ] Default (roughly level 3)
   - [ ] Better (roughly level 7)
   - [ ] Best (roughly level 11)
-- [ ] Checksums
+- [x] Checksums
 - [ ] Dictionaries
 
-## Speed
-In terms of speed this library is behind the original C implementation which has a rust binding located [here](https://github.com/gyscos/zstd-rs).
+## Dictionary Generation
+When the `dict_builder` feature is enabled, the `dictionary` module
+provides the ability to create new dictionaries. 
+
+On the `github-users` sample set, our implementation benchmarks within
+0.2% of the official implementation (as of commit 
+`09e52d07340acdb2e13817b066e8be6e424f7258`):
+```no_build
+uncompressed: 100.00% (7484607 bytes)
+no dict: 34.99% of original size (2618872 bytes)
+reference dict: 16.16% of no dict size (2195672 bytes smaller)
+our dict: 16.28% of no dict size (2192400 bytes smaller)
+```
+
+The dictionary generator only provides support for creating "raw
+content" dictionaries. Tagged dictionaries are currently unsupported.
 
-Measuring with the 'time' utility the original zstd and my decoder both decoding the same enwik9.zst file from a ramfs, my decoder is about 3.5 times slower. Enwik9 is highly compressible, for less compressible data (like a ubuntu installation .iso) my decoder comes close to only being 1.4 times slower.
+See <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format>
+for clarification.
 
 
 # How can you use it?
diff --git a/src/bin/zstd.rs b/src/bin/zstd.rs
index 4ec9db77e..4c629b7cf 100644
--- a/src/bin/zstd.rs
+++ b/src/bin/zstd.rs
@@ -21,7 +21,7 @@ struct StateTracker {
     file_size: u64,
     old_percentage: i8,
 }
-
+#[allow(unused)]
 fn decompress(flags: &[String], file_paths: &[String]) {
     if !flags.contains(&"-d".to_owned()) {
         eprintln!("This zstd implementation only supports decompression. Please add a \"-d\" flag");
@@ -128,6 +128,7 @@ fn decompress(flags: &[String], file_paths: &[String]) {
     }
 }
 
+#[allow(unused)]
 struct PercentPrintReader<R: Read> {
     total: usize,
     counter: usize,
diff --git a/src/bin/zstd_dict.rs b/src/bin/zstd_dict.rs
new file mode 100644
index 000000000..54a4d2651
--- /dev/null
+++ b/src/bin/zstd_dict.rs
@@ -0,0 +1,24 @@
+use ruzstd::dictionary::{create_raw_dict_from_dir, create_raw_dict_from_source};
+use std::env::args;
+use std::fs::File;
+use std::path::Path;
+
+fn main() {
+    let args: Vec<String> = args().collect();
+    let input_path: &Path = args.get(1).expect("no input provided").as_ref();
+    let output_path: &Path = args.get(2).expect("no output path provided").as_ref();
+    let dict_size = args
+        .get(3)
+        .expect("no dict size provided (kb)")
+        .parse::<usize>()
+        .expect("dict size was not a valid num");
+
+    let mut output = File::create(output_path).unwrap();
+    if input_path.is_file() {
+        let source = File::open(input_path).expect("unable to open input path");
+        let source_size = source.metadata().unwrap().len();
+        create_raw_dict_from_source(source, source_size as usize, &mut output, dict_size);
+    } else {
+        create_raw_dict_from_dir(input_path, &mut output, dict_size).unwrap();
+    }
+}
diff --git a/src/bit_io/bit_reader.rs b/src/bit_io/bit_reader.rs
index 0d3e807aa..2140ddb3b 100644
--- a/src/bit_io/bit_reader.rs
+++ b/src/bit_io/bit_reader.rs
@@ -66,7 +66,7 @@ impl<'s> BitReader<'s> {
 
             let mut bit_shift = bits_left_in_current_byte; //this many bits are already set in value
 
-            assert!(self.idx % 8 == 0);
+            assert!(self.idx.is_multiple_of(8));
 
             //collect full bytes
             for _ in 0..full_bytes_needed {
@@ -116,7 +116,7 @@ impl core::fmt::Display for GetBitsError {
             } => {
                 write!(
                     f,
-                    "Cant serve this request. The reader is limited to {limit} bits, requested {num_requested_bits} bits",
+                    "Cant serve this request. The reader is limited to {limit} bits, requested {num_requested_bits} bits"
                 )
             }
             GetBitsError::NotEnoughRemainingBits {
@@ -125,7 +125,7 @@ impl core::fmt::Display for GetBitsError {
             } => {
                 write!(
                     f,
-                    "Can\'t read {requested} bits, only have {remaining} bits left",
+                    "Can\'t read {requested} bits, only have {remaining} bits left"
                 )
             }
         }
diff --git a/src/bit_io/bit_writer.rs b/src/bit_io/bit_writer.rs
index fb809926c..7ce228a54 100644
--- a/src/bit_io/bit_writer.rs
+++ b/src/bit_io/bit_writer.rs
@@ -45,7 +45,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
 
     /// Reset to an index. Currently only supports resetting to a byte aligned index
     pub fn reset_to(&mut self, index: usize) {
-        assert!(index % 8 == 0);
+        assert!(index.is_multiple_of(8));
         self.partial = 0;
         self.bits_in_partial = 0;
         self.bit_idx = index;
@@ -66,7 +66,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
 
         // We might be changing bits unaligned to byte borders.
         // This means the lower bits of the first byte we are touching must stay the same
-        if idx % 8 != 0 {
+        if !idx.is_multiple_of(8) {
             // How many (upper) bits will change in the first byte?
             let bits_in_first_byte = 8 - (idx % 8);
             // We don't support only changing a few bits in the middle of a byte
@@ -82,7 +82,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
             idx += bits_in_first_byte;
         }
 
-        assert!(idx % 8 == 0);
+        assert!(idx.is_multiple_of(8));
         // We are now byte aligned, change idx to byte resolution
         let mut idx = idx / 8;
 
@@ -113,7 +113,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
 
     /// Flush temporary internal buffers to the output buffer. Only works if this is currently byte aligned
     pub fn flush(&mut self) {
-        assert!(self.bits_in_partial % 8 == 0);
+        assert!(self.bits_in_partial.is_multiple_of(8));
         let full_bytes = self.bits_in_partial / 8;
         self.output
             .as_mut()
@@ -204,7 +204,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
     /// Returns how many bits are missing for an even byte
     pub fn misaligned(&self) -> usize {
         let idx = self.index();
-        if idx % 8 == 0 {
+        if idx.is_multiple_of(8) {
             0
         } else {
             8 - (idx % 8)
diff --git a/src/dictionary/cover.rs b/src/dictionary/cover.rs
new file mode 100644
index 000000000..093b8b656
--- /dev/null
+++ b/src/dictionary/cover.rs
@@ -0,0 +1,134 @@
+//! An implementation of the local maximum coverage algorithm
+//! described in the paper "Effective Construction of Relative Lempel-Ziv Dictionaries",
+//! by Liao, Petri, Moffat, and Wirth, published under the University of Melbourne.
+//!
+//! See: <https://people.eng.unimelb.edu.au/ammoffat/abstracts/lpmw16www.pdf>
+//!
+//! Facebook's implementation was also used as a reference.
+//! <https://github.com/facebook/zstd/tree/dev/lib/dictBuilder>
+
+use super::DictParams;
+use crate::dictionary::frequency::estimate_frequency;
+use core::convert::TryInto;
+use std::collections::HashMap;
+use std::vec::Vec;
+
+/// The size of each k-mer
+pub(super) const K: usize = 16;
+
+///As found under "4: Experiments - Varying k-mer Size" in the original paper,
+/// "when k = 16, across all our text collections, there is a reasonable spread"
+///
+/// Reasonable range: [6, 16]
+pub(super) type KMer = [u8; K];
+
+pub struct Segment {
+    /// The actual contents of the segment.
+    pub raw: Vec<u8>,
+    /// A measure of how "ideal" a given segment would be to include in the dictionary
+    ///
+    /// Higher is better, there's no upper limit. This number is determined by
+    /// estimating the number of occurances in a given epoch
+    pub score: usize,
+}
+
+impl Eq for Segment {}
+
+impl PartialEq for Segment {
+    fn eq(&self, other: &Self) -> bool {
+        // We only really care about score in regards to heap order
+        self.score == other.score
+    }
+}
+
+impl PartialOrd for Segment {
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for Segment {
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        self.score.cmp(&other.score)
+    }
+}
+
+/// A re-usable allocation containing large allocations
+/// that are used multiple times during dictionary construction (once per epoch)
+pub struct Context {
+    /// Keeps track of the number of occurances of a particular k-mer within an epoch.
+    ///
+    /// Reset for each epoch.
+    pub frequencies: HashMap<KMer, usize>,
+}
+
+/// Returns the highest scoring segment in an epoch
+/// as a slice of that epoch.
+pub fn pick_best_segment(
+    params: &DictParams,
+    ctx: &mut Context,
+    collection_sample: &'_ [u8],
+) -> Segment {
+    let mut segments = collection_sample
+        .chunks(params.segment_size as usize)
+        .peekable();
+    let mut best_segment: &[u8] = segments.peek().expect("at least one segment");
+    let mut top_segment_score: usize = 0;
+    // Iterate over segments and score each segment, keeping track of the best segment
+    for segment in segments {
+        let segment_score = score_segment(ctx, collection_sample, segment);
+        if segment_score > top_segment_score {
+            best_segment = segment;
+            top_segment_score = segment_score;
+        }
+    }
+
+    Segment {
+        raw: best_segment.into(),
+        score: top_segment_score,
+    }
+}
+
+/// Given a segment, compute the score (or usefulness) of that segment against the entire epoch.
+///
+/// `score_segment` modifies `ctx.frequencies`.
+fn score_segment(ctx: &mut Context, collection_sample: &[u8], segment: &[u8]) -> usize {
+    let mut segment_score = 0;
+    // Determine the score of each overlapping k-mer
+    for i in 0..(segment.len() - K - 1) {
+        let kmer: &KMer = (&segment[i..i + K])
+            .try_into()
+            .expect("Failed to make kmer");
+        // if the kmer is already in the pool, it recieves a score of zero
+        if ctx.frequencies.contains_key(kmer) {
+            continue;
+        }
+        let kmer_score = estimate_frequency(kmer, collection_sample);
+        ctx.frequencies.insert(*kmer, kmer_score);
+        segment_score += kmer_score;
+    }
+
+    segment_score
+}
+
+/// Computes the number of epochs and the size of each epoch.
+///
+/// Returns a (number of epochs, epoch size) tuple.
+///
+/// A translation of `COVER_epoch_info_t COVER_computeEpochs()` from facebook/zstd.
+pub fn compute_epoch_info(
+    params: &DictParams,
+    max_dict_size: usize,
+    num_kmers: usize,
+) -> (usize, usize) {
+    let min_epoch_size = 10_000; // 10 KiB
+    let mut num_epochs: usize = usize::max(1, max_dict_size / params.segment_size as usize);
+    let mut epoch_size: usize = num_kmers / num_epochs;
+    if epoch_size >= min_epoch_size {
+        assert!(epoch_size * num_epochs <= num_kmers);
+        return (num_epochs, epoch_size);
+    }
+    epoch_size = usize::min(min_epoch_size, num_kmers);
+    num_epochs = num_kmers / epoch_size;
+    (num_epochs, epoch_size)
+}
diff --git a/src/dictionary/frequency.rs b/src/dictionary/frequency.rs
new file mode 100644
index 000000000..074e73839
--- /dev/null
+++ b/src/dictionary/frequency.rs
@@ -0,0 +1,71 @@
+//! Contains `compute_frequency`, a function
+//! that uses a rolling Karp-Rabin hash to
+//! efficiently count the number of occurences
+//! of a given k-mer within a set.
+
+/// Computes a best effort guess as to how many times `pattern` occurs within
+/// `body`. While not 100% accurate, it will be accurate the vast majority of time
+pub fn estimate_frequency(pattern: &[u8], body: &[u8]) -> usize {
+    assert!(body.len() >= pattern.len());
+    // A prime number for modulo operations to reduce collisions (q)
+    const PRIME: isize = 2654435761;
+    // Number of characters in the input alphabet (d)
+    const ALPHABET_SIZE: isize = 256;
+    // Hash of input pattern (p)
+    let mut pattern_hash: isize = 0;
+    // Hash of the current window of text (t)
+    let mut window_hash: isize = 0;
+    // High-order digit multiplier (h)
+    let mut h: isize = 1;
+
+    // Precompute h (?)
+    h = (h * ALPHABET_SIZE) % PRIME;
+
+    // Compute initial hash values
+    for i in 0..pattern.len() {
+        pattern_hash = (ALPHABET_SIZE * pattern_hash + pattern[i] as isize) % PRIME;
+        window_hash = (ALPHABET_SIZE * window_hash + body[i] as isize) % PRIME;
+    }
+
+    let mut num_occurances = 0;
+    for i in 0..=body.len() - pattern.len() {
+        // There's *probably* a match if these two match
+        if pattern_hash == window_hash {
+            num_occurances += 1;
+        }
+
+        // Compute hash values for next window
+        if i < body.len() - pattern.len() {
+            window_hash = (ALPHABET_SIZE * (window_hash - body[i] as isize * h)
+                + body[i + pattern.len()] as isize)
+                % PRIME;
+        }
+    }
+
+    num_occurances
+}
+
+#[cfg(test)]
+mod tests {
+    use super::estimate_frequency;
+    #[test]
+    fn dead_beef() {
+        assert_eq!(
+            estimate_frequency(&[0xde, 0xad], &[0xde, 0xad, 0xbe, 0xef, 0xde, 0xad]),
+            2
+        );
+    }
+
+    #[test]
+    fn smallest_body() {
+        assert_eq!(estimate_frequency(&[0x00, 0xff], &[0x00, 0xff]), 1);
+    }
+
+    #[test]
+    fn no_match() {
+        assert_eq!(
+            estimate_frequency(&[0xff, 0xff], &[0xde, 0xad, 0xbe, 0xef, 0xde, 0xad]),
+            0
+        );
+    }
+}
diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs
new file mode 100644
index 000000000..322f68c90
--- /dev/null
+++ b/src/dictionary/mod.rs
@@ -0,0 +1,189 @@
+//! Code for creating a separate content dictionary.
+//!
+//! Effective dictionaries are up to 1% the size of the complete training body,
+//! and are trained on many examples of the original data.
+//!
+//! Implemented following the paper "Effective construction of
+//! Relative Lempel-Ziv Dictionaries", by Kewen Liao, Matthias Petri,
+//! Alistair Moffat, and Anthony Wirth
+
+// The algorithm is summarized here
+// 1. The text is split into "epochs", or chunks from the original source
+// 2. From within each epoch, we select the "segment", or 1 KiB contiguous section
+//    that's predicted to be the best option to include in the dictionary. Concatenated,
+//    these segments form the dictionary.
+//
+// This segment scoring algorithm operates as follows:
+// For a given epoch:
+//  - Run a reservoir sampler over the entire epoch, creating a
+//    reservoir of n/t, where `t` is the desired number of occurances
+//    we want the most common k-mers to have
+//  - Have the ability to estimate
+//    the frequency of a given k-mer: `f(w: k-mer)` calculates
+//    the frequency of w in the reservoir using a rolling karp-rabin hash
+//  - The score of a segment is the sum of `f(w)` called on every kmer within the segment
+mod cover;
+mod frequency;
+mod reservoir;
+
+use crate::dictionary::reservoir::create_sample;
+use alloc::vec;
+use core::cmp::Reverse;
+use cover::*;
+use std::{
+    boxed::Box,
+    collections::{BinaryHeap, HashMap},
+    dbg,
+    fs::{self, File},
+    io::{self, BufReader, Read},
+    path::{Path, PathBuf},
+    vec::Vec,
+};
+
+/// A set of values that are used during dictionary construction.
+///
+/// Changing these values can improve the resulting dictionary size for certain datasets.
+// TODO: move `k` here.
+pub(super) struct DictParams {
+    /// Segment size.
+    ///
+    /// As found under "4. Experiments - Varying Segment Size" in the original paper, a
+    /// segment size of 2 kiB was effective.
+    ///
+    /// "We explored a range of \[`segment_size`\] values and found the performance of LMC is insensitive
+    /// to \[`segment_size`\]. We fix \[`segment_size`\] to 2kiB
+    ///
+    /// Reasonable range: [16, 2048+]
+    pub segment_size: u32,
+}
+
+/// Creates a "raw content" dictionary, training off of every file in this directory and all
+/// sub-directories.
+///
+/// The resulting dictionary will be approxamitely `dict_size` or less, and written to `output`.
+///
+/// # Errors
+/// This function returns `Ok(())` if the dictionary was created successfully, and an
+/// `Err(io::Error)` if an error was encountered reading the input directory.
+///
+/// # Examples
+/// ```no_run
+/// use std::fs::File;
+/// // Create a roughly 1mb dictionary, training off of file in `sample_files`
+/// let input_folder = "sample_files/";
+/// let mut output = File::create("output.dict").unwrap();
+/// ruzstd::dictionary::create_raw_dict_from_dir(input_folder, &mut output, 1_000_000);
+/// ```
+pub fn create_raw_dict_from_dir<P: AsRef<Path>, W: io::Write>(
+    path: P,
+    output: &mut W,
+    dict_size: usize,
+) -> Result<(), io::Error> {
+    // Collect a list of a path to every file in the directory into `file_paths`
+    let mut file_paths: Vec<PathBuf> = Vec::new();
+    let dir: fs::ReadDir = fs::read_dir(path)?;
+    fn recurse_read(dir: fs::ReadDir, file_paths: &mut Vec<PathBuf>) -> Result<(), io::Error> {
+        for entry in dir {
+            let entry = entry?;
+            if entry.file_type()?.is_dir() {
+                recurse_read(fs::read_dir(entry.path())?, file_paths)?;
+            } else {
+                file_paths.push(entry.path());
+            }
+        }
+        Ok(())
+    }
+    recurse_read(dir, &mut file_paths)?;
+
+    // Open each file and chain the readers together
+    let mut total_file_len: u64 = 0;
+    let mut file_handles: Vec<fs::File> = Vec::new();
+    for path in file_paths {
+        let handle = File::open(path)?;
+        total_file_len += handle.metadata()?.len();
+        file_handles.push(handle);
+    }
+    let empty_reader: Box<dyn Read> = Box::new(io::empty());
+    let chained_files = file_handles
+        .iter()
+        .fold(empty_reader, |acc, reader| Box::new(acc.chain(reader)));
+
+    // Create a dict using the new reader
+    create_raw_dict_from_source(chained_files, total_file_len as usize, output, dict_size);
+    Ok(())
+}
+
+/// Read from `source` to create a "raw content" dictionary of `dict_size`.
+/// The completed dictionary is written to `output`.
+///
+/// - `source` will be used as training data for the entire dictionary.
+/// - `source_size` influences how the data is divided and sampled and is measured
+///   in bytes. While this does not need to be exact, estimates should attempt to be
+///   larger than the actual collection size.
+/// - `output` is where the completed dictionary will be written.
+/// - `dict_size` determines how large the complete dictionary should be. The completed
+///   dictionary will be this size or smaller.
+///
+/// This function uses `BufRead` internally, the provided reader need not be buffered.
+pub fn create_raw_dict_from_source<R: io::Read, W: io::Write>(
+    source: R,
+    source_size: usize,
+    output: &mut W,
+    dict_size: usize,
+) {
+    vprintln!("create_dict: creating {dict_size} byte dict from {source_size} byte source");
+    let mut buffered_source = BufReader::with_capacity(128_000, source);
+
+    let params = DictParams { segment_size: 2048 };
+    let num_segments = source_size / params.segment_size as usize;
+    // According to 4. Experiments - Varying Reservoir Sampler Thresholds,
+    // setting reservoir size to collection size / min{collection size / (2 * number of segments),
+    // 256} was effective
+    let sample_size = source_size / usize::min(source_size / (2 * num_segments), 256);
+    vprintln!("create_dict: creating {sample_size} byte sample of collection");
+    let collection_sample = create_sample(&mut buffered_source, sample_size);
+
+    // A collection of segments to be used in the final dictionary.
+    //
+    // Contains the best segment from every epoch.
+    // Reverse is used because we want a min heap, where
+    // the lowest scoring items come first
+    let mut pool: BinaryHeap<Reverse<Segment>> = BinaryHeap::new();
+    let (_, epoch_size) = compute_epoch_info(&params, dict_size, source_size / K);
+    let num_epochs = source_size / epoch_size;
+    vprintln!("create_dict: computed epoch info, using {num_epochs} epochs of {epoch_size} bytes");
+    //let mut current_epoch = vec![0; epoch_size];
+    let mut current_epoch = vec![0; 100];
+    let mut epoch_counter = 0;
+    let mut ctx = Context {
+        frequencies: HashMap::with_capacity(epoch_size / K),
+    };
+    // Score each segment in the epoch and select the highest scoring segment
+    // for the pool
+    while dbg!(buffered_source
+        .read(&mut current_epoch)
+        .expect("can read input"))
+        != 0
+    {
+        epoch_counter += 1;
+        let best_segment = pick_best_segment(&params, &mut ctx, &collection_sample);
+        vprintln!(
+            "\tcreate_dict: epoch {epoch_counter}/{num_epochs} has best segment score {}",
+            best_segment.score
+        );
+        pool.push(Reverse(best_segment));
+        // Wipe frequency list for next epoch
+        ctx.frequencies.clear();
+    }
+    vprintln!(
+        "create_dict: {epoch_counter} epochs written, writing {} segments",
+        pool.len()
+    );
+    // Write the dictionary with the highest scoring segment last because
+    // closer items can be represented with a smaller offset
+    while let Some(segment) = pool.pop() {
+        output
+            .write_all(&segment.0.raw)
+            .expect("can write to output");
+    }
+}
diff --git a/src/dictionary/reservoir.rs b/src/dictionary/reservoir.rs
new file mode 100644
index 000000000..6fb318c91
--- /dev/null
+++ b/src/dictionary/reservoir.rs
@@ -0,0 +1,144 @@
+use super::cover::K;
+use alloc::vec::Vec;
+use core::f64::consts::E;
+use fastrand;
+use std::{io, vec};
+
+/// Creates a representative sample of `input` of `size` bytes.
+pub fn create_sample<R: io::Read>(input: &mut R, size: usize) -> Vec<u8> {
+    let reservoir = Reservoir::new(size);
+    reservoir.fill(input)
+}
+
+/// A reservoir is created from an input stream.
+///
+/// Once filled, it will contain a best effort sample of a dataset,
+/// where each input value has an equivalent probability of being included.
+struct Reservoir {
+    /// Where the sampled data is stored.
+    ///
+    /// Once the lake is filled, then this should contain a representative sample
+    /// of the larger dataset.
+    lake: Vec<u8>,
+    /// K is the size of each sample.
+    ///
+    /// The original Zstd dictionary implementation states that values
+    /// between 16 and 2048+ are reasonable.
+    k: u16,
+}
+
+impl Reservoir {
+    /// Initialize a new empty reservoir, creating an allocation of `size`.
+    pub fn new(size: usize) -> Self {
+        assert!(size >= 16, "Reservoirs cannot be below 16 bytes in size");
+        let lake: Vec<u8> = vec![0; size];
+        let k = K as u16;
+        Self { lake, k }
+    }
+
+    /// Filling the reservoir is performed using Algorithm L.
+    ///
+    /// The return value is the populated reservoir.
+    pub fn fill<R: io::Read>(mut self, source: &mut R) -> Vec<u8> {
+        // https://en.wikipedia.org/wiki/Reservoir_sampling#:~:text=end%0A%20%20end%0Aend-,Optimal%3A%20Algorithm,-L%5Bedit
+        // https://richardstartin.github.io/posts/reservoir-sampling#algorithm-l:~:text=%3B%0A%20%20%20%20%7D%0A%7D-,Algorithm%20L,-Algorithm%20L%20was
+
+        // First fill the reservoir with the start of the input stream
+        let mut total_bytes_read: usize = 0;
+        while let Ok(num_bytes) = source.read(self.lake.as_mut_slice()) {
+            total_bytes_read += num_bytes;
+            // Stop when we've completely filled the buffer
+            if total_bytes_read == self.lake.len() {
+                break;
+            }
+            // If we haven't filled the lake all the way, resize it
+            if num_bytes == 0 {
+                self.lake.resize(total_bytes_read, 0);
+            }
+        }
+
+        let mut threshold = E.powf(fastrand::f64().ln() / f64::from(self.k));
+        // An index into the stream of the next sample to take
+        let mut next = self.lake.len();
+        // Because we're sampling k-mers of size K into the lake,
+        // split the lake into chunks of k size for simplicity
+        let mut lake_chunks = self
+            .lake
+            .chunks_mut(self.k as usize)
+            .collect::<Vec<&mut [u8]>>();
+        // Used when discarding chunks
+        let end_of_lake = lake_chunks.len();
+        let mut counter = end_of_lake / self.k as usize;
+        // Algorithm L is considered better than algorithm R because it
+        // determines how many inputs can be skipped, rather than
+        // processing every input.
+
+        // This is done by abusing the statistics in ways
+        // I do not understand.
+
+        // Items with a weight smaller than the threshold enter the lake,
+        // replacing the item in the lake with the largest threshold
+
+        let mut dumpster = Vec::with_capacity(self.k as usize);
+        loop {
+            // `num_bytes_read` is kept track of to watch for EOD.
+            let num_bytes_read: u64;
+            if counter == next {
+                num_bytes_read = source
+                    .read(lake_chunks[fastrand::usize(0..end_of_lake)])
+                    .unwrap() as u64;
+                // Advance at least to the next sample, skipping forward a few samples
+                next += ((fastrand::f64().ln() / f64::ln(1.0 - threshold)).floor() as usize + 1)
+                    * self.k as usize;
+                // Update the threshold to reflect changes
+                threshold *= E.powf(fastrand::f64().ln() / f64::from(end_of_lake as u32))
+            } else {
+                // Drop the next chunk
+                num_bytes_read = source.read(&mut dumpster).unwrap() as u64;
+                //source.seek_relative(self.k.into()).unwrap();
+            }
+            if num_bytes_read == 0 {
+                break;
+            }
+            counter += self.k as usize;
+        }
+        self.lake.shrink_to_fit();
+        self.lake
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Reservoir;
+    use alloc::vec;
+
+    #[test]
+    fn initial_fill() {
+        // Create a reservoir 16 bytes in size and read
+        // 16 bytes into it
+        let r = Reservoir::new(16);
+        let test_data = vec![0_u8; 16];
+        let output = r.fill(&mut test_data.as_slice());
+        assert_eq!(test_data, output);
+    }
+
+    #[test]
+    fn shrinks_for_small_sample() {
+        // Create a reservoir larger than the sample.
+        // The output should be smaller.
+        let r = Reservoir::new(32);
+        let test_data = vec![0_u8; 28];
+        let output = r.fill(&mut test_data.as_slice());
+        assert!(output.len() == 28);
+    }
+
+    #[test]
+    fn lake_doesnt_grow() {
+        // Create a sample larger than the reservoir
+        // The output should be smaller.
+        let r = Reservoir::new(32);
+        let test_data = vec![0_u8; 16_000_000];
+        let output = r.fill(&mut test_data.as_slice());
+        assert!(output.len() == 32);
+    }
+}
diff --git a/src/encoding/blocks/compressed.rs b/src/encoding/blocks/compressed.rs
index 85593e181..ad7f413cf 100644
--- a/src/encoding/blocks/compressed.rs
+++ b/src/encoding/blocks/compressed.rs
@@ -8,6 +8,7 @@ use crate::{
     huff0::huff0_encoder,
 };
 
+/// A block of [`crate::common::BlockType::Compressed`]
 pub fn compress_block<M: Matcher>(state: &mut CompressState<M>, output: &mut Vec<u8>) {
     let mut literals_vec = Vec::new();
     let mut sequences = Vec::new();
diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs
index 33c98a591..2d9797e9b 100644
--- a/src/encoding/mod.rs
+++ b/src/encoding/mod.rs
@@ -105,7 +105,7 @@ pub enum Sequence<'data> {
     /// Is encoded as a sequence for the decoder sequence execution.
     ///
     /// First the literals will be copied to the decoded data,
-    /// then `match_len` bytes are copied from `offset` bytes back in the buffer
+    /// then `match_len` bytes are copied from `offset` bytes back in the decoded data
     Triple {
         literals: &'data [u8],
         offset: usize,
diff --git a/src/fse/fse_decoder.rs b/src/fse/fse_decoder.rs
index bf573c1b0..7cd59dc6d 100644
--- a/src/fse/fse_decoder.rs
+++ b/src/fse/fse_decoder.rs
@@ -297,7 +297,7 @@ impl FSETable {
             });
         }
 
-        let bytes_read = if br.bits_read() % 8 == 0 {
+        let bytes_read = if br.bits_read().is_multiple_of(8) {
             br.bits_read() / 8
         } else {
             (br.bits_read() / 8) + 1
diff --git a/src/huff0/huff0_decoder.rs b/src/huff0/huff0_decoder.rs
index 5c3e98bf0..1952aea3c 100644
--- a/src/huff0/huff0_decoder.rs
+++ b/src/huff0/huff0_decoder.rs
@@ -245,7 +245,7 @@ impl HuffmanTable {
                 let num_weights = header - 127;
                 self.weights.resize(num_weights as usize, 0);
 
-                let bytes_needed = if num_weights % 2 == 0 {
+                let bytes_needed = if num_weights.is_multiple_of(2) {
                     num_weights as usize / 2
                 } else {
                     (num_weights as usize / 2) + 1
diff --git a/src/lib.rs b/src/lib.rs
index 0d87f5ee3..0f85407a4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,8 +7,8 @@
 //!
 //! ## Compression
 //! The [encoding] module contains the code for compression.
-//! Decompression can be achieved by using the [`encoding::compress`]/[`encoding::compress_to_vec`]
-//! functions or the [`encoding::FrameCompressor`]
+//! Compression can be achieved by using the [`encoding::compress`]/[`encoding::compress_to_vec`]
+//! functions or [`encoding::FrameCompressor`]
 //!
 #![doc = include_str!("../Readme.md")]
 #![no_std]
@@ -35,6 +35,9 @@ macro_rules! vprintln {
 mod bit_io;
 mod common;
 pub mod decoding;
+#[cfg(feature = "dict_builder")]
+#[cfg_attr(docsrs, doc(cfg(feature = "dict_builder")))]
+pub mod dictionary;
 pub mod encoding;
 
 pub(crate) mod blocks;
@@ -49,8 +52,6 @@ pub(crate) mod fse;
 #[cfg(not(feature = "fuzz_exports"))]
 pub(crate) mod huff0;
 
-mod tests;
-
 #[cfg(feature = "std")]
 pub mod io_std;
 
@@ -62,3 +63,5 @@ pub mod io_nostd;
 
 #[cfg(not(feature = "std"))]
 pub use io_nostd as io;
+
+mod tests;
diff --git a/src/tests/decode_corpus.rs b/src/tests/decode_corpus.rs
index 049ba1d8b..400b1b4fe 100644
--- a/src/tests/decode_corpus.rs
+++ b/src/tests/decode_corpus.rs
@@ -7,6 +7,7 @@ fn test_decode_corpus_files() {
     use alloc::string::{String, ToString};
     use alloc::vec::Vec;
     use std::fs;
+    use std::io::BufReader;
     use std::io::Read;
     use std::println;
 
@@ -82,7 +83,7 @@ fn test_decode_corpus_files() {
 
         let mut original_p = p.clone();
         original_p.truncate(original_p.len() - 4);
-        let original_f = fs::File::open(original_p).unwrap();
+        let original_f = BufReader::new(fs::File::open(original_p).unwrap());
         let original: Vec<u8> = original_f.bytes().map(|x| x.unwrap()).collect();
 
         println!("Results for file: {}", p.clone());
diff --git a/src/tests/dict_test.rs b/src/tests/dict_test.rs
index 37eca271a..1fd17d011 100644
--- a/src/tests/dict_test.rs
+++ b/src/tests/dict_test.rs
@@ -83,6 +83,7 @@ fn test_dict_decoding() {
     use alloc::string::{String, ToString};
     use alloc::vec::Vec;
     use std::fs;
+    use std::io::BufReader;
     use std::io::Read;
     use std::println;
 
@@ -97,7 +98,7 @@ fn test_dict_decoding() {
     let mut speeds_read = Vec::new();
 
     let mut files: Vec<_> = fs::read_dir("./dict_tests/files").unwrap().collect();
-    let dict = fs::File::open("./dict_tests/dictionary").unwrap();
+    let dict = BufReader::new(fs::File::open("./dict_tests/dictionary").unwrap());
     let dict: Vec<u8> = dict.bytes().map(|x| x.unwrap()).collect();
 
     files.sort_by_key(|x| match x {
@@ -155,7 +156,7 @@ fn test_dict_decoding() {
 
         let mut original_p = p.clone();
         original_p.truncate(original_p.len() - 4);
-        let original_f = fs::File::open(original_p).unwrap();
+        let original_f = BufReader::new(fs::File::open(original_p).unwrap());
         let original: Vec<u8> = original_f.bytes().map(|x| x.unwrap()).collect();
 
         println!("Results for file: {}", p.clone());
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
index c0730f535..13090296e 100644
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@@ -130,8 +130,9 @@ fn test_frame_decoder() {
 fn test_decode_from_to() {
     use crate::decoding::FrameDecoder;
     use std::fs::File;
+    use std::io::BufReader;
     use std::io::Read;
-    let f = File::open("./decodecorpus_files/z000088.zst").unwrap();
+    let f = BufReader::new(File::open("./decodecorpus_files/z000088.zst").unwrap());
     let mut frame_dec = FrameDecoder::new();
 
     let content: Vec<u8> = f.bytes().map(|x| x.unwrap()).collect();
@@ -197,7 +198,7 @@ fn test_decode_from_to() {
         None => std::println!("No checksums to test\n"),
     }
 
-    let original_f = File::open("./decodecorpus_files/z000088").unwrap();
+    let original_f = BufReader::new(File::open("./decodecorpus_files/z000088").unwrap());
     let original: Vec<u8> = original_f.bytes().map(|x| x.unwrap()).collect();
 
     if original.len() != result.len() {
@@ -233,6 +234,7 @@ fn test_specific_file() {
     use crate::decoding::BlockDecodingStrategy;
     use crate::decoding::FrameDecoder;
     use std::fs;
+    use std::io::BufReader;
     use std::io::Read;
 
     let path = "./decodecorpus_files/z000068.zst";
@@ -256,7 +258,7 @@ fn test_specific_file() {
         .unwrap();
     let result = frame_dec.collect().unwrap();
 
-    let original_f = fs::File::open("./decodecorpus_files/z000088").unwrap();
+    let original_f = BufReader::new(fs::File::open("./decodecorpus_files/z000088").unwrap());
     let original: Vec<u8> = original_f.bytes().map(|x| x.unwrap()).collect();
 
     std::println!("Results for file: {path}");
@@ -293,6 +295,7 @@ fn test_specific_file() {
 #[cfg(feature = "std")]
 fn test_streaming() {
     use std::fs;
+    use std::io::BufReader;
     use std::io::Read;
 
     let mut content = fs::File::open("./decodecorpus_files/z000088.zst").unwrap();
@@ -301,7 +304,7 @@ fn test_streaming() {
     let mut result = Vec::new();
     Read::read_to_end(&mut stream, &mut result).unwrap();
 
-    let original_f = fs::File::open("./decodecorpus_files/z000088").unwrap();
+    let original_f = BufReader::new(fs::File::open("./decodecorpus_files/z000088").unwrap());
     let original: Vec<u8> = original_f.bytes().map(|x| x.unwrap()).collect();
 
     if original.len() != result.len() {
@@ -343,7 +346,7 @@ fn test_streaming() {
     let mut result = Vec::new();
     Read::read_to_end(&mut stream, &mut result).unwrap();
 
-    let original_f = fs::File::open("./decodecorpus_files/z000068").unwrap();
+    let original_f = BufReader::new(fs::File::open("./decodecorpus_files/z000068").unwrap());
     let original: Vec<u8> = original_f.bytes().map(|x| x.unwrap()).collect();
 
     std::println!("Results for file:");
@@ -576,3 +579,10 @@ pub mod dict_test;
 #[cfg(feature = "std")]
 pub mod encode_corpus;
 pub mod fuzz_regressions;
+
+#[cfg(feature = "std")]
+#[test]
+fn verbose_disabled() {
+    use crate::VERBOSE;
+    assert_eq!(VERBOSE, false);
+}