From 599f4d5bfb15227d7af45315004a92b8533d38bd Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sun, 15 Mar 2026 02:24:40 +0300 Subject: [PATCH 01/29] #723 implement first criterion benchmark this version was created on v3.0.0 originally and later rebased to 1.0.0 --- Cargo.toml | 120 +++++++---- benches/criterion_bench.rs | 398 +++++++++++++++++++++++++++++++++++++ 2 files changed, 483 insertions(+), 35 deletions(-) create mode 100644 benches/criterion_bench.rs diff --git a/Cargo.toml b/Cargo.toml index 9aa907f4b..23680ac93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,55 +1,97 @@ [package] -name = "zip_next" -version = "1.0.0" -authors = ["Mathijs van de Nes ", "Marli Frost ", "Ryan Levick ", -"Chris Hennick "] +name = "zip" +version = "3.0.0" +authors = [ + "Mathijs van de Nes ", + "Marli Frost ", + "Ryan Levick ", + "Chris Hennick ", +] license = "MIT" -repository = "https://github.com/Pr0methean/zip-next.git" -keywords = ["zip", "archive"] +repository = "https://github.com/zip-rs/zip2.git" +keywords = ["zip", "archive", "compression"] +# Any change to rust-version must be reflected also in `README.md` and `.github/workflows/ci.yaml`. +# The MSRV policy is documented in `README.md`. +rust-version = "1.75.0" description = """ -rust-version = "1.67.0" Library to support the reading and writing of zip files. 
""" edition = "2021" +exclude = ["tests/**", "examples/**", ".github/**", "fuzz_read/**", "fuzz_write/**"] + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[workspace.dependencies] +time = { version = "0.3.37", default-features = false } [dependencies] -aes = { version = "0.8.4", optional = true } -byteorder = "1.5.0" -bzip2 = { version = "0.4.4", optional = true } -chrono = { version = "0.4.37", optional = true } -constant_time_eq = { version = "0.3.0", optional = true } -crc32fast = "1.4.0" -flate2 = { version = "1.0.28", default-features = false, optional = true } -hmac = { version = "0.12.1", optional = true, features = ["reset"] } -pbkdf2 = {version = "0.12.2", optional = true } -sha1 = {version = "0.10.6", optional = true } -time = { version = "0.3.34", optional = true, default-features = false, features = ["std"] } -zstd = { version = "0.13.1", optional = true, default-features = false } -zopfli = { version = "0.8.0", optional = true } -deflate64 = { version = "0.1.8", optional = true } - -[target.'cfg(any(all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", target_arch = "powerpc"))'.dependencies] -crossbeam-utils = "0.8.19" +aes = { version = "0.8", optional = true } +bzip2 = { version = "0.5.0", optional = true } +chrono = { version = "0.4", optional = true } +constant_time_eq = { version = "0.3", optional = true } +crc32fast = "1.4" +flate2 = { version = "1.1.1", default-features = false, optional = true } +getrandom = { version = "0.3.1", features = ["wasm_js", "std"], optional = true} +hmac = { version = "0.12", optional = true, features = ["reset"] } +indexmap = "2" +jiff = { version = "0.2.4", optional = true } +memchr = "2.7" +nt-time = { version = "0.10.6", default-features = false, optional = true } +pbkdf2 = { version = "0.12", optional = true } +sha1 = { version = "0.10", optional = true } +time = { workspace = true, optional = true, features = [ + "std", +] } +zeroize = { version 
= "1.8", optional = true, features = ["zeroize_derive"] } +zstd = { version = "0.13", optional = true, default-features = false } +zopfli = { version = "0.8", optional = true } +deflate64 = { version = "0.1.9", optional = true } +lzma-rs = { version = "0.3", default-features = false, optional = true } +xz2 = { version = "0.1.7", optional = true } [target.'cfg(fuzzing)'.dependencies] -arbitrary = { version = "1.3.2", features = ["derive"] } +arbitrary = { version = "1.4.1", features = ["derive"] } [dev-dependencies] bencher = "0.1.5" -getrandom = { version = "0.2.14", features = ["js"] } -walkdir = "2.5.0" -time = { version = "0.3.34", features = ["formatting", "macros"] } +criterion = { version = "0.5", features = ["html_reports"] } +getrandom = { version = "0.3.1", features = ["wasm_js", "std"] } +walkdir = "2.5" +time = { workspace = true, features = ["formatting", "macros"] } +anyhow = "1.0.95" +clap = { version = "=4.4.18", features = ["derive"] } +tempfile = "3.15" [features] -aes-crypto = [ "aes", "constant_time_eq", "hmac", "pbkdf2", "sha1" ] +aes-crypto = ["aes", "constant_time_eq", "hmac", "pbkdf2", "sha1", "getrandom", "zeroize"] chrono = ["chrono/default"] -deflate = ["flate2/rust_backend"] -deflate-miniz = ["flate2/default"] -deflate-zlib = ["flate2/zlib"] -deflate-zlib-ng = ["flate2/zlib-ng"] -deflate-zopfli = ["zopfli"] +_deflate-any = [] +_all-features = [] # Detect when --all-features is used +deflate = ["deflate-zopfli", "deflate-flate2-zlib-rs"] +# Pull in flate2, but don't choose a backend; useful if you want to choose your own flate2 backend +deflate-flate2 = ["_deflate-any", "dep:flate2"] +# Pull in flate2 and the fast zlib-rs backend; this is what most users will want +deflate-flate2-zlib-rs = ["deflate-flate2", "flate2/zlib-rs"] +# Pull in flate2 and the zlib backend; only use this if you need a dynamically linked system zlib +deflate-flate2-zlib = ["deflate-flate2", "flate2/zlib"] +deflate-zopfli = ["zopfli", "_deflate-any"] +jiff-02 = 
["dep:jiff"] +nt-time = ["dep:nt-time"] +lzma = ["lzma-rs/stream"] unreserved = [] -default = ["aes-crypto", "bzip2", "deflate", "deflate64", "deflate-zlib-ng", "deflate-zopfli", "time", "zstd"] +xz = ["dep:xz2"] +default = [ + "aes-crypto", + "bzip2", + "deflate64", + "deflate", + "lzma", + "time", + "zstd", + "xz", +] [[bench]] name = "read_entry" @@ -58,3 +100,11 @@ harness = false [[bench]] name = "read_metadata" harness = false + +[[bench]] +name = "merge_archive" +harness = false + +[[bench]] +name = "criterion_bench" +harness = false diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs new file mode 100644 index 000000000..a177e1032 --- /dev/null +++ b/benches/criterion_bench.rs @@ -0,0 +1,398 @@ +// Criterion-based benchmarks (baseline storage, regression detection). +// Run: cargo bench --bench criterion_bench +// First run saves baseline; later runs compare and can fail on regression. + +use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; +use std::fs; +use std::io::{self, Cursor, Read, Seek, Write}; +use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; + +// deterministic seeded randomness helper (SplitMix64, no external dependencies) +fn seeded_random_bytes(size: usize) -> Vec { + let mut x: u64 = 0xdead_beef_cafe_babe; // seed + let mut out = vec![0u8; size]; + + for chunk in out.chunks_mut(8) { + x = x.wrapping_add(0x9E3779B97F4A7C15); + + let mut z = x; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB); + z ^= z >> 31; + + let bytes = z.to_le_bytes(); + + for (i, b) in chunk.iter_mut().enumerate() { + *b = bytes[i]; + } + } + + out +} + +fn generate_random_archive(size: usize) -> Vec { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + writer.start_file("random.dat", options).unwrap(); + + // 
generate deterministic seeded random data + let bytes = seeded_random_bytes(size); + + writer.write_all(&bytes).unwrap(); + writer.finish().unwrap().into_inner() +} + +const FILE_COUNT: usize = 15_000; +const FILE_SIZE_META: usize = 1024; + +fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResult> { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + + // seeded random payload reused across entries + let bytes = seeded_random_bytes(file_size); + + for i in 0..count_files { + let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat"); + writer.start_file(name, options)?; + writer.write_all(&bytes)?; + } + + Ok(writer.finish()?.into_inner()) +} + +fn generate_random_archive_merge( + num_entries: usize, + entry_size: usize, + options: SimpleFileOptions, +) -> ZipResult<(usize, Vec)> { + let buf = Cursor::new(Vec::new()); + let mut zip = ZipWriter::new(buf); + + // seeded random payload reused across entries + let bytes = seeded_random_bytes(entry_size); + + for i in 0..num_entries { + let name = format!("random{i}.dat"); + zip.start_file(name, options)?; + zip.write_all(&bytes)?; + } + + let buf = zip.finish()?.into_inner(); + let len = buf.len(); + + Ok((len, buf)) +} + +fn perform_merge( + src: ZipArchive, + mut target: ZipWriter, +) -> ZipResult> { + target.merge_archive(src)?; + Ok(target) +} + +fn perform_raw_copy_file( + mut src: ZipArchive, + mut target: ZipWriter, +) -> ZipResult> { + for i in 0..src.len() { + let entry = src.by_index(i)?; + target.raw_copy_file(entry)?; + } + Ok(target) +} + +const NUM_ENTRIES: usize = 100; +const ENTRY_SIZE: usize = 1024; + +// Default sizes (desktop). When BENCH_PI=1 or BENCH_LOW_MEMORY=1, use smaller sizes for Pi 3B (~1 GB RAM). 
+fn is_low_memory() -> bool { + std::env::var("BENCH_PI").as_deref() == Ok("1") + || std::env::var("BENCH_LOW_MEMORY").as_deref() == Ok("1") +} + +fn file_count_meta() -> usize { + if is_low_memory() { + 2_000 // ~2 MB archive instead of ~15 MB + } else { + FILE_COUNT + } +} + +fn comment_size() -> usize { + if is_low_memory() { + 10_000 + } else { + 50_000 + } +} + +fn read_all_entries_count() -> usize { + if is_low_memory() { + 200 + } else { + 500 + } +} + +fn by_name_lookup_count() -> usize { + if is_low_memory() { + 20 + } else { + 50 + } +} + +fn large_non_zip_size() -> usize { + if is_low_memory() { + 5_000_000 // 5 MB instead of 17 MB + } else { + 17_000_000 + } +} + +fn write_many_count() -> usize { + if is_low_memory() { + 300 + } else { + 1_000 + } +} + +const STREAM_ENTRIES: usize = 20; +const STREAM_ENTRY_SIZE: usize = 256; +const WRITE_LARGE_SIZE: usize = 1024 * 1024; +const ROUNDTRIP_ENTRIES: usize = 100; + +fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let comment = seeded_random_bytes(comment_len); + writer.set_raw_comment(comment.into_boxed_slice()); + writer.start_file("data.txt", options)?; + writer.write_all(b"x")?; + Ok(writer.finish()?.into_inner()) +} + +fn criterion_benchmark(c: &mut Criterion) { + let size = 1024 * 1024; + let bytes = generate_random_archive(size); + + c.bench_function("read_entry", |b| { + b.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + let mut file = archive.by_name("random.dat").unwrap(); + let mut buf = [0u8; 1024]; + + loop { + let n = file.read(&mut buf).unwrap(); + if n == 0 { + break; + } + } + + black_box(buf); + }); + }); + + let bytes_meta = generate_random_archive_meta(file_count_meta(), FILE_SIZE_META).unwrap(); + + c.bench_function("read_metadata", |b| { + b.iter(|| { + black_box( + 
ZipArchive::new(Cursor::new(bytes_meta.as_slice())) + .unwrap() + .len(), + ) + }); + }); + + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + let (len, src_bytes) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + c.bench_function("merge_archive_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(Cursor::new(src_bytes.clone())).unwrap(); + let buf = Cursor::new(Vec::with_capacity(len)); + (src, buf) + }, + |(src, buf)| { + let zip = ZipWriter::new(buf); + let zip = perform_merge(src, zip).unwrap(); + let out = zip.finish().unwrap().into_inner(); + + assert_eq!(out.len(), len); + + black_box(out) + }, + BatchSize::SmallInput, + ); + }); + + let (len2, src_bytes2) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + c.bench_function("merge_archive_raw_copy_file_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(Cursor::new(src_bytes2.clone())).unwrap(); + let buf = Cursor::new(Vec::with_capacity(len2)); + (src, buf) + }, + |(src, buf)| { + let zip = ZipWriter::new(buf); + let zip = perform_raw_copy_file(src, zip).unwrap(); + let out = zip.finish().unwrap().into_inner(); + + assert_eq!(out.len(), len2); + + black_box(out) + }, + BatchSize::SmallInput, + ); + }); + + // --- read_all_entries: iterate by_index and read each entry --- + let bytes_all_entries = + generate_random_archive_meta(read_all_entries_count(), 512).unwrap(); + c.bench_function("read_all_entries", |b| { + b.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(bytes_all_entries.as_slice())).unwrap(); + for i in 0..archive.len() { + let mut entry = archive.by_index(i).unwrap(); + let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); + } + }); + }); + + // --- by_name_lookup_many: look up many names in large archive --- + let lookup_names: Vec = (0..by_name_lookup_count()) + .map(|i| format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat")) 
+ .collect(); + c.bench_function("by_name_lookup_many", |b| { + b.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(bytes_meta.as_slice())).unwrap(); + for name in &lookup_names { + let _ = archive.by_name(name).unwrap(); + } + }); + }); + + // --- parse_archive_with_comment --- + let bytes_comment = generate_archive_with_comment(comment_size()).unwrap(); + c.bench_function("parse_archive_with_comment", |b| { + b.iter(|| { + let archive = ZipArchive::new(Cursor::new(bytes_comment.as_slice())).unwrap(); + black_box(archive.comment().len()); + }); + }); + + // --- read_stream_entries: read_zipfile_from_stream until None --- + let bytes_stream = generate_random_archive_meta(STREAM_ENTRIES, STREAM_ENTRY_SIZE).unwrap(); + let dir_stream = tempfile::TempDir::with_prefix("criterion_stream").unwrap(); + let path_stream = dir_stream.path().join("bench.zip"); + fs::write(&path_stream, &bytes_stream).unwrap(); + c.bench_function("read_stream_entries", |b| { + b.iter(|| { + let mut f = fs::File::open(&path_stream).unwrap(); + while zip::read::read_zipfile_from_stream(&mut f).unwrap().is_some() {} + }); + }); + + // --- parse_large_non_zip_reject --- + let dir_reject = tempfile::TempDir::with_prefix("criterion_reject").unwrap(); + let path_reject = dir_reject.path().join("zeros"); + fs::write(&path_reject, vec![0u8; large_non_zip_size()]).unwrap(); + c.bench_function("parse_large_non_zip_reject", |b| { + b.iter(|| { + let r = ZipArchive::new(fs::File::open(&path_reject).unwrap()); + assert!(r.is_err()); + }); + }); + + // --- write_many_small_files --- + let payload_small = seeded_random_bytes(128); + c.bench_function("write_many_small_files", |b| { + b.iter(|| { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + for i in 0..write_many_count() { + let name = format!("file_{i}.dat"); + writer.start_file(name, options).unwrap(); + 
writer.write_all(&payload_small).unwrap(); + } + black_box(writer.finish().unwrap().into_inner()); + }); + }); + + // --- write_one_large_file --- + let payload_large = seeded_random_bytes(WRITE_LARGE_SIZE); + c.bench_function("write_one_large_file", |b| { + b.iter(|| { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + writer.start_file("large.dat", options).unwrap(); + writer.write_all(&payload_large).unwrap(); + black_box(writer.finish().unwrap().into_inner()); + }); + }); + + // --- write_then_read_roundtrip --- + let roundtrip_payload = seeded_random_bytes(256); + c.bench_function("write_then_read_roundtrip", |b| { + b.iter(|| { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + for i in 0..ROUNDTRIP_ENTRIES { + writer.start_file(format!("entry_{i}.dat"), options).unwrap(); + writer.write_all(&roundtrip_payload).unwrap(); + } + let bytes = writer.finish().unwrap().into_inner(); + let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + black_box(archive.len()); + }); + }); + + // --- deflate: read deflated entry (when feature enabled) --- + #[cfg(feature = "deflate")] + { + let (_, deflated_bytes) = generate_random_archive_merge( + 5, + 4096, + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), + ) + .unwrap(); + c.bench_function("read_deflated_entry", |b| { + b.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(deflated_bytes.as_slice())).unwrap(); + for i in 0..archive.len() { + let mut entry = archive.by_index(i).unwrap(); + let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); + } + }); + }); + } + + #[cfg(feature = "deflate")] + { + let deflate_payload = seeded_random_bytes(2048); + c.bench_function("write_deflated_entries", |b| { + b.iter(|| { + let mut writer = 
ZipWriter::new(Cursor::new(Vec::new())); + let options = + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); + for i in 0..20 { + writer.start_file(format!("deflated_{i}.dat"), options).unwrap(); + writer.write_all(&deflate_payload).unwrap(); + } + black_box(writer.finish().unwrap().into_inner()); + }); + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); \ No newline at end of file From 67bfca8accd21ef9a78378d68f67a86868218a7b Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 00:37:16 +0300 Subject: [PATCH 02/29] #723 RUN 1.0.0 specific code --- Cargo.toml | 5 +-- benches/criterion_bench.rs | 79 ++++++++++++++------------------------ 2 files changed, 29 insertions(+), 55 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 23680ac93..65491ed80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ time = { version = "0.3.37", default-features = false } [dependencies] aes = { version = "0.8", optional = true } +byteorder = "1.5" bzip2 = { version = "0.5.0", optional = true } chrono = { version = "0.4", optional = true } constant_time_eq = { version = "0.3", optional = true } @@ -101,10 +102,6 @@ harness = false name = "read_metadata" harness = false -[[bench]] -name = "merge_archive" -harness = false - [[bench]] name = "criterion_bench" harness = false diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index a177e1032..287562eb8 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -5,7 +5,7 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; use std::fs; use std::io::{self, Cursor, Read, Seek, Write}; -use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; +use zip::{result::ZipResult, write::FileOptions, CompressionMethod, ZipArchive, ZipWriter}; // deterministic seeded randomness helper (SplitMix64, no external 
dependencies) fn seeded_random_bytes(size: usize) -> Vec { @@ -32,7 +32,7 @@ fn seeded_random_bytes(size: usize) -> Vec { fn generate_random_archive(size: usize) -> Vec { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let options = FileOptions::default().compression_method(zip::CompressionMethod::Stored); writer.start_file("random.dat", options).unwrap(); // generate deterministic seeded random data @@ -47,14 +47,13 @@ const FILE_SIZE_META: usize = 1024; fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); - // seeded random payload reused across entries let bytes = seeded_random_bytes(file_size); for i in 0..count_files { let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat"); - writer.start_file(name, options)?; + writer + .start_file(name, FileOptions::default().compression_method(CompressionMethod::Stored))?; writer.write_all(&bytes)?; } @@ -64,7 +63,7 @@ fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResu fn generate_random_archive_merge( num_entries: usize, entry_size: usize, - options: SimpleFileOptions, + _options: FileOptions, ) -> ZipResult<(usize, Vec)> { let buf = Cursor::new(Vec::new()); let mut zip = ZipWriter::new(buf); @@ -74,7 +73,7 @@ fn generate_random_archive_merge( for i in 0..num_entries { let name = format!("random{i}.dat"); - zip.start_file(name, options)?; + zip.start_file(name, FileOptions::default().compression_method(CompressionMethod::Stored))?; zip.write_all(&bytes)?; } @@ -84,14 +83,6 @@ fn generate_random_archive_merge( Ok((len, buf)) } -fn perform_merge( - src: ZipArchive, - mut target: ZipWriter, -) -> ZipResult> { - target.merge_archive(src)?; - Ok(target) -} - fn perform_raw_copy_file( 
mut src: ZipArchive, mut target: ZipWriter, @@ -167,9 +158,9 @@ const ROUNDTRIP_ENTRIES: usize = 100; fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = FileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment.into_boxed_slice()); + writer.set_raw_comment(comment); writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) @@ -208,29 +199,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); - - let (len, src_bytes) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); - - c.bench_function("merge_archive_stored", |b| { - b.iter_batched( - || { - let src = ZipArchive::new(Cursor::new(src_bytes.clone())).unwrap(); - let buf = Cursor::new(Vec::with_capacity(len)); - (src, buf) - }, - |(src, buf)| { - let zip = ZipWriter::new(buf); - let zip = perform_merge(src, zip).unwrap(); - let out = zip.finish().unwrap().into_inner(); - - assert_eq!(out.len(), len); - - black_box(out) - }, - BatchSize::SmallInput, - ); - }); + let options = FileOptions::default().compression_method(zip::CompressionMethod::Stored); let (len2, src_bytes2) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); @@ -243,8 +212,8 @@ fn criterion_benchmark(c: &mut Criterion) { (src, buf) }, |(src, buf)| { - let zip = ZipWriter::new(buf); - let zip = perform_raw_copy_file(src, zip).unwrap(); + let mut zip = ZipWriter::new(buf); + zip = perform_raw_copy_file(src, zip).unwrap(); let out = zip.finish().unwrap().into_inner(); assert_eq!(out.len(), len2); @@ -318,10 +287,11 @@ fn criterion_benchmark(c: &mut Criterion) { 
c.bench_function("write_many_small_files", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..write_many_count() { let name = format!("file_{i}.dat"); - writer.start_file(name, options).unwrap(); + writer + .start_file(name, FileOptions::default().compression_method(CompressionMethod::Stored)) + .unwrap(); writer.write_all(&payload_small).unwrap(); } black_box(writer.finish().unwrap().into_inner()); @@ -333,7 +303,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_one_large_file", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = FileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); writer.write_all(&payload_large).unwrap(); black_box(writer.finish().unwrap().into_inner()); @@ -345,9 +315,13 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_then_read_roundtrip", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { - writer.start_file(format!("entry_{i}.dat"), options).unwrap(); + writer + .start_file( + format!("entry_{i}.dat"), + FileOptions::default().compression_method(CompressionMethod::Stored), + ) + .unwrap(); writer.write_all(&roundtrip_payload).unwrap(); } let bytes = writer.finish().unwrap().into_inner(); @@ -362,7 +336,7 @@ fn criterion_benchmark(c: &mut Criterion) { let (_, deflated_bytes) = generate_random_archive_merge( 5, 4096, - SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), + FileOptions::default().compression_method(CompressionMethod::Deflated), ) .unwrap(); c.bench_function("read_deflated_entry", |b| { @@ 
-382,10 +356,13 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_deflated_entries", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = - SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); for i in 0..20 { - writer.start_file(format!("deflated_{i}.dat"), options).unwrap(); + writer + .start_file( + format!("deflated_{i}.dat"), + FileOptions::default().compression_method(CompressionMethod::Deflated), + ) + .unwrap(); writer.write_all(&deflate_payload).unwrap(); } black_box(writer.finish().unwrap().into_inner()); From aa240299e29737a8dcedb660c333b6763dd15ced Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 00:53:20 +0300 Subject: [PATCH 03/29] Revert "#723 RUN 1.0.0 specific code" This reverts commit 67bfca8accd21ef9a78378d68f67a86868218a7b. --- Cargo.toml | 5 ++- benches/criterion_bench.rs | 79 ++++++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 65491ed80..23680ac93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,6 @@ time = { version = "0.3.37", default-features = false } [dependencies] aes = { version = "0.8", optional = true } -byteorder = "1.5" bzip2 = { version = "0.5.0", optional = true } chrono = { version = "0.4", optional = true } constant_time_eq = { version = "0.3", optional = true } @@ -102,6 +101,10 @@ harness = false name = "read_metadata" harness = false +[[bench]] +name = "merge_archive" +harness = false + [[bench]] name = "criterion_bench" harness = false diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 287562eb8..a177e1032 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -5,7 +5,7 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; use std::fs; use std::io::{self, Cursor, Read, Seek, Write}; -use zip::{result::ZipResult, 
write::FileOptions, CompressionMethod, ZipArchive, ZipWriter}; +use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; // deterministic seeded randomness helper (SplitMix64, no external dependencies) fn seeded_random_bytes(size: usize) -> Vec { @@ -32,7 +32,7 @@ fn seeded_random_bytes(size: usize) -> Vec { fn generate_random_archive(size: usize) -> Vec { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = FileOptions::default().compression_method(zip::CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); writer.start_file("random.dat", options).unwrap(); // generate deterministic seeded random data @@ -47,13 +47,14 @@ const FILE_SIZE_META: usize = 1024; fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + // seeded random payload reused across entries let bytes = seeded_random_bytes(file_size); for i in 0..count_files { let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat"); - writer - .start_file(name, FileOptions::default().compression_method(CompressionMethod::Stored))?; + writer.start_file(name, options)?; writer.write_all(&bytes)?; } @@ -63,7 +64,7 @@ fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResu fn generate_random_archive_merge( num_entries: usize, entry_size: usize, - _options: FileOptions, + options: SimpleFileOptions, ) -> ZipResult<(usize, Vec)> { let buf = Cursor::new(Vec::new()); let mut zip = ZipWriter::new(buf); @@ -73,7 +74,7 @@ fn generate_random_archive_merge( for i in 0..num_entries { let name = format!("random{i}.dat"); - zip.start_file(name, FileOptions::default().compression_method(CompressionMethod::Stored))?; + zip.start_file(name, options)?; 
zip.write_all(&bytes)?; } @@ -83,6 +84,14 @@ fn generate_random_archive_merge( Ok((len, buf)) } +fn perform_merge( + src: ZipArchive, + mut target: ZipWriter, +) -> ZipResult> { + target.merge_archive(src)?; + Ok(target) +} + fn perform_raw_copy_file( mut src: ZipArchive, mut target: ZipWriter, @@ -158,9 +167,9 @@ const ROUNDTRIP_ENTRIES: usize = 100; fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment); + writer.set_raw_comment(comment.into_boxed_slice()); writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) @@ -199,7 +208,29 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - let options = FileOptions::default().compression_method(zip::CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + let (len, src_bytes) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + c.bench_function("merge_archive_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(Cursor::new(src_bytes.clone())).unwrap(); + let buf = Cursor::new(Vec::with_capacity(len)); + (src, buf) + }, + |(src, buf)| { + let zip = ZipWriter::new(buf); + let zip = perform_merge(src, zip).unwrap(); + let out = zip.finish().unwrap().into_inner(); + + assert_eq!(out.len(), len); + + black_box(out) + }, + BatchSize::SmallInput, + ); + }); let (len2, src_bytes2) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); @@ -212,8 +243,8 @@ fn criterion_benchmark(c: &mut Criterion) { (src, buf) }, |(src, buf)| { - let mut zip = ZipWriter::new(buf); - zip = perform_raw_copy_file(src, zip).unwrap(); + let zip = 
ZipWriter::new(buf); + let zip = perform_raw_copy_file(src, zip).unwrap(); let out = zip.finish().unwrap().into_inner(); assert_eq!(out.len(), len2); @@ -287,11 +318,10 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_many_small_files", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..write_many_count() { let name = format!("file_{i}.dat"); - writer - .start_file(name, FileOptions::default().compression_method(CompressionMethod::Stored)) - .unwrap(); + writer.start_file(name, options).unwrap(); writer.write_all(&payload_small).unwrap(); } black_box(writer.finish().unwrap().into_inner()); @@ -303,7 +333,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_one_large_file", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); writer.write_all(&payload_large).unwrap(); black_box(writer.finish().unwrap().into_inner()); @@ -315,13 +345,9 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_then_read_roundtrip", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { - writer - .start_file( - format!("entry_{i}.dat"), - FileOptions::default().compression_method(CompressionMethod::Stored), - ) - .unwrap(); + writer.start_file(format!("entry_{i}.dat"), options).unwrap(); writer.write_all(&roundtrip_payload).unwrap(); } let bytes = writer.finish().unwrap().into_inner(); @@ -336,7 +362,7 @@ fn criterion_benchmark(c: &mut Criterion) { let (_, deflated_bytes) = generate_random_archive_merge( 5, 4096, - 
FileOptions::default().compression_method(CompressionMethod::Deflated), + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), ) .unwrap(); c.bench_function("read_deflated_entry", |b| { @@ -356,13 +382,10 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_deflated_entries", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); for i in 0..20 { - writer - .start_file( - format!("deflated_{i}.dat"), - FileOptions::default().compression_method(CompressionMethod::Deflated), - ) - .unwrap(); + writer.start_file(format!("deflated_{i}.dat"), options).unwrap(); writer.write_all(&deflate_payload).unwrap(); } black_box(writer.finish().unwrap().into_inner()); From 4e6e834d63a9c7958124ef9e192d4f756fd7221c Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:19:04 +0300 Subject: [PATCH 04/29] #723 RUN v2.0.0 specific code --- Cargo.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 4f9a99f63..717f461eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,7 +56,9 @@ arbitrary = { version = "1.4.1", features = ["derive"] } [dev-dependencies] bencher = "0.1.5" +criterion = { version = "0.5", features = ["html_reports"] } getrandom = { version = "0.2.14", features = ["js"] } +tempfile = "3.15" walkdir = "2.5.0" time = { workspace = true, features = ["formatting", "macros"] } anyhow = "1" From dd81f9bb6d835bee7c4614365bcb4f2f6dd3f795 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:19:09 +0300 Subject: [PATCH 05/29] Revert "#723 RUN v2.0.0 specific code" This reverts commit 4e6e834d63a9c7958124ef9e192d4f756fd7221c. 
--- Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 717f461eb..4f9a99f63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,9 +56,7 @@ arbitrary = { version = "1.4.1", features = ["derive"] } [dev-dependencies] bencher = "0.1.5" -criterion = { version = "0.5", features = ["html_reports"] } getrandom = { version = "0.2.14", features = ["js"] } -tempfile = "3.15" walkdir = "2.5.0" time = { workspace = true, features = ["formatting", "macros"] } anyhow = "1" From 85bfad7a9dcbeaaa429524dd3f23d54cddc4ac1e Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:24:38 +0300 Subject: [PATCH 06/29] #723 RUN v3.0.0 --- Cargo.toml | 1 + benches/criterion_bench.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2e821c761..b93f90e07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ arbitrary = { version = "1.4.1", features = ["derive"] } [dev-dependencies] bencher = "0.1.5" +criterion = { version = "0.5", features = ["html_reports"] } getrandom = { version = "0.3.1", features = ["wasm_js", "std"] } walkdir = "2.5" time = { workspace = true, features = ["formatting", "macros"] } diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index a177e1032..c68728ed6 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -364,7 +364,7 @@ fn criterion_benchmark(c: &mut Criterion) { 4096, SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), ) - .unwrap(); + .unwrap(); c.bench_function("read_deflated_entry", |b| { b.iter(|| { let mut archive = ZipArchive::new(Cursor::new(deflated_bytes.as_slice())).unwrap(); From a0d11cdc0b9d9facaf1bf8e68b8107fd4785435f Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:31:17 +0300 Subject: [PATCH 07/29] #723 RUN v4.0.0 Made-with: Cursor From 11aad9b39daf2aedb6ca2cb7efc395a48e88e024 
Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:32:50 +0300 Subject: [PATCH 08/29] #723 RUN v5.0.0 Made-with: Cursor From 2707670f799f577640e410972ef92208c7c7794b Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:33:19 +0300 Subject: [PATCH 09/29] #723 RUN v6.0.0 Made-with: Cursor From f69d7a32bd17ab15b2a0233a72d8f03af606e40b Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:33:45 +0300 Subject: [PATCH 10/29] #723 RUN v7.0.0 Made-with: Cursor From c6cbf220fc1fc4bfdb8779c0061c9dff06132bcb Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:35:33 +0300 Subject: [PATCH 11/29] #723 RUN v8.0.0 Made-with: Cursor From 7d7844d9a65d6ac28bac84115898914f9426826c Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:35:49 +0300 Subject: [PATCH 12/29] #723 RUN v8.2.0 Made-with: Cursor From 258e599fdd149f4b58ef05cedee90a08549410bb Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 28 Mar 2026 10:19:18 +0300 Subject: [PATCH 13/29] #723 update criterion to 8.2.0 --- Cargo.toml | 2 +- benches/criterion_bench.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 92abf8b1d..927f6272c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,7 @@ wasm-bindgen-test = "^0.3.56" [dev-dependencies] bencher = "0.1.5" -criterion = { version = "0.5", features = ["html_reports"] } +criterion = { version = "0.8.2", features = ["html_reports"] } getrandom = { version = "0.4", default-features = false } tempfile = "3.15" walkdir = "2.5" diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index c68728ed6..3c2d40dbb 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ 
-2,8 +2,9 @@ // Run: cargo bench --bench criterion_bench // First run saves baseline; later runs compare and can fail on regression. -use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; +use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use std::fs; +use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; From 16874d3eeb4e605ee9cb783323b353a16aeaaf29 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 28 Mar 2026 12:24:37 +0300 Subject: [PATCH 14/29] #723 formatting --- benches/criterion_bench.rs | 57 +++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 3c2d40dbb..d3b9b47ad 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -2,11 +2,11 @@ // Run: cargo bench --bench criterion_bench // First run saves baseline; later runs compare and can fail on regression.
-use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use std::fs; use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; -use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; +use zip::{CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::SimpleFileOptions}; // deterministic seeded randomness helper (SplitMix64, no external dependencies) fn seeded_random_bytes(size: usize) -> Vec { @@ -122,27 +122,15 @@ fn file_count_meta() -> usize { } fn comment_size() -> usize { - if is_low_memory() { - 10_000 - } else { - 50_000 - } + if is_low_memory() { 10_000 } else { 50_000 } } fn read_all_entries_count() -> usize { - if is_low_memory() { - 200 - } else { - 500 - } + if is_low_memory() { 200 } else { 500 } } fn by_name_lookup_count() -> usize { - if is_low_memory() { - 20 - } else { - 50 - } + if is_low_memory() { 20 } else { 50 } } fn large_non_zip_size() -> usize { @@ -154,11 +142,7 @@ fn large_non_zip_size() -> usize { } fn write_many_count() -> usize { - if is_low_memory() { - 300 - } else { - 1_000 - } + if is_low_memory() { 300 } else { 1_000 } } const STREAM_ENTRIES: usize = 20; @@ -257,8 +241,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); // --- read_all_entries: iterate by_index and read each entry --- - let bytes_all_entries = - generate_random_archive_meta(read_all_entries_count(), 512).unwrap(); + let bytes_all_entries = generate_random_archive_meta(read_all_entries_count(), 512).unwrap(); c.bench_function("read_all_entries", |b| { b.iter(|| { let mut archive = ZipArchive::new(Cursor::new(bytes_all_entries.as_slice())).unwrap(); @@ -299,7 +282,10 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("read_stream_entries", |b| { b.iter(|| { let mut f = fs::File::open(&path_stream).unwrap(); - while zip::read::read_zipfile_from_stream(&mut f).unwrap().is_some() {} + while 
zip::read::read_zipfile_from_stream(&mut f) + .unwrap() + .is_some() + {} }); }); @@ -319,7 +305,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_many_small_files", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = + SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..write_many_count() { let name = format!("file_{i}.dat"); writer.start_file(name, options).unwrap(); @@ -334,7 +321,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_one_large_file", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = + SimpleFileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); writer.write_all(&payload_large).unwrap(); black_box(writer.finish().unwrap().into_inner()); @@ -346,9 +334,12 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("write_then_read_roundtrip", |b| { b.iter(|| { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = + SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { - writer.start_file(format!("entry_{i}.dat"), options).unwrap(); + writer + .start_file(format!("entry_{i}.dat"), options) + .unwrap(); writer.write_all(&roundtrip_payload).unwrap(); } let bytes = writer.finish().unwrap().into_inner(); @@ -365,7 +356,7 @@ fn criterion_benchmark(c: &mut Criterion) { 4096, SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), ) - .unwrap(); + .unwrap(); c.bench_function("read_deflated_entry", |b| { b.iter(|| { let mut archive = 
ZipArchive::new(Cursor::new(deflated_bytes.as_slice())).unwrap(); @@ -386,7 +377,9 @@ fn criterion_benchmark(c: &mut Criterion) { let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); for i in 0..20 { - writer.start_file(format!("deflated_{i}.dat"), options).unwrap(); + writer + .start_file(format!("deflated_{i}.dat"), options) + .unwrap(); writer.write_all(&deflate_payload).unwrap(); } black_box(writer.finish().unwrap().into_inner()); @@ -396,4 +389,4 @@ fn criterion_benchmark(c: &mut Criterion) { } criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); \ No newline at end of file +criterion_main!(benches); From 10e9c647f2882d2375bfc27616b3136ada591f16 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 28 Mar 2026 12:51:24 +0300 Subject: [PATCH 15/29] #723 experiment with preallocated write buffer I am not sure about it. We have to have it only for calls that don't have calls like `vec.reserve(predicted_write_n);` --- benches/criterion_bench.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index d3b9b47ad..3f5301deb 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -371,18 +371,26 @@ fn criterion_benchmark(c: &mut Criterion) { #[cfg(feature = "deflate")] { let deflate_payload = seeded_random_bytes(2048); + let mut buffer = Vec::with_capacity(128 * 1024); + c.bench_function("write_deflated_entries", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + buffer.clear(); + + let mut writer = ZipWriter::new(Cursor::new(&mut buffer)); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); + for i in 0..20 { writer .start_file(format!("deflated_{i}.dat"), options) .unwrap(); writer.write_all(&deflate_payload).unwrap(); } - black_box(writer.finish().unwrap().into_inner()); + + let cursor =
writer.finish().unwrap(); + black_box(cursor.into_inner()); }); }); } From 6dad4ba4b643ec1bf48df7fbc90d6c051d6d2f29 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sun, 29 Mar 2026 10:23:16 -0600 Subject: [PATCH 16/29] feat: add criterion dev-deps only on non wasm --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9c45f5fbe..fc07c47d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ getrandom = { version = "0.4", default-features = false, optional = true } time = { version = "^0.3.47", default-features = false, optional = true, features = ["std"] } [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] +criterion = { version = "0.8.2", features = ["html_reports"] } getrandom = { version = "0.4", default-features = false } # wasm32 @@ -68,12 +69,11 @@ wasm-bindgen-test = "^0.3.56" [dev-dependencies] bencher = "0.1.5" -criterion = { version = "0.8.2", features = ["html_reports"] } getrandom = { version = "0.4", default-features = false } -tempfile = "3.15" walkdir = "2.5" time = { version = "^0.3.47", features = ["formatting", "macros"] } clap = { version = "^4.4.18", features = ["derive"] } +tempfile = "3.15" rayon = "1.11" [features] From f5cef6c31294885765db448959aba2dfb29f1518 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:12:38 -0400 Subject: [PATCH 17/29] decide to keep custom random implementation for benchmarks only; listed args in comments --- benches/criterion_bench.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 3f5301deb..24e9626be 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -8,7 +8,11 @@ use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; use zip::{CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::SimpleFileOptions}; -// deterministic seeded randomness helper 
(SplitMix64, no external dependencies) +// Deterministic seeded randomness helper (SplitMix64; no external dependencies, no syscalls). +// Why we use this instead of the getrandom crate: +// 1. Deterministic — always the same bytes. +// 2. No syscalls — the getrandom crate calls the OS/platform RNG. +// 3. No dependency here — getrandom is used elsewhere but remains optional and may change. fn seeded_random_bytes(size: usize) -> Vec { let mut x: u64 = 0xdead_beef_cafe_babe; // seed let mut out = vec![0u8; size]; From 19c5a7514fd442e74b873c2365a71ce966ae9e90 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:23:52 -0400 Subject: [PATCH 18/29] test version with preallocated vecs --- benches/criterion_bench.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 24e9626be..352b0f101 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -153,6 +153,8 @@ const STREAM_ENTRIES: usize = 20; const STREAM_ENTRY_SIZE: usize = 256; const WRITE_LARGE_SIZE: usize = 1024 * 1024; const ROUNDTRIP_ENTRIES: usize = 100; +/// Reused output buffer for write benches: largest case is ~1 MiB stored payload + zip overhead (< 2 MiB). 
+const WRITE_BENCH_BUF_CAP: usize = 2 * 1024 * 1024; fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); @@ -306,9 +308,11 @@ fn criterion_benchmark(c: &mut Criterion) { // --- write_many_small_files --- let payload_small = seeded_random_bytes(128); + let mut write_many_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); c.bench_function("write_many_small_files", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + write_many_buffer.clear(); + let mut writer = ZipWriter::new(Cursor::new(&mut write_many_buffer)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..write_many_count() { @@ -322,9 +326,11 @@ fn criterion_benchmark(c: &mut Criterion) { // --- write_one_large_file --- let payload_large = seeded_random_bytes(WRITE_LARGE_SIZE); + let mut write_large_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); c.bench_function("write_one_large_file", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + write_large_buffer.clear(); + let mut writer = ZipWriter::new(Cursor::new(&mut write_large_buffer)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); @@ -335,9 +341,11 @@ fn criterion_benchmark(c: &mut Criterion) { // --- write_then_read_roundtrip --- let roundtrip_payload = seeded_random_bytes(256); + let mut roundtrip_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); c.bench_function("write_then_read_roundtrip", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + roundtrip_buffer.clear(); + let mut writer = ZipWriter::new(Cursor::new(&mut roundtrip_buffer)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { @@ -375,13 +383,13 @@ fn criterion_benchmark(c: &mut Criterion) { #[cfg(feature = "deflate")] { let 
deflate_payload = seeded_random_bytes(2048); - let mut buffer = Vec::with_capacity(128 * 1024); + let mut write_deflate_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); c.bench_function("write_deflated_entries", |b| { b.iter(|| { - buffer.clear(); + write_deflate_buffer.clear(); - let mut writer = ZipWriter::new(Cursor::new(&mut buffer)); + let mut writer = ZipWriter::new(Cursor::new(&mut write_deflate_buffer)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); From 05a936a43181b5c9f42c032c438cb32f37387173 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:43:06 -0400 Subject: [PATCH 19/29] use tmp files as buffers (tmpfs on linux) we still have syscalls but without hardware layer --- benches/criterion_bench.rs | 283 +++++++++++++++++++++---------------- 1 file changed, 165 insertions(+), 118 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 352b0f101..9d40c6455 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -111,50 +111,16 @@ fn perform_raw_copy_file( const NUM_ENTRIES: usize = 100; const ENTRY_SIZE: usize = 1024; -// Default sizes (desktop). When BENCH_PI=1 or BENCH_LOW_MEMORY=1, use smaller sizes for Pi 3B (~1 GB RAM). 
-fn is_low_memory() -> bool { - std::env::var("BENCH_PI").as_deref() == Ok("1") - || std::env::var("BENCH_LOW_MEMORY").as_deref() == Ok("1") -} - -fn file_count_meta() -> usize { - if is_low_memory() { - 2_000 // ~2 MB archive instead of ~15 MB - } else { - FILE_COUNT - } -} - -fn comment_size() -> usize { - if is_low_memory() { 10_000 } else { 50_000 } -} - -fn read_all_entries_count() -> usize { - if is_low_memory() { 200 } else { 500 } -} - -fn by_name_lookup_count() -> usize { - if is_low_memory() { 20 } else { 50 } -} - -fn large_non_zip_size() -> usize { - if is_low_memory() { - 5_000_000 // 5 MB instead of 17 MB - } else { - 17_000_000 - } -} - -fn write_many_count() -> usize { - if is_low_memory() { 300 } else { 1_000 } -} +const COMMENT_BENCH_LEN: usize = 50_000; +const READ_ALL_ENTRIES_FILES: usize = 500; +const BY_NAME_LOOKUP_COUNT: usize = 50; +const LARGE_NON_ZIP_BYTES: usize = 17_000_000; +const WRITE_MANY_FILES: usize = 1_000; const STREAM_ENTRIES: usize = 20; const STREAM_ENTRY_SIZE: usize = 256; const WRITE_LARGE_SIZE: usize = 1024 * 1024; const ROUNDTRIP_ENTRIES: usize = 100; -/// Reused output buffer for write benches: largest case is ~1 MiB stored payload + zip overhead (< 2 MiB). -const WRITE_BENCH_BUF_CAP: usize = 2 * 1024 * 1024; fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); @@ -167,12 +133,24 @@ fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { } fn criterion_benchmark(c: &mut Criterion) { - let size = 1024 * 1024; - let bytes = generate_random_archive(size); + // Shared directory: all fixtures use real files under here (see each section below). 
+ let bench_dir = tempfile::TempDir::with_prefix("criterion_zip").unwrap(); + let p = |name: &str| bench_dir.path().join(name); + + // ============================================================================ + // read_entry + // Single stored entry (~1 MiB payload); read full entry in a loop from disk. + // ============================================================================ + let path_read_entry = p("read_entry.zip"); + fs::write( + &path_read_entry, + generate_random_archive(1024 * 1024), + ) + .unwrap(); c.bench_function("read_entry", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + let mut archive = ZipArchive::new(fs::File::open(&path_read_entry).unwrap()).unwrap(); let mut file = archive.by_name("random.dat").unwrap(); let mut buf = [0u8; 1024]; @@ -187,70 +165,100 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - let bytes_meta = generate_random_archive_meta(file_count_meta(), FILE_SIZE_META).unwrap(); + // ============================================================================ + // read_metadata / by_name_lookup_many + // One large archive on disk (FILE_COUNT × FILE_SIZE_META); shared by both benches. 
+ // ============================================================================ + let path_meta = p("meta.zip"); + fs::write( + &path_meta, + generate_random_archive_meta(FILE_COUNT, FILE_SIZE_META).unwrap(), + ) + .unwrap(); c.bench_function("read_metadata", |b| { b.iter(|| { black_box( - ZipArchive::new(Cursor::new(bytes_meta.as_slice())) + ZipArchive::new(fs::File::open(&path_meta).unwrap()) .unwrap() .len(), ) }); }); - let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); - - let (len, src_bytes) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + // ============================================================================ + // merge_archive_stored + // Source zip on disk; merge into a new file each batch (stored method). + // ============================================================================ + let merge_options = + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let path_merge_src = p("merge_src.zip"); + let path_merge_out = p("merge_out.zip"); + let (len, src_bytes) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src, &src_bytes).unwrap(); c.bench_function("merge_archive_stored", |b| { b.iter_batched( || { - let src = ZipArchive::new(Cursor::new(src_bytes.clone())).unwrap(); - let buf = Cursor::new(Vec::with_capacity(len)); - (src, buf) + let src = ZipArchive::new(fs::File::open(&path_merge_src).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out).unwrap(); + (src, out) }, - |(src, buf)| { - let zip = ZipWriter::new(buf); + |(src, out)| { + let zip = ZipWriter::new(out); let zip = perform_merge(src, zip).unwrap(); - let out = zip.finish().unwrap().into_inner(); - - assert_eq!(out.len(), len); - - black_box(out) + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len); + black_box(out); }, BatchSize::SmallInput, ); }); + // 
============================================================================ + // merge_archive_raw_copy_file_stored + // Second merge source (independent); raw_copy_file path. + // ============================================================================ + let path_merge_src2 = p("merge_src2.zip"); + let path_merge_out2 = p("merge_out2.zip"); let (len2, src_bytes2) = - generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src2, &src_bytes2).unwrap(); c.bench_function("merge_archive_raw_copy_file_stored", |b| { b.iter_batched( || { - let src = ZipArchive::new(Cursor::new(src_bytes2.clone())).unwrap(); - let buf = Cursor::new(Vec::with_capacity(len2)); - (src, buf) + let src = ZipArchive::new(fs::File::open(&path_merge_src2).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out2).unwrap(); + (src, out) }, - |(src, buf)| { - let zip = ZipWriter::new(buf); + |(src, out)| { + let zip = ZipWriter::new(out); let zip = perform_raw_copy_file(src, zip).unwrap(); - let out = zip.finish().unwrap().into_inner(); - - assert_eq!(out.len(), len2); - - black_box(out) + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len2); + black_box(out); }, BatchSize::SmallInput, ); }); - // --- read_all_entries: iterate by_index and read each entry --- - let bytes_all_entries = generate_random_archive_meta(read_all_entries_count(), 512).unwrap(); + // ============================================================================ + // read_all_entries + // Many small entries; read each by index to sink (from disk). 
+ // ============================================================================ + let path_all_entries = p("all_entries.zip"); + fs::write( + &path_all_entries, + generate_random_archive_meta(READ_ALL_ENTRIES_FILES, 512).unwrap(), + ) + .unwrap(); + c.bench_function("read_all_entries", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(bytes_all_entries.as_slice())).unwrap(); + let mut archive = + ZipArchive::new(fs::File::open(&path_all_entries).unwrap()).unwrap(); for i in 0..archive.len() { let mut entry = archive.by_index(i).unwrap(); let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); @@ -258,33 +266,51 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - // --- by_name_lookup_many: look up many names in large archive --- - let lookup_names: Vec = (0..by_name_lookup_count()) + // ============================================================================ + // by_name_lookup_many + // Uses meta.zip above; repeated by_name lookups (names prebuilt once). + // ============================================================================ + let lookup_names: Vec = (0..BY_NAME_LOOKUP_COUNT) .map(|i| format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat")) .collect(); c.bench_function("by_name_lookup_many", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(bytes_meta.as_slice())).unwrap(); + let mut archive = ZipArchive::new(fs::File::open(&path_meta).unwrap()).unwrap(); for name in &lookup_names { let _ = archive.by_name(name).unwrap(); } }); }); - // --- parse_archive_with_comment --- - let bytes_comment = generate_archive_with_comment(comment_size()).unwrap(); + // ============================================================================ + // parse_archive_with_comment + // Zip with large comment field; parse and read comment length from disk. 
+ // ============================================================================ + let path_comment = p("comment.zip"); + fs::write( + &path_comment, + generate_archive_with_comment(COMMENT_BENCH_LEN).unwrap(), + ) + .unwrap(); + c.bench_function("parse_archive_with_comment", |b| { b.iter(|| { - let archive = ZipArchive::new(Cursor::new(bytes_comment.as_slice())).unwrap(); + let archive = ZipArchive::new(fs::File::open(&path_comment).unwrap()).unwrap(); black_box(archive.comment().len()); }); }); - // --- read_stream_entries: read_zipfile_from_stream until None --- - let bytes_stream = generate_random_archive_meta(STREAM_ENTRIES, STREAM_ENTRY_SIZE).unwrap(); - let dir_stream = tempfile::TempDir::with_prefix("criterion_stream").unwrap(); - let path_stream = dir_stream.path().join("bench.zip"); - fs::write(&path_stream, &bytes_stream).unwrap(); + // ============================================================================ + // read_stream_entries + // Streaming API: read_zipfile_from_stream until None (file on disk). + // ============================================================================ + let path_stream = p("stream.zip"); + fs::write( + &path_stream, + generate_random_archive_meta(STREAM_ENTRIES, STREAM_ENTRY_SIZE).unwrap(), + ) + .unwrap(); + c.bench_function("read_stream_entries", |b| { b.iter(|| { let mut f = fs::File::open(&path_stream).unwrap(); @@ -295,10 +321,13 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - // --- parse_large_non_zip_reject --- - let dir_reject = tempfile::TempDir::with_prefix("criterion_reject").unwrap(); - let path_reject = dir_reject.path().join("zeros"); - fs::write(&path_reject, vec![0u8; large_non_zip_size()]).unwrap(); + // ============================================================================ + // parse_large_non_zip_reject + // Large non-zip blob on disk; ZipArchive::new must fail (reject path). 
+ // ============================================================================ + let path_reject = p("zeros.bin"); + fs::write(&path_reject, vec![0u8; LARGE_NON_ZIP_BYTES]).unwrap(); + c.bench_function("parse_large_non_zip_reject", |b| { b.iter(|| { let r = ZipArchive::new(fs::File::open(&path_reject).unwrap()); @@ -306,46 +335,55 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - // --- write_many_small_files --- + // ============================================================================ + // write_many_small_files + // Payload in memory; write many stored entries to a file (truncated each iter). + // ============================================================================ + let path_write_many = p("write_many.zip"); let payload_small = seeded_random_bytes(128); - let mut write_many_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); + c.bench_function("write_many_small_files", |b| { b.iter(|| { - write_many_buffer.clear(); - let mut writer = ZipWriter::new(Cursor::new(&mut write_many_buffer)); + let mut writer = ZipWriter::new(fs::File::create(&path_write_many).unwrap()); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); - for i in 0..write_many_count() { + for i in 0..WRITE_MANY_FILES { let name = format!("file_{i}.dat"); writer.start_file(name, options).unwrap(); writer.write_all(&payload_small).unwrap(); } - black_box(writer.finish().unwrap().into_inner()); + black_box(writer.finish().unwrap()); }); }); - // --- write_one_large_file --- + // ============================================================================ + // write_one_large_file + // ~1 MiB stored payload; single entry written to disk each iter. 
+ // ============================================================================ + let path_write_large = p("write_large.zip"); let payload_large = seeded_random_bytes(WRITE_LARGE_SIZE); - let mut write_large_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); + c.bench_function("write_one_large_file", |b| { b.iter(|| { - write_large_buffer.clear(); - let mut writer = ZipWriter::new(Cursor::new(&mut write_large_buffer)); + let mut writer = ZipWriter::new(fs::File::create(&path_write_large).unwrap()); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); writer.write_all(&payload_large).unwrap(); - black_box(writer.finish().unwrap().into_inner()); + black_box(writer.finish().unwrap()); }); }); - // --- write_then_read_roundtrip --- + // ============================================================================ + // write_then_read_roundtrip + // Write several entries to disk, reopen, read archive length. 
+ // ============================================================================ + let path_roundtrip = p("roundtrip.zip"); let roundtrip_payload = seeded_random_bytes(256); - let mut roundtrip_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); + c.bench_function("write_then_read_roundtrip", |b| { b.iter(|| { - roundtrip_buffer.clear(); - let mut writer = ZipWriter::new(Cursor::new(&mut roundtrip_buffer)); + let mut writer = ZipWriter::new(fs::File::create(&path_roundtrip).unwrap()); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { @@ -354,42 +392,52 @@ fn criterion_benchmark(c: &mut Criterion) { .unwrap(); writer.write_all(&roundtrip_payload).unwrap(); } - let bytes = writer.finish().unwrap().into_inner(); - let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + drop(writer.finish().unwrap()); + let archive = ZipArchive::new(fs::File::open(&path_roundtrip).unwrap()).unwrap(); black_box(archive.len()); }); }); - // --- deflate: read deflated entry (when feature enabled) --- #[cfg(feature = "deflate")] { - let (_, deflated_bytes) = generate_random_archive_merge( - 5, - 4096, - SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), + // ============================================================================ + // read_deflated_entry + // Deflated entries on disk; read all to sink. 
+ // ============================================================================ + let path_deflate_read = p("deflate_read.zip"); + fs::write( + &path_deflate_read, + generate_random_archive_merge( + 5, + 4096, + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), + ) + .unwrap() + .1, ) .unwrap(); + c.bench_function("read_deflated_entry", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(deflated_bytes.as_slice())).unwrap(); + let mut archive = + ZipArchive::new(fs::File::open(&path_deflate_read).unwrap()).unwrap(); for i in 0..archive.len() { let mut entry = archive.by_index(i).unwrap(); let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); } }); }); - } - #[cfg(feature = "deflate")] - { + // ============================================================================ + // write_deflated_entries + // Deflated writes to disk each iter. + // ============================================================================ + let path_deflate_write = p("deflate_write.zip"); let deflate_payload = seeded_random_bytes(2048); - let mut write_deflate_buffer = Vec::with_capacity(WRITE_BENCH_BUF_CAP); c.bench_function("write_deflated_entries", |b| { b.iter(|| { - write_deflate_buffer.clear(); - - let mut writer = ZipWriter::new(Cursor::new(&mut write_deflate_buffer)); + let mut writer = ZipWriter::new(fs::File::create(&path_deflate_write).unwrap()); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); @@ -401,8 +449,7 @@ fn criterion_benchmark(c: &mut Criterion) { writer.write_all(&deflate_payload).unwrap(); } - let cursor = writer.finish().unwrap(); - black_box(cursor.into_inner()); + black_box(writer.finish().unwrap()); }); }); } From 6428d7dff1ff46c3917ac32966c0433b57c7dedc Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:58:50 -0400 Subject: [PATCH 20/29] restore Cargo.toml --- Cargo.toml | 120 
++++++++++++++++------------------------------------- 1 file changed, 35 insertions(+), 85 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 23680ac93..7dcbb58d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,97 +1,55 @@ [package] -name = "zip" -version = "3.0.0" -authors = [ - "Mathijs van de Nes ", - "Marli Frost ", - "Ryan Levick ", - "Chris Hennick ", -] +name = "zip_next" +version = "1.0.0" +authors = ["Mathijs van de Nes ", "Marli Frost ", "Ryan Levick ", + "Chris Hennick "] license = "MIT" -repository = "https://github.com/zip-rs/zip2.git" -keywords = ["zip", "archive", "compression"] -# Any change to rust-version must be reflected also in `README.md` and `.github/workflows/ci.yaml`. -# The MSRV policy is documented in `README.md`. -rust-version = "1.75.0" +repository = "https://github.com/Pr0methean/zip-next.git" +keywords = ["zip", "archive"] description = """ +rust-version = "1.67.0" Library to support the reading and writing of zip files. """ edition = "2021" -exclude = ["tests/**", "examples/**", ".github/**", "fuzz_read/**", "fuzz_write/**"] - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[workspace.dependencies] -time = { version = "0.3.37", default-features = false } [dependencies] -aes = { version = "0.8", optional = true } -bzip2 = { version = "0.5.0", optional = true } -chrono = { version = "0.4", optional = true } -constant_time_eq = { version = "0.3", optional = true } -crc32fast = "1.4" -flate2 = { version = "1.1.1", default-features = false, optional = true } -getrandom = { version = "0.3.1", features = ["wasm_js", "std"], optional = true} -hmac = { version = "0.12", optional = true, features = ["reset"] } -indexmap = "2" -jiff = { version = "0.2.4", optional = true } -memchr = "2.7" -nt-time = { version = "0.10.6", default-features = false, optional = true } -pbkdf2 = { version = "0.12", optional = true } -sha1 = { version = "0.10", optional = true } -time = { workspace = true, optional = true, 
features = [ - "std", -] } -zeroize = { version = "1.8", optional = true, features = ["zeroize_derive"] } -zstd = { version = "0.13", optional = true, default-features = false } -zopfli = { version = "0.8", optional = true } -deflate64 = { version = "0.1.9", optional = true } -lzma-rs = { version = "0.3", default-features = false, optional = true } -xz2 = { version = "0.1.7", optional = true } +aes = { version = "0.8.4", optional = true } +byteorder = "1.5.0" +bzip2 = { version = "0.4.4", optional = true } +chrono = { version = "0.4.37", optional = true } +constant_time_eq = { version = "0.3.0", optional = true } +crc32fast = "1.4.0" +flate2 = { version = "1.0.28", default-features = false, optional = true } +hmac = { version = "0.12.1", optional = true, features = ["reset"] } +pbkdf2 = {version = "0.12.2", optional = true } +sha1 = {version = "0.10.6", optional = true } +time = { version = "0.3.34", optional = true, default-features = false, features = ["std"] } +zstd = { version = "0.13.1", optional = true, default-features = false } +zopfli = { version = "0.8.0", optional = true } +deflate64 = { version = "0.1.8", optional = true } + +[target.'cfg(any(all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", target_arch = "powerpc"))'.dependencies] +crossbeam-utils = "0.8.19" [target.'cfg(fuzzing)'.dependencies] -arbitrary = { version = "1.4.1", features = ["derive"] } +arbitrary = { version = "1.3.2", features = ["derive"] } [dev-dependencies] bencher = "0.1.5" -criterion = { version = "0.5", features = ["html_reports"] } -getrandom = { version = "0.3.1", features = ["wasm_js", "std"] } -walkdir = "2.5" -time = { workspace = true, features = ["formatting", "macros"] } -anyhow = "1.0.95" -clap = { version = "=4.4.18", features = ["derive"] } -tempfile = "3.15" +getrandom = { version = "0.2.14", features = ["js"] } +walkdir = "2.5.0" +time = { version = "0.3.34", features = ["formatting", "macros"] } [features] -aes-crypto = ["aes", 
"constant_time_eq", "hmac", "pbkdf2", "sha1", "getrandom", "zeroize"] +aes-crypto = [ "aes", "constant_time_eq", "hmac", "pbkdf2", "sha1" ] chrono = ["chrono/default"] -_deflate-any = [] -_all-features = [] # Detect when --all-features is used -deflate = ["deflate-zopfli", "deflate-flate2-zlib-rs"] -# Pull in flate2, but don't choose a backend; useful if you want to choose your own flate2 backend -deflate-flate2 = ["_deflate-any", "dep:flate2"] -# Pull in flate2 and the fast zlib-rs backend; this is what most users will want -deflate-flate2-zlib-rs = ["deflate-flate2", "flate2/zlib-rs"] -# Pull in flate2 and the zlib backend; only use this if you need a dynamically linked system zlib -deflate-flate2-zlib = ["deflate-flate2", "flate2/zlib"] -deflate-zopfli = ["zopfli", "_deflate-any"] -jiff-02 = ["dep:jiff"] -nt-time = ["dep:nt-time"] -lzma = ["lzma-rs/stream"] +deflate = ["flate2/rust_backend"] +deflate-miniz = ["flate2/default"] +deflate-zlib = ["flate2/zlib"] +deflate-zlib-ng = ["flate2/zlib-ng"] +deflate-zopfli = ["zopfli"] unreserved = [] -xz = ["dep:xz2"] -default = [ - "aes-crypto", - "bzip2", - "deflate64", - "deflate", - "lzma", - "time", - "zstd", - "xz", -] +default = ["aes-crypto", "bzip2", "deflate", "deflate64", "deflate-zlib-ng", "deflate-zopfli", "time", "zstd"] [[bench]] name = "read_entry" @@ -100,11 +58,3 @@ harness = false [[bench]] name = "read_metadata" harness = false - -[[bench]] -name = "merge_archive" -harness = false - -[[bench]] -name = "criterion_bench" -harness = false From b67ed7a4724f20af14ae6d1a63dee62062e573d8 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:06:56 -0400 Subject: [PATCH 21/29] add criterion benchmark --- Cargo.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 7dcbb58d0..ff7136799 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ arbitrary = { version = "1.3.2", features = ["derive"] } [dev-dependencies] 
bencher = "0.1.5" +criterion = { version = "0.8.2", features = ["html_reports"] } getrandom = { version = "0.2.14", features = ["js"] } walkdir = "2.5.0" time = { version = "0.3.34", features = ["formatting", "macros"] } @@ -58,3 +59,7 @@ harness = false [[bench]] name = "read_metadata" harness = false + +[[bench]] +name = "criterion_bench" +harness = false From 5daa46e274a56b29924b12f83f64cb5373ab29ab Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:13:32 -0400 Subject: [PATCH 22/29] add tempfile = "3.27.0" to the Cargo.toml dev-dependencies --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index ff7136799..2f6db6e63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ arbitrary = { version = "1.3.2", features = ["derive"] } bencher = "0.1.5" criterion = { version = "0.8.2", features = ["html_reports"] } getrandom = { version = "0.2.14", features = ["js"] } +tempfile = "3.27.0" walkdir = "2.5.0" time = { version = "0.3.34", features = ["formatting", "macros"] } From 3e0bac783730697781757255a98e4fa8c53b0954 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:07:16 -0400 Subject: [PATCH 23/29] #723 RUN_1 1.0.0 --- benches/criterion_bench.rs | 355 ++++++++++++++++++++----------------- 1 file changed, 191 insertions(+), 164 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index a177e1032..6dfd662a4 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -2,12 +2,19 @@ // Run: cargo bench --bench criterion_bench // First run saves baseline; later runs compare and can fail on regression. 
-use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; +use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use std::fs; +use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; -use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; - -// deterministic seeded randomness helper (SplitMix64, no external dependencies) +use zip_next::{ + CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::FileOptions, +}; + +// Deterministic seeded randomness helper (SplitMix64; no external dependencies, no syscalls). +// Why we use this instead of the getrandom crate: +// 1. Deterministic — always the same bytes. +// 2. No syscalls — the getrandom crate calls the OS/platform RNG. +// 3. No dependency here — getrandom is used elsewhere but remains optional and may change. fn seeded_random_bytes(size: usize) -> Vec { let mut x: u64 = 0xdead_beef_cafe_babe; // seed let mut out = vec![0u8; size]; @@ -32,7 +39,7 @@ fn seeded_random_bytes(size: usize) -> Vec { fn generate_random_archive(size: usize) -> Vec { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let options = FileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("random.dat", options).unwrap(); // generate deterministic seeded random data @@ -47,14 +54,14 @@ const FILE_SIZE_META: usize = 1024; fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = FileOptions::default().compression_method(CompressionMethod::Stored); // seeded random payload reused across entries let bytes = seeded_random_bytes(file_size); for i in 0..count_files { let name = 
format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat"); - writer.start_file(name, options)?; + writer.start_file(name, options.clone())?; writer.write_all(&bytes)?; } @@ -64,7 +71,7 @@ fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResu fn generate_random_archive_merge( num_entries: usize, entry_size: usize, - options: SimpleFileOptions, + options: FileOptions, ) -> ZipResult<(usize, Vec)> { let buf = Cursor::new(Vec::new()); let mut zip = ZipWriter::new(buf); @@ -74,7 +81,7 @@ fn generate_random_archive_merge( for i in 0..num_entries { let name = format!("random{i}.dat"); - zip.start_file(name, options)?; + zip.start_file(name, options.clone())?; zip.write_all(&bytes)?; } @@ -84,14 +91,6 @@ fn generate_random_archive_merge( Ok((len, buf)) } -fn perform_merge( - src: ZipArchive, - mut target: ZipWriter, -) -> ZipResult> { - target.merge_archive(src)?; - Ok(target) -} - fn perform_raw_copy_file( mut src: ZipArchive, mut target: ZipWriter, @@ -106,59 +105,11 @@ fn perform_raw_copy_file( const NUM_ENTRIES: usize = 100; const ENTRY_SIZE: usize = 1024; -// Default sizes (desktop). When BENCH_PI=1 or BENCH_LOW_MEMORY=1, use smaller sizes for Pi 3B (~1 GB RAM). 
-fn is_low_memory() -> bool { - std::env::var("BENCH_PI").as_deref() == Ok("1") - || std::env::var("BENCH_LOW_MEMORY").as_deref() == Ok("1") -} - -fn file_count_meta() -> usize { - if is_low_memory() { - 2_000 // ~2 MB archive instead of ~15 MB - } else { - FILE_COUNT - } -} - -fn comment_size() -> usize { - if is_low_memory() { - 10_000 - } else { - 50_000 - } -} - -fn read_all_entries_count() -> usize { - if is_low_memory() { - 200 - } else { - 500 - } -} - -fn by_name_lookup_count() -> usize { - if is_low_memory() { - 20 - } else { - 50 - } -} - -fn large_non_zip_size() -> usize { - if is_low_memory() { - 5_000_000 // 5 MB instead of 17 MB - } else { - 17_000_000 - } -} - -fn write_many_count() -> usize { - if is_low_memory() { - 300 - } else { - 1_000 - } -} +const COMMENT_BENCH_LEN: usize = 50_000; +const READ_ALL_ENTRIES_FILES: usize = 500; +const BY_NAME_LOOKUP_COUNT: usize = 50; +const LARGE_NON_ZIP_BYTES: usize = 17_000_000; +const WRITE_MANY_FILES: usize = 1_000; const STREAM_ENTRIES: usize = 20; const STREAM_ENTRY_SIZE: usize = 256; @@ -167,21 +118,33 @@ const ROUNDTRIP_ENTRIES: usize = 100; fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let options = FileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment.into_boxed_slice()); + writer.set_raw_comment(comment); writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) } fn criterion_benchmark(c: &mut Criterion) { - let size = 1024 * 1024; - let bytes = generate_random_archive(size); + // Shared directory: all fixtures use real files under here (see each section below). 
+ let bench_dir = tempfile::TempDir::with_prefix("criterion_zip").unwrap(); + let p = |name: &str| bench_dir.path().join(name); + + // ============================================================================ + // read_entry + // Single stored entry (~1 MiB payload); read full entry in a loop from disk. + // ============================================================================ + let path_read_entry = p("read_entry.zip"); + fs::write( + &path_read_entry, + generate_random_archive(1024 * 1024), + ) + .unwrap(); c.bench_function("read_entry", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + let mut archive = ZipArchive::new(fs::File::open(&path_read_entry).unwrap()).unwrap(); let mut file = archive.by_name("random.dat").unwrap(); let mut buf = [0u8; 1024]; @@ -196,71 +159,72 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - let bytes_meta = generate_random_archive_meta(file_count_meta(), FILE_SIZE_META).unwrap(); + // ============================================================================ + // read_metadata / by_name_lookup_many + // One large archive on disk (FILE_COUNT × FILE_SIZE_META); shared by both benches. 
+ // ============================================================================ + let path_meta = p("meta.zip"); + fs::write( + &path_meta, + generate_random_archive_meta(FILE_COUNT, FILE_SIZE_META).unwrap(), + ) + .unwrap(); c.bench_function("read_metadata", |b| { b.iter(|| { black_box( - ZipArchive::new(Cursor::new(bytes_meta.as_slice())) + ZipArchive::new(fs::File::open(&path_meta).unwrap()) .unwrap() .len(), ) }); }); - let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); - - let (len, src_bytes) = generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); - - c.bench_function("merge_archive_stored", |b| { - b.iter_batched( - || { - let src = ZipArchive::new(Cursor::new(src_bytes.clone())).unwrap(); - let buf = Cursor::new(Vec::with_capacity(len)); - (src, buf) - }, - |(src, buf)| { - let zip = ZipWriter::new(buf); - let zip = perform_merge(src, zip).unwrap(); - let out = zip.finish().unwrap().into_inner(); - - assert_eq!(out.len(), len); - - black_box(out) - }, - BatchSize::SmallInput, - ); - }); - + // ============================================================================ + // merge_archive_raw_copy_file_stored + // Second merge source (independent); raw_copy_file path. 
+ // ============================================================================ + let merge_options = + FileOptions::default().compression_method(CompressionMethod::Stored); + let path_merge_src2 = p("merge_src2.zip"); + let path_merge_out2 = p("merge_out2.zip"); let (len2, src_bytes2) = - generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src2, &src_bytes2).unwrap(); c.bench_function("merge_archive_raw_copy_file_stored", |b| { b.iter_batched( || { - let src = ZipArchive::new(Cursor::new(src_bytes2.clone())).unwrap(); - let buf = Cursor::new(Vec::with_capacity(len2)); - (src, buf) + let src = ZipArchive::new(fs::File::open(&path_merge_src2).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out2).unwrap(); + (src, out) }, - |(src, buf)| { - let zip = ZipWriter::new(buf); - let zip = perform_raw_copy_file(src, zip).unwrap(); - let out = zip.finish().unwrap().into_inner(); - - assert_eq!(out.len(), len2); - - black_box(out) + |(src, out)| { + let zip = ZipWriter::new(out); + let mut zip = perform_raw_copy_file(src, zip).unwrap(); + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len2); + black_box(out); }, BatchSize::SmallInput, ); }); - // --- read_all_entries: iterate by_index and read each entry --- - let bytes_all_entries = - generate_random_archive_meta(read_all_entries_count(), 512).unwrap(); + // ============================================================================ + // read_all_entries + // Many small entries; read each by index to sink (from disk). 
+ // ============================================================================ + let path_all_entries = p("all_entries.zip"); + fs::write( + &path_all_entries, + generate_random_archive_meta(READ_ALL_ENTRIES_FILES, 512).unwrap(), + ) + .unwrap(); + c.bench_function("read_all_entries", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(bytes_all_entries.as_slice())).unwrap(); + let mut archive = + ZipArchive::new(fs::File::open(&path_all_entries).unwrap()).unwrap(); for i in 0..archive.len() { let mut entry = archive.by_index(i).unwrap(); let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); @@ -268,44 +232,68 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - // --- by_name_lookup_many: look up many names in large archive --- - let lookup_names: Vec = (0..by_name_lookup_count()) + // ============================================================================ + // by_name_lookup_many + // Uses meta.zip above; repeated by_name lookups (names prebuilt once). + // ============================================================================ + let lookup_names: Vec = (0..BY_NAME_LOOKUP_COUNT) .map(|i| format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat")) .collect(); c.bench_function("by_name_lookup_many", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(bytes_meta.as_slice())).unwrap(); + let mut archive = ZipArchive::new(fs::File::open(&path_meta).unwrap()).unwrap(); for name in &lookup_names { let _ = archive.by_name(name).unwrap(); } }); }); - // --- parse_archive_with_comment --- - let bytes_comment = generate_archive_with_comment(comment_size()).unwrap(); + // ============================================================================ + // parse_archive_with_comment + // Zip with large comment field; parse and read comment length from disk. 
+ // ============================================================================ + let path_comment = p("comment.zip"); + fs::write( + &path_comment, + generate_archive_with_comment(COMMENT_BENCH_LEN).unwrap(), + ) + .unwrap(); + c.bench_function("parse_archive_with_comment", |b| { b.iter(|| { - let archive = ZipArchive::new(Cursor::new(bytes_comment.as_slice())).unwrap(); + let archive = ZipArchive::new(fs::File::open(&path_comment).unwrap()).unwrap(); black_box(archive.comment().len()); }); }); - // --- read_stream_entries: read_zipfile_from_stream until None --- - let bytes_stream = generate_random_archive_meta(STREAM_ENTRIES, STREAM_ENTRY_SIZE).unwrap(); - let dir_stream = tempfile::TempDir::with_prefix("criterion_stream").unwrap(); - let path_stream = dir_stream.path().join("bench.zip"); - fs::write(&path_stream, &bytes_stream).unwrap(); + // ============================================================================ + // read_stream_entries + // Streaming API: read_zipfile_from_stream until None (file on disk). + // ============================================================================ + let path_stream = p("stream.zip"); + fs::write( + &path_stream, + generate_random_archive_meta(STREAM_ENTRIES, STREAM_ENTRY_SIZE).unwrap(), + ) + .unwrap(); + c.bench_function("read_stream_entries", |b| { b.iter(|| { let mut f = fs::File::open(&path_stream).unwrap(); - while zip::read::read_zipfile_from_stream(&mut f).unwrap().is_some() {} + while zip_next::read::read_zipfile_from_stream(&mut f) + .unwrap() + .is_some() + {} }); }); - // --- parse_large_non_zip_reject --- - let dir_reject = tempfile::TempDir::with_prefix("criterion_reject").unwrap(); - let path_reject = dir_reject.path().join("zeros"); - fs::write(&path_reject, vec![0u8; large_non_zip_size()]).unwrap(); + // ============================================================================ + // parse_large_non_zip_reject + // Large non-zip blob on disk; ZipArchive::new must fail (reject path). 
+ // ============================================================================ + let path_reject = p("zeros.bin"); + fs::write(&path_reject, vec![0u8; LARGE_NON_ZIP_BYTES]).unwrap(); + c.bench_function("parse_large_non_zip_reject", |b| { b.iter(|| { let r = ZipArchive::new(fs::File::open(&path_reject).unwrap()); @@ -313,86 +301,125 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - // --- write_many_small_files --- + // ============================================================================ + // write_many_small_files + // Payload in memory; write many stored entries to a file (truncated each iter). + // ============================================================================ + let path_write_many = p("write_many.zip"); let payload_small = seeded_random_bytes(128); + c.bench_function("write_many_small_files", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); - for i in 0..write_many_count() { + let mut writer = ZipWriter::new(fs::File::create(&path_write_many).unwrap()); + let options = + FileOptions::default().compression_method(CompressionMethod::Stored); + for i in 0..WRITE_MANY_FILES { let name = format!("file_{i}.dat"); - writer.start_file(name, options).unwrap(); + writer.start_file(name, options.clone()).unwrap(); writer.write_all(&payload_small).unwrap(); } - black_box(writer.finish().unwrap().into_inner()); + black_box(writer.finish().unwrap()); }); }); - // --- write_one_large_file --- + // ============================================================================ + // write_one_large_file + // ~1 MiB stored payload; single entry written to disk each iter. 
+ // ============================================================================ + let path_write_large = p("write_large.zip"); let payload_large = seeded_random_bytes(WRITE_LARGE_SIZE); + c.bench_function("write_one_large_file", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let mut writer = ZipWriter::new(fs::File::create(&path_write_large).unwrap()); + let options = + FileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); writer.write_all(&payload_large).unwrap(); - black_box(writer.finish().unwrap().into_inner()); + black_box(writer.finish().unwrap()); }); }); - // --- write_then_read_roundtrip --- + // ============================================================================ + // write_then_read_roundtrip + // Write several entries to disk, reopen, read archive length. + // ============================================================================ + let path_roundtrip = p("roundtrip.zip"); let roundtrip_payload = seeded_random_bytes(256); + c.bench_function("write_then_read_roundtrip", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + let mut writer = ZipWriter::new(fs::File::create(&path_roundtrip).unwrap()); + let options = + FileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { - writer.start_file(format!("entry_{i}.dat"), options).unwrap(); + writer + .start_file(format!("entry_{i}.dat"), options.clone()) + .unwrap(); writer.write_all(&roundtrip_payload).unwrap(); } - let bytes = writer.finish().unwrap().into_inner(); - let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + drop(writer.finish().unwrap()); + let archive = 
ZipArchive::new(fs::File::open(&path_roundtrip).unwrap()).unwrap(); black_box(archive.len()); }); }); - // --- deflate: read deflated entry (when feature enabled) --- #[cfg(feature = "deflate")] { - let (_, deflated_bytes) = generate_random_archive_merge( - 5, - 4096, - SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), + // ============================================================================ + // read_deflated_entry + // Deflated entries on disk; read all to sink. + // ============================================================================ + let path_deflate_read = p("deflate_read.zip"); + fs::write( + &path_deflate_read, + generate_random_archive_merge( + 5, + 4096, + FileOptions::default().compression_method(CompressionMethod::Deflated), + ) + .unwrap() + .1, ) - .unwrap(); + .unwrap(); + c.bench_function("read_deflated_entry", |b| { b.iter(|| { - let mut archive = ZipArchive::new(Cursor::new(deflated_bytes.as_slice())).unwrap(); + let mut archive = + ZipArchive::new(fs::File::open(&path_deflate_read).unwrap()).unwrap(); for i in 0..archive.len() { let mut entry = archive.by_index(i).unwrap(); let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); } }); }); - } - #[cfg(feature = "deflate")] - { + // ============================================================================ + // write_deflated_entries + // Deflated writes to disk each iter. 
+ // ============================================================================ + let path_deflate_write = p("deflate_write.zip"); let deflate_payload = seeded_random_bytes(2048); + c.bench_function("write_deflated_entries", |b| { b.iter(|| { - let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let mut writer = ZipWriter::new(fs::File::create(&path_deflate_write).unwrap()); + let options = - SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); + FileOptions::default().compression_method(CompressionMethod::Deflated); + for i in 0..20 { - writer.start_file(format!("deflated_{i}.dat"), options).unwrap(); + writer + .start_file(format!("deflated_{i}.dat"), options.clone()) + .unwrap(); writer.write_all(&deflate_payload).unwrap(); } - black_box(writer.finish().unwrap().into_inner()); + + black_box(writer.finish().unwrap()); }); }); } } criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); \ No newline at end of file +criterion_main!(benches); From e0866dc711e5818ff80a4e100f89e4468fc17c9f Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sun, 5 Apr 2026 11:38:02 -0400 Subject: [PATCH 24/29] #723 RUN_1 v2.0.0 --- Cargo.toml | 2 +- benches/criterion_bench.rs | 78 +++++++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d0dd65a98..5a8b04ebc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,7 +62,7 @@ tempfile = "3.27.0" walkdir = "2.5.0" time = { workspace = true, features = ["formatting", "macros"] } anyhow = "1" -clap = { version = "=4.4.18", features = ["derive"] } +clap = { version = "4.5", features = ["derive"] } tempdir = "0.3.7" [features] diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 6dfd662a4..60d754189 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -6,9 +6,7 @@ use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use 
std::fs; use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; -use zip_next::{ - CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::FileOptions, -}; +use zip::{CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::SimpleFileOptions}; // Deterministic seeded randomness helper (SplitMix64; no external dependencies, no syscalls). // Why we use this instead of the getrandom crate: @@ -39,7 +37,7 @@ fn seeded_random_bytes(size: usize) -> Vec { fn generate_random_archive(size: usize) -> Vec { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); writer.start_file("random.dat", options).unwrap(); // generate deterministic seeded random data @@ -54,14 +52,14 @@ const FILE_SIZE_META: usize = 1024; fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); // seeded random payload reused across entries let bytes = seeded_random_bytes(file_size); for i in 0..count_files { let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat"); - writer.start_file(name, options.clone())?; + writer.start_file(name, options)?; writer.write_all(&bytes)?; } @@ -71,7 +69,7 @@ fn generate_random_archive_meta(count_files: usize, file_size: usize) -> ZipResu fn generate_random_archive_merge( num_entries: usize, entry_size: usize, - options: FileOptions, + options: SimpleFileOptions, ) -> ZipResult<(usize, Vec)> { let buf = Cursor::new(Vec::new()); let mut zip = ZipWriter::new(buf); @@ -81,7 +79,7 @@ fn generate_random_archive_merge( for i in 0..num_entries { let name = 
format!("random{i}.dat"); - zip.start_file(name, options.clone())?; + zip.start_file(name, options)?; zip.write_all(&bytes)?; } @@ -91,6 +89,14 @@ fn generate_random_archive_merge( Ok((len, buf)) } +fn perform_merge( + src: ZipArchive, + mut target: ZipWriter, +) -> ZipResult> { + target.merge_archive(src)?; + Ok(target) +} + fn perform_raw_copy_file( mut src: ZipArchive, mut target: ZipWriter, @@ -118,9 +124,9 @@ const ROUNDTRIP_ENTRIES: usize = 100; fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment); + writer.set_raw_comment(comment.into_boxed_slice()); writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) @@ -180,12 +186,40 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); + // ============================================================================ + // merge_archive_stored + // Source zip on disk; merge into a new file each batch (stored method). 
+ // ============================================================================ + let merge_options = + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let path_merge_src = p("merge_src.zip"); + let path_merge_out = p("merge_out.zip"); + let (len, src_bytes) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src, &src_bytes).unwrap(); + + c.bench_function("merge_archive_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(fs::File::open(&path_merge_src).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out).unwrap(); + (src, out) + }, + |(src, out)| { + let zip = ZipWriter::new(out); + let zip = perform_merge(src, zip).unwrap(); + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len); + black_box(out); + }, + BatchSize::SmallInput, + ); + }); + // ============================================================================ // merge_archive_raw_copy_file_stored // Second merge source (independent); raw_copy_file path. 
// ============================================================================ - let merge_options = - FileOptions::default().compression_method(CompressionMethod::Stored); let path_merge_src2 = p("merge_src2.zip"); let path_merge_out2 = p("merge_out2.zip"); let (len2, src_bytes2) = @@ -201,7 +235,7 @@ fn criterion_benchmark(c: &mut Criterion) { }, |(src, out)| { let zip = ZipWriter::new(out); - let mut zip = perform_raw_copy_file(src, zip).unwrap(); + let zip = perform_raw_copy_file(src, zip).unwrap(); let out = zip.finish().unwrap(); assert_eq!(out.metadata().unwrap().len() as usize, len2); black_box(out); @@ -280,7 +314,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("read_stream_entries", |b| { b.iter(|| { let mut f = fs::File::open(&path_stream).unwrap(); - while zip_next::read::read_zipfile_from_stream(&mut f) + while zip::read::read_zipfile_from_stream(&mut f) .unwrap() .is_some() {} @@ -312,10 +346,10 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { let mut writer = ZipWriter::new(fs::File::create(&path_write_many).unwrap()); let options = - FileOptions::default().compression_method(CompressionMethod::Stored); + SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..WRITE_MANY_FILES { let name = format!("file_{i}.dat"); - writer.start_file(name, options.clone()).unwrap(); + writer.start_file(name, options).unwrap(); writer.write_all(&payload_small).unwrap(); } black_box(writer.finish().unwrap()); @@ -333,7 +367,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { let mut writer = ZipWriter::new(fs::File::create(&path_write_large).unwrap()); let options = - FileOptions::default().compression_method(CompressionMethod::Stored); + SimpleFileOptions::default().compression_method(CompressionMethod::Stored); writer.start_file("large.dat", options).unwrap(); writer.write_all(&payload_large).unwrap(); black_box(writer.finish().unwrap()); @@ -351,10 +385,10 @@ fn criterion_benchmark(c: &mut 
Criterion) { b.iter(|| { let mut writer = ZipWriter::new(fs::File::create(&path_roundtrip).unwrap()); let options = - FileOptions::default().compression_method(CompressionMethod::Stored); + SimpleFileOptions::default().compression_method(CompressionMethod::Stored); for i in 0..ROUNDTRIP_ENTRIES { writer - .start_file(format!("entry_{i}.dat"), options.clone()) + .start_file(format!("entry_{i}.dat"), options) .unwrap(); writer.write_all(&roundtrip_payload).unwrap(); } @@ -376,7 +410,7 @@ fn criterion_benchmark(c: &mut Criterion) { generate_random_archive_merge( 5, 4096, - FileOptions::default().compression_method(CompressionMethod::Deflated), + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), ) .unwrap() .1, @@ -406,11 +440,11 @@ fn criterion_benchmark(c: &mut Criterion) { let mut writer = ZipWriter::new(fs::File::create(&path_deflate_write).unwrap()); let options = - FileOptions::default().compression_method(CompressionMethod::Deflated); + SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); for i in 0..20 { writer - .start_file(format!("deflated_{i}.dat"), options.clone()) + .start_file(format!("deflated_{i}.dat"), options) .unwrap(); writer.write_all(&deflate_payload).unwrap(); } From be1feeec120daf3fc54b70b88fec4828935d0976 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sun, 26 Apr 2026 09:48:12 -0400 Subject: [PATCH 25/29] #723 tweak merge benchmarks --- benches/criterion_bench.rs | 132 ++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 59 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 60d754189..3a1553ec9 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -1,11 +1,15 @@ // Criterion-based benchmarks (baseline storage, regression detection). // Run: cargo bench --bench criterion_bench // First run saves baseline; later runs compare and can fail on regression. 
+// +// Merge / raw_copy benches use a longer measurement window (10s) so 100 samples can +// complete without Criterion’s default 5s cap — helps on SBCs (e.g. Raspberry Pi). use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use std::fs; use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; +use std::time::Duration; use zip::{CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::SimpleFileOptions}; // Deterministic seeded randomness helper (SplitMix64; no external dependencies, no syscalls). @@ -102,7 +106,9 @@ fn perform_raw_copy_file( mut target: ZipWriter, ) -> ZipResult> { for i in 0..src.len() { - let entry = src.by_index(i)?; + // `by_index_raw` matches raw-only repack: no decompress/CRC reader stack; still ends in + // the same `raw_copy_file` writer path. + let entry = src.by_index_raw(i)?; target.raw_copy_file(entry)?; } Ok(target) @@ -126,7 +132,7 @@ fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment.into_boxed_slice()); + writer.set_raw_comment(comment.into_boxed_slice())?; writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) @@ -186,63 +192,64 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - // ============================================================================ - // merge_archive_stored - // Source zip on disk; merge into a new file each batch (stored method). 
- // ============================================================================ - let merge_options = - SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); - let path_merge_src = p("merge_src.zip"); - let path_merge_out = p("merge_out.zip"); - let (len, src_bytes) = - generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); - fs::write(&path_merge_src, &src_bytes).unwrap(); - - c.bench_function("merge_archive_stored", |b| { - b.iter_batched( - || { - let src = ZipArchive::new(fs::File::open(&path_merge_src).unwrap()).unwrap(); - let out = fs::File::create(&path_merge_out).unwrap(); - (src, out) - }, - |(src, out)| { - let zip = ZipWriter::new(out); - let zip = perform_merge(src, zip).unwrap(); - let out = zip.finish().unwrap(); - assert_eq!(out.metadata().unwrap().len() as usize, len); - black_box(out); - }, - BatchSize::SmallInput, - ); - }); - // ============================================================================ - // merge_archive_raw_copy_file_stored - // Second merge source (independent); raw_copy_file path. 
- // ============================================================================ - let path_merge_src2 = p("merge_src2.zip"); - let path_merge_out2 = p("merge_out2.zip"); - let (len2, src_bytes2) = - generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); - fs::write(&path_merge_src2, &src_bytes2).unwrap(); - - c.bench_function("merge_archive_raw_copy_file_stored", |b| { - b.iter_batched( - || { - let src = ZipArchive::new(fs::File::open(&path_merge_src2).unwrap()).unwrap(); - let out = fs::File::create(&path_merge_out2).unwrap(); - (src, out) - }, - |(src, out)| { - let zip = ZipWriter::new(out); - let zip = perform_raw_copy_file(src, zip).unwrap(); - let out = zip.finish().unwrap(); - assert_eq!(out.metadata().unwrap().len() as usize, len2); - black_box(out); - }, - BatchSize::SmallInput, - ); - }); + /// `merge_archive_stored` and `merge_archive_raw_copy_file_stored` only. Uses a + /// longer Criterion window than the default for stable timing on slow hosts. + fn merge_archive_benchmarks(c: &mut Criterion) { + let bench_dir = tempfile::TempDir::with_prefix("criterion_zip_merge").unwrap(); + let p = |name: &str| bench_dir.path().join(name); + + let merge_options = + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let path_merge_src = p("merge_src.zip"); + let path_merge_out = p("merge_out.zip"); + let (len, src_bytes) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src, &src_bytes).unwrap(); + + c.bench_function("merge_archive_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(fs::File::open(&path_merge_src).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out).unwrap(); + (src, out) + }, + |(src, out)| { + let zip = ZipWriter::new(out); + let zip = perform_merge(src, zip).unwrap(); + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len); + black_box(out); + }, + 
BatchSize::SmallInput, + ); + }); + + let path_merge_src2 = p("merge_src2.zip"); + let path_merge_out2 = p("merge_out2.zip"); + let (len2, src_bytes2) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src2, &src_bytes2).unwrap(); + + c.bench_function("merge_archive_raw_copy_file_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(fs::File::open(&path_merge_src2).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out2).unwrap(); + (src, out) + }, + |(src, out)| { + let zip = ZipWriter::new(out); + let zip = perform_raw_copy_file(src, zip).unwrap(); + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len2); + black_box(out); + }, + BatchSize::SmallInput, + ); + }); + } + // ============================================================================ // read_all_entries @@ -455,5 +462,12 @@ fn criterion_benchmark(c: &mut Criterion) { } } +criterion_group! { + name = merge_archive_benches; + config = Criterion::default() + .measurement_time(Duration::from_secs(10)) + .warm_up_time(Duration::from_secs(4)); + targets = merge_archive_benchmarks +} criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); +criterion_main!(merge_archive_benches, benches); From 47226532d476d91c9ddc20409e0bdac38dac4643 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sun, 26 Apr 2026 10:28:52 -0400 Subject: [PATCH 26/29] #723 iteration; try to ship it --- benches/criterion_bench.rs | 149 +++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 79 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 3a1553ec9..36c0c40af 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -1,9 +1,6 @@ -// Criterion-based benchmarks (baseline storage, regression detection). 
-// Run: cargo bench --bench criterion_bench -// First run saves baseline; later runs compare and can fail on regression. -// -// Merge / raw_copy benches use a longer measurement window (10s) so 100 samples can -// complete without Criterion’s default 5s cap — helps on SBCs (e.g. Raspberry Pi). +// Criterion-based benchmarks. Run: `cargo bench --bench criterion_bench`. +// Optional baselines: pass Criterion flags after `--` (e.g. `--save-baseline`, `--baseline `). +// Merge / raw_copy benches: longer measurement (10s) so samples complete on slow hosts (e.g. SBCs). use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use std::fs; @@ -138,6 +135,64 @@ fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { Ok(writer.finish()?.into_inner()) } +/// `merge_archive_stored` and `merge_archive_raw_copy_file_stored` only. Uses a +/// longer Criterion window than the default (see `merge_archive_benches` group) for +/// stable timing on slow hosts. +fn merge_archive_benchmarks(c: &mut Criterion) { + let bench_dir = tempfile::TempDir::with_prefix("criterion_zip_merge").unwrap(); + let p = |name: &str| bench_dir.path().join(name); + + let merge_options = + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let path_merge_src = p("merge_src.zip"); + let path_merge_out = p("merge_out.zip"); + let (len, src_bytes) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src, &src_bytes).unwrap(); + + c.bench_function("merge_archive_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(fs::File::open(&path_merge_src).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out).unwrap(); + (src, out) + }, + |(src, out)| { + let zip = ZipWriter::new(out); + let zip = perform_merge(src, zip).unwrap(); + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len); + black_box(out); + }, + BatchSize::SmallInput, + ); + }); 
+ + let path_merge_src2 = p("merge_src2.zip"); + let path_merge_out2 = p("merge_out2.zip"); + let (len2, src_bytes2) = + generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); + fs::write(&path_merge_src2, &src_bytes2).unwrap(); + + c.bench_function("merge_archive_raw_copy_file_stored", |b| { + b.iter_batched( + || { + let src = ZipArchive::new(fs::File::open(&path_merge_src2).unwrap()).unwrap(); + let out = fs::File::create(&path_merge_out2).unwrap(); + (src, out) + }, + |(src, out)| { + let zip = ZipWriter::new(out); + let zip = perform_raw_copy_file(src, zip).unwrap(); + let out = zip.finish().unwrap(); + assert_eq!(out.metadata().unwrap().len() as usize, len2); + black_box(out); + }, + BatchSize::SmallInput, + ); + }); +} + fn criterion_benchmark(c: &mut Criterion) { // Shared directory: all fixtures use real files under here (see each section below). let bench_dir = tempfile::TempDir::with_prefix("criterion_zip").unwrap(); @@ -148,11 +203,7 @@ fn criterion_benchmark(c: &mut Criterion) { // Single stored entry (~1 MiB payload); read full entry in a loop from disk. // ============================================================================ let path_read_entry = p("read_entry.zip"); - fs::write( - &path_read_entry, - generate_random_archive(1024 * 1024), - ) - .unwrap(); + fs::write(&path_read_entry, generate_random_archive(1024 * 1024)).unwrap(); c.bench_function("read_entry", |b| { b.iter(|| { @@ -180,7 +231,7 @@ fn criterion_benchmark(c: &mut Criterion) { &path_meta, generate_random_archive_meta(FILE_COUNT, FILE_SIZE_META).unwrap(), ) - .unwrap(); + .unwrap(); c.bench_function("read_metadata", |b| { b.iter(|| { @@ -192,65 +243,6 @@ fn criterion_benchmark(c: &mut Criterion) { }); }); - - /// `merge_archive_stored` and `merge_archive_raw_copy_file_stored` only. Uses a - /// longer Criterion window than the default for stable timing on slow hosts. 
- fn merge_archive_benchmarks(c: &mut Criterion) { - let bench_dir = tempfile::TempDir::with_prefix("criterion_zip_merge").unwrap(); - let p = |name: &str| bench_dir.path().join(name); - - let merge_options = - SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); - let path_merge_src = p("merge_src.zip"); - let path_merge_out = p("merge_out.zip"); - let (len, src_bytes) = - generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); - fs::write(&path_merge_src, &src_bytes).unwrap(); - - c.bench_function("merge_archive_stored", |b| { - b.iter_batched( - || { - let src = ZipArchive::new(fs::File::open(&path_merge_src).unwrap()).unwrap(); - let out = fs::File::create(&path_merge_out).unwrap(); - (src, out) - }, - |(src, out)| { - let zip = ZipWriter::new(out); - let zip = perform_merge(src, zip).unwrap(); - let out = zip.finish().unwrap(); - assert_eq!(out.metadata().unwrap().len() as usize, len); - black_box(out); - }, - BatchSize::SmallInput, - ); - }); - - let path_merge_src2 = p("merge_src2.zip"); - let path_merge_out2 = p("merge_out2.zip"); - let (len2, src_bytes2) = - generate_random_archive_merge(NUM_ENTRIES, ENTRY_SIZE, merge_options).unwrap(); - fs::write(&path_merge_src2, &src_bytes2).unwrap(); - - c.bench_function("merge_archive_raw_copy_file_stored", |b| { - b.iter_batched( - || { - let src = ZipArchive::new(fs::File::open(&path_merge_src2).unwrap()).unwrap(); - let out = fs::File::create(&path_merge_out2).unwrap(); - (src, out) - }, - |(src, out)| { - let zip = ZipWriter::new(out); - let zip = perform_raw_copy_file(src, zip).unwrap(); - let out = zip.finish().unwrap(); - assert_eq!(out.metadata().unwrap().len() as usize, len2); - black_box(out); - }, - BatchSize::SmallInput, - ); - }); - } - - // ============================================================================ // read_all_entries // Many small entries; read each by index to sink (from disk). 
@@ -260,12 +252,11 @@ fn criterion_benchmark(c: &mut Criterion) { &path_all_entries, generate_random_archive_meta(READ_ALL_ENTRIES_FILES, 512).unwrap(), ) - .unwrap(); + .unwrap(); c.bench_function("read_all_entries", |b| { b.iter(|| { - let mut archive = - ZipArchive::new(fs::File::open(&path_all_entries).unwrap()).unwrap(); + let mut archive = ZipArchive::new(fs::File::open(&path_all_entries).unwrap()).unwrap(); for i in 0..archive.len() { let mut entry = archive.by_index(i).unwrap(); let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); @@ -298,7 +289,7 @@ fn criterion_benchmark(c: &mut Criterion) { &path_comment, generate_archive_with_comment(COMMENT_BENCH_LEN).unwrap(), ) - .unwrap(); + .unwrap(); c.bench_function("parse_archive_with_comment", |b| { b.iter(|| { @@ -316,7 +307,7 @@ fn criterion_benchmark(c: &mut Criterion) { &path_stream, generate_random_archive_meta(STREAM_ENTRIES, STREAM_ENTRY_SIZE).unwrap(), ) - .unwrap(); + .unwrap(); c.bench_function("read_stream_entries", |b| { b.iter(|| { @@ -419,10 +410,10 @@ fn criterion_benchmark(c: &mut Criterion) { 4096, SimpleFileOptions::default().compression_method(CompressionMethod::Deflated), ) - .unwrap() - .1, + .unwrap() + .1, ) - .unwrap(); + .unwrap(); c.bench_function("read_deflated_entry", |b| { b.iter(|| { From de112559dc08fc8fcd91c2eb93ad3378b26d6ded Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sun, 26 Apr 2026 10:31:51 -0400 Subject: [PATCH 27/29] #723 prepare version 7.0.0 for benchmark --- benches/criterion_bench.rs | 141 +++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 60d754189..485eef845 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -1,11 +1,12 @@ -// Criterion-based benchmarks (baseline storage, regression detection). 
-// Run: cargo bench --bench criterion_bench -// First run saves baseline; later runs compare and can fail on regression. +// Criterion-based benchmarks. Run: `cargo bench --bench criterion_bench`. +// Optional baselines: pass Criterion flags after `--` (e.g. `--save-baseline`, `--baseline `). +// Merge / raw_copy benches: longer measurement (10s) so samples complete on slow hosts (e.g. SBCs). use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use std::fs; use std::hint::black_box; use std::io::{self, Cursor, Read, Seek, Write}; +use std::time::Duration; use zip::{CompressionMethod, ZipArchive, ZipWriter, result::ZipResult, write::SimpleFileOptions}; // Deterministic seeded randomness helper (SplitMix64; no external dependencies, no syscalls). @@ -102,7 +103,9 @@ fn perform_raw_copy_file( mut target: ZipWriter, ) -> ZipResult> { for i in 0..src.len() { - let entry = src.by_index(i)?; + // `by_index_raw` matches raw-only repack: no decompress/CRC reader stack; still ends in + // the same `raw_copy_file` writer path. + let entry = src.by_index_raw(i)?; target.raw_copy_file(entry)?; } Ok(target) @@ -126,70 +129,19 @@ fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment.into_boxed_slice()); + writer.set_raw_comment(comment.into_boxed_slice())?; writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) } -fn criterion_benchmark(c: &mut Criterion) { - // Shared directory: all fixtures use real files under here (see each section below). - let bench_dir = tempfile::TempDir::with_prefix("criterion_zip").unwrap(); +/// `merge_archive_stored` and `merge_archive_raw_copy_file_stored` only. 
Uses a +/// longer Criterion window than the default (see `merge_archive_benches` group) for +/// stable timing on slow hosts. +fn merge_archive_benchmarks(c: &mut Criterion) { + let bench_dir = tempfile::TempDir::with_prefix("criterion_zip_merge").unwrap(); let p = |name: &str| bench_dir.path().join(name); - // ============================================================================ - // read_entry - // Single stored entry (~1 MiB payload); read full entry in a loop from disk. - // ============================================================================ - let path_read_entry = p("read_entry.zip"); - fs::write( - &path_read_entry, - generate_random_archive(1024 * 1024), - ) - .unwrap(); - - c.bench_function("read_entry", |b| { - b.iter(|| { - let mut archive = ZipArchive::new(fs::File::open(&path_read_entry).unwrap()).unwrap(); - let mut file = archive.by_name("random.dat").unwrap(); - let mut buf = [0u8; 1024]; - - loop { - let n = file.read(&mut buf).unwrap(); - if n == 0 { - break; - } - } - - black_box(buf); - }); - }); - - // ============================================================================ - // read_metadata / by_name_lookup_many - // One large archive on disk (FILE_COUNT × FILE_SIZE_META); shared by both benches. - // ============================================================================ - let path_meta = p("meta.zip"); - fs::write( - &path_meta, - generate_random_archive_meta(FILE_COUNT, FILE_SIZE_META).unwrap(), - ) - .unwrap(); - - c.bench_function("read_metadata", |b| { - b.iter(|| { - black_box( - ZipArchive::new(fs::File::open(&path_meta).unwrap()) - .unwrap() - .len(), - ) - }); - }); - - // ============================================================================ - // merge_archive_stored - // Source zip on disk; merge into a new file each batch (stored method). 
- // ============================================================================ let merge_options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); let path_merge_src = p("merge_src.zip"); @@ -216,10 +168,6 @@ fn criterion_benchmark(c: &mut Criterion) { ); }); - // ============================================================================ - // merge_archive_raw_copy_file_stored - // Second merge source (independent); raw_copy_file path. - // ============================================================================ let path_merge_src2 = p("merge_src2.zip"); let path_merge_out2 = p("merge_out2.zip"); let (len2, src_bytes2) = @@ -243,6 +191,57 @@ fn criterion_benchmark(c: &mut Criterion) { BatchSize::SmallInput, ); }); +} + +fn criterion_benchmark(c: &mut Criterion) { + // Shared directory: all fixtures use real files under here (see each section below). + let bench_dir = tempfile::TempDir::with_prefix("criterion_zip").unwrap(); + let p = |name: &str| bench_dir.path().join(name); + + // ============================================================================ + // read_entry + // Single stored entry (~1 MiB payload); read full entry in a loop from disk. + // ============================================================================ + let path_read_entry = p("read_entry.zip"); + fs::write(&path_read_entry, generate_random_archive(1024 * 1024)).unwrap(); + + c.bench_function("read_entry", |b| { + b.iter(|| { + let mut archive = ZipArchive::new(fs::File::open(&path_read_entry).unwrap()).unwrap(); + let mut file = archive.by_name("random.dat").unwrap(); + let mut buf = [0u8; 1024]; + + loop { + let n = file.read(&mut buf).unwrap(); + if n == 0 { + break; + } + } + + black_box(buf); + }); + }); + + // ============================================================================ + // read_metadata / by_name_lookup_many + // One large archive on disk (FILE_COUNT × FILE_SIZE_META); shared by both benches. 
+ // ============================================================================ + let path_meta = p("meta.zip"); + fs::write( + &path_meta, + generate_random_archive_meta(FILE_COUNT, FILE_SIZE_META).unwrap(), + ) + .unwrap(); + + c.bench_function("read_metadata", |b| { + b.iter(|| { + black_box( + ZipArchive::new(fs::File::open(&path_meta).unwrap()) + .unwrap() + .len(), + ) + }); + }); // ============================================================================ // read_all_entries @@ -257,8 +256,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("read_all_entries", |b| { b.iter(|| { - let mut archive = - ZipArchive::new(fs::File::open(&path_all_entries).unwrap()).unwrap(); + let mut archive = ZipArchive::new(fs::File::open(&path_all_entries).unwrap()).unwrap(); for i in 0..archive.len() { let mut entry = archive.by_index(i).unwrap(); let _ = io::copy(&mut entry, &mut io::sink()).unwrap(); @@ -455,5 +453,12 @@ fn criterion_benchmark(c: &mut Criterion) { } } +criterion_group! 
{ + name = merge_archive_benches; + config = Criterion::default() + .measurement_time(Duration::from_secs(10)) + .warm_up_time(Duration::from_secs(4)); + targets = merge_archive_benchmarks +} criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); +criterion_main!(merge_archive_benches, benches); From d9094d5668f7deaeb406036b63408e1dace38b53 Mon Sep 17 00:00:00 2001 From: im7mortal <5336231+im7mortal@users.noreply.github.com> Date: Sun, 26 Apr 2026 11:14:19 -0400 Subject: [PATCH 28/29] #723 adjust bench for v7.0.0 --- benches/criterion_bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 485eef845..55f5bb86a 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -129,7 +129,7 @@ fn generate_archive_with_comment(comment_len: usize) -> ZipResult> { let mut writer = ZipWriter::new(Cursor::new(Vec::new())); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); let comment = seeded_random_bytes(comment_len); - writer.set_raw_comment(comment.into_boxed_slice())?; + writer.set_raw_comment(comment.into_boxed_slice()); writer.start_file("data.txt", options)?; writer.write_all(b"x")?; Ok(writer.finish()?.into_inner()) From 679d3ed03376d8635a95ea8fd756d6316cd5caf7 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Sun, 3 May 2026 20:12:19 -0700 Subject: [PATCH 29/29] Fix Clippy warning --- fuzz/write/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/write/src/main.rs b/fuzz/write/src/main.rs index d998a857d..a9e57c3da 100644 --- a/fuzz/write/src/main.rs +++ b/fuzz/write/src/main.rs @@ -389,7 +389,7 @@ impl Debug for FuzzTestCase<'_> { "let mut initial_junk = Cursor::new(vec!{:?});\n\ initial_junk.seek(SeekFrom::End(0))?;\n\ let mut writer = ZipWriter::new(initial_junk);", - &self.initial_junk + self.initial_junk )?; } let _ = 
self.clone().execute(f, false);