Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ jobs:
MACOSX_DEPLOYMENT_TARGET: "10.13"
CIBW_BUILD_VERBOSITY: 1
CIBW_BUILD: ${{ matrix.cibw_build }}
CIBW_BEFORE_TEST: pip install "blosc2_grok==0.3.3" "blosc2<4" --only-binary ":all:"
CIBW_TEST_EXTRAS: "test"
# Test twice:
# - with latest version
Expand Down
5 changes: 3 additions & 2 deletions doc/information.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,16 @@ HDF5 compression filters and compression libraries sources were obtained from:
using `BZip2 <https://sourceware.org/git/bzip2.git>`_ (v1.0.8).
* `hdf5-blosc plugin <https://github.com/Blosc/hdf5-blosc>`_ (v1.0.1)
using `c-blosc <https://github.com/Blosc/c-blosc>`_ (v1.21.6), LZ4, Snappy, ZLib and ZStd.
* `hdf5-blosc2 plugin <https://github.com/Blosc/HDF5-Blosc2>`_ (v2.0.0)
* `hdf5-blosc2 plugin <https://github.com/Blosc/HDF5-Blosc2>`_
(commit `e4d0f58 <https://github.com/Blosc/HDF5-Blosc2/tree/e4d0f583f39bf1d3e482aa4695b7dc95afb2b9b2>`_)
using `c-blosc2 <https://github.com/Blosc/c-blosc2>`_ (v2.23.0), LZ4, ZLib and ZStd.
* `FCIDECOMP plugin <https://gitlab.eumetsat.int/open-source/data-tailor-plugins/fcidecomp>`_
(`v2.1.1 <https://gitlab.eumetsat.int/open-source/data-tailor-plugins/fcidecomp/-/tree/2.1.1>`_)
using `CharLS <https://github.com/team-charls/charls>`_ (v2.1.0).
* `SZ plugin <https://github.com/szcompressor/SZ2>`_
(commit `f466775 <https://github.com/szcompressor/SZ2/tree/f4667759ead6a902110e80ff838ccdfddbc8dcd7>`_)
using `SZ <https://github.com/szcompressor/SZ2>`_, ZLib and ZStd.
* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.2.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.4).
* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.2.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.5).
* `SZ3 plugin <https://github.com/szcompressor/SZ3>`_
(commit `4bbe9df7e4bcb <https://github.com/szcompressor/SZ3/commit/4bbe9df7e4bcb6ae6339fcb3033100da07fe7434>`_)
using `SZ3 <https://github.com/szcompressor/SZ3>`_ and ZStd.
Expand Down
22 changes: 11 additions & 11 deletions lib/HDF5-Blosc2/src/blosc2_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@
* - 4: compression level
* - 5: shuffle method
* - 6: compressor code
* - 7: chunk rank (number of dimensions) (present if 1 < rank <= BLOSC2_MAX_DIM, for B2ND)
* - 7: chunk rank (number of dimensions) (present if 1 < rank <= B2ND_MAX_DIM, for B2ND)
* - 8 + i: length of chunk dimension i (0 <= i < rank)
*
* If a value is specified, all values before it must be specified too.
*
* If the chunk rank is specified, chunk dimensions must follow.
*/
#define MAX_FILTER_VALUES (8 + BLOSC2_MAX_DIM)
#define MAX_FILTER_VALUES (8 + B2ND_MAX_DIM)
/* Compression level default */
#define DEFAULT_CLEVEL 5
/* Shuffle default */
Expand Down Expand Up @@ -95,7 +95,7 @@ int register_blosc2(char **version, char **date){

3. Compute the chunk size in bytes and store it in slot 3.

4. If 1 < rank <= BLOSC2_MAX_DIM, store it in slot 7, and chunk dimensions in the following slots.
4. If 1 < rank <= B2ND_MAX_DIM, store it in slot 7, and chunk dimensions in the following slots.
*/
herr_t blosc2_set_local(hid_t dcpl, hid_t type, hid_t space) {

Expand Down Expand Up @@ -157,7 +157,7 @@ herr_t blosc2_set_local(hid_t dcpl, hid_t type, hid_t space) {
fprintf(stderr, "Blosc2: Computed buffer size %d\n", bufsize);
#endif

if (1 < ndim && ndim <= BLOSC2_MAX_DIM) {
if (1 < ndim && ndim <= B2ND_MAX_DIM) {
if (nelements < 5) { values[4] = DEFAULT_CLEVEL; }
if (nelements < 6) { values[5] = DEFAULT_SHUFFLE; }
if (nelements < 7) { values[6] = DEFAULT_COMPCODE; }
Expand All @@ -172,7 +172,7 @@ herr_t blosc2_set_local(hid_t dcpl, hid_t type, hid_t space) {
/* The user may be expecting more efficient storage than we can currently provide,
* so convey some information when tracing. */
BLOSC_TRACE_WARNING("Chunk rank %d exceeds B2ND build limit %d, "
"using plain Blosc2 instead", ndim, BLOSC2_MAX_DIM);
"using plain Blosc2 instead", ndim, B2ND_MAX_DIM);
}

r = H5Pmodify_filter(dcpl, FILTER_BLOSC2, flags, nelements, values);
Expand Down Expand Up @@ -297,7 +297,7 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,

/* Filter params that are only set for B2ND */
int ndim = -1;
int32_t chunkshape[BLOSC2_MAX_DIM];
int32_t chunkshape[B2ND_MAX_DIM];
size_t chunksize = typesize;
if (cd_nelmts >= 8) {
/* Get chunk shape for B2ND */
Expand All @@ -308,10 +308,10 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,
ndim);
goto failed;
}
if (ndim > BLOSC2_MAX_DIM) {
if (ndim > B2ND_MAX_DIM) {
PUSH_ERR("blosc2_filter", H5E_CALLBACK,
"Chunk rank %d (filter value) exceeds B2ND build limit %d",
ndim, BLOSC2_MAX_DIM);
ndim, B2ND_MAX_DIM);
goto failed;
}
if (cd_nelmts < (size_t)(8 + ndim)) {
Expand Down Expand Up @@ -391,12 +391,12 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,
}
blocksize = sugg_blocksize;
}
int32_t blockdims[BLOSC2_MAX_DIM];
int32_t blockdims[B2ND_MAX_DIM];
cparams.blocksize = compute_b2nd_block_shape(blocksize, typesize,
ndim, chunkshape,
blockdims);

int64_t chunkshape_l[BLOSC2_MAX_DIM];
int64_t chunkshape_l[B2ND_MAX_DIM];
for (int i = 0; i < ndim; i++) {
chunkshape_l[i] = chunkshape[i];
}
Expand Down Expand Up @@ -504,7 +504,7 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,
"B2ND array rank (%hhd) != filter rank (%d)", array->ndim, ndim);
goto b2nd_decomp_out;
}
int64_t start[BLOSC2_MAX_DIM], stop[BLOSC2_MAX_DIM], size = typesize;
int64_t start[B2ND_MAX_DIM], stop[B2ND_MAX_DIM], size = typesize;
for (int i = 0; i < array->ndim; i++) {
start[i] = 0;
stop[i] = array->shape[i];
Expand Down
18 changes: 15 additions & 3 deletions lib/SPERR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

cmake_minimum_required(VERSION 3.14)

project(SPERR VERSION 0.8.4 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
project(SPERR VERSION 0.8.5 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")

if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD "20" CACHE STRING "Choose the C++ Standard to use." FORCE)
Expand Down Expand Up @@ -32,8 +32,20 @@ option( BUILD_SHARED_LIBS "Build shared SPERR library" ON )
option( BUILD_UNIT_TESTS "Build unit tests using GoogleTest" ON )
option( BUILD_CLI_UTILITIES "Build a set of command line utilities" ON )
option( USE_OMP "Use OpenMP parallelization on 3D volumes" OFF )
option( ENABLE_AVX2 "Enable AVX2 instruction set compilation" ON )
option( SPERR_PREFER_RPATH "Set RPATH; this can fight with package managers so turn off when building for them" ON )

#
# AVX2 auto-detection: unless ENABLE_AVX2 is already defined, default to ON for x86 and OFF otherwise.
#
if(NOT DEFINED ENABLE_AVX2)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i[3-9]86")
option( ENABLE_AVX2 "Enable AVX2 instruction set compilation" ON )
else()
option( ENABLE_AVX2 "Enable AVX2 instruction set compilation" OFF )
endif()
else()
option( ENABLE_AVX2 "Enable AVX2 instruction set compilation" ${ENABLE_AVX2} )
endif()
mark_as_advanced(FORCE SPERR_PREFER_RPATH)

if(USE_OMP)
Expand Down Expand Up @@ -109,7 +121,7 @@ if( BUILD_CLI_UTILITIES )
set( CLI11_SINGLE_FILE OFF CACHE INTERNAL "Don't use single file CLI11")
FetchContent_Declare( cli11
GIT_REPOSITORY https://github.com/CLIUtils/CLI11
GIT_TAG 6c7b07a878ad834957b98d0f9ce1dbe0cb204fc9 # v2.4.2
GIT_TAG 37bb6edc5317e99af72ef48405e65d9ca5218861 # v2.6.2
)
FetchContent_MakeAvailable(cli11)

Expand Down
7 changes: 2 additions & 5 deletions lib/SPERR/include/Bitmask.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,8 @@ class Bitmask {

// Functions to perform bulk tests.
//
// Two versions of the `has_true()` function. Both versions return -1 in case of no true found.
// 1) Position == false: it returns 1 indicating finding a true.
// 2) Position == true: it returns the offset relative to `start` of the first true.
template <bool Position>
auto has_true(size_t start, size_t len) const -> int64_t;
auto has_true(size_t start, size_t len) const -> bool;
auto find_true(size_t start, size_t len) const -> int64_t;
auto count_true() const -> size_t; // How many 1's in this mask?

// Functions for write
Expand Down
1 change: 1 addition & 0 deletions lib/SPERR/include/SPECK2D_INT.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class SPECK2D_INT : public SPECK_INT<T> {
virtual void m_process_S(size_t idx1, size_t idx2, size_t& counter, bool need_decide) = 0;
virtual void m_process_P(size_t idx, size_t& counter, bool need_decide) = 0;
virtual void m_process_I(bool need_decide) = 0;
// Hook for derived classes to override; the default implementation is a no-op.
virtual void m_additional_initialization() {}

auto m_partition_S(Set2D) const -> std::array<Set2D, 4>;
auto m_partition_I() -> std::array<Set2D, 3>;
Expand Down
8 changes: 8 additions & 0 deletions lib/SPERR/include/SPECK2D_INT_ENC.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,17 @@ class SPECK2D_INT_ENC final : public SPECK2D_INT<T> {
void m_process_S(size_t idx1, size_t idx2, size_t& counter, bool need_decide) final;
void m_process_P(size_t idx, size_t& counter, bool need_decide) final;
void m_process_I(bool need_decide) final;
void m_additional_initialization() final;
void m_bitplane_init() final;
void m_refinement_extra() final;

auto m_decide_S_significance(const Set2D&) const -> bool;
auto m_decide_I_significance() const -> bool;

// `m_msb_buf` stores the MSB position of each coefficient, in the same order as
// `m_coeff_buf`. Significance tests compare its entries against `m_msb_threshold`.
std::vector<int8_t> m_msb_buf;
int8_t m_msb_threshold = -1;
};

}; // namespace sperr
Expand Down
21 changes: 1 addition & 20 deletions lib/SPERR/include/SPECK3D_INT.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,20 @@

#include "SPECK_INT.h"

#include <cstring> // std::memcpy()
#include <tuple>

namespace sperr {

class Set3D {
private:
// The first 6 bytes of the morton offset in uint64_t. Because each set dimension is
// stored using 16-bit integers, these 48 bits are big enough too!
std::array<uint8_t, 6> m_morton = {0, 0, 0, 0, 0, 0};

public:
//
// Publicly accessible data members.
//
uint64_t morton_idx = 0;
uint16_t start_x = 0;
uint16_t start_y = 0;
uint16_t start_z = 0;
uint16_t length_x = 0;
uint16_t length_y = 0;
uint16_t length_z = 0;

public:
//
// Member functions (intended to be inline)
//
auto get_morton() const -> uint64_t
{
auto tmp = uint64_t{0};
std::memcpy(&tmp, m_morton.data(), sizeof(m_morton));
return tmp;
}
void set_morton(uint64_t val) { std::memcpy(m_morton.data(), &val, sizeof(m_morton)); }
void make_empty() { length_x = 0; }
auto num_elem() const -> size_t { return (size_t{length_x} * length_y * length_z); }
};
Expand Down
10 changes: 9 additions & 1 deletion lib/SPERR/include/SPECK3D_INT_ENC.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,17 @@ class SPECK3D_INT_ENC final : public SPECK3D_INT<T> {
void m_process_P(size_t idx, size_t morton, size_t& counter, bool output) final;
void m_process_P_lite(size_t idx) final;
void m_additional_initialization() final;
void m_bitplane_init() final;
void m_refinement_extra() final;

// Data structures and functions for morton data layout.
vecui_type m_morton_buf;
// `m_morton_buf` stores the MSB bit position of each coefficient (via m_msb_position()),
// rather than the full coefficient value. This shrinks the buffer from sizeof(T) to 1 byte
// per element, reducing cache pressure in the significance-testing hot path (m_process_S).
std::vector<int8_t> m_morton_buf;
// `m_morton_threshold` is the MSB position of `m_threshold`, updated each bitplane via
// m_bitplane_init(). Significance tests compare m_morton_buf entries against this value.
int8_t m_morton_threshold = -1;
void m_deposit_set(Set3D);
};

Expand Down
6 changes: 4 additions & 2 deletions lib/SPERR/include/SPECK_INT.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,17 @@ class SPECK_INT {
virtual void m_clean_LIS() = 0;
virtual void m_sorting_pass() = 0;
virtual void m_initialize_lists() = 0;
virtual void m_bitplane_init() {}
virtual void m_refinement_extra() {}
void m_refinement_pass_encode();
void m_refinement_pass_decode();

// Data members
uint8_t m_num_bitplanes = 0;
uint_type m_threshold = 0;
uint64_t m_total_bits = 0; // The number of bits of a complete SPECK stream.
uint64_t m_avail_bits = 0; // Decoding only. `m_avail_bits` <= `m_total_bits`
size_t m_budget = std::numeric_limits<size_t>::max();
uint_type m_threshold = 0;
uint8_t m_num_bitplanes = 0;

dims_type m_dims = {0, 0, 0};
vecui_type m_coeff_buf;
Expand Down
7 changes: 4 additions & 3 deletions lib/SPERR/include/sperr_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ auto calc_stats(const T* arr1, const T* arr2, size_t arr_len, size_t omp_nthread
template <typename T>
auto kahan_summation(const T*, size_t) -> T;

// Returns the 0-based bit position of the most significant set bit of `v`, or -1 if `v` is zero.
template <typename T>
auto msb_position(T v) -> int8_t;

// Given a whole volume size and a desired chunk size, this helper function
// returns a list of chunks specified by 6 integers:
// chunk[0], [2], [4]: starting index of this chunk in X, Y, and Z;
Expand All @@ -184,9 +188,6 @@ auto chunk_volume(dims_type vol_dim, dims_type chunk_dim) -> std::vector<std::ar
template <typename T>
auto calc_mean_var(const T*, size_t len, size_t omp_nthreads = 0) -> std::array<T, 2>;

template <typename T>
auto any_ge(const T* buf, size_t len, T threshold) -> bool;

}; // namespace sperr

#endif
Loading
Loading