diff --git a/dwio/nimble/encodings/FixedBitWidthEncoding.h b/dwio/nimble/encodings/FixedBitWidthEncoding.h index aeb09acc..764a926c 100644 --- a/dwio/nimble/encodings/FixedBitWidthEncoding.h +++ b/dwio/nimble/encodings/FixedBitWidthEncoding.h @@ -163,18 +163,22 @@ void FixedBitWidthEncoding::readWithVisitor( isEightByteIntegralType()); constexpr bool kIsFluidCast = sizeof(OutputType) >= sizeof(physicalType) && std::is_integral_v && std::is_integral_v; - // Fast path: use bulk scan for 4-byte and 8-byte integral types with no - // filter and no hook. Supports both signed and unsigned types as long as - // the output type is integral and at least as wide as the physical type. + // Fast path: use bulk scan for 4-byte and 8-byte integral types. + // Compile-time: type constraints (FBW physical type, ExtractToReader, + // compatible integral output type at least as wide as the physical type). + // Runtime (useFastPath): deterministic filter, AVX2, bulk path enabled, + // null+filter/hook compatibility. if constexpr ( - kIsSuitableWidth && !V::kHasFilter && !V::kHasHook && + kIsSuitableWidth && std::is_same_v< typename V::Extract, velox::dwio::common::ExtractToReader> && kIsFluidCast) { auto* nulls = visitor.reader().rawNullsInReadRange(); - detail::readWithVisitorFast(*this, visitor, params, nulls); - return; + if (velox::dwio::common::useFastPath(visitor, nulls)) { + detail::readWithVisitorFast(*this, visitor, params, nulls); + return; + } } // Slow path: process one value at a time. detail::readWithVisitorSlow( @@ -255,22 +259,51 @@ void FixedBitWidthEncoding::bulkScan( } else { // Sparse case: read individual values at specified positions. for (vector_size_t i = 0; i < numSelected; ++i) { - values[i] = fixedBitArray_.get(selectedRows[i] + offset) + baseline_; + values[i] = static_cast( + fixedBitArray_.get(selectedRows[i] + offset) + baseline_); } } - // Scatter: when nulls are present (kScatter=true), values are packed - // contiguously but need to be placed at their correct output positions. - // Process in reverse to avoid overwriting unread values. - if constexpr (kScatter) { - for (int32_t i = numSelected - 1; i >= 0; --i) { - values[scatterRows[i]] = values[i]; - } + row_ += selectedRows[numSelected - 1] - currentRow + 1; + + // No scatter, filter, or hook: values are already in the output buffer. + if constexpr (!kScatter && !V::kHasFilter && !V::kHasHook) { + visitor.addNumValues(numRows); + visitor.setRowIndex(visitor.numRows()); + return; } - row_ += selectedRows[numSelected - 1] - currentRow + 1; + // processFixedWidthRun handles scatter (null gaps), filter evaluation, + // and hook forwarding. For non-hook paths, values points to the reader's + // output buffer (rawValues). For hooks, values stays as the local decode + // buffer since hook.addValue() consumes values without writing to the reader. + if constexpr (!V::kHasHook) { + values = reinterpret_cast(visitor.reader().rawValues()); + } + + auto numValues = visitor.reader().numValues(); + int32_t* filterHits = nullptr; + if constexpr (V::kHasFilter) { + filterHits = visitor.outputRows(numSelected) - numValues; + } - visitor.addNumValues(numRows); + velox::dwio::common:: + processFixedWidthRun( + velox::RowSet(selectedRows, numSelected), + 0, + numSelected, + scatterRows, + values, + filterHits, + numValues, + visitor.filter(), + visitor.hook()); + + if constexpr (!V::kHasHook) { + // Filter: count passing rows; no filter: all rows produce values. + visitor.addNumValues( + V::kHasFilter ? numValues - visitor.reader().numValues() : numRows); + } visitor.setRowIndex(visitor.numRows()); } diff --git a/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp b/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp index 59a4a81d..8a4ed393 100644 --- a/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp +++ b/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp @@ -1688,19 +1688,37 @@ TEST_P( } // --------------------------------------------------------------------------- -// 13a. FixedBitWidthEncoding + AlwaysTrue + dense + FAST PATH -// Exercises the unsigned integer fast path. The relaxed gate accepts -// integral output types at least as wide as the physical type, including -// unsigned→signed same-size casts (uint32_t→int32_t). +// 13b. FixedBitWidthEncoding + AlwaysTrue + dense + FAST PATH + NULLS +// Tests the scatter path in bulkScan when encoding-level nulls are present +// (as set by NullableEncoding). Non-null values must land at their correct +// scattered positions in the output buffer. // --------------------------------------------------------------------------- TEST_P( ReadWithVisitorTest, - encodingLevel_FixedBitWidth_AlwaysTrue_Dense_UInt32_FastPath) { + encodingLevel_FixedBitWidth_AlwaysTrue_Dense_Int32_FastPath_WithNulls) { constexpr int kRows = 500; - std::vector data(kRows); + // Build null bitmap: every 5th row is null. + auto nulls = velox::allocateNulls(kRows, pool(), velox::bits::kNotNull); + auto* rawNulls = nulls->asMutable(); + int expectedNonNull = 0; for (int i = 0; i < kRows; ++i) { - data[i] = static_cast(i % 16); + if (i % 5 == 0) { + velox::bits::setNull(rawNulls, i); + } else { + ++expectedNonNull; + } + } + + // Small-range int32 data for all rows (non-null values use i % 16). + std::vector nonNullData(expectedNonNull); + { + int j = 0; + for (int i = 0; i < kRows; ++i) { + if (i % 5 != 0) { + nonNullData[j++] = i % 16; + } + } } auto input = makeRowVector({makeFlatVector( @@ -1725,9 +1743,13 @@ TEST_P( reader->doPrepareRead(0, rows, nullptr); + // Set encoding-level nulls (simulates NullableEncoding wrapping FBW). + reader->nullsInReadRange() = nulls; + + // Create FBW encoding with only the non-null values. Buffer buffer(*pool()); - auto encoding = createFromCustomLayout( - FixedBitWidthEnc{}, data, *pool(), buffer); + auto encoding = createFromCustomLayout( + FixedBitWidthEnc{}, nonNullData, *pool(), buffer); common::AlwaysTrue filter; dwio::common::ExtractToReader extractValues(reader); @@ -1742,24 +1764,237 @@ TEST_P( dispatchCallReadWithVisitor(*encoding, visitor, params); + // With nulls, numValues should be kRows (non-null values scattered to their + // correct positions, null positions left untouched). EXPECT_EQ(reader->numValues(), kRows); auto values = getValues(reader); for (int i = 0; i < kRows; ++i) { - EXPECT_EQ(values[i], i % 16) << "row " << i; + if (i % 5 != 0) { + EXPECT_EQ(values[i], i % 16) << "non-null row " << i; + } } } // --------------------------------------------------------------------------- -// 13b. FixedBitWidthEncoding + AlwaysTrue + dense + FAST PATH + NULLS -// Tests the scatter path in bulkScan when encoding-level nulls are present -// (as set by NullableEncoding). Non-null values must land at their correct -// scattered positions in the output buffer. +// 13c. FixedBitWidthEncoding + AlwaysTrue + sparse + FAST PATH +// Exercises the sparse branch of bulkScan (individual value reads at +// specified positions) followed by processFixedWidthRun. A subset of rows +// is selected to make the read non-dense. // --------------------------------------------------------------------------- TEST_P( ReadWithVisitorTest, - encodingLevel_FixedBitWidth_AlwaysTrue_Dense_Int32_FastPath_WithNulls) { + encodingLevel_FixedBitWidth_AlwaysTrue_Sparse_Int32_FastPath) { + constexpr int kTotalRows = 500; + + // Small-range int32 data. + std::vector data(kTotalRows); + for (int i = 0; i < kTotalRows; ++i) { + data[i] = i % 16; + } + + auto input = makeRowVector({makeFlatVector( + kTotalRows, [](auto i) { return static_cast(i % 16); })}); + auto rowType = asRowType(input->type()); + auto ctx = makeFileContext(input); + auto scanSpec = std::make_shared("root"); + scanSpec->addAllChildFields(*rowType); + scanSpec->childByName("c0")->setFilter( + std::make_unique()); + auto root = buildReader(*ctx, rowType, *scanSpec); + + auto* structReader = + dynamic_cast(root.get()); + auto* reader = static_cast( + dynamic_cast(structReader->children()[0])); + ASSERT_NE(reader, nullptr); + + // Select every other row to make the read sparse. + std::vector rowVec; + for (int i = 0; i < kTotalRows; i += 2) { + rowVec.push_back(i); + } + RowSet rows(rowVec.data(), rowVec.size()); + + reader->doPrepareRead(0, rows, nullptr); + + Buffer buffer(*pool()); + auto encoding = createFromCustomLayout( + FixedBitWidthEnc{}, data, *pool(), buffer); + + common::AlwaysTrue filter; + dwio::common::ExtractToReader extractValues(reader); + constexpr bool kIsDense = false; + DecoderVisitor< + int32_t, + common::AlwaysTrue, + dwio::common::ExtractToReader, + kIsDense> + visitor(filter, reader, rows, extractValues); + auto params = makeReadWithVisitorParams(visitor, rows, pool()); + + dispatchCallReadWithVisitor(*encoding, visitor, params); + + auto numSelected = static_cast(rowVec.size()); + EXPECT_EQ(reader->numValues(), numSelected); + auto values = getValues(reader); + for (int i = 0; i < numSelected; ++i) { + EXPECT_EQ(values[i], rowVec[i] % 16) << "selected row index " << i; + } +} + +// --------------------------------------------------------------------------- +// 13d. FixedBitWidthEncoding + BigintRange + dense + FAST PATH +// Exercises the filter path in bulkScan: only values passing BigintRange +// [4, 12] should appear in the output. +// --------------------------------------------------------------------------- +TEST_P( + ReadWithVisitorTest, + encodingLevel_FixedBitWidth_BigintRange_Dense_Int32_FastPath) { + constexpr int kRows = 500; + + std::vector data(kRows); + for (int i = 0; i < kRows; ++i) { + data[i] = i % 16; + } + + auto input = makeRowVector({makeFlatVector( + kRows, [](auto i) { return static_cast(i % 16); })}); + auto rowType = asRowType(input->type()); + auto ctx = makeFileContext(input); + auto scanSpec = std::make_shared("root"); + scanSpec->addAllChildFields(*rowType); + scanSpec->childByName("c0")->setFilter( + std::make_unique(4, 12, false)); + auto root = buildReader(*ctx, rowType, *scanSpec); + + auto* structReader = + dynamic_cast(root.get()); + auto* reader = static_cast( + dynamic_cast(structReader->children()[0])); + ASSERT_NE(reader, nullptr); + + std::vector rowVec(kRows); + std::iota(rowVec.begin(), rowVec.end(), 0); + RowSet rows(rowVec.data(), rowVec.size()); + + reader->doPrepareRead(0, rows, nullptr); + + Buffer buffer(*pool()); + auto encoding = createFromCustomLayout( + FixedBitWidthEnc{}, data, *pool(), buffer); + + common::BigintRange filter(4, 12, false); + dwio::common::ExtractToReader extractValues(reader); + constexpr bool kIsDense = true; + DecoderVisitor< + int32_t, + common::BigintRange, + dwio::common::ExtractToReader, + kIsDense> + visitor(filter, reader, rows, extractValues); + auto params = makeReadWithVisitorParams(visitor, rows, pool()); + + dispatchCallReadWithVisitor(*encoding, visitor, params); + + int expected = 0; + for (int i = 0; i < kRows; ++i) { + if (data[i] >= 4 && data[i] <= 12) { + ++expected; + } + } + EXPECT_EQ(reader->numValues(), expected); + + auto values = getValues(reader); + for (int i = 0; i < reader->numValues(); ++i) { + EXPECT_GE(values[i], 4); + EXPECT_LE(values[i], 12); + } +} + +// --------------------------------------------------------------------------- +// 13e. FixedBitWidthEncoding + BigintRange + sparse + FAST PATH +// Exercises the filter path in bulkScan with sparse (non-dense) row +// selection: every other row is selected, then filtered by [4, 12]. +// --------------------------------------------------------------------------- +TEST_P( + ReadWithVisitorTest, + encodingLevel_FixedBitWidth_BigintRange_Sparse_Int32_FastPath) { + constexpr int kTotalRows = 500; + + std::vector data(kTotalRows); + for (int i = 0; i < kTotalRows; ++i) { + data[i] = i % 16; + } + + auto input = makeRowVector({makeFlatVector( + kTotalRows, [](auto i) { return static_cast(i % 16); })}); + auto rowType = asRowType(input->type()); + auto ctx = makeFileContext(input); + auto scanSpec = std::make_shared("root"); + scanSpec->addAllChildFields(*rowType); + scanSpec->childByName("c0")->setFilter( + std::make_unique(4, 12, false)); + auto root = buildReader(*ctx, rowType, *scanSpec); + + auto* structReader = + dynamic_cast(root.get()); + auto* reader = static_cast( + dynamic_cast(structReader->children()[0])); + ASSERT_NE(reader, nullptr); + + std::vector rowVec; + for (int i = 0; i < kTotalRows; i += 2) { + rowVec.push_back(i); + } + RowSet rows(rowVec.data(), rowVec.size()); + + reader->doPrepareRead(0, rows, nullptr); + + Buffer buffer(*pool()); + auto encoding = createFromCustomLayout( + FixedBitWidthEnc{}, data, *pool(), buffer); + + common::BigintRange filter(4, 12, false); + dwio::common::ExtractToReader extractValues(reader); + constexpr bool kIsDense = false; + DecoderVisitor< + int32_t, + common::BigintRange, + dwio::common::ExtractToReader, + kIsDense> + visitor(filter, reader, rows, extractValues); + auto params = makeReadWithVisitorParams(visitor, rows, pool()); + + dispatchCallReadWithVisitor(*encoding, visitor, params); + + int expected = 0; + for (int i = 0; i < kTotalRows; i += 2) { + if (data[i] >= 4 && data[i] <= 12) { + ++expected; + } + } + EXPECT_EQ(reader->numValues(), expected); + + auto values = getValues(reader); + for (int i = 0; i < reader->numValues(); ++i) { + EXPECT_GE(values[i], 4); + EXPECT_LE(values[i], 12); + } +} + +// --------------------------------------------------------------------------- +// 13f. FixedBitWidthEncoding + BigintRange + dense + FAST PATH + NULLS +// Exercises the filter + scatter combination in bulkScan: encoding-level +// nulls trigger scatter (kScatter=true), and the BigintRange filter +// selects only values in [4, 12]. Both processFixedWidthRun concerns +// are exercised simultaneously. +// --------------------------------------------------------------------------- +TEST_P( + ReadWithVisitorTest, + encodingLevel_FixedBitWidth_BigintRange_Dense_Int32_FastPath_WithNulls) { constexpr int kRows = 500; + // Build null bitmap: every 5th row is null. auto nulls = velox::allocateNulls(kRows, pool(), velox::bits::kNotNull); auto* rawNulls = nulls->asMutable(); int expectedNonNull = 0; @@ -1771,6 +2006,7 @@ TEST_P( } } + // Non-null values only (encoding stores only non-null values). std::vector nonNullData(expectedNonNull); { int j = 0; @@ -1788,7 +2024,7 @@ TEST_P( auto scanSpec = std::make_shared("root"); scanSpec->addAllChildFields(*rowType); scanSpec->childByName("c0")->setFilter( - std::make_unique()); + std::make_unique(4, 12, false)); auto root = buildReader(*ctx, rowType, *scanSpec); auto* structReader = @@ -1802,18 +2038,21 @@ TEST_P( RowSet rows(rowVec.data(), rowVec.size()); reader->doPrepareRead(0, rows, nullptr); + + // Set encoding-level nulls (simulates NullableEncoding wrapping FBW). reader->nullsInReadRange() = nulls; + // Create FBW encoding with only the non-null values. Buffer buffer(*pool()); auto encoding = createFromCustomLayout( FixedBitWidthEnc{}, nonNullData, *pool(), buffer); - common::AlwaysTrue filter; + common::BigintRange filter(4, 12, false); dwio::common::ExtractToReader extractValues(reader); constexpr bool kIsDense = true; DecoderVisitor< int32_t, - common::AlwaysTrue, + common::BigintRange, dwio::common::ExtractToReader, kIsDense> visitor(filter, reader, rows, extractValues); @@ -1821,13 +2060,20 @@ TEST_P( dispatchCallReadWithVisitor(*encoding, visitor, params); - EXPECT_EQ(reader->numValues(), kRows); - auto values = getValues(reader); + // Count expected: non-null rows whose value is in [4, 12]. + int expected = 0; for (int i = 0; i < kRows; ++i) { - if (i % 5 != 0) { - EXPECT_EQ(values[i], i % 16) << "non-null row " << i; + if (i % 5 != 0 && (i % 16) >= 4 && (i % 16) <= 12) { + ++expected; } } + EXPECT_EQ(reader->numValues(), expected); + + auto values = getValues(reader); + for (int i = 0; i < reader->numValues(); ++i) { + EXPECT_GE(values[i], 4); + EXPECT_LE(values[i], 12); + } } // ---------------------------------------------------------------------------