diff --git a/dwio/nimble/encodings/EncodingUtils.h b/dwio/nimble/encodings/EncodingUtils.h index 86388302..630f26fe 100644 --- a/dwio/nimble/encodings/EncodingUtils.h +++ b/dwio/nimble/encodings/EncodingUtils.h @@ -17,6 +17,7 @@ #include "dwio/nimble/encodings/ConstantEncoding.h" #include "dwio/nimble/encodings/DictionaryEncoding.h" +#include "dwio/nimble/encodings/EncodingFactory.h" #include "dwio/nimble/encodings/FixedBitWidthEncoding.h" #include "dwio/nimble/encodings/MainlyConstantEncoding.h" #include "dwio/nimble/encodings/NullableEncoding.h" @@ -166,9 +167,18 @@ void callReadWithVisitor( } } -/// Encoding trait for non-legacy encodings. Dispatches to the standard -/// callReadWithVisitor which casts to non-legacy concrete encoding types. +/// Encoding trait for non-legacy encodings. Decodes using the standard +/// EncodingFactory and dispatches callReadWithVisitor to non-legacy concrete +/// encoding types. struct DefaultEncodingTrait { + static std::unique_ptr decode( + velox::memory::MemoryPool& pool, + std::string_view data, + std::function stringBufferFactory) { + return EncodingFactory::decode( + pool, data, std::move(stringBufferFactory)); + } + template static void callReadWithVisitor( Encoding& encoding, diff --git a/dwio/nimble/encodings/legacy/EncodingTrait.h b/dwio/nimble/encodings/legacy/EncodingTrait.h index 933f1c08..fbe0a38c 100644 --- a/dwio/nimble/encodings/legacy/EncodingTrait.h +++ b/dwio/nimble/encodings/legacy/EncodingTrait.h @@ -16,6 +16,7 @@ #pragma once #include "dwio/nimble/encodings/Encoding.h" +#include "dwio/nimble/encodings/legacy/EncodingFactory.h" namespace facebook::nimble::legacy { @@ -29,9 +30,18 @@ void callReadWithVisitor( DecoderVisitor& visitor, ReadWithVisitorParams& params); -/// Encoding trait for legacy encodings. Dispatches to -/// legacy::callReadWithVisitor which casts to legacy concrete encoding types. +/// Encoding trait for legacy encodings. Decodes using the legacy +/// EncodingFactory and dispatches callReadWithVisitor to legacy concrete +/// encoding types. struct LegacyEncodingTrait { + static std::unique_ptr decode( + velox::memory::MemoryPool& pool, + std::string_view data, + std::function stringBufferFactory) { + return EncodingFactory::decode( + pool, data, std::move(stringBufferFactory)); + } + template static void callReadWithVisitor( Encoding& encoding, diff --git a/dwio/nimble/encodings/legacy/EncodingUtils.h b/dwio/nimble/encodings/legacy/EncodingUtils.h index ed11eec5..a91b0603 100644 --- a/dwio/nimble/encodings/legacy/EncodingUtils.h +++ b/dwio/nimble/encodings/legacy/EncodingUtils.h @@ -141,5 +141,4 @@ void callReadWithVisitor( }); } } - } // namespace facebook::nimble::legacy diff --git a/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp b/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp index 4306c5b1..d5ec50b8 100644 --- a/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp +++ b/dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp @@ -29,9 +29,7 @@ #include "dwio/nimble/common/Buffer.h" #include "dwio/nimble/common/tests/NimbleFileWriter.h" -#include "dwio/nimble/encodings/EncodingFactory.h" #include "dwio/nimble/encodings/EncodingUtils.h" -#include "dwio/nimble/encodings/legacy/EncodingFactory.h" #include "dwio/nimble/encodings/legacy/EncodingUtils.h" #include "dwio/nimble/encodings/tests/EncodingLayoutTestHelper.h" #include "dwio/nimble/velox/selective/ByteColumnReader.h" @@ -125,31 +123,12 @@ class ReadWithVisitorTest : public ::testing::TestWithParam, FileContext& ctx, const RowTypePtr& rowType, common::ScanSpec& scanSpec) { - using Factory = std::function( - memory::MemoryPool&, std::string_view, std::function)>; - Factory factory = useNonLegacy() - ? Factory( - [](memory::MemoryPool& pool, - std::string_view data, - std::function sbf) - -> std::unique_ptr { - return EncodingFactory::decode(pool, data, std::move(sbf)); - }) - : Factory( - [](memory::MemoryPool& pool, - std::string_view data, - std::function sbf) - -> std::unique_ptr { - return legacy::EncodingFactory::decode( - pool, data, std::move(sbf)); - }); NimbleParams params( *pool(), ctx.stats, ctx.readerBase->nimbleSchema(), *ctx.streams, ctx.rowSizeTracker.get(), - std::move(factory), /*getStringBuffersFromDecoder=*/useNonLegacy()); auto reader = buildColumnReader( @@ -168,9 +147,9 @@ class ReadWithVisitorTest : public ::testing::TestWithParam, std::string_view encoded, velox::memory::MemoryPool& memPool) { if (useNonLegacy()) { - return EncodingFactory::decode(memPool, encoded, nullptr); + return DefaultEncodingTrait::decode(memPool, encoded, nullptr); } - return legacy::EncodingFactory::decode(memPool, encoded, nullptr); + return legacy::LegacyEncodingTrait::decode(memPool, encoded, nullptr); } // Dispatch callReadWithVisitor to the appropriate family. @@ -180,9 +159,10 @@ class ReadWithVisitorTest : public ::testing::TestWithParam, V& visitor, ReadWithVisitorParams& params) { if (useNonLegacy()) { - nimble::callReadWithVisitor(encoding, visitor, params); + DefaultEncodingTrait::callReadWithVisitor(encoding, visitor, params); } else { - legacy::callReadWithVisitor(encoding, visitor, params); + legacy::LegacyEncodingTrait::callReadWithVisitor( + encoding, visitor, params); } } diff --git a/dwio/nimble/velox/selective/ChunkedDecoder.cpp b/dwio/nimble/velox/selective/ChunkedDecoder.cpp index ba63cfc2..c310bba6 100644 --- a/dwio/nimble/velox/selective/ChunkedDecoder.cpp +++ b/dwio/nimble/velox/selective/ChunkedDecoder.cpp @@ -18,7 +18,6 @@ #include "dwio/nimble/common/ChunkHeader.h" #include "dwio/nimble/common/Types.h" -#include "dwio/nimble/encodings/EncodingFactory.h" #include "velox/common/testutil/TestValue.h" #include @@ -49,14 +48,18 @@ void ChunkedDecoder::loadNextChunk() { inputData_ += length; inputSize_ -= length; currentStringBuffers_.clear(); - encoding_ = encodingFactory_( - *pool_, - std::string_view(chunkData, chunkSize), - [&](uint32_t totalLength) { - auto& buffer = currentStringBuffers_.emplace_back( - velox::AlignedBuffer::allocate(totalLength, pool_)); - return buffer->asMutable(); - }); + auto stringBufferFactory = [&](uint32_t totalLength) -> void* { + auto& buffer = currentStringBuffers_.emplace_back( + velox::AlignedBuffer::allocate(totalLength, pool_)); + return buffer->asMutable(); + }; + auto data = std::string_view(chunkData, chunkSize); + if (getStringBuffersFromDecoder_) { + encoding_ = DefaultEncodingTrait::decode(*pool_, data, stringBufferFactory); + } else { + encoding_ = + legacy::LegacyEncodingTrait::decode(*pool_, data, stringBufferFactory); + } remainingValues_ = encoding_->rowCount(); NIMBLE_CHECK_GT(remainingValues_, 0); VLOG(1) << encoding_->debugString(); diff --git a/dwio/nimble/velox/selective/ChunkedDecoder.h b/dwio/nimble/velox/selective/ChunkedDecoder.h index 78bea999..0968d81e 100644 --- a/dwio/nimble/velox/selective/ChunkedDecoder.h +++ b/dwio/nimble/velox/selective/ChunkedDecoder.h @@ -33,21 +33,10 @@ class ChunkedDecoder { bool decodeValuesWithNulls, std::shared_ptr streamIndex, velox::memory::MemoryPool* pool, - std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function)> encodingFactory = - [](velox::memory::MemoryPool& pool, - std::string_view data, - std::function stringBufferFactory) - -> std::unique_ptr { - return EncodingFactory::decode(pool, data, stringBufferFactory); - }, bool getStringBuffersFromDecoder = false) : input_{std::move(input)}, pool_{pool}, decodeValuesWithNulls_{decodeValuesWithNulls}, - encodingFactory_{std::move(encodingFactory)}, getStringBuffersFromDecoder_{getStringBuffersFromDecoder}, streamIndex_{std::move(streamIndex)}, streamRowCount_{ @@ -437,11 +426,6 @@ class ChunkedDecoder { // encode nulls alongside values). When false, decode values without nulls // (standard case for scalar types). const bool decodeValuesWithNulls_; - const std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function)> - encodingFactory_; const bool getStringBuffersFromDecoder_{false}; // Optional stream index for accelerating skip operations const std::shared_ptr streamIndex_; diff --git a/dwio/nimble/velox/selective/NimbleData.cpp b/dwio/nimble/velox/selective/NimbleData.cpp index 78f3096a..fbf4b210 100644 --- a/dwio/nimble/velox/selective/NimbleData.cpp +++ b/dwio/nimble/velox/selective/NimbleData.cpp @@ -28,16 +28,11 @@ NimbleData::NimbleData( StripeStreams& streams, memory::MemoryPool& memoryPool, ChunkedDecoder* inMapDecoder, - std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function)> encodingFactory, bool getStringBuffersFromDecoder) : nimbleType_(nimbleType), streams_(&streams), pool_(&memoryPool), - inMapDecoder_(inMapDecoder), - encodingFactory_{encodingFactory} { + inMapDecoder_(inMapDecoder) { switch (nimbleType->kind()) { case Kind::Scalar: // Nulls in scalar types will be decoded along with values. @@ -166,7 +161,6 @@ ChunkedDecoder NimbleData::makeScalarDecoder() { /*decodeValuesWithNulls=*/false, streams_->streamIndex(streamId), pool_, - encodingFactory_, getStringBuffersFromDecoder_); } @@ -179,7 +173,6 @@ ChunkedDecoder NimbleData::makeMicrosDecoder() { /*decodeValuesWithNulls=*/false, streams_->streamIndex(streamId), pool_, - encodingFactory_, getStringBuffersFromDecoder_); } @@ -192,7 +185,6 @@ ChunkedDecoder NimbleData::makeNanosDecoder() { /*decodeValuesWithNulls=*/false, streams_->streamIndex(streamId), pool_, - encodingFactory_, getStringBuffersFromDecoder_); } @@ -222,7 +214,6 @@ std::unique_ptr NimbleData::makeDecoder( decodeValuesWithNulls, streams_->streamIndex(descriptor.offset()), pool_, - encodingFactory_, getStringBuffersFromDecoder_); } @@ -234,7 +225,6 @@ std::unique_ptr NimbleParams::toFormatData( *streams_, pool(), inMapDecoder_, - encodingFactory_, getStringBuffersFromDecoder_); } diff --git a/dwio/nimble/velox/selective/NimbleData.h b/dwio/nimble/velox/selective/NimbleData.h index 8c9a4cc5..6bc8cfc7 100644 --- a/dwio/nimble/velox/selective/NimbleData.h +++ b/dwio/nimble/velox/selective/NimbleData.h @@ -16,7 +16,6 @@ #pragma once -#include "dwio/nimble/encodings/EncodingFactory.h" #include "dwio/nimble/velox/selective/ReaderBase.h" #include "dwio/nimble/velox/selective/RowSizeTracker.h" #include "velox/dwio/common/FormatData.h" @@ -32,10 +31,6 @@ class NimbleData : public velox::dwio::common::FormatData { StripeStreams& streams, velox::memory::MemoryPool& memoryPool, ChunkedDecoder* inMapDecoder, - std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function)> encodingFactory, bool getStringBuffersFromDecoder); /// Read internal node nulls. For leaf nodes, we only copy `incomingNulls' if @@ -110,11 +105,6 @@ class NimbleData : public velox::dwio::common::FormatData { ChunkedDecoder* const inMapDecoder_; std::unique_ptr nullsDecoder_; velox::BufferPtr inMap_; - std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function)> - encodingFactory_; }; class NimbleParams : public velox::dwio::common::FormatParams { @@ -125,10 +115,6 @@ class NimbleParams : public velox::dwio::common::FormatParams { const std::shared_ptr& nimbleType, StripeStreams& streams, RowSizeTracker* rowSizeTracker, - std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function)> encodingFactory, bool getStringBuffersFromDecoder = false, bool preserveFlatMapsInMemory = false) : FormatParams(pool, stats), @@ -136,7 +122,6 @@ class NimbleParams : public velox::dwio::common::FormatParams { streams_(&streams), rowSizeTracker_(rowSizeTracker), preserveFlatMapsInMemory_(preserveFlatMapsInMemory), - encodingFactory_(std::move(encodingFactory)), getStringBuffersFromDecoder_{getStringBuffersFromDecoder} {} std::unique_ptr toFormatData( @@ -150,7 +135,6 @@ class NimbleParams : public velox::dwio::common::FormatParams { type, *streams_, rowSizeTracker_, - encodingFactory_, getStringBuffersFromDecoder_, preserveFlatMapsInMemory_); } @@ -181,11 +165,6 @@ class NimbleParams : public velox::dwio::common::FormatParams { RowSizeTracker* const rowSizeTracker_{nullptr}; const bool preserveFlatMapsInMemory_{false}; ChunkedDecoder* inMapDecoder_{nullptr}; - std::function( - velox::memory::MemoryPool&, - std::string_view, - std::function stringBufferFactory)> - encodingFactory_; bool getStringBuffersFromDecoder_{false}; }; diff --git a/dwio/nimble/velox/selective/SelectiveNimbleIndexReader.cpp b/dwio/nimble/velox/selective/SelectiveNimbleIndexReader.cpp index ea15f3f7..73542436 100644 --- a/dwio/nimble/velox/selective/SelectiveNimbleIndexReader.cpp +++ b/dwio/nimble/velox/selective/SelectiveNimbleIndexReader.cpp @@ -18,8 +18,6 @@ #include -#include "dwio/nimble/encodings/EncodingFactory.h" -#include "dwio/nimble/encodings/legacy/EncodingFactory.h" #include "dwio/nimble/index/ClusterIndexReader.h" #include "dwio/nimble/velox/SchemaUtils.h" #include "dwio/nimble/velox/selective/ColumnReader.h" @@ -571,19 +569,6 @@ void SelectiveNimbleIndexReader::loadStripeWithIndex(uint32_t stripeIndex) { readerBase_->nimbleSchema(), streams_, options_.trackRowSize() ? rowSizeTracker_.get() : nullptr, - options_.passStringBuffersFromDecoder() - ? [](velox::memory::MemoryPool& pool, - std::string_view data, - std::function stringBufferFactory) - -> std::unique_ptr { - return EncodingFactory::decode(pool, data, std::move(stringBufferFactory)); - } - : [](velox::memory::MemoryPool& pool, - std::string_view data, - std::function stringBufferFactory) - -> std::unique_ptr { - return legacy::EncodingFactory::decode(pool, data, std::move(stringBufferFactory)); - }, options_.passStringBuffersFromDecoder(), options_.preserveFlatMapsInMemory()); diff --git a/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp b/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp index 533f0f4f..dc840842 100644 --- a/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp +++ b/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp @@ -15,8 +15,6 @@ */ #include "dwio/nimble/velox/selective/SelectiveNimbleReader.h" -#include "dwio/nimble/encodings/EncodingFactory.h" -#include "dwio/nimble/encodings/legacy/EncodingFactory.h" #include "dwio/nimble/index/ClusterIndexReader.h" #include "dwio/nimble/index/IndexConstants.h" #include "dwio/nimble/index/IndexFilter.h" @@ -363,19 +361,6 @@ void SelectiveNimbleRowReader::loadCurrentStripe() { readerBase_->nimbleSchema(), streams_, options_.trackRowSize() ? rowSizeTracker_.get() : nullptr, - options_.passStringBuffersFromDecoder() - ? [](velox::memory::MemoryPool& pool, - std::string_view data, - std::function stringBufferFactory) - -> std::unique_ptr { - return EncodingFactory::decode(pool, data, stringBufferFactory); - } - : [](velox::memory::MemoryPool& pool, - std::string_view data, - std::function stringBufferFactory) - -> std::unique_ptr { - return legacy::EncodingFactory::decode(pool, data, stringBufferFactory); - }, options_.passStringBuffersFromDecoder(), options_.preserveFlatMapsInMemory());