Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions dwio/nimble/encodings/EncodingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "dwio/nimble/encodings/ConstantEncoding.h"
#include "dwio/nimble/encodings/DictionaryEncoding.h"
#include "dwio/nimble/encodings/EncodingFactory.h"
#include "dwio/nimble/encodings/FixedBitWidthEncoding.h"
#include "dwio/nimble/encodings/MainlyConstantEncoding.h"
#include "dwio/nimble/encodings/NullableEncoding.h"
Expand Down Expand Up @@ -166,9 +167,18 @@ void callReadWithVisitor(
}
}

/// Encoding trait for non-legacy encodings. Dispatches to the standard
/// callReadWithVisitor which casts to non-legacy concrete encoding types.
/// Encoding trait for non-legacy encodings. Decodes using the standard
/// EncodingFactory and dispatches callReadWithVisitor to non-legacy concrete
/// encoding types.
struct DefaultEncodingTrait {
/// Builds an Encoding from serialized bytes by delegating to
/// EncodingFactory::decode as it resolves in this header's scope (per the
/// struct comment, the standard, non-legacy factory).
///
/// @param pool Memory pool handed through to the factory unchanged.
/// @param data Serialized encoding bytes, handed through unchanged.
/// @param stringBufferFactory Callback (uint32_t -> void*) forwarded to the
///     factory. It is taken by value and moved into the factory call, so the
///     caller's copy is not touched after the call.
/// @return The encoding object produced by the factory.
static std::unique_ptr<Encoding> decode(
    velox::memory::MemoryPool& pool,
    std::string_view data,
    std::function<void*(uint32_t)> stringBufferFactory) {
  std::unique_ptr<Encoding> decoded =
      EncodingFactory::decode(pool, data, std::move(stringBufferFactory));
  return decoded;
}

template <typename V>
static void callReadWithVisitor(
Encoding& encoding,
Expand Down
14 changes: 12 additions & 2 deletions dwio/nimble/encodings/legacy/EncodingTrait.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include "dwio/nimble/encodings/Encoding.h"
#include "dwio/nimble/encodings/legacy/EncodingFactory.h"

namespace facebook::nimble::legacy {

Expand All @@ -29,9 +30,18 @@ void callReadWithVisitor(
DecoderVisitor& visitor,
ReadWithVisitorParams& params);

/// Encoding trait for legacy encodings. Dispatches to
/// legacy::callReadWithVisitor which casts to legacy concrete encoding types.
/// Encoding trait for legacy encodings. Decodes using the legacy
/// EncodingFactory and dispatches callReadWithVisitor to legacy concrete
/// encoding types.
struct LegacyEncodingTrait {
/// Builds an Encoding from serialized bytes by delegating to
/// EncodingFactory::decode as it resolves inside facebook::nimble::legacy
/// (per the struct comment, the legacy factory).
///
/// @param pool Memory pool handed through to the factory unchanged.
/// @param data Serialized encoding bytes, handed through unchanged.
/// @param stringBufferFactory Callback (uint32_t -> void*) forwarded to the
///     factory. It is taken by value and moved into the factory call, so the
///     caller's copy is not touched after the call.
/// @return The encoding object produced by the factory.
static std::unique_ptr<Encoding> decode(
    velox::memory::MemoryPool& pool,
    std::string_view data,
    std::function<void*(uint32_t)> stringBufferFactory) {
  std::unique_ptr<Encoding> decoded =
      EncodingFactory::decode(pool, data, std::move(stringBufferFactory));
  return decoded;
}

template <typename V>
static void callReadWithVisitor(
Encoding& encoding,
Expand Down
1 change: 0 additions & 1 deletion dwio/nimble/encodings/legacy/EncodingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,4 @@ void callReadWithVisitor(
});
}
}

} // namespace facebook::nimble::legacy
30 changes: 5 additions & 25 deletions dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@

#include "dwio/nimble/common/Buffer.h"
#include "dwio/nimble/common/tests/NimbleFileWriter.h"
#include "dwio/nimble/encodings/EncodingFactory.h"
#include "dwio/nimble/encodings/EncodingUtils.h"
#include "dwio/nimble/encodings/legacy/EncodingFactory.h"
#include "dwio/nimble/encodings/legacy/EncodingUtils.h"
#include "dwio/nimble/encodings/tests/EncodingLayoutTestHelper.h"
#include "dwio/nimble/velox/selective/ByteColumnReader.h"
Expand Down Expand Up @@ -125,31 +123,12 @@ class ReadWithVisitorTest : public ::testing::TestWithParam<bool>,
FileContext& ctx,
const RowTypePtr& rowType,
common::ScanSpec& scanSpec) {
using Factory = std::function<std::unique_ptr<Encoding>(
memory::MemoryPool&, std::string_view, std::function<void*(uint32_t)>)>;
Factory factory = useNonLegacy()
? Factory(
[](memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> sbf)
-> std::unique_ptr<Encoding> {
return EncodingFactory::decode(pool, data, std::move(sbf));
})
: Factory(
[](memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> sbf)
-> std::unique_ptr<Encoding> {
return legacy::EncodingFactory::decode(
pool, data, std::move(sbf));
});
NimbleParams params(
*pool(),
ctx.stats,
ctx.readerBase->nimbleSchema(),
*ctx.streams,
ctx.rowSizeTracker.get(),
std::move(factory),
/*getStringBuffersFromDecoder=*/useNonLegacy());

auto reader = buildColumnReader(
Expand All @@ -168,9 +147,9 @@ class ReadWithVisitorTest : public ::testing::TestWithParam<bool>,
std::string_view encoded,
velox::memory::MemoryPool& memPool) {
if (useNonLegacy()) {
return EncodingFactory::decode(memPool, encoded, nullptr);
return DefaultEncodingTrait::decode(memPool, encoded, nullptr);
}
return legacy::EncodingFactory::decode(memPool, encoded, nullptr);
return legacy::LegacyEncodingTrait::decode(memPool, encoded, nullptr);
}

// Dispatch callReadWithVisitor to the appropriate family.
Expand All @@ -180,9 +159,10 @@ class ReadWithVisitorTest : public ::testing::TestWithParam<bool>,
V& visitor,
ReadWithVisitorParams& params) {
if (useNonLegacy()) {
nimble::callReadWithVisitor(encoding, visitor, params);
DefaultEncodingTrait::callReadWithVisitor(encoding, visitor, params);
} else {
legacy::callReadWithVisitor(encoding, visitor, params);
legacy::LegacyEncodingTrait::callReadWithVisitor(
encoding, visitor, params);
}
}

Expand Down
21 changes: 12 additions & 9 deletions dwio/nimble/velox/selective/ChunkedDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include "dwio/nimble/common/ChunkHeader.h"
#include "dwio/nimble/common/Types.h"
#include "dwio/nimble/encodings/EncodingFactory.h"
#include "velox/common/testutil/TestValue.h"

#include <cstddef>
Expand Down Expand Up @@ -49,14 +48,18 @@ void ChunkedDecoder::loadNextChunk() {
inputData_ += length;
inputSize_ -= length;
currentStringBuffers_.clear();
encoding_ = encodingFactory_(
*pool_,
std::string_view(chunkData, chunkSize),
[&](uint32_t totalLength) {
auto& buffer = currentStringBuffers_.emplace_back(
velox::AlignedBuffer::allocate<char>(totalLength, pool_));
return buffer->asMutable<void>();
});
auto stringBufferFactory = [&](uint32_t totalLength) -> void* {
auto& buffer = currentStringBuffers_.emplace_back(
velox::AlignedBuffer::allocate<char>(totalLength, pool_));
return buffer->asMutable<void>();
};
auto data = std::string_view(chunkData, chunkSize);
if (getStringBuffersFromDecoder_) {
encoding_ = DefaultEncodingTrait::decode(*pool_, data, stringBufferFactory);
} else {
encoding_ =
legacy::LegacyEncodingTrait::decode(*pool_, data, stringBufferFactory);
}
remainingValues_ = encoding_->rowCount();
NIMBLE_CHECK_GT(remainingValues_, 0);
VLOG(1) << encoding_->debugString();
Expand Down
16 changes: 0 additions & 16 deletions dwio/nimble/velox/selective/ChunkedDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,10 @@ class ChunkedDecoder {
bool decodeValuesWithNulls,
std::shared_ptr<index::StreamIndex> streamIndex,
velox::memory::MemoryPool* pool,
std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)>)> encodingFactory =
[](velox::memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> stringBufferFactory)
-> std::unique_ptr<Encoding> {
return EncodingFactory::decode(pool, data, stringBufferFactory);
},
bool getStringBuffersFromDecoder = false)
: input_{std::move(input)},
pool_{pool},
decodeValuesWithNulls_{decodeValuesWithNulls},
encodingFactory_{std::move(encodingFactory)},
getStringBuffersFromDecoder_{getStringBuffersFromDecoder},
streamIndex_{std::move(streamIndex)},
streamRowCount_{
Expand Down Expand Up @@ -437,11 +426,6 @@ class ChunkedDecoder {
// encode nulls alongside values). When false, decode values without nulls
// (standard case for scalar types).
const bool decodeValuesWithNulls_;
const std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)>)>
encodingFactory_;
const bool getStringBuffersFromDecoder_{false};
// Optional stream index for accelerating skip operations
const std::shared_ptr<index::StreamIndex> streamIndex_;
Expand Down
12 changes: 1 addition & 11 deletions dwio/nimble/velox/selective/NimbleData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,11 @@ NimbleData::NimbleData(
StripeStreams& streams,
memory::MemoryPool& memoryPool,
ChunkedDecoder* inMapDecoder,
std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)>)> encodingFactory,
bool getStringBuffersFromDecoder)
: nimbleType_(nimbleType),
streams_(&streams),
pool_(&memoryPool),
inMapDecoder_(inMapDecoder),
encodingFactory_{encodingFactory} {
inMapDecoder_(inMapDecoder) {
switch (nimbleType->kind()) {
case Kind::Scalar:
// Nulls in scalar types will be decoded along with values.
Expand Down Expand Up @@ -166,7 +161,6 @@ ChunkedDecoder NimbleData::makeScalarDecoder() {
/*decodeValuesWithNulls=*/false,
streams_->streamIndex(streamId),
pool_,
encodingFactory_,
getStringBuffersFromDecoder_);
}

Expand All @@ -179,7 +173,6 @@ ChunkedDecoder NimbleData::makeMicrosDecoder() {
/*decodeValuesWithNulls=*/false,
streams_->streamIndex(streamId),
pool_,
encodingFactory_,
getStringBuffersFromDecoder_);
}

Expand All @@ -192,7 +185,6 @@ ChunkedDecoder NimbleData::makeNanosDecoder() {
/*decodeValuesWithNulls=*/false,
streams_->streamIndex(streamId),
pool_,
encodingFactory_,
getStringBuffersFromDecoder_);
}

Expand Down Expand Up @@ -222,7 +214,6 @@ std::unique_ptr<ChunkedDecoder> NimbleData::makeDecoder(
decodeValuesWithNulls,
streams_->streamIndex(descriptor.offset()),
pool_,
encodingFactory_,
getStringBuffersFromDecoder_);
}

Expand All @@ -234,7 +225,6 @@ std::unique_ptr<velox::dwio::common::FormatData> NimbleParams::toFormatData(
*streams_,
pool(),
inMapDecoder_,
encodingFactory_,
getStringBuffersFromDecoder_);
}

Expand Down
21 changes: 0 additions & 21 deletions dwio/nimble/velox/selective/NimbleData.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#pragma once

#include "dwio/nimble/encodings/EncodingFactory.h"
#include "dwio/nimble/velox/selective/ReaderBase.h"
#include "dwio/nimble/velox/selective/RowSizeTracker.h"
#include "velox/dwio/common/FormatData.h"
Expand All @@ -32,10 +31,6 @@ class NimbleData : public velox::dwio::common::FormatData {
StripeStreams& streams,
velox::memory::MemoryPool& memoryPool,
ChunkedDecoder* inMapDecoder,
std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)>)> encodingFactory,
bool getStringBuffersFromDecoder);

/// Read internal node nulls. For leaf nodes, we only copy `incomingNulls' if
Expand Down Expand Up @@ -110,11 +105,6 @@ class NimbleData : public velox::dwio::common::FormatData {
ChunkedDecoder* const inMapDecoder_;
std::unique_ptr<ChunkedDecoder> nullsDecoder_;
velox::BufferPtr inMap_;
std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)>)>
encodingFactory_;
};

class NimbleParams : public velox::dwio::common::FormatParams {
Expand All @@ -125,18 +115,13 @@ class NimbleParams : public velox::dwio::common::FormatParams {
const std::shared_ptr<const Type>& nimbleType,
StripeStreams& streams,
RowSizeTracker* rowSizeTracker,
std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)>)> encodingFactory,
bool getStringBuffersFromDecoder = false,
bool preserveFlatMapsInMemory = false)
: FormatParams(pool, stats),
nimbleType_(nimbleType),
streams_(&streams),
rowSizeTracker_(rowSizeTracker),
preserveFlatMapsInMemory_(preserveFlatMapsInMemory),
encodingFactory_(std::move(encodingFactory)),
getStringBuffersFromDecoder_{getStringBuffersFromDecoder} {}

std::unique_ptr<velox::dwio::common::FormatData> toFormatData(
Expand All @@ -150,7 +135,6 @@ class NimbleParams : public velox::dwio::common::FormatParams {
type,
*streams_,
rowSizeTracker_,
encodingFactory_,
getStringBuffersFromDecoder_,
preserveFlatMapsInMemory_);
}
Expand Down Expand Up @@ -181,11 +165,6 @@ class NimbleParams : public velox::dwio::common::FormatParams {
RowSizeTracker* const rowSizeTracker_{nullptr};
const bool preserveFlatMapsInMemory_{false};
ChunkedDecoder* inMapDecoder_{nullptr};
std::function<std::unique_ptr<Encoding>(
velox::memory::MemoryPool&,
std::string_view,
std::function<void*(uint32_t)> stringBufferFactory)>
encodingFactory_;
bool getStringBuffersFromDecoder_{false};
};

Expand Down
15 changes: 0 additions & 15 deletions dwio/nimble/velox/selective/SelectiveNimbleIndexReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

#include <utility>

#include "dwio/nimble/encodings/EncodingFactory.h"
#include "dwio/nimble/encodings/legacy/EncodingFactory.h"
#include "dwio/nimble/index/ClusterIndexReader.h"
#include "dwio/nimble/velox/SchemaUtils.h"
#include "dwio/nimble/velox/selective/ColumnReader.h"
Expand Down Expand Up @@ -571,19 +569,6 @@ void SelectiveNimbleIndexReader::loadStripeWithIndex(uint32_t stripeIndex) {
readerBase_->nimbleSchema(),
streams_,
options_.trackRowSize() ? rowSizeTracker_.get() : nullptr,
options_.passStringBuffersFromDecoder()
? [](velox::memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> stringBufferFactory)
-> std::unique_ptr<Encoding> {
return EncodingFactory::decode(pool, data, std::move(stringBufferFactory));
}
: [](velox::memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> stringBufferFactory)
-> std::unique_ptr<Encoding> {
return legacy::EncodingFactory::decode(pool, data, std::move(stringBufferFactory));
},
options_.passStringBuffersFromDecoder(),
options_.preserveFlatMapsInMemory());

Expand Down
15 changes: 0 additions & 15 deletions dwio/nimble/velox/selective/SelectiveNimbleReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
*/

#include "dwio/nimble/velox/selective/SelectiveNimbleReader.h"
#include "dwio/nimble/encodings/EncodingFactory.h"
#include "dwio/nimble/encodings/legacy/EncodingFactory.h"
#include "dwio/nimble/index/ClusterIndexReader.h"
#include "dwio/nimble/index/IndexConstants.h"
#include "dwio/nimble/index/IndexFilter.h"
Expand Down Expand Up @@ -363,19 +361,6 @@ void SelectiveNimbleRowReader::loadCurrentStripe() {
readerBase_->nimbleSchema(),
streams_,
options_.trackRowSize() ? rowSizeTracker_.get() : nullptr,
options_.passStringBuffersFromDecoder()
? [](velox::memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> stringBufferFactory)
-> std::unique_ptr<Encoding> {
return EncodingFactory::decode(pool, data, stringBufferFactory);
}
: [](velox::memory::MemoryPool& pool,
std::string_view data,
std::function<void*(uint32_t)> stringBufferFactory)
-> std::unique_ptr<Encoding> {
return legacy::EncodingFactory::decode(pool, data, stringBufferFactory);
},
options_.passStringBuffersFromDecoder(),
options_.preserveFlatMapsInMemory());

Expand Down
Loading