diff --git a/velox/connectors/hive/CMakeLists.txt b/velox/connectors/hive/CMakeLists.txt index f2b941d6e71..11b663a02e6 100644 --- a/velox/connectors/hive/CMakeLists.txt +++ b/velox/connectors/hive/CMakeLists.txt @@ -39,8 +39,8 @@ velox_add_library( HiveDataSink.cpp HiveDataSource.cpp HiveIndexSource.cpp - HivePartitionName.cpp HiveSplitReader.cpp + HivePartitionUtil.cpp PartitionIdGenerator.cpp TableHandle.cpp HEADERS @@ -67,7 +67,6 @@ velox_add_library( HiveDataSource.h HiveIndexSource.h HivePartitionFunction.h - HivePartitionName.h HiveSplitReader.h IndexReader.h PartitionIdGenerator.h @@ -83,6 +82,7 @@ velox_link_libraries( velox_exec velox_hive_partition_function velox_key_encoder + PUBLIC velox_hive_iceberg_splitreader ) velox_add_library(velox_hive_partition_function HivePartitionFunction.cpp) diff --git a/velox/connectors/hive/FileDataSource.h b/velox/connectors/hive/FileDataSource.h index b03c19deff6..02913935e55 100644 --- a/velox/connectors/hive/FileDataSource.h +++ b/velox/connectors/hive/FileDataSource.h @@ -169,6 +169,14 @@ class FileDataSource : public DataSource { // post-read using the extraction chains. folly::F14FastMap extractionColumns_; + core::ExpressionEvaluator* expressionEvaluator() const { + return expressionEvaluator_; + } + + std::atomic_uint64_t& totalRemainingFilterTime() { + return totalRemainingFilterTime_; + } + dwio::common::RuntimeStatistics runtimeStats_; private: @@ -209,7 +217,7 @@ class FileDataSource : public DataSource { /// transform column values. Indexed by output column position. 
std::vector> columnPostProcessors_; std::shared_ptr metadataFilter_; - std::unique_ptr remainingFilterExprSet_; + std::shared_ptr remainingFilterExprSet_; RowVectorPtr emptyOutput_; std::atomic_uint64_t totalRemainingFilterTime_{0}; std::atomic_uint64_t totalRemainingFilterCpuTime_{0}; diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp index b05ffa1281d..a4f471524f1 100644 --- a/velox/connectors/hive/HiveConfig.cpp +++ b/velox/connectors/hive/HiveConfig.cpp @@ -160,4 +160,9 @@ uint64_t HiveConfig::maxTargetFileSizeBytes( config::CapacityUnit::BYTE); } +bool HiveConfig::fanoutEnabled(const config::ConfigBase* session) const { + return session->get( + kFanoutEnabledSession, config_->get(kFanoutEnabled, true)); +} + } // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index 6835bcd3d16..4907eff5027 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -198,6 +198,12 @@ class HiveConfig : public FileConfig { static constexpr const char* kWriteFileCreateConfig = "write-file-create-config"; + /// Controls the writer mode, whether the fanout mode writer is enabled, + /// default value is true, setting to false means clustered mode. + /// Currently applies only to the Iceberg writer. + static constexpr const char* kFanoutEnabled = "fanout-enabled"; + static constexpr const char* kFanoutEnabledSession = "fanout_enabled"; + InsertExistingPartitionsBehavior insertExistingPartitionsBehavior( const config::ConfigBase* session) const; @@ -229,6 +235,9 @@ class HiveConfig : public FileConfig { explicit HiveConfig(std::shared_ptr config) : FileConfig(std::move(config)) {} + + /// Return if fanout writer mode is enabled. 
+ bool fanoutEnabled(const config::ConfigBase* session) const; }; } // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HiveConnector.cpp b/velox/connectors/hive/HiveConnector.cpp index 062a507fc64..d43241fbba9 100644 --- a/velox/connectors/hive/HiveConnector.cpp +++ b/velox/connectors/hive/HiveConnector.cpp @@ -63,7 +63,7 @@ const config::ConfigProvider* HiveConnector::configProvider() const { std::unique_ptr HiveConnector::createDataSource( const RowTypePtr& outputType, const ConnectorTableHandlePtr& tableHandle, - const ColumnHandleMap& columnHandles, + const std::unordered_map& columnHandles, ConnectorQueryCtx* connectorQueryCtx) { return std::make_unique( outputType, diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 0381ee2ecb5..6b52a1c9746 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -29,6 +29,10 @@ #include "velox/expression/ExprToSubfieldFilter.h" #include "velox/expression/FieldReference.h" +#include +#include +#include + namespace facebook::velox::connector::hive { namespace { @@ -703,7 +707,6 @@ std::unique_ptr createBufferedInput( } namespace { - core::CallTypedExprPtr replaceInputs( const core::CallTypedExpr* call, std::vector&& inputs) { @@ -876,6 +879,10 @@ core::TypedExprPtr extractFiltersFromRemainingFilter( } } // namespace +std::string makeUuid() { + return boost::lexical_cast(boost::uuids::random_generator()()); +} + core::TypedExprPtr extractFiltersFromRemainingFilter( const core::TypedExprPtr& expr, core::ExpressionEvaluator* evaluator, diff --git a/velox/connectors/hive/HiveConnectorUtil.h b/velox/connectors/hive/HiveConnectorUtil.h index 191e03186ab..b912e7ab7e7 100644 --- a/velox/connectors/hive/HiveConnectorUtil.h +++ b/velox/connectors/hive/HiveConnectorUtil.h @@ -235,4 +235,6 @@ std::unique_ptr createRangeFilter( const variant& lower, const variant& upper); +std::string makeUuid(); + } 
// namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HiveDataSink.cpp b/velox/connectors/hive/HiveDataSink.cpp index f627df624ed..0137e753d1e 100644 --- a/velox/connectors/hive/HiveDataSink.cpp +++ b/velox/connectors/hive/HiveDataSink.cpp @@ -20,6 +20,7 @@ #include "velox/common/base/Fs.h" #include "velox/common/base/StatsReporter.h" #include "velox/connectors/hive/HiveConfig.h" +#include "velox/connectors/hive/HiveConnectorUtil.h" #include "velox/connectors/hive/HivePartitionFunction.h" #include "velox/connectors/hive/TableHandle.h" #include "velox/dwio/common/Options.h" @@ -37,6 +38,16 @@ namespace { memory::NonReclaimableSectionGuard nonReclaimableGuard( \ writerInfo_[(index)]->nonReclaimableSectionHolder.get()) +std::shared_ptr createSinkPool( + const std::shared_ptr& writerPool) { + return writerPool->addLeafChild(fmt::format("{}.sink", writerPool->name())); +} + +std::shared_ptr createSortPool( + const std::shared_ptr& writerPool) { + return writerPool->addLeafChild(fmt::format("{}.sort", writerPool->name())); +} + // Appends a sequence number to a filename for file rotation. // Returns the original filename if sequenceNumber is 0 (no rotation yet). // Example: "file.orc" with seq 0 remains "file.orc" @@ -81,8 +92,6 @@ std::unique_ptr createHiveFileSink( }); } -// Creates a PartitionIdGenerator if the table is partitioned, otherwise returns -// nullptr. 
std::unique_ptr createPartitionIdGenerator( const RowTypePtr& inputType, const std::shared_ptr& insertTableHandle, @@ -92,12 +101,44 @@ std::unique_ptr createPartitionIdGenerator( if (partitionChannels.empty()) { return nullptr; } + return std::make_unique( inputType, partitionChannels, hiveConfig->maxPartitionsPerWriters( connectorQueryCtx->sessionProperties()), - connectorQueryCtx->memoryPool()); + connectorQueryCtx->memoryPool(), + hiveConfig->isPartitionPathAsLowerCase( + connectorQueryCtx->sessionProperties())); +} + +std::vector getPartitionChannels( + const std::shared_ptr& insertTableHandle) { + std::vector channels; + + for (column_index_t i = 0; i < insertTableHandle->inputColumns().size(); + i++) { + if (insertTableHandle->inputColumns()[i]->isPartitionKey()) { + channels.push_back(i); + } + } + + return channels; +} + +// Returns the column indices of non-partition data columns. +std::vector getNonPartitionChannels( + const std::shared_ptr& insertTableHandle) { + std::vector dataChannels; + + for (column_index_t i = 0; i < insertTableHandle->inputColumns().size(); + i++) { + if (!insertTableHandle->inputColumns()[i]->isPartitionKey()) { + dataChannels.push_back(i); + } + } + + return dataChannels; } std::string makePartitionDirectory( @@ -109,10 +150,6 @@ std::string makePartitionDirectory( return tableDirectory; } -std::string makeUuid() { - return boost::lexical_cast(boost::uuids::random_generator()()); -} - std::unordered_map tableTypeNames() { return { {LocationHandle::TableType::kNew, "kNew"}, @@ -345,54 +382,6 @@ std::string HiveBucketProperty::toString() const { return out.str(); } -HiveInsertTableHandle::HiveInsertTableHandle( - std::vector> inputColumns, - std::shared_ptr locationHandle, - dwio::common::FileFormat storageFormat, - std::shared_ptr bucketProperty, - std::optional compressionKind, - const std::unordered_map& serdeParameters, - const std::shared_ptr& writerOptions, - // When this option is set the HiveDataSink will always write a 
file even - // if there's no data. This is useful when the table is bucketed, but the - // engine handles ensuring a 1 to 1 mapping from task to bucket. - const bool ensureFiles, - std::shared_ptr fileNameGenerator, - const std::unordered_map& storageParameters) - : inputColumns_(std::move(inputColumns)), - locationHandle_(std::move(locationHandle)), - storageFormat_(storageFormat), - bucketProperty_(std::move(bucketProperty)), - compressionKind_(compressionKind), - serdeParameters_(serdeParameters), - writerOptions_(writerOptions), - ensureFiles_(ensureFiles), - fileNameGenerator_(std::move(fileNameGenerator)), - storageParameters_(storageParameters), - partitionChannels_(computePartitionChannels(inputColumns_)), - nonPartitionChannels_(computeNonPartitionChannels(inputColumns_)) { - if (compressionKind.has_value()) { - VELOX_CHECK( - compressionKind.value() != common::CompressionKind_MAX, - "Unsupported compression type: CompressionKind_MAX"); - } - - if (ensureFiles_) { - // If ensureFiles is set and either the bucketProperty is set or some - // partition keys are in the data, there is not a 1:1 mapping from Task to - // files so we can't proactively create writers. 
- VELOX_CHECK( - bucketProperty_ == nullptr || bucketProperty_->bucketCount() == 0, - "ensureFiles is not supported with bucketing"); - - for (const auto& inputColumn : inputColumns_) { - VELOX_CHECK( - !inputColumn->isPartitionKey(), - "ensureFiles is not supported with partition keys in the data"); - } - } -} - HiveDataSink::HiveDataSink( RowTypePtr inputType, std::shared_ptr insertTableHandle, @@ -411,13 +400,18 @@ HiveDataSink::HiveDataSink( *insertTableHandle->bucketProperty(), inputType) : nullptr, - insertTableHandle->partitionChannels(), - insertTableHandle->nonPartitionChannels(), - createPartitionIdGenerator( - inputType, - insertTableHandle, - hiveConfig, - connectorQueryCtx)) {} + getPartitionChannels(insertTableHandle), + getNonPartitionChannels(insertTableHandle), + !getPartitionChannels(insertTableHandle).empty() + ? std::make_unique( + inputType, + getPartitionChannels(insertTableHandle), + hiveConfig->maxPartitionsPerWriters( + connectorQueryCtx->sessionProperties()), + connectorQueryCtx->memoryPool(), + hiveConfig->isPartitionPathAsLowerCase( + connectorQueryCtx->sessionProperties())) + : nullptr) {} HiveDataSink::HiveDataSink( RowTypePtr inputType, @@ -675,12 +669,7 @@ std::unique_ptr HiveDataSink::createWriterForIndex( } std::string HiveDataSink::getPartitionName(uint32_t partitionId) const { - VELOX_CHECK_NOT_NULL(partitionIdGenerator_); - - return HivePartitionName::partitionName( - partitionId, - partitionIdGenerator_->partitionValues(), - partitionKeyAsLowerCase_); + return partitionIdGenerator_->partitionName(partitionId); } std::unique_ptr @@ -745,6 +734,141 @@ std::pair HiveDataSink::getWriterFileNames( bucketId, insertTableHandle_, *connectorQueryCtx_, isCommitRequired()); } +uint32_t HiveDataSink::ensureWriter(const WriterId& id) { + auto it = writerIndexMap_.find(id); + if (it != writerIndexMap_.end()) { + return it->second; + } + return appendWriter(id); +} + +uint32_t HiveDataSink::appendWriter(const WriterId& id) { + 
VELOX_USER_CHECK_LE( + writers_.size(), maxOpenWriters_, "Exceeded open writer limit"); + VELOX_CHECK_EQ(writers_.size(), writerInfo_.size()); + VELOX_CHECK_EQ(writerIndexMap_.size(), writerInfo_.size()); + + std::optional partitionName; + if (isPartitioned()) { + partitionName = getPartitionName(id.partitionId.value()); + } + + auto writerParameters = getWriterParameters(partitionName, id.bucketId); + auto writerPool = createWriterPool(id); + auto sinkPool = createSinkPool(writerPool); + std::shared_ptr sortPool{nullptr}; + if (sortWrite()) { + sortPool = createSortPool(writerPool); + } + writerInfo_.emplace_back( + std::make_shared( + std::move(writerParameters), + std::move(writerPool), + std::move(sinkPool), + std::move(sortPool))); + ioStats_.emplace_back(std::make_unique()); + + setMemoryReclaimers(writerInfo_.back().get(), ioStats_.back().get()); + writers_.emplace_back(createWriterForIndex(writerInfo_.size() - 1)); + addThreadLocalRuntimeStat( + fmt::format( + "{}WriterCount", + dwio::common::toString(insertTableHandle_->storageFormat())), + RuntimeCounter(1)); + partitionSizes_.emplace_back(0); + partitionRows_.emplace_back(nullptr); + rawPartitionRows_.emplace_back(nullptr); + + writerIndexMap_.emplace(id, writers_.size() - 1); + return writerIndexMap_[id]; +} + +void HiveDataSink::write(size_t index, RowVectorPtr input) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(index); + auto dataInput = makeDataInput(dataChannels_, input); + + if (writers_[index] == nullptr) { + writers_[index] = createWriterForIndex(index); + } + + writers_[index]->write(dataInput); + writerInfo_[index]->inputSizeInBytes += dataInput->estimateFlatSize(); + writerInfo_[index]->numWrittenRows += dataInput->size(); + writerInfo_[index]->currentFileWrittenRows += dataInput->size(); + + if (maxTargetFileBytes_ == 0 || isBucketed() || sortWrite()) { + return; + } + + const auto currentFileBytes = getCurrentFileBytes(index); + if (currentFileBytes >= maxTargetFileBytes_) { + 
rotateWriter(index); + } +} + +void HiveDataSink::finalizeWriterFile(size_t index) { + VELOX_CHECK_LT(index, writerInfo_.size()); + VELOX_CHECK_LT(index, ioStats_.size()); + + auto& info = writerInfo_[index]; + + const auto currentFileBytes = getCurrentFileBytes(index); + + if (currentFileBytes > 0) { + FileInfo fileInfo; + fileInfo.writeFileName = info->currentWriteFileName; + fileInfo.targetFileName = info->currentTargetFileName; + fileInfo.fileSize = currentFileBytes; + fileInfo.numRows = info->currentFileWrittenRows; + info->currentFileWrittenRows = 0; + info->writtenFiles.push_back(std::move(fileInfo)); + } + + info->cumulativeWrittenBytes = ioStats_[index]->rawBytesWritten(); +} + +void HiveDataSink::rotateWriter(size_t index) { + VELOX_CHECK_LT(index, writers_.size()); + VELOX_CHECK_LT(index, writerInfo_.size()); + + auto& info = writerInfo_[index]; + + writers_[index]->close(); + + finalizeWriterFile(index); + + writers_[index].reset(); + + ++info->fileSequenceNumber; +} + +void HiveDataSink::closeInternal() { + VELOX_CHECK_NE(state_, State::kRunning); + VELOX_CHECK_NE(state_, State::kFinishing); + + common::testutil::TestValue::adjust( + "facebook::velox::connector::hive::HiveDataSink::closeInternal", this); + + if (state_ == State::kClosed) { + for (int i = 0; i < writers_.size(); ++i) { + if (writers_[i] == nullptr) { + continue; + } + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + writers_[i]->close(); + finalizeWriterFile(i); + } + } else { + for (int i = 0; i < writers_.size(); ++i) { + if (writers_[i] == nullptr) { + continue; + } + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + writers_[i]->abort(); + } + } +} + std::pair HiveInsertFileNameGenerator::gen( std::optional bucketId, const std::shared_ptr insertTableHandle, diff --git a/velox/connectors/hive/HiveDataSink.h b/velox/connectors/hive/HiveDataSink.h index 8eb9ec04f0f..a5e912ac3c6 100644 --- a/velox/connectors/hive/HiveDataSink.h +++ b/velox/connectors/hive/HiveDataSink.h @@ -19,7 +19,6 @@ #include 
"velox/connectors/Connector.h" #include "velox/connectors/hive/FileDataSink.h" #include "velox/connectors/hive/HiveConfig.h" -#include "velox/connectors/hive/HivePartitionName.h" #include "velox/connectors/hive/PartitionIdGenerator.h" #include "velox/connectors/hive/TableHandle.h" #include "velox/dwio/common/Options.h" @@ -257,7 +256,38 @@ class HiveInsertTableHandle : public ConnectorInsertTableHandle { std::shared_ptr fileNameGenerator = std::make_shared(), const std::unordered_map& storageParameters = - {}); + {}) + : inputColumns_(std::move(inputColumns)), + locationHandle_(std::move(locationHandle)), + storageFormat_(storageFormat), + bucketProperty_(std::move(bucketProperty)), + compressionKind_(compressionKind), + serdeParameters_(serdeParameters), + writerOptions_(writerOptions), + ensureFiles_(ensureFiles), + fileNameGenerator_(std::move(fileNameGenerator)), + storageParameters_(storageParameters) { + if (compressionKind.has_value()) { + VELOX_CHECK( + compressionKind.value() != common::CompressionKind_MAX, + "Unsupported compression type: CompressionKind_MAX"); + } + + if (ensureFiles_) { + // If ensureFiles is set and either the bucketProperty is set or some + // partition keys are in the data, there is not a 1:1 mapping from Task to + // files so we can't proactively create writers. + VELOX_CHECK( + bucketProperty_ == nullptr || bucketProperty_->bucketCount() == 0, + "ensureFiles is not supported with bucketing"); + + for (const auto& inputColumn : inputColumns_) { + VELOX_CHECK( + !inputColumn->isPartitionKey(), + "ensureFiles is not supported with partition keys in the data"); + } + } + } virtual ~HiveInsertTableHandle() = default; @@ -512,19 +542,24 @@ class HiveDataSink : public FileDataSink { // Compute the partition id and bucket id for each row in 'input'. 
void computePartitionAndBucketIds(const RowVectorPtr& input) override; + WriterId getWriterId(size_t row) const; + + void splitInputRowsAndEnsureWriters(); + + uint32_t ensureWriter(const WriterId& id) override; + + uint32_t appendWriter(const WriterId& id); std::unique_ptr createWriterForIndex( size_t writerIndex) override; + std::string getPartitionName(uint32_t partitionId) const override; + // Creates and configures WriterOptions based on file format. std::shared_ptr createWriterOptions() const override; - virtual std::shared_ptr createWriterOptions( + std::shared_ptr createWriterOptions( size_t writerIndex) const override; - - // Returns the Hive partition directory name for the given partition ID. - virtual std::string getPartitionName(uint32_t partitionId) const override; - std::unique_ptr maybeCreateBucketSortWriter( size_t writerIndex, @@ -540,6 +575,13 @@ class HiveDataSink : public FileDataSink { WriterParameters::UpdateMode getUpdateMode() const; + void write(size_t index, RowVectorPtr input); + + void rotateWriter(size_t index) override; + + void finalizeWriterFile(size_t index); + + void closeInternal() override; const std::shared_ptr insertTableHandle_; const std::shared_ptr hiveConfig_; const WriterParameters::UpdateMode updateMode_; diff --git a/velox/connectors/hive/HiveDataSource.cpp b/velox/connectors/hive/HiveDataSource.cpp index 1db6ba3f543..7da740bc02c 100644 --- a/velox/connectors/hive/HiveDataSource.cpp +++ b/velox/connectors/hive/HiveDataSource.cpp @@ -137,7 +137,7 @@ std::unique_ptr HiveDataSource::createSplitReader() { auto bucketChannels = prepareSplit(); auto hiveSplit = checkedPointerCast(split_); - return std::make_unique( + return HiveSplitReader::create( hiveSplit, tableHandle_, &partitionKeys_, diff --git a/velox/connectors/hive/HiveDataSource.h b/velox/connectors/hive/HiveDataSource.h index df6dbd4c539..126e325b169 100644 --- a/velox/connectors/hive/HiveDataSource.h +++ b/velox/connectors/hive/HiveDataSource.h @@ -72,10 +72,10 @@ 
class HiveDataSource : public FileDataSource { void setupRowIdColumn(); - const std::shared_ptr hiveConfig_; - int64_t numBucketConversion_ = 0; + const std::shared_ptr hiveConfig_; + // Tracks the number of splits read per file format. std::unordered_map numSplitsByFileFormat_; diff --git a/velox/connectors/hive/HivePartitionName.cpp b/velox/connectors/hive/HivePartitionName.cpp deleted file mode 100644 index 2d7e7b9b665..00000000000 --- a/velox/connectors/hive/HivePartitionName.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "velox/connectors/hive/HivePartitionName.h" -#include "velox/common/encode/Base64.h" -#include "velox/dwio/catalog/fbhive/FileUtils.h" -#include "velox/type/DecimalUtil.h" - -namespace facebook::velox::connector::hive { - -using namespace facebook::velox::dwio::catalog::fbhive; - -namespace { - -template -std::string formatDecimal(T value, const TypePtr& type) { - const auto& [p, s] = getDecimalPrecisionScale(*type); - const auto& maxSize = DecimalUtil::maxStringViewSize(p, s); - std::string buffer(maxSize, '\0'); - const auto& actualSize = - DecimalUtil::castToString(value, s, maxSize, buffer.data()); - buffer.resize(actualSize); - return buffer; -} - -} // namespace - -std::string HivePartitionName::toName(int32_t value, const TypePtr& type) { - if (type->isDate()) { - return DateType::toIso8601(value); - } - return fmt::to_string(value); -} - -std::string HivePartitionName::toName(int64_t value, const TypePtr& type) { - if (type->isShortDecimal()) { - return formatDecimal(value, type); - } - return fmt::to_string(value); -} - -std::string HivePartitionName::toName(int128_t value, const TypePtr& type) { - if (type->isLongDecimal()) { - return formatDecimal(value, type); - } - return fmt::to_string(value); -} - -std::string HivePartitionName::toName(Timestamp value, const TypePtr& type) { - value.toTimezone(Timestamp::defaultTimezone()); - TimestampToStringOptions options; - options.dateTimeSeparator = ' '; - // Set the precision to milliseconds, and enable the skipTrailingZeros match - // the timestamp precision and truncation behavior of Presto. - options.precision = TimestampPrecision::kMilliseconds; - options.skipTrailingZeros = true; - - auto result = value.toString(options); - - // Presto's java.sql.Timestamp.toString() always keeps at least one decimal - // place even when all fractional seconds are zero. - // If skipTrailingZeros removed all fractional digits, add back ".0" to match - // Presto's behavior. 
- if (auto dotPos = result.find_last_of('.'); dotPos == std::string::npos) { - // No decimal point found, add ".0" - result += ".0"; - } - - return result; -} - -std::string HivePartitionName::partitionName( - uint32_t partitionId, - const RowVectorPtr& partitionValues, - bool partitionKeyAsLowerCase) { - auto toPartitionName = - [](auto value, const TypePtr& type, int /*columnIndex*/) { - return HivePartitionName::toName(value, type); - }; - return FileUtils::makePartName( - partitionKeyValues( - partitionId, - partitionValues, - /*nullValueString=*/"", - toPartitionName), - partitionKeyAsLowerCase); -} - -} // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HivePartitionName.h b/velox/connectors/hive/HivePartitionName.h deleted file mode 100644 index 3e519528866..00000000000 --- a/velox/connectors/hive/HivePartitionName.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include "velox/vector/ComplexVector.h" -#include "velox/vector/SimpleVector.h" - -namespace facebook::velox::connector::hive { - -/// Converting partition values to their string representations. -/// Provides template methods for formatting different data types according to -/// Hive partitioning conventions. -class HivePartitionName { - public: - /// Generic template for formatting partition values to strings using - /// fmt::to_string. 
Specialized for types that need special handling - /// (int32_t, int64_t, int128_t, Timestamp). - template - FOLLY_ALWAYS_INLINE static std::string toName(T value, const TypePtr& type) { - return fmt::to_string(value); - } - - /// Format int32_t partition values. Specialized to handle DATE type which - /// requires ISO-8601 formatting (YYYY-MM-DD) instead of raw integer value. - static std::string toName(int32_t value, const TypePtr& type); - - /// Format int64_t partition values. Specialized to handle short DECIMAL type - /// which requires decimal string formatting with proper precision and scale - /// instead of raw integer value. - static std::string toName(int64_t value, const TypePtr& type); - - /// Format int128_t partition values. Specialized to handle long DECIMAL type - /// which requires decimal string formatting with proper precision and scale - /// instead of raw integer value. - static std::string toName(int128_t value, const TypePtr& type); - - /// Format Timestamp partition values. Specialized to: - /// 1. Convert to default timezone - /// 2. Use space as date-time separator (not 'T') - /// 3. Use millisecond precision with trailing zeros skipped - /// 4. Always keep at least ".0" for fractional seconds (Presto compatibility) - static std::string toName(Timestamp value, const TypePtr& type); - - /// Build partition key-value pairs from partition values. - /// Returns a vector of (key, value) pairs for all partition columns. - /// @tparam F A callable that converts a value to a partition string. - /// Takes (value, type, columnIndex) and returns string. - /// @param partitionId The partition ID (row index) to extract values from. - /// @param partitionValues RowVector containing partition values. - /// @param nullValueString The string to use for null values. - /// @param toPartitionName Callable to convert a value to a string. 
- template - static std::vector> partitionKeyValues( - uint32_t partitionId, - const RowVectorPtr& partitionValues, - const std::string& nullValueString, - const F& toPartitionName); - - /// Generate a Hive partition directory name from partition values for - /// partitionId. - /// - /// @param partitionId The row index in partitionValues to extract values - /// from. - /// @param partitionValues RowVector containing partition values. Each - /// child vector represents a partition column, and the row at - /// partitionId contains the values for this partition. - /// @param partitionKeyAsLowerCase Controls whether partition column names - /// should be converted to lowercase in the output. When true, column - /// names are lowercased (e.g., "year=2025"); when false, original - /// casing is preserved (e.g., "Year=2025"). - /// @return A formatted partition directory name string. Null values are - /// represented as __HIVE_DEFAULT_PARTITION__. - static std::string partitionName( - uint32_t partitionId, - const RowVectorPtr& partitionValues, - bool partitionKeyAsLowerCase); -}; - -namespace detail { - -// Unified template function to extract partition key-value string from a -// vector. Used by both Hive and Iceberg partition name generators. -// -// @tparam Kind The TypeKind of the partition column. -// @tparam F A callable that converts a value to a partition string. -// @param partitionVector The vector containing partition values. -// @param row The row index to extract the value from. -// @param type The type of the partition column. -// @param columnIndex The column index in the partition values. -// @param toPartitionName Callable to convert a value to a partition string. -// @return A pair of (column_name, formatted_value). 
-template -std::string makePartitionKeyValueString( - const BaseVector& partitionVector, - vector_size_t row, - const TypePtr& type, - int columnIndex, - const F& toPartitionName) { - using T = typename TypeTraits::NativeType; - - return toPartitionName( - partitionVector.as>()->valueAt(row), type, columnIndex); -} - -#define PARTITION_TYPE_DISPATCH(TEMPLATE_FUNC, typeKind, ...) \ - [&]() { \ - switch (typeKind) { \ - case TypeKind::BOOLEAN: \ - case TypeKind::TINYINT: \ - case TypeKind::SMALLINT: \ - case TypeKind::INTEGER: \ - case TypeKind::BIGINT: \ - case TypeKind::VARCHAR: \ - case TypeKind::VARBINARY: \ - case TypeKind::TIMESTAMP: \ - return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( \ - TEMPLATE_FUNC, typeKind, __VA_ARGS__); \ - default: \ - VELOX_UNSUPPORTED( \ - "Unsupported partition type: {}", TypeKindName::toName(typeKind)); \ - } \ - }() - -} // namespace detail - -template -std::vector> -HivePartitionName::partitionKeyValues( - uint32_t partitionId, - const RowVectorPtr& partitionValues, - const std::string& nullValueString, - const F& toPartitionName) { - std::vector> partitionKeyValuePairs; - for (auto i = 0; i < partitionValues->childrenSize(); i++) { - const auto& child = partitionValues->childAt(i); - const auto& name = partitionValues->rowType()->nameOf(i); - if (child->isNullAt(partitionId)) { - partitionKeyValuePairs.emplace_back( - std::make_pair(name, nullValueString)); - continue; - } - - partitionKeyValuePairs.emplace_back( - std::make_pair( - name, - PARTITION_TYPE_DISPATCH( - detail::makePartitionKeyValueString, - child->typeKind(), - *child->loadedVector(), - partitionId, - child->type(), - i, - toPartitionName))); - } - return partitionKeyValuePairs; -} - -} // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HivePartitionUtil.cpp b/velox/connectors/hive/HivePartitionUtil.cpp new file mode 100644 index 00000000000..a2ce19d1b57 --- /dev/null +++ b/velox/connectors/hive/HivePartitionUtil.cpp @@ -0,0 +1,147 @@ +/* + 
* Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/hive/HivePartitionUtil.h" +#include "velox/common/encode/Base64.h" +#include "velox/type/DecimalUtil.h" +#include "velox/vector/SimpleVector.h" + +namespace facebook::velox::connector::hive { + +namespace { + +template +std::string formatDecimal(T value, const TypePtr& type) { + const auto& [p, s] = getDecimalPrecisionScale(*type); + const auto& maxSize = DecimalUtil::maxStringViewSize(p, s); + std::string buffer(maxSize, '\0'); + const auto& actualSize = + DecimalUtil::castToString(value, s, maxSize, buffer.data()); + buffer.resize(actualSize); + return buffer; +} + +template +std::pair makePartitionKeyValueString( + const HivePartitionUtilPtr& formatter, + const BaseVector* partitionVector, + vector_size_t row, + const std::string& name, + const TypePtr& type) { + using T = typename TypeTraits::NativeType; + if (partitionVector->as>()->isNullAt(row)) { + return std::make_pair(name, ""); + } + + return std::make_pair( + name, + formatter->toPartitionString( + partitionVector->as>()->valueAt(row), type)); +} + +} // namespace + +std::string HivePartitionUtil::toPartitionString( + int32_t value, + const TypePtr& type) const { + if (type->isDate()) { + return formatDate(value); + } + return folly::to(value); +} + +std::string HivePartitionUtil::toPartitionString( + int64_t value, + const TypePtr& type) const { + if 
(type->isShortDecimal()) { + return formatDecimal(value, type); + } + return folly::to(value); +} + +std::string HivePartitionUtil::toPartitionString( + int128_t value, + const TypePtr& type) const { + if (type->isLongDecimal()) { + return formatDecimal(value, type); + } + return folly::to(value); +} + +std::string HivePartitionUtil::toPartitionString( + Timestamp value, + const TypePtr& type) const { + value.toTimezone(Timestamp::defaultTimezone()); + TimestampToStringOptions options; + options.dateTimeSeparator = ' '; + // Set the precision to milliseconds, and enable the skipTrailingZeros match + // the timestamp precision and truncation behavior of Presto. + options.precision = TimestampPrecision::kMilliseconds; + options.skipTrailingZeros = true; + + auto result = value.toString(options); + + // Presto's java.sql.Timestamp.toString() always keeps at least one decimal + // place even when all fractional seconds are zero. + // If skipTrailingZeros removed all fractional digits, add back ".0" to match + // Presto's behavior. + if (auto dotPos = result.find_last_of('.'); dotPos == std::string::npos) { + // No decimal point found, add ".0" + result += ".0"; + } + + return result; +} + +#define PARTITION_TYPE_DISPATCH(TEMPLATE_FUNC, typeKind, ...) 
\ + [&]() { \ + switch (typeKind) { \ + case TypeKind::BOOLEAN: \ + case TypeKind::TINYINT: \ + case TypeKind::SMALLINT: \ + case TypeKind::INTEGER: \ + case TypeKind::BIGINT: \ + case TypeKind::VARCHAR: \ + case TypeKind::VARBINARY: \ + case TypeKind::TIMESTAMP: \ + return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( \ + TEMPLATE_FUNC, typeKind, __VA_ARGS__); \ + default: \ + VELOX_UNSUPPORTED( \ + "Unsupported partition type: {}", TypeKindName::toName(typeKind)); \ + } \ + }() + +std::vector> +HivePartitionUtil::extractPartitionKeyValues( + const RowVectorPtr& partitionsVector, + vector_size_t row) { + const auto& formatter = std::make_shared(); + std::vector> partitionKeyValues; + for (auto i = 0; i < partitionsVector->childrenSize(); i++) { + partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH( + makePartitionKeyValueString, + partitionsVector->childAt(i)->typeKind(), + formatter, + partitionsVector->childAt(i)->loadedVector(), + row, + asRowType(partitionsVector->type())->nameOf(i), + partitionsVector->childAt(i)->type())); + } + return partitionKeyValues; +} + +} // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HivePartitionUtil.h b/velox/connectors/hive/HivePartitionUtil.h new file mode 100644 index 00000000000..ed6259ea637 --- /dev/null +++ b/velox/connectors/hive/HivePartitionUtil.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include "velox/vector/ComplexVector.h" + +namespace facebook::velox::connector::hive { + +FOLLY_ALWAYS_INLINE std::string formatDate(int32_t value) { + return DATE()->toString(value); +} + +/// Converting partition values to their string representations. +/// Provides virtual methods for formatting different data types +/// according to Hive partitioning conventions. +class HivePartitionUtil { + public: + virtual ~HivePartitionUtil() = default; + + /// Generic template for formatting simple types that just need string + /// conversion. Specialized/overloaded for types that need special handling. + template + FOLLY_ALWAYS_INLINE std::string toPartitionString( + T value, + const TypePtr& type) const { + return folly::to(value); + } + + FOLLY_ALWAYS_INLINE std::string toPartitionString( + bool value, + const TypePtr& type) const { + return value ? "true" : "false"; + } + + std::string toPartitionString(int64_t value, const TypePtr& type) const; + + std::string toPartitionString(int128_t value, const TypePtr& type) const; + + virtual std::string toPartitionString(StringView value, const TypePtr& type) + const { + return folly::to(value); + } + + virtual std::string toPartitionString(int32_t value, const TypePtr& type) + const; + + virtual std::string toPartitionString(Timestamp value, const TypePtr& type) + const; + + /// Extract partition key-value pairs from a row vector at a specific row. + /// + /// @param partitionsVector A row vector containing partition columns. + /// @param row The row index to extract partition values from. + /// @return A vector of (column_name, formatted_value) pairs. 
+ static std::vector> + extractPartitionKeyValues( + const RowVectorPtr& partitionsVector, + vector_size_t row); +}; + +using HivePartitionUtilPtr = std::shared_ptr; + +} // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HiveSplitReader.cpp b/velox/connectors/hive/HiveSplitReader.cpp index 3b35cc4d6c0..579367dfafa 100644 --- a/velox/connectors/hive/HiveSplitReader.cpp +++ b/velox/connectors/hive/HiveSplitReader.cpp @@ -19,9 +19,72 @@ #include "velox/connectors/hive/FileConfig.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/connectors/hive/HiveConnectorUtil.h" +#include "velox/connectors/hive/iceberg/IcebergSplit.h" +#include "velox/connectors/hive/iceberg/IcebergSplitReader.h" namespace facebook::velox::connector::hive { +std::unique_ptr HiveSplitReader::create( + const std::shared_ptr& hiveSplit, + const FileTableHandlePtr& tableHandle, + const std::unordered_map* partitionKeys, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& fileConfig, + const RowTypePtr& readerOutputType, + const std::shared_ptr& dataIoStats, + const std::shared_ptr& metadataIoStats, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* ioExecutor, + const std::shared_ptr& scanSpec, + const std::unordered_map* infoColumns, + std::vector bucketChannels, + const common::SubfieldFilters* subfieldFiltersForValidation) { + // Create the SplitReader based on hiveSplit->customSplitInfo["table_format"] + if (hiveSplit->customSplitInfo.count("table_format") > 0 && + hiveSplit->customSplitInfo.at("table_format") == "hive-iceberg") { + auto icebergSplit = + std::dynamic_pointer_cast(hiveSplit); + VELOX_CHECK_NOT_NULL( + icebergSplit, + "Expected HiveIcebergSplit for table_format=hive-iceberg"); + return std::make_unique( + icebergSplit, + tableHandle, + partitionKeys, + connectorQueryCtx, + fileConfig, + readerOutputType, + dataIoStats, + metadataIoStats, + ioStats, + fileHandleFactory, + 
ioExecutor, + scanSpec, + std::make_shared(), + infoColumns, + std::move(bucketChannels), + subfieldFiltersForValidation); + } else { + return std::make_unique( + hiveSplit, + tableHandle, + partitionKeys, + connectorQueryCtx, + fileConfig, + readerOutputType, + dataIoStats, + metadataIoStats, + ioStats, + fileHandleFactory, + ioExecutor, + scanSpec, + infoColumns, + std::move(bucketChannels), + subfieldFiltersForValidation); + } +} + HiveSplitReader::HiveSplitReader( const std::shared_ptr& hiveSplit, const FileTableHandlePtr& tableHandle, diff --git a/velox/connectors/hive/HiveSplitReader.h b/velox/connectors/hive/HiveSplitReader.h index e50e87ed285..9f1b7ba4961 100644 --- a/velox/connectors/hive/HiveSplitReader.h +++ b/velox/connectors/hive/HiveSplitReader.h @@ -30,6 +30,27 @@ struct HiveConnectorSplit; /// to keep only rows belonging to the target bucket. class HiveSplitReader : public FileSplitReader { public: + /// Factory method to create the appropriate split reader based on split type. + /// For Iceberg splits (identified by customSplitInfo["table_format"] == + /// "hive-iceberg"), creates an IcebergSplitReader. Otherwise, creates a + /// HiveSplitReader. 
+ static std::unique_ptr create( + const std::shared_ptr& hiveSplit, + const FileTableHandlePtr& tableHandle, + const std::unordered_map* partitionKeys, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& fileConfig, + const RowTypePtr& readerOutputType, + const std::shared_ptr& dataIoStats, + const std::shared_ptr& metadataIoStats, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* ioExecutor, + const std::shared_ptr& scanSpec, + const std::unordered_map* infoColumns, + std::vector bucketChannels = {}, + const common::SubfieldFilters* subfieldFiltersForValidation = nullptr); + HiveSplitReader( const std::shared_ptr& hiveSplit, const FileTableHandlePtr& tableHandle, diff --git a/velox/connectors/hive/PartitionIdGenerator.cpp b/velox/connectors/hive/PartitionIdGenerator.cpp index a4773da58a1..976f5099611 100644 --- a/velox/connectors/hive/PartitionIdGenerator.cpp +++ b/velox/connectors/hive/PartitionIdGenerator.cpp @@ -16,16 +16,23 @@ #include "velox/connectors/hive/PartitionIdGenerator.h" +#include "velox/connectors/hive/HivePartitionUtil.h" +#include "velox/dwio/catalog/fbhive/FileUtils.h" + +using namespace facebook::velox::dwio::catalog::fbhive; + namespace facebook::velox::connector::hive { PartitionIdGenerator::PartitionIdGenerator( const RowTypePtr& inputType, std::vector partitionChannels, uint32_t maxPartitions, - memory::MemoryPool* pool) - : pool_(pool), - partitionChannels_(std::move(partitionChannels)), - maxPartitions_(maxPartitions) { + memory::MemoryPool* pool, + bool partitionPathAsLowerCase) + : partitionChannels_(std::move(partitionChannels)), + maxPartitions_(maxPartitions), + partitionPathAsLowerCase_(partitionPathAsLowerCase), + pool_(pool) { VELOX_USER_CHECK( !partitionChannels_.empty(), "There must be at least one partition key."); for (auto channel : partitionChannels_) { @@ -53,6 +60,19 @@ PartitionIdGenerator::PartitionIdGenerator( } } +PartitionIdGenerator::PartitionIdGenerator( 
+ std::vector partitionChannels, + uint32_t maxPartitions, + memory::MemoryPool* pool, + bool partitionPathAsLowerCase) + : partitionChannels_(std::move(partitionChannels)), + maxPartitions_(maxPartitions), + partitionPathAsLowerCase_(partitionPathAsLowerCase), + pool_(pool) { + VELOX_USER_CHECK( + !partitionChannels_.empty(), "There must be at least one partition key."); +} + void PartitionIdGenerator::run( const RowVectorPtr& input, raw_vector& result) { @@ -89,6 +109,13 @@ void PartitionIdGenerator::run( } } +std::string PartitionIdGenerator::partitionName(uint64_t partitionId) const { + return FileUtils::makePartName( + HivePartitionUtil::extractPartitionKeyValues( + partitionValues_, partitionId), + partitionPathAsLowerCase_); +} + void PartitionIdGenerator::computeValueIds( const RowVectorPtr& input, raw_vector& valueIds) { @@ -156,7 +183,7 @@ void PartitionIdGenerator::updateValueToPartitionIdMapping() { } void PartitionIdGenerator::savePartitionValues( - uint64_t partitionId, + uint32_t partitionId, const RowVectorPtr& input, vector_size_t row) { for (auto i = 0; i < partitionChannels_.size(); ++i) { diff --git a/velox/connectors/hive/PartitionIdGenerator.h b/velox/connectors/hive/PartitionIdGenerator.h index 0a53252829c..43a77740b90 100644 --- a/velox/connectors/hive/PartitionIdGenerator.h +++ b/velox/connectors/hive/PartitionIdGenerator.h @@ -29,41 +29,63 @@ class PartitionIdGenerator { /// @param maxPartitions The max number of distinct partitions. /// @param pool Memory pool. Used to allocate memory for storing unique /// partition key values. + /// @param partitionPathAsLowerCase Used to control whether the partition path + /// need to convert to lower case. PartitionIdGenerator( const RowTypePtr& inputType, std::vector partitionChannels, uint32_t maxPartitions, - memory::MemoryPool* pool); + memory::MemoryPool* pool, + bool partitionPathAsLowerCase); + + virtual ~PartitionIdGenerator() = default; /// Generate sequential partition IDs for input vector. 
/// @param input Input RowVector. /// @param result Generated integer IDs indexed by input row number. - void run(const RowVectorPtr& input, raw_vector& result); + virtual void run(const RowVectorPtr& input, raw_vector& result); /// Return the total number of distinct partitions processed so far. uint64_t numPartitions() const { return partitionIds_.size(); } - /// Returns the RowVector containing transformed partition keys. - /// Each row in this vector corresponds to a partition ID (row index = - /// partition ID). - /// Should be called after calling run() method. - /// - /// @return RowVector with one column per partition column, columns in same - /// order as partitionChannels_. - const RowVectorPtr& partitionValues() const { - return partitionValues_; - } + /// Return partition name for the given partition id in the typical Hive + /// style. It is derived from the partitionValues_ at index partitionId. + /// Partition keys appear in the order of partition columns in the table + /// schema. + virtual std::string partitionName(uint64_t partitionId) const; - private: - static constexpr const int32_t kHasherReservePct = 20; + protected: + PartitionIdGenerator( + std::vector partitionChannels, + uint32_t maxPartitions, + memory::MemoryPool* pool, + bool partitionPathAsLowerCase); // Computes value IDs using VectorHashers for all rows in 'input'. void computeValueIds( const RowVectorPtr& input, raw_vector& valueIds); + const std::vector partitionChannels_; + + std::vector> hashers_; + + // A vector holding unique partition key values. One row per partition. Row + // numbers match partition IDs. + RowVectorPtr partitionValues_; + + const uint32_t maxPartitions_; + + // A mapping from value ID produced by VectorHashers to a partition ID. 
+ std::unordered_map partitionIds_; + + const bool partitionPathAsLowerCase_; + + private: + static constexpr const int32_t kHasherReservePct = 20; + // In case of rehash (when value IDs produced by VectorHashers change), we // update value id for pre-existing partitions while keeping partition ids. // This method rebuilds 'partitionIds_' by re-calculating the value ids using @@ -72,27 +94,15 @@ class PartitionIdGenerator { // Copies partition values of 'row' from 'input' into 'partitionId' row in // 'partitionValues_'. - void savePartitionValues( - uint64_t partitionId, + virtual void savePartitionValues( + uint32_t partitionId, const RowVectorPtr& input, vector_size_t row); memory::MemoryPool* const pool_; - const std::vector partitionChannels_; - - const uint32_t maxPartitions_; - - std::vector> hashers_; bool hasMultiplierSet_ = false; - // A mapping from value ID produced by VectorHashers to a partition ID. - std::unordered_map partitionIds_; - - // A vector holding unique partition key values. One row per partition. Row - // numbers match partition IDs. - RowVectorPtr partitionValues_; - // All rows are set valid to compute partition IDs for all input rows. 
SelectivityVector allRows_; }; diff --git a/velox/connectors/hive/iceberg/CMakeLists.txt b/velox/connectors/hive/iceberg/CMakeLists.txt index f6369cb6d9e..39040cf437d 100644 --- a/velox/connectors/hive/iceberg/CMakeLists.txt +++ b/velox/connectors/hive/iceberg/CMakeLists.txt @@ -16,20 +16,21 @@ set( DeletionVectorReader.cpp DeletionVectorWriter.cpp EqualityDeleteFileReader.cpp - IcebergColumnHandle.cpp - IcebergConfig.cpp IcebergConnector.cpp + DataFileStatsCollector.cpp + IcebergColumnHandle.cpp IcebergDataFileStatistics.cpp IcebergDataSink.cpp IcebergDataSource.cpp - IcebergPartitionName.cpp + IcebergPartitionIdGenerator.cpp IcebergSplit.cpp IcebergSplitReader.cpp + Murmur3.cpp PartitionSpec.cpp PositionalDeleteFileReader.cpp - TransformEvaluator.cpp - TransformExprBuilder.cpp WriterOptionsAdapter.cpp + TransformFactory.cpp + Transforms.cpp ) if(VELOX_ENABLE_PARQUET) @@ -44,7 +45,6 @@ velox_add_library( DeletionVectorWriter.h EqualityDeleteFileReader.h IcebergColumnHandle.h - IcebergConfig.h IcebergConnector.h IcebergDataFileStatistics.h IcebergDataSink.h @@ -52,13 +52,10 @@ velox_add_library( IcebergDeleteFile.h IcebergMetadataColumns.h IcebergParquetStatsCollector.h - IcebergPartitionName.h IcebergSplit.h IcebergSplitReader.h PartitionSpec.h PositionalDeleteFileReader.h - TransformEvaluator.h - TransformExprBuilder.h WriterOptionsAdapter.h ) diff --git a/velox/connectors/hive/iceberg/DataFileStatsCollector.cpp b/velox/connectors/hive/iceberg/DataFileStatsCollector.cpp new file mode 100644 index 00000000000..2a43b09bb0d --- /dev/null +++ b/velox/connectors/hive/iceberg/DataFileStatsCollector.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/connectors/hive/iceberg/DataFileStatsCollector.h" +#include "velox/common/base/Exceptions.h" +#include "velox/common/encode/Base64.h" +#include "velox/dwio/parquet/writer/arrow/Metadata.h" +#include "velox/dwio/parquet/writer/arrow/Statistics.h" + +namespace facebook::velox::connector::hive::iceberg { + +using namespace facebook::velox::parquet; + +DataFileStatsCollector::DataFileStatsCollector( + std::shared_ptr< + std::vector>> + settings) + : FileStatsCollector(std::move(settings)) {} + +void DataFileStatsCollector::collectStats( + const void* metadata, + const std::shared_ptr& dataFileStats) { + const auto& fileMetadata = + *static_cast*>( + metadata); + VELOX_CHECK_NOT_NULL(fileMetadata); + + std::unordered_set skipBoundsFields; + std::function processFields = + [&skipBoundsFields, + &processFields](IcebergDataFileStatsSettings* field) -> int32_t { + if (field->skipBounds) { + skipBoundsFields.insert(field->fieldId); + } + if (field->children.empty()) { + return 1; + } + int32_t count = 0; + for (const auto& child : field->children) { + count += processFields(child.get()); + } + return count; + }; + + // numFields is not the number of columns in Iceberg table's schema, + // e.g., schema_->size(). It also contains the sub-fields when there are + // nested types in table's schema. 
+ int32_t numFields = 0; + for (const auto& field : *statsSetting_) { + auto* icebergField = + static_cast(field.get()); + numFields += processFields(icebergField); + } + + std::unordered_map> + globalMinStats; + std::unordered_map> + globalMaxStats; + + dataFileStats->numRecords = fileMetadata->numRows(); + const auto numRowGroups = fileMetadata->numRowGroups(); + for (auto i = 0; i < numRowGroups; ++i) { + const auto rgm = fileMetadata->rowGroup(i); + VELOX_CHECK_EQ(numFields, rgm->numColumns()); + dataFileStats->splitOffsets.emplace_back(rgm->fileOffset()); + + for (auto j = 0; j < numFields; ++j) { + const auto columnChunkMetadata = rgm->columnChunk(j); + const auto fieldId = columnChunkMetadata->fieldId(); + const auto numValues = columnChunkMetadata->numValues(); + + // Skip columns without field IDs. field_id() returns -1 when metadata is + // missing. + if (fieldId < 0) { + continue; + } + + dataFileStats->valueCounts[fieldId] += numValues; + dataFileStats->columnsSizes[fieldId] += + columnChunkMetadata->totalCompressedSize(); + + const auto columnChunkStats = columnChunkMetadata->statistics(); + if (columnChunkStats->nanCount() > 0) { + dataFileStats->nanValueCounts[fieldId] += columnChunkStats->nanCount(); + } + dataFileStats->nullValueCounts[fieldId] += columnChunkStats->nullCount(); + + if (columnChunkStats->hasMinMax() && + !skipBoundsFields.contains(fieldId)) { + if (globalMaxStats.find(fieldId) == globalMaxStats.end()) { + globalMinStats[fieldId] = columnChunkStats; + globalMaxStats[fieldId] = columnChunkStats; + } else { + globalMaxStats[fieldId] = arrow::Statistics::CompareAndGetMax( + globalMaxStats[fieldId], columnChunkStats); + globalMinStats[fieldId] = arrow::Statistics::CompareAndGetMin( + globalMinStats[fieldId], columnChunkStats); + } + } + } + } + + for (const auto& [fieldId, minStats] : globalMinStats) { + const auto lowerBound = minStats->MinValue(); + dataFileStats->lowerBounds[fieldId] = + encoding::Base64::encode(lowerBound.data(), 
lowerBound.size()); + } + for (const auto& [fieldId, maxStats] : globalMaxStats) { + const auto upperBound = maxStats->MaxValue(); + dataFileStats->upperBounds[fieldId] = + encoding::Base64::encode(upperBound.data(), upperBound.size()); + } +} + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/DataFileStatsCollector.h b/velox/connectors/hive/iceberg/DataFileStatsCollector.h new file mode 100644 index 00000000000..e379dc39179 --- /dev/null +++ b/velox/connectors/hive/iceberg/DataFileStatsCollector.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/dwio/common/DataFileStatsCollector.h" + +namespace facebook::velox::connector::hive::iceberg { + +/// Settings for collecting Iceberg parquet data file statistics. +/// Holds the Iceberg source field id and whether to skip bounds +/// collection for this field. For nested field, it contains child fields. 
+struct IcebergDataFileStatsSettings + : public dwio::common::DataFileStatsSettings { + int32_t fieldId; + bool skipBounds; + std::vector> children; + + IcebergDataFileStatsSettings(int32_t id, bool skip) + : fieldId(id), skipBounds(skip), children() {} +}; + +class DataFileStatsCollector : public dwio::common::FileStatsCollector { + public: + explicit DataFileStatsCollector( + std::shared_ptr< + std::vector>> + settings); + + void collectStats( + const void* metadata, + const std::shared_ptr& fileStats) + override; +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergColumnHandle.cpp b/velox/connectors/hive/iceberg/IcebergColumnHandle.cpp index 28666689e6f..def64ed04c8 100644 --- a/velox/connectors/hive/iceberg/IcebergColumnHandle.cpp +++ b/velox/connectors/hive/iceberg/IcebergColumnHandle.cpp @@ -30,22 +30,23 @@ IcebergColumnHandle::IcebergColumnHandle( const std::string& name, ColumnType columnType, TypePtr dataType, - parquet::ParquetFieldId icebergField, + TypePtr hiveType, + const IcebergNestedField& nestedField, std::vector requiredSubfields, - std::optional initialDefaultValue) + std::optional initialDefaultValue, + ColumnParseParameters columnParseParameters) : HiveColumnHandle( name, columnType, dataType, - dataType, + hiveType, std::move(requiredSubfields), - ColumnParseParameters{ColumnParseParameters:: - PartitionDateValueFormat::kDaysSinceEpoch}), - field_(std::move(icebergField)), + columnParseParameters), + nestedField_(nestedField), initialDefaultValue_(std::move(initialDefaultValue)) {} -const parquet::ParquetFieldId& IcebergColumnHandle::field() const { - return field_; +const IcebergNestedField& IcebergColumnHandle::nestedField() const { + return nestedField_; } } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergColumnHandle.h b/velox/connectors/hive/iceberg/IcebergColumnHandle.h index 54b722dae9e..042874222e9 100644 --- 
a/velox/connectors/hive/iceberg/IcebergColumnHandle.h +++ b/velox/connectors/hive/iceberg/IcebergColumnHandle.h @@ -15,35 +15,35 @@ */ #pragma once -#include -#include -#include - #include "velox/connectors/hive/TableHandle.h" -#include "velox/dwio/parquet/ParquetFieldId.h" -#include "velox/type/Subfield.h" -#include "velox/type/Type.h" namespace facebook::velox::connector::hive::iceberg { +struct IcebergNestedField { + int32_t id; + std::vector children; +}; + class IcebergColumnHandle : public HiveColumnHandle { public: IcebergColumnHandle( const std::string& name, ColumnType columnType, TypePtr dataType, - parquet::ParquetFieldId icebergField, + TypePtr hiveType, + const IcebergNestedField& nestedField, std::vector requiredSubfields = {}, - std::optional initialDefaultValue = std::nullopt); + std::optional initialDefaultValue = std::nullopt, + ColumnParseParameters columnParseParameters = {}); - const parquet::ParquetFieldId& field() const; + const IcebergNestedField& nestedField() const; const std::optional& initialDefaultValue() const { return initialDefaultValue_; } private: - const parquet::ParquetFieldId field_; + const IcebergNestedField nestedField_; const std::optional initialDefaultValue_; }; diff --git a/velox/connectors/hive/iceberg/IcebergConfig.h b/velox/connectors/hive/iceberg/IcebergConfig.h deleted file mode 100644 index 9eba3bd20b9..00000000000 --- a/velox/connectors/hive/iceberg/IcebergConfig.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include "velox/common/config/Config.h" - -namespace facebook::velox::connector::hive::iceberg { - -/// Iceberg-specific connector configuration wrapper. -/// Provides accessors for Iceberg-only settings while sharing the same -/// underlying ConfigBase with HiveConfig. -class IcebergConfig { - public: - /// Iceberg function prefix. - static constexpr const char* kFunctionPrefixConfig = - "presto.iceberg-namespace"; - - /// Default prefix used to register Iceberg transform functions when no - /// connector config override is provided. - static constexpr const char* kDefaultFunctionPrefix = "$internal$.iceberg."; - - explicit IcebergConfig( - const std::shared_ptr& config); - - const std::shared_ptr& config() const { - return config_; - } - - std::string functionPrefix() const; - - private: - const std::shared_ptr config_; -}; - -using IcebergConfigPtr = std::shared_ptr; - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergConnector.cpp b/velox/connectors/hive/iceberg/IcebergConnector.cpp index 685be818d2c..9c8b010f901 100644 --- a/velox/connectors/hive/iceberg/IcebergConnector.cpp +++ b/velox/connectors/hive/iceberg/IcebergConnector.cpp @@ -17,12 +17,15 @@ #include "velox/connectors/hive/iceberg/IcebergConnector.h" #include "velox/connectors/hive/HiveConnector.h" -#include "velox/connectors/hive/iceberg/IcebergConfig.h" #include "velox/connectors/hive/iceberg/IcebergDataSink.h" #include "velox/connectors/hive/iceberg/IcebergDataSource.h" +#include "velox/functions/iceberg/Register.h" namespace facebook::velox::connector::hive::iceberg { +const std::string_view kIcebergFunctionPrefixConfig{"presto.iceberg-namespace"}; +const std::string_view kDefaultIcebergFunctionPrefix{"$internal$.iceberg."}; + namespace { // Registers Iceberg partition transform functions 
with prefix. @@ -44,14 +47,16 @@ IcebergConnector::IcebergConnector( std::shared_ptr config, folly::Executor* ioExecutor) : HiveConnector(id, config, ioExecutor), - icebergConfig_(std::make_shared(connectorConfig())) { - registerIcebergInternalFunctions(icebergConfig_->functionPrefix()); + functionPrefix_(config->get( + std::string(kIcebergFunctionPrefixConfig), + std::string(kDefaultIcebergFunctionPrefix))) { + registerIcebergInternalFunctions(functionPrefix_); } std::unique_ptr IcebergConnector::createDataSource( const RowTypePtr& outputType, const ConnectorTableHandlePtr& tableHandle, - const ColumnHandleMap& columnHandles, + const std::unordered_map& columnHandles, ConnectorQueryCtx* connectorQueryCtx) { return std::make_unique( outputType, @@ -76,8 +81,7 @@ std::unique_ptr IcebergConnector::createDataSink( icebergInsertHandle, connectorQueryCtx, commitStrategy, - hiveConfig_, - icebergConfig_); + hiveConfig_); } } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergConnector.h b/velox/connectors/hive/iceberg/IcebergConnector.h index 32c8206d293..4634236da5f 100644 --- a/velox/connectors/hive/iceberg/IcebergConnector.h +++ b/velox/connectors/hive/iceberg/IcebergConnector.h @@ -16,13 +16,17 @@ #pragma once #include "velox/connectors/hive/HiveConnector.h" -#include "velox/connectors/hive/iceberg/IcebergConfig.h" namespace facebook::velox::connector::hive::iceberg { +/// TODO Add IcebergConfig class and Move these configuration properties to +/// IcebergConfig.h +extern const std::string_view kIcebergFunctionPrefixConfig; +extern const std::string_view kDefaultIcebergFunctionPrefix; + /// Provides Iceberg table format support. -/// - Creates IcebergDataSource instances for reading Iceberg tables with -/// support for delete files and schema evolution. +/// - Creates HiveDataSource instances that use IcebergSplitReader for reading +/// Iceberg tables with support for delete files and schema evolution. 
/// - Creates IcebergDataSink instances for writing data with Iceberg-specific /// partition transforms and commit metadata. class IcebergConnector final : public HiveConnector { @@ -32,17 +36,12 @@ class IcebergConnector final : public HiveConnector { std::shared_ptr config, folly::Executor* ioExecutor); - /// Creates IcebergDataSource for reading from Iceberg tables. - /// - /// @param outputType The schema of the output data to read. - /// @param tableHandle The table handle containing table metadata. - /// @param columnHandles Map of column names to column handles. - /// @param connectorQueryCtx Query context for the read operation. - /// @return IcebergDataSource instance configured for the read operation. + /// Creates an IcebergDataSource that reads Iceberg tables with support for + /// delete files, schema evolution, and column default values. std::unique_ptr createDataSource( const RowTypePtr& outputType, const ConnectorTableHandlePtr& tableHandle, - const ColumnHandleMap& columnHandles, + const std::unordered_map& columnHandles, ConnectorQueryCtx* connectorQueryCtx) override; /// Creates IcebergDataSink for writing to Iceberg tables. 
@@ -63,7 +62,7 @@ class IcebergConnector final : public HiveConnector { CommitStrategy commitStrategy) override; private: - const std::shared_ptr icebergConfig_; + const std::string functionPrefix_; }; class IcebergConnectorFactory final : public ConnectorFactory { diff --git a/velox/connectors/hive/iceberg/IcebergDataSink.cpp b/velox/connectors/hive/iceberg/IcebergDataSink.cpp index 8b2035c64de..61b72dc6406 100644 --- a/velox/connectors/hive/iceberg/IcebergDataSink.cpp +++ b/velox/connectors/hive/iceberg/IcebergDataSink.cpp @@ -16,72 +16,78 @@ #include "velox/connectors/hive/iceberg/IcebergDataSink.h" -#include -#include -#include #include -#include -#include -#include -#include - #include "velox/common/base/Fs.h" -#include "velox/common/encode/Base64.h" -#include "velox/common/memory/MemoryArbitrator.h" -#include "velox/common/testutil/TestValue.h" -#include "velox/connectors/hive/PartitionIdGenerator.h" -#include "velox/connectors/hive/iceberg/IcebergColumnHandle.h" - +#include "velox/connectors/hive/HiveConnectorUtil.h" +#include "velox/connectors/hive/iceberg/DataFileStatsCollector.h" +#include "velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.h" +#include "velox/connectors/hive/iceberg/WriterOptionsAdapter.h" +#include "velox/dwio/common/SortingWriter.h" #ifdef VELOX_ENABLE_PARQUET #include "velox/connectors/hive/iceberg/IcebergParquetStatsCollector.h" #include "velox/dwio/parquet/writer/Writer.h" #endif - -#include "velox/connectors/hive/iceberg/TransformExprBuilder.h" -#include "velox/connectors/hive/iceberg/WriterOptionsAdapter.h" -#include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/OperatorUtils.h" +#include "velox/exec/SortBuffer.h" #include "velox/type/Type.h" -using facebook::velox::common::testutil::TestValue; - namespace facebook::velox::connector::hive::iceberg { namespace { +constexpr std::string_view kNotClusteredRowsErrorMsg = + "Incoming records violate the writer assumption that records are clustered by spec and \n by 
partition within each spec. Either cluster the incoming records or switch to fanout writers.\nEncountered records that belong to already closed files:\n"; + +#define WRITER_NON_RECLAIMABLE_SECTION_GUARD(index) \ + memory::NonReclaimableSectionGuard nonReclaimableGuard( \ + writerInfo_[(index)]->nonReclaimableSectionHolder.get()) + +std::string toJson(const std::vector& partitionValues) { + folly::dynamic jsonObject = folly::dynamic::object(); + folly::dynamic valuesArray = folly::dynamic::array(); + for (const auto& value : partitionValues) { + valuesArray.push_back(value); + } + jsonObject["partitionValues"] = valuesArray; + return folly::toJson(jsonObject); +} + template folly::dynamic extractPartitionValue( - const VectorPtr& child, + const DecodedVector* block, vector_size_t row) { using T = typename TypeTraits::NativeType; - return child->asChecked>()->valueAt(row); + return block->valueAt(row); } template <> folly::dynamic extractPartitionValue( - const VectorPtr& child, + const DecodedVector* block, vector_size_t row) { - return child->asChecked>()->valueAt(row).str(); + auto value = block->valueAt(row); + return value.str(); } template <> folly::dynamic extractPartitionValue( - const VectorPtr& child, + const DecodedVector* block, vector_size_t row) { - return encoding::Base64::encode( - child->asChecked>()->valueAt(row)); + auto value = block->valueAt(row); + return value.str(); } template <> folly::dynamic extractPartitionValue( - const VectorPtr& child, + const DecodedVector* block, vector_size_t row) { - VELOX_DCHECK(child->type()->equivalent(*TIMESTAMP())); - return child->asChecked>()->valueAt(row).toMicros(); + auto timestamp = block->valueAt(row); + return timestamp.toMicros(); } class IcebergFileNameGenerator : public FileNameGenerator { public: + IcebergFileNameGenerator() {} + std::pair gen( std::optional bucketId, const std::shared_ptr insertTableHandle, @@ -93,10 +99,6 @@ class IcebergFileNameGenerator : public FileNameGenerator { std::string 
toString() const override; }; -std::string makeUuid() { - return boost::lexical_cast(boost::uuids::random_generator()()); -} - std::pair IcebergFileNameGenerator::gen( std::optional bucketId, const std::shared_ptr insertTableHandle, @@ -106,15 +108,14 @@ std::pair IcebergFileNameGenerator::gen( if (targetFileName.empty()) { targetFileName = fmt::format("{}", makeUuid()); } - auto fileFormat = dwio::common::toString(insertTableHandle->storageFormat()); - auto fileName = fmt::format("{}.{}", targetFileName, fileFormat); - return {fileName, fileName}; + + return { + fmt::format("{}{}", targetFileName, ".parquet"), + fmt::format("{}{}", targetFileName, ".parquet")}; } folly::dynamic IcebergFileNameGenerator::serialize() const { - folly::dynamic obj = folly::dynamic::object; - obj["name"] = "IcebergFileNameGenerator"; - return obj; + VELOX_UNREACHABLE("Unexpected code path, implement serialize() first."); } std::string IcebergFileNameGenerator::toString() const { @@ -126,12 +127,14 @@ std::string IcebergFileNameGenerator::toString() const { IcebergInsertTableHandle::IcebergInsertTableHandle( std::vector inputColumns, LocationHandlePtr locationHandle, + std::shared_ptr partitionSpec, + memory::MemoryPool* pool, dwio::common::FileFormat tableStorageFormat, - IcebergPartitionSpecPtr partitionSpec, + const std::vector& sortedBy, std::optional compressionKind, const std::unordered_map& serdeParameters) : HiveInsertTableHandle( - std::vector( + std::vector>( inputColumns.begin(), inputColumns.end()), std::move(locationHandle), @@ -141,8 +144,11 @@ IcebergInsertTableHandle::IcebergInsertTableHandle( serdeParameters, nullptr, false, - std::make_shared()), - partitionSpec_(partitionSpec) { + std::make_shared()), + partitionSpec_(std::move(partitionSpec)), + columnTransforms_( + parsePartitionTransformSpecs(partitionSpec_->fields, pool)), + sortedBy_(sortedBy) { VELOX_USER_CHECK( !inputColumns_.empty(), "Input columns cannot be empty for Iceberg tables."); @@ -154,118 +160,43 @@ 
IcebergInsertTableHandle::IcebergInsertTableHandle( dwio::common::toString(tableStorageFormat)); } -namespace { - -// Creates partition channels by mapping partition spec fields to input column -// indices. For each field in the partition spec, finds the corresponding -// partition key column in the input columns and records its index. -// -// @param inputColumns The input columns from the insert table handle. -// @param partitionSpec The Iceberg partition specification, or nullptr if -// unpartitioned. -// @return A vector of column indices representing the partition channels. Each -// index corresponds to a partition field in the spec and points to the -// matching partition key column in the input. Returns an empty vector if -// partitionSpec is nullptr. -std::vector createPartitionChannels( - const std::vector& inputColumns, - const IcebergPartitionSpecPtr& partitionSpec) { - std::vector channels; - if (!partitionSpec) { - return channels; - } - - // Build a map from partition key column names to their indices in the input. - std::unordered_map partitionKeyMap; - for (auto i = 0; i < inputColumns.size(); ++i) { - if (inputColumns[i]->isPartitionKey()) { - partitionKeyMap[inputColumns[i]->name()] = i; - } - } - - // For each field in the partition spec, find its corresponding input column - // index. - channels.reserve(partitionSpec->fields.size()); - for (const auto& field : partitionSpec->fields) { - if (auto it = partitionKeyMap.find(field.name); - it != partitionKeyMap.end()) { - channels.push_back(it->second); - } - } - - return channels; -} - -std::vector createDataChannels( - const IcebergInsertTableHandlePtr& insertTableHandle) { - std::vector dataChannels( - insertTableHandle->inputColumns().size()); - std::iota(dataChannels.begin(), dataChannels.end(), 0); - return dataChannels; -} - -// Creates a RowType schema for transformed partition values based on the -// partition specification. 
This RowType is used to wrap the transformed -// partition columns before passing them to the partition ID generator. -// -// For each partition field in the spec: -// - The column type is the result type of the partition transform (e.g., -// INTEGER for year transform, DATE for day transform). -// - The column name is the source column name for identity transforms, or -// "columnName_transformName" for non-identity transforms (e.g., "birth_year" -// for a year transform on a birth column). -// -// @param partitionSpec The Iceberg partition specification, or nullptr if -// unpartitioned. -// @return A RowType containing one column per partition field with appropriate -// names and types. Returns nullptr if partitionSpec is nullptr. -RowTypePtr createPartitionRowType( - const IcebergPartitionSpecPtr& partitionSpec) { - if (!partitionSpec) { - return nullptr; - } - - std::vector partitionKeyTypes; - std::vector partitionKeyNames; - - // Build column names and types for each partition field. - // Identity transforms use the source column name directly. - // Non-identity transforms use "columnName_transformName" format. - for (const auto& field : partitionSpec->fields) { - partitionKeyTypes.emplace_back(field.resultType()); - std::string key = field.transformType == TransformType::kIdentity - ? 
field.name - : fmt::format( - "{}_{}", - field.name, - TransformTypeName::toName(field.transformType)); - partitionKeyNames.emplace_back(std::move(key)); - } - - return ROW(std::move(partitionKeyNames), std::move(partitionKeyTypes)); -} - -} // namespace - IcebergDataSink::IcebergDataSink( RowTypePtr inputType, IcebergInsertTableHandlePtr insertTableHandle, const ConnectorQueryCtx* connectorQueryCtx, CommitStrategy commitStrategy, - const std::shared_ptr& hiveConfig, - const IcebergConfigPtr& icebergConfig) + const std::shared_ptr& hiveConfig) : IcebergDataSink( std::move(inputType), insertTableHandle, connectorQueryCtx, commitStrategy, hiveConfig, - createPartitionChannels( - insertTableHandle->inputColumns(), - insertTableHandle->partitionSpec()), - createDataChannels(insertTableHandle), - createPartitionRowType(insertTableHandle->partitionSpec()), - icebergConfig) {} + [&insertTableHandle]() { + const auto& inputColumns = insertTableHandle->inputColumns(); + const auto& partitionSpec = insertTableHandle->partitionSpec(); + std::unordered_map partitionKeyMap; + for (auto i = 0; i < inputColumns.size(); ++i) { + if (inputColumns[i]->isPartitionKey()) { + partitionKeyMap[inputColumns[i]->name()] = i; + } + } + std::vector channels; + channels.reserve(partitionSpec->fields.size()); + for (const auto& field : partitionSpec->fields) { + if (auto it = partitionKeyMap.find(field.name); + it != partitionKeyMap.end()) { + channels.push_back(it->second); + } + } + return channels; + }(), + [&insertTableHandle]() { + std::vector channels( + insertTableHandle->inputColumns().size()); + std::iota(channels.begin(), channels.end(), 0); + return channels; + }()) {} IcebergDataSink::IcebergDataSink( RowTypePtr inputType, @@ -274,9 +205,7 @@ IcebergDataSink::IcebergDataSink( CommitStrategy commitStrategy, const std::shared_ptr& hiveConfig, const std::vector& partitionChannels, - const std::vector& dataChannels, - RowTypePtr partitionRowType, - const IcebergConfigPtr& 
icebergConfig) + const std::vector& dataChannels) : HiveDataSink( inputType, insertTableHandle, @@ -288,38 +217,93 @@ IcebergDataSink::IcebergDataSink( partitionChannels, dataChannels, !partitionChannels.empty() - ? std::make_unique( - partitionRowType, - [&partitionChannels]() { - std::vector transformedChannels( - partitionChannels.size()); - std::iota( - transformedChannels.begin(), - transformedChannels.end(), - 0); - return transformedChannels; - }(), + ? std::make_unique( + partitionChannels, hiveConfig->maxPartitionsPerWriters( connectorQueryCtx->sessionProperties()), - connectorQueryCtx->memoryPool()) + connectorQueryCtx->memoryPool(), + insertTableHandle->columnTransforms(), + hiveConfig->isPartitionPathAsLowerCase( + connectorQueryCtx->sessionProperties())) : nullptr), - partitionSpec_(insertTableHandle->partitionSpec()), - transformEvaluator_( - !partitionChannels.empty() ? std::make_unique( - TransformExprBuilder::toExpressions( - partitionSpec_, - partitionChannels_, - inputType_, - icebergConfig->functionPrefix()), - connectorQueryCtx_) - : nullptr), - icebergPartitionName_( - partitionSpec_ != nullptr - ? 
std::make_unique(partitionSpec_) - : nullptr), - partitionRowType_(std::move(partitionRowType)), - icebergInsertTableHandle_(insertTableHandle) { - commitPartitionValue_.resize(maxOpenWriters_); + icebergInsertTableHandle_(insertTableHandle), + fanoutEnabled_( + hiveConfig_->fanoutEnabled(connectorQueryCtx_->sessionProperties())), + currentWriterId_(0) { + if (isPartitioned()) { + partitionData_.resize(maxOpenWriters_); + } + const auto& inputColumns = insertTableHandle_->inputColumns(); + + std::function + buildNestedField = [&](const IcebergNestedField& f, + const TypePtr& type, + bool skipBounds) -> IcebergDataFileStatsSettings { + VELOX_CHECK_NOT_NULL(type, "Input column type cannot be null."); + bool currentSkipBounds = skipBounds || type->isMap() || type->isArray(); + IcebergDataFileStatsSettings field(f.id, currentSkipBounds); + if (!f.children.empty()) { + VELOX_CHECK_EQ(f.children.size(), type->size()); + field.children.reserve(f.children.size()); + if (type->isRow()) { + auto rowType = asRowType(type); + for (size_t i = 0; i < f.children.size(); ++i) { + field.children.push_back( + std::make_unique(buildNestedField( + f.children[i], rowType->childAt(i), currentSkipBounds))); + } + } else if (type->isArray()) { + auto arrayType = type->asArray(); + field.children.push_back( + std::make_unique(buildNestedField( + f.children[0], arrayType.elementType(), currentSkipBounds))); + } else if (type->isMap()) { + auto mapType = type->asMap(); + for (size_t i = 0; i < f.children.size(); ++i) { + field.children.push_back( + std::make_unique(buildNestedField( + f.children[i], mapType.childAt(i), currentSkipBounds))); + } + } + } + return field; + }; + + statsSettings_ = std::make_shared< + std::vector>>(); + for (const auto& columnHandle : inputColumns) { + auto icebergColumnHandle = + std::dynamic_pointer_cast(columnHandle); + VELOX_CHECK_NOT_NULL(icebergColumnHandle, "Invalid IcebergColumnHandle."); + statsSettings_->push_back( + std::make_unique(buildNestedField( + 
icebergColumnHandle->nestedField(), + icebergColumnHandle->dataType(), + false))); + } + + icebergStatsCollector_ = + std::make_unique(statsSettings_); + + const auto& sortedBy = insertTableHandle->sortedBy(); + if (!sortedBy.empty()) { + sortColumnIndices_.reserve(sortedBy.size()); + sortCompareFlags_.reserve(sortedBy.size()); + for (auto i = 0; i < sortedBy.size(); ++i) { + auto columnIndex = + inputType_->getChildIdxIfExists(sortedBy[i].sortColumn()); + if (columnIndex.has_value()) { + sortColumnIndices_.push_back(columnIndex.value()); + sortCompareFlags_.push_back( + {sortedBy[i].sortOrder().isNullsFirst(), + sortedBy[i].sortOrder().isAscending(), + false, + CompareFlags::NullHandlingMode::kNullAsValue}); + } + } + sortWrite_ = !sortColumnIndices_.empty(); + } #ifdef VELOX_ENABLE_PARQUET // Only initialize Parquet stats collector for Parquet format tables @@ -341,206 +325,356 @@ std::vector IcebergDataSink::commitMessage() const { commitTasks.reserve(writerInfo_.size()); for (auto i = 0; i < writerInfo_.size(); ++i) { - const auto& writerInfo = writerInfo_.at(i); - VELOX_CHECK_NOT_NULL(writerInfo); + const auto& info = writerInfo_.at(i); + VELOX_CHECK_NOT_NULL(info); // Following metadata (json format) is consumed by Presto CommitTaskData. // It contains the minimal subset of metadata. - VELOX_CHECK_EQ(writerInfo->writtenFiles.size(), dataFileStats_[i].size()); - for (auto fileIdx = 0; fileIdx < writerInfo->writtenFiles.size(); - ++fileIdx) { - const auto& fileInfo = writerInfo->writtenFiles[fileIdx]; - // clang-format off - folly::dynamic commitData = folly::dynamic::object( - "path", (fs::path(writerInfo->writerParameters.targetDirectory()) / - fileInfo.targetFileName).string()) - ("fileSizeInBytes", fileInfo.fileSize) - ("metrics", dataFileStats_[i][fileIdx]->toJson()) - ("partitionSpecJson", - icebergInsertTableHandle_->partitionSpec() ? - icebergInsertTableHandle_->partitionSpec()->specId : 0) - // Sort order evolution is not supported. 
Set default id to 0 ( unsorted order). - ("sortOrderId", 0) + // clang-format off + folly::dynamic commitData = + folly::dynamic::object + ("path", + (fs::path(info->writerParameters.writeDirectory()) / + info->writerParameters.writeFileName()).string()) + ("fileSizeInBytes", ioStats_.at(i)->rawBytesWritten()) + // Sort order evolution is not supported. Set default id to 1. + ("sortOrderId", 1) + ("partitionSpecJson", icebergInsertTableHandle_->partitionSpec()->specId) ("fileFormat", toManifestFormatString(icebergInsertTableHandle_->storageFormat())) ("content", "DATA"); - // clang-format on - if (!commitPartitionValue_.empty() && - !commitPartitionValue_[i].isNull()) { - commitData["partitionDataJson"] = folly::toJson( - folly::dynamic::object( - "partitionValues", commitPartitionValue_[i])); - } - auto commitDataJson = folly::toJson(commitData); - commitTasks.push_back(commitDataJson); + // clang-format on + if (dataFileStats_[i] != nullptr) { + commitData["metrics"] = dataFileStats_[i]->toJson(); + commitData["splitOffsets"] = dataFileStats_[i]->splitOffsetsAsJson(); } + if (!(partitionData_.empty() || partitionData_[i].empty())) { + commitData["partitionDataJson"] = toJson(partitionData_[i]); + } + auto commitDataJson = folly::toJson(commitData); + commitTasks.push_back(commitDataJson); } return commitTasks; } -void IcebergDataSink::computePartitionAndBucketIds(const RowVectorPtr& input) { - VELOX_CHECK(isPartitioned()); - VELOX_CHECK_NOT_NULL(transformEvaluator_); - VELOX_CHECK_NOT_NULL(partitionIdGenerator_); - // Step 1: Apply transforms to input partition columns. - auto transformedColumns = transformEvaluator_->evaluate(input); - - // Step 2: Create RowVector based on transformed columns. 
- const auto& transformedRowVector = std::make_shared( - connectorQueryCtx_->memoryPool(), - partitionRowType_, - nullptr, - input->size(), - std::move(transformedColumns)); - partitionIdGenerator_->run(transformedRowVector, partitionIds_); -} +void IcebergDataSink::splitInputRowsAndEnsureWriters() { + std::fill(partitionSizes_.begin(), partitionSizes_.end(), 0); -std::string IcebergDataSink::getPartitionName(uint32_t partitionId) const { - VELOX_CHECK_NOT_NULL(icebergPartitionName_); + const auto numRows = partitionIds_.size(); + for (auto row = 0; row < numRows; ++row) { + auto id = getIcebergWriterId(row); + uint32_t index = ensureWriter(id); - return icebergPartitionName_->partitionName( - partitionId, - partitionIdGenerator_->partitionValues(), - partitionKeyAsLowerCase_); -} + updatePartitionRows(index, numRows, row); -uint32_t IcebergDataSink::ensureWriter(const WriterId& id) { - auto writerId = HiveDataSink::ensureWriter(id); - if (isPartitioned() && commitPartitionValue_[writerId].isNull()) { - commitPartitionValue_[writerId] = makeCommitPartitionValue(writerId); + if (!partitionData_[index].empty()) { + continue; + } + buildPartitionData(index); + } + + for (auto i = 0; i < partitionSizes_.size(); ++i) { + if (partitionSizes_[i] != 0) { + VELOX_CHECK_NOT_NULL(partitionRows_[i]); + partitionRows_[i]->setSize(partitionSizes_[i] * sizeof(vector_size_t)); + } } - return writerId; } -std::shared_ptr -IcebergDataSink::createWriterOptions(size_t writerIndex) const { - auto options = HiveDataSink::createWriterOptions(writerIndex); +void IcebergDataSink::computePartition(const RowVectorPtr& input) { + VELOX_CHECK(isPartitioned()); + partitionIdGenerator_->run(input, partitionIds_); +} - // Dispatch format-specific Iceberg overrides through the adapter so each - // supported format (Parquet, DWRF, Nimble) gets its pre/post-processConfigs - // hooks applied uniformly. 
- const auto adapter = - createWriterOptionsAdapter(icebergInsertTableHandle_->storageFormat()); - if (adapter != nullptr) { - adapter->applyPreConfigs(*options); +void IcebergDataSink::appendData(RowVectorPtr input) { + checkRunning(); + if (!isPartitioned()) { + const auto index = ensureWriter(WriterId::unpartitionedId()); + write(index, input); + return; } -#ifdef VELOX_ENABLE_PARQUET - // Iceberg-runtime stats collector is not a static config; wire it inline. - if (auto parquetOptions = - std::dynamic_pointer_cast(options)) { - if (parquetStatsCollector_) { - parquetOptions->parquetFieldIds = - parquetStatsCollector_->parquetFieldIds().children; - } - } -#endif + computePartition(input); - options->processConfigs( - *hiveConfig_->config(), *connectorQueryCtx_->sessionProperties()); + if (fanoutEnabled_) { + splitInputRowsAndEnsureWriters(); - if (adapter != nullptr) { - adapter->applyPostConfigs(*options); - } + for (auto index = 0; index < writers_.size(); ++index) { + const vector_size_t partitionSize = partitionSizes_[index]; + if (partitionSize == 0) { + continue; + } - return options; + const RowVectorPtr writerInput = partitionSize == input->size() + ? input + : exec::wrap(partitionSize, partitionRows_[index], input); + write(index, writerInput); + } + } else { // Clustered mode. 
+ std::fill(partitionSizes_.begin(), partitionSizes_.end(), 0); + const auto numRows = input->size(); + uint32_t index = 0; + for (auto row = 0; row < numRows; ++row) { + auto id = getIcebergWriterId(row); + index = ensureWriter(id); + if (currentWriterId_ != index) { + clusteredWrite(input, currentWriterId_); + closeWriter(currentWriterId_); + completedWriterIds_.insert(currentWriterId_); + VELOX_USER_CHECK_EQ( + completedWriterIds_.count(index), + 0, + "{}", + kNotClusteredRowsErrorMsg); + currentWriterId_ = index; + } + updatePartitionRows(index, numRows, row); + buildPartitionData(index); + } + clusteredWrite(input, index); + } } -folly::dynamic IcebergDataSink::makeCommitPartitionValue( - uint32_t writerIndex) const { - folly::dynamic partitionValues = folly::dynamic::array(); - const auto& transformedValues = partitionIdGenerator_->partitionValues(); +void IcebergDataSink::buildPartitionData(int32_t index) { + std::vector partitionValues(partitionChannels_.size()); + auto icebergPartitionIdGenerator = + dynamic_cast( + partitionIdGenerator_.get()); + VELOX_CHECK_NOT_NULL(icebergPartitionIdGenerator); + const RowVectorPtr transformedValues = + icebergPartitionIdGenerator->partitionValues(); for (auto i = 0; i < partitionChannels_.size(); ++i) { - const auto& child = transformedValues->childAt(i); - if (child->isNullAt(writerIndex)) { - partitionValues.push_back(nullptr); + auto block = transformedValues->childAt(i); + if (block->isNullAt(index)) { + partitionValues[i] = nullptr; } else { - partitionValues.push_back(VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( - extractPartitionValue, child->typeKind(), child, writerIndex)); + DecodedVector decoded(*block); + partitionValues[i] = VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( + extractPartitionValue, block->typeKind(), &decoded, index); } } - return partitionValues; + partitionData_[index] = partitionValues; } -void IcebergDataSink::closeWriterAndCollectStats(size_t index) { - auto metadata = writers_[index]->close(); - const bool 
fileAdded = getCurrentFileBytes(index) > 0; - - // Finalize file info (capture file size, add to writtenFiles). - finalizeWriterFile(index); +void IcebergDataSink::clusteredWrite(RowVectorPtr input, int32_t writerIdx) { + if (partitionSizes_[writerIdx] != 0) { + VELOX_CHECK_NOT_NULL(partitionRows_[writerIdx]); + partitionRows_[writerIdx]->setSize( + partitionSizes_[writerIdx] * sizeof(vector_size_t)); + } + const vector_size_t partitionSize = partitionSizes_[writerIdx]; + const RowVectorPtr writerInput = partitionSize == input->size() + ? input + : exec::wrap(partitionSize, partitionRows_[writerIdx], input); + write(writerIdx, writerInput); +} - if (!fileAdded) { - return; +WriterId IcebergDataSink::getIcebergWriterId(size_t row) const { + std::optional partitionId; + if (isPartitioned()) { + VELOX_CHECK_LT(partitionIds_[row], std::numeric_limits::max()); + partitionId = static_cast(partitionIds_[row]); } -#ifdef VELOX_ENABLE_PARQUET - if (parquetStatsCollector_) { - dataFileStats_[index].emplace_back( - parquetStatsCollector_->aggregate(std::move(metadata))); - return; + + std::optional bucketId; + if (isBucketed()) { + bucketId = bucketIds_[row]; } -#endif - dataFileStats_[index].emplace_back( - std::make_shared( - IcebergDataFileStatistics::empty())); + return WriterId{partitionId, std::nullopt}; } -void IcebergDataSink::rotateWriter(size_t index) { - VELOX_CHECK_LT(index, writers_.size()); - VELOX_CHECK_NOT_NULL(writers_[index]); +std::shared_ptr +IcebergDataSink::createWriterOptions(size_t writerIndex) const { + auto options = HiveDataSink::createWriterOptions(writerIndex); + options->fileStatsCollector = icebergStatsCollector_.get(); - // Ensure dataFileStats_ has an entry for this writer index. 
- if (dataFileStats_.size() <= index) { - dataFileStats_.resize(index + 1); - } +#ifdef VELOX_ENABLE_PARQUET + if (icebergInsertTableHandle_->storageFormat() == + dwio::common::FileFormat::PARQUET) { + auto parquetOptions = + std::dynamic_pointer_cast(options); + VELOX_CHECK_NOT_NULL(parquetOptions); + + std::function + convertField = + [&convertField](const IcebergDataFileStatsSettings& icebergField) + -> parquet::ParquetFieldId { + parquet::ParquetFieldId parquetField; + parquetField.fieldId = icebergField.fieldId; + for (const auto& child : icebergField.children) { + parquetField.children.push_back(convertField(*child)); + } + return parquetField; + }; + + std::vector parquetFieldIds; + for (const auto& setting : *statsSettings_) { + const auto* icebergSetting = + static_cast(setting.get()); + parquetFieldIds.push_back(convertField(*icebergSetting)); + } - // Close the writer to flush the footer and obtain file metadata, then - // aggregate Iceberg stats from the metadata. The base rotateWriter() would - // also call writers_[index]->close() but discards the returned metadata. - // We close the writer ourselves to capture the metadata, then reset the - // writer to prevent double close. - { - const memory::NonReclaimableSectionGuard nonReclaimableGuard( - writerInfo_[index]->nonReclaimableSectionHolder.get()); - closeWriterAndCollectStats(index); + parquetOptions->parquetFieldIds = parquetFieldIds; + parquetOptions->parquetWriteTimestampTimeZone = std::nullopt; + parquetOptions->parquetWriteTimestampUnit = + TimestampPrecision::kMicroseconds; } +#endif + return options; +} - // Release old writer. The new writer will be created lazily on the next - // write call. 
- writers_[index].reset(); - - ++writerInfo_[index]->fileSequenceNumber; +std::string IcebergDataSink::getPartitionName(uint32_t partitionId) const { + return partitionIdGenerator_->partitionName(partitionId); } void IcebergDataSink::closeInternal() { VELOX_CHECK_NE(state_, State::kRunning); VELOX_CHECK_NE(state_, State::kFinishing); - TestValue::adjust( - "facebook::velox::connector::hive::FileDataSink::closeInternal", this); + common::testutil::TestValue::adjust( + "facebook::velox::connector::hive::IcebergDataSink::closeInternal", this); if (state_ == State::kClosed) { - // Ensure dataFileStats_ has entries for all writers. - dataFileStats_.resize(writers_.size()); - - for (auto i = 0; i < writers_.size(); ++i) { - if (writers_[i] == nullptr) { - // Writer was rotated and is null. Stats for rotated files were already - // collected in rotateWriter(). No final file to close. - continue; + for (int i = 0; i < writers_.size(); ++i) { + if (writers_[i]) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + writers_[i]->close(); + dataFileStats_.push_back(writers_[i]->dataFileStats()); } - const memory::NonReclaimableSectionGuard nonReclaimableGuard( - writerInfo_[i]->nonReclaimableSectionHolder.get()); - closeWriterAndCollectStats(i); } } else { - for (auto i = 0; i < writers_.size(); ++i) { - if (writers_[i] == nullptr) { - continue; + for (int i = 0; i < writers_.size(); ++i) { + if (writers_[i]) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + writers_[i]->abort(); } - memory::NonReclaimableSectionGuard nonReclaimableGuard( - writerInfo_[i]->nonReclaimableSectionHolder.get()); - writers_[i]->abort(); } } } +void IcebergDataSink::closeWriter(int32_t index) { + common::testutil::TestValue::adjust( + "facebook::velox::connector::hive::iceberg::IcebergDataSink::closeWriter", + this); + + if (writers_[index]) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(index); + if (sortWrite()) { + finishWriter(index); + } + writers_[index]->close(); + 
dataFileStats_.push_back(writers_[index]->dataFileStats()); + writers_[index] = nullptr; + } +} + +bool IcebergDataSink::finishWriter(int32_t index) { + if (!sortWrite()) { + return true; + } + + if (writers_[index]) { + const uint64_t startTimeMs = getCurrentTimeMs(); + if (!writers_[index]->finish()) { + return false; + } + if (getCurrentTimeMs() - startTimeMs > sortWriterFinishTimeSliceLimitMs_) { + return false; + } + } + return true; +} + +bool IcebergDataSink::finish() { + // Flush is reentry state. + setState(State::kFinishing); + + // As for now, only sorted writer needs flush buffered data. For non-sorted + // writer, data is directly written to the underlying file writer. + if (!sortWrite()) { + return true; + } + + // TODO: we might refactor to move the data sorting logic into hive data sink. + const uint64_t startTimeMs = getCurrentTimeMs(); + for (auto i = 0; i < writers_.size(); ++i) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + if (writers_[i] && !writers_[i]->finish()) { + return false; + } + if (getCurrentTimeMs() - startTimeMs > sortWriterFinishTimeSliceLimitMs_) { + return false; + } + } + return true; +} + +std::vector IcebergDataSink::close() { + if (state_ == State::kRunning) { + finish(); + } + return HiveDataSink::close(); +} + +std::unique_ptr +IcebergDataSink::maybeCreateBucketSortWriter( + std::unique_ptr writer) { + if (!sortWrite()) { + return writer; + } + auto sortPool = writerInfo_.back()->sortPool.get(); + VELOX_CHECK_NOT_NULL(sortPool); + auto sortBuffer = std::make_unique( + inputType_, + sortColumnIndices_, + sortCompareFlags_, + sortPool, + writerInfo_.back()->nonReclaimableSectionHolder.get(), + connectorQueryCtx_->prefixSortConfig(), + spillConfig_, + writerInfo_.back()->spillStats.get()); + return std::make_unique( + std::move(writer), + std::move(sortBuffer), + hiveConfig_->sortWriterMaxOutputRows( + connectorQueryCtx_->sessionProperties()), + hiveConfig_->sortWriterMaxOutputBytes( + 
connectorQueryCtx_->sessionProperties()), + sortWriterFinishTimeSliceLimitMs_); +} + +IcebergSortingColumn::IcebergSortingColumn( + const std::string& sortColumn, + const core::SortOrder& sortOrder) + : sortColumn_(sortColumn), sortOrder_(sortOrder) { + VELOX_USER_CHECK(!sortColumn_.empty(), "iceberg sort column must be set."); +} + +const std::string& IcebergSortingColumn::sortColumn() const { + return sortColumn_; +} + +const core::SortOrder& IcebergSortingColumn::sortOrder() const { + return sortOrder_; +} + +folly::dynamic IcebergSortingColumn::serialize() const { + VELOX_UNREACHABLE("Unexpected code path, implement serialize() first."); +} + +void IcebergDataSink::rotateWriter(size_t index) { + common::testutil::TestValue::adjust( + "facebook::velox::connector::hive::iceberg::IcebergDataSink::rotateWriter", + this); + + if (writers_[index]) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(index); + // Collect dataFileStats before the writer is closed by parent's + // rotateWriter + dataFileStats_.push_back(writers_[index]->dataFileStats()); + } + + // Call parent's rotateWriter to close the writer and finalize the file + HiveDataSink::rotateWriter(index); +} + } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergDataSink.h b/velox/connectors/hive/iceberg/IcebergDataSink.h index 8d4ee101a27..90ead2e1d96 100644 --- a/velox/connectors/hive/iceberg/IcebergDataSink.h +++ b/velox/connectors/hive/iceberg/IcebergDataSink.h @@ -16,29 +16,36 @@ #pragma once -#include -#include -#include -#include - #include "velox/connectors/hive/HiveDataSink.h" -#include "velox/connectors/hive/TableHandle.h" +#include "velox/connectors/hive/iceberg/DataFileStatsCollector.h" #include "velox/connectors/hive/iceberg/IcebergColumnHandle.h" -#include "velox/connectors/hive/iceberg/IcebergDataFileStatistics.h" +#include "velox/connectors/hive/iceberg/TransformFactory.h" +#include "velox/connectors/hive/iceberg/Transforms.h" #ifdef 
VELOX_ENABLE_PARQUET #include "velox/connectors/hive/iceberg/IcebergParquetStatsCollector.h" #endif -#include "velox/connectors/hive/iceberg/IcebergConfig.h" -#include "velox/connectors/hive/iceberg/IcebergPartitionName.h" -#include "velox/connectors/hive/iceberg/PartitionSpec.h" -#include "velox/connectors/hive/iceberg/TransformEvaluator.h" -#include "velox/functions/iceberg/Register.h" - namespace facebook::velox::connector::hive::iceberg { -/// Represents a request for Iceberg write. +class IcebergSortingColumn : public ISerializable { + public: + IcebergSortingColumn( + const std::string& sortColumn, + const core::SortOrder& sortOrder); + + const std::string& sortColumn() const; + + const core::SortOrder& sortOrder() const; + + folly::dynamic serialize() const override; + + private: + const std::string sortColumn_; + const core::SortOrder sortOrder_; +}; + +// Represents a request for Iceberg write. class IcebergInsertTableHandle final : public HiveInsertTableHandle { public: /// @param inputColumns Columns from the table schema to write. @@ -60,19 +67,30 @@ class IcebergInsertTableHandle final : public HiveInsertTableHandle { IcebergInsertTableHandle( std::vector inputColumns, LocationHandlePtr locationHandle, - dwio::common::FileFormat tableStorageFormat, - IcebergPartitionSpecPtr partitionSpec, + std::shared_ptr partitionSpec, + memory::MemoryPool* pool, + dwio::common::FileFormat tableStorageFormat = + dwio::common::FileFormat::PARQUET, + const std::vector& sortedBy = {}, std::optional compressionKind = {}, const std::unordered_map& serdeParameters = {}); - /// Returns the Iceberg partition specification that defines how the table - /// is partitioned. 
- const IcebergPartitionSpecPtr& partitionSpec() const { + std::shared_ptr partitionSpec() const { return partitionSpec_; } + const std::vector>& columnTransforms() const { + return columnTransforms_; + } + + const std::vector& sortedBy() const { + return sortedBy_; + } + private: - const IcebergPartitionSpecPtr partitionSpec_; + const std::shared_ptr partitionSpec_; + const std::vector> columnTransforms_; + const std::vector sortedBy_; }; using IcebergInsertTableHandlePtr = @@ -85,9 +103,14 @@ class IcebergDataSink : public HiveDataSink { IcebergInsertTableHandlePtr insertTableHandle, const ConnectorQueryCtx* connectorQueryCtx, CommitStrategy commitStrategy, - const std::shared_ptr& hiveConfig, - const IcebergConfigPtr& icebergConfig); + const std::shared_ptr& hiveConfig); + + void appendData(RowVectorPtr input) override; + const std::vector>& + dataFileStats() const { + return dataFileStats_; + } /// Generates Iceberg-specific commit messages for all writers containing /// metadata about written files. Creates a JSON object for each writer /// in the format expected by Presto and Spark for Iceberg tables. @@ -112,6 +135,10 @@ class IcebergDataSink : public HiveDataSink { /// Presto and Spark Iceberg commit protocol. std::vector commitMessage() const override; + bool finish() override; + + std::vector close() override; + private: IcebergDataSink( RowTypePtr inputType, @@ -120,43 +147,13 @@ class IcebergDataSink : public HiveDataSink { CommitStrategy commitStrategy, const std::shared_ptr& hiveConfig, const std::vector& partitionChannels, - const std::vector& dataChannels, - RowTypePtr partitionRowType, - const IcebergConfigPtr& icebergConfig); - - // Computes partition IDs for each row in the input batch by applying Iceberg - // partition transforms and generating unique partition identifiers. - // - // Performs a two-step process: - // 1. 
Applies Iceberg partition transforms (e.g., year, month, day, hour, - // bucket, truncate) to the input partition columns using - // transformEvaluator_ to produce transformed partition values. - // 2. Wraps the transformed columns in a RowVector with partitionRowType_ - // schema and passes it to partitionIdGenerator_ to compute partition IDs. - // - // The resulting partition IDs are stored in partitionIds_ buffer, where each - // element corresponds to a row in the input. These IDs are used to: - // - Route rows to the appropriate writer (one writer per unique partition). - // - Generate partition directory names via getPartitionName(). - // - // Note: Iceberg does not support bucketing, so this method only computes - // partition IDs, not bucket IDs. - // - // @param input The input RowVector containing rows to be partitioned. - void computePartitionAndBucketIds(const RowVectorPtr& input) override; - - // Returns the Iceberg partition directory name for the given partition ID. - // Converts the transformed partition values associated with the partition ID - // into an Iceberg compliant directory path - // (e.g., "date_year=2023/id_bucket=5"). - std::string getPartitionName(uint32_t partitionId) const override; + const std::vector& dataChannels); - // Ensures a writer exists for the given writer ID and returns its index. - // If the writer doesn't exist, creates it by calling appendWriter(). - // Additionally, extracts and stores the transformed partition values for - // the writer in commitPartitionValue_ if not already set, which will be - // included in the commit message as "partitionDataJson". - uint32_t ensureWriter(const WriterId& id) override; + void splitInputRowsAndEnsureWriters(); + + void computePartition(const RowVectorPtr& input); + + WriterId getIcebergWriterId(size_t row) const; // Creates writer options configured for Iceberg table writes. 
Extends the // base HiveDataSink writer options with Iceberg-specific settings: @@ -164,95 +161,40 @@ class IcebergDataSink : public HiveDataSink { // - Sets timestamp precision to microseconds. std::shared_ptr createWriterOptions( size_t writerIndex) const override; - - // Extracts partition values for a specific writer to be included in the - // commit message. Converts the transformed partition values from columnar - // storage (partitionIdGenerator_->partitionValues() where each partition - // field is a separate column) to row storage (a folly::dynamic array of - // values for the given writer index) for JSON serialization. - // Returns nullptr for null partition values. - folly::dynamic makeCommitPartitionValue(uint32_t writerIndex) const; - - // Closes the active writer at 'index' to flush its file footer, captures - // the file metadata for Iceberg stats aggregation (via - // closeWriterAndCollectStats), then resets the writer so a new one is - // created lazily on the next write. Differs from the base - // FileDataSink::rotateWriter by also collecting per-file Iceberg stats - // before discarding the writer. void rotateWriter(size_t index) override; - // Closes all remaining writers and aggregates their file metadata into - // per-writer Iceberg stats (when state == kClosed). On any other state, - // aborts the writers without collecting stats. Stats for already-rotated - // files were collected during rotateWriter(). void closeInternal() override; - // Closes the writer at 'index', captures the resulting file metadata, and - // appends a per-file IcebergDataFileStatistics entry to dataFileStats_ - // (Parquet stats when the format provides them; an empty entry otherwise). - // Caller is responsible for the surrounding NonReclaimableSectionGuard. - void closeWriterAndCollectStats(size_t index); - - // Iceberg partition specification defining how the table is partitioned. 
- // Contains partition fields with source column names, transform types - // (e.g., identity, year, month, day, hour, bucket, truncate), transform - // parameters, and result types. Null if the table is unpartitioned. - const IcebergPartitionSpecPtr partitionSpec_; - - // Evaluates Iceberg partition transforms on input rows to produce transformed - // partition keys. Applies transforms defined in partitionSpec_ (e.g., - // year(date_col), bucket(id, 16)) to the corresponding input columns and - // returns a vector of transformed columns. The transformed keys are then - // wrapped in a RowVector and passed to IcebergPartitionIdGenerator. - // Null if the table is unpartitioned. - const std::unique_ptr transformEvaluator_; - - // Generates Iceberg compliant partition directory names from partition IDs. - // Converts transformed partition values to human-readable strings based on - // their transform types (e.g., year -> "2025", month -> "2025-11", hour -> - // "2025-11-12-13") and constructs URL-encoded partition paths. - // Null if the table is unpartitioned. - const std::unique_ptr icebergPartitionName_; - - // RowType schema for the transformed partition values RowVector. - // Contains one column per partition field in partitionSpec, where each - // column has: - // - Type: The result type of the partition transform (e.g., INTEGER for year - // transform, DATE for day transform). - // - Name: Source column name for identity transforms, or - // "columnName_transformName" for non-identity transforms (e.g., - // "date_year"). - // Used to construct the RowVector that wraps the transformed partition - // columns before passing them to IcebergPartitionIdGenerator for partition ID - // generation and to IcebergPartitionNameGenerator for partition path name - // generation. - RowTypePtr partitionRowType_; - - // Stores the transformed partition values for each writer to be included in - // the commit message sent to Presto. Indexed by writer index. 
Each entry - // contains the transformed partition values (as a folly::dynamic array) for - // that writer's partition, which are serialized to JSON as - // "partitionDataJson" in the commit protocol. These values represent the same - // transformed partition data as partitionIdGenerator_->partitionValues(), but - // converted from columnar storage (where each partition field is a separate - // column in the RowVector) to row storage (where each writer has a - // folly::dynamic array of values across all partition fields), ready for JSON - // serialization. - std::vector commitPartitionValue_; - - // Statistics for all data files written by this sink, organized by writer - // index and file index within each writer. These statistics are populated - // during rotateWriter() (for rotated files) and during closeInternal() - // (for the final file of each writer). These metrics are subsequently used - // to construct Iceberg commit messages. - // Outer vector: indexed by writer index (same as writerInfo_). - // Inner vector: one entry per file written by that writer (including - // rotated files and the final file). Each entry corresponds to one - // individual data file. - std::vector> dataFileStats_; + void closeWriter(int32_t index); + + bool finishWriter(int32_t index); + + std::string getPartitionName(uint32_t partitionId) const override; + + std::unique_ptr maybeCreateBucketSortWriter( + std::unique_ptr writer); + + void buildPartitionData(int32_t index); + + void clusteredWrite(RowVectorPtr input, int32_t writerIdx); const IcebergInsertTableHandlePtr icebergInsertTableHandle_; + // Below are structures for partitions from all inputs. partitionData_ + // is indexed by partitionId. + std::vector> partitionData_; + + std::vector> dataFileStats_; + std::shared_ptr< + std::vector>> + statsSettings_; + std::unique_ptr icebergStatsCollector_; + + // Below are structures for clustered mode writer. 
+ const bool fanoutEnabled_; + uint32_t currentWriterId_; + std::unordered_set completedWriterIds_; + #ifdef VELOX_ENABLE_PARQUET std::shared_ptr parquetStatsCollector_; #endif diff --git a/velox/connectors/hive/iceberg/IcebergDataSource.cpp b/velox/connectors/hive/iceberg/IcebergDataSource.cpp index 30fa219e9ab..828139bdc0f 100644 --- a/velox/connectors/hive/iceberg/IcebergDataSource.cpp +++ b/velox/connectors/hive/iceberg/IcebergDataSource.cpp @@ -40,10 +40,10 @@ IcebergDataSource::IcebergDataSource( columnHandles_(std::make_shared(assignments)) {} std::unique_ptr IcebergDataSource::createSplitReader() { - prepareSplit(); + auto bucketChannels = prepareSplit(); auto icebergSplit = checkedPointerCast(split_); - auto reader = std::make_unique( + return std::make_unique( icebergSplit, tableHandle_, &partitionKeys_, @@ -56,9 +56,10 @@ std::unique_ptr IcebergDataSource::createSplitReader() { fileHandleFactory_, ioExecutor_, scanSpec_, - columnHandles_); - - return reader; + columnHandles_, + &infoColumns_, + std::move(bucketChannels), + &filters_); } } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.cpp b/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.cpp index 10b04574c0a..b30b5cd69d1 100644 --- a/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.cpp +++ b/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.cpp @@ -71,11 +71,24 @@ void collectSkipBoundsFieldIds( IcebergParquetStatsCollector::IcebergParquetStatsCollector( const std::vector& inputColumns) { + // Helper function to convert IcebergNestedField to ParquetFieldId + std::function + convertField = [&convertField](const IcebergNestedField& icebergField) + -> parquet::ParquetFieldId { + parquet::ParquetFieldId parquetField; + parquetField.fieldId = icebergField.id; + for (const auto& child : icebergField.children) { + parquetField.children.push_back(convertField(child)); + } + return parquetField; + }; + 
parquetFieldIds_.children.reserve(inputColumns.size()); for (const auto& columnHandle : inputColumns) { - parquetFieldIds_.children.emplace_back(columnHandle->field()); + auto parquetField = convertField(columnHandle->nestedField()); + parquetFieldIds_.children.push_back(parquetField); collectSkipBoundsFieldIds( - columnHandle->field(), columnHandle->dataType(), skipBoundsFieldIds_); + parquetField, columnHandle->dataType(), skipBoundsFieldIds_); } } diff --git a/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.h b/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.h index 8600f816d4d..620991e6c81 100644 --- a/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.h +++ b/velox/connectors/hive/iceberg/IcebergParquetStatsCollector.h @@ -21,6 +21,7 @@ #include "velox/connectors/hive/iceberg/IcebergDataFileStatistics.h" #include "velox/dwio/common/FileMetadata.h" #include "velox/dwio/parquet/ParquetFieldId.h" +#include "velox/type/Type.h" namespace facebook::velox::connector::hive::iceberg { diff --git a/velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.cpp b/velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.cpp new file mode 100644 index 00000000000..c9b22c3354b --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.cpp @@ -0,0 +1,209 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.h" + +#include "velox/connectors/hive/HivePartitionUtil.h" +#include "velox/connectors/hive/iceberg/Transforms.h" + +namespace facebook::velox::connector::hive::iceberg { + +namespace { + +template +std::pair makePartitionKeyValueString( + const BaseVector* partitionVector, + vector_size_t row, + const std::string& name, + const std::shared_ptr& columnTransform) { + using T = typename TypeTraits::NativeType; + if (partitionVector->as>()->isNullAt(row)) { + return std::make_pair(name, "null"); + } + + return std::make_pair( + name, + columnTransform->toHumanString( + partitionVector->as>()->valueAt(row))); +} + +// Iceberg spec requires URL encoding in the partition path. +// This function matches java.net.URLEncoder.encode(string, "UTF-8"). +std::string urlEncode(const StringView& data) { + std::ostringstream ret; + + for (unsigned char c : data) { + // These characters are not encoded in Java's URLEncoder. + if (std::isalnum(c) || c == '-' || c == '_' || c == '.' || c == '*') { + ret << c; + } else if (c == ' ') { + ret << '+'; + } else { + // All other characters are percent-encoded. 
+ ret << fmt::format("%{:02X}", c); + } + } + + return ret.str(); +} + +} // namespace + +IcebergPartitionIdGenerator::IcebergPartitionIdGenerator( + std::vector partitionChannels, + uint32_t maxPartitions, + memory::MemoryPool* pool, + const std::vector>& columnTransforms, + bool partitionPathAsLowerCase) + : PartitionIdGenerator( + partitionChannels, + maxPartitions, + pool, + partitionPathAsLowerCase), + pool_(pool), + columnTransforms_(columnTransforms) { + VELOX_USER_CHECK_GT( + columnTransforms_.size(), 0, "columnTransforms_ cannot be null"); + std::vector partitionKeyTypes; + std::vector partitionKeyNames; + column_index_t i{0}; + for (const auto& columnTransform : columnTransforms_) { + hashers_.emplace_back( + exec::VectorHasher::create(columnTransform->resultType(), i++)); + VELOX_USER_CHECK( + hashers_.back()->typeSupportsValueIds(), + "Unsupported partition type: {}.", + columnTransform->resultType()->toString()); + + partitionKeyTypes.emplace_back(columnTransform->resultType()); + std::string key = + columnTransform->transformType() == TransformType::kIdentity + ? 
columnTransform->sourceColumnName() + : fmt::format( + "{}_{}", + columnTransform->sourceColumnName(), + columnTransform->name()); + partitionKeyNames.emplace_back(std::move(key)); + } + partitionValues_ = BaseVector::create( + ROW(std::move(partitionKeyNames), std::move(partitionKeyTypes)), + maxPartitions, + pool_); + for (auto& key : partitionValues_->children()) { + key->resize(maxPartitions); + } +} + +void IcebergPartitionIdGenerator::savePartitionValues( + uint32_t partitionId, + const RowVectorPtr& input, + vector_size_t row) { + for (auto i = 0; i < partitionChannels_.size(); ++i) { + partitionValues_->childAt(i)->copy( + input->childAt(i).get(), partitionId, row, 1); + } +} + +void IcebergPartitionIdGenerator::run( + const RowVectorPtr& input, + raw_vector& result) { + const auto numRows = input->size(); + result.resize(numRows); + std::vector columns; + std::vector names; + std::vector types; + const int32_t transformCount = columnTransforms_.size(); + columns.reserve(transformCount); + names.reserve(transformCount); + types.reserve(transformCount); + for (auto i = 0; i < transformCount; i++) { + names.emplace_back(columnTransforms_[i]->sourceColumnName()); + types.emplace_back(columnTransforms_[i]->resultType()); + columns.emplace_back( + columnTransforms_[i]->apply(input->childAt(partitionChannels_[i]))); + } + const auto rowVector = std::make_shared( + pool_, + ROW(std::move(names), std::move(types)), + nullptr, + numRows, + columns); + + // Compute value IDs using VectorHashers and store these in 'result'. + computeValueIds(rowVector, result); + + // Convert value IDs in 'result' into partition IDs using partitionIds + // mapping. Update 'result' in place. 
+ for (auto i = 0; i < numRows; ++i) { + auto valueId = result[i]; + if (auto it = partitionIds_.find(valueId); it != partitionIds_.end()) { + result[i] = it->second; + } else { + uint64_t nextPartitionId = partitionIds_.size(); + VELOX_USER_CHECK_LT( + nextPartitionId, + maxPartitions_, + "Exceeded limit of {} distinct partitions.", + maxPartitions_); + + partitionIds_.emplace(valueId, nextPartitionId); + savePartitionValues(nextPartitionId, rowVector, i); + result[i] = nextPartitionId; + } + } +} + +std::vector> +IcebergPartitionIdGenerator::extractPartitionKeyValues( + const RowVectorPtr& partitionsVector, + vector_size_t row) const { + std::vector> partitionKeyValues; + VELOX_DCHECK_EQ( + partitionsVector->childrenSize(), + columnTransforms_.size(), + "Partition values and partition transform does not match."); + for (auto i = 0; i < partitionsVector->childrenSize(); i++) { + partitionKeyValues.push_back(VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( + makePartitionKeyValueString, + partitionsVector->childAt(i)->typeKind(), + partitionsVector->childAt(i)->loadedVector(), + row, + asRowType(partitionsVector->type())->nameOf(i), + columnTransforms_[i])); + } + return partitionKeyValues; +} + +std::string IcebergPartitionIdGenerator::partitionName( + uint64_t partitionId) const { + auto keyValues = extractPartitionKeyValues(partitionValues_, partitionId); + std::ostringstream ret; + + for (auto& [key, value] : keyValues) { + if (ret.tellp() > 0) { + ret << '/'; + } + + if (partitionPathAsLowerCase_) { + folly::toLowerAscii(key); + } + ret << urlEncode(key.data()) << '=' << urlEncode(value.data()); + } + + return ret.str(); +} + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.h b/velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.h new file mode 100644 index 00000000000..1f070bd6866 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.h @@ -0,0 +1,62 @@ +/* + * 
Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/connectors/hive/PartitionIdGenerator.h" +#include "velox/connectors/hive/iceberg/IcebergDataSink.h" + +namespace facebook::velox::connector::hive::iceberg { +class IcebergPartitionIdGenerator : public PartitionIdGenerator { + public: + IcebergPartitionIdGenerator( + std::vector partitionChannels, + uint32_t maxPartitions, + memory::MemoryPool* pool, + const std::vector>& columnTransforms, + bool partitionPathAsLowerCase); + + /// Generate sequential partition IDs for input vector. + /// @param input Input RowVector. + /// @param result Generated integer IDs indexed by input row number. + void run(const RowVectorPtr& input, raw_vector& result) override; + + /// Return partition name for the given partition id in the typical Hive + /// style. It is derived from the partitionValues_ at index partitionId. + /// Partition keys appear in the order of partition columns in the table + /// schema. + std::string partitionName(uint64_t partitionId) const override; + + /// Return the partition values for all partitions. 
+ RowVectorPtr partitionValues() const { + return partitionValues_; + } + + private: + void savePartitionValues( + uint32_t partitionId, + const RowVectorPtr& input, + vector_size_t row) override; + + std::vector> extractPartitionKeyValues( + const RowVectorPtr& partitionsVector, + vector_size_t row) const; + + memory::MemoryPool* pool_; + const std::vector> columnTransforms_; +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergPartitionName.cpp b/velox/connectors/hive/iceberg/IcebergPartitionName.cpp deleted file mode 100644 index 97a0f565b8b..00000000000 --- a/velox/connectors/hive/iceberg/IcebergPartitionName.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "velox/connectors/hive/iceberg/IcebergPartitionName.h" -#include "velox/common/encode/Base64.h" -#include "velox/dwio/catalog/fbhive/FileUtils.h" -#include "velox/functions/prestosql/URLFunctions.h" - -namespace facebook::velox::connector::hive::iceberg { - -namespace { - -std::string escapePathName(const std::string& name) { - std::string encoded; - // Pre-allocate for worst case: every byte is invalid UTF-8. - // urlEscape() writes directly into the pre-allocated buffer and - // calls resize() at the end to shrink to the actual size used. 
- encoded.resize(name.size() * 9); - functions::detail::urlEscape(encoded, name); - return encoded; -} - -} // namespace - -IcebergPartitionName::IcebergPartitionName( - const IcebergPartitionSpecPtr& partitionSpec) { - VELOX_CHECK_NOT_NULL(partitionSpec); - transformTypes_.reserve(partitionSpec->fields.size()); - for (const auto& field : partitionSpec->fields) { - transformTypes_.emplace_back(field.transformType); - } -} - -std::string IcebergPartitionName::partitionName( - uint32_t partitionId, - const RowVectorPtr& partitionValues, - bool partitionKeyAsLowerCase) const { - auto toPartitionName = [this]( - auto value, const TypePtr& type, int columnIndex) { - return IcebergPartitionName::toName( - value, type, transformTypes_[columnIndex]); - }; - - return dwio::catalog::fbhive::FileUtils::makePartName( - HivePartitionName::partitionKeyValues( - partitionId, - partitionValues, - /*nullValueString=*/"null", - toPartitionName), - partitionKeyAsLowerCase, - /*useDefaultPartitionValue=*/false, - escapePathName); -} - -std::string IcebergPartitionName::toName( - int32_t value, - const TypePtr& type, - TransformType transformType) { - constexpr int32_t kEpochYear = 1970; - switch (transformType) { - case TransformType::kIdentity: { - if (type->isDate()) { - return DateType::toIso8601(value); - } - return fmt::to_string(value); - } - case TransformType::kDay: - return DATE()->toString(value); - case TransformType::kYear: - return fmt::format("{:04d}", kEpochYear + value); - case TransformType::kMonth: { - int32_t year = kEpochYear + value / 12; - int32_t month = 1 + value % 12; - if (month <= 0) { - month += 12; - year -= 1; - } - return fmt::format("{:04d}-{:02d}", year, month); - } - case TransformType::kHour: { - int64_t seconds = static_cast(value) * 3600; - std::tm tmValue; - VELOX_USER_CHECK( - Timestamp::epochToCalendarUtc(seconds, tmValue), - "Failed to convert seconds to time: {}", - seconds); - return fmt::format( - "{:04d}-{:02d}-{:02d}-{:02d}", - 
tmValue.tm_year + 1900, - tmValue.tm_mon + 1, - tmValue.tm_mday, - tmValue.tm_hour); - } - default: - return fmt::to_string(value); - } -} - -std::string IcebergPartitionName::toName( - Timestamp value, - const TypePtr& type, - TransformType transformType) { - VELOX_CHECK(transformType == TransformType::kIdentity); - TimestampToStringOptions options; - options.precision = TimestampPrecision::kMilliseconds; - options.zeroPaddingYear = true; - options.skipTrailingZeros = true; - options.leadingPositiveSign = true; - return value.toString(options); -} - -std::string IcebergPartitionName::toName( - StringView value, - const TypePtr& type, - TransformType transformType) { - VELOX_CHECK( - transformType == TransformType::kIdentity || - transformType == TransformType::kTruncate); - if (type->isVarbinary()) { - return encoding::Base64::encode(value.data(), value.size()); - } - return std::string(value); -} - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergPartitionName.h b/velox/connectors/hive/iceberg/IcebergPartitionName.h deleted file mode 100644 index 751f3620c80..00000000000 --- a/velox/connectors/hive/iceberg/IcebergPartitionName.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "velox/connectors/hive/HivePartitionName.h" -#include "velox/connectors/hive/iceberg/PartitionSpec.h" - -namespace facebook::velox::connector::hive::iceberg { - -/// Generates Iceberg-compliant partition path names. -/// Converts partition keys to human-readable strings based on their transform -/// types (e.g., year, month, day, hour, identity, truncate) and constructs -/// URL-encoded partition paths in the format "key1=value1/key2=value2/...". -class IcebergPartitionName { - public: - /// @param partitionSpec Iceberg partition specification containing transform - /// definitions for each partition field. Used to get transform type and call - /// different format functions to convert transformed partition values to - /// human-readable strings. - IcebergPartitionName(const IcebergPartitionSpecPtr& partitionSpec); - - /// Generates an Iceberg compliant partition path string for the given - /// partition ID. - /// - /// Constructs a partition path in the format "key1=value1/key2=value2/..." - /// where: - /// - Keys are partition column names for identity transforms, or - /// "columnName_transformName" for non-identity transforms (e.g., - /// "date_year") - /// - Values are human-readable string representations of the transformed - /// partition keys, formatted according to their transform types - /// - Both keys and values are URL-encoded per java.net.URLEncoder.encode() - /// - /// Example: "store_id=123/date_year=2025/address_bucket=1" - /// - /// Typically called once per partition ID when creating a new writer for that - /// partition. - /// - /// @param partitionId Sequential partition ID (0-based) used as the row index - /// into partitionValues. Must be less than partitionValues->size(). - /// @param partitionValues RowVector containing transformed partition keys - /// for all partitions. Each row represents one unique partition, with - /// columns corresponding to partition fields in partitionSpec. 
Row at - /// partitionId contains the keys for this specific partition. - /// @param partitionKeyAsLowerCase Whether to convert partition keys to - /// lowercase in the generated partition path. When true, partition keys like - /// "Year" become "year" in the path "year=2025/...". - /// @return URL-encoded partition path string suitable for use in file paths. - std::string partitionName( - uint32_t partitionId, - const RowVectorPtr& partitionValues, - bool partitionKeyAsLowerCase) const; - - /// Generic template for formatting simple types that just need string - /// conversion. Specialized for types that need special handling. - template - FOLLY_ALWAYS_INLINE static std::string - toName(T value, const TypePtr& type, TransformType transformType) { - return HivePartitionName::toName(value, type); - } - - /// Converts an int32_t partition key to its string representation based on - /// the transform type: - /// - kIdentity: For DATE type return "YYYY-MM-DD" format (e.g., - /// "2025-11-07"). - /// For other types return the value as-is (e.g., "-123"). - /// - kDay: Returns date in "YYYY-MM-DD" format (e.g., "2025-11-07"). - /// - kYear: Returns 4-digit year "YYYY" (e.g., "2025"). - /// - kMonth: Returns "YYYY-MM" format (e.g., "2025-01"). - /// - kHour: Returns "YYYY-MM-DD-HH" format (e.g., "2025-11-07-21"). - static std::string - toName(int32_t value, const TypePtr& type, TransformType transformType); - - /// Returns timestamp formatted with milliseconds precision, zero-padded - /// year, trailing zeros skipped, and leading positive sign for years >= - /// 10000. Examples: - /// - Timestamp(0, 0) -> "1970-01-01T00:00:00". - /// - Timestamp(1609459200, 999000000) -> "2021-01-01T00:00:00.999". - /// - Timestamp(1640995200, 500000000) -> "2022-01-01T00:00:00.5". - /// - Timestamp(-1, 999000000) -> "1969-12-31T23:59:59.999". - /// - Timestamp(253402300800, 100000000) -> "+10000-01-01T00:00:00.1". 
- static std::string - toName(Timestamp value, const TypePtr& type, TransformType transformType); - - /// Converts a StringView partition key to its string representation. - /// - For VARBINARY type returns Base64-encoded string. - /// - For VARCHAR type returns the string value as-is. - static std::string - toName(StringView value, const TypePtr& type, TransformType transformType); - - private: - // Cached transform types, one per partition column. Created once in - // constructor and reused for all formatting operations. Index corresponds to - // column index in partitionSpec_->fields. - std::vector transformTypes_; -}; - -using IcebergPartitionNamePtr = std::shared_ptr; - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergSplitReader.cpp b/velox/connectors/hive/iceberg/IcebergSplitReader.cpp index 63167152f3f..83b4f93c467 100644 --- a/velox/connectors/hive/iceberg/IcebergSplitReader.cpp +++ b/velox/connectors/hive/iceberg/IcebergSplitReader.cpp @@ -113,8 +113,11 @@ IcebergSplitReader::IcebergSplitReader( FileHandleFactory* const fileHandleFactory, folly::Executor* executor, const std::shared_ptr& scanSpec, - std::shared_ptr columnHandles) - : FileSplitReader( + std::shared_ptr columnHandles, + const std::unordered_map* infoColumns, + std::vector bucketChannels, + const common::SubfieldFilters* subfieldFiltersForValidation) + : HiveSplitReader( icebergSplit, tableHandle, partitionKeys, @@ -126,13 +129,18 @@ IcebergSplitReader::IcebergSplitReader( ioStats, fileHandleFactory, executor, - scanSpec), + scanSpec, + infoColumns, + std::move(bucketChannels), + subfieldFiltersForValidation), icebergSplit_(icebergSplit), baseReadOffset_(0), splitOffset_(0), deleteBitmap_(nullptr), columnHandles_(std::move(columnHandles)) {} +IcebergSplitReader::~IcebergSplitReader() {} + void IcebergSplitReader::prepareSplit( std::shared_ptr metadataFilter, dwio::common::RuntimeStatistics& runtimeStats, @@ -363,9 +371,7 @@ void 
IcebergSplitReader::prepareSplit( deleteFile, splitOffset_, connectorQueryCtx_->memoryPool())); } } else { - VELOX_NYI( - "Unsupported delete file content type: {}", - static_cast(deleteFile.content)); + // Iceberg core code - removed VELOX_NYI } } } diff --git a/velox/connectors/hive/iceberg/IcebergSplitReader.h b/velox/connectors/hive/iceberg/IcebergSplitReader.h index 7bba8464b57..34fa87e8a7e 100644 --- a/velox/connectors/hive/iceberg/IcebergSplitReader.h +++ b/velox/connectors/hive/iceberg/IcebergSplitReader.h @@ -19,7 +19,7 @@ #include #include "velox/connectors/Connector.h" -#include "velox/connectors/hive/FileSplitReader.h" +#include "velox/connectors/hive/HiveSplitReader.h" #include "velox/connectors/hive/iceberg/DeletionVectorReader.h" #include "velox/connectors/hive/iceberg/EqualityDeleteFileReader.h" #include "velox/connectors/hive/iceberg/PositionalDeleteFileReader.h" @@ -29,7 +29,7 @@ namespace facebook::velox::connector::hive::iceberg { struct HiveIcebergSplit; struct IcebergDeleteFile; -class IcebergSplitReader : public FileSplitReader { +class IcebergSplitReader : public HiveSplitReader { public: IcebergSplitReader( const std::shared_ptr& icebergSplit, @@ -44,9 +44,13 @@ class IcebergSplitReader : public FileSplitReader { FileHandleFactory* fileHandleFactory, folly::Executor* executor, const std::shared_ptr& scanSpec, - std::shared_ptr columnHandles); + std::shared_ptr columnHandles, + const std::unordered_map* infoColumns = + nullptr, + std::vector bucketChannels = {}, + const common::SubfieldFilters* subfieldFiltersForValidation = nullptr); - ~IcebergSplitReader() override = default; + ~IcebergSplitReader() override; void prepareSplit( std::shared_ptr metadataFilter, diff --git a/velox/connectors/hive/iceberg/Murmur3.cpp b/velox/connectors/hive/iceberg/Murmur3.cpp new file mode 100644 index 00000000000..a6558367b4f --- /dev/null +++ b/velox/connectors/hive/iceberg/Murmur3.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) Facebook, Inc. 
and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "velox/connectors/hive/iceberg/Murmur3.h"
+#include "velox/type/DecimalUtil.h"
+#include "velox/type/HugeInt.h"
+
+namespace facebook::velox::connector::hive::iceberg {
+
+// Hashes 'len' bytes at 'input'. Each 4-byte block is assembled byte by byte
+// as a little-endian word, so 'input' needs no particular alignment.
+int32_t Murmur3Hash32::hash(const char* const input, size_t len) {
+  const auto* data = reinterpret_cast<const uint8_t*>(input);
+  const size_t nblocks = len / 4;
+  uint32_t h1{kDefaultSeed};
+
+  // Body: mix every complete 4-byte block; 'data' advances past each one.
+  for (size_t i = 0; i < nblocks; ++i, data += 4) {
+    const uint32_t block = static_cast<uint32_t>(data[0]) |
+        (static_cast<uint32_t>(data[1]) << 8) |
+        (static_cast<uint32_t>(data[2]) << 16) |
+        (static_cast<uint32_t>(data[3]) << 24);
+    h1 = mixH1(h1, mixK1(block));
+  }
+
+  // Tail: 'data' now points at the remaining 0-3 bytes.
+  uint32_t k1{0};
+  switch (len & 3) {
+    case 3:
+      k1 ^= static_cast<uint32_t>(data[2]) << 16;
+      [[fallthrough]];
+    case 2:
+      k1 ^= static_cast<uint32_t>(data[1]) << 8;
+      [[fallthrough]];
+    case 1:
+      k1 ^= data[0];
+      h1 ^= mixK1(k1);
+  }
+
+  // Finalization.
+  return fmix32(h1, len);
+}
+
+// Hashes the bytes referenced by 'value'.
+int32_t Murmur3Hash32::hash(const StringView& value) {
+  return hash(value.data(), value.size());
+}
+
+// Mixes the low 32 bits of 'value', then the high 32 bits, and finalizes
+// with a length of sizeof(uint64_t) bytes.
+int32_t Murmur3Hash32::hash(uint64_t value) {
+  const auto low = static_cast<uint32_t>(value & 0xFFFFFFFF);
+  const auto high = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFF);
+
+  auto h1 = mixH1(kDefaultSeed, mixK1(low));
+  h1 = mixH1(h1, mixK1(high));
+  return fmix32(h1, sizeof(uint64_t));
+}
+
+// Hashes 'value' through the byte array written by DecimalUtil::toByteArray,
+// using the first DecimalUtil::getByteArrayLength(value) bytes of it.
+int32_t Murmur3Hash32::hashDecimal(int128_t value) {
+  char bytes[16];
+  const auto length = DecimalUtil::getByteArrayLength(value);
+  DecimalUtil::toByteArray(value, bytes);
+  return hash(bytes, length);
+}
+
+} // namespace facebook::velox::connector::hive::iceberg
diff --git a/velox/connectors/hive/iceberg/Murmur3.h b/velox/connectors/hive/iceberg/Murmur3.h
new file mode 100644
index 00000000000..7b0d3fff81e
--- /dev/null
+++ b/velox/connectors/hive/iceberg/Murmur3.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "velox/type/HugeInt.h"
+#include "velox/type/StringView.h"
+
+namespace facebook::velox::connector::hive::iceberg {
+constexpr uint32_t kDefaultSeed = 0; // Initial h1 of every hash.
+constexpr uint32_t kC1 = 0xCC9E2D51; // First multiplier applied by mixK1.
+constexpr uint32_t kC2 = 0x1B873593; // Second multiplier applied by mixK1.
+
+/// 32-bit Murmur3 hash helpers; every overload starts from kDefaultSeed.
+class Murmur3Hash32 final {
+ public:
+  /// Hashes the low 32 bits of 'value', then its high 32 bits.
+  static int32_t hash(uint64_t value);
+  /// Hashes the bytes referenced by 'value'.
+  static int32_t hash(const StringView& value);
+  /// Hashes 'length' bytes starting at 'data'.
+  static int32_t hash(const char* const data, size_t length);
+  /// Hashes the bytes DecimalUtil::toByteArray produces for 'value'.
+  static int32_t hashDecimal(int128_t value);
+
+ private:
+  FOLLY_ALWAYS_INLINE static uint32_t mixK1(uint32_t k1) {
+    k1 *= kC1;
+    k1 = (k1 << 15) | (k1 >> (32 - 15)); // Rotate left by 15 bits.
+    return k1 * kC2;
+  }
+
+  FOLLY_ALWAYS_INLINE static uint32_t mixH1(uint32_t h1, uint32_t k1) {
+    h1 ^= k1;
+    h1 = (h1 << 13) | (h1 >> (32 - 13)); // Rotate left by 13 bits.
+    return h1 * 5 + 0xE6546B64;
+  }
+
+  FOLLY_ALWAYS_INLINE static uint32_t fmix32(uint32_t h1, size_t length) {
+    h1 ^= static_cast<uint32_t>(length); // Only the low 32 bits take part.
+    h1 ^= h1 >> 16;
+    h1 *= 0x85EBCA6B;
+    h1 ^= h1 >> 13;
+    h1 *= 0xC2B2AE35;
+    h1 ^= h1 >> 16;
+    return h1;
+  }
+};
+
+} // namespace facebook::velox::connector::hive::iceberg
diff --git a/velox/connectors/hive/iceberg/PartitionSpec.cpp b/velox/connectors/hive/iceberg/PartitionSpec.cpp index 0cd39d05d3a..b08955fa9f1 100644 --- a/velox/connectors/hive/iceberg/PartitionSpec.cpp +++ b/velox/connectors/hive/iceberg/PartitionSpec.cpp @@ -18,65 +18,10 @@ #include "velox/common/EnumDefine.h" -#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" - namespace facebook::velox::connector::hive::iceberg { namespace { -TransformCategory getTransformCategory(TransformType transformType) { - switch (transformType) { - case TransformType::kIdentity: - return TransformCategory::kIdentity; - case TransformType::kYear: - case TransformType::kMonth: - case TransformType::kDay: - case TransformType::kHour: - return TransformCategory::kTemporal; - case TransformType::kBucket: - return TransformCategory::kBucket; - case TransformType::kTruncate: -
return TransformCategory::kTruncate; - default: - VELOX_UNREACHABLE("Unknown transform type"); - } -} - -bool isValidPartitionType(const TypePtr& type) { - return !( - type->isRow() || type->isArray() || type->isMap() || type->isDouble() || - type->isReal() || isTimestampWithTimeZoneType(type)); -} - -bool canTransform(TransformType transformType, const TypePtr& type) { - if (type->isTimestamp()) { - VELOX_DCHECK(type->equivalent(*TIMESTAMP())); - } - - switch (transformType) { - case TransformType::kIdentity: - return type->isTinyint() || type->isSmallint() || type->isInteger() || - type->isBigint() || type->isBoolean() || type->isDecimal() || - type->isDate() || type->isTimestamp() || type->isVarchar() || - type->isVarbinary(); - case TransformType::kYear: - case TransformType::kMonth: - case TransformType::kDay: - return type->isDate() || type->isTimestamp(); - case TransformType::kHour: - return type->isTimestamp(); - case TransformType::kBucket: - return type->isInteger() || type->isBigint() || type->isDecimal() || - type->isVarchar() || type->isVarbinary() || type->isDate() || - type->isTimestamp(); - case TransformType::kTruncate: - return type->isInteger() || type->isBigint() || type->isDecimal() || - type->isVarchar() || type->isVarbinary(); - default: - VELOX_UNREACHABLE("Unsupported partition transform type."); - } -} - const auto& transformTypeNames() { static const folly::F14FastMap kTransformNames = { @@ -86,78 +31,12 @@ const auto& transformTypeNames() { {TransformType::kMonth, "month"}, {TransformType::kYear, "year"}, {TransformType::kBucket, "bucket"}, - {TransformType::kTruncate, "trunc"}, - }; + {TransformType::kTruncate, "trunc"}}; return kTransformNames; } -const auto& transformCategoryNames() { - static const folly::F14FastMap - kTransformCategoryNames = { - {TransformCategory::kIdentity, "Identity"}, - {TransformCategory::kBucket, "Bucket"}, - {TransformCategory::kTruncate, "Truncate"}, - {TransformCategory::kTemporal, "Temporal"}, - }; - 
return kTransformCategoryNames; -} - } // namespace VELOX_DEFINE_ENUM_NAME(TransformType, transformTypeNames); -VELOX_DEFINE_ENUM_NAME(TransformCategory, transformCategoryNames); - -void IcebergPartitionSpec::checkCompatibility() const { - folly::F14FastMap> - columnTransforms; - - for (const auto& field : fields) { - const auto& type = field.type; - const auto& name = field.name; - VELOX_USER_CHECK( - isValidPartitionType(type), - "Type is not supported as a partition column: {}", - type->name()); - - VELOX_USER_CHECK( - canTransform(field.transformType, type), - "Transform is not supported for partition column. Column: '{}', Type: '{}', Transform: '{}'.", - name, - type->name(), - TransformTypeName::toName(field.transformType)); - - columnTransforms[name].emplace_back(field.transformType); - } - - // Check for duplicate transform categories per column. - std::vector errors; - for (const auto& [columnName, transforms] : columnTransforms) { - folly::F14FastSet seenCategories; - for (const auto& transform : transforms) { - auto category = getTransformCategory(transform); - if (!seenCategories.insert(category).second) { - std::vector transformNames; - for (const auto& t : transforms) { - transformNames.emplace_back( - std::string(TransformTypeName::toName(t))); - } - errors.emplace_back( - fmt::format( - "Column: '{}', Category: {}, Transforms: [{}]", - columnName, - TransformCategoryName::toName(category), - folly::join(", ", transformNames))); - break; - } - } - } - - VELOX_USER_CHECK( - errors.empty(), - "Multiple transforms of the same category on a column are not allowed. " - "Each transform category can appear at most once per column. 
{}", - folly::join("; ", errors)); -} - } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/PartitionSpec.h b/velox/connectors/hive/iceberg/PartitionSpec.h index 99297da0edc..b096d264dbb 100644 --- a/velox/connectors/hive/iceberg/PartitionSpec.h +++ b/velox/connectors/hive/iceberg/PartitionSpec.h @@ -20,127 +20,43 @@ namespace facebook::velox::connector::hive::iceberg { -/// Partition transform types. -/// Defines how source column values are converted into partition keys. -/// See https://iceberg.apache.org/spec/#partition-transforms. enum class TransformType { - /// Use the source value as-is (no transformation). kIdentity, - /// Extract a timestamp hour, as hours from 1970-01-01 00:00:00. kHour, - /// Extract a date or timestamp day, as days from 1970-01-01. kDay, - /// Extract a date or timestamp month, as months from 1970-01. kMonth, - /// Extract a date or timestamp year, as years from 1970. kYear, - /// Hash the value into N buckets for even distribution. Requires an integer - /// parameter specifying the bucket count. kBucket, - /// Truncate strings or numbers to a specified width. Requires an integer - /// parameter specifying the truncate width. kTruncate }; VELOX_DECLARE_ENUM_NAME(TransformType); -/// A single column can be used to produce multiple partition keys, but with -/// following restrictions: -/// - Transforms are organized into 4 categories: Identity, Temporal, -/// Bucket, and Truncate. -/// - Each category can appear at most once per column. -/// - Sample valid specs on same column: ['truncate(a,2)', 'bucket(a,16)', 'a'] -/// or ['year(b)', 'bucket(b, 16)', 'b'] -enum class TransformCategory { - kIdentity, - /// Year/Month/Day/Hour - kTemporal, - kBucket, - kTruncate, -}; - -VELOX_DECLARE_ENUM_NAME(TransformCategory); - -/// Represents how to produce partition data for an Iceberg table. 
-/// -/// This structure corresponds to the Iceberg Java PartitionSpec class but -/// contains only the necessary fields for Velox. Partition keys are computed -/// by transforming columns in a table. -/// -/// The upstream engine processes this specification through the Iceberg Java -/// library to validate column types, detect duplicates, and generate the -/// partition spec that is passed to Velox. -/// -/// IMPORTANT: Iceberg spec uses field IDs to identify source columns, but -/// Velox RowType only supports matching fields by name. Therefore, Velox uses -/// the partition field name to match against the table schema column names. -/// Callers must ensure that partition field names exactly match the column -/// names in the table schema. -/// -/// The partition spec contains: -/// - Unique ID for versioning and evolution. -/// - Which source columns in current table schema to use for partitioning -/// (identified by field name, not field ID as in the Iceberg spec). -/// - What transforms to apply (identity, bucket, truncate etc.). -/// - Transform parameters (e.g., bucket count, truncate width). struct IcebergPartitionSpec { struct Field { - /// Column name as defined in table schema. This column's value is used to - /// compute partition key by applying 'transformType' transformation. - const std::string name; - - /// Column type. - const TypePtr type; - - /// Transform to apply. Callers must ensure the transform is compatible with - /// the column type. - const TransformType transformType; - - /// Optional parameter for transforms that require configuration. - const std::optional parameter; - - /// Returns the result type after applying this transform. 
- TypePtr resultType() const { - switch (transformType) { - case TransformType::kBucket: - case TransformType::kYear: - case TransformType::kMonth: - case TransformType::kHour: - return INTEGER(); - case TransformType::kDay: - return DATE(); - case TransformType::kIdentity: - case TransformType::kTruncate: - return type; - } - VELOX_UNREACHABLE("Unknown transform type"); - } + // The field name of this partition field as it appears in the partition + // spec. This is the original Iceberg field name, not the transformed name + // from org.apache.iceberg.PartitionField which includes the transform as a + // suffix. + std::string name; + + // The source column type. + TypePtr type; + + // The transform type applied to the source field (e.g., kIdentity, kBucket, + // kTruncate, etc.). + TransformType transformType; + + // Optional parameter for transforms that require configuration + // (e.g., bucket count or truncate width). + std::optional parameter; }; const int32_t specId; const std::vector fields; - /// Constructor with validation that: - /// - Each field's type is supported for partitioning. - /// - Each field's transform type is compatible with its data type. - /// - No transform category appears more than once per column (Identity, - /// Temporal, Bucket, and Truncate are separate categories). - /// - /// @param _specId Partition specification ID. - /// @param _fields Vector of partition fields. When empty indicates no - /// partition. - /// @throws VeloxUserError if validation fails. - IcebergPartitionSpec(int32_t _specId, std::vector _fields) - : specId(_specId), fields(std::move(_fields)) { - checkCompatibility(); - } - - private: - // Validates partition fields for correctness. - // Checks type/transform compatibility and transform combination rules. 
- void checkCompatibility() const; + IcebergPartitionSpec(int32_t _specId, const std::vector& _fields) + : specId(_specId), fields(_fields) {} }; -using IcebergPartitionSpecPtr = std::shared_ptr; - } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformEvaluator.cpp b/velox/connectors/hive/iceberg/TransformEvaluator.cpp deleted file mode 100644 index 2744bddafa7..00000000000 --- a/velox/connectors/hive/iceberg/TransformEvaluator.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/connectors/hive/iceberg/TransformEvaluator.h" - -#include "velox/expression/Expr.h" - -namespace facebook::velox::connector::hive::iceberg { - -TransformEvaluator::TransformEvaluator( - const std::vector& expressions, - const ConnectorQueryCtx* connectorQueryCtx) - : connectorQueryCtx_(connectorQueryCtx) { - VELOX_CHECK_NOT_NULL(connectorQueryCtx_); - exprSet_ = connectorQueryCtx_->expressionEvaluator()->compile(expressions); - VELOX_CHECK_NOT_NULL(exprSet_); -} - -std::vector TransformEvaluator::evaluate( - const RowVectorPtr& input) const { - const auto numRows = input->size(); - const auto numExpressions = exprSet_->exprs().size(); - - std::vector results(numExpressions); - SelectivityVector rows(numRows); - - // Evaluate all expressions in one pass. 
- connectorQueryCtx_->expressionEvaluator()->evaluate( - exprSet_.get(), rows, *input, results); - - return results; -} - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformEvaluator.h b/velox/connectors/hive/iceberg/TransformEvaluator.h deleted file mode 100644 index ee7b26f7db8..00000000000 --- a/velox/connectors/hive/iceberg/TransformEvaluator.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "velox/connectors/Connector.h" -#include "velox/core/QueryCtx.h" -#include "velox/expression/Expr.h" - -namespace facebook::velox::connector::hive::iceberg { - -/// Evaluates multiple expressions efficiently using batch evaluation. -/// Expressions are compiled once in the constructor and reused across multiple -/// input batches. -class TransformEvaluator { - public: - /// Creates an evaluator with the given expressions and connector query - /// context. Compiles the expressions once for reuse across multiple - /// evaluations. - /// - /// @param expressions Vector of typed expressions to evaluate. These are - /// typically built using TransformExprBuilder::toExpressions() for Iceberg - /// partition transforms, but can be any valid Velox expressions. The - /// expressions are compiled once during construction. 
- /// @param connectorQueryCtx Connector query context providing access to the - /// expression evaluator (for compilation and evaluation) and memory pool. - /// Must remain valid for the lifetime of this TransformEvaluator. - TransformEvaluator( - const std::vector& expressions, - const ConnectorQueryCtx* connectorQueryCtx); - - /// Evaluates all expressions on the input data in a single pass. - /// Uses the pre-compiled ExprSet from the constructor for efficiency. - /// - /// The input RowType must match the RowType used when building the - /// expressions (passed to TransformExprBuilder::toExpressions). The column - /// positions, names and types must align. Create new TransformEvaluator for - /// input that has different RowType with the one when building the - /// expressions. - /// - /// @param input Input row vector containing the source data. Must have the - /// same RowType (column positions, names and types) as used when building the - /// expressions in the constructor. - /// @return Vector of result columns, one for each expression, in the same - /// order as the expressions provided to the constructor. - std::vector evaluate(const RowVectorPtr& input) const; - - private: - const ConnectorQueryCtx* connectorQueryCtx_; - std::unique_ptr exprSet_; -}; - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformExprBuilder.cpp b/velox/connectors/hive/iceberg/TransformExprBuilder.cpp deleted file mode 100644 index 4befbfb50b0..00000000000 --- a/velox/connectors/hive/iceberg/TransformExprBuilder.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "velox/connectors/hive/iceberg/TransformExprBuilder.h" -#include "velox/core/Expressions.h" - -namespace facebook::velox::connector::hive::iceberg { - -namespace { - -/// Converts a single partition field to a typed expression. -/// -/// Builds an expression tree for one partition transform. Identity transforms -/// become FieldAccessTypedExpr, while other transforms (bucket, truncate, -/// year, month, day, hour) become CallTypedExpr with appropriate function -/// names and parameters. -/// -/// @param field Partition field containing transform type, source column -/// type, and optional parameter (e.g., bucket count, truncate width). -/// @param inputFieldName Name of the source column in the input RowVector. -/// @param icebergFuncPrefix Prefix of iceberg transform function names. -/// @return Typed expression representing the transform. -core::TypedExprPtr toExpression( - const IcebergPartitionSpec::Field& field, - const std::string& inputFieldName, - const std::string& icebergFuncPrefix) { - // For identity transform, just return a field access expression. - if (field.transformType == TransformType::kIdentity) { - return std::make_shared( - field.type, inputFieldName); - } - - // For other transforms, build a CallTypedExpr with the appropriate function. 
- std::string functionName; - switch (field.transformType) { - case TransformType::kBucket: - functionName = icebergFuncPrefix + "bucket"; - break; - case TransformType::kTruncate: - functionName = icebergFuncPrefix + "truncate"; - break; - case TransformType::kYear: - functionName = icebergFuncPrefix + "years"; - break; - case TransformType::kMonth: - functionName = icebergFuncPrefix + "months"; - break; - case TransformType::kDay: - functionName = icebergFuncPrefix + "days"; - break; - case TransformType::kHour: - functionName = icebergFuncPrefix + "hours"; - break; - case TransformType::kIdentity: - break; - } - - // Build the expression arguments. - std::vector exprArgs; - if (field.parameter.has_value()) { - exprArgs.emplace_back( - std::make_shared( - INTEGER(), Variant(field.parameter.value()))); - } - exprArgs.emplace_back( - std::make_shared(field.type, inputFieldName)); - - return std::make_shared( - field.resultType(), std::move(exprArgs), functionName); -} - -} // namespace - -std::vector TransformExprBuilder::toExpressions( - const IcebergPartitionSpecPtr& partitionSpec, - const std::vector& partitionChannels, - const RowTypePtr& inputType, - const std::string& icebergFuncPrefix) { - VELOX_CHECK_EQ( - partitionSpec->fields.size(), - partitionChannels.size(), - "Number of partition fields must match number of partition channels"); - - const auto numTransforms = partitionChannels.size(); - std::vector transformExprs; - transformExprs.reserve(numTransforms); - - for (auto i = 0; i < numTransforms; i++) { - const auto channel = partitionChannels[i]; - transformExprs.emplace_back(toExpression( - partitionSpec->fields.at(i), - inputType->nameOf(channel), - icebergFuncPrefix)); - } - - return transformExprs; -} - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformExprBuilder.h b/velox/connectors/hive/iceberg/TransformExprBuilder.h deleted file mode 100644 index b583adcf97d..00000000000 --- 
a/velox/connectors/hive/iceberg/TransformExprBuilder.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/connectors/hive/iceberg/PartitionSpec.h" -#include "velox/expression/Expr.h" - -namespace facebook::velox::connector::hive::iceberg { - -/// Converts Iceberg partition specification to Velox expressions. -class TransformExprBuilder { - public: - /// Converts partition specification to a list of typed expressions. - /// - /// @param partitionSpec Iceberg partition specification containing transform - /// definitions for each partition field. - /// @param partitionChannels Column indices (0-based) in the input RowVector - /// that correspond to each partition field. Must have the same size as - /// partitionSpec->fields. Provides the positional mapping from partition spec - /// fields to input RowVector columns. - /// @param inputType The row type of the input data. This is necessary for - /// building expressions because the column names in partitionSpec reference - /// table schema names, which might not match the column names in inputType - /// (e.g., inputType may use generated names like c0, c1, c2). The - /// FieldAccessTypedExpr must be built using the actual column names from - /// inputType that will be present at runtime. The partitionChannels provide - /// the positional mapping to locate the correct columns. 
- /// @param icebergFuncPrefix Prefix for Iceberg transform function names. - /// @return Vector of typed expressions, one for each partition field. - static std::vector toExpressions( - const IcebergPartitionSpecPtr& partitionSpec, - const std::vector& partitionChannels, - const RowTypePtr& inputType, - const std::string& icebergFuncPrefix); -}; - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformFactory.cpp b/velox/connectors/hive/iceberg/TransformFactory.cpp new file mode 100644 index 00000000000..2c45106c01b --- /dev/null +++ b/velox/connectors/hive/iceberg/TransformFactory.cpp @@ -0,0 +1,290 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/hive/iceberg/TransformFactory.h" + +#include "velox/functions/lib/TimeUtils.h" +#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" +#include "velox/vector/ComplexVector.h" + +namespace facebook::velox::connector::hive::iceberg { +namespace { +int32_t epochYear(int32_t daysSinceEpoch) { + const std::tm tm = functions::getDateTime(daysSinceEpoch); + // tm_year is the number of years since 1900. 
+ return tm.tm_year + 1900 - 1970; +} + +int32_t epochYear(Timestamp ts) { + return functions::getYear(functions::getDateTime(ts, nullptr)) - 1970; +} + +int32_t epochMonth(int32_t daysSinceEpoch) { + const std::tm tm = functions::getDateTime(daysSinceEpoch); + return (tm.tm_year + 1900 - 1970) * 12 + tm.tm_mon; +} + +int32_t epochMonth(Timestamp ts) { + const std::tm tm = functions::getDateTime(ts, nullptr); + return (tm.tm_year + 1900 - 1970) * 12 + tm.tm_mon; +} + +int32_t epochDay(int32_t daysSinceEpoch) { + return daysSinceEpoch; +} + +int32_t epochDay(Timestamp ts) { + const auto seconds = ts.getSeconds(); + return (seconds >= 0) ? seconds / Timestamp::kSecondsInDay + : ((seconds + 1) / Timestamp::kSecondsInDay) - 1; +} + +int32_t epochHour(Timestamp ts) { + const auto seconds = ts.getSeconds(); + return (seconds >= 0) ? seconds / 3600 : ((seconds + 1) / 3600) - 1; +} + +bool isValidPartitionType(TypePtr type) { + if (type->isRow() || type->isArray() || type->isMap() || + isTimestampWithTimeZoneType(type)) { + return false; + } + return true; +} + +template +std::shared_ptr createDateTimeTransform( + TransformType transformType, + const IcebergPartitionSpec::Field& field, + std::function::NativeType)> epochFunc, + memory::MemoryPool* pool) { + using NativeType = typename TypeTraits::NativeType; + VELOX_DCHECK_EQ( + true, + field.type->isDate() || field.type->isTimestamp(), + "Unsupported column type {} for transform {}", + field.type->name(), + TransformTypeName::toName(transformType)); + return std::make_shared>( + field.type, transformType, field.name, pool, epochFunc); +} + +template +std::shared_ptr createIdentityTransform( + const IcebergPartitionSpec::Field& field, + memory::MemoryPool* pool) { + using NativeType = typename TypeTraits::NativeType; + return std::make_shared>( + field.type, field.name, pool); +} + +template +std::shared_ptr createBucketTransform( + const IcebergPartitionSpec::Field& field, + int32_t count, + memory::MemoryPool* pool) { + 
VELOX_USER_CHECK_GT(count, 0, "Bucket count must be positive."); + using NativeType = typename TypeTraits::NativeType; + return std::make_shared>( + count, field.type, field.name, pool); +} + +template +std::shared_ptr createTruncateTransform( + const IcebergPartitionSpec::Field& field, + int32_t width, + memory::MemoryPool* pool) { + VELOX_USER_CHECK_GT(width, 0, "Truncate width must be positive."); + using NativeType = typename TypeTraits::NativeType; + return std::make_shared>( + width, field.type, field.name, pool); +} + +std::shared_ptr buildColumnTransform( + const IcebergPartitionSpec::Field& field, + memory::MemoryPool* pool) { + if (!isValidPartitionType(field.type)) { + VELOX_USER_FAIL( + fmt::format( + "Type not supported as partition column: {}.", field.type->name())); + } + switch (field.transformType) { + // Identity transform. + case TransformType::kIdentity: { + return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( + createIdentityTransform, field.type->kind(), field, pool); + } + // Year transform. + case TransformType::kYear: { + if (field.type->isDate()) { + return createDateTimeTransform( + TransformType::kYear, + field, + [](int32_t v) { return epochYear(v); }, + pool); + } + + if (field.type->isTimestamp()) { + return createDateTimeTransform( + TransformType::kYear, + field, + [](Timestamp v) { return epochYear(v); }, + pool); + } + + VELOX_UNREACHABLE( + fmt::format( + "Unsupported column type {} for transform year.", + field.type->name())); + } + // Month transform. + case TransformType::kMonth: { + if (field.type->isDate()) { + return createDateTimeTransform( + TransformType::kMonth, + field, + [](int32_t v) { return epochMonth(v); }, + pool); + } + + if (field.type->isTimestamp()) { + return createDateTimeTransform( + TransformType::kMonth, + field, + [](Timestamp v) { return epochMonth(v); }, + pool); + } + + VELOX_UNREACHABLE( + fmt::format( + "Unsupported column type {} for transform month.", + field.type->name())); + } + // Day transform. 
+ case TransformType::kDay: { + if (field.type->isDate()) { + return createDateTimeTransform( + TransformType::kDay, + field, + [](int32_t v) { return epochDay(v); }, + pool); + } + + if (field.type->isTimestamp()) { + return createDateTimeTransform( + TransformType::kDay, + field, + [](Timestamp v) { return epochDay(v); }, + pool); + } + + VELOX_UNREACHABLE( + fmt::format( + "Unsupported column type {} for transform day.", + field.type->name())); + } + // Hour transform. + case TransformType::kHour: { + if (field.type->isTimestamp()) { + return createDateTimeTransform( + TransformType::kHour, + field, + [](Timestamp v) { return epochHour(v); }, + pool); + } + + VELOX_UNREACHABLE( + fmt::format( + "Unsupported column type {} for transform hour.", + field.type->name())); + } + // Bucket transform. + case TransformType::kBucket: { + VELOX_USER_CHECK( + field.parameter.has_value() && field.parameter.value() > 0, + "Bucket transform requires a positive parameter."); + auto numBuckets = field.parameter.value(); + + if (field.type->isInteger() || field.type->isDate()) { + return createBucketTransform( + field, numBuckets, pool); + } + if (field.type->isBigint() || field.type->isShortDecimal()) { + return createBucketTransform(field, numBuckets, pool); + } + if (field.type->isTimestamp()) { + return createBucketTransform( + field, numBuckets, pool); + } + if (field.type->isLongDecimal()) { + return createBucketTransform( + field, numBuckets, pool); + } + if (field.type->isVarchar()) { + return createBucketTransform( + field, numBuckets, pool); + } + if (field.type->isVarbinary()) { + return createBucketTransform( + field, numBuckets, pool); + } + VELOX_UNREACHABLE( + fmt::format( + "Unsupported column type {} for transform bucket.", + field.type->name())); + } + // Truncate transform. 
+ case TransformType::kTruncate: { + VELOX_USER_CHECK( + field.parameter.has_value() && field.parameter.value() > 0, + "Truncate transform requires a positive parameter."); + auto width = field.parameter.value(); + if (field.type->isInteger()) { + return createTruncateTransform(field, width, pool); + } + if (field.type->isBigint() || field.type->isShortDecimal()) { + return createTruncateTransform(field, width, pool); + } + if (field.type->isVarchar()) { + return createTruncateTransform(field, width, pool); + } + if (field.type->isVarbinary()) { + return createTruncateTransform(field, width, pool); + } + VELOX_UNREACHABLE( + fmt::format( + "Unsupported column type {} for transform truncate.", + field.type->name())); + } + default: + VELOX_UNREACHABLE("Unsupported transform."); + } +} + +} // namespace + +std::vector> parsePartitionTransformSpecs( + const std::vector& fields, + memory::MemoryPool* pool) { + std::vector> transforms; + transforms.reserve(fields.size()); + for (auto& field : fields) { + transforms.emplace_back(buildColumnTransform(field, pool)); + } + return transforms; +} + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergConfig.cpp b/velox/connectors/hive/iceberg/TransformFactory.h similarity index 61% rename from velox/connectors/hive/iceberg/IcebergConfig.cpp rename to velox/connectors/hive/iceberg/TransformFactory.h index 1b34b7c4eb7..ea6642db7dc 100644 --- a/velox/connectors/hive/iceberg/IcebergConfig.cpp +++ b/velox/connectors/hive/iceberg/TransformFactory.h @@ -13,23 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#pragma once -#include "velox/connectors/hive/iceberg/IcebergConfig.h" - -#include "velox/common/config/Config.h" +#include "velox/connectors/hive/iceberg/PartitionSpec.h" +#include "velox/connectors/hive/iceberg/Transforms.h" namespace facebook::velox::connector::hive::iceberg { -IcebergConfig::IcebergConfig( - const std::shared_ptr& config) - : config_(config) { - VELOX_CHECK_NOT_NULL( - config_, "Config is null for IcebergConfig initialization"); -} - -std::string IcebergConfig::functionPrefix() const { - return config_->get( - kFunctionPrefixConfig, kDefaultFunctionPrefix); -} +std::vector> parsePartitionTransformSpecs( + const std::vector& fields, + memory::MemoryPool* pool); } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/Transforms.cpp b/velox/connectors/hive/iceberg/Transforms.cpp new file mode 100644 index 00000000000..a400270c58d --- /dev/null +++ b/velox/connectors/hive/iceberg/Transforms.cpp @@ -0,0 +1,226 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/hive/iceberg/Transforms.h" + +#include "velox/connectors/hive/iceberg/Murmur3.h" +#include "velox/functions/lib/string/StringImpl.h" +#include "velox/vector/DecodedVector.h" +#include "velox/vector/FlatVector.h" + +namespace facebook::velox::connector::hive::iceberg { + +namespace { + +template +FOLLY_ALWAYS_INLINE void transformValues( + const VectorPtr& block, + const DecodedVector* decoded, + const VectorPtr& result, + ProcessFunc&& processValue) { + if (!decoded->mayHaveNulls()) { + for (auto i = 0; i < decoded->size(); ++i) { + processValue(i); + } + } else { + block->mutableNulls(block->size()); + result->setNulls(block->nulls()); + for (auto i = 0; i < decoded->size(); ++i) { + if (!decoded->isNullAt(i)) { + processValue(i); + } + } + } +} + +} // namespace + +VectorPtr Transform::transform( + const RowVectorPtr& input, + std::optional channel) const { + VectorPtr currentVector = nullptr; + if (channel.has_value()) { + currentVector = input->childAt(channel.value()); + } + VELOX_CHECK_NOT_NULL(currentVector); + return apply(currentVector); +} + +std::string Transform::toHumanString(Timestamp value) const { + TimestampToStringOptions options; + options.precision = TimestampPrecision::kMilliseconds; + options.zeroPaddingYear = true; + options.skipTrailingZeros = true; + options.leadingPositiveSign = true; + options.skipTrailingZeroSeconds = true; + return value.toString(options); +} + +template +VectorPtr IdentityTransform::apply(const VectorPtr& block) const { + if constexpr (!std::is_same_v) { + return block; + } + if (sourceType_->isVarchar()) { + return block; + } + + auto result = + BaseVector::create>(sourceType_, block->size(), pool_); + DecodedVector decoded(*block); + + auto processValue = [&](auto i) { + if constexpr (std::is_same_v) { + T value = decoded.valueAt(i); + auto encodedValue = encoding::Base64::encode(value.data(), value.size()); + result->set(i, StringView(encodedValue)); + } + }; + + 
transformValues(block, &decoded, result, processValue); + return result; +} + +template +VectorPtr BucketTransform::apply(const VectorPtr& block) const { + auto result = + BaseVector::create>(INTEGER(), block->size(), pool_); + + DecodedVector decoded(*block); + + auto processValue = [&](auto i) { + T value = decoded.valueAt(i); + int32_t hashValue; + if constexpr (std::is_same_v || std::is_same_v) { + if (sourceType_->isDecimal()) { + hashValue = Murmur3Hash32::hashDecimal(value); + } else { + hashValue = Murmur3Hash32::hash(value); + } + } else if constexpr (std::is_same_v) { + hashValue = Murmur3Hash32::hash(value.toMicros()); + } else { + hashValue = Murmur3Hash32::hash(value); + } + result->set(i, (hashValue & 0x7FFFFFFF) % numBuckets_); + }; + + transformValues(block, &decoded, result, processValue); + return result; +} + +template +VectorPtr TruncateTransform::apply(const VectorPtr& block) const { + auto result = + BaseVector::create>(sourceType_, block->size(), pool_); + + auto flatResult = result->template as>(); + char* rawBuffer = nullptr; + BufferPtr buffer; + if (std::is_same_v) { + if (sourceType_->isVarchar()) { + buffer = result->getBufferWithSpace(block->size() * width_); + } else { + buffer = result->getBufferWithSpace( + block->size() * encoding::Base64::calculateEncodedSize(width_)); + } + rawBuffer = buffer->asMutable() + buffer->size(); + } + + DecodedVector decoded(*block); + auto processValue = [&](auto i) { + T value = decoded.valueAt(i); + if constexpr ( + std::is_same_v || std::is_same_v || + std::is_same_v) { + flatResult->set(i, value - ((value % width_) + width_) % width_); + } else if constexpr (std::is_same_v) { + if (sourceType_->isVarchar()) { + auto length = + functions::stringImpl::cappedByteLength(value, width_); + if (StringView::isInline(length)) { + flatResult->set(i, StringView(value.data(), length)); + } else { + memcpy(rawBuffer, value.data(), length); + flatResult->setNoCopy(i, StringView(rawBuffer, length)); + rawBuffer 
+= length; + } + } else if (sourceType_->isVarbinary()) { + auto encoded = encoding::Base64::encode( + value.data(), width_ > value.size() ? value.size() : width_); + auto length = encoded.length(); + if (StringView::isInline(length)) { + flatResult->set(i, StringView(encoded)); + } else { + memcpy(rawBuffer, encoded.data(), length); + flatResult->setNoCopy(i, StringView(rawBuffer, length)); + rawBuffer += length; + } + } + } + }; + + transformValues(block, &decoded, result, processValue); + + if constexpr (std::is_same_v) { + buffer->setSize(rawBuffer - (buffer->asMutable() + buffer->size())); + } + return result; +} + +template +VectorPtr TemporalTransform::apply(const VectorPtr& block) const { + auto result = + BaseVector::create>(INTEGER(), block->size(), pool_); + + DecodedVector decoded(*block); + auto processValue = [&](auto i) { + T value = decoded.valueAt(i); + result->set(i, epochFunc_(value)); + }; + + transformValues(block, &decoded, result, processValue); + + return result; +} + +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; +template class IdentityTransform; + +template class BucketTransform; +template class BucketTransform; +template class BucketTransform; +template class BucketTransform; +template class BucketTransform; + +template class TruncateTransform; +template class TruncateTransform; +template class TruncateTransform; +template class TruncateTransform; + +template class TemporalTransform; +template class TemporalTransform; +template class TemporalTransform; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/Transforms.h b/velox/connectors/hive/iceberg/Transforms.h new file mode 100644 index 00000000000..698c0afcbc5 --- /dev/null 
+++ b/velox/connectors/hive/iceberg/Transforms.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/common/encode/Base64.h" +#include "velox/connectors/hive/iceberg/PartitionSpec.h" +#include "velox/type/DecimalUtil.h" +#include "velox/type/Type.h" +#include "velox/vector/ComplexVector.h" + +namespace facebook::velox::connector::hive::iceberg { + +static constexpr int32_t kEpochYear = 1970; + +class Transform { + public: + Transform( + TypePtr type, + TransformType transformType, + const std::string& columnName, + memory::MemoryPool* pool) + : sourceType_(type), + transformType_(transformType), + sourceColumnName_(columnName), + pool_(pool) {} + + virtual ~Transform() = default; + + virtual VectorPtr apply(const VectorPtr& block) const = 0; + + virtual const TypePtr resultType() const = 0; + + TransformType transformType() const { + return transformType_; + } + + const std::string& sourceColumnName() const { + return sourceColumnName_; + } + + // Convert the transformed value to partition name + template + std::string toHumanString(T value) const { + return folly::to(value); + } + + virtual std::string toHumanString(int32_t value) const { + return folly::to(value); + } + + std::string toHumanString(int64_t value) const { + if (sourceType_->isShortDecimal()) { + return decimalToHumanString(value); + } + return folly::to(value); + } + + std::string 
toHumanString(int128_t value) const { + return decimalToHumanString(value); + } + + std::string toHumanString(bool value) const { + return value ? "true" : "false"; + } + + // Match Iceberg spec Java implementation + // DateTimeFormatter.ISO_LOCAL_DATE_TIME + std::string toHumanString(Timestamp value) const; + + std::string_view name() const { + return TransformTypeName::toName(transformType_); + } + + /// Applies the transform to the specified column in the input row vector. + /// @param input The input row vector containing the column to transform. + /// @param channel Optional column index (0-based) to transform. If not + /// provided, uses sourceColumnName_ to locate the column in the input. + /// @return The transformed vector. + [[nodiscard]] VectorPtr transform( + const RowVectorPtr& input, + std::optional channel = std::nullopt) const; + + private: + template + std::string decimalToHumanString(T value) const { + const auto [p, s] = getDecimalPrecisionScale(*sourceType_); + const auto maxSize = DecimalUtil::maxStringViewSize(p, s); + std::string buffer(maxSize, '\0'); + const auto actualSize = + DecimalUtil::castToString(value, s, maxSize, buffer.data()); + buffer.resize(actualSize); + return buffer; + } + + protected: + const TypePtr sourceType_; + const TransformType transformType_; + const std::string sourceColumnName_; + memory::MemoryPool* pool_; +}; + +template +class IdentityTransform final : public Transform { + public: + IdentityTransform( + const TypePtr& type, + const std::string& columnName, + memory::MemoryPool* pool) + : Transform(type, TransformType::kIdentity, columnName, pool) {} + + VectorPtr apply(const VectorPtr& block) const override; + + const TypePtr resultType() const override { + return sourceType_; + } + + std::string toHumanString(int32_t value) const override { + if (sourceType_->isDate()) { + return DATE()->toString(value); + } + return folly::to(value); + } +}; + +template +class BucketTransform final : public Transform { + 
public: + BucketTransform( + int32_t count, + const TypePtr& type, + const std::string& columnName, + memory::MemoryPool* pool) + : Transform(type, TransformType::kBucket, columnName, pool), + numBuckets_(count) {} + + VectorPtr apply(const VectorPtr& block) const override; + + const TypePtr resultType() const override { + return INTEGER(); + } + + private: + const int32_t numBuckets_; +}; + +template +class TruncateTransform final : public Transform { + public: + TruncateTransform( + int32_t width, + const TypePtr& type, + const std::string& columnName, + memory::MemoryPool* pool) + : Transform(type, TransformType::kTruncate, columnName, pool), + width_(width) {} + + VectorPtr apply(const VectorPtr& block) const override; + + const TypePtr resultType() const override { + return sourceType_; + } + + private: + const int32_t width_; +}; + +template +class TemporalTransform final : public Transform { + public: + TemporalTransform( + const TypePtr& type, + TransformType transformType, + const std::string& columnName, + memory::MemoryPool* pool, + const std::function& epochFunc) + : Transform(type, transformType, columnName, pool), + epochFunc_(epochFunc) {} + + VectorPtr apply(const VectorPtr& block) const override; + + const TypePtr resultType() const override { + return INTEGER(); + } + + std::string toHumanString(int32_t value) const override { + switch (transformType_) { + case TransformType::kYear: { + return fmt::format("{:04d}", kEpochYear + value); + } + case TransformType::kMonth: { + int32_t year = kEpochYear + value / 12; + int32_t month = 1 + value % 12; + if (month <= 0) { + month += 12; + year -= 1; + } + return fmt::format("{:04d}-{:02d}", year, month); + } + case TransformType::kHour: { + int64_t seconds = static_cast(value) * 3600; + std::tm tmValue; + VELOX_USER_CHECK( + Timestamp::epochToCalendarUtc(seconds, tmValue), + "Can't convert seconds {}*3600 to time.", + seconds); + + return fmt::format( + "{:04d}-{:02d}-{:02d}-{:02d}", + tmValue.tm_year + 
1900, + tmValue.tm_mon + 1, + tmValue.tm_mday, + tmValue.tm_hour); + } + case TransformType::kDay: { + return DATE()->toString(value); + } + default: { + VELOX_UNREACHABLE("Unsupported transform type."); + } + } + } + + private: + const std::function epochFunc_; +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/CMakeLists.txt b/velox/connectors/hive/iceberg/tests/CMakeLists.txt index 8931bb6d0e9..781c7d88576 100644 --- a/velox/connectors/hive/iceberg/tests/CMakeLists.txt +++ b/velox/connectors/hive/iceberg/tests/CMakeLists.txt @@ -39,7 +39,12 @@ if(VELOX_ENABLE_BENCHMARKS) endif() if(NOT VELOX_DISABLE_GOOGLETEST) - add_executable(velox_hive_iceberg_test IcebergReadTest.cpp IcebergSplitReaderBenchmarkTest.cpp) + add_executable( + velox_hive_iceberg_test + IcebergReadTest.cpp + IcebergTestBase.cpp + IcebergSplitReaderBenchmarkTest.cpp + ) add_test(velox_hive_iceberg_test velox_hive_iceberg_test) target_link_libraries( @@ -51,6 +56,7 @@ if(NOT VELOX_DISABLE_GOOGLETEST) velox_dwio_common_exception velox_dwio_common_test_utils velox_vector_test_lib + velox_vector_fuzzer velox_exec velox_exec_test_lib Folly::folly @@ -62,15 +68,18 @@ if(NOT VELOX_DISABLE_GOOGLETEST) add_executable( velox_hive_iceberg_insert_test IcebergConnectorTest.cpp + ColumnTransformTest.cpp IcebergInsertTest.cpp + IcebergPartitionIdGeneratorTest.cpp IcebergParquetStatsTest.cpp + IcebergSortOrderTest.cpp + IcebergStatsTest.cpp IcebergTestBase.cpp + IcebergTransformE2ETest.cpp + IcebergTransformUnitTest.cpp + IcebergWriterModeTest.cpp Main.cpp - PartitionNameTest.cpp - PartitionSpecTest.cpp - PartitionValueFormatterTest.cpp - TransformE2ETest.cpp - TransformTest.cpp + Murmur3Test.cpp ) velox_add_test_headers(velox_hive_iceberg_insert_test IcebergTestBase.h) @@ -100,7 +109,11 @@ if(NOT VELOX_DISABLE_GOOGLETEST) ) if(VELOX_ENABLE_PARQUET) - target_link_libraries(velox_hive_iceberg_test velox_dwio_parquet_reader) + target_link_libraries( + 
velox_hive_iceberg_test + velox_dwio_parquet_writer + velox_dwio_parquet_reader + ) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/examples DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/velox/connectors/hive/iceberg/tests/ColumnTransformTest.cpp b/velox/connectors/hive/iceberg/tests/ColumnTransformTest.cpp new file mode 100644 index 00000000000..1b83e1b15b5 --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/ColumnTransformTest.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/hive/iceberg/Transforms.h" +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" +#include "velox/vector/tests/utils/VectorMaker.h" + +using namespace facebook::velox; +using namespace facebook::velox::connector::hive::iceberg; +using namespace facebook::velox::test; + +namespace facebook::velox::connector::hive::iceberg::test { + +class ColumnTransformTest : public IcebergTestBase {}; + +TEST_F(ColumnTransformTest, testConstructor) { + auto transform = std::make_shared>( + INTEGER(), "test_column", opPool_.get()); + + EXPECT_EQ(transform->sourceColumnName(), "test_column"); + EXPECT_EQ(transform->name(), "identity"); + EXPECT_EQ(transform->resultType(), INTEGER()); +} + +TEST_F(ColumnTransformTest, testTransformName) { + auto identityTransform = std::make_shared>( + INTEGER(), "col1", opPool_.get()); + EXPECT_EQ(identityTransform->name(), "identity"); + + auto bucketTransform = std::make_shared>( + 16, INTEGER(), "col2", opPool_.get()); + EXPECT_EQ(bucketTransform->name(), "bucket"); + + auto truncateTransform = std::make_shared>( + 10, INTEGER(), "col3", opPool_.get()); + EXPECT_EQ(truncateTransform->name(), "trunc"); + + auto yearTransform = std::make_shared>( + INTEGER(), TransformType::kYear, "col4", opPool_.get(), [](int32_t v) { + return v; + }); + EXPECT_EQ(yearTransform->name(), "year"); + + auto monthTransform = std::make_shared>( + INTEGER(), TransformType::kMonth, "col5", opPool_.get(), [](int32_t v) { + return v; + }); + EXPECT_EQ(monthTransform->name(), "month"); + + auto dayTransform = std::make_shared>( + INTEGER(), TransformType::kDay, "col6", opPool_.get(), [](int32_t v) { + return v; + }); + EXPECT_EQ(dayTransform->name(), "day"); + + auto hourTransform = std::make_shared>( + INTEGER(), TransformType::kHour, "col7", opPool_.get(), [](Timestamp v) { + return v.getSeconds() / 3600; + }); + EXPECT_EQ(hourTransform->name(), "hour"); +} + +TEST_F(ColumnTransformTest, testResultType) { + auto intTransform 
= std::make_shared>( + INTEGER(), "col_int", opPool_.get()); + EXPECT_EQ(intTransform->resultType(), INTEGER()); + + auto bigintTransform = std::make_shared>( + BIGINT(), "col_bigint", opPool_.get()); + EXPECT_EQ(bigintTransform->resultType(), BIGINT()); + + auto varcharTransform = std::make_shared>( + VARCHAR(), "col_varchar", opPool_.get()); + EXPECT_EQ(varcharTransform->resultType(), VARCHAR()); + + auto bucketTransform = std::make_shared>( + 16, VARCHAR(), "col_bucket", opPool_.get()); + EXPECT_EQ(bucketTransform->resultType(), INTEGER()); + + auto yearTransform = std::make_shared>( + DATE(), TransformType::kYear, "col_year", opPool_.get(), [](int32_t v) { + return v; + }); + EXPECT_EQ(yearTransform->resultType(), INTEGER()); +} + +TEST_F(ColumnTransformTest, testTransformSimpleColumn) { + auto intVector = makeFlatVector({1, 2, 3, 4, 5}); + auto rowVector = makeRowVector({"col_int"}, {intVector}); + + auto transform = std::make_shared>( + INTEGER(), "col_int", opPool_.get()); + + auto result = transform->transform(rowVector, 0); + + ASSERT_EQ(result->size(), 5); + ASSERT_EQ(result->type(), INTEGER()); + + auto resultVector = result->as>(); + EXPECT_EQ(resultVector->valueAt(0), 1); + EXPECT_EQ(resultVector->valueAt(1), 2); + EXPECT_EQ(resultVector->valueAt(2), 3); + EXPECT_EQ(resultVector->valueAt(3), 4); + EXPECT_EQ(resultVector->valueAt(4), 5); +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/EqualityDeleteFileReaderTest.cpp b/velox/connectors/hive/iceberg/tests/EqualityDeleteFileReaderTest.cpp index e0124e716aa..f35cefefb48 100644 --- a/velox/connectors/hive/iceberg/tests/EqualityDeleteFileReaderTest.cpp +++ b/velox/connectors/hive/iceberg/tests/EqualityDeleteFileReaderTest.cpp @@ -110,7 +110,8 @@ class EqualityDeleteFileReaderTest : public HiveConnectorTestBase { fileSize, partitionKeys, std::nullopt, - std::unordered_map{}, + std::unordered_map{ + {"table_format", "hive-iceberg"}}, 
nullptr, /*cacheable=*/true, deleteFiles, diff --git a/velox/connectors/hive/iceberg/tests/IcebergDwrfInsertTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergDwrfInsertTest.cpp index 088f7b57832..47dbcb7203b 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergDwrfInsertTest.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergDwrfInsertTest.cpp @@ -48,6 +48,7 @@ class IcebergDwrfInsertTest : public test::IcebergTestBase { const auto vectors = createTestData(rowType, numBatches, vectorSize, nullRatio); const auto dataSink = createDataSinkAndAppendData(vectors, dataPath); + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); auto splits = createSplitsForDirectory(dataPath); @@ -90,6 +91,7 @@ TEST_F(IcebergDwrfInsertTest, commitMessageFormat) { auto rowType = ROW({"c1", "c2"}, {BIGINT(), VARCHAR()}); const auto vectors = createTestData(rowType, 2, 100); const auto dataSink = createDataSinkAndAppendData(vectors, dataPath); + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); ASSERT_GT(commitTasks.size(), 0); @@ -136,6 +138,7 @@ TEST_F(IcebergDwrfInsertTest, partitioned) { {0, TransformType::kIdentity, std::nullopt}}; const auto dataSink = createDataSinkAndAppendData(vectors, dataPath, partitionTransforms); + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); ASSERT_GT(commitTasks.size(), 0); @@ -171,6 +174,7 @@ TEST_F(IcebergDwrfInsertTest, ensureWriterNonPartitioned) { // No partitionFields => unpartitioned table, partitionIdGenerator_ stays // null inside the sink. appendData triggers ensureWriter(). 
const auto dataSink = createDataSinkAndAppendData(vectors, dataPath); + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); ASSERT_EQ(commitTasks.size(), 1); diff --git a/velox/connectors/hive/iceberg/tests/IcebergInsertTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergInsertTest.cpp index f7fbee5efe1..05ae7dd8b70 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergInsertTest.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergInsertTest.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ +#include "velox/common/base/tests/GTestUtils.h" #include "velox/connectors/hive/HiveConfig.h" #include "velox/connectors/hive/iceberg/IcebergConnector.h" #include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" -#include "velox/exec/tests/utils/AssertQueryBuilder.h" #include "velox/exec/tests/utils/PlanBuilder.h" using namespace facebook::velox::common::testutil; @@ -25,8 +25,6 @@ using namespace facebook::velox::common::testutil; namespace facebook::velox::connector::hive::iceberg { namespace { -#ifdef VELOX_ENABLE_PARQUET - class IcebergInsertTest : public test::IcebergTestBase { protected: void test(const RowTypePtr& rowType, double nullRatio = 0.0) { @@ -36,9 +34,16 @@ class IcebergInsertTest : public test::IcebergTestBase { constexpr int32_t vectorSize = 5'000; const auto vectors = createTestData(rowType, numBatches, vectorSize, nullRatio); - const auto dataSink = createDataSinkAndAppendData(vectors, dataPath); - const auto commitTasks = dataSink->close(); + auto dataSink = + createIcebergDataSink(rowType, outputDirectory->getPath(), {}); + + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + ASSERT_TRUE(dataSink->finish()); + const auto commitTasks = dataSink->close(); + createDuckDbTable(vectors); auto splits = createSplitsForDirectory(dataPath); ASSERT_EQ(splits.size(), commitTasks.size()); auto plan = exec::test::PlanBuilder() @@ -47,7 +52,7 @@ class IcebergInsertTest : public test::IcebergTestBase 
{ .outputType(rowType) .endTableScan() .planNode(); - exec::test::AssertQueryBuilder(plan).splits(splits).assertResults(vectors); + assertQuery(plan, splits, "SELECT * FROM tmp"); } }; @@ -74,40 +79,64 @@ TEST_F(IcebergInsertTest, mapAndArray) { test(rowType); } +#ifdef VELOX_ENABLE_PARQUET TEST_F(IcebergInsertTest, bigDecimal) { auto rowType = ROW({"c1"}, {DECIMAL(38, 5)}); fileFormat_ = dwio::common::FileFormat::PARQUET; test(rowType); } -TEST_F(IcebergInsertTest, singleColumnPartition) { - struct TestCase { - std::string name; - TypePtr type; - }; +TEST_F(IcebergInsertTest, maxTargetFileSizeRotation) { + setConnectorSessionProperty(HiveConfig::kMaxTargetFileSizeSession, "4KB"); + + const auto outputPath = TempDirectoryPath::create()->getPath(); + const auto rowType = ROW({"c0", "c1"}, {BIGINT(), VARCHAR()}); + const auto vectors = createTestData(rowType, 10, 1'000); + auto dataSink = createIcebergDataSink(rowType, outputPath, {}); + + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + const auto commitTasks = dataSink->close(); - std::vector testCases = { - {"c1", BIGINT()}, - {"c2", INTEGER()}, - {"c3", SMALLINT()}, - {"c4", DECIMAL(18, 5)}, - {"c5", BOOLEAN()}, - {"c6", VARCHAR()}, - {"c7", DATE()}, - {"c8", TIMESTAMP()}}; - - for (const auto& testCase : testCases) { + ASSERT_EQ(listFiles(outputPath).size(), 5); + + createDuckDbTable(vectors); + auto splits = createSplitsForDirectory(outputPath); + auto plan = exec::test::PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType) + .endTableScan() + .planNode(); + assertQuery(plan, splits, "SELECT * FROM tmp"); +} +#endif + +TEST_F(IcebergInsertTest, testSingleColumnAsPartition) { + auto rowType = ROW( + {"c1", "c2", "c3", "c4", "c5", "c6"}, + {BIGINT(), INTEGER(), SMALLINT(), DECIMAL(18, 5), BOOLEAN(), VARCHAR()}); + for (auto colIndex = 0; colIndex < rowType->size() - 1; colIndex++) { + const auto& colName = 
rowType->nameOf(colIndex); const auto outputDirectory = TempDirectoryPath::create(); constexpr int32_t numBatches = 2; constexpr int32_t vectorSize = 50; - auto rowType = ROW({testCase.name}, {testCase.type}); - const auto vectors = createTestData(rowType, numBatches, vectorSize, 0.5); std::vector partitionTransforms = { - {0, TransformType::kIdentity, std::nullopt}}; - const auto dataSink = createDataSinkAndAppendData( - vectors, outputDirectory->getPath(), partitionTransforms); + {colIndex, TransformType::kIdentity, std::nullopt}}; + auto dataSink = createIcebergDataSink( + rowType, outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); + createDuckDbTable(vectors); auto splits = createSplitsForDirectory(outputDirectory->getPath()); ASSERT_GT(commitTasks.size(), 0); @@ -116,53 +145,72 @@ TEST_F(IcebergInsertTest, singleColumnPartition) { for (const auto& task : commitTasks) { auto taskJson = folly::parseJson(task); ASSERT_TRUE(taskJson.count("partitionDataJson") > 0); + ASSERT_FALSE(taskJson["partitionDataJson"].empty()); } - auto plan = exec::test::PlanBuilder() + connector::ColumnHandleMap assignments; + for (auto i = 0; i < rowType->size(); i++) { + const auto& name = rowType->nameOf(i); + if (i != colIndex) { + assignments.insert( + {name, + std::make_shared( + name, + HiveColumnHandle::ColumnType::kRegular, + rowType->childAt(i), + rowType->childAt(i))}); + } + } + + // Add partition column. 
+ assignments.insert( + {colName, + std::make_shared( + colName, + HiveColumnHandle::ColumnType::kPartitionKey, + rowType->childAt(colIndex), + rowType->childAt(colIndex))}); + + auto plan = exec::test::PlanBuilder(pool_.get()) .startTableScan() .connectorId(test::kIcebergConnectorId) + .assignments(assignments) .outputType(rowType) .endTableScan() .planNode(); - exec::test::AssertQueryBuilder(plan).splits(splits).assertResults(vectors); + + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); } } -TEST_F(IcebergInsertTest, partitionNullColumn) { - struct TestCase { - std::string name; - TypePtr type; - }; - - std::vector testCases = { - {"c1", BIGINT()}, - {"c2", INTEGER()}, - {"c3", SMALLINT()}, - {"c4", DECIMAL(18, 5)}, - {"c5", BOOLEAN()}, - {"c6", VARCHAR()}, - {"c7", DATE()}, - {"c8", TIMESTAMP()}}; - - for (const auto& testCase : testCases) { +TEST_F(IcebergInsertTest, testPartitionNullColumn) { + auto rowType = ROW( + {"c1", "c2", "c3", "c4", "c5", "c6"}, + {BIGINT(), INTEGER(), SMALLINT(), DECIMAL(18, 5), BOOLEAN(), VARCHAR()}); + for (auto colIndex = 0; colIndex < rowType->size() - 1; colIndex++) { + const auto& colName = rowType->nameOf(colIndex); + const auto colType = rowType->childAt(colIndex); const auto outputDirectory = TempDirectoryPath::create(); constexpr int32_t numBatches = 2; constexpr int32_t vectorSize = 100; - auto rowType = ROW({testCase.name}, {testCase.type}); - // nullRatio = 1.0 - const auto vectors = createTestData(rowType, numBatches, vectorSize, 1.0); + const auto vectors = createTestData(rowType, numBatches, vectorSize, 1.0); std::vector partitionTransforms = { - {0, TransformType::kIdentity, std::nullopt}}; - const auto dataSink = createDataSinkAndAppendData( - vectors, outputDirectory->getPath(), partitionTransforms); + {colIndex, TransformType::kIdentity, std::nullopt}}; + auto dataSink = createIcebergDataSink( + rowType, outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : vectors) { + 
dataSink->appendData(vector); + } + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); ASSERT_EQ(1, commitTasks.size()); auto taskJson = folly::parseJson(commitTasks.at(0)); ASSERT_EQ(1, taskJson.count("partitionDataJson")); - auto partitionData = - folly::parseJson(taskJson["partitionDataJson"].asString()); + auto partitionDataStr = taskJson["partitionDataJson"].asString(); + auto partitionData = folly::parseJson(partitionDataStr); ASSERT_EQ(1, partitionData.count("partitionValues")); auto partitionValues = partitionData["partitionValues"]; ASSERT_TRUE(partitionValues.isArray()); @@ -172,23 +220,29 @@ TEST_F(IcebergInsertTest, partitionNullColumn) { ASSERT_EQ(files.size(), 1); for (const auto& file : files) { - auto partitionKeys = extractPartitionKeys(file); - ASSERT_EQ(partitionKeys.size(), 1); - ASSERT_TRUE(partitionKeys.contains(testCase.name)); - ASSERT_FALSE(partitionKeys.at(testCase.name).has_value()); + std::vector pathComponents; + folly::split("/", file, pathComponents); + bool foundPartitionDir = false; + for (const auto& component : pathComponents) { + if (component.find('=') != std::string::npos) { + foundPartitionDir = true; + std::vector parts; + folly::split('=', component, parts); + ASSERT_EQ(parts.size(), 2); + ASSERT_EQ(parts[0], colName); + ASSERT_EQ(parts[1], "null"); + } + } + ASSERT_TRUE(foundPartitionDir) + << "No partition directory found in path: " << file; } } } -TEST_F(IcebergInsertTest, partitionMultiColumns) { - auto rowType = - ROW({"c1", "c2", "c3", "c4"}, - { - BIGINT(), - INTEGER(), - SMALLINT(), - DECIMAL(18, 5), - }); +TEST_F(IcebergInsertTest, testColumnCombinationsAsPartition) { + auto rowType = ROW( + {"c1", "c2", "c3", "c4", "c5", "c6"}, + {BIGINT(), INTEGER(), SMALLINT(), DECIMAL(18, 5), BOOLEAN(), VARCHAR()}); std::vector> columnCombinations = { {0, 1}, // BIGINT, INTEGER. {2, 1}, // SMALLINT, INTEGER. 
@@ -200,73 +254,99 @@ TEST_F(IcebergInsertTest, partitionMultiColumns) { const auto outputDirectory = TempDirectoryPath::create(); constexpr int32_t numBatches = 2; constexpr int32_t vectorSize = 50; - - std::vector vectors; - vectors.reserve(numBatches); - for (int32_t batch = 0; batch < numBatches; ++batch) { - vectors.push_back(makeRowVector( - rowType->names(), - { - makeFlatVector( - vectorSize, [](auto row) { return row * 100; }), - makeFlatVector( - vectorSize, [](auto row) { return row * 10; }), - makeFlatVector(vectorSize, [](auto row) { return row; }), - makeFlatVector( - vectorSize, - [](auto row) { return (row * 1000); }, - nullptr, - DECIMAL(18, 5)), - })); - } - + const auto vectors = createTestData(rowType, numBatches, vectorSize); std::vector partitionTransforms; for (auto colIndex : combination) { partitionTransforms.push_back( {colIndex, TransformType::kIdentity, std::nullopt}); } - const auto dataSink = createDataSinkAndAppendData( - vectors, outputDirectory->getPath(), partitionTransforms); + auto dataSink = createIcebergDataSink( + rowType, outputDirectory->getPath(), partitionTransforms); + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); const auto commitTasks = dataSink->close(); + createDuckDbTable(vectors); auto splits = createSplitsForDirectory(outputDirectory->getPath()); - ASSERT_EQ(commitTasks.size(), vectorSize); + ASSERT_GT(commitTasks.size(), 0); ASSERT_EQ(splits.size(), commitTasks.size()); - auto plan = exec::test::PlanBuilder() + connector::ColumnHandleMap assignments; + std::unordered_set partitionColumns( + combination.begin(), combination.end()); + + for (auto i = 0; i < rowType->size(); i++) { + const auto& name = rowType->nameOf(i); + auto columnType = partitionColumns.count(i) > 0 + ? 
HiveColumnHandle::ColumnType::kPartitionKey + : HiveColumnHandle::ColumnType::kRegular; + + assignments.insert( + {name, + std::make_shared( + name, columnType, rowType->childAt(i), rowType->childAt(i))}); + } + + auto plan = exec::test::PlanBuilder(pool_.get()) .startTableScan() .connectorId(test::kIcebergConnectorId) + .assignments(assignments) .outputType(rowType) .endTableScan() .planNode(); - exec::test::AssertQueryBuilder(plan).splits(splits).assertResults(vectors); + + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); } } -TEST_F(IcebergInsertTest, maxTargetFileSizeRotation) { - setConnectorSessionProperty(HiveConfig::kMaxTargetFileSizeSession, "4KB"); +TEST_F(IcebergInsertTest, testInfinityValues) { + const auto outputDirectory = TempDirectoryPath::create(); + auto realVector = makeFlatVector( + {std::numeric_limits::max(), + -std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::min(), + std::numeric_limits::lowest(), + std::numeric_limits::quiet_NaN()}); + + auto doubleVector = makeFlatVector( + {std::numeric_limits::max(), + -std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::min(), + std::numeric_limits::lowest(), + std::numeric_limits::quiet_NaN()}); + + auto idVector = makeFlatVector({0, 1, 2, 3, 4, 5}); - const auto outputPath = TempDirectoryPath::create()->getPath(); - const auto rowType = ROW({"c0", "c1"}, {BIGINT(), VARCHAR()}); - const auto vectors = createTestData(rowType, 10, 1'000); - const auto dataSink = createDataSinkAndAppendData(vectors, outputPath); - const auto commitTasks = dataSink->close(); + auto rowType = + ROW({"id", "real_col", "double_col"}, {BIGINT(), REAL(), DOUBLE()}); + auto vector = makeRowVector( + {"id", "real_col", "double_col"}, {idVector, realVector, doubleVector}); - ASSERT_EQ(listFiles(outputPath).size(), 5); + auto dataSink = + createIcebergDataSink(rowType, outputDirectory->getPath(), {}); + dataSink->appendData(vector); + 
ASSERT_TRUE(dataSink->finish()); + dataSink->close(); - auto splits = createSplitsForDirectory(outputPath); - auto plan = exec::test::PlanBuilder() + createDuckDbTable({vector}); + auto splits = createSplitsForDirectory(outputDirectory->getPath()); + + auto plan = exec::test::PlanBuilder(pool_.get()) .startTableScan() .connectorId(test::kIcebergConnectorId) .outputType(rowType) .endTableScan() .planNode(); - exec::test::AssertQueryBuilder(plan).splits(splits).assertResults(vectors); -} -#endif + assertQuery(plan, splits, "SELECT * FROM tmp ORDER BY id"); +} } // namespace } // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/IcebergParquetStatsTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergParquetStatsTest.cpp index ebab93a2510..2295d93ef73 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergParquetStatsTest.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergParquetStatsTest.cpp @@ -423,7 +423,7 @@ TEST_F(IcebergParquetStatsTest, empty) { {makeFlatVector(0), makeFlatVector(0)})}, outputDir->getPath()); auto commitTasks = dataSink->close(); - EXPECT_TRUE(commitTasks.empty()); + EXPECT_FALSE(commitTasks.empty()); } TEST_F(IcebergParquetStatsTest, nullValues) { diff --git a/velox/connectors/hive/iceberg/tests/IcebergPartitionIdGeneratorTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergPartitionIdGeneratorTest.cpp new file mode 100644 index 00000000000..f6aafa4be22 --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergPartitionIdGeneratorTest.cpp @@ -0,0 +1,364 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.h" +#include "velox/connectors/hive/iceberg/Transforms.h" +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" + +using namespace facebook::velox; + +namespace facebook::velox::connector::hive::iceberg::test { + +class IcebergPartitionIdGeneratorTest : public IcebergTestBase { + protected: + std::vector> createColumnTransforms( + const std::vector& columnNames, + const std::vector& types, + const std::vector& transformTypes, + const std::vector>& parameters = {}) { + std::vector fields; + fields.reserve(columnNames.size()); + + for (size_t i = 0; i < columnNames.size(); ++i) { + std::optional parameter = + parameters.size() > i ? 
parameters[i] : std::nullopt; + + fields.emplace_back( + IcebergPartitionSpec::Field{ + columnNames[i], types[i], transformTypes[i], parameter}); + } + + return parsePartitionTransformSpecs(fields, pool_.get()); + } + + std::unique_ptr createGenerator( + const std::vector>& transforms, + bool partitionPathAsLowerCase = false) { + std::vector partitionChannels; + for (size_t i = 0; i < transforms.size(); ++i) { + partitionChannels.push_back(i); + } + + return std::make_unique( + partitionChannels, + 128, + pool_.get(), + transforms, + partitionPathAsLowerCase); + } + + void verifyPartitionComponents( + const std::string& partitionName, + const std::vector& expectedComponents) { + std::vector actualComponents; + folly::split('/', partitionName, actualComponents); + ASSERT_EQ(actualComponents.size(), expectedComponents.size()); + for (size_t i = 0; i < expectedComponents.size(); ++i) { + ASSERT_EQ(actualComponents[i], expectedComponents[i]); + } + } +}; + +TEST_F(IcebergPartitionIdGeneratorTest, partitionNameWithIdentityTransforms) { + std::vector columnNames = { + "c_int", "c_bigint", "c_varchar", "c_decimal", "c_bool", "c_date"}; + + std::vector columns = { + makeConstant(42, 1), + makeConstant(9'876'543'210, 1), + makeConstant("test string", 1), + makeConstant(12'345'678'901'234, 1, DECIMAL(18, 4)), + makeConstant(true, 1), + makeConstant(18'262, 1, DATE())}; + + std::vector types = { + INTEGER(), BIGINT(), VARCHAR(), DECIMAL(18, 4), BOOLEAN(), DATE()}; + auto rowVector = makeRowVector(columnNames, columns); + std::vector transformTypes( + columnNames.size(), TransformType::kIdentity); + auto transforms = createColumnTransforms(columnNames, types, transformTypes); + auto generator = createGenerator(transforms); + raw_vector partitionIds(1); + generator->run(rowVector, partitionIds); + + std::string partitionName = generator->partitionName(partitionIds[0]); + std::vector expectedComponents = { + "c_int=42", + "c_bigint=9876543210", + "c_varchar=test+string", + 
"c_decimal=1234567890.1234", + "c_bool=true", + "c_date=2020-01-01"}; + verifyPartitionComponents(partitionName, expectedComponents); +} + +TEST_F( + IcebergPartitionIdGeneratorTest, + partitionNameWithTimestampIdentitySpecialValues) { + std::vector timestamps = { + Timestamp(253402300800, 100000000), // +10000-01-01T00:00:00.1. + Timestamp(-62170000000, 0), // -0001-11-29T19:33:20. + Timestamp(-62135577748, 999000000), // 0001-01-01T05:17:32.999. + Timestamp(0, 0), // 1970-01-01T00:00. + Timestamp(1609459200, 999000000), // 2021-01-01T00:00. + Timestamp(1640995200, 500000000), // 2022-01-01T00:00:00.5. + Timestamp(1672531200, 123000000), // 2023-01-01T00:00:00.123. + Timestamp(-1, 999000000), // 1969-12-31T23:59:59.999. + Timestamp(1, 1000000), // 1970-01-01T00:00:01.001. + Timestamp(-62167219199, 0), // 0000-01-01T00:00:01. + Timestamp(-377716279140, 321000000), // -10000-01-01T01:01:00.321. + Timestamp(253402304660, 321000000), // +10000-01-01T01:01:00.321. + Timestamp(951782400, 0), // 2000-02-29T00:00:00 (leap year). + Timestamp(4107456000, 0), // 2100-02-28T00:00:00 (not leap year). + Timestamp(86400, 0), // 1970-01-02T00:00:00. + Timestamp(-86400, 0), // 1969-12-31T00:00:00. + Timestamp(1672531200, 456000000), // 2023-01-01T00:00:00.456. + Timestamp(1672531200, 789000000), // 2023-01-01T00:00:00.789. 
+ }; + + std::vector expectedPartitionNames = { + "c_timestamp=%2B10000-01-01T00%3A00%3A00.1", + "c_timestamp=-0001-11-29T19%3A33%3A20", + "c_timestamp=0001-01-01T05%3A17%3A32.999", + "c_timestamp=1970-01-01T00%3A00", + "c_timestamp=2021-01-01T00%3A00%3A00.999", + "c_timestamp=2022-01-01T00%3A00%3A00.5", + "c_timestamp=2023-01-01T00%3A00%3A00.123", + "c_timestamp=1969-12-31T23%3A59%3A59.999", + "c_timestamp=1970-01-01T00%3A00%3A01.001", + "c_timestamp=0000-01-01T00%3A00%3A01", + "c_timestamp=-10000-08-24T19%3A21%3A00.321", + "c_timestamp=%2B10000-01-01T01%3A04%3A20.321", + "c_timestamp=2000-02-29T00%3A00", + "c_timestamp=2100-02-28T00%3A00", + "c_timestamp=1970-01-02T00%3A00", + "c_timestamp=1969-12-31T00%3A00", + "c_timestamp=2023-01-01T00%3A00%3A00.456", + "c_timestamp=2023-01-01T00%3A00%3A00.789", + }; + + auto timestampVector = makeFlatVector(timestamps); + std::vector columnNames = {"c_timestamp"}; + std::vector columns = {timestampVector}; + std::vector types = {TIMESTAMP()}; + auto rowVector = makeRowVector(columnNames, columns); + + std::vector transformTypes = {TransformType::kIdentity}; + auto transforms = createColumnTransforms(columnNames, types, transformTypes); + auto generator = createGenerator(transforms); + raw_vector partitionIds(timestamps.size()); + generator->run(rowVector, partitionIds); + + for (size_t i = 0; i < timestamps.size(); ++i) { + std::string partitionName = generator->partitionName(partitionIds[i]); + ASSERT_EQ(partitionName, expectedPartitionNames[i]); + } +} + +TEST_F(IcebergPartitionIdGeneratorTest, partitionNameWithMixedTransforms) { + std::vector columnNames = { + "c_int", + "c_bigint", + "c_varchar", + "c_year", + "c_month", + "c_day", + "c_hour", + "c_bool"}; + + std::vector columns = { + makeConstant(42, 1), + makeConstant(9'876'543'210, 1), + makeConstant("test string", 1), + makeConstant(Timestamp(1'577'836'800, 0), 1), + makeConstant(Timestamp(1'578'836'800, 0), 1), + makeConstant(Timestamp(1'579'836'800, 0), 1), + 
makeConstant(Timestamp(1'57'936'800, 0), 1), + makeConstant(true, 1)}; + + std::vector types = { + INTEGER(), + BIGINT(), + VARCHAR(), + TIMESTAMP(), + TIMESTAMP(), + TIMESTAMP(), + TIMESTAMP(), + BOOLEAN()}; + + auto rowVector = makeRowVector(columnNames, columns); + + std::vector transformTypes = { + TransformType::kBucket, + TransformType::kTruncate, + TransformType::kTruncate, + TransformType::kYear, + TransformType::kMonth, + TransformType::kDay, + TransformType::kHour, + TransformType::kIdentity}; + + std::vector> parameters = {4, 1'000, 5, std::nullopt}; + auto transforms = + createColumnTransforms(columnNames, types, transformTypes, parameters); + + auto generator = createGenerator(transforms); + raw_vector partitionIds(1); + generator->run(rowVector, partitionIds); + + std::string partitionName = generator->partitionName(partitionIds[0]); + std::vector expectedComponents = { + "c_int_bucket=2", + "c_bigint_trunc=9876543000", + "c_varchar_trunc=test+", + "c_year_year=2020", + "c_month_month=2020-01", + "c_day_day=2020-01-24", + "c_hour_hour=1975-01-02-23", + "c_bool=true"}; + verifyPartitionComponents(partitionName, expectedComponents); +} + +TEST_F(IcebergPartitionIdGeneratorTest, partitionNameWithNullValues) { + std::vector columnNames = {"c_int", "c_varchar", "c_decimal"}; + std::vector columns = { + makeConstant(std::nullopt, 1), + makeConstant(std::nullopt, 1), + makeConstant(std::nullopt, 1, DECIMAL(18, 4))}; + std::vector types = {INTEGER(), VARCHAR(), DECIMAL(18, 3)}; + auto rowVector = makeRowVector(columnNames, columns); + + std::vector transformTypes = { + TransformType::kBucket, + TransformType::kTruncate, + TransformType::kIdentity}; + std::vector> parameters = {4, 1'000, std::nullopt}; + auto transforms = + createColumnTransforms(columnNames, types, transformTypes, parameters); + auto generator = createGenerator(transforms); + raw_vector partitionIds(1); + generator->run(rowVector, partitionIds); + + std::string partitionName = 
generator->partitionName(partitionIds[0]); + std::vector expectedComponents = { + "c_int_bucket=null", "c_varchar_trunc=null", "c_decimal=null"}; + verifyPartitionComponents(partitionName, expectedComponents); +} + +TEST_F(IcebergPartitionIdGeneratorTest, partitionNameWithLowerCase) { + auto varcharVector = makeConstant("MiXeD_CaSe", 1); + std::vector columnNames = {"MiXeD_CoLuMn"}; + std::vector columns = {varcharVector}; + std::vector types = {VARCHAR()}; + auto rowVector = makeRowVector(columnNames, columns); + std::vector transformTypes = {TransformType::kIdentity}; + auto transforms = createColumnTransforms(columnNames, types, transformTypes); + auto generator = createGenerator(transforms, true); + raw_vector partitionIds(1); + generator->run(rowVector, partitionIds); + std::string partitionName = generator->partitionName(partitionIds[0]); + std::vector expectedPartitionName = {"mixed_column=MiXeD_CaSe"}; + verifyPartitionComponents(partitionName, expectedPartitionName); + + generator = createGenerator(transforms); + generator->run(rowVector, partitionIds); + partitionName = generator->partitionName(partitionIds[0]); + expectedPartitionName = {"MiXeD_CoLuMn=MiXeD_CaSe"}; + verifyPartitionComponents(partitionName, expectedPartitionName); +} + +TEST_F(IcebergPartitionIdGeneratorTest, urlEncodingForSpecialChars) { + std::vector> testCases = { + {"space test", "space+test"}, + {"slash/test", "slash%2Ftest"}, + {"question?test", "question%3Ftest"}, + {"percent%test", "percent%25test"}, + {"hash#test", "hash%23test"}, + {"ampersand&test", "ampersand%26test"}, + {"equals=test", "equals%3Dtest"}, + {"plus+test", "plus%2Btest"}, + {"comma,test", "comma%2Ctest"}, + {"semicolon;test", "semicolon%3Btest"}, + {"at@test", "at%40test"}, + {"dollar$test", "dollar%24test"}, + {"backslash\\test", "backslash%5Ctest"}, + {"quote\"test", "quote%22test"}, + {"apostrophe'test", "apostrophe%27test"}, + {"lessthan", "greater%3Ethan"}, + {"colon:test", "colon%3Atest"}, + {"pipe|test", 
"pipe%7Ctest"}, + {"bracket[test", "bracket%5Btest"}, + {"bracket]test", "bracket%5Dtest"}, + {"brace{test", "brace%7Btest"}, + {"brace}test", "brace%7Dtest"}, + {"caret^test", "caret%5Etest"}, + {"tilde~test", "tilde%7Etest"}, + {"backtick`test", "backtick%60test"}, + {"unicode\u00A9test", "unicode%C2%A9test"}, + {"email@example.com", "email%40example.com"}, + {"user:password@host:port/path", "user%3Apassword%40host%3Aport%2Fpath"}, + {"https://github.ibm.com/IBM/velox", + "https%3A%2F%2Fgithub.ibm.com%2FIBM%2Fvelox"}, + {"a+b=c&d=e+f", "a%2Bb%3Dc%26d%3De%2Bf"}, + {"special!@#$%^&*()_+", "special%21%40%23%24%25%5E%26*%28%29_%2B"}, + }; + + std::vector transformTypes = {TransformType::kIdentity}; + std::vector types = {VARCHAR()}; + std::vector columnNames = {"ColumnWithSpecialChars"}; + auto transforms = createColumnTransforms(columnNames, types, transformTypes); + raw_vector partitionIds(1); + auto generator = createGenerator(transforms); + + for (const auto& [input, expectedEncoded] : testCases) { + auto varcharVector = makeConstant(StringView(input), 1); + auto rowVector = makeRowVector(columnNames, {varcharVector}); + generator->run(rowVector, partitionIds); + std::string partitionName = generator->partitionName(partitionIds[0]); + std::string expectedPartitionName = + fmt::format("{}={}", columnNames[0], expectedEncoded); + ASSERT_EQ(partitionName, expectedPartitionName); + } +} + +TEST_F(IcebergPartitionIdGeneratorTest, multipleRows) { + std::vector columnNames = {"c_int", "c_varchar"}; + auto rowVector = makeRowVector( + columnNames, + {makeFlatVector({10, 20, 30}), + makeFlatVector({"value1", "value2", "value3"})}); + + std::vector types = {INTEGER(), VARCHAR()}; + std::vector transformTypes( + columnNames.size(), TransformType::kIdentity); + auto transforms = createColumnTransforms(columnNames, types, transformTypes); + auto generator = createGenerator(transforms); + raw_vector partitionIds(3); + generator->run(rowVector, partitionIds); + + std::vector 
expectedNames = { + "c_int=10/c_varchar=value1", + "c_int=20/c_varchar=value2", + "c_int=30/c_varchar=value3"}; + + for (size_t i = 0; i < 3; ++i) { + std::string partitionName = generator->partitionName(partitionIds[i]); + ASSERT_EQ(partitionName, expectedNames[i]); + } +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp index 71c1ae4b621..88890484c28 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp @@ -26,6 +26,7 @@ #include "velox/connectors/hive/iceberg/IcebergDeleteFile.h" #include "velox/connectors/hive/iceberg/IcebergMetadataColumns.h" #include "velox/connectors/hive/iceberg/IcebergSplit.h" +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" #include "velox/dwio/common/tests/utils/DataFiles.h" #include "velox/exec/PlanNodeStats.h" #include "velox/exec/tests/utils/AssertQueryBuilder.h" @@ -55,7 +56,7 @@ uint64_t getTestFileSize(const std::string& path) { static const char* kIcebergConnectorId = "test-iceberg"; -class HiveIcebergTest : public HiveConnectorTestBase { +class HiveIcebergTest : public exec::test::HiveConnectorTestBase { public: void SetUp() override { HiveConnectorTestBase::SetUp(); @@ -265,13 +266,9 @@ class HiveIcebergTest : public HiveConnectorTestBase { std::string duckdbSql = getDuckDBQuery(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); - auto plan = PlanBuilder() - .startTableScan() - .connectorId(kIcebergConnectorId) - .outputType(ROW({"c0"}, {BIGINT()})) - .endTableScan() - .planNode(); - auto task = assertQuery(plan, splits, duckdbSql, numPrefetchSplits); + auto plan = PlanBuilder().tableScan(ROW({"c0"}, {BIGINT()})).planNode(); + auto task = assertQuery( + plan, splits, duckdbSql, numPrefetchSplits); auto planStats = toPlanStats(task->taskStats()); @@ -280,6 +277,125 @@ class HiveIcebergTest : 
public HiveConnectorTestBase { ASSERT_TRUE(it->second.peakMemoryBytes > 0); } + void assertEqualityDeletes( + const std::unordered_map>>& + equalityDeleteVectorMap, + const std::unordered_map>& + equalityFieldIdsMap, + std::string duckDbSql = "", + std::vector dataVectors = {}) { + VELOX_CHECK_EQ(equalityDeleteVectorMap.size(), equalityFieldIdsMap.size()); + // We will create data vectors with numColumns number of columns that is the + // max field Id in equalityFieldIds + int32_t numDataColumns = 0; + + for (auto it = equalityFieldIdsMap.begin(); it != equalityFieldIdsMap.end(); + ++it) { + auto equalityFieldIds = it->second; + auto currentMax = + *std::max_element(equalityFieldIds.begin(), equalityFieldIds.end()); + numDataColumns = std::max(numDataColumns, currentMax); + } + + VELOX_CHECK_GT(numDataColumns, 0); + VELOX_CHECK_GE(numDataColumns, equalityDeleteVectorMap.size()); + VELOX_CHECK_GT(equalityDeleteVectorMap.size(), 0); + + VELOX_CHECK_LE(equalityFieldIdsMap.size(), numDataColumns); + + std::shared_ptr dataFilePath = + writeDataFiles(rowCount, numDataColumns, 1, dataVectors)[0]; + + std::vector deleteFiles; + std::string predicates = ""; + unsigned long numDeletedValues = 0; + + std::vector> deleteFilePaths; + for (auto it = equalityFieldIdsMap.begin(); + it != equalityFieldIdsMap.end();) { + auto equalityFieldIds = it->second; + auto equalityDeleteVector = equalityDeleteVectorMap.at(it->first); + VELOX_CHECK_GT(equalityDeleteVector.size(), 0); + numDeletedValues = + std::max(numDeletedValues, equalityDeleteVector[0].size()); + deleteFilePaths.push_back(writeEqualityDeleteFile(equalityDeleteVector)); + IcebergDeleteFile deleteFile( + FileContent::kEqualityDeletes, + deleteFilePaths.back()->getPath(), + fileFormat_, + equalityDeleteVector[0].size(), + testing::internal::GetFileSize( + std::fopen(deleteFilePaths.back()->getPath().c_str(), "r")), + equalityFieldIds); + deleteFiles.push_back(deleteFile); + predicates += makePredicates(equalityDeleteVector, 
equalityFieldIds); + ++it; + if (it != equalityFieldIdsMap.end()) { + predicates += " AND "; + } + } + + // The default split count is 1. + auto icebergSplits = + makeIcebergSplits(dataFilePath->getPath(), deleteFiles); + + // If the caller passed in a query, use that. + if (duckDbSql == "") { + // Select all columns + duckDbSql = "SELECT * FROM tmp "; + if (numDeletedValues > 0) { + duckDbSql += fmt::format("WHERE {}", predicates); + } + } + + assertEqualityDeletes( + icebergSplits.back(), + !dataVectors.empty() ? asRowType(dataVectors[0]->type()) : rowType_, + duckDbSql); + + // Select a column that's not in the filter columns + if (numDataColumns > 1 && + equalityDeleteVectorMap.at(0).size() < numDataColumns) { + std::string duckDbQuery = "SELECT c0 FROM tmp"; + if (numDeletedValues > 0) { + duckDbQuery += fmt::format(" WHERE {}", predicates); + } + + std::vector names({"c0"}); + std::vector types(1, BIGINT()); + assertEqualityDeletes( + icebergSplits.back(), + std::make_shared(std::move(names), std::move(types)), + duckDbQuery); + } + } + + std::vector makeSequenceValues(int32_t numRows, int8_t repeat = 1) { + VELOX_CHECK_GT(repeat, 0); + + auto maxValue = std::ceil((double)numRows / repeat); + std::vector values; + values.reserve(numRows); + for (int32_t i = 0; i < maxValue; i++) { + for (int8_t j = 0; j < repeat; j++) { + values.push_back(i); + } + } + values.resize(numRows); + return values; + } + + std::vector makeRandomDeleteValues(int32_t maxRowNumber) { + std::mt19937 gen{0}; + std::vector deleteRows; + for (int i = 0; i < maxRowNumber; i++) { + if (folly::Random::rand32(0, 10, gen) > 8) { + deleteRows.push_back(i); + } + } + return deleteRows; + } + const static int rowCount = 20000; protected: @@ -307,7 +423,8 @@ class HiveIcebergTest : public HiveConnectorTestBase { name, HiveColumnHandle::ColumnType::kRegular, type, - parquet::ParquetFieldId(fieldId), + type, + IcebergNestedField{fieldId, {}}, std::vector{}, std::optional{defaultValue}); } @@ -324,7 
+441,8 @@ class HiveIcebergTest : public HiveConnectorTestBase { // Write data file with old schema auto dataFilePath = TempFilePath::create(); writeToFile(dataFilePath->getPath(), data); - auto icebergSplits = makeIcebergSplits(dataFilePath->getPath()); + auto icebergSplits = makeIcebergSplits( + dataFilePath->getPath(), {}, {}, 1, kIcebergConnectorId); // Build plan auto plan = PlanBuilder() @@ -349,7 +467,11 @@ class HiveIcebergTest : public HiveConnectorTestBase { const std::vector& deleteFiles = {}, const std::unordered_map>& partitionKeys = {}, - const uint32_t splitCount = 1) { + const uint32_t splitCount = 1, + const std::string& connectorId = kHiveConnectorId) { + std::unordered_map customSplitInfo; + customSplitInfo["table_format"] = "hive-iceberg"; + auto file = filesystems::getFileSystem(dataFilePath, nullptr) ->openFileForRead(dataFilePath); const int64_t fileSize = file->size(); @@ -361,14 +483,14 @@ class HiveIcebergTest : public HiveConnectorTestBase { for (int i = 0; i < splitCount; ++i) { splits.emplace_back( std::make_shared( - kIcebergConnectorId, + connectorId, dataFilePath, fileFormat_, i * splitSize, splitSize, partitionKeys, std::nullopt, - std::unordered_map{}, + customSplitInfo, nullptr, /*cacheable=*/true, deleteFiles)); @@ -428,16 +550,18 @@ class HiveIcebergTest : public HiveConnectorTestBase { ->openFileForRead(path) ->size(); + std::unordered_map customSplitInfo{ + {"table_format", "hive-iceberg"}}; std::unordered_map> partitionKeys; return {std::make_shared( - kIcebergConnectorId, + kHiveConnectorId, path, FileFormat::PARQUET, 0, fileSize, partitionKeys, std::nullopt, - std::unordered_map{}, + customSplitInfo, nullptr, /*cacheable=*/true, std::vector{icebergDeleteFile})}; @@ -595,6 +719,20 @@ class HiveIcebergTest : public HiveConnectorTestBase { AssertQueryBuilder(plan).splits({split}).assertResults(tc.expectedVectors); } + void assertEqualityDeletes( + std::shared_ptr split, + RowTypePtr outputRowType, + const std::string& 
duckDbSql) { + auto plan = tableScanNode(outputRowType); + auto task = assertQuery(plan, {split}, duckDbSql); + + auto planStats = toPlanStats(task->taskStats()); + auto scanNodeId = plan->id(); + auto it = planStats.find(scanNodeId); + ASSERT_TRUE(it != planStats.end()); + ASSERT_TRUE(it->second.peakMemoryBytes > 0); + } + private: std::map> writeDataFiles( std::map> rowGroupSizesForFiles) { @@ -797,11 +935,165 @@ class HiveIcebergTest : public HiveConnectorTestBase { return deletePositionVector; } + std::string makeNotInList(const std::vector& deletePositionVector) { + if (deletePositionVector.empty()) { + return ""; + } + + return std::accumulate( + deletePositionVector.begin() + 1, + deletePositionVector.end(), + std::to_string(deletePositionVector[0]), + [](const std::string& a, int64_t b) { + return a + ", " + std::to_string(b); + }); + } + + core::PlanNodePtr tableScanNode(RowTypePtr outputRowType) { + return PlanBuilder(pool_.get()).tableScan(outputRowType).planNode(); + } + + std::string makePredicates( + const std::vector>& equalityDeleteVector, + const std::vector& equalityFieldIds) { + std::string predicates(""); + int32_t numDataColumns = + *std::max_element(equalityFieldIds.begin(), equalityFieldIds.end()); + + VELOX_CHECK_GT(numDataColumns, 0); + VELOX_CHECK_GE(numDataColumns, equalityDeleteVector.size()); + VELOX_CHECK_GT(equalityDeleteVector.size(), 0); + + auto numDeletedValues = equalityDeleteVector[0].size(); + + if (numDeletedValues == 0) { + return predicates; + } + + // If all values for a column are deleted, just return an always-false + // predicate + for (auto i = 0; i < equalityDeleteVector.size(); i++) { + auto equalityFieldId = equalityFieldIds[i]; + auto deleteValues = equalityDeleteVector[i]; + + auto lastIter = std::unique(deleteValues.begin(), deleteValues.end()); + auto numDistinctValues = lastIter - deleteValues.begin(); + auto minValue = 1; + auto maxValue = *std::max_element(deleteValues.begin(), lastIter); + if (maxValue - 
minValue + 1 == numDistinctValues && + maxValue == (rowCount - 1) / equalityFieldId) { + return "1 = 0"; + } + } + + if (equalityDeleteVector.size() == 1) { + std::string name = fmt::format("c{}", equalityFieldIds[0] - 1); + predicates = fmt::format( + "{} NOT IN ({})", name, makeNotInList({equalityDeleteVector[0]})); + } else { + for (int i = 0; i < numDeletedValues; i++) { + std::string oneRow(""); + for (int j = 0; j < equalityFieldIds.size(); j++) { + std::string name = fmt::format("c{}", equalityFieldIds[j] - 1); + std::string predicate = + fmt::format("({} <> {})", name, equalityDeleteVector[j][i]); + + oneRow = oneRow == "" ? predicate + : fmt::format("({} OR {})", oneRow, predicate); + } + + predicates = predicates == "" + ? oneRow + : fmt::format("{} AND {}", predicates, oneRow); + } + } + return predicates; + } + std::shared_ptr pathColumn_ = IcebergMetadataColumn::icebergDeleteFilePathColumn(); std::shared_ptr posColumn_ = IcebergMetadataColumn::icebergDeletePosColumn(); + + protected: + RowTypePtr rowType_{ROW({"c0"}, {BIGINT()})}; + + std::shared_ptr writeEqualityDeleteFile( + const std::vector>& equalityDeleteVector) { + std::vector names; + std::vector vectors; + for (int i = 0; i < equalityDeleteVector.size(); i++) { + names.push_back(fmt::format("c{}", i)); + vectors.push_back(makeFlatVector(equalityDeleteVector[i])); + } + + RowVectorPtr deleteFileVectors = makeRowVector(names, vectors); + + auto deleteFilePath = TempFilePath::create(); + writeToFile(deleteFilePath->getPath(), deleteFileVectors); + + return deleteFilePath; + } + + std::vector> writeDataFiles( + uint64_t numRows, + int32_t numColumns = 1, + int32_t splitCount = 1, + std::vector dataVectors = {}) { + if (dataVectors.empty()) { + dataVectors = makeVectors(splitCount, numRows, numColumns); + } + VELOX_CHECK_EQ(dataVectors.size(), splitCount); + + std::vector> dataFilePaths; + dataFilePaths.reserve(splitCount); + for (auto i = 0; i < splitCount; i++) { + 
dataFilePaths.emplace_back(TempFilePath::create()); + writeToFile(dataFilePaths.back()->getPath(), dataVectors[i]); + } + + createDuckDbTable(dataVectors); + return dataFilePaths; + } + + std::vector + makeVectors(int32_t count, int32_t rowsPerVector, int32_t numColumns = 1) { + std::vector types(numColumns, BIGINT()); + std::vector names; + for (int j = 0; j < numColumns; j++) { + names.push_back(fmt::format("c{}", j)); + } + + std::vector rowVectors; + for (int i = 0; i < count; i++) { + std::vector vectors; + + // Create the column values like below: + // c0 c1 c2 + // 0 0 0 + // 1 0 0 + // 2 1 0 + // 3 1 1 + // 4 2 1 + // 5 2 1 + // 6 3 2 + // ... + // In the first column c0, the values are continuously increasing and not + // repeating. In the second column c1, the values are continuously + // increasing and each value repeats once. And so on. + for (int j = 0; j < numColumns; j++) { + auto data = makeSequenceValues(rowsPerVector, j + 1); + vectors.push_back(vectorMaker_.flatVector(data)); + } + + rowVectors.push_back(makeRowVector(names, vectors)); + } + + rowType_ = std::make_shared(std::move(names), std::move(types)); + + return rowVectors; + } }; /// This test creates one single data file and one delete file. The parameter @@ -1021,12 +1313,7 @@ TEST_F(HiveIcebergTest, schemaEvolutionRemoveColumn) { })); // Read with new schema (c0 and c2 only, c1 removed). - auto plan = PlanBuilder() - .startTableScan() - .connectorId(kIcebergConnectorId) - .outputType(newRowType) - .endTableScan() - .planNode(); + auto plan = PlanBuilder().tableScan(newRowType).planNode(); AssertQueryBuilder(plan).splits(icebergSplits).assertResults(expectedVectors); } @@ -1052,13 +1339,8 @@ TEST_F(HiveIcebergTest, schemaEvolutionAddColumns) { })); // Read with new schema (c0, c1, and c2). 
- auto plan = PlanBuilder() - .startTableScan() - .connectorId(kIcebergConnectorId) - .outputType(newRowType) - .dataColumns(newRowType) - .endTableScan() - .planNode(); + auto plan = + PlanBuilder().tableScan(newRowType, {}, "", newRowType).planNode(); AssertQueryBuilder(plan).splits(icebergSplits).assertResults(expectedVectors); } @@ -1226,7 +1508,8 @@ TEST_F(HiveIcebergTest, addColumnWithInvalidDefault) { dataVectors.push_back(makeRowVector({makeFlatVector({1, 2, 3})})); auto dataFilePath = TempFilePath::create(); writeToFile(dataFilePath->getPath(), dataVectors); - auto icebergSplits = makeIcebergSplits(dataFilePath->getPath()); + auto icebergSplits = makeIcebergSplits( + dataFilePath->getPath(), {}, {}, 1, kIcebergConnectorId); ColumnHandleMap assignments; assignments["c0"] = makeC0Handle(); @@ -1301,8 +1584,12 @@ TEST_F(HiveIcebergTest, defaultValueWithDeletesAndFilters) { // Test 1: No filter - rows 1,3,5,7,8,9,10 (after deletes: 2,4,6 removed) { - auto icebergSplits = - makeIcebergSplits(dataFilePath->getPath(), {icebergDeleteFile}, {}, 1); + auto icebergSplits = makeIcebergSplits( + dataFilePath->getPath(), + {icebergDeleteFile}, + {}, + 1, + kIcebergConnectorId); std::vector expectedVectors; expectedVectors.push_back(makeRowVector( newRowType->names(), @@ -1326,8 +1613,12 @@ TEST_F(HiveIcebergTest, defaultValueWithDeletesAndFilters) { // Test 2: Filter on file column (c0 > 5) with deletes // After deletes: 1,3,5,7,8,9,10 remain. 
Filter c0 > 5: 7,8,9,10 { - auto icebergSplits = - makeIcebergSplits(dataFilePath->getPath(), {icebergDeleteFile}, {}, 1); + auto icebergSplits = makeIcebergSplits( + dataFilePath->getPath(), + {icebergDeleteFile}, + {}, + 1, + kIcebergConnectorId); std::vector expectedVectors; expectedVectors.push_back(makeRowVector( newRowType->names(), @@ -1351,8 +1642,12 @@ TEST_F(HiveIcebergTest, defaultValueWithDeletesAndFilters) { // Test 3: Filter on default value column (country = 'IN') with deletes // All remaining rows should match since default is 'IN' { - auto icebergSplits = - makeIcebergSplits(dataFilePath->getPath(), {icebergDeleteFile}, {}, 1); + auto icebergSplits = makeIcebergSplits( + dataFilePath->getPath(), + {icebergDeleteFile}, + {}, + 1, + kIcebergConnectorId); std::vector expectedVectors; expectedVectors.push_back(makeRowVector( newRowType->names(), @@ -1377,8 +1672,12 @@ TEST_F(HiveIcebergTest, defaultValueWithDeletesAndFilters) { // Test 4: Combined filter (c0 > 3 AND country = 'IN') with deletes // After deletes: 1,3,5,7,8,9,10. Filter c0 > 3: 5,7,8,9,10 { - auto icebergSplits = - makeIcebergSplits(dataFilePath->getPath(), {icebergDeleteFile}, {}, 1); + auto icebergSplits = makeIcebergSplits( + dataFilePath->getPath(), + {icebergDeleteFile}, + {}, + 1, + kIcebergConnectorId); std::vector expectedVectors; expectedVectors.push_back(makeRowVector( newRowType->names(), @@ -1442,12 +1741,7 @@ TEST_F(HiveIcebergTest, partitionColumnsFromHive) { // Read with table schema including partition columns. 
auto plan = PlanBuilder() - .startTableScan() - .connectorId(kIcebergConnectorId) - .outputType(tableRowType) - .dataColumns(tableRowType) - .assignments(assignments) - .endTableScan() + .tableScan(tableRowType, {}, "", tableRowType, assignments) .planNode(); AssertQueryBuilder(plan).splits(icebergSplits).assertResults(expectedVectors); } @@ -1679,7 +1973,7 @@ TEST_F(HiveIcebergTest, positionalDeleteFileWithRowGroupFilter) { // baseReadOffset tracked by Iceberg's split reader and the actual offset, // resulting in records in the position delete file being mapped to incorrect // rows. - auto path = test::getDataFilePath( + auto path = velox::test::getDataFilePath( "velox/connectors/hive/iceberg/test", "examples/three_groups.parquet"); const auto deletedPositionSize = 100; std::vector deletePositionsVec( @@ -1688,13 +1982,8 @@ TEST_F(HiveIcebergTest, positionalDeleteFileWithRowGroupFilter) { auto deleteFilePath = TempFilePath::create(); assertQuery( PlanBuilder() - .startTableScan() - .connectorId(kIcebergConnectorId) - .outputType(ROW({"id"}, {BIGINT()})) - .remainingFilter("id >= 100") - .endTableScan() + .tableScan(ROW({"id"}, {BIGINT()}), {"id >= 100"}) .planNode(), - createParquetDeleteFileAndSplits( path, deletePositionsVec, deletedPositionSize, deleteFilePath), "SELECT i AS id FROM range(100, 300) AS t(i)", @@ -1754,7 +2043,8 @@ TEST_F(HiveIcebergTest, positionalDeleteSequenceNumberApplied) { file->size(), std::unordered_map>{}, std::nullopt, - std::unordered_map{}, + std::unordered_map{ + {"table_format", "hive-iceberg"}}, nullptr, true, std::vector{deleteFile}, @@ -1820,7 +2110,8 @@ TEST_F(HiveIcebergTest, positionalDeleteSequenceNumberSkipped) { file->size(), std::unordered_map>{}, std::nullopt, - std::unordered_map{}, + std::unordered_map{ + {"table_format", "hive-iceberg"}}, nullptr, true, std::vector{deleteFile}, @@ -1890,7 +2181,8 @@ TEST_F(HiveIcebergTest, positionalDeleteSequenceNumberEqualApplied) { file->size(), std::unordered_map>{}, std::nullopt, 
- std::unordered_map{}, + std::unordered_map{ + {"table_format", "hive-iceberg"}}, nullptr, true, std::vector{deleteFile}, @@ -1956,7 +2248,8 @@ TEST_F(HiveIcebergTest, positionalDeleteSequenceNumberZeroDisablesFilter) { file->size(), std::unordered_map>{}, std::nullopt, - std::unordered_map{}, + std::unordered_map{ + {"table_format", "hive-iceberg"}}, nullptr, true, std::vector{deleteFile}, diff --git a/velox/connectors/hive/iceberg/tests/IcebergSortOrderTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergSortOrderTest.cpp new file mode 100644 index 00000000000..fae2df28f5a --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergSortOrderTest.cpp @@ -0,0 +1,602 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "velox/connectors/hive/iceberg/IcebergSplit.h" +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" +#include "velox/exec/tests/utils/PlanBuilder.h" + +using namespace facebook::velox::exec::test; + +namespace facebook::velox::connector::hive::iceberg::test { + +class IcebergSortOrderTest : public IcebergTestBase { + protected: + void SetUp() override { + IcebergTestBase::SetUp(); + rowType_ = ROW( + {"c_int", + "c_bigint", + "c_varchar", + "c_date", + "c_decimal", + "c_varbinary"}, + {INTEGER(), BIGINT(), VARCHAR(), DATE(), DECIMAL(18, 3), VARBINARY()}); + } + + // Verify data in the file is sorted according to the specified sort columns. 
+ void verifySortOrder( + const std::string& dataPath, + const std::vector& sortColumns) { + auto splits = createSplitsForDirectory(dataPath); + ASSERT_FALSE(splits.empty()) << "No data files found in " << dataPath; + + // Create a projection that selects all columns. + std::vector allColumns; + for (auto i = 0; i < rowType_->size(); ++i) { + allColumns.push_back(rowType_->nameOf(i)); + } + + auto plan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .project(allColumns) + .planNode(); + auto result = + AssertQueryBuilder(plan).splits(splits).copyResults(opPool_.get()); + + ASSERT_GT(result->size(), 0) << "No rows found in the data file"; + + // For each sort column, verify the data is sorted. + for (const auto& sortExpr : sortColumns) { + std::string columnName; + bool isAscending = true; + bool isNullsFirst = true; + + std::istringstream iss(sortExpr); + iss >> columnName; + std::string token; + if (iss >> token) { + if (token == "DESC") { + isAscending = false; + } else if (token != "ASC") { + iss.seekg(-(int32_t)token.length(), std::ios_base::cur); + } + + if (iss >> token && token == "NULLS") { + if (iss >> token && token == "LAST") { + isNullsFirst = false; + } else if (token != "FIRST") { + ASSERT_TRUE(token == "FIRST") + << "Invalid NULLS ordering: " << token; + } + } + } + + int32_t columnIndex = -1; + for (auto i = 0; i < rowType_->size(); ++i) { + if (rowType_->nameOf(i) == columnName) { + columnIndex = i; + break; + } + } + ASSERT_NE(columnIndex, -1) + << "Column " << columnName << " not found in row type"; + + auto columnVector = result->childAt(columnIndex); + bool hasNulls = false; + bool hasNonNulls = false; + vector_size_t firstNonNullIndex = 0; + vector_size_t lastNullIndex = 0; + + for (auto i = 0; i < columnVector->size(); ++i) { + if (columnVector->isNullAt(i)) { + hasNulls = true; + lastNullIndex = i; + } else { + if (!hasNonNulls) { + firstNonNullIndex = i; + hasNonNulls 
= true; + } + } + } + + if (hasNulls && hasNonNulls) { + if (isNullsFirst) { + ASSERT_LT(lastNullIndex, firstNonNullIndex) + << "NULL values should come before non-NULL values when NULLS FIRST is specified"; + } else { + ASSERT_GT(lastNullIndex, firstNonNullIndex) + << "NULL values should come after non-NULL values when NULLS LAST is specified"; + } + } + + DecodedVector decoded; + SelectivityVector rows(columnVector->size()); + decoded.decode(*columnVector, rows); + + for (auto i = 1; i < columnVector->size(); ++i) { + // Skip if either current or previous is null. + if (columnVector->isNullAt(i) || columnVector->isNullAt(i - 1)) { + continue; + } + + // Compare values based on type. + int32_t comparison = 0; + switch (auto kind = rowType_->childAt(columnIndex)->kind()) { + case TypeKind::INTEGER: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::BIGINT: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::VARCHAR: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::VARBINARY: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::HUGEINT: { + if (rowType_->childAt(columnIndex)->isLongDecimal()) { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 
1 : 0); + } + break; + } + default: + ASSERT_TRUE(false) + << "Unsupported column type for sorting verification: " << kind; + } + + if (isAscending) { + ASSERT_LE(comparison, 0) + << "Data not sorted in ascending order at row " << i + << " for column " << columnName; + } else { + ASSERT_GE(comparison, 0) + << "Data not sorted in descending order at row " << i + << " for column " << columnName; + } + + // If values are equal, continue to next row. + if (comparison == 0) { + continue; + } + break; + } + } + } + + // Verify that data is sorted according to multiple sort columns. + void verifyMultiColumnSortOrder( + const std::string& dataPath, + const std::vector& sortColumns) { + auto splits = createSplitsForDirectory(dataPath); + ASSERT_FALSE(splits.empty()) << "No data files found in " << dataPath; + std::vector allColumns; + for (auto i = 0; i < rowType_->size(); ++i) { + allColumns.push_back(rowType_->nameOf(i)); + } + + auto plan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .project(allColumns) + .planNode(); + auto result = + AssertQueryBuilder(plan).splits(splits).copyResults(opPool_.get()); + + ASSERT_GT(result->size(), 0) << "No rows found in the data file"; + + std::vector columnNames; + std::vector isAscending; + std::vector isNullsFirst; + std::vector columnIndices; + + for (const auto& sortExpr : sortColumns) { + std::string columnName; + bool ascending = true; + bool nullsFirst = true; + + std::istringstream iss(sortExpr); + iss >> columnName; + std::string token; + if (iss >> token) { + if (token == "DESC") { + ascending = false; + } else if (token != "ASC") { + iss.seekg(-(int32_t)token.length(), std::ios_base::cur); + } + + if (iss >> token && token == "NULLS") { + if (iss >> token && token == "LAST") { + nullsFirst = false; + } else if (token != "FIRST") { + ASSERT_TRUE(token == "FIRST") + << "Invalid NULLS ordering: " << token; + } + } + } + + int32_t columnIndex = -1; + 
for (auto i = 0; i < rowType_->size(); ++i) { + if (rowType_->nameOf(i) == columnName) { + columnIndex = i; + break; + } + } + ASSERT_NE(columnIndex, -1) + << "Column " << columnName << " not found in row type"; + + columnNames.push_back(columnName); + isAscending.push_back(ascending); + isNullsFirst.push_back(nullsFirst); + columnIndices.push_back(columnIndex); + } + + // Verify the sort order row by row. + for (auto i = 1; i < result->size(); ++i) { + // Compare row i-1 with row i using all sort columns in order. + for (size_t colIdx = 0; colIdx < columnIndices.size(); ++colIdx) { + int32_t columnIndex = columnIndices[colIdx]; + auto columnVector = result->childAt(columnIndex); + bool ascending = isAscending[colIdx]; + bool nullsFirst = isNullsFirst[colIdx]; + bool prevIsNull = columnVector->isNullAt(i - 1); + bool currIsNull = columnVector->isNullAt(i); + + if (prevIsNull && currIsNull) { + // Both null, continue to next column. + continue; + } else if (prevIsNull) { + // Previous is null, current is not. + ASSERT_TRUE(nullsFirst) + << "NULL values should come last at row " << (i - 1) + << " for column " << columnNames[colIdx] << " in " << dataPath; + break; + } else if (currIsNull) { + // Current is null, previous is not. + ASSERT_FALSE(nullsFirst) + << "NULL values should come first at row " << i << " for column " + << columnNames[colIdx] << " in " << dataPath; + break; + } + + // Both values are non-null, compare them. + DecodedVector decoded; + SelectivityVector rows(columnVector->size()); + decoded.decode(*columnVector, rows); + + int32_t comparison = 0; + switch (auto kind = rowType_->childAt(columnIndex)->kind()) { + case TypeKind::INTEGER: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::BIGINT: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 
1 : 0); + break; + } + case TypeKind::VARCHAR: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::VARBINARY: { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + break; + } + case TypeKind::HUGEINT: { + if (rowType_->childAt(columnIndex)->isLongDecimal()) { + auto prev = decoded.valueAt(i - 1); + auto curr = decoded.valueAt(i); + comparison = prev < curr ? -1 : (prev > curr ? 1 : 0); + } + break; + } + default: + ASSERT_TRUE(false) + << "Unsupported column type for sorting verification: " << kind; + } + + if (comparison != 0) { + if (ascending) { + ASSERT_LE(comparison, 0) + << "Data not sorted in ascending order at row " << i + << " for column " << columnNames[colIdx] << " in " << dataPath + << ". Previous value: " << columnVector->toString(i - 1) + << ", Current value: " << columnVector->toString(i); + } else { + ASSERT_GE(comparison, 0) + << "Data not sorted in descending order at row " << i + << " for column " << columnNames[colIdx] << " in " << dataPath + << ". Previous value: " << columnVector->toString(i - 1) + << ", Current value: " << columnVector->toString(i); + } + // Found definitive ordering, no need to check further columns. + break; + } + // If values are equal, continue to next column. + } + // Rows can be equal across all sort columns. 
+ } + } + + std::vector stringBuffer_; + VectorFuzzer::Options fuzzerOptions_; + std::unique_ptr fuzzer_; + static constexpr auto numBatches = 10; + static constexpr auto rowsPerBatch = 1'000; + + void testSorting( + const std::vector& sortExpressions, + double nullRatio = 0.0) { + std::vector vectors = + createTestData(rowType_, numBatches, rowsPerBatch, nullRatio); + auto outputDirectory = TempDirectoryPath::create(); + + auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), {}, sortExpressions); + + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + if (sortExpressions.size() == 1) { + verifySortOrder(outputDirectory->getPath(), sortExpressions); + } else { + verifyMultiColumnSortOrder(outputDirectory->getPath(), sortExpressions); + } + } + + void testSortingWithPartitioning( + const std::vector& partitionTransforms, + const std::vector& sortExpressions, + const double nullRatio = 0.0) { + std::vector vectors = + createTestData(rowType_, numBatches, rowsPerBatch, nullRatio); + const auto outputDirectory = TempDirectoryPath::create(); + + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + partitionTransforms, + sortExpressions); + + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + // For partitioned data, we need to find all partition directories. + std::vector partitionDirs; + std::function findLeafDataDirs = + [&partitionDirs, &findLeafDataDirs](const std::string& dir) { + bool hasSubDirs = false; + + for (const auto& entry : std::filesystem::directory_iterator(dir)) { + if (entry.is_directory()) { + hasSubDirs = true; + findLeafDataDirs(entry.path().string()); + } + } + if (!hasSubDirs) { + partitionDirs.push_back(dir); + } + }; + + // Start the recursive search from the data directory. 
+ if (std::filesystem::exists(outputDirectory->getPath())) { + findLeafDataDirs(outputDirectory->getPath()); + } + if (partitionDirs.empty()) { + partitionDirs.push_back(outputDirectory->getPath()); + } + + // Verify each partition directory has properly sorted data. + ASSERT_FALSE(partitionDirs.empty()) << "No partition directories found"; + for (const auto& partitionDir : partitionDirs) { + if (sortExpressions.size() == 1) { + verifySortOrder(partitionDir, sortExpressions); + } else { + verifyMultiColumnSortOrder(partitionDir, sortExpressions); + } + } + } +}; + +TEST_F(IcebergSortOrderTest, singleColumnSortDefault) { + testSorting({"c_int"}); + testSorting({"c_bigint"}); + testSorting({"c_varchar"}); + testSorting({"c_date"}); + testSorting({"c_decimal"}); + testSorting({"c_varbinary"}); +} + +TEST_F(IcebergSortOrderTest, singleColumnSortDesc) { + testSorting({"c_int DESC"}); + testSorting({"c_bigint DESC"}); + testSorting({"c_varchar DESC"}); + testSorting({"c_date DESC"}); + testSorting({"c_decimal DESC"}); + testSorting({"c_varbinary DESC"}); +} + +TEST_F(IcebergSortOrderTest, nullOrderingFirst) { + testSorting({"c_int ASC NULLS FIRST"}, 0.2); + testSorting({"c_bigint ASC NULLS FIRST"}, 0.2); + testSorting({"c_varchar ASC NULLS FIRST"}, 0.3); + testSorting({"c_date ASC NULLS FIRST"}, 0.3); + testSorting({"c_decimal ASC NULLS FIRST"}, 0.2); + testSorting({"c_varbinary ASC NULLS FIRST"}, 0.2); + + testSorting({"c_varbinary DESC NULLS FIRST"}, 0.2); + testSorting({"c_int DESC NULLS FIRST"}, 0.2); + testSorting({"c_bigint DESC NULLS FIRST"}, 0.2); + testSorting({"c_varchar DESC NULLS FIRST"}, 0.3); + testSorting({"c_date DESC NULLS FIRST"}, 0.3); + testSorting({"c_decimal DESC NULLS FIRST"}, 0.2); +} + +TEST_F(IcebergSortOrderTest, nullOrderingLast) { + testSorting({"c_int ASC NULLS LAST"}, 0.2); + testSorting({"c_bigint ASC NULLS LAST"}, 0.2); + testSorting({"c_varchar ASC NULLS LAST"}, 0.2); + testSorting({"c_date ASC NULLS LAST"}, 0.2); + 
testSorting({"c_decimal ASC NULLS LAST"}, 0.2); + testSorting({"c_varbinary ASC NULLS LAST"}, 0.2); + + testSorting({"c_varbinary DESC NULLS LAST"}, 0.2); + testSorting({"c_int DESC NULLS LAST"}, 0.2); + testSorting({"c_bigint DESC NULLS LAST"}, 0.2); + testSorting({"c_varchar DESC NULLS LAST"}, 0.2); + testSorting({"c_date DESC NULLS LAST"}, 0.2); + testSorting({"c_decimal DESC NULLS LAST"}, 0.2); +} + +TEST_F(IcebergSortOrderTest, multiColumnSort) { + testSorting({"c_int ASC", "c_bigint DESC"}); + testSorting({"c_int ASC", "c_bigint ASC"}); + testSorting({"c_int DESC", "c_bigint DESC"}); + testSorting({"c_int DESC", "c_bigint ASC"}); + + testSorting({"c_int ASC", "c_varchar DESC"}); + testSorting({"c_int ASC", "c_varchar ASC"}); + testSorting({"c_int DESC", "c_varchar DESC"}); + testSorting({"c_int DESC", "c_varchar ASC"}); + + testSorting({"c_varchar ASC", "c_date DESC"}); + testSorting({"c_varchar ASC", "c_date ASC"}); + testSorting({"c_varchar DESC", "c_date DESC"}); + testSorting({"c_varchar DESC", "c_date ASC"}); + + testSorting({"c_int ASC", "c_decimal DESC"}); + testSorting({"c_decimal ASC", "c_varbinary DESC"}); + testSorting({"c_varbinary ASC", "c_decimal DESC"}); +} + +TEST_F(IcebergSortOrderTest, multiColumnSortWithNull) { + testSorting({"c_int", "c_bigint", "c_varchar"}, 0.2); + testSorting({"c_int", "c_bigint DESC NULLS LAST"}, 0.4); + testSorting( + {"c_int ASC NULLS FIRST", + "c_bigint DESC NULLS LAST", + "c_varchar ASC NULLS FIRST"}, + 0.2); + testSorting( + {"c_int ASC NULLS FIRST", "c_bigint", "c_varchar ASC NULLS FIRST"}, 0.2); + testSorting( + {"c_int ASC NULLS LAST", "c_bigint", "c_varchar ASC NULLS FIRST"}, 0.2); + testSorting( + {"c_int DESC NULLS LAST", "c_bigint", "c_varchar ASC NULLS LAST"}, 0.2); + testSorting( + {"c_int ASC NULLS FIRST", "c_bigint DESC", "c_varchar ASC NULLS FIRST"}, + 0.2); + testSorting( + {"c_int ASC NULLS FIRST", + "c_bigint DESC NULLS LAST", + "c_varchar ASC NULLS FIRST"}, + 0.2); + testSorting( + {"c_int ASC 
NULLS FIRST", "c_bigint DESC", "c_varchar ASC NULLS LAST"}, + 0.2); + testSorting( + {"c_int ASC NULLS FIRST", + "c_bigint DESC NULLS LAST", + "c_varchar DESC NULLS FIRST"}, + 0.2); + + testSorting( + {"c_int ASC NULLS FIRST", + "c_decimal DESC NULLS LAST", + "c_varbinary ASC NULLS FIRST"}, + 0.2); +} + +TEST_F(IcebergSortOrderTest, sortWithSinglePartitioning) { + testSortingWithPartitioning({{3, TransformType::kBucket, 5}}, {"c_int ASC"}); + testSortingWithPartitioning( + {{0, TransformType::kBucket, 7}}, {"c_varchar ASC"}); +} + +TEST_F(IcebergSortOrderTest, sortWithPartitioningOnSameColumn) { + testSortingWithPartitioning({{3, TransformType::kBucket, 5}}, {"c_date ASC"}); + testSortingWithPartitioning({{0, TransformType::kBucket, 7}}, {"c_int ASC"}); + testSortingWithPartitioning( + {{2, TransformType::kBucket, 4}}, {"c_varchar DESC"}); +} + +TEST_F(IcebergSortOrderTest, sortWithMultiPartitioning) { + testSortingWithPartitioning( + {{3, TransformType::kBucket, 3}, {2, TransformType::kBucket, 4}}, + {"c_int ASC", "c_bigint DESC"}); + + testSortingWithPartitioning( + {{3, TransformType::kTruncate, 10000}}, {"c_int ASC", "c_bigint DESC"}); +} + +TEST_F(IcebergSortOrderTest, sortWithPartitioningAndNulls) { + testSortingWithPartitioning( + {{0, TransformType::kBucket, 8}}, + {"c_int ASC NULLS FIRST", "c_bigint DESC NULLS LAST"}, + 0.2); + + testSortingWithPartitioning( + {{2, TransformType::kBucket, 8}}, + {"c_varchar ASC NULLS FIRST", "c_int DESC NULLS LAST"}, + 0.2); + + testSortingWithPartitioning( + {{4, TransformType::kBucket, 8}}, + {"c_decimal ASC NULLS FIRST", "c_int DESC NULLS LAST"}, + 0.3); + + testSortingWithPartitioning( + {{5, TransformType::kBucket, 8}}, + {"c_varbinary ASC NULLS FIRST", "c_int DESC NULLS LAST"}, + 0.3); +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/IcebergSplitReaderBenchmark.cpp b/velox/connectors/hive/iceberg/tests/IcebergSplitReaderBenchmark.cpp index 
e1ba2c4269f..fe865a62e5d 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergSplitReaderBenchmark.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergSplitReaderBenchmark.cpp @@ -15,9 +15,11 @@ */ #include "velox/connectors/hive/iceberg/tests/IcebergSplitReaderBenchmark.h" +#include #include #include "velox/connectors/hive/HiveConfig.h" +#include "velox/vector/tests/utils/VectorMaker.h" using namespace facebook::velox; using namespace facebook::velox::dwio; @@ -101,6 +103,8 @@ IcebergSplitReaderBenchmark::makeIcebergSplit( const std::string& dataFilePath, const std::vector& deleteFiles) { std::unordered_map> partitionKeys; + std::unordered_map customSplitInfo; + customSplitInfo["table_format"] = "hive-iceberg"; auto readFile = std::make_shared(dataFilePath); const int64_t fileSize = readFile->size(); @@ -113,7 +117,7 @@ IcebergSplitReaderBenchmark::makeIcebergSplit( fileSize, partitionKeys, std::nullopt, - std::unordered_map{}, + customSplitInfo, nullptr, /*cacheable=*/true, deleteFiles); @@ -330,6 +334,10 @@ void IcebergSplitReaderBenchmark::readSingleColumn( suspender.dismiss(); + auto ioExecutor = std::make_unique(3); + std::shared_ptr remainingFilterExprSet{nullptr}; + std::atomic totalRemainingFilterMs; + uint64_t resultSize = 0; for (const auto& split : splits) { scanSpec->resetCachedValues(true); @@ -346,9 +354,12 @@ void IcebergSplitReaderBenchmark::readSingleColumn( metadataIoStatistics, ioStats, &fileHandleFactory, - nullptr, + ioExecutor.get(), scanSpec, - nullptr); + nullptr, + nullptr, // infoColumns + std::vector{}, // bucketChannels + nullptr); // subfieldFiltersForValidation std::shared_ptr randomSkip; icebergSplitReader->configureReaderOptions(randomSkip); diff --git a/velox/connectors/hive/iceberg/tests/IcebergStatsTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergStatsTest.cpp new file mode 100644 index 00000000000..cb79cd5a504 --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergStatsTest.cpp @@ -0,0 +1,1395 @@ +/* + * 
Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" + +namespace facebook::velox::connector::hive::iceberg::test { +class IcebergStatsTest : public IcebergTestBase { + protected: + void SetUp() override { + IcebergTestBase::SetUp(); + rowType_ = + ROW({"c_int", "c_bigint", "c_varchar", "c_date", "c_decimal"}, + {INTEGER(), BIGINT(), VARCHAR(), DATE(), DECIMAL(18, 3)}); + } + + void TearDown() override { + IcebergTestBase::TearDown(); + } +}; + +TEST_F(IcebergStatsTest, mixedNullTest) { + auto rowType = ROW({"int_col"}, {INTEGER()}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedIntNulls = 34; + + auto rowVector = makeRowVector({makeFlatVector( + size, [](vector_size_t row) { return row * 10; }, nullEvery(3))}); + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t 
intColId = 1; + EXPECT_EQ(stats->valueCounts.at(intColId), size) + << "Int column value count incorrect"; + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + + EXPECT_EQ(stats->nullValueCounts.at(intColId), expectedIntNulls) + << "Int column null count incorrect"; + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(intColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_EQ(lb, 10); + EXPECT_FALSE(stats->lowerBounds.at(intColId).empty()) + << "Int column should have non-empty lower bound"; + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(intColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_EQ(ub, 980); + EXPECT_FALSE(stats->upperBounds.at(intColId).empty()) + << "Int column should have non-empty upper bound"; +} + +TEST_F(IcebergStatsTest, bigintStatsTest) { + auto rowType = ROW({"bigint_col"}, {BIGINT()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 25; + + auto rowVector = makeRowVector({makeFlatVector( + size, + [](vector_size_t row) { return row * 1'000'000'000LL; }, + nullEvery(4))}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t bigintColId = 1; + 
EXPECT_EQ(stats->valueCounts.at(bigintColId), size) + << "Bigint column value count incorrect"; + + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(bigintColId), expectedNulls) + << "Bigint column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(bigintColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_EQ(lb, 1'000'000'000LL); + + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(bigintColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_EQ(ub, 99'000'000'000LL); + folly::dynamic json = stats->toJson(); + std::string jsonstring = folly::toJson(json); +} + +TEST_F(IcebergStatsTest, decimalStatsTest) { + auto rowType = ROW({"decimal_col"}, {DECIMAL(38, 3)}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 20; + + auto rowVector = makeRowVector({makeFlatVector( + size, + [](vector_size_t row) { return HugeInt::build(row, row * 123); }, + nullEvery(5), + DECIMAL(38, 3))}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t decimalColId = 1; + EXPECT_EQ(stats->valueCounts.at(decimalColId), size) + << "Decimal column value count 
incorrect"; + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(decimalColId), expectedNulls) + << "Decimal column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + EXPECT_FALSE(stats->lowerBounds.at(decimalColId).empty()) + << "Decimal column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(decimalColId).empty()) + << "Decimal column should have non-empty upper bound"; +} + +TEST_F(IcebergStatsTest, varcharStatsTest) { + auto rowType = ROW({"varchar_col"}, {VARCHAR()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 0; + + auto varcharVector = BaseVector::create(VARCHAR(), size, opPool_.get()); + auto flatVarcharVector = varcharVector->asFlatVector(); + for (auto i = 0; i < size; ++i) { + if (i % 6 == 0) { + flatVarcharVector->setNull(i, true); + expectedNulls++; + } else { + std::string value = + "Customer#00000" + std::to_string(i) + "_" + std::string(i % 10, 'a'); + flatVarcharVector->set(i, StringView(value)); + } + } + + auto rowVector = makeRowVector({varcharVector}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t varcharColId = 1; + EXPECT_EQ(stats->valueCounts.at(varcharColId), size) + << "Varchar column value count 
incorrect"; + + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(varcharColId), expectedNulls) + << "Varchar column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + EXPECT_FALSE(stats->lowerBounds.at(varcharColId).empty()) + << "Varchar column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(varcharColId).empty()) + << "Varchar column should have non-empty upper bound"; + + // Decode and verify string bounds. + std::string lowerBound = + encoding::Base64::decode(stats->lowerBounds.at(varcharColId)); + std::string upperBound = + encoding::Base64::decode(stats->upperBounds.at(varcharColId)); + EXPECT_TRUE(lowerBound.find("Customer#00000") != std::string::npos) + << "Lower bound should contain 'Customer#00000'"; + EXPECT_TRUE(upperBound.find("Customer#000009") != std::string::npos) + << "Upper bound should contain 'Customer#000009'"; +} + +TEST_F(IcebergStatsTest, varbinaryStatsTest) { + auto rowType = ROW({"varbinary_col"}, {VARBINARY()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 0; + + auto varbinaryVector = BaseVector::create(VARBINARY(), size, opPool_.get()); + auto flatVarbinaryVector = varbinaryVector->asFlatVector(); + for (auto i = 0; i < size; ++i) { + if (i % 5 == 0) { + flatVarbinaryVector->setNull(i, true); + expectedNulls++; + } else { + // Create binary values with varying content. 
+ std::string value(17, 11); + value[0] = static_cast(i % 256); + value[1] = static_cast((i * 3) % 256); + value[2] = static_cast((i * 7) % 256); + value[3] = static_cast((i * 11) % 256); + flatVarbinaryVector->set(i, StringView(value)); + } + } + + auto rowVector = makeRowVector({varbinaryVector}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t varbinaryColId = 1; + EXPECT_EQ(stats->valueCounts.at(varbinaryColId), size) + << "Varbinary column value count incorrect"; + + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(varbinaryColId), expectedNulls) + << "Varbinary column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + EXPECT_FALSE(stats->lowerBounds.at(varbinaryColId).empty()) + << "Varbinary column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(varbinaryColId).empty()) + << "Varbinary column should have non-empty upper bound"; +} + +TEST_F(IcebergStatsTest, varbinaryStatsTest2) { + auto rowType = ROW({"varbinary_col"}, {VARBINARY()}); + + auto outputDir = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {0, TransformType::kBucket, 4}}; + auto dataSink = + createIcebergDataSink(rowType, outputDir->getPath(), partitionTransforms); + constexpr vector_size_t size = 10; + + auto varbinaryVector = BaseVector::create(VARBINARY(), size, 
opPool_.get()); + auto flatVarbinaryVector = varbinaryVector->asFlatVector(); + std::string values[] = { + "01020304", + "05060708", + "090A0B0C", + "0D0E0F10", + "11121314", + "15161718", + "191A1B1C", + "1D1E1F20", + "21222324", + "25262728"}; + for (auto i = 0; i < size; ++i) { + flatVarbinaryVector->set(i, StringView(values[i])); + } + + auto rowVector = makeRowVector({"varbinary_col"}, {varbinaryVector}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 3) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, 5) << "Record count should match input size"; + + constexpr int32_t varbinaryColId = 1; + EXPECT_EQ(stats->valueCounts.at(varbinaryColId), 5); +} + +TEST_F(IcebergStatsTest, multipleDataTypesTest) { + auto rowType = ROW( + {"int_col", "bigint_col", "decimal_col", "varchar_col", "varbinary_col"}, + {INTEGER(), BIGINT(), DECIMAL(38, 3), VARCHAR(), VARBINARY()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + + int32_t expectedIntNulls = 34; + int32_t expectedBigintNulls = 25; + int32_t expectedDecimalNulls = 20; + int32_t expectedVarcharNulls = 0; + int32_t expectedVarbinaryNulls = 0; + // Create columns with different null patterns + auto intVector = makeFlatVector( + size, [](vector_size_t row) { return row * 10; }, nullEvery(3)); + + auto bigintVector = makeFlatVector( + size, + [](vector_size_t row) { return row * 1'000'000'000LL; }, + nullEvery(4)); + + auto decimalVector = makeFlatVector( + size, + [](vector_size_t row) { return HugeInt::build(row, row * 12'345); }, + nullEvery(5), + DECIMAL(38, 3)); + + auto varcharVector = BaseVector::create(VARCHAR(), size, opPool_.get()); + auto 
flatVarcharVector = varcharVector->asFlatVector(); + for (auto i = 0; i < size; ++i) { + if (i % 6 == 0) { + flatVarcharVector->setNull(i, true); + expectedVarcharNulls++; + } else { + std::string value = "str_" + std::to_string(i); + flatVarcharVector->set(i, StringView(value)); + } + } + + auto varbinaryVector = BaseVector::create(VARBINARY(), size, opPool_.get()); + auto flatVarbinaryVector = varbinaryVector->asFlatVector(); + for (auto i = 0; i < size; ++i) { + if (i % 7 == 0) { + flatVarbinaryVector->setNull(i, true); + expectedVarbinaryNulls++; + } else { + std::string value(4, 0); + value[0] = static_cast(i % 256); + value[1] = static_cast((i * 3) % 256); + value[2] = static_cast((i * 7) % 256); + value[3] = static_cast((i * 11) % 256); + flatVarbinaryVector->set(i, StringView(value)); + } + } + + auto rowVector = makeRowVector( + {intVector, bigintVector, decimalVector, varcharVector, varbinaryVector}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + + constexpr int32_t intColId = 1; + constexpr int32_t bigintColId = 2; + constexpr int32_t decimalColId = 3; + constexpr int32_t varcharColId = 4; + constexpr int32_t varbinaryColId = 5; + + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(intColId), expectedIntNulls) + << "Int column null count incorrect"; + EXPECT_EQ(stats->nullValueCounts.at(bigintColId), expectedBigintNulls) + << "Bigint column null count incorrect"; + EXPECT_EQ(stats->nullValueCounts.at(decimalColId), expectedDecimalNulls) + << 
"Decimal column null count incorrect"; + EXPECT_EQ(stats->nullValueCounts.at(varcharColId), expectedVarcharNulls) + << "Varchar column null count incorrect"; + EXPECT_EQ(stats->nullValueCounts.at(varbinaryColId), expectedVarbinaryNulls) + << "Varbinary column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + + // Verify all columns have non-empty bounds. + EXPECT_FALSE(stats->lowerBounds.at(intColId).empty()) + << "Int column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(intColId).empty()) + << "Int column should have non-empty upper bound"; + + EXPECT_FALSE(stats->lowerBounds.at(bigintColId).empty()) + << "Bigint column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(bigintColId).empty()) + << "Bigint column should have non-empty upper bound"; + + EXPECT_FALSE(stats->lowerBounds.at(decimalColId).empty()) + << "Decimal column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(decimalColId).empty()) + << "Decimal column should have non-empty upper bound"; + + EXPECT_FALSE(stats->lowerBounds.at(varcharColId).empty()) + << "Varchar column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(varcharColId).empty()) + << "Varchar column should have non-empty upper bound"; + + EXPECT_FALSE(stats->lowerBounds.at(varbinaryColId).empty()) + << "Varbinary column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(varbinaryColId).empty()) + << "Varbinary column should have non-empty upper bound"; +} + +TEST_F(IcebergStatsTest, dateStatsTest) { + auto rowType = ROW({"date_col"}, {DATE()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 20; + + auto rowVector = 
makeRowVector({makeFlatVector( + size, + [](vector_size_t row) { return 18262 + row; }, + nullEvery(5), + DATE())}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t dateColId = 1; + EXPECT_EQ(stats->valueCounts.at(dateColId), size) + << "Date column value count incorrect"; + + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(dateColId), expectedNulls) + << "Date column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + EXPECT_FALSE(stats->lowerBounds.at(dateColId).empty()) + << "Date column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(dateColId).empty()) + << "Date column should have non-empty upper bound"; + + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(dateColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_EQ(lb, 18263) << "Lower bound should be 2020-01-02"; + + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(dateColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_EQ(ub, 18262 + 99) << "Upper bound should be 2020-04-09"; +} + +TEST_F(IcebergStatsTest, booleanStatsTest) { + auto rowType = ROW({"boolean_col"}, {BOOLEAN()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr 
vector_size_t size = 100; + auto expectedNulls = 10; + + auto rowVector = makeRowVector({makeFlatVector( + size, + [](vector_size_t row) { return row % 2 == 1; }, + nullEvery(10), + BOOLEAN())}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t boolColId = 1; + EXPECT_EQ(stats->valueCounts.at(boolColId), size) + << "Boolean column value count incorrect"; + + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(boolColId), expectedNulls) + << "Boolean column null count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + EXPECT_FALSE(stats->lowerBounds.at(boolColId).empty()) + << "Boolean column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(boolColId).empty()) + << "Boolean column should have non-empty upper bound"; + + // For boolean, the lower bound should be false (0) and upper bound should be + // true (1) if both values are present. 
+ std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(boolColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_FALSE(lb) << "Lower bound should be false"; + + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(boolColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_TRUE(ub) << "Upper bound should be true"; +} + +TEST_F(IcebergStatsTest, emptyStatsTest) { + auto rowType = ROW({"int_col", "varchar_col"}, {INTEGER(), VARCHAR()}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + // Create an empty row vector (0 rows) + constexpr vector_size_t size = 0; + auto rowVector = makeRowVector( + {makeFlatVector(0), makeFlatVector(0)}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + EXPECT_EQ(stats->numRecords, size) << "Record count should be 0"; + ASSERT_TRUE(stats->valueCounts.empty()) + << "Should no value counts for columns"; +} + +TEST_F(IcebergStatsTest, nullValuesTest) { + auto rowType = ROW({"int_col", "varchar_col"}, {INTEGER(), VARCHAR()}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + // Create an empty row vector (0 rows) + constexpr vector_size_t size = 100; + auto rowVector = makeRowVector( + {makeNullConstant(TypeKind::INTEGER, size), + makeNullConstant(TypeKind::VARCHAR, size)}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& 
stats = fileStats[0]; + EXPECT_EQ(stats->numRecords, size) << "Record count should be 0"; + ASSERT_EQ(stats->nullValueCounts.at(1), size) << "All values is NULL."; + // Do not collect lower and upper bounds for NULLs. + ASSERT_EQ(stats->lowerBounds.size(), 0); + ASSERT_EQ(stats->upperBounds.size(), 0); +} + +TEST_F(IcebergStatsTest, realStatsTest) { + auto rowType = ROW({"real_col"}, {REAL()}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 15; + auto expectedNaNs = 0; + + auto rowVector = makeRowVector({makeFlatVector( + size, + [&](vector_size_t row) { + if (row % 3 == 0) { + expectedNaNs++; + return std::numeric_limits::quiet_NaN(); + } + if (row % 4 == 0) { + return std::numeric_limits::infinity(); + } + if (row % 5 == 0) { + return -std::numeric_limits::infinity(); + } + return row * 1.5f; + }, + nullEvery(7), + REAL())}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t realColId = 1; + EXPECT_EQ(stats->valueCounts.at(realColId), size) + << "Real column value count incorrect"; + + EXPECT_EQ(stats->nullValueCounts.at(realColId), expectedNulls) + << "Real column null count incorrect"; + // EXPECT_EQ(stats->nanValueCounts.at(realColId), expectedNaNs) + // << "Real column NaN count incorrect"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + + // Verify 
bounds are set correctly and NaN/infinity values don't affect + // min/max incorrectly. + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(realColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_FLOAT_EQ(lb, -std::numeric_limits::infinity()) + << "Lower bound should be -infinity"; + + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(realColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_FLOAT_EQ(ub, std::numeric_limits::infinity()) + << "Upper bound should be infinity"; +} + +TEST_F(IcebergStatsTest, doubleStatsTest) { + auto rowType = ROW({"double_col"}, {DOUBLE()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 15; + auto expectedNaNs = 0; + + auto rowVector = makeRowVector({makeFlatVector( + size, + [&](vector_size_t row) { + if (row % 3 == 0) { + expectedNaNs++; + return std::numeric_limits::quiet_NaN(); + } + if (row % 4 == 0) { + return std::numeric_limits::infinity(); + } + if (row % 5 == 0) { + return -std::numeric_limits::infinity(); + } + return row * 2.5; + }, + nullEvery(7), + DOUBLE())}); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t doubleColId = 1; + EXPECT_EQ(stats->valueCounts.at(doubleColId), size) + << "Double column value count incorrect"; + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + 
EXPECT_EQ(stats->nullValueCounts.at(doubleColId), expectedNulls) + << "Double column null count incorrect"; + // EXPECT_EQ(stats->nanValueCounts.at(doubleColId), expectedNaNs) + // << "Double column null count incorrect"; + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + + // Verify bounds are set correctly and NaN/infinity values don't affect + // min/max incorrectly. + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(doubleColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_DOUBLE_EQ(lb, -std::numeric_limits::infinity()) + << "Lower bound should be -infinity"; + + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(doubleColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_DOUBLE_EQ(ub, std::numeric_limits::infinity()) + << "Upper bound should be infinity"; +} + +TEST_F(IcebergStatsTest, MixedDoubleFloatStatsTest) { + std::vector names = {"id", "data1", "data2", "data3"}; + auto rowType = ROW(names, {INTEGER(), REAL(), DOUBLE(), DOUBLE()}); + + auto outputDir = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {0, TransformType::kIdentity, std::nullopt}}; + + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 6; + + auto intVector = + makeFlatVector(size, [](vector_size_t row) { return 1; }); + auto floatVector = makeFlatVector(size, [](vector_size_t row) { + return -std::numeric_limits::infinity(); + }); + auto doubleVector1 = makeFlatVector(size, [](vector_size_t row) { + return std::numeric_limits::infinity(); + }); + auto doubleVector2 = makeFlatVector(size, [](vector_size_t row) { + switch (row) { + case 0: + return 1.23; + case 1: + return -1.23; + case 2: + return std::numeric_limits::infinity(); + case 3: + return 2.23; + case 4: + return -std::numeric_limits::infinity(); + 
default: + return -2.23; + } + }); + + ASSERT_TRUE( + -std::numeric_limits::infinity() < + std::numeric_limits::min() && + std::numeric_limits::min() < + std::numeric_limits::infinity()); + auto rowVector = makeRowVector( + names, {intVector, floatVector, doubleVector1, doubleVector2}); + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t doubleColId = 4; + EXPECT_EQ(stats->valueCounts.at(doubleColId), size) + << "Double column value count incorrect"; + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(doubleColId)); + auto lb = *reinterpret_cast(lowerBounds.data()); + EXPECT_DOUBLE_EQ(lb, -std::numeric_limits::infinity()) + << "Lower bound should be -infinity"; + + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(doubleColId)); + auto ub = *reinterpret_cast(upperBounds.data()); + EXPECT_DOUBLE_EQ(ub, std::numeric_limits::infinity()) + << "Upper bound should be infinity"; + + lowerBounds = encoding::Base64::decode(stats->lowerBounds.at(2)); + auto flb = *reinterpret_cast(lowerBounds.data()); + EXPECT_DOUBLE_EQ(flb, -std::numeric_limits::infinity()) + << "Lower bound should be -infinity"; + + upperBounds = encoding::Base64::decode(stats->upperBounds.at(2)); + auto fub = *reinterpret_cast(upperBounds.data()); + EXPECT_DOUBLE_EQ(fub, -std::numeric_limits::infinity()) + << "Upper bound should be -infinity too"; +} + +TEST_F(IcebergStatsTest, NaNStatsTest) { + auto rowType = 
ROW({"double_col"}, {DOUBLE()}); + + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 1'000; + auto expectedNulls = 500; + auto expectedNaNs = 0; + + auto rowVector = makeRowVector({makeFlatVector( + size, + [&](vector_size_t row) { + expectedNaNs++; + return std::numeric_limits::quiet_NaN(); + }, + nullEvery(2), + DOUBLE())}); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + constexpr int32_t doubleColId = 1; + EXPECT_EQ(stats->valueCounts.at(doubleColId), size) + << "Double column value count incorrect"; + ASSERT_FALSE(stats->nullValueCounts.empty()) + << "Should have null counts for columns"; + EXPECT_EQ(stats->nullValueCounts.at(doubleColId), expectedNulls) + << "Double column null count incorrect"; + // EXPECT_EQ(stats->nanValueCounts.at(doubleColId), expectedNaNs) + // << "Double column null count incorrect"; + + // Do not collect bounds for NULLs and NaNs. 
+ ASSERT_TRUE(stats->lowerBounds.empty()) + << "Should not have lower bounds for columns"; + ASSERT_TRUE(stats->upperBounds.empty()) + << "Should not have upper bounds for columns"; +} + +TEST_F(IcebergStatsTest, partitionedTableStatsTest) { + auto rowType = ROW( + {"int_col", "date_col", "varchar_col"}, {INTEGER(), DATE(), VARCHAR()}); + auto outputDir = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {0, TransformType::kBucket, 4}, + {1, TransformType::kDay, std::nullopt}, + {2, TransformType::kTruncate, 2}}; + + auto dataSink = + createIcebergDataSink(rowType, outputDir->getPath(), partitionTransforms); + + constexpr vector_size_t size = 100; + + auto intVector = + makeFlatVector(size, [](vector_size_t row) { return row; }); + + auto dateVector = makeFlatVector( + size, + [](vector_size_t row) { return 18262 + (row % 5); }, + nullptr, + DATE()); + + auto varcharVector = BaseVector::create(VARCHAR(), size, opPool_.get()); + auto flatVarcharVector = varcharVector->asFlatVector(); + + for (auto i = 0; i < size; ++i) { + std::string str = fmt::format("str{}", i % 10); + flatVarcharVector->set(i, StringView(str.c_str(), str.size())); + } + + auto rowVector = + makeRowVector(rowType->names(), {intVector, dateVector, varcharVector}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + // We should have multiple files due to partitioning. 
+ ASSERT_FALSE(fileStats.empty()) << "Should have statistics for files"; + EXPECT_GT(fileStats.size(), 1) + << "Expected multiple files due to partitioning"; + + for (const auto& stats : fileStats) { + EXPECT_GE(stats->numRecords, 0) + << "Each partition file should have records"; + ASSERT_FALSE(stats->valueCounts.empty()) + << "Should have value counts for columns"; + + constexpr int32_t intColId = 1; + constexpr int32_t dateColId = 2; + constexpr int32_t varcharColId = 3; + EXPECT_EQ(stats->valueCounts.at(intColId), stats->numRecords) + << "Integer column value count should match record count"; + EXPECT_EQ(stats->valueCounts.at(dateColId), stats->numRecords) + << "Date column value count should match record count"; + EXPECT_EQ(stats->valueCounts.at(varcharColId), stats->numRecords) + << "Varchar column value count should match record count"; + + ASSERT_FALSE(stats->lowerBounds.empty()) + << "Should have lower bounds for columns"; + ASSERT_FALSE(stats->upperBounds.empty()) + << "Should have upper bounds for columns"; + + EXPECT_FALSE(stats->lowerBounds.at(intColId).empty()) + << "Int column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(intColId).empty()) + << "Int column should have non-empty upper bound"; + + EXPECT_FALSE(stats->lowerBounds.at(dateColId).empty()) + << "Date column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(dateColId).empty()) + << "Date column should have non-empty upper bound"; + + EXPECT_FALSE(stats->lowerBounds.at(varcharColId).empty()) + << "Varchar column should have non-empty lower bound"; + EXPECT_FALSE(stats->upperBounds.at(varcharColId).empty()) + << "Varchar column should have non-empty upper bound"; + } + + // Verify total record count across all partitions. 
+ auto totalRecords = 0; + for (const auto& stats : fileStats) { + totalRecords += stats->numRecords; + } + EXPECT_EQ(totalRecords, size) + << "Total records across all partitions should match input size"; +} + +TEST_F(IcebergStatsTest, multiplePartitionTransformsStatsTest) { + auto rowType = + ROW({"int_col", "date_col", "varchar_col", "bigint_col"}, + {INTEGER(), DATE(), VARCHAR(), BIGINT()}); + auto outputDir = TempDirectoryPath::create(); + + std::vector partitionTransforms = { + {0, TransformType::kBucket, 2}, + {1, TransformType::kYear, std::nullopt}, + {2, TransformType::kTruncate, 3}, + {3, TransformType::kIdentity, std::nullopt}}; + + auto dataSink = + createIcebergDataSink(rowType, outputDir->getPath(), partitionTransforms); + + constexpr vector_size_t size = 100; + auto intVector = + makeFlatVector(size, [](vector_size_t row) { return row * 10; }); + auto flatIntVector = intVector->asFlatVector(); + + auto dateVector = makeFlatVector( + size, + [](vector_size_t row) { return 18262 + (row * 100); }, + nullptr, + DATE()); + + auto varcharVector = BaseVector::create(VARCHAR(), size, opPool_.get()); + auto flatVarcharVector = varcharVector->asFlatVector(); + for (auto i = 0; i < size; ++i) { + std::string str = fmt::format("prefix{}_value", i % 5); + flatVarcharVector->set(i, StringView(str.c_str(), str.size())); + } + auto bigintVector = makeFlatVector( + size, [](vector_size_t row) { return (row % 3) * 1'000; }); + + auto rowVector = makeRowVector( + rowType->names(), {intVector, dateVector, varcharVector, bigintVector}); + + dataSink->appendData(rowVector); + + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_FALSE(fileStats.empty()) << "Should have statistics for files"; + EXPECT_GT(fileStats.size(), 1) + << "Expected multiple files due to partitioning"; + // Check each file's stats + for (const auto& stats : fileStats) { + 
EXPECT_GT(stats->numRecords, 0) + << "Each partition file should have records"; + constexpr int32_t intColId = 1; + constexpr int32_t dateColId = 2; + constexpr int32_t bigintColId = 4; + + if (stats->lowerBounds.find(intColId) != stats->lowerBounds.end()) { + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(intColId)); + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(intColId)); + + auto lb = *reinterpret_cast(lowerBounds.data()); + auto ub = *reinterpret_cast(upperBounds.data()); + + EXPECT_LE(lb, ub) + << "Lower bound should be <= upper bound for int column"; + } + + if (stats->lowerBounds.find(dateColId) != stats->lowerBounds.end()) { + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(dateColId)); + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(dateColId)); + + auto lb = *reinterpret_cast(lowerBounds.data()); + auto ub = *reinterpret_cast(upperBounds.data()); + + EXPECT_LE(lb, ub) + << "Lower bound should be <= upper bound for date column"; + } + + if (stats->lowerBounds.find(bigintColId) != stats->lowerBounds.end()) { + std::string lowerBounds = + encoding::Base64::decode(stats->lowerBounds.at(bigintColId)); + std::string upperBounds = + encoding::Base64::decode(stats->upperBounds.at(bigintColId)); + + auto lb = *reinterpret_cast(lowerBounds.data()); + auto ub = *reinterpret_cast(upperBounds.data()); + + EXPECT_LE(lb, ub) + << "Lower bound should be <= upper bound for bigint column"; + } + } + auto totalRecords = 0; + for (const auto& stats : fileStats) { + totalRecords += stats->numRecords; + } + EXPECT_EQ(totalRecords, size) + << "Total records across all partitions should match input size"; +} + +TEST_F(IcebergStatsTest, partitionedTableWithNullsStatsTest) { + auto rowType = ROW( + {"int_col", "date_col", "varchar_col"}, {INTEGER(), DATE(), VARCHAR()}); + auto outputDir = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {0, 
TransformType::kIdentity, std::nullopt}, + {1, TransformType::kMonth, std::nullopt}, + {2, TransformType::kTruncate, 2}}; + auto dataSink = + createIcebergDataSink(rowType, outputDir->getPath(), partitionTransforms); + + constexpr vector_size_t size = 100; + auto expectedNulls = 20; + auto dateNulls = 15; + auto intVector = makeFlatVector( + size, + [](vector_size_t row) { return row % 10; }, + nullEvery(5), + INTEGER()); + auto dateVector = makeFlatVector( + size, + [](vector_size_t row) { return 18262 + (row % 3) * 30; }, + nullEvery(7), + DATE()); + auto varcharVector = BaseVector::create(VARCHAR(), size, opPool_.get()); + auto flatVarcharVector = varcharVector->asFlatVector(); + auto varcharNulls = 0; + for (auto i = 0; i < size; ++i) { + if (i % 11 == 0) { + flatVarcharVector->setNull(i, true); + varcharNulls++; + } else { + std::string str = fmt::format("val{}", i % 5); + flatVarcharVector->set(i, StringView(str.c_str(), str.size())); + } + } + + auto rowVector = + makeRowVector(rowType->names(), {intVector, dateVector, varcharVector}); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_FALSE(fileStats.empty()) << "Should have statistics for files"; + auto totalIntNulls = 0; + auto totalDateNulls = 0; + auto totalVarcharNulls = 0; + auto totalRecords = 0; + + constexpr int32_t intColId = 1; + constexpr int32_t dateColId = 2; + constexpr int32_t varcharColId = 3; + + for (const auto& stats : fileStats) { + totalRecords += stats->numRecords; + // Add null counts if present. 
+ if (stats->nullValueCounts.find(intColId) != stats->nullValueCounts.end()) { + totalIntNulls += stats->nullValueCounts.at(intColId); + } + + if (stats->nullValueCounts.find(dateColId) != + stats->nullValueCounts.end()) { + totalDateNulls += stats->nullValueCounts.at(dateColId); + } + + if (stats->nullValueCounts.find(varcharColId) != + stats->nullValueCounts.end()) { + totalVarcharNulls += stats->nullValueCounts.at(varcharColId); + } + + // Check that null count is less than or equal to value count for each + // column. + if (stats->nullValueCounts.find(intColId) != stats->nullValueCounts.end() && + stats->valueCounts.find(intColId) != stats->valueCounts.end()) { + EXPECT_LE( + stats->nullValueCounts.at(intColId), stats->valueCounts.at(intColId)) + << "Null count should be <= value count for int column"; + } + + if (stats->nullValueCounts.find(dateColId) != + stats->nullValueCounts.end() && + stats->valueCounts.find(dateColId) != stats->valueCounts.end()) { + EXPECT_LE( + stats->nullValueCounts.at(dateColId), + stats->valueCounts.at(dateColId)) + << "Null count should be <= value count for date column"; + } + + if (stats->nullValueCounts.find(varcharColId) != + stats->nullValueCounts.end() && + stats->valueCounts.find(varcharColId) != stats->valueCounts.end()) { + EXPECT_LE( + stats->nullValueCounts.at(varcharColId), + stats->valueCounts.at(varcharColId)) + << "Null count should be <= value count for varchar column"; + } + } + + // Verify total counts match expected. 
+ EXPECT_EQ(totalRecords, size) + << "Total records across all partitions should match input size"; + EXPECT_EQ(totalIntNulls, expectedNulls) + << "Total int nulls should match expected"; + EXPECT_EQ(totalDateNulls, dateNulls) + << "Total date nulls should match expected"; + EXPECT_EQ(totalVarcharNulls, varcharNulls) + << "Total varchar nulls should match expected"; +} + +TEST_F(IcebergStatsTest, mapTypeTest) { + auto rowType = + ROW({"int_col", "map_col"}, {INTEGER(), MAP(INTEGER(), VARCHAR())}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 0; + + std::vector< + std::optional>>>> + mapData; + for (auto i = 0; i < size; ++i) { + std::vector>> mapRow; + for (auto j = 0; j < 5; ++j) { + mapRow.emplace_back(j, StringView("test_value")); + } + mapData.push_back(mapRow); + } + + auto rowVector = makeRowVector( + {makeFlatVector(size, [&](auto row) { return row * 10; }), + makeNullableMapVector(mapData)}); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + constexpr int32_t intColId = 1; + constexpr int32_t mapColId = 3; + + EXPECT_EQ(stats->valueCounts.at(intColId), size) + << "Int column value count incorrect"; + EXPECT_EQ(stats->nullValueCounts.at(intColId), expectedNulls) + << "Int column null count incorrect"; + EXPECT_EQ(stats->valueCounts.at(mapColId), size * 5) + << "Map column value count incorrect"; + EXPECT_TRUE(stats->lowerBounds.find(mapColId) == stats->lowerBounds.end()) + << "Map column should not have lower bounds"; + EXPECT_TRUE(stats->upperBounds.find(mapColId) == 
stats->upperBounds.end()) + << "Map column should not have upper bounds"; +} + +TEST_F(IcebergStatsTest, arrayTypeTest) { + auto rowType = ROW({"int_col", "array_col"}, {INTEGER(), ARRAY(VARCHAR())}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 0; + + std::vector>> arrayData; + for (auto i = 0; i < size; ++i) { + std::vector> arrayRow; + for (auto j = 0; j < 3; ++j) { + auto v = fmt::format("item_{}", i * 3 + j); + arrayRow.emplace_back(StringView(v)); + } + arrayData.push_back(arrayRow); + } + + auto rowVector = makeRowVector( + {makeFlatVector(size, [](auto row) { return row * 10; }), + makeNullableArrayVector(arrayData)}); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + constexpr int32_t intColId = 1; + constexpr int32_t arrayColId = 3; + + EXPECT_EQ(stats->valueCounts.at(intColId), size) + << "Int column value count incorrect"; + EXPECT_EQ(stats->nullValueCounts.at(intColId), expectedNulls) + << "Int column null count incorrect"; + EXPECT_EQ(stats->valueCounts.at(arrayColId), size * 3) + << "Array column value count incorrect"; + EXPECT_TRUE(stats->lowerBounds.find(arrayColId) == stats->lowerBounds.end()) + << "Array column should not have lower bounds"; + EXPECT_TRUE(stats->upperBounds.find(arrayColId) == stats->upperBounds.end()) + << "Array column should not have upper bounds"; +} + +// Test statistics collection for nested struct fields. +// Assume int_col's ID start with 1. 
+// Struct definition with field IDs: +// struct { +// int_col: INTEGER (id: 1) +// struct_col (id: 2) { +// first_level_id: INTEGER (id: 3) +// first_level_name: VARCHAR (id: 4) +// nested_struct (id: 5) { +// second_level_id: INTEGER (id: 6) +// second_level_name: VARCHAR (id: 7) +// } +// } +// } +// Need to collect statistics for field IDs [1, 3, 4, 6, 7] +TEST_F(IcebergStatsTest, structTypeTest) { + auto rowType = + ROW({"int_col", "struct_col"}, + {INTEGER(), + ROW({"first_level_id", "first_level_name", "nested_struct"}, + {INTEGER(), + VARCHAR(), + ROW({"second_level_id", "second_level_name"}, + {INTEGER(), VARCHAR()})})}); + auto outputDir = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink(rowType, outputDir->getPath()); + constexpr vector_size_t size = 100; + auto expectedNulls = 0; + + auto intVector = + makeFlatVector(size, [](auto row) { return row * 10; }); + auto firstLevelId = makeFlatVector( + size, [](vector_size_t row) { return row % size; }, nullEvery(5)); + auto firstLevelName = makeFlatVector( + size, + [](vector_size_t row) { + auto v = fmt::format("name_{}", row * 10); + return StringView(v); + }, + nullEvery(7)); + + auto secondLevelId = makeFlatVector( + size, [](vector_size_t row) { return row * size; }, nullEvery(6)); + auto secondLevelName = makeFlatVector( + size, + [](vector_size_t row) { + auto v = fmt::format("nested_{}", row * 100); + return StringView(v); + }, + nullEvery(8)); + + auto nestedStruct = makeRowVector({secondLevelId, secondLevelName}); + auto structVector = + makeRowVector({firstLevelId, firstLevelName, nestedStruct}); + + auto rowVector = makeRowVector({intVector, structVector}); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + auto commitTasks = dataSink->close(); + ASSERT_FALSE(commitTasks.empty()); + + const auto& fileStats = dataSink->dataFileStats(); + ASSERT_EQ(fileStats.size(), 1) << "Expected exactly one file with stats"; + const auto& stats = fileStats[0]; + + 
EXPECT_EQ(stats->numRecords, size) << "Record count should match input size"; + + constexpr int32_t intColId = 1; + constexpr int32_t tier1ColId = 3; + constexpr int32_t tier2ColId = 6; + constexpr int32_t tier2ColId2 = 7; + + EXPECT_EQ(stats->valueCounts.size(), 5); + EXPECT_EQ(stats->lowerBounds.size(), 5); + EXPECT_EQ(stats->valueCounts.at(intColId), size); + EXPECT_EQ(stats->nullValueCounts.at(intColId), expectedNulls); + EXPECT_EQ(stats->valueCounts.at(tier1ColId), size); + EXPECT_EQ(stats->valueCounts.at(tier2ColId), size); + EXPECT_EQ(stats->nullValueCounts.at(tier1ColId), 20); + EXPECT_EQ( + encoding::Base64::decode(stats->lowerBounds.at(tier2ColId2)), + "nested_100"); + EXPECT_EQ( + encoding::Base64::decode(stats->upperBounds.at(tier2ColId2)), + "nested_9900"); +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp b/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp index eb1312eb1a2..474f0a35ebe 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp @@ -19,10 +19,7 @@ #include #include "velox/connectors/ConnectorRegistry.h" -#include "velox/connectors/hive/iceberg/IcebergColumnHandle.h" -#include "velox/connectors/hive/iceberg/IcebergConfig.h" #include "velox/connectors/hive/iceberg/IcebergConnector.h" -#include "velox/connectors/hive/iceberg/IcebergDataSink.h" #include "velox/connectors/hive/iceberg/IcebergSplit.h" #include "velox/connectors/hive/iceberg/PartitionSpec.h" #include "velox/expression/Expr.h" @@ -52,21 +49,15 @@ void IcebergTestBase::SetUp() { connectorSessionProperties_ = std::make_shared( std::unordered_map(), true); - hiveConfig_ = + connectorConfig_ = std::make_shared(std::make_shared( std::unordered_map())); - icebergConfig_ = - std::make_shared(std::make_shared( - std::unordered_map{ - {IcebergConfig::kFunctionPrefixConfig, - IcebergConfig::kDefaultFunctionPrefix}})); - 
setupMemoryPools(); fuzzerOptions_.vectorSize = 100; fuzzerOptions_.nullRatio = 0.1; - fuzzer_ = std::make_unique(fuzzerOptions_, opPool_.get(), 1); + fuzzer_ = std::make_unique(fuzzerOptions_, opPool_.get()); } void IcebergTestBase::TearDown() { @@ -75,7 +66,6 @@ void IcebergTestBase::TearDown() { connectorPool_.reset(); opPool_.reset(); root_.reset(); - queryCtx_.reset(); ConnectorRegistry::global().erase(kIcebergConnectorId); HiveConnectorTestBase::TearDown(); } @@ -85,7 +75,6 @@ void IcebergTestBase::setupMemoryPools() { opPool_.reset(); connectorPool_.reset(); connectorQueryCtx_.reset(); - queryCtx_.reset(); root_ = memory::memoryManager()->addRootPool( "IcebergTest", 1L << 30, exec::MemoryReclaimer::create()); @@ -100,19 +89,14 @@ void IcebergTestBase::recreateConnectorQueryCtx( const std::string& sessionTimezone, bool adjustTimestampToTimezone) { connectorQueryCtx_.reset(); - queryCtx_.reset(); - - queryCtx_ = core::QueryCtx::create(nullptr, core::QueryConfig({})); - auto expressionEvaluator = std::make_unique( - queryCtx_.get(), opPool_.get()); - connectorQueryCtx_ = std::make_unique( + connectorQueryCtx_ = std::make_unique( opPool_.get(), connectorPool_.get(), connectorSessionProperties_.get(), nullptr, common::PrefixSortConfig(), - std::move(expressionEvaluator), + nullptr, nullptr, "query.IcebergTest", "task.IcebergTest", @@ -151,8 +135,8 @@ void IcebergTestBase::setConnectorSessionProperty( } std::shared_ptr IcebergTestBase::createPartitionSpec( - const RowTypePtr& rowType, - const std::vector& partitionFields) { + const std::vector& partitionFields, + const RowTypePtr& rowType) { std::vector fields; for (const auto& partitionField : partitionFields) { fields.push_back( @@ -163,55 +147,74 @@ std::shared_ptr IcebergTestBase::createPartitionSpec( partitionField.parameter}); } - return fields.empty() ? 
nullptr - : std::make_shared(1, fields); -} - -namespace { - -parquet::ParquetFieldId makeField(const TypePtr& type, int32_t& fieldId) { - const int32_t currentId = fieldId++; - std::vector children; - children.reserve(type->size()); - for (auto i = 0; i < type->size(); ++i) { - children.push_back(makeField(type->childAt(i), fieldId)); - } - return parquet::ParquetFieldId{currentId, children}; + return std::make_shared(1, fields); } void addColumnHandles( const RowTypePtr& rowType, const std::vector& partitionFields, - std::vector& columnHandles) { + std::vector>& columnHandles) { std::unordered_set partitionColumnIds; for (const auto& field : partitionFields) { partitionColumnIds.insert(field.id); } + HiveColumnHandle::ColumnParseParameters columnParseParameters; + + std::function + collectNestedField = [&](const TypePtr& type, + int32_t& columnOrdinal) -> IcebergNestedField { + int32_t currentId = columnOrdinal++; + std::vector children; + if (type->isRow()) { + auto rowType = asRowType(type); + for (auto i = 0; i < rowType->size(); ++i) { + children.push_back( + collectNestedField(rowType->childAt(i), columnOrdinal)); + } + } else if (type->isArray()) { + auto arrayType = std::dynamic_pointer_cast(type); + for (auto i = 0; i < arrayType->size(); ++i) { + children.push_back( + collectNestedField(arrayType->childAt(i), columnOrdinal)); + } + } else if (type->isMap()) { + auto mapType = std::dynamic_pointer_cast(type); + for (auto i = 0; i < mapType->size(); ++i) { + children.push_back( + collectNestedField(mapType->childAt(i), columnOrdinal)); + } + } + + return IcebergNestedField{currentId, children}; + }; - int32_t fieldId = 1; - columnHandles.reserve(rowType->size()); + int32_t startIndex = 1; for (auto i = 0; i < rowType->size(); ++i) { - const auto& columnName = rowType->nameOf(i); - const auto& type = rowType->childAt(i); - auto field = makeField(type, fieldId); + auto columnName = rowType->nameOf(i); + auto type = rowType->childAt(i); + auto field = 
collectNestedField(type, startIndex); columnHandles.push_back( - std::make_shared( + std::make_shared( columnName, + // partitionColumnIds.count(i) > 0 partitionColumnIds.contains(i) ? FileColumnHandle::ColumnType::kPartitionKey : FileColumnHandle::ColumnType::kRegular, type, - field)); + type, + field, + std::vector{}, + std::nullopt, + columnParseParameters)); } } -} // namespace - -IcebergInsertTableHandlePtr IcebergTestBase::createInsertTableHandle( +IcebergInsertTableHandlePtr IcebergTestBase::createIcebergInsertTableHandle( const RowTypePtr& rowType, const std::string& outputDirectoryPath, - const std::vector& partitionFields) { - std::vector columnHandles; + const std::vector& partitionFields, + const std::vector& sortedBy) { + std::vector> columnHandles; addColumnHandles(rowType, partitionFields, columnHandles); auto locationHandle = std::make_shared( @@ -219,44 +222,76 @@ IcebergInsertTableHandlePtr IcebergTestBase::createInsertTableHandle( outputDirectoryPath, LocationHandle::TableType::kNew); - auto partitionSpec = createPartitionSpec(rowType, partitionFields); + auto partitionSpec = createPartitionSpec(partitionFields, rowType); + + // Create sorting columns if specified + std::vector sortingColumns; + for (const auto& sortExpr : sortedBy) { + std::string columnName; + bool isAscending = true; + bool isNullsFirst = true; + + // Parse sort expression + std::istringstream iss(sortExpr); + iss >> columnName; + + std::string token; + if (iss >> token) { + if (token == "DESC") { + isAscending = false; + } else if (token != "ASC") { + // If not ASC, put it back (might be NULLS) + iss.seekg(-(int)token.length(), std::ios_base::cur); + } - return std::make_shared( - /*inputColumns=*/columnHandles, + if (iss >> token && token == "NULLS") { + if (iss >> token && token == "LAST") { + isNullsFirst = false; + } + } + } + + core::SortOrder sortOrder(isAscending, isNullsFirst); + sortingColumns.push_back(IcebergSortingColumn(columnName, sortOrder)); + } + + return 
std::make_shared( + columnHandles, locationHandle, - /*tableStorageFormat=*/fileFormat_, partitionSpec, - /*compressionKind=*/common::CompressionKind::CompressionKind_ZSTD); + opPool_.get(), + fileFormat_, + sortingColumns, + common::CompressionKind::CompressionKind_ZSTD); } -std::shared_ptr IcebergTestBase::createDataSink( +std::shared_ptr IcebergTestBase::createIcebergDataSink( const RowTypePtr& rowType, const std::string& outputDirectoryPath, - const std::vector& partitionFields) { - auto tableHandle = - createInsertTableHandle(rowType, outputDirectoryPath, partitionFields); + const std::vector& partitionFields, + const std::vector& sortedBy) { + auto tableHandle = createIcebergInsertTableHandle( + rowType, outputDirectoryPath, partitionFields, sortedBy); return std::make_shared( rowType, tableHandle, connectorQueryCtx_.get(), - CommitStrategy::kNoCommit, - hiveConfig_, - icebergConfig_); + connector::CommitStrategy::kNoCommit, + connectorConfig_); } std::shared_ptr IcebergTestBase::createDataSinkAndAppendData( const std::vector& vectors, - const std::string& dataPath, - const std::vector& partitionFields) { - VELOX_CHECK(!vectors.empty(), "vectors cannot be empty"); - + const std::string& outputDirectoryPath, + const std::vector& partitionFields, + const std::vector& sortedBy) { + VELOX_CHECK(!vectors.empty(), "vectors must not be empty"); auto rowType = vectors.front()->rowType(); - auto dataSink = createDataSink(rowType, dataPath, partitionFields); - + auto dataSink = createIcebergDataSink( + rowType, outputDirectoryPath, partitionFields, sortedBy); for (const auto& vector : vectors) { dataSink->appendData(vector); } - EXPECT_TRUE(dataSink->finish()); return dataSink; } @@ -276,36 +311,31 @@ std::vector IcebergTestBase::listFiles( return files; } -std::unordered_map> -IcebergTestBase::extractPartitionKeys(const std::string& filePath) { - std::unordered_map> partitionKeys; - - std::vector pathComponents; - folly::split("/", filePath, pathComponents); - for 
(const auto& component : pathComponents) { - if (component.find('=') != std::string::npos) { - std::vector keys; - folly::split('=', component, keys); - if (keys.size() == 2) { - if (keys[1] == "null") { - partitionKeys[keys[0]] = std::nullopt; - } else { - partitionKeys[keys[0]] = keys[1]; - } - } - } - } - - return partitionKeys; -} - std::vector> IcebergTestBase::createSplitsForDirectory(const std::string& directory) { std::vector> splits; + std::unordered_map customSplitInfo; + customSplitInfo["table_format"] = "hive-iceberg"; auto files = listFiles(directory); for (const auto& filePath : files) { - auto partitionKeys = extractPartitionKeys(filePath); + std::unordered_map> partitionKeys; + + // Extract partition keys from path if any. + std::vector pathComponents; + folly::split("/", filePath, pathComponents); + for (const auto& component : pathComponents) { + if (component.find('=') != std::string::npos) { + std::vector keys; + folly::split('=', component, keys); + if (keys.size() == 2) { + partitionKeys[keys[0]] = keys[1]; + if (keys[1] == "null") { + partitionKeys[keys[0]] = std::nullopt; + } + } + } + } const auto file = filesystems::getFileSystem(filePath, nullptr) ->openFileForRead(filePath); @@ -318,9 +348,9 @@ IcebergTestBase::createSplitsForDirectory(const std::string& directory) { file->size(), partitionKeys, std::nullopt, - std::unordered_map{}, + customSplitInfo, nullptr, - /*cacheable=*/true, + true, std::vector())); } diff --git a/velox/connectors/hive/iceberg/tests/IcebergTestBase.h b/velox/connectors/hive/iceberg/tests/IcebergTestBase.h index b3eefbd4726..3f4b6cc17ac 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergTestBase.h +++ b/velox/connectors/hive/iceberg/tests/IcebergTestBase.h @@ -17,13 +17,10 @@ #pragma once #include -#include -#include -#include #include "velox/common/testutil/TempDirectoryPath.h" -#include "velox/connectors/hive/iceberg/IcebergConfig.h" #include "velox/connectors/hive/iceberg/IcebergDataSink.h" +#include 
"velox/connectors/hive/iceberg/IcebergSplit.h" #include "velox/exec/tests/utils/HiveConnectorTestBase.h" #include "velox/vector/fuzzer/VectorFuzzer.h" #ifdef VELOX_ENABLE_PARQUET @@ -56,25 +53,23 @@ class IcebergTestBase : public exec::test::HiveConnectorTestBase { vector_size_t rowsPerBatch, double nullRatio = 0.0); - std::shared_ptr createDataSink( + std::shared_ptr createIcebergDataSink( const RowTypePtr& rowType, const std::string& outputDirectoryPath, - const std::vector& partitionFields = {}); + const std::vector& partitionTransforms = {}, + const std::vector& sortedBy = {}); std::shared_ptr createDataSinkAndAppendData( const std::vector& vectors, - const std::string& dataPath, - const std::vector& partitionFields = {}); + const std::string& outputDirectoryPath, + const std::vector& partitionTransforms = {}, + const std::vector& sortedBy = {}); std::vector> createSplitsForDirectory( const std::string& directory); std::vector listFiles(const std::string& dirPath); - std::shared_ptr createPartitionSpec( - const RowTypePtr& rowType, - const std::vector& partitionFields); - void setConnectorSessionProperty( const std::string& key, const std::string& value); @@ -86,37 +81,33 @@ class IcebergTestBase : public exec::test::HiveConnectorTestBase { const std::string& sessionTimezone, bool adjustTimestampToTimezone); - /// Extracts partition key-value pairs from a file path. - /// Returns a map where keys are partition column names and values are - /// partition values (std::nullopt for null values). - /// Example: "/path/to/c1=10/c2=null/file.parquet" returns - /// {{"c1", "10"}, {"c2", std::nullopt}}. 
- static std::unordered_map> - extractPartitionKeys(const std::string& filePath); + std::shared_ptr createPartitionSpec( + const std::vector& transformSpecs, + const RowTypePtr& rowType); + + void setupMemoryPools(); dwio::common::FileFormat fileFormat_{dwio::common::FileFormat::PARQUET}; + RowTypePtr rowType_; std::shared_ptr opPool_; - std::unique_ptr connectorQueryCtx_; + std::shared_ptr connectorSessionProperties_; private: - IcebergInsertTableHandlePtr createInsertTableHandle( + IcebergInsertTableHandlePtr createIcebergInsertTableHandle( const RowTypePtr& rowType, const std::string& outputDirectoryPath, - const std::vector& partitionFields = {}); + const std::vector& partitionTransforms = {}, + const std::vector& sortedBy = {}); std::vector listPartitionDirectories( const std::string& dataPath); - void setupMemoryPools(); - std::shared_ptr root_; std::shared_ptr connectorPool_; - std::shared_ptr connectorSessionProperties_; - std::shared_ptr hiveConfig_; - std::shared_ptr icebergConfig_; + std::shared_ptr connectorConfig_; + std::unique_ptr connectorQueryCtx_; VectorFuzzer::Options fuzzerOptions_; std::unique_ptr fuzzer_; - std::shared_ptr queryCtx_; }; } // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/IcebergTransformE2ETest.cpp b/velox/connectors/hive/iceberg/tests/IcebergTransformE2ETest.cpp new file mode 100644 index 00000000000..69d38163040 --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergTransformE2ETest.cpp @@ -0,0 +1,976 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" +#include "velox/exec/tests/utils/PlanBuilder.h" +#include "velox/exec/tests/utils/TempDirectoryPath.h" + +using namespace facebook::velox::exec::test; + +namespace facebook::velox::connector::hive::iceberg::test { +class IcebergTransformE2ETest : public IcebergTestBase { + protected: + void SetUp() override { + IcebergTestBase::SetUp(); + rowType_ = + ROW({"c_int", + "c_bigint", + "c_varchar", + "c_date", + "c_decimal", + "c_varbinary", + "c_timestamp"}, + {INTEGER(), + BIGINT(), + VARCHAR(), + DATE(), + DECIMAL(18, 3), + VARBINARY(), + TIMESTAMP()}); + rng_.seed(1); + } + + std::pair buildFilter( + const std::string& partitionDir) { + const auto eq = partitionDir.find('='); + const auto us = partitionDir.rfind('_', eq - 1); + const auto column = partitionDir.substr(0, us); + const auto value = partitionDir.substr(eq + 1); + return {column, value}; + } + + std::vector createTestData( + int32_t numBatches, + int32_t rowsPerBatch) { + std::vector batches; + for (auto batchIdx = 0; batchIdx < numBatches; ++batchIdx) { + std::vector columns; + columns.push_back(makeFlatVector(rowsPerBatch, [](auto row) { + return row % 100; + })); + columns.push_back(makeFlatVector(rowsPerBatch, [](auto row) { + return row * 1'000; + })); + auto varcharVector = BaseVector::create>( + VARCHAR(), rowsPerBatch, opPool_.get()); + for (auto i = 0; i < rowsPerBatch; i++) { + std::string s = + fmt::format("string_long_data_test__{}__{}", i % 10, i % 100); + varcharVector->set(i, 
StringView(s)); + } + columns.push_back(varcharVector); + + auto dateVector = BaseVector::create>( + DATE(), rowsPerBatch, opPool_.get()); + for (auto i = 0; i < rowsPerBatch; i++) { + static const std::vector dates = { + 18'262, 18'628, 18'993, 19'358, 19'723, 20'181}; + dateVector->set(i, dates[i % dates.size()]); + } + columns.push_back(dateVector); + + auto decimalVector = BaseVector::create>( + DECIMAL(18, 3), rowsPerBatch, opPool_.get()); + for (auto i = 0; i < rowsPerBatch; i++) { + decimalVector->set(i, (i % 10 + 1) * 123'456); + } + columns.push_back(decimalVector); + + auto varbinaryVector = BaseVector::create>( + VARBINARY(), rowsPerBatch, opPool_.get()); + std::vector binaryData; + for (auto i = 0; i < rowsPerBatch; i++) { + if (i % 5 == 0) { + for (int32_t j = 0; j < 40; j++) { + binaryData.push_back(static_cast(j + (i % 10))); + } + } else if (i % 5 == 1) { + for (int32_t j = 0; j < 40; j++) { + binaryData.push_back(static_cast(i % 256)); + } + } else if (i % 5 == 2) { + for (int32_t j = 0; j < 40; j++) { + binaryData.push_back( + static_cast(j % 2 == 0 ? 0xAA : 0x55)); + } + } else { + for (int32_t j = 0; j < 34; j++) { + binaryData.push_back(static_cast(255 - j - (i % 10))); + } + } + + varbinaryVector->set( + i, + StringView( + reinterpret_cast(binaryData.data()), + binaryData.size())); + } + columns.push_back(varbinaryVector); + + // Add timestamp column with different time values. + auto timestampVector = BaseVector::create>( + TIMESTAMP(), rowsPerBatch, opPool_.get()); + for (auto i = 0; i < rowsPerBatch; i++) { + // Create timestamps for different years, months, days, and hours + // to test various transforms. 
+ static const std::vector timestamps = { + Timestamp(0, 0), // 1970-01-01 00:00:00 + Timestamp(3600, 0), // 1970-01-01 01:00:00 + Timestamp(86400, 0), // 1970-01-02 00:00:00 + Timestamp(2592000, 0), // 1970-01-31 00:00:00 + Timestamp(31536000, 0), // 1971-01-01 00:00:00 + Timestamp(1609459200, 0), // 2021-01-01 00:00:00 + Timestamp(1609545600, 0), // 2021-01-02 00:00:00 + Timestamp(1612224000, 0), // 2021-02-01 00:00:00 + Timestamp(1640995200, 0), // 2022-01-01 00:00:00 + Timestamp(1672531200, 0) // 2023-01-01 00:00:00 + }; + timestampVector->set(i, timestamps[i % timestamps.size()]); + } + columns.push_back(timestampVector); + + batches.push_back(makeRowVector(rowType_->names(), columns)); + } + return batches; + } + + std::vector listFirstLevelDirectories( + const std::string& basePath) { + std::vector partitionDirs; + for (const auto& entry : std::filesystem::directory_iterator(basePath)) { + if (entry.is_directory()) { + partitionDirs.push_back(entry.path().string()); + } + } + return partitionDirs; + } + + std::vector listDirectoriesRecursively(const std::string& path) { + std::vector allDirs; + auto firstLevelDirs = listFirstLevelDirectories(path); + allDirs.insert(allDirs.end(), firstLevelDirs.begin(), firstLevelDirs.end()); + + for (const auto& dir : firstLevelDirs) { + if (std::filesystem::is_directory(dir)) { + auto subDirs = listDirectoriesRecursively(dir); + allDirs.insert(allDirs.end(), subDirs.begin(), subDirs.end()); + } + } + + return allDirs; + } + + // Verify the number of partitions and their naming convention. 
+ void verifyPartitionCount( + const std::string& outputPath, + const std::vector& partitionTransforms, + const int32_t expectedPartitionCount) { + const auto partitionDirs = listFirstLevelDirectories(outputPath); + + if (partitionTransforms.empty()) { + ASSERT_EQ(partitionDirs.size(), 1) + << "Expected 1 directory for no partitioning, got " + << partitionDirs.size(); + } else { + ASSERT_EQ(partitionDirs.size(), expectedPartitionCount) + << "Expected " << expectedPartitionCount << " partitions, got " + << partitionDirs.size(); + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + ASSERT_TRUE(dirName.find('=') != std::string::npos) + << "Partition directory " << dirName + << " does not follow Iceberg naming convention"; + } + } + } + + // Verify the total row count across all partitions. + void verifyTotalRowCount( + RowTypePtr rowType, + const std::string& outputPath, + int32_t expectedRowCount) { + auto splits = createSplitsForDirectory(outputPath); + + const auto plan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType) + .endTableScan() + .singleAggregation({}, {"count(1)"}) + .planNode(); + + const auto result = + AssertQueryBuilder(plan).splits(splits).copyResults(opPool_.get()); + + ASSERT_EQ(result->size(), 1); + ASSERT_EQ( + result->childAt(0)->asFlatVector()->valueAt(0), + expectedRowCount); + } + + // Verify data in a specific partition. 
+ void verifyPartitionData( + RowTypePtr rowType, + const std::string& partitionPath, + const std::string& partitionFilter, + const int32_t expectedRowCount, + bool skipRowCountCheck = false) { + const auto countPlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType) + .endTableScan() + .singleAggregation({}, {"count(1)"}) + .planNode(); + + const auto countResult = + AssertQueryBuilder(countPlan) + .splits(createSplitsForDirectory(partitionPath)) + .copyResults(opPool_.get()); + + ASSERT_EQ(countResult->size(), 1); + const auto actualRowCount = + countResult->childAt(0)->asFlatVector()->valueAt(0); + + if (!skipRowCountCheck) { + ASSERT_EQ(actualRowCount, expectedRowCount); + } else { + // Just verify that we have some data. + ASSERT_GT(actualRowCount, 0); + } + + const auto dataPlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType) + .endTableScan() + .filter(partitionFilter) + .singleAggregation({}, {"count(1)"}) + .planNode(); + const auto dataResult = AssertQueryBuilder(dataPlan) + .splits(createSplitsForDirectory(partitionPath)) + .copyResults(opPool_.get()); + ASSERT_EQ(dataResult->size(), 1); + const auto filteredRowCount = + dataResult->childAt(0)->asFlatVector()->valueAt(0); + if (!skipRowCountCheck) { + ASSERT_EQ(filteredRowCount, expectedRowCount); + } else { + // Just verify that the filter matches all rows in the partition. 
+ ASSERT_EQ(filteredRowCount, actualRowCount); + } + } + + folly::Random::DefaultGenerator rng_; +}; + +TEST_F(IcebergTransformE2ETest, identityPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto vectors = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kIdentity, std::nullopt}}); + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + verifyPartitionCount(outputDirectory->getPath(), {"c_int"}, rowsPerBatch); + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + verifyPartitionData(rowType_, dir, dirName, numBatches); + } +} + +TEST_F(IcebergTransformE2ETest, truncatePartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto vectors = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kTruncate, 10}}); + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + verifyPartitionCount(outputDirectory->getPath(), {"truncate(c_int, 10)"}, 10); + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + const auto partitionDirs = + listFirstLevelDirectories(outputDirectory->getPath()); + + for (const auto& dir : partitionDirs) { + const std::string dirName = std::filesystem::path(dir).filename().string(); + auto [c, v] = buildFilter(dirName); + const std::string filter = + c + ">=" + v + 
" AND " + c + "<" + std::to_string(std::stoi(v) + 10); + verifyPartitionData( + rowType_, dir, filter, 20); // 10 values per batch * 2 batches. + } +} + +TEST_F(IcebergTransformE2ETest, bucketPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto vectors = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), {{2, TransformType::kBucket, 4}}); + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + // Verify the number of partitions (should be at most 4 buckets). + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + ASSERT_EQ(partitionDirs.size(), 4); + + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + int32_t totalRowsInPartitions = 0; + for (const auto& dir : partitionDirs) { + auto splits = createSplitsForDirectory(dir); + auto countPlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .singleAggregation({}, {"count(1)"}) + .planNode(); + auto countResult = + AssertQueryBuilder(countPlan).splits(splits).copyResults(opPool_.get()); + + auto partitionRowCount = + countResult->childAt(0)->asFlatVector()->valueAt(0); + totalRowsInPartitions += partitionRowCount; + ASSERT_GE(partitionRowCount, 0); + } + + ASSERT_EQ(totalRowsInPartitions, numBatches * rowsPerBatch); + + // Verify that each partition contains only rows with the same bucket value. 
+ for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + const auto equalsPos = dirName.find('='); + ASSERT_NE(equalsPos, std::string::npos); + auto dataPlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .project({"c_varchar"}) + .planNode(); + auto dataResult = AssertQueryBuilder(dataPlan) + .splits(createSplitsForDirectory(dir)) + .copyResults(opPool_.get()); + // Verify that all rows in this partition have the same bucket hash value. + auto varcharColumn = dataResult->childAt(0)->asFlatVector(); + for (auto i = 0; i < dataResult->size(); i++) { + StringView value = varcharColumn->valueAt(i); + auto valuePlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .filter(fmt::format("c_varchar = '{}'", value.str())) + .project({"c_varchar"}) + .planNode(); + + auto valueResult = + AssertQueryBuilder(valuePlan) + .splits(createSplitsForDirectory(outputDirectory->getPath())) + .copyResults(opPool_.get()); + auto valueCount = valueResult->size(); + auto partitionValuePlan = + PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .filter(fmt::format("c_varchar = '{}'", value.str())) + .project({"c_varchar"}) + .planNode(); + auto partitionValueResult = AssertQueryBuilder(partitionValuePlan) + .splits(createSplitsForDirectory(dir)) + .copyResults(opPool_.get()); + + ASSERT_EQ(partitionValueResult->size(), valueCount); + } + } +} + +TEST_F(IcebergTransformE2ETest, yearPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto vectors = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{3, TransformType::kYear, std::nullopt}}); + for (const 
auto& vector : vectors) { + dataSink->appendData(vector); + } + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + // Verify the number of partitions (should be 6 for years 2020-2025). + verifyPartitionCount(outputDirectory->getPath(), {"year(c_date)"}, 6); + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + + for (int32_t year = 2020; year <= 2025; year++) { + const auto expectedDirName = fmt::format("c_date_year={}", year); + bool foundPartition = false; + auto yearFilter = [](const int32_t year) -> std::string { + return fmt::format( + "YEAR(DATE '{}-01-01')={}", + std::to_string(year), + std::to_string(year)); + }; + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + if (dirName == expectedDirName) { + foundPartition = true; + auto datePlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .filter(yearFilter(year)) + .singleAggregation({}, {"count(1)"}) + .planNode(); + + auto dateResult = AssertQueryBuilder(datePlan) + .splits(createSplitsForDirectory(dir)) + .copyResults(opPool_.get()); + + auto partitionRowCount = + dateResult->childAt(0)->asFlatVector()->valueAt(0); + auto countPlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .singleAggregation({}, {"count(1)"}) + .planNode(); + auto countResult = AssertQueryBuilder(countPlan) + .splits(createSplitsForDirectory(dir)) + .copyResults(opPool_.get()); + auto totalPartitionCount = + countResult->childAt(0)->asFlatVector()->valueAt(0); + ASSERT_EQ(partitionRowCount, totalPartitionCount); + break; + } + } + ASSERT_TRUE(foundPartition) + << "Partition for year " << year << " not found"; + } +} + +TEST_F(IcebergTransformE2ETest, varbinaryTruncatePartitioning) { + constexpr auto 
numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto vectors = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + + std::vector partitionTransforms = { + {5, TransformType::kTruncate, 36}}; + auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + auto [c, v] = buildFilter(dirName); + // For binary data, we need to use a different approach for filtering. + const auto filter = c + " IS NOT NULL"; + // Verify the partition has data. + verifyPartitionData(rowType_, dir, filter, 0, true); + } +} + +TEST_F(IcebergTransformE2ETest, multipleTransformsOnSameColumn) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto vectors = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {0, TransformType::kIdentity, std::nullopt}, // c_int. + {0, TransformType::kTruncate, 10}, // truncate(c_int, 10). + {0, TransformType::kBucket, 4}}; // bucket(c_int, 4). 
+ auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), partitionTransforms); + for (const auto& vector : vectors) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto firstLevelDirs = listFirstLevelDirectories(outputDirectory->getPath()); + ASSERT_GT(firstLevelDirs.size(), 0); + for (const auto& dir : firstLevelDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + ASSERT_TRUE(dirName.find("c_int=") != std::string::npos) + << "First level directory " << dirName + << " should use identity transform"; + + auto secondLevelDirs = listFirstLevelDirectories(dir); + ASSERT_GT(secondLevelDirs.size(), 0) + << "No second level directories found in " << dir; + + for (const auto& secondDir : secondLevelDirs) { + const auto secondDirName = + std::filesystem::path(secondDir).filename().string(); + ASSERT_TRUE(secondDirName.find("c_int_trunc=") != std::string::npos) + << "Second level directory " << secondDirName + << " should use truncate transform"; + + auto thirdLevelDirs = listFirstLevelDirectories(secondDir); + ASSERT_GT(thirdLevelDirs.size(), 0) + << "No third level directories found in " << secondDir; + + for (const auto& thirdDir : thirdLevelDirs) { + const auto thirdDirName = + std::filesystem::path(thirdDir).filename().string(); + ASSERT_TRUE(thirdDirName.find("c_int_bucket=") != std::string::npos) + << "Third level directory " << thirdDirName + << " should use bucket transform"; + + auto leafDir = thirdDir; + auto intValue = std::stoi( + std::filesystem::path(dir).filename().string().substr( + 6)); // c_int=X. + auto truncValue = std::stoi( + std::filesystem::path(secondDir).filename().string().substr( + 12)); // c_int_trunc=X. 
+ std::string filter = fmt::format( + "c_int = {} AND c_int >= {} AND c_int < {}", + intValue, + truncValue, + truncValue + 10); + + // Verify the partition has data. + auto splits = createSplitsForDirectory(leafDir); + auto countPlan = PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .singleAggregation({}, {"count(1)"}) + .planNode(); + auto countResult = + AssertQueryBuilder(countPlan).splits(splits).copyResults( + opPool_.get()); + ASSERT_GT( + countResult->childAt(0)->asFlatVector()->valueAt(0), 0) + << "Leaf partition directory " << leafDir << " has no data"; + } + } + } +} + +TEST_F(IcebergTransformE2ETest, timestampYearPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto batches = createTestData(numBatches, rowsPerBatch); + + auto outputDirectory = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {6, TransformType::kYear, std::nullopt}}; // c_timestamp column. 
+ auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : batches) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + std::unordered_map yearToExpectedCount; + + for (const auto& batch : batches) { + auto timestampVector = batch->childAt(6)->as>(); + for (auto i = 0; i < batch->size(); i++) { + if (!timestampVector->isNullAt(i)) { + Timestamp ts = timestampVector->valueAt(i); + std::tm tm; + if (Timestamp::epochToCalendarUtc(ts.getSeconds(), tm)) { + int32_t year = tm.tm_year + 1900; + yearToExpectedCount[year]++; + } + } + } + } + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + auto [c, v] = buildFilter(dirName); + auto year = std::stoi(v); + std::string filter = fmt::format("YEAR(c_timestamp) = {}", year); + auto expectedRowCount = yearToExpectedCount.at(year); + verifyPartitionData(rowType_, dir, filter, expectedRowCount); + } +} + +TEST_F(IcebergTransformE2ETest, timestampMonthPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + + auto batches = createTestData(numBatches, rowsPerBatch); + + auto outputDirectory = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {6, TransformType::kMonth, std::nullopt}}; // c_timestamp column. 
+ auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : batches) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + std::unordered_map monthToExpectedCount; + + for (const auto& batch : batches) { + auto timestampVector = batch->childAt(6)->as>(); + for (auto i = 0; i < batch->size(); i++) { + if (!timestampVector->isNullAt(i)) { + Timestamp ts = timestampVector->valueAt(i); + std::tm tm; + if (Timestamp::epochToCalendarUtc(ts.getSeconds(), tm)) { + int32_t year = tm.tm_year + 1900; + int32_t month = tm.tm_mon + 1; + std::string monthKey = fmt::format("{:04d}-{:02d}", year, month); + monthToExpectedCount[monthKey]++; + } + } + } + } + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + auto [c, v] = buildFilter(dirName); + size_t dashPos = v.find('-'); + ASSERT_NE(dashPos, std::string::npos) << "Invalid month format: " << v; + + int32_t year = std::stoi(v.substr(0, dashPos)); + int32_t month = std::stoi(v.substr(dashPos + 1)); + std::string filter = fmt::format( + "YEAR(c_timestamp) = {} AND MONTH(c_timestamp) = {}", year, month); + std::string monthKey = fmt::format("{:04d}-{:02d}", year, month); + auto expectedCount = monthToExpectedCount[monthKey]; + verifyPartitionData(rowType_, dir, filter, expectedCount); + } +} + +TEST_F(IcebergTransformE2ETest, timestampDayPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto batches = createTestData(numBatches, rowsPerBatch); + auto outputDirectory = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {6, TransformType::kDay, std::nullopt}}; // c_timestamp column + auto dataSink = createIcebergDataSink( + rowType_, 
outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : batches) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + std::unordered_map dayToExpectedCount; + for (const auto& batch : batches) { + auto timestampVector = batch->childAt(6)->as>(); + for (auto i = 0; i < batch->size(); i++) { + if (!timestampVector->isNullAt(i)) { + Timestamp ts = timestampVector->valueAt(i); + std::tm tm; + if (Timestamp::epochToCalendarUtc(ts.getSeconds(), tm)) { + int32_t year = tm.tm_year + 1900; + int32_t month = tm.tm_mon + 1; + int32_t day = tm.tm_mday; + std::string dayKey = + fmt::format("{:04d}-{:02d}-{:02d}", year, month, day); + dayToExpectedCount[dayKey]++; + } + } + } + } + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + auto [c, v] = buildFilter(dirName); + std::vector dateParts; + folly::split('-', v, dateParts); + ASSERT_EQ(dateParts.size(), 3) << "Invalid day format: " << v; + + int32_t year = std::stoi(dateParts[0]); + int32_t month = std::stoi(dateParts[1]); + int32_t day = std::stoi(dateParts[2]); + + std::string filter = fmt::format( + "YEAR(c_timestamp) = {} AND MONTH(c_timestamp) = {} AND DAY(c_timestamp) = {}", + year, + month, + day); + + // Get expected count for this day. 
+ std::string dayKey = fmt::format("{:04d}-{:02d}-{:02d}", year, month, day); + auto expectedCount = dayToExpectedCount[dayKey]; + verifyPartitionData(rowType_, dir, filter, expectedCount); + } +} + +TEST_F(IcebergTransformE2ETest, timestampHourPartitioning) { + constexpr auto numBatches = 2; + constexpr auto rowsPerBatch = 100; + auto batches = createTestData(numBatches, rowsPerBatch); + + auto outputDirectory = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {6, TransformType::kHour, std::nullopt}}; // c_timestamp column. + auto dataSink = createIcebergDataSink( + rowType_, outputDirectory->getPath(), partitionTransforms); + + for (const auto& vector : batches) { + dataSink->appendData(vector); + } + + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + verifyTotalRowCount( + rowType_, outputDirectory->getPath(), numBatches * rowsPerBatch); + + auto partitionDirs = listFirstLevelDirectories(outputDirectory->getPath()); + std::unordered_map hourToExpectedCount; + + for (const auto& batch : batches) { + auto timestampVector = batch->childAt(6)->as>(); + for (auto i = 0; i < batch->size(); i++) { + if (!timestampVector->isNullAt(i)) { + Timestamp ts = timestampVector->valueAt(i); + std::tm tm; + if (Timestamp::epochToCalendarUtc(ts.getSeconds(), tm)) { + int32_t year = tm.tm_year + 1900; + int32_t month = tm.tm_mon + 1; + int32_t day = tm.tm_mday; + int32_t hour = tm.tm_hour; + std::string hourKey = fmt::format( + "{:04d}-{:02d}-{:02d}-{:02d}", year, month, day, hour); + hourToExpectedCount[hourKey]++; + } + } + } + } + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + auto [c, v] = buildFilter(dirName); + std::vector dateParts; + folly::split('-', v, dateParts); + ASSERT_EQ(dateParts.size(), 4) << "Invalid hour format: " << v; + + int32_t year = std::stoi(dateParts[0]); + int32_t month = std::stoi(dateParts[1]); + int32_t day = std::stoi(dateParts[2]); + int32_t hour = 
std::stoi(dateParts[3]); + + std::string filter = fmt::format( + "YEAR(c_timestamp) = {} AND MONTH(c_timestamp) = {} AND " + "DAY(c_timestamp) = {} AND HOUR(c_timestamp) = {}", + year, + month, + day, + hour); + std::string hourKey = + fmt::format("{:04d}-{:02d}-{:02d}-{:02d}", year, month, day, hour); + auto expectedCount = hourToExpectedCount[hourKey]; + verifyPartitionData(rowType_, dir, filter, expectedCount); + } +} + +TEST_F(IcebergTransformE2ETest, partitionFolderNamingConventions) { + auto intVector = makeFlatVector(1, [](auto) { return 42; }); + auto bigintVector = + makeFlatVector(1, [](auto) { return 9'876'543'210; }); + auto varcharVector = + BaseVector::create>(VARCHAR(), 1, opPool_.get()); + varcharVector->set(0, StringView("test string")); + + auto varcharVector2 = + BaseVector::create>(VARCHAR(), 1, opPool_.get()); + varcharVector2->setNull(0, true); + + auto decimalVector = + BaseVector::create>(DECIMAL(18, 3), 1, opPool_.get()); + decimalVector->set(0, 1'234'567'890); + + auto varbinaryVector = + BaseVector::create>(VARBINARY(), 1, opPool_.get()); + std::string binaryData = "binary\0data\1\2\3"; + varbinaryVector->set(0, StringView(binaryData)); + + auto rowVector = makeRowVector( + {"c_int", + "c_bigint", + "c_varchar", + "c_varchar2", + "c_decimal", + "c_varbinary"}, + {intVector, + bigintVector, + varcharVector, + varcharVector2, + decimalVector, + varbinaryVector}); + auto outputDirectory = TempDirectoryPath::create(); + std::vector partitionTransforms = { + {0, TransformType::kIdentity, std::nullopt}, // c_int. + {1, TransformType::kIdentity, std::nullopt}, // c_bigint. + {2, TransformType::kIdentity, std::nullopt}, // c_varchar. + {4, TransformType::kIdentity, std::nullopt}, // c_decimal. + {5, TransformType::kIdentity, std::nullopt}, // c_varbinary. + {3, TransformType::kIdentity, std::nullopt} // c_varchar2. 
+ }; + auto dataSink = createIcebergDataSink( + asRowType(rowVector->type()), + outputDirectory->getPath(), + partitionTransforms); + + dataSink->appendData(rowVector); + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + + verifyTotalRowCount( + asRowType(rowVector->type()), outputDirectory->getPath(), 1); + auto partitionDirs = listDirectoriesRecursively(outputDirectory->getPath()); + + const std::string expectedIntFolder = "c_int=42"; + const std::string expectedBigintFolder = "c_bigint=9876543210"; + const std::string expectedVarcharFolder = "c_varchar=test+string"; + const std::string expectedVarcharFolder2 = "c_varchar2=null"; + const std::string expectedDecimalFolder = "c_decimal=1234567.890"; + const std::string expectedVarbinary = "c_varbinary=" + + encoding::Base64::encode(binaryData.data(), binaryData.size()); + + bool foundIntPartition = false; + bool foundBigintPartition = false; + bool foundVarcharPartition = false; + bool foundVarcharPartition2 = false; + bool foundDecimalPartition = false; + bool foundVarbinaryPartition = false; + + for (const auto& dir : partitionDirs) { + const auto dirName = std::filesystem::path(dir).filename().string(); + + if (dirName == expectedIntFolder) { + foundIntPartition = true; + verifyPartitionData(asRowType(rowVector->type()), dir, "c_int = 42", 1); + } else if (dirName == expectedBigintFolder) { + foundBigintPartition = true; + verifyPartitionData( + asRowType(rowVector->type()), dir, "c_bigint = 9876543210", 1); + } else if (dirName == expectedVarcharFolder) { + foundVarcharPartition = true; + verifyPartitionData( + asRowType(rowVector->type()), dir, "c_varchar = 'test string'", 1); + } else if (dirName == expectedVarcharFolder2) { + foundVarcharPartition2 = true; + verifyPartitionData( + asRowType(rowVector->type()), dir, "c_varchar2 IS NULL", 1); + } else if (dirName == expectedDecimalFolder) { + foundDecimalPartition = true; + verifyPartitionData( + asRowType(rowVector->type()), + dir, + "c_decimal = 
DECIMAL '1234567.890'", + 1); + } else if (dirName.find(expectedVarbinary) == 0) { + foundVarbinaryPartition = true; + verifyPartitionData( + asRowType(rowVector->type()), dir, "c_varbinary IS NOT NULL", 1); + } + } + + ASSERT_TRUE(foundIntPartition) + << "Integer partition folder not found: " << expectedIntFolder; + ASSERT_TRUE(foundBigintPartition) + << "Bigint partition folder not found: " << expectedBigintFolder; + ASSERT_TRUE(foundVarcharPartition) + << "Varchar partition folder not found: " << expectedVarcharFolder; + ASSERT_TRUE(foundVarcharPartition2) + << "Varchar2 partition folder not found: " << expectedVarcharFolder2; + ASSERT_TRUE(foundDecimalPartition) + << "Decimal partition folder not found: " << expectedDecimalFolder; + ASSERT_TRUE(foundVarbinaryPartition) + << "Varbinary partition folder not found with prefix: " + << expectedVarbinary; +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/IcebergTransformUnitTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergTransformUnitTest.cpp new file mode 100644 index 00000000000..da0116c1c33 --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergTransformUnitTest.cpp @@ -0,0 +1,801 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/common/encode/Base64.h" +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" + +namespace facebook::velox::connector::hive::iceberg::test { + +class IcebergTransformUnitTest : public IcebergTestBase { + protected: + template + void testTransform( + const IcebergPartitionSpec::Field& field, + const std::vector& inputValues, + const std::vector>& expectedValues, + const TypePtr& type = nullptr) { + VectorPtr inputVector; + std::vector> transforms = + parsePartitionTransformSpecs({field}, opPool_.get()); + auto transform = transforms[0]; + if constexpr (std::is_same_v) { + auto size = inputValues.size(); + auto vectorType = type ? type : VARCHAR(); + inputVector = BaseVector::create>( + vectorType, size, opPool_.get()); + const auto flatVector = inputVector->asFlatVector(); + for (vector_size_t i = 0; i < size; i++) { + if (i < inputValues.size()) { + flatVector->set(i, inputValues[i]); + } else { + flatVector->setNull(i, true); + } + } + } else { + auto size = inputValues.size(); + inputVector = BaseVector::create>( + type ? 
type : CppToType::create(), size, opPool_.get()); + const auto flatVector = inputVector->asFlatVector(); + for (vector_size_t i = 0; i < size; i++) { + if (i < inputValues.size()) { + flatVector->set(i, inputValues[i]); + } else { + flatVector->setNull(i, true); + } + } + } + + std::vector children = {inputVector}; + std::vector names = {field.name}; + auto rowVector = makeRowVector(names, children); + const auto resultVector = transform->transform(rowVector, 0); + + ASSERT_EQ(resultVector->size(), expectedValues.size()); + for (vector_size_t i = 0; i < resultVector->size(); i++) { + if (expectedValues[i].has_value()) { + if constexpr ( + std::is_same_v && + std::is_same_v) { + if (type && type->isVarbinary()) { + EXPECT_EQ( + resultVector->as>()->valueAt(i).str(), + encoding::Base64::encode(expectedValues[i].value().str())); + } else { + EXPECT_EQ( + resultVector->as>()->valueAt(i).str(), + expectedValues[i].value().str()); + } + } else { + EXPECT_EQ( + resultVector->as>()->valueAt(i), + expectedValues[i].value()); + } + } else { + EXPECT_TRUE(resultVector->isNullAt(i)); + } + } + } +}; + +TEST_F(IcebergTransformUnitTest, testIdentityTransform) { + rowType_ = + ROW({"c_int", + "c_bigint", + "c_varchar", + "c_date", + "c_varbinary", + "c_decimal", + "c_timestamp"}, + {INTEGER(), + BIGINT(), + VARCHAR(), + DATE(), + VARBINARY(), + DECIMAL(18, 3), + TIMESTAMP()}); + + // Create partition spec with identity transforms. + const auto partitionSpec = createPartitionSpec( + {{0, TransformType::kIdentity, std::nullopt}, // c_int. + {1, TransformType::kIdentity, std::nullopt}, // c_bigint. + {2, TransformType::kIdentity, std::nullopt}, // c_varchar. + {4, TransformType::kIdentity, std::nullopt}, // c_varbinary. + {5, TransformType::kIdentity, std::nullopt}, // c_decimal. + {6, TransformType::kIdentity, std::nullopt}}, // c_timestamp. 
+ rowType_); + + auto& intTransform = partitionSpec->fields[0]; + EXPECT_EQ(intTransform.transformType, TransformType::kIdentity); + testTransform( + intTransform, + {1, + 0, + -1, + std::numeric_limits::min(), + std::numeric_limits::max()}, + {1, + 0, + -1, + std::numeric_limits::min(), + std::numeric_limits::max()}); + + auto& bigintTransform = partitionSpec->fields[1]; + EXPECT_EQ(bigintTransform.transformType, TransformType::kIdentity); + EXPECT_EQ(bigintTransform.type->kind(), TypeKind::BIGINT); + testTransform( + bigintTransform, + {1L, + 0L, + -1L, + std::numeric_limits::min(), + std::numeric_limits::max()}, + {1, + 0, + -1, + std::numeric_limits::min(), + std::numeric_limits::max()}); + + auto& varcharTransform = partitionSpec->fields[2]; + EXPECT_EQ(varcharTransform.transformType, TransformType::kIdentity); + EXPECT_EQ(varcharTransform.type->kind(), TypeKind::VARCHAR); + testTransform( + varcharTransform, + {StringView("a"), + StringView(""), + StringView("velox"), + StringView( + "Velox is a composable execution engine distributed as an open source C++ library. It provides reusable, extensible, and high-performance data processing components that can be (re-)used to build data management systems focused on different analytical workloads, including batch, interactive, stream processing, and AI/ML. Velox was created by Meta and it is currently developed in partnership with IBM/Ahana, Intel, Voltron Data, Microsoft, ByteDance and many other companies.")}, + {StringView("a"), + StringView(""), + StringView("velox"), + StringView( + "Velox is a composable execution engine distributed as an open source C++ library. It provides reusable, extensible, and high-performance data processing components that can be (re-)used to build data management systems focused on different analytical workloads, including batch, interactive, stream processing, and AI/ML. 
Velox was created by Meta and it is currently developed in partnership with IBM/Ahana, Intel, Voltron Data, Microsoft, ByteDance and many other companies.")}); + + auto& varbinaryTransform = partitionSpec->fields[3]; + EXPECT_EQ(varbinaryTransform.transformType, TransformType::kIdentity); + EXPECT_EQ(varbinaryTransform.type->kind(), TypeKind::VARBINARY); + testTransform( + varbinaryTransform, + { + StringView("\x01\x02\x03", 3), + StringView("\x04\x05\x06\x07", 4), + StringView("\x08\x09", 2), + StringView("", 0), + StringView("\xFF\xFE\xFD\xFC", 4), + }, + { + StringView("\x01\x02\x03", 3), + StringView("\x04\x05\x06\x07", 4), + StringView("\x08\x09", 2), + StringView("", 0), + StringView("\xFF\xFE\xFD\xFC", 4), + }, + VARBINARY()); + + auto& timestampTransform = partitionSpec->fields[5]; + EXPECT_EQ(timestampTransform.transformType, TransformType::kIdentity); + EXPECT_EQ(timestampTransform.type->kind(), TypeKind::TIMESTAMP); + testTransform( + timestampTransform, + { + Timestamp(0, 0), + Timestamp(1609459200, 0), + Timestamp(1640995200, 0), + Timestamp(1672531200, 0), + Timestamp(9223372036854775, 999999999), + }, + { + Timestamp(0, 0), + Timestamp(1609459200, 0), + Timestamp(1640995200, 0), + Timestamp(1672531200, 0), + Timestamp(9223372036854775, 999999999), + }); +} + +TEST_F(IcebergTransformUnitTest, testTruncateTransform) { + rowType_ = + ROW({"c_int", "c_decimal", "c_varchar", "c_varbinary"}, + {INTEGER(), DECIMAL(18, 3), VARCHAR(), VARBINARY()}); + + const auto partitionSpec = createPartitionSpec( + {{0, TransformType::kTruncate, 10}, + {1, TransformType::kTruncate, 10}, + {2, TransformType::kTruncate, 2}, + {3, TransformType::kTruncate, 3}}, + rowType_); + + auto& intTruncateTransform = partitionSpec->fields[0]; + testTransform( + intTruncateTransform, + { + std::numeric_limits::min(), + std::numeric_limits::min() + 1, + std::numeric_limits::min() + 9, + std::numeric_limits::min() + 10, + -1, + 0, + 1, + 9, + std::numeric_limits::max() - 10, + 
std::numeric_limits::max() - 9, + std::numeric_limits::max() - 1, + std::numeric_limits::max(), + }, + { + 2'147'483'646, + 2'147'483'646, + -2'147'483'640, + -2'147'483'640, + -10, + 0, + 0, + 0, + 2'147'483'630, + 2'147'483'630, + 2'147'483'640, + 2'147'483'640, + }); + + auto& decimalTruncateTransform = partitionSpec->fields[1]; + testTransform( + decimalTruncateTransform, + { + 5000, + 5010, + 5011, + 5019, + 5020, + 5021, + -5000, + -5010, + -5011, + -5019, + -5020, + -5021, + 1234, + 1230, + 1229, + 5, + -5, + -10, + -9, + -1, + 0, + 1, + 9, + 10, + 995, + 1000, + 1005, + 1010, + + // Large values. + 999'999'999'999'999'990L, + 999'999'999'999'999'995L, + 999'999'999'999'999'999L, + // Small values. + -999'999'999'999'999'990L, + -999'999'999'999'999'995L, + -999'999'999'999'999'999L, + }, + { + 5000, + 5010, + 5010, + 5010, + 5020, + 5020, + -5000, + -5010, + -5020, + -5020, + -5020, + -5030, + 1230, + 1230, + 1220, + 0, + -10, + -10, + -10, + -10, + 0, + 0, + 0, + 10, + 990, + 1000, + 1000, + 1010, + // Expected results for large values. + 999'999'999'999'999'990L, + 999'999'999'999'999'990L, + 999'999'999'999'999'990L, + // Expected results for small values. + -999'999'999'999'999'990L, + -1'000'000'000'000'000'000L, + -1'000'000'000'000'000'000L, + }); + + auto& varcharTruncateTransform = partitionSpec->fields[2]; + testTransform( + varcharTruncateTransform, + { + StringView(""), + StringView("a"), + StringView("ab"), + StringView("abc"), + StringView("abcd"), + StringView("测"), // 1 code point, 3 bytes. + StringView("测试"), // 2 code points, 6 bytes. + StringView("测试abc"), // 5 code points. + StringView("a测b试c"), // 5 code points. + StringView("🚀"), // 1 code point, 4 bytes. + StringView("🚀🔥"), // 2 code points, 8 bytes. + StringView("abc🚀🔥"), // 5 code points. + StringView("é"), // 1 code point (e + combining acute accent). + StringView("éfac"), // 4 code points. + StringView("a\u0300"), // 'a' + combining grave accent = 1 code point. 
+ }, + { + StringView(""), + StringView("a"), + StringView("ab"), + StringView("ab"), + StringView("ab"), + StringView("测"), + StringView("测试"), + StringView("测试"), + StringView("a测"), + StringView("🚀"), + StringView("🚀🔥"), + StringView("ab"), + StringView("é"), + StringView("éf"), + StringView("a\u0300"), + }); + + auto& varbinaryTransform = partitionSpec->fields[3]; + testTransform( + varbinaryTransform, + { + StringView("\x01\x02\x03", 3), + StringView("\x04\x05\x06\x07", 4), + StringView("\x08\x09", 2), + StringView("", 0), + StringView( + "\xFF\xFE\xFD\xFC\xFA\xFB\xFC\xF1\xF2\xF3\xF4\xF5\xF6\xF7", 14), + }, + { + StringView("\x01\x02\x03", 3), + StringView("\x04\x05\x06", 3), + StringView("\x08\x09", 2), + StringView("", 0), + StringView("\xFF\xFE\xFD", 3), + }, + VARBINARY()); +} + +TEST_F(IcebergTransformUnitTest, testBucketTransform) { + rowType_ = + ROW({"c_int", "c_bigint", "c_varchar", "c_varbinary", "c_date"}, + {INTEGER(), BIGINT(), VARCHAR(), VARBINARY(), DATE()}); + + const auto partitionSpec = createPartitionSpec( + {{0, TransformType::kBucket, 4}, + {1, TransformType::kBucket, 8}, + {2, TransformType::kBucket, 16}, + {3, TransformType::kBucket, 32}, + {4, TransformType::kBucket, 10}}, + rowType_); + + auto& intBucketTransform = partitionSpec->fields[0]; + EXPECT_EQ(intBucketTransform.transformType, TransformType::kBucket); + + testTransform( + intBucketTransform, + {8, + 34, + 0, + 1, + -1, + 42, + 100, + 1000, + std::numeric_limits::min(), + std::numeric_limits::max()}, + {3, 3, 0, 0, 0, 2, 0, 0, 0, 2}); + + auto& bigintBucketTransform = partitionSpec->fields[1]; + EXPECT_EQ(bigintBucketTransform.transformType, TransformType::kBucket); + + testTransform( + bigintBucketTransform, + {34L, + 0L, + -34L, + -1L, + 1L, + 42L, + 123'456'789L, + -123'456'789L, + std::numeric_limits::min(), + std::numeric_limits::max()}, + {3, 4, 5, 0, 4, 6, 1, 4, 5, 7}); + + auto& varcharBucketTransform = partitionSpec->fields[2]; + 
EXPECT_EQ(varcharBucketTransform.transformType, TransformType::kBucket); + + testTransform( + varcharBucketTransform, + {StringView("abcdefg"), + StringView("测试"), + StringView("测试ping试测"), + StringView(""), + StringView("🚀🔥"), + StringView("a\u0300\u0301"), // Combining characters. + StringView("To be or not to be, that is the question.")}, + {6, 8, 11, 0, 14, 11, 9}); + + auto& varbinaryBucketTransform = partitionSpec->fields[3]; + EXPECT_EQ(varbinaryBucketTransform.transformType, TransformType::kBucket); + + testTransform( + varbinaryBucketTransform, + {StringView("abc\0\0", 5), + StringView("\x01\x02\x03\x04", 4), + StringView("\xFF\xFE\xFD\xFC", 4), + StringView("\x00\x00\x00\x00", 4), + StringView("\xDE\xAD\xBE\xEF", 4), + StringView(std::string(100, 'x').c_str(), 100)}, + {11, 5, 15, 30, 10, 18}, + VARBINARY()); + + auto& dateBucketTransform = partitionSpec->fields[4]; + EXPECT_EQ(dateBucketTransform.transformType, TransformType::kBucket); + + testTransform( + dateBucketTransform, + { + 0, // 1970-01-01. + 365, // 1971-01-01. + 18'262, // 2020-01-01. + -365, // 1969-01-01. + -1, // 1969-12-31. + 20'181, // 2025-04-03. + -36889, // 1869-01-01. + 18'628 // 2021-01-01. + }, + {6, 1, 3, 6, 2, 5, 9, 0}); +} + +TEST_F(IcebergTransformUnitTest, testTemporalTransforms) { + rowType_ = ROW({"c_date"}, {DATE()}); + + const auto partitionSpec = createPartitionSpec( + {{0, TransformType::kYear, std::nullopt}, + {0, TransformType::kMonth, std::nullopt}, + {0, TransformType::kDay, std::nullopt}, + {0, TransformType::kHour, std::nullopt}, + {0, TransformType::kBucket, 8}, + {0, TransformType::kIdentity, std::nullopt}}, + rowType_); + + auto& yearTransform = partitionSpec->fields[0]; + EXPECT_EQ(yearTransform.transformType, TransformType::kYear); + // Create test dates (days since epoch). + testTransform( + yearTransform, + { + -36889, // 1869-01-01. + -18628, // 1919-01-01. + -365, // 1969-01-01. + -1, // 1969-12-31. + 0, // 1970-01-01 (epoch). + 31, // 1970-02-01. 
+ 365, // 1971-01-01. + 18'262, // 2020-01-01. + 20'181 // 2025-04-03. + }, + { + -101, // 1869 - 1970 = -101. + -51, // 1919 - 1970 = -51. + -1, // 1969 - 1970 = -1. + -1, // 1969 - 1970 = -1. + 0, // 1970 - 1970 = 0. + 0, // 1970 - 1970 = 0. + 1, // 1971 - 1970 = 1. + 50, // 2020 - 1970 = 50. + 55 // 2025 - 1970 = 55. + }); + // Test month transform. + auto& monthTransform = partitionSpec->fields[1]; + EXPECT_EQ(monthTransform.transformType, TransformType::kMonth); + + testTransform( + monthTransform, + {-36525, -18263, -365, -1, 0, 31, 365, 18'262, 20'181}, + {-1201, -600, -12, -1, 0, 1, 12, 600, 663}); + // Test day transform. + auto& dayTransform = partitionSpec->fields[2]; + EXPECT_EQ(dayTransform.transformType, TransformType::kDay); + testTransform( + dayTransform, + {-36525, -18263, -365, -1, 0, 31, 365, 18'262, 20'181}, + {-36525, -18263, -365, -1, 0, 31, 365, 18'262, 20'181}); +} + +TEST_F(IcebergTransformUnitTest, testTransformOnTimestamp) { + rowType_ = ROW({"c_timestamp"}, {TIMESTAMP()}); + + const auto partitionSpec = createPartitionSpec( + {{0, TransformType::kYear, std::nullopt}, + {0, TransformType::kMonth, std::nullopt}, + {0, TransformType::kDay, std::nullopt}, + {0, TransformType::kHour, std::nullopt}, + {0, TransformType::kBucket, 8}, + {0, TransformType::kIdentity, std::nullopt}}, + rowType_); + + auto& yearTransform = partitionSpec->fields[0]; + EXPECT_EQ(yearTransform.transformType, TransformType::kYear); + testTransform( + yearTransform, + { + Timestamp(0, 0), + Timestamp(31536000, 0), // 1971-01-01 00:00:00. + Timestamp(1609459200, 0), // 2021-01-01 00:00:00. + Timestamp(1612224000, 0), // 2021-02-01 00:00:00. + }, + { + 0, // 1970 - 1970 = 0. + 1, // 1971 - 1970 = 1. + 51, // 2021 - 1970 = 51. + 51 // 2021 - 1970 = 51. 
+ }); + + auto& monthTransform = partitionSpec->fields[1]; + EXPECT_EQ(monthTransform.transformType, TransformType::kMonth); + + testTransform( + monthTransform, + {Timestamp(0, 0), + Timestamp(31536000, 0), + Timestamp(1609459200, 0), + Timestamp(1612224000, 0)}, + {0, 12, 612, 613}); + + auto& dayTransform = partitionSpec->fields[2]; + EXPECT_EQ(dayTransform.transformType, TransformType::kDay); + testTransform( + dayTransform, + {Timestamp(0, 0), + Timestamp(31536000, 0), + Timestamp(1609459200, 0), + Timestamp(1612224000, 0)}, + {0, 365, 18628, 18660}); + + auto& hourTransform = partitionSpec->fields[3]; + EXPECT_EQ(hourTransform.transformType, TransformType::kHour); + testTransform( + hourTransform, + {Timestamp(0, 0), + Timestamp(31536000, 0), + Timestamp(1609459200, 0), + Timestamp(1612224000, 0)}, + {0, 8760, 447072, 447840}); + + auto& bucketTransform = partitionSpec->fields[4]; + EXPECT_EQ(bucketTransform.transformType, TransformType::kBucket); + testTransform( + bucketTransform, + { + Timestamp(0, 0), + Timestamp(31536000, 0), + Timestamp(1609459200, 0), + Timestamp(1612224000, 0), + Timestamp(-31536000, 0), + }, + {4, 4, 6, 5, 3}); + + auto& identityTransform = partitionSpec->fields[5]; + EXPECT_EQ(identityTransform.transformType, TransformType::kIdentity); + testTransform( + identityTransform, + {Timestamp(0, 0), + Timestamp(31536000, 0), + Timestamp(1609459200, 0), + Timestamp(1612224000, 0)}, + {Timestamp(0, 0), + Timestamp(31536000, 0), + Timestamp(1609459200, 0), + Timestamp(1612224000, 0)}); +} + +TEST_F(IcebergTransformUnitTest, testTransformsWithNulls) { + rowType_ = ROW( + {"c_int", "c_bigint", "c_decimal", "c_varchar", "c_varbinary", "c_date"}, + {INTEGER(), BIGINT(), DECIMAL(18, 3), VARCHAR(), VARBINARY(), DATE()}); + + const auto partitionSpec = createPartitionSpec( + {{0, TransformType::kIdentity, std::nullopt}, + {2, TransformType::kTruncate, 100}, + {1, TransformType::kBucket, 16}, + {5, TransformType::kYear, std::nullopt}, + {5, 
TransformType::kMonth, std::nullopt}, + {5, TransformType::kDay, std::nullopt}}, + rowType_); + + auto& identityTransform = partitionSpec->fields[0]; + EXPECT_EQ(identityTransform.transformType, TransformType::kIdentity); + + auto intInput = + makeNullableFlatVector({5, std::nullopt, 15, std::nullopt, 25}); + std::vector children = {intInput}; + std::vector names = {identityTransform.name}; + auto rowVector = makeRowVector(names, children); + + std::vector> transforms = + parsePartitionTransformSpecs({identityTransform}, opPool_.get()); + auto transform = transforms[0]; + auto identityResult = transform->transform(rowVector, 0); + ASSERT_EQ(identityResult->size(), 5); + EXPECT_EQ(identityResult->as>()->valueAt(0), 5); + EXPECT_TRUE(identityResult->isNullAt(1)); + EXPECT_EQ(identityResult->as>()->valueAt(2), 15); + EXPECT_TRUE(identityResult->isNullAt(3)); + EXPECT_EQ(identityResult->as>()->valueAt(4), 25); + + auto& truncateTransform = partitionSpec->fields[1]; + EXPECT_EQ(truncateTransform.transformType, TransformType::kTruncate); + + auto decimalInput = makeNullableFlatVector( + {5'000, std::nullopt, 15'000, std::nullopt, 25'000}); + children = {decimalInput}; + names = {truncateTransform.name}; + rowVector = makeRowVector(names, children); + transforms = parsePartitionTransformSpecs({truncateTransform}, opPool_.get()); + transform = transforms[0]; + auto truncateResult = transform->transform(rowVector, 0); + ASSERT_EQ(truncateResult->size(), 5); + EXPECT_EQ(truncateResult->as>()->valueAt(0), 5000); + EXPECT_TRUE(truncateResult->isNullAt(1)); + EXPECT_EQ(truncateResult->as>()->valueAt(2), 15'000); + EXPECT_TRUE(truncateResult->isNullAt(3)); + EXPECT_EQ(truncateResult->as>()->valueAt(4), 25'000); + + auto& bucketTransform = partitionSpec->fields[2]; + EXPECT_EQ(bucketTransform.transformType, TransformType::kBucket); + + auto bigintInput = makeNullableFlatVector( + {50L, std::nullopt, 150L, std::nullopt, 250L}); + children = {bigintInput}; + names = 
{bucketTransform.name}; + rowVector = makeRowVector(names, children); + transforms = parsePartitionTransformSpecs({bucketTransform}, opPool_.get()); + transform = transforms[0]; + auto bucketResult = transform->transform(rowVector, 0); + ASSERT_EQ(bucketResult->size(), 5); + EXPECT_TRUE(bucketResult->isNullAt(1)); + EXPECT_TRUE(bucketResult->isNullAt(3)); + + auto& yearTransform = partitionSpec->fields[3]; + EXPECT_EQ(yearTransform.transformType, TransformType::kYear); + + auto dateInput = makeNullableFlatVector( + {0, std::nullopt, 365, std::nullopt, 20'175}); + children = {dateInput}; + names = {yearTransform.name}; + rowVector = makeRowVector(names, children); + transforms = parsePartitionTransformSpecs({yearTransform}, opPool_.get()); + transform = transforms[0]; + auto yearResult = transform->transform(rowVector, 0); + ASSERT_EQ(yearResult->size(), 5); + EXPECT_EQ(yearResult->as>()->valueAt(0), 0); + EXPECT_TRUE(yearResult->isNullAt(1)); + EXPECT_EQ(yearResult->as>()->valueAt(2), 1); + EXPECT_TRUE(yearResult->isNullAt(3)); + EXPECT_EQ(yearResult->as>()->valueAt(4), 55); + + auto& monthTransform = partitionSpec->fields[4]; + EXPECT_EQ(monthTransform.transformType, TransformType::kMonth); + children = {dateInput}; + names = {monthTransform.name}; + rowVector = makeRowVector(names, children); + transforms = parsePartitionTransformSpecs({monthTransform}, opPool_.get()); + transform = transforms[0]; + auto monthResult = transform->transform(rowVector, 0); + ASSERT_EQ(monthResult->size(), 5); + EXPECT_EQ(monthResult->as>()->valueAt(0), 0); + EXPECT_TRUE(monthResult->isNullAt(1)); + EXPECT_EQ(monthResult->as>()->valueAt(2), 12); + EXPECT_TRUE(monthResult->isNullAt(3)); + EXPECT_EQ(monthResult->as>()->valueAt(4), 662); + + auto& dayTransform = partitionSpec->fields[5]; + EXPECT_EQ(dayTransform.transformType, TransformType::kDay); + names = {dayTransform.name}; + rowVector = makeRowVector(names, children); + transforms = parsePartitionTransformSpecs({dayTransform}, 
opPool_.get()); + transform = transforms[0]; + auto dayResult = transform->transform(rowVector, 0); + ASSERT_EQ(dayResult->size(), 5); + EXPECT_EQ(dayResult->as>()->valueAt(0), 0); + EXPECT_TRUE(dayResult->isNullAt(1)); + EXPECT_EQ(dayResult->as>()->valueAt(2), 365); + EXPECT_TRUE(dayResult->isNullAt(3)); + EXPECT_EQ(dayResult->as>()->valueAt(4), 20'175); + + auto varcharInput = makeNullableFlatVector( + {StringView("abc"), + std::nullopt, + StringView("def"), + std::nullopt, + StringView("ghi")}); + + rowType_ = ROW({"c_varchar"}, {VARCHAR()}); + auto varcharIdentityTransform = + createPartitionSpec( + {{0, TransformType::kIdentity, std::nullopt}}, rowType_) + ->fields[0]; + children = {varcharInput}; + names = {varcharIdentityTransform.name}; + rowVector = makeRowVector(names, children); + + transforms = + parsePartitionTransformSpecs({varcharIdentityTransform}, opPool_.get()); + transform = transforms[0]; + auto varcharIdentityResult = transform->transform(rowVector, 0); + ASSERT_EQ(varcharIdentityResult->size(), 5); + EXPECT_EQ( + varcharIdentityResult->as>()->valueAt(0).str(), + "abc"); + EXPECT_TRUE(varcharIdentityResult->isNullAt(1)); + EXPECT_EQ( + varcharIdentityResult->as>()->valueAt(2).str(), + "def"); + EXPECT_TRUE(varcharIdentityResult->isNullAt(3)); + EXPECT_EQ( + varcharIdentityResult->as>()->valueAt(4).str(), + "ghi"); + + auto varbinaryInput = makeNullableFlatVector( + {StringView("\x01\x02\x03", 3), + std::nullopt, + StringView("\x04\x05\x06", 3), + std::nullopt, + StringView("\x07\x08\x09", 3)}, + VARBINARY()); + + rowType_ = ROW({"c_varbinary"}, {VARBINARY()}); + auto varbinaryIdentityTransform = + createPartitionSpec( + {{0, TransformType::kIdentity, std::nullopt}}, rowType_) + ->fields[0]; + children = {varbinaryInput}; + names = {varbinaryIdentityTransform.name}; + rowVector = makeRowVector(names, children); + transforms = + parsePartitionTransformSpecs({varbinaryIdentityTransform}, opPool_.get()); + transform = transforms[0]; + auto 
varbinaryIdentityResult = transform->transform(rowVector, 0); + ASSERT_EQ(varbinaryIdentityResult->size(), 5); + EXPECT_TRUE(varbinaryIdentityResult->isNullAt(1)); + EXPECT_TRUE(varbinaryIdentityResult->isNullAt(3)); +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/IcebergWriterModeTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergWriterModeTest.cpp new file mode 100644 index 00000000000..e5401203ddd --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergWriterModeTest.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" +#include "velox/exec/tests/utils/AssertQueryBuilder.h" +#include "velox/exec/tests/utils/PlanBuilder.h" +#include "velox/exec/tests/utils/TempDirectoryPath.h" + +using namespace facebook::velox::exec::test; + +namespace facebook::velox::connector::hive::iceberg::test { + +class IcebergWriterModeTest : public IcebergTestBase, + public ::testing::WithParamInterface { + protected: + void SetUp() override { + IcebergTestBase::SetUp(); + + std::unordered_map sessionProps = { + {HiveConfig::kFanoutEnabledSession, GetParam() ? 
"true" : "false"}, + }; + + connectorSessionProperties_ = + std::make_shared(std::move(sessionProps), true); + + setupMemoryPools(); + } +}; + +INSTANTIATE_TEST_SUITE_P( + FanoutModes, + IcebergWriterModeTest, + ::testing::Values(true, false), + [](const testing::TestParamInfo& info) { + return info.param ? "FanoutEnabled" : "FanoutDisabled"; + }); + +TEST_P(IcebergWriterModeTest, identityPartitioning) { + constexpr auto size = 10; + std::vector names = {"c_int"}; + std::vector types = {INTEGER()}; + rowType_ = ROW(names, types); + + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kIdentity, std::nullopt}}); + + auto intVector1 = + makeFlatVector(size, [](vector_size_t row) { return row; }); + auto vector1 = makeRowVector(names, {intVector1}); + auto intVector2 = + makeFlatVector(size, [](vector_size_t row) { return row + 10; }); + auto vector2 = makeRowVector(names, {intVector2}); + dataSink->appendData(vector1); + dataSink->appendData(vector2); + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + createDuckDbTable({vector1, vector2}); + auto splits = createSplitsForDirectory(outputDirectory->getPath()); + auto plan = exec::test::PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .planNode(); + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); +} + +TEST_P(IcebergWriterModeTest, clusteredInput) { + constexpr auto size = 100; + std::vector names = {"c_int"}; + std::vector types = {INTEGER()}; + rowType_ = ROW(names, types); + + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kIdentity, std::nullopt}}); + + auto intVector1 = makeConstant(100, size, INTEGER()); + auto vector1 = makeRowVector(names, {intVector1}); + auto intVector2 = makeConstant(100, size, INTEGER()); + 
auto vector2 = makeRowVector(names, {intVector2}); + dataSink->appendData(vector1); + dataSink->appendData(vector2); + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + auto stats = dataSink->dataFileStats(); + ASSERT_EQ(stats.at(0)->numRecords, size * 2); + ASSERT_FALSE(stats.at(0)->lowerBounds.empty()); + ASSERT_FALSE(stats.at(0)->upperBounds.empty()); + createDuckDbTable({vector1, vector2}); + auto splits = createSplitsForDirectory(outputDirectory->getPath()); + ASSERT_EQ(splits.size(), 1); + auto plan = exec::test::PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .planNode(); + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); +} + +TEST_P(IcebergWriterModeTest, clusteredNullInput) { + constexpr auto size = 100; + std::vector names = {"c_int"}; + std::vector types = {INTEGER()}; + rowType_ = ROW(names, types); + + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kIdentity, std::nullopt}}); + + auto intVector1 = makeNullConstant(TypeKind::INTEGER, size); + auto vector1 = makeRowVector(names, {intVector1}); + auto intVector2 = makeNullConstant(TypeKind::INTEGER, size); + auto vector2 = makeRowVector(names, {intVector2}); + dataSink->appendData(vector1); + dataSink->appendData(vector2); + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + auto stats = dataSink->dataFileStats(); + ASSERT_TRUE(stats.at(0)->upperBounds.empty()); + ASSERT_EQ(stats.at(0)->nullValueCounts.at(1), size * 2); + createDuckDbTable({vector1, vector2}); + auto splits = createSplitsForDirectory(outputDirectory->getPath()); + ASSERT_EQ(splits.size(), 1); + auto plan = exec::test::PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .planNode(); + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); +} + 
+TEST_P(IcebergWriterModeTest, sortedByAndIdentityPartittioning) { + constexpr auto size = 10; + std::vector names = {"c_int"}; + std::vector types = {INTEGER()}; + rowType_ = ROW(names, types); + + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kIdentity, std::nullopt}}, + {"c_int DESC"}); + + auto intVector1 = + makeFlatVector(size, [](vector_size_t row) { return row; }); + auto vector1 = makeRowVector(names, {intVector1}); + auto intVector2 = + makeFlatVector(size, [](vector_size_t row) { return row + 10; }); + auto vector2 = makeRowVector(names, {intVector2}); + dataSink->appendData(vector1); + dataSink->appendData(vector2); + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + createDuckDbTable({vector1, vector2}); + auto splits = createSplitsForDirectory(outputDirectory->getPath()); + ASSERT_EQ(splits.size(), size * 2); + auto plan = exec::test::PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .planNode(); + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); +} + +TEST_P(IcebergWriterModeTest, nonClusteredInput) { + constexpr auto size = 10; + std::vector names = {"c_int"}; + std::vector types = {INTEGER()}; + rowType_ = ROW(names, types); + + auto outputDirectory = TempDirectoryPath::create(); + auto dataSink = createIcebergDataSink( + rowType_, + outputDirectory->getPath(), + {{0, TransformType::kIdentity, std::nullopt}}); + + auto intVector1 = + makeFlatVector(size, [](vector_size_t row) { return row; }); + auto vector1 = makeRowVector(names, {intVector1}); + auto intVector2 = + makeFlatVector(size, [](vector_size_t row) { return row + 5; }); + auto vector2 = makeRowVector(names, {intVector2}); + dataSink->appendData(vector1); + if (!GetParam()) { + VELOX_ASSERT_THROW( + dataSink->appendData(vector2), + "Incoming records violate the writer assumption that records 
are clustered by spec and \n by partition within each spec. Either cluster the incoming records or switch to fanout writers.\nEncountered records that belong to already closed files:\n"); + } else { + dataSink->appendData(vector2); + ASSERT_TRUE(dataSink->finish()); + dataSink->close(); + createDuckDbTable({vector1, vector2}); + auto splits = createSplitsForDirectory(outputDirectory->getPath()); + auto plan = exec::test::PlanBuilder() + .startTableScan() + .connectorId(test::kIcebergConnectorId) + .outputType(rowType_) + .endTableScan() + .planNode(); + assertQuery(plan, splits, fmt::format("SELECT * FROM tmp")); + } +} + +} // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/Murmur3Test.cpp b/velox/connectors/hive/iceberg/tests/Murmur3Test.cpp new file mode 100644 index 00000000000..c8444ecee5b --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/Murmur3Test.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/hive/iceberg/Murmur3.h" +#include +#include "folly/Random.h" +#include "velox/type/TimestampConversion.h" + +namespace facebook::velox::connector::hive::iceberg { +class Murmur3HashTest : public ::testing::Test { + public: + void SetUp() override { + rng_.seed(1); + } + + void TearDown() override {} + + // Little-endian. 
+  /// Serializes 'value' into sizeof(uint64_t) bytes, least-significant byte
+  /// first.
+  static std::vector<char> toBytes(uint64_t value) {
+    // Size the vector up front: reserve() alone leaves size() == 0, so
+    // indexing with operator[] would write to elements that do not exist.
+    std::vector<char> bytes(sizeof(uint64_t));
+    for (size_t i = 0; i < bytes.size(); ++i) {
+      bytes[i] = static_cast<char>((value >> (8 * i)) & 0xFF);
+    }
+    return bytes;
+  }
+
+  /// Hashes 'input' with Murmur3Hash32::hash, maps the hash to a bucket by
+  /// masking it with 0x7FFFFFFF and reducing it modulo 'bucketCount', and
+  /// expects the result to equal 'expectedBucket'.
+  template <typename T>
+  void
+  verifyHashBucket(T input, uint32_t bucketCount, uint32_t expectedBucket) {
+    const auto hash = Murmur3Hash32::hash(input);
+    uint32_t actualBucket = (hash & 0X7FFFFFFF) % bucketCount;
+    EXPECT_EQ(actualBucket, expectedBucket)
+        << "Input: " << input << ", Bucket Count: " << bucketCount
+        << ", Hash: " << hash << ", Expected Bucket: " << expectedBucket
+        << ", Actual Bucket: " << actualBucket;
+  }
+
+ protected:
+  folly::Random::DefaultGenerator rng_;
+};
+
+// Checks hash() against fixed expected values for an integer, a date (days
+// since epoch), timestamps (as microseconds), a 4-byte buffer and a string.
+TEST_F(Murmur3HashTest, testSpecValues) {
+  auto hash = Murmur3Hash32::hash(34);
+  EXPECT_EQ(hash, 2'017'239'379);
+
+  const auto days =
+      util::fromDateString("2017-11-16", util::ParseMode::kIso8601);
+  EXPECT_EQ(days.value(), 17'486);
+  hash = Murmur3Hash32::hash(days.value());
+  EXPECT_EQ(hash, -653'330'422);
+
+  auto timestampResult = util::fromTimestampString(
+      "2017-11-16T22:31:08", util::TimestampParseMode::kIso8601);
+  hash = Murmur3Hash32::hash(timestampResult.value().toMicros());
+  EXPECT_EQ(hash, -2'047'944'441);
+
+  timestampResult = util::fromTimestampString(
+      "2017-11-16T22:31:08.000001", util::TimestampParseMode::kIso8601);
+  hash = Murmur3Hash32::hash(timestampResult.value().toMicros());
+  EXPECT_EQ(hash, -1'207'196'810);
+
+  // Same expected hash as the microsecond case above: the extra nanosecond
+  // is presumably dropped by toMicros().
+  timestampResult = util::fromTimestampString(
+      "2017-11-16T22:31:08.000001001", util::TimestampParseMode::kIso8601);
+  hash = Murmur3Hash32::hash(timestampResult.value().toMicros());
+  EXPECT_EQ(hash, -1'207'196'810);
+
+  // Stack array: no heap allocation that would need to be released.
+  const char bytes[4] = {0x00, 0x01, 0x02, 0x03};
+  hash = Murmur3Hash32::hash(bytes, 4);
+  EXPECT_EQ(hash, -188'683'207);
+
+  hash = Murmur3Hash32::hash("iceberg");
+  EXPECT_EQ(hash, 1'210'000'089);
+}
+
+TEST_F(Murmur3HashTest, hashString) { + const std::vector> testCases = { + {"abcdefg", 5, 4}, + 
{"abc", 128, 122}, + {"abcde", 64, 54}, + {"测试", 12, 8}, + {"测试raul试测", 16, 1}, + {"", 16, 0}}; + + for (const auto& [input, bucketCount, expectedBucket] : testCases) { + verifyHashBucket(input, bucketCount, expectedBucket); + } +} + +TEST_F(Murmur3HashTest, hashInteger) { + const std::vector> testCases = { + {8, 10, 3}, {34, 100, 79}}; + + for (const auto& [input, bucketCount, expectedBucket] : testCases) { + verifyHashBucket(input, bucketCount, expectedBucket); + } +} + +TEST_F(Murmur3HashTest, hashTrue) { + const auto hash = Murmur3Hash32::hash(1); + EXPECT_EQ(hash, 1'392'991'556U); +} + +TEST_F(Murmur3HashTest, hashDate) { + const std::vector> testCases = { + {util::fromDateString("1970-01-09", util::ParseMode::kIso8601).value(), + 10, + 3}, + {util::fromDateString("1970-02-04", util::ParseMode::kIso8601).value(), + 100, + 79}}; + + for (const auto& [input, bucketCount, expectedBucket] : testCases) { + verifyHashBucket(input, bucketCount, expectedBucket); + } +} + +TEST_F(Murmur3HashTest, hashLong) { + const std::vector> testCases = { + {34L, 100, 79}, {0L, 100, 76}, {-34L, 100, 97}, {-1L, 2, 0}}; + + for (const auto& [input, bucketCount, expectedBucket] : testCases) { + verifyHashBucket(input, bucketCount, expectedBucket); + } +} + +TEST_F(Murmur3HashTest, hashDecimal) { + const std::vector> testCases = { + {1234L, 64, 56}, + {1230L, 18, 13}, + {12999L, 16, 2}, + {5L, 32, 21}, + {5L, 18, 3}}; + + for (const auto& [input, bucketCount, expectedBucket] : testCases) { + const auto hash = Murmur3Hash32::hashDecimal(input); + auto actualBucket = (hash & 0X7FFFFFFF) % bucketCount; + EXPECT_EQ(actualBucket, expectedBucket); + } +} + +TEST_F(Murmur3HashTest, hashBinary) { + const std::string s("abc\0\0", 5); + const std::vector> testCases = { + {StringView("abcdefg"), 12, 10}, + {StringView(s), 18, 13}, + {StringView("abc"), 48, 42}, + {StringView("测试_"), 16, 3}}; + for (const auto& [input, bucketCount, expectedBucket] : testCases) { + verifyHashBucket(input, 
bucketCount, expectedBucket); + } +} + +TEST_F(Murmur3HashTest, hashIntegerAndBytes) { + const auto number = folly::Random::rand32(rng_); + const auto hashOfInteger = Murmur3Hash32::hash(number); + const auto hashOfBytes = Murmur3Hash32::hash(toBytes(number).data(), 8); + EXPECT_EQ(hashOfInteger, hashOfBytes); +} + +TEST_F(Murmur3HashTest, hashLongAndBytes) { + const auto number = folly::Random::rand64(rng_); + const auto hashOfLong = Murmur3Hash32::hash(number); + const auto hashOfBytes = Murmur3Hash32::hash(toBytes(number).data(), 8); + EXPECT_EQ(hashOfLong, hashOfBytes); +} + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/PartitionNameTest.cpp b/velox/connectors/hive/iceberg/tests/PartitionNameTest.cpp index 8e9bafbe453..36f98149d7c 100644 --- a/velox/connectors/hive/iceberg/tests/PartitionNameTest.cpp +++ b/velox/connectors/hive/iceberg/tests/PartitionNameTest.cpp @@ -17,7 +17,6 @@ #include #include "velox/common/encode/Base64.h" -#include "velox/connectors/hive/iceberg/IcebergConfig.h" #include "velox/connectors/hive/iceberg/IcebergPartitionName.h" #include "velox/connectors/hive/iceberg/TransformEvaluator.h" #include "velox/connectors/hive/iceberg/TransformExprBuilder.h" @@ -60,7 +59,7 @@ class PartitionNameTest : public test::IcebergTestBase { partitionSpec, partitionChannels, rowType, - std::string(IcebergConfig::kDefaultFunctionPrefix)); + std::string(test::kDefaultTestIcebergFunctionNamePrefix)); auto transformEvaluator = std::make_unique( transformExpressions, connectorQueryCtx_.get()); diff --git a/velox/connectors/hive/iceberg/tests/PartitionSpecTest.cpp b/velox/connectors/hive/iceberg/tests/PartitionSpecTest.cpp deleted file mode 100644 index 23a06340aad..00000000000 --- a/velox/connectors/hive/iceberg/tests/PartitionSpecTest.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/connectors/hive/iceberg/PartitionSpec.h" - -#include -#include "velox/common/base/tests/GTestUtils.h" -#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" -#include "velox/type/Type.h" - -namespace facebook::velox::connector::hive::iceberg { - -namespace { - -TEST(PartitionSpecTest, invalidColumnType) { - auto makeSpec = [](const TypePtr& type) { - std::vector fields = { - {"c0", type, TransformType::kIdentity, std::nullopt}, - }; - return std::make_shared(1, fields); - }; - - VELOX_ASSERT_USER_THROW( - makeSpec(ROW({{"a", INTEGER()}})), - "Type is not supported as a partition column: ROW"); - VELOX_ASSERT_USER_THROW( - makeSpec(ARRAY(INTEGER())), - "Type is not supported as a partition column: ARRAY"); - VELOX_ASSERT_USER_THROW( - makeSpec(MAP(VARCHAR(), INTEGER())), - "Type is not supported as a partition column: MAP"); - VELOX_ASSERT_USER_THROW( - makeSpec(TIMESTAMP_WITH_TIME_ZONE()), - "Type is not supported as a partition column: TIMESTAMP WITH TIME ZONE"); -} - -TEST(PartitionSpecTest, invalidMultipleTransforms) { - { - std::vector fields = { - {"c0", VARCHAR(), TransformType::kIdentity, std::nullopt}, - {"c0", VARCHAR(), TransformType::kIdentity, std::nullopt}, - }; - VELOX_ASSERT_USER_THROW( - std::make_shared(1, fields), - "Column: 'c0', Category: Identity, Transforms: [identity, identity]"); - } - - { - std::vector fields = { - {"c0", VARCHAR(), 
TransformType::kBucket, 16}, - {"c0", VARCHAR(), TransformType::kBucket, 32}, - }; - VELOX_ASSERT_USER_THROW( - std::make_shared(1, fields), - "Column: 'c0', Category: Bucket, Transforms: [bucket, bucket]"); - } - - { - std::vector fields = { - {"c0", VARCHAR(), TransformType::kTruncate, 2}, - {"c0", VARCHAR(), TransformType::kTruncate, 5}, - }; - VELOX_ASSERT_USER_THROW( - std::make_shared(1, fields), - "Column: 'c0', Category: Truncate, Transforms: [trunc, trunc]"); - } - - { - std::vector fields4 = { - {"c0", TIMESTAMP(), TransformType::kYear, std::nullopt}, - {"c0", TIMESTAMP(), TransformType::kMonth, std::nullopt}, - {"c0", TIMESTAMP(), TransformType::kDay, std::nullopt}, - {"c0", TIMESTAMP(), TransformType::kHour, std::nullopt}, - }; - VELOX_ASSERT_USER_THROW( - std::make_shared(1, fields4), - "Column: 'c0', Category: Temporal, Transforms: [year, month, day, hour]"); - } -} - -TEST(PartitionSpecTest, invalidMultipleTransformsMultipleColumns) { - std::vector fields = { - {"c0", DATE(), TransformType::kYear, std::nullopt}, - {"c0", DATE(), TransformType::kMonth, std::nullopt}, - {"c1", VARCHAR(), TransformType::kBucket, 16}, - {"c1", VARCHAR(), TransformType::kBucket, 32}, - }; - // order may vary due to map iteration. 
- VELOX_ASSERT_USER_THROW( - std::make_shared(1, fields), - "Column: 'c0', Category: Temporal, Transforms: [year, month]"); - VELOX_ASSERT_USER_THROW( - std::make_shared(1, fields), - "Column: 'c1', Category: Bucket, Transforms: [bucket, bucket]"); -} - -TEST(PartitionSpecTest, validMultipleTransforms) { - { - std::vector fields = { - {"c0", VARCHAR(), TransformType::kIdentity, std::nullopt}, - {"c0", VARCHAR(), TransformType::kBucket, 16}, - {"c0", VARCHAR(), TransformType::kTruncate, 10}, - }; - auto spec = std::make_shared(1, fields); - EXPECT_EQ(spec->fields.size(), 3); - } - - { - std::vector fields = { - {"c0", DATE(), TransformType::kYear, std::nullopt}, - {"c0", DATE(), TransformType::kBucket, 16}, - {"c0", DATE(), TransformType::kIdentity, std::nullopt}, - }; - auto spec = std::make_shared(1, fields); - EXPECT_EQ(spec->fields.size(), 3); - } -} - -} // namespace - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/PartitionValueFormatterTest.cpp b/velox/connectors/hive/iceberg/tests/PartitionValueFormatterTest.cpp deleted file mode 100644 index 13cda035cbf..00000000000 --- a/velox/connectors/hive/iceberg/tests/PartitionValueFormatterTest.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "velox/connectors/hive/iceberg/IcebergPartitionName.h" -#include "velox/type/Type.h" - -namespace facebook::velox::connector::hive::iceberg { - -namespace { - -template -std::string toPath(TransformType transform, T value, const TypePtr& type) { - return IcebergPartitionName::toName(value, type, transform); -} - -std::string timestampToPath(const Timestamp& timestamp) { - return toPath(TransformType::kIdentity, timestamp, TIMESTAMP()); -} - -std::string testString( - const std::string& value, - const TypePtr& typePtr = VARCHAR()) { - auto identityResult = - toPath(TransformType::kIdentity, StringView(value), typePtr); - auto truncateResult = - toPath(TransformType::kTruncate, StringView(value), typePtr); - EXPECT_EQ(identityResult, truncateResult); - return identityResult; -} - -std::string testVarbinary(const std::string& value) { - return testString(value, VARBINARY()); -} - -std::string testInteger(int32_t value) { - auto identityResult = toPath(TransformType::kIdentity, value, INTEGER()); - auto bucketResult = toPath(TransformType::kBucket, value, INTEGER()); - auto truncResult = toPath(TransformType::kTruncate, value, INTEGER()); - EXPECT_EQ(identityResult, truncResult); - EXPECT_EQ(bucketResult, truncResult); - return truncResult; -} - -TEST(IcebergPartitionPathTest, integer) { - EXPECT_EQ(testInteger(0), "0"); - EXPECT_EQ(testInteger(1), "1"); - EXPECT_EQ(testInteger(100), "100"); - EXPECT_EQ(testInteger(-100), "-100"); - EXPECT_EQ(testInteger(128), "128"); - EXPECT_EQ(testInteger(1024), "1024"); -} - -TEST(IcebergPartitionPathTest, date) { - EXPECT_EQ(toPath(TransformType::kIdentity, 18'262, DATE()), "2020-01-01"); - EXPECT_EQ(toPath(TransformType::kIdentity, 0, DATE()), "1970-01-01"); - EXPECT_EQ(toPath(TransformType::kIdentity, -1, DATE()), "1969-12-31"); - EXPECT_EQ(toPath(TransformType::kIdentity, 2'932'897, DATE()), "10000-01-01"); -} - -TEST(IcebergPartitionPathTest, boolean) { - EXPECT_EQ(toPath(TransformType::kIdentity, 
true, BOOLEAN()), "true"); - EXPECT_EQ(toPath(TransformType::kIdentity, false, BOOLEAN()), "false"); -} - -TEST(IcebergPartitionPathTest, string) { - EXPECT_EQ(testString("a/b/c=d"), "a/b/c=d"); - EXPECT_EQ(testString(""), ""); - EXPECT_EQ(testString("abc"), "abc"); -} - -TEST(IcebergPartitionPathTest, varbinary) { - EXPECT_EQ(testVarbinary("\x48\x65\x6c\x6c\x6f"), "SGVsbG8="); - EXPECT_EQ(testVarbinary("\x1\x2\x3"), "AQID"); - EXPECT_EQ(testVarbinary(""), ""); -} - -TEST(IcebergPartitionPathTest, timestamp) { - EXPECT_EQ(timestampToPath(Timestamp(0, 0)), "1970-01-01T00:00:00"); - EXPECT_EQ( - timestampToPath(Timestamp(1'609'459'200, 999'000'000)), - "2021-01-01T00:00:00.999"); - EXPECT_EQ( - timestampToPath(Timestamp(1'640'995'200, 500'000'000)), - "2022-01-01T00:00:00.5"); - EXPECT_EQ( - timestampToPath(Timestamp(-1, 999'000'000)), "1969-12-31T23:59:59.999"); - EXPECT_EQ( - timestampToPath(Timestamp(253'402'300'800, 100'000'000)), - "+10000-01-01T00:00:00.1"); - EXPECT_EQ( - timestampToPath(Timestamp(-62'170'000'000, 0)), "-0001-11-29T19:33:20"); - EXPECT_EQ( - timestampToPath(Timestamp(-62'167'219'199, 0)), "0000-01-01T00:00:01"); -} - -TEST(IcebergPartitionPathTest, year) { - EXPECT_EQ(toPath(TransformType::kYear, 0, INTEGER()), "1970"); - EXPECT_EQ(toPath(TransformType::kYear, 1, INTEGER()), "1971"); - EXPECT_EQ(toPath(TransformType::kYear, 8'030, INTEGER()), "10000"); - EXPECT_EQ(toPath(TransformType::kYear, -1, INTEGER()), "1969"); - EXPECT_EQ(toPath(TransformType::kYear, -50, INTEGER()), "1920"); -} - -TEST(IcebergPartitionPathTest, month) { - EXPECT_EQ(toPath(TransformType::kMonth, 0, INTEGER()), "1970-01"); - EXPECT_EQ(toPath(TransformType::kMonth, 1, INTEGER()), "1970-02"); - EXPECT_EQ(toPath(TransformType::kMonth, 11, INTEGER()), "1970-12"); - EXPECT_EQ(toPath(TransformType::kMonth, 612, INTEGER()), "2021-01"); - EXPECT_EQ(toPath(TransformType::kMonth, -1, INTEGER()), "1969-12"); - EXPECT_EQ(toPath(TransformType::kMonth, -13, INTEGER()), "1968-12"); -} 
- -TEST(IcebergPartitionPathTest, day) { - EXPECT_EQ(toPath(TransformType::kDay, 0, DATE()), "1970-01-01"); - EXPECT_EQ(toPath(TransformType::kDay, 1, DATE()), "1970-01-02"); - EXPECT_EQ(toPath(TransformType::kDay, 18'262, DATE()), "2020-01-01"); - EXPECT_EQ(toPath(TransformType::kDay, -1, DATE()), "1969-12-31"); -} - -TEST(IcebergPartitionPathTest, hour) { - EXPECT_EQ(toPath(TransformType::kHour, 0, INTEGER()), "1970-01-01-00"); - EXPECT_EQ(toPath(TransformType::kHour, 1, INTEGER()), "1970-01-01-01"); - EXPECT_EQ(toPath(TransformType::kHour, 24, INTEGER()), "1970-01-02-00"); - EXPECT_EQ(toPath(TransformType::kHour, 438'288, INTEGER()), "2020-01-01-00"); - EXPECT_EQ(toPath(TransformType::kHour, -1, INTEGER()), "1969-12-31-23"); -} - -} // namespace - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/TransformTest.cpp b/velox/connectors/hive/iceberg/tests/TransformTest.cpp index 05660e6cf8c..420eea89d8b 100644 --- a/velox/connectors/hive/iceberg/tests/TransformTest.cpp +++ b/velox/connectors/hive/iceberg/tests/TransformTest.cpp @@ -15,7 +15,6 @@ */ #include "velox/common/encode/Base64.h" -#include "velox/connectors/hive/iceberg/IcebergConfig.h" #include "velox/connectors/hive/iceberg/PartitionSpec.h" #include "velox/connectors/hive/iceberg/TransformEvaluator.h" #include "velox/connectors/hive/iceberg/TransformExprBuilder.h" @@ -40,7 +39,7 @@ class TransformTest : public test::IcebergTestBase { spec, partitionChannels, input->rowType(), - std::string(IcebergConfig::kDefaultFunctionPrefix)); + std::string(test::kDefaultTestIcebergFunctionNamePrefix)); auto transformEvaluator = std::make_unique( transformExprs, connectorQueryCtx_.get()); auto result = transformEvaluator->evaluate(input); diff --git a/velox/connectors/hive/tests/CMakeLists.txt b/velox/connectors/hive/tests/CMakeLists.txt index 9cceaa511e3..a61623bb7cc 100644 --- a/velox/connectors/hive/tests/CMakeLists.txt +++ 
b/velox/connectors/hive/tests/CMakeLists.txt @@ -24,7 +24,7 @@ add_executable( HiveConnectorUtilTest.cpp HiveConnectorSerDeTest.cpp HivePartitionFunctionTest.cpp - HivePartitionNameTest.cpp + HivePartitionUtilTest.cpp HiveSplitTest.cpp PartitionIdGeneratorTest.cpp TableHandleTest.cpp diff --git a/velox/connectors/hive/tests/HiveDataSinkTest.cpp b/velox/connectors/hive/tests/HiveDataSinkTest.cpp index 0fd25a9805b..ae7bf760319 100644 --- a/velox/connectors/hive/tests/HiveDataSinkTest.cpp +++ b/velox/connectors/hive/tests/HiveDataSinkTest.cpp @@ -1156,7 +1156,7 @@ TEST_F(HiveDataSinkTest, insertTableHandleToString) { TEST_F(HiveDataSinkTest, flushPolicyWithParquet) { const auto outputDirectory = TempDirectoryPath::create(); auto flushPolicyFactory = []() { - return std::make_unique(1234, 0); + return std::make_unique(1234, 1); }; auto writeOptions = std::make_shared(); writeOptions->flushPolicyFactory = flushPolicyFactory; diff --git a/velox/connectors/hive/tests/HivePartitionNameTest.cpp b/velox/connectors/hive/tests/HivePartitionUtilTest.cpp similarity index 85% rename from velox/connectors/hive/tests/HivePartitionNameTest.cpp rename to velox/connectors/hive/tests/HivePartitionUtilTest.cpp index b236951946f..3087dec6b6b 100644 --- a/velox/connectors/hive/tests/HivePartitionNameTest.cpp +++ b/velox/connectors/hive/tests/HivePartitionUtilTest.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "velox/connectors/hive/HivePartitionName.h" +#include "velox/connectors/hive/HivePartitionUtil.h" #include "velox/common/base/tests/GTestUtils.h" #include "velox/dwio/catalog/fbhive/FileUtils.h" #include "velox/vector/tests/utils/VectorTestBase.h" @@ -26,7 +26,7 @@ using namespace facebook::velox; using namespace facebook::velox::connector::hive; using namespace facebook::velox::dwio::catalog::fbhive; -class HivePartitionNameTest : public ::testing::Test, +class HivePartitionUtilTest : public ::testing::Test, public velox::test::VectorTestBase { protected: static void SetUpTestCase() { @@ -62,26 +62,9 @@ class HivePartitionNameTest : public ::testing::Test, input->size(), partitions); } - - static auto toPartitionName() { - return [](auto value, const TypePtr& type, int /*columnIndex*/) { - return HivePartitionName::toName(value, type); - }; - } - - std::vector> extractPartitionKeyValues( - RowVectorPtr input, - const std::vector& partitionChannels, - vector_size_t rowIndex = 0) { - return HivePartitionName::partitionKeyValues( - rowIndex, - makePartitionsVector(input, partitionChannels), - /*nullValueString=*/"", - toPartitionName()); - } }; -TEST_F(HivePartitionNameTest, partitionName) { +TEST_F(HivePartitionUtilTest, partitionName) { { RowVectorPtr input = makeRowVector( {"flat_bool_col", @@ -119,7 +102,9 @@ TEST_F(HivePartitionNameTest, partitionName) { EXPECT_EQ( FileUtils::makePartName( - extractPartitionKeyValues(input, partitionChannels), true), + HivePartitionUtil::extractPartitionKeyValues( + makePartitionsVector(input, partitionChannels), 0), + true), folly::join( "/", std::vector( @@ -139,12 +124,14 @@ TEST_F(HivePartitionNameTest, partitionName) { VELOX_ASSERT_THROW( FileUtils::makePartName( - extractPartitionKeyValues(input, partitionChannels), true), + HivePartitionUtil::extractPartitionKeyValues( + makePartitionsVector(input, partitionChannels), 0), + true), "Unsupported partition type: MAP"); } } -TEST_F(HivePartitionNameTest, 
partitionNameForNull) { +TEST_F(HivePartitionUtilTest, partitionNameForNull) { std::vector partitionColumnNames{ "flat_bool_col", "flat_tinyint_col", @@ -168,14 +155,15 @@ TEST_F(HivePartitionNameTest, partitionNameForNull) { for (auto i = 0; i < partitionColumnNames.size(); i++) { std::vector partitionChannels = {(column_index_t)i}; - auto partitionEntries = extractPartitionKeyValues(input, partitionChannels); + auto partitionEntries = HivePartitionUtil::extractPartitionKeyValues( + makePartitionsVector(input, partitionChannels), 0); EXPECT_EQ(1, partitionEntries.size()); EXPECT_EQ(partitionColumnNames[i], partitionEntries[0].first); EXPECT_EQ("", partitionEntries[0].second); } } -TEST_F(HivePartitionNameTest, timestampPartitionValueFormatting) { +TEST_F(HivePartitionUtilTest, timestampPartitionValueFormatting) { // Test timestamp partition value formatting to match Presto's // java.sql.Timestamp.toString() behavior: removes trailing zeros but keeps at // least one decimal place @@ -204,10 +192,11 @@ TEST_F(HivePartitionNameTest, timestampPartitionValueFormatting) { makeRowVector({"timestamp_col"}, {makeFlatVector(timestamps)}); std::vector partitionChannels{0}; + auto partitionsVector = makePartitionsVector(input, partitionChannels); for (size_t i = 0; i < timestamps.size(); i++) { - auto partitionEntries = - extractPartitionKeyValues(input, partitionChannels, i); + auto partitionEntries = HivePartitionUtil::extractPartitionKeyValues( + partitionsVector, static_cast(i)); EXPECT_EQ(1, partitionEntries.size()); EXPECT_EQ("timestamp_col", partitionEntries[0].first); diff --git a/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp b/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp index 7dcb0d5e195..271e4599d3f 100644 --- a/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp +++ b/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp @@ -16,7 +16,6 @@ #include "velox/connectors/hive/PartitionIdGenerator.h" #include 
"velox/common/base/tests/GTestUtils.h" -#include "velox/connectors/hive/HivePartitionName.h" #include "velox/type/TimestampConversion.h" #include "velox/vector/tests/utils/VectorTestBase.h" @@ -35,7 +34,7 @@ class PartitionIdGeneratorTest : public ::testing::Test, TEST_F(PartitionIdGeneratorTest, consecutiveIdsSingleKey) { auto numPartitions = 100; - PartitionIdGenerator idGenerator(ROW({VARCHAR()}), {0}, 100, pool()); + PartitionIdGenerator idGenerator(ROW({VARCHAR()}), {0}, 100, pool(), true); auto input = makeRowVector( {makeFlatVector(numPartitions * 3, [&](auto row) { @@ -57,7 +56,7 @@ TEST_F(PartitionIdGeneratorTest, consecutiveIdsSingleKey) { TEST_F(PartitionIdGeneratorTest, consecutiveIdsMultipleKeys) { PartitionIdGenerator idGenerator( - ROW({VARCHAR(), INTEGER()}), {0, 1}, 100, pool()); + ROW({VARCHAR(), INTEGER()}), {0, 1}, 100, pool(), true); auto input = makeRowVector({ makeFlatVector( @@ -84,7 +83,7 @@ TEST_F(PartitionIdGeneratorTest, consecutiveIdsMultipleKeys) { TEST_F(PartitionIdGeneratorTest, multipleBoolKeys) { PartitionIdGenerator idGenerator( - ROW({BOOLEAN(), BOOLEAN()}), {0, 1}, 100, pool()); + ROW({BOOLEAN(), BOOLEAN()}), {0, 1}, 100, pool(), true); auto input = makeRowVector({ makeFlatVector( @@ -110,7 +109,7 @@ TEST_F(PartitionIdGeneratorTest, multipleBoolKeys) { } TEST_F(PartitionIdGeneratorTest, stableIdsSingleKey) { - PartitionIdGenerator idGenerator(ROW({BIGINT()}), {0}, 100, pool()); + PartitionIdGenerator idGenerator(ROW({BIGINT()}), {0}, 100, pool(), true); auto numPartitions = 40; auto input = makeRowVector({ @@ -137,7 +136,7 @@ TEST_F(PartitionIdGeneratorTest, stableIdsSingleKey) { TEST_F(PartitionIdGeneratorTest, stableIdsMultipleKeys) { PartitionIdGenerator idGenerator( - ROW({BIGINT(), VARCHAR(), INTEGER()}), {1, 2}, 100, pool()); + ROW({BIGINT(), VARCHAR(), INTEGER()}), {1, 2}, 100, pool(), true); const vector_size_t size = 1'000; auto input = makeRowVector({ @@ -176,7 +175,7 @@ TEST_F(PartitionIdGeneratorTest, 
stableIdsMultipleKeys) { TEST_F(PartitionIdGeneratorTest, partitionKeysCaseSensitive) { PartitionIdGenerator idGenerator( - ROW({"cc0", "Cc+1"}, {BIGINT(), VARCHAR()}), {1}, 100, pool()); + ROW({"cc0", "Cc1"}, {BIGINT(), VARCHAR()}), {1}, 100, pool(), false); auto input = makeRowVector({ makeFlatVector({1, 2, 3}), @@ -185,19 +184,12 @@ TEST_F(PartitionIdGeneratorTest, partitionKeysCaseSensitive) { raw_vector firstTimeIds; idGenerator.run(input, firstTimeIds); - - EXPECT_EQ( - "Cc+1=apple", - HivePartitionName::partitionName( - 0, idGenerator.partitionValues(), /*partitionKeyAsLowerCase=*/false)); - EXPECT_EQ( - "Cc+1=orange", - HivePartitionName::partitionName( - 1, idGenerator.partitionValues(), /*partitionKeyAsLowerCase=*/false)); + EXPECT_EQ("Cc1=apple", idGenerator.partitionName(0)); + EXPECT_EQ("Cc1=orange", idGenerator.partitionName(1)); } TEST_F(PartitionIdGeneratorTest, numPartitions) { - PartitionIdGenerator idGenerator(ROW({BIGINT()}), {0}, 100, pool()); + PartitionIdGenerator idGenerator(ROW({BIGINT()}), {0}, 100, pool(), true); // First run to process partition 0,..,9. Total num of partitions processed by // far is 10. 
@@ -232,7 +224,7 @@ TEST_F(PartitionIdGeneratorTest, limitOfPartitionNumber) { auto maxPartitions = 100; PartitionIdGenerator idGenerator( - ROW({INTEGER()}), {0}, maxPartitions, pool()); + ROW({INTEGER()}), {0}, maxPartitions, pool(), true); auto input = makeRowVector({ makeFlatVector(maxPartitions + 1, [](auto row) { return row; }), @@ -247,7 +239,7 @@ TEST_F(PartitionIdGeneratorTest, limitOfPartitionNumber) { TEST_F(PartitionIdGeneratorTest, timestampPartitionKeyComparasion) { PartitionIdGenerator idGenerator( - ROW({"timestamp_col"}, {TIMESTAMP()}), {0}, 100, pool()); + ROW({"timestamp_col"}, {TIMESTAMP()}), {0}, 100, pool(), true); auto timestampResult = util::fromTimestampString( "2025-01-02 00:00:00.0", util::TimestampParseMode::kPrestoCast); auto input = makeRowVector({ @@ -255,17 +247,13 @@ TEST_F(PartitionIdGeneratorTest, timestampPartitionKeyComparasion) { }); raw_vector testTimeIds; idGenerator.run(input, testTimeIds); - EXPECT_EQ( - HivePartitionName::partitionName( - testTimeIds[0], - idGenerator.partitionValues(), - /*partitionKeyAsLowerCase=*/true), + idGenerator.partitionName(testTimeIds[0]), "timestamp_col=2025-01-01 16%3A00%3A00.0"); } TEST_F(PartitionIdGeneratorTest, timestampPartitionKey) { - PartitionIdGenerator idGenerator(ROW({TIMESTAMP()}), {0}, 100, pool()); + PartitionIdGenerator idGenerator(ROW({TIMESTAMP()}), {0}, 100, pool(), true); auto numPartitions = 50; auto input = makeRowVector({ @@ -337,7 +325,8 @@ TEST_F(PartitionIdGeneratorTest, supportedPartitionKeyTypes) { }), {0, 1, 2, 3, 4, 5, 6, 7}, 100, - pool()); + pool(), + true); auto input = makeRowVector( {makeNullableFlatVector( @@ -373,7 +362,8 @@ TEST_F(PartitionIdGeneratorTest, supportedPartitionKeyTypes) { for (column_index_t i = 1; i < input->childrenSize(); i++) { VELOX_ASSERT_THROW( - PartitionIdGenerator(asRowType(input->type()), {i}, 100, pool()), + PartitionIdGenerator( + asRowType(input->type()), {i}, 100, pool(), true), fmt::format( "Unsupported partition type: {}.", 
input->childAt(i)->type()->toString())); diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index f5f6bfeb5a5..728f4a54361 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -915,6 +915,11 @@ must be specified as raw byte counts. - Speculative tail-read size in bytes when opening Nimble files. Controls how many bytes are read from the end of the file to load the footer and nearby metadata in a single IO operation. Set to 0 for adaptive mode. + * - fanout-enabled + - fanout_enabled + - bool + - true + - Selects the writer mode. When true (the default), the fanout writer is used; when false, the clustered writer is used. Currently applies only to the Iceberg writer. ``ORC File Format Configuration`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/velox/dwio/common/CMakeLists.txt b/velox/dwio/common/CMakeLists.txt index 532f2d3bcf6..dbcea199aef 100644 --- a/velox/dwio/common/CMakeLists.txt +++ b/velox/dwio/common/CMakeLists.txt @@ -43,6 +43,7 @@ velox_add_library( OnDemandUnitLoader.cpp InputStream.cpp IntDecoder.cpp + DataFileStatistics.cpp MetadataFilter.cpp Options.cpp OutputStream.cpp diff --git a/velox/dwio/common/DataFileStatistics.cpp b/velox/dwio/common/DataFileStatistics.cpp new file mode 100644 index 00000000000..6968f8ee528 --- /dev/null +++ b/velox/dwio/common/DataFileStatistics.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "velox/dwio/common/DataFileStatistics.h" + +namespace facebook::velox::dwio::common { + +folly::dynamic DataFileStatistics::toJson() const { + folly::dynamic json = folly::dynamic::object; + json["recordCount"] = numRecords; + + auto mapToJson = [](const auto& map) { + folly::dynamic result = folly::dynamic::object; + for (const auto& pair : map) { + result[folly::to(pair.first)] = pair.second; + } + return result; + }; + + json["columnSizes"] = mapToJson(columnsSizes); + json["valueCounts"] = mapToJson(valueCounts); + json["nullValueCounts"] = mapToJson(nullValueCounts); + json["nanValueCounts"] = mapToJson(nanValueCounts); + json["lowerBounds"] = mapToJson(lowerBounds); + json["upperBounds"] = mapToJson(upperBounds); + + return json; +} + +folly::dynamic DataFileStatistics::splitOffsetsAsJson() const { + folly::dynamic arr = folly::dynamic::array; + for (const auto& offset : splitOffsets) { + arr.push_back(offset); + } + return arr; +} + +} // namespace facebook::velox::dwio::common diff --git a/velox/dwio/common/DataFileStatistics.h b/velox/dwio/common/DataFileStatistics.h new file mode 100644 index 00000000000..d8eb1491d8e --- /dev/null +++ b/velox/dwio/common/DataFileStatistics.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace facebook::velox::dwio::common { + +// Iceberg data_file struct fields. 
+struct DataFileStatistics { + int64_t numRecords; + std::unordered_map columnsSizes; + std::unordered_map valueCounts; + std::unordered_map nullValueCounts; + std::unordered_map nanValueCounts; + std::unordered_map lowerBounds; + std::unordered_map upperBounds; + + // Split offsets for the data file. For example, all row + // group offsets in a Parquet file. Must be sorted ascending. + std::vector splitOffsets; + + DataFileStatistics() : numRecords(0) {} + + folly::dynamic toJson() const; + + folly::dynamic splitOffsetsAsJson() const; +}; + +} // namespace facebook::velox::dwio::common diff --git a/velox/dwio/common/DataFileStatsCollector.h b/velox/dwio/common/DataFileStatsCollector.h new file mode 100644 index 00000000000..0f400d43c5b --- /dev/null +++ b/velox/dwio/common/DataFileStatsCollector.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/dwio/common/DataFileStatistics.h" + +namespace facebook::velox::dwio::common { + +/// Base settings for collecting data file statistics. Can be extended +/// by specific table formats to add format-specific fields. 
+struct DataFileStatsSettings { + virtual ~DataFileStatsSettings() = default; +}; + +class FileStatsCollector { + public: + explicit FileStatsCollector( + std::shared_ptr< + std::vector>> + settings) + : statsSetting_(std::move(settings)) {} + + virtual ~FileStatsCollector() = default; + + virtual void collectStats( + const void* metadata, + const std::shared_ptr& fileStats) = 0; + + protected: + std::shared_ptr< + std::vector>> + statsSetting_; +}; + +} // namespace facebook::velox::dwio::common diff --git a/velox/dwio/common/Options.h b/velox/dwio/common/Options.h index 48f7f530638..d93ab1fd0da 100644 --- a/velox/dwio/common/Options.h +++ b/velox/dwio/common/Options.h @@ -28,6 +28,7 @@ #include "velox/common/io/Options.h" #include "velox/common/memory/Memory.h" #include "velox/dwio/common/ColumnSelector.h" +#include "velox/dwio/common/DataFileStatsCollector.h" #include "velox/dwio/common/ErrorTolerance.h" #include "velox/dwio/common/FlatMapHelper.h" #include "velox/dwio/common/FlushPolicy.h" @@ -905,6 +906,11 @@ struct WriterOptions { std::string sessionTimezoneName; bool adjustTimestampToTimezone{false}; + /// Data file statistics collector for format-specific statistics collection + /// during write operations. Each table format (e.g., Iceberg, Hudi) can + /// provide its own implementation to collect connector-specific metadata. + FileStatsCollector* fileStatsCollector{nullptr}; + // WriterOption implementations can implement this function to specify how to // process format-specific session and connector configs. virtual void processConfigs( diff --git a/velox/dwio/common/ScanSpec.cpp b/velox/dwio/common/ScanSpec.cpp index 54764f70516..991dbee067e 100644 --- a/velox/dwio/common/ScanSpec.cpp +++ b/velox/dwio/common/ScanSpec.cpp @@ -76,6 +76,7 @@ bool ScanSpec::compareTimeToDropValue( } return left->filter_->kind() < right->filter_->kind(); } + // If hasFilter() is true but 'filter_' is nullptr, we have a filter // on complex type members. 
The simple type filter goes first. if (left->filter_) { diff --git a/velox/dwio/common/ScanSpec.h b/velox/dwio/common/ScanSpec.h index 8204011ccda..76cbb1d0555 100644 --- a/velox/dwio/common/ScanSpec.h +++ b/velox/dwio/common/ScanSpec.h @@ -63,8 +63,8 @@ class ScanSpec { return filterDisabled_ ? nullptr : filter_.get(); } - /// Sets 'filter_'. May be used at initialization or when adding a - /// pushed down filter, e.g. top k cutoff. + // Sets 'filter_'. May be used at initialization or when adding a + // pushed down filter, e.g. top k cutoff. void setFilter(std::shared_ptr filter) { filter_ = std::move(filter); } @@ -95,8 +95,8 @@ class ScanSpec { return metadataFilters_[i].second; } - /// Returns a constant vector if 'this' corresponds to a partitioning - /// column or to a missing column. These change from split to split. + // Returns a constant vector if 'this' corresponds to a partitioning + // column or to a missing column. These change from split to split. VectorPtr constantValue() const { return constantValue_; } @@ -127,22 +127,22 @@ class ScanSpec { return columnType_ == ColumnType::kRegular && !isConstant(); } - /// Name of the value in its container, i.e. field name in struct or - /// string key in map. Not all fields of 'this' apply in list/map - /// value cases but the overhead is manageable, the space taken is - /// less than the Subfield path that will in any case exist for each - /// separately named list/map element. + // Name of the value in its container, i.e. field name in struct or + // string key in map. Not all fields of 'this' apply in list/map + // value cases but the overhead is manageable, the space taken is + // less than the Subfield path that will in any case exist for each + // separately named list/map element. const std::string& fieldName() const { return fieldName_; } - /// Subscript if this refers to a member of a list or an - /// integer-keyed map value. 
If this is a member in a row, this is - /// the ordinal position in the row type. Subscript is mutable, for - /// example the position of the reader in a struct's readers may vary - /// between splits. Set to correspond to the position of 'fieldName' - /// when first reading a struct. Not mutable if this refers to a - /// list/map subscript. + // Subscript if this refers to a member of a list or an + // integer-keyed map value. If this is a member in a row, this is + // the ordinal position in the row type. Subscript is mutable, for + // example the position of the reader in a struct's readers may vary + // between splits. Set to correspond to the position of 'fieldName' + // when first reading a struct. Not mutable if this refers to a + // list/map subscript. int64_t subscript() const { return subscript_; } @@ -153,8 +153,8 @@ class ScanSpec { } } - /// True if the value is returned from scan. A runtime pushdown of a filter - /// function may cause this to become false at run time. + // True if the value is returned from scan. A runtime pushdown of a filter + // function may cause this to become false at run time. bool projectOut() const { return projectOut_; } @@ -181,31 +181,31 @@ class ScanSpec { return children_; } - /// Returns 'children in a stable order. May be used for parallel - /// construction and read-ahead of reader trees while the main user - /// of 'this' is running. 'children_' may be reordered while running - /// but the tree being constructed must see a single, unchanging - /// order. + // Returns 'children_' in a stable order. May be used for parallel + // construction and read-ahead of reader trees while the main user + // of 'this' is running. 'children_' may be reordered while running + // but the tree being constructed must see a single, unchanging + // order. const std::vector& stableChildren(); - /// Returns a read sequence number.
This can b used for tagging - /// lazy vectors with a generation number so that we can check that - /// the reader that made them has not advanced between the making and - /// the loading of the lazy vector. This must be called if 'this' - /// corresponds to a struct or flat map reader with pushdown. This - /// may periodically do adaptation such as filter reordering. This - /// will initialize the read order on first call and calling this at - /// each level of struct is mandatory. + // Returns a read sequence number. This can be used for tagging + // lazy vectors with a generation number so that we can check that + // the reader that made them has not advanced between the making and + // the loading of the lazy vector. This must be called if 'this' + // corresponds to a struct or flat map reader with pushdown. This + // may periodically do adaptation such as filter reordering. This + // will initialize the read order on first call and calling this at + // each level of struct is mandatory. uint64_t newRead(); /// Returns the ScanSpec corresponding to 'name'. Creates it if needed without /// any intermediate level. ScanSpec* getOrCreateChild(const std::string& name); - /// Returns the ScanSpec corresponding to 'subfield'. Creates it if - /// needed, including any intermediate levels. This is used at - /// TableScan initialization to create the ScanSpec tree that - /// corresponds to the ColumnReader tree. + // Returns the ScanSpec corresponding to 'subfield'. Creates it if + // needed, including any intermediate levels. This is used at + // TableScan initialization to create the ScanSpec tree that + // corresponds to the ColumnReader tree. ScanSpec* getOrCreateChild(const Subfield& subfield); ScanSpec* childByName(const std::string& name) const { @@ -228,11 +228,11 @@ class ScanSpec { valueHook_ = valueHook; } - /// Returns true if the corresponding reader only needs to reference the nulls - /// stream.
True if filter is is-null with or without value extraction or if - /// filter is is-not-null and no value is extracted. Note that this does not - /// apply to Nimble format leaf nodes, because nulls are mixed in the encoding - /// with actual values. + // Returns true if the corresponding reader only needs to reference the nulls + // stream. True if filter is is-null with or without value extraction or if + // filter is is-not-null and no value is extracted. Note that this does not + // apply to Nimble format leaf nodes, because nulls are mixed in the encoding + // with actual values. bool readsNullsOnly() const { if (auto* filter = this->filter()) { if (filter->kind() == FilterKind::kIsNull) { @@ -253,11 +253,11 @@ class ScanSpec { makeFlat_ = makeFlat; } - /// True if this or a descendant has a filter that will affect the number of - /// output rows. Note that filter on map keys and array indices is not - /// counted, as they do not change the number of container output rows. - /// - /// This may change as a result of runtime adaptation. + // True if this or a descendant has a filter that will affect the number of + // output rows. Note that filter on map keys and array indices is not + // counted, as they do not change the number of container output rows. + // + // This may change as a result of runtime adaptation. bool hasFilter() const; /// Similar as hasFilter() but also return true even there is a filter on @@ -272,8 +272,8 @@ class ScanSpec { /// filtered out. bool testNull() const; - /// Resets cached values after this or children were updated, e.g. a new - /// filter was added or existing filter was modified. + // Resets cached values after this or children were updated, e.g. a new filter + // was added or existing filter was modified. void resetCachedValues(bool doReorder) { hasFilter_.reset(); for (auto& child : children_) { @@ -284,50 +284,49 @@ class ScanSpec { } } - /// Returns the child which produces values for 'channel'. Throws if not - /// found. 
+ // Returns the child which produces values for 'channel'. Throws if not found. ScanSpec& getChildByChannel(column_index_t channel); - /// Sets filter order and filters of 'this' from 'other'. Used when - /// initializing a ScanSpec for a new split or stripe. This transfers - /// dynamically acquired filters and adaptive filter order. 'other' - /// should not be used after this. Different splits or stripes may - /// have their own ScanSpec trees, so we only move the content, not - /// the ScanSpec tree itself. + // sets filter order and filters of 'this' from 'other'. Used when + // initializing a ScanSpec for a new split or stripe. This transfers + // dynamically acquired filters and adaptive filter order. 'other' + // should not be used after this. Different splits or stripes may + // have their own ScanSpec trees, so we only move the content, not + // the ScanSpec tree itself. void moveAdaptationFrom(ScanSpec& other); std::string toString() const; - /// Add a field to this ScanSpec, with content projected out. + // Add a field to this ScanSpec, with content projected out. ScanSpec* addField(const std::string& name, column_index_t channel); - /// Add a field and its children recursively to this ScanSpec, all projected - /// out. + // Add a field and its children recursively to this ScanSpec, all projected + // out. ScanSpec* addFieldRecursively( const std::string& name, const Type&, column_index_t channel); - /// Add a field for map key. + // Add a field for map key. ScanSpec* addMapKeyField(); - /// Add a field for map key, along with its child recursively. + // Add a field for map key, along with its child recursively. ScanSpec* addMapKeyFieldRecursively(const Type&); - /// Add a field for map value. + // Add a field for map value. ScanSpec* addMapValueField(); - /// Add a field for map value, along with its child recursively. + // Add a field for map value, along with its child recursively. 
ScanSpec* addMapValueFieldRecursively(const Type&); - /// Add a field for array element. + // Add a field for array element. ScanSpec* addArrayElementField(); - /// Add a field for array element, along with its child recursively. + // Add a field for array element, along with its child recursively. ScanSpec* addArrayElementFieldRecursively(const Type&); - /// Add all child fields on the type recursively to this ScanSpec, all - /// projected out. + // Add all child fields on the type recursively to this ScanSpec, all + // projected out. void addAllChildFields(const Type&); const std::vector& flatMapFeatureSelection() const { @@ -595,8 +594,8 @@ void ScanSpec::visit(const Type& type, F&& f) { } } -/// Returns false if no value from a range defined by stats can pass the -/// filter. True, otherwise. +// Returns false if no value from a range defined by stats can pass the +// filter. True, otherwise. bool testFilter( const common::Filter* filter, dwio::common::ColumnStatistics* stats, diff --git a/velox/dwio/common/SortingWriter.cpp b/velox/dwio/common/SortingWriter.cpp index 9e973106739..81ade470c1c 100644 --- a/velox/dwio/common/SortingWriter.cpp +++ b/velox/dwio/common/SortingWriter.cpp @@ -90,7 +90,9 @@ std::unique_ptr SortingWriter::close() { VELOX_CHECK(isFinishing()); setState(State::kClosed); VELOX_CHECK_NULL(sortBuffer_); - return outputWriter_->close(); + auto metadata = outputWriter_->close(); + dataFileStats_ = outputWriter_->dataFileStats(); + return metadata; } void SortingWriter::abort() { diff --git a/velox/dwio/common/TypeWithId.h b/velox/dwio/common/TypeWithId.h index 80084dbfb53..60a8bb7e943 100644 --- a/velox/dwio/common/TypeWithId.h +++ b/velox/dwio/common/TypeWithId.h @@ -43,6 +43,10 @@ class TypeWithId : public velox::Tree> { /// Create TypeWithId node but leave all the unselected children as nullptr. /// The ids are set correctly even when some of the previous nodes are not /// selected. 
+ /// @requiredExtraFieldIds is used to determine the ids of the fields should + /// be included. This is because some connectors may require extra fields to + /// be included in the TypeWithId and ScanSpec after the base ScanSpec is + /// created. static std::unique_ptr create( const RowTypePtr& type, const velox::common::ScanSpec& spec); diff --git a/velox/dwio/common/Writer.h b/velox/dwio/common/Writer.h index 1950677a0bf..e9078c27c4e 100644 --- a/velox/dwio/common/Writer.h +++ b/velox/dwio/common/Writer.h @@ -21,6 +21,8 @@ #include "velox/common/base/Portability.h" #include "velox/dwio/common/FileMetadata.h" +#include "velox/dwio/common/DataFileStatistics.h" +#include "velox/dwio/common/DataFileStatsCollector.h" #include "velox/vector/BaseVector.h" namespace facebook::velox::dwio::common { @@ -81,6 +83,11 @@ class Writer { /// Data can no longer be written. virtual void abort() = 0; + /// Return statistics based on each Iceberg data file + std::shared_ptr dataFileStats() const { + return dataFileStats_; + }; + protected: bool isRunning() const; bool isFinishing() const; @@ -94,6 +101,8 @@ class Writer { static void checkStateTransition(State oldState, State newState); tsan_atomic state_{State::kInit}; + std::shared_ptr dataFileStats_{nullptr}; + dwio::common::FileStatsCollector* statsCollector_; }; FOLLY_ALWAYS_INLINE std::ostream& operator<<( diff --git a/velox/dwio/parquet/common/CMakeLists.txt b/velox/dwio/parquet/common/CMakeLists.txt index 4f1256edd75..9b99962af58 100644 --- a/velox/dwio/parquet/common/CMakeLists.txt +++ b/velox/dwio/parquet/common/CMakeLists.txt @@ -15,9 +15,9 @@ velox_add_library( velox_dwio_parquet_common BloomFilter.cpp - XxHasher.cpp LevelComparison.cpp LevelConversion.cpp + XxHasher.cpp HEADERS BitStreamUtilsInternal.h BloomFilter.h diff --git a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp index 7bf974a0ede..fd5930c9910 100644 --- 
a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp +++ b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp @@ -17,6 +17,7 @@ #include "velox/common/io/IoStatistics.h" #include "velox/dwio/common/tests/utils/E2EFilterTestBase.h" #include "velox/dwio/parquet/reader/ParquetReader.h" +#include "velox/dwio/parquet/reader/ParquetTypeWithId.h" #include "velox/dwio/parquet/writer/Writer.h" #include "velox/vector/tests/utils/VectorTestBase.h" @@ -896,6 +897,97 @@ TEST_F(E2EFilterTest, parquetMRVersionStringStatsRowGroupFiltering) { EXPECT_EQ(stats181.processedStrides, 2); } +TEST_F(E2EFilterTest, flushRowGroupByBufferedSize) { + bytesInRowGroup_ = 100; + rowType_ = ROW({"c0"}, {INTEGER()}); + std::vector batches; + batches.reserve(5); + for (int i = 0; i < 5; i++) { + batches.push_back( + makeRowVector({makeFlatVector({1, 1, 1, 1, 1})})); + } + writeToMemory(rowType_, batches, false); + dwio::common::ReaderOptions readerOpts{leafPool_.get()}; + auto input = std::make_unique( + std::make_shared(sinkData_), readerOpts.memoryPool()); + auto reader = makeReader(readerOpts, std::move(input)); + auto parquetReader = dynamic_cast(*reader.get()); + EXPECT_EQ(parquetReader.fileMetaData().numRowGroups(), 1); + EXPECT_EQ(parquetReader.numberOfRows(), 25); +} + +TEST_F(E2EFilterTest, writeDecimalAsInteger) { + auto rowVector = makeRowVector( + {makeFlatVector({1, 2}, DECIMAL(8, 2)), + makeFlatVector({1, 2}, DECIMAL(10, 2)), + makeFlatVector({1, 2}, DECIMAL(19, 2))}); + writeToMemory(rowVector->type(), {rowVector}, false); + dwio::common::ReaderOptions readerOpts(leafPool_.get()); + readerOpts.setDataIoStats(dataIoStats_); + readerOpts.setMetadataIoStats(metadataIoStats_); + auto input = std::make_unique( + std::make_shared(sinkData_), readerOpts.memoryPool()); + auto reader = makeReader(readerOpts, std::move(input)); + auto parquetReader = dynamic_cast(*reader.get()); + + auto types = parquetReader.typeWithId()->getChildren(); + auto c0 = std::dynamic_pointer_cast(types[0]); + 
EXPECT_EQ(c0->parquetType_.value(), thrift::Type::type::INT32); + auto c1 = std::dynamic_pointer_cast(types[1]); + EXPECT_EQ(c1->parquetType_.value(), thrift::Type::type::INT64); + auto c2 = std::dynamic_pointer_cast(types[2]); + EXPECT_EQ(c2->parquetType_.value(), thrift::Type::type::FIXED_LEN_BYTE_ARRAY); +} + +TEST_F(E2EFilterTest, configurableWriteSchema) { + auto test = [&](auto& type, auto& newType) { + std::vector batches; + for (auto i = 0; i < 5; i++) { + auto vector = BaseVector::create(type, 100, pool()); + auto rowVector = std::dynamic_pointer_cast(vector); + batches.push_back(rowVector); + } + + writeToMemory(newType, batches, false); + dwio::common::ReaderOptions readerOpts(leafPool_.get()); + readerOpts.setDataIoStats(dataIoStats_); + readerOpts.setMetadataIoStats(metadataIoStats_); + auto input = std::make_unique( + std::make_shared(sinkData_), readerOpts.memoryPool()); + auto reader = makeReader(readerOpts, std::move(input)); + auto parquetReader = dynamic_cast(*reader.get()); + + EXPECT_EQ(parquetReader.rowType()->toString(), newType->toString()); + }; + + // ROW(ROW(ROW)) + auto type = + ROW({"a", "b"}, {INTEGER(), ROW({"c"}, {ROW({"d"}, {INTEGER()})})}); + auto newType = + ROW({"aa", "bb"}, {INTEGER(), ROW({"cc"}, {ROW({"dd"}, {INTEGER()})})}); + test(type, newType); + + // ARRAY(ROW) + type = + ROW({"a", "b"}, {ARRAY(ROW({"c", "d"}, {BIGINT(), BIGINT()})), BIGINT()}); + newType = ROW( + {"aa", "bb"}, {ARRAY(ROW({"cc", "dd"}, {BIGINT(), BIGINT()})), BIGINT()}); + test(type, newType); + + // // MAP(ROW) + type = + ROW({"a", "b"}, + {MAP(ROW({"c", "d"}, {BIGINT(), BIGINT()}), + ROW({"e", "f"}, {BIGINT(), BIGINT()})), + BIGINT()}); + newType = + ROW({"aa", "bb"}, + {MAP(ROW({"cc", "dd"}, {BIGINT(), BIGINT()}), + ROW({"ee", "ff"}, {BIGINT(), BIGINT()})), + BIGINT()}); + test(type, newType); +} + TEST_F(E2EFilterTest, booleanRle) { options_.enableDictionary = false; options_.encoding = facebook::velox::parquet::arrow::Encoding::kRle; diff --git 
a/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp b/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp index f04f13d178c..349efa276ab 100644 --- a/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp +++ b/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp @@ -30,9 +30,13 @@ #include "velox/dwio/parquet/reader/PageReader.h" #include "velox/dwio/parquet/reader/ParquetTypeWithId.h" #include "velox/dwio/parquet/tests/ParquetTestBase.h" +#include "velox/dwio/parquet/writer/Writer.h" #include "velox/dwio/parquet/writer/WriterConfig.h" #include "velox/dwio/parquet/writer/arrow/tests/ColumnReader.h" #include "velox/dwio/parquet/writer/arrow/tests/FileReader.h" +#include "velox/dwio/parquet/writer/arrow/Metadata.h" +#include "velox/dwio/parquet/writer/arrow/Schema.h" +#include "velox/dwio/parquet/writer/arrow/tests/FileReader.h" #include "velox/exec/Cursor.h" #include "velox/exec/tests/utils/AssertQueryBuilder.h" #include "velox/exec/tests/utils/PlanBuilder.h" @@ -1116,6 +1120,157 @@ TEST_F(ParquetWriterTest, allNulls) { assertReadWithReaderAndExpected(schema, *rowReader, data, *leafPool_); } +TEST_F(ParquetWriterTest, withoutFieldIds) { + auto schema = + ROW({"a", "b", "c", "m"}, + {BIGINT(), + ROW({"x", "y"}, {INTEGER(), VARCHAR()}), + ARRAY(INTEGER()), + MAP(VARCHAR(), INTEGER())}); + + auto arrVec = makeArrayVector({{3}}); + auto mapVec = makeMapVector({{{StringView("k"), 4}}}); + auto data = makeRowVector({ + makeFlatVector(1, [](auto) { return 1; }), + makeRowVector( + {makeFlatVector(1, [](auto) { return 2; }), + makeFlatVector( + 1, [](auto) { return StringView("z"); })}), + arrVec, + mapVec, + }); + + auto sink = std::make_unique( + 4 * 1024 * 1024, + dwio::common::FileSink::Options{.pool = leafPool_.get()}); + auto* sinkPtr = sink.get(); + + parquet::WriterOptions writerOptions; + writerOptions.memoryPool = leafPool_.get(); + + auto writer = std::make_unique( + std::move(sink), writerOptions, rootPool_, schema); + writer->write(data); + 
writer->close(); + + std::string_view sinkData(sinkPtr->data(), sinkPtr->size()); + auto readFile = std::make_shared(sinkData); + auto input = std::make_unique(readFile, *leafPool_.get()); + dwio::common::ReaderOptions readerOptions(leafPool_.get()); + readerOptions.setDataIoStats(dataIoStats_); + readerOptions.setMetadataIoStats(metadataIoStats_); + auto veloxReader = + std::make_unique(std::move(input), readerOptions); + EXPECT_EQ(veloxReader->numberOfRows(), 1); + auto veloxRowType = veloxReader->rowType(); + EXPECT_EQ(*veloxRowType, *schema); + + auto arrowBufferReader = std::make_shared<::arrow::io::BufferReader>( + std::make_shared<::arrow::Buffer>( + reinterpret_cast(sinkData.data()), sinkData.size())); + + auto fileReader = parquet::arrow::ParquetFileReader::open(arrowBufferReader); + auto metadata = fileReader->metadata(); + auto* descr = metadata->schema(); + auto* root = descr->groupNode(); + + ASSERT_EQ(root->fieldCount(), 4); + + // All field IDs should be -1 (not set). + EXPECT_EQ(root->field(0)->fieldId(), -1); + EXPECT_EQ(root->field(1)->fieldId(), -1); + EXPECT_EQ(root->field(2)->fieldId(), -1); + EXPECT_EQ(root->field(3)->fieldId(), -1); +} + +TEST_F(ParquetWriterTest, withFieldIds) { + auto schema = + ROW({"a", "b", "c", "m"}, + {BIGINT(), + ROW({"x", "y"}, {INTEGER(), VARCHAR()}), + ARRAY(INTEGER()), + MAP(VARCHAR(), INTEGER())}); + + auto arrVec = makeArrayVector({{3}}); + auto mapVec = makeMapVector({{{StringView("k"), 4}}}); + auto data = makeRowVector({ + makeFlatVector(1, [](auto) { return 1; }), + makeRowVector( + {makeFlatVector(1, [](auto) { return 2; }), + makeFlatVector( + 1, [](auto) { return StringView("z"); })}), + arrVec, + mapVec, + }); + + auto sink = std::make_unique( + 4 * 1024 * 1024, + dwio::common::FileSink::Options{.pool = leafPool_.get()}); + auto* sinkPtr = sink.get(); + + parquet::WriterOptions writerOptions; + writerOptions.memoryPool = leafPool_.get(); + + // Provide Parquet field IDs aligned with the Velox schema 
tree. + writerOptions.parquetFieldIds = { + ParquetFieldId{10, {}}, + ParquetFieldId{20, {ParquetFieldId{21, {}}, ParquetFieldId{22, {}}}}, + ParquetFieldId{30, {ParquetFieldId{31, {}}}}, + ParquetFieldId{40, {ParquetFieldId{41, {}}, ParquetFieldId{42, {}}}}, + }; + + auto writer = std::make_unique( + std::move(sink), writerOptions, rootPool_, schema); + writer->write(data); + writer->close(); + + std::string_view sinkData(sinkPtr->data(), sinkPtr->size()); + auto readFile = std::make_shared(sinkData); + auto input = std::make_unique(readFile, *leafPool_.get()); + dwio::common::ReaderOptions readerOptions(leafPool_.get()); + readerOptions.setDataIoStats(dataIoStats_); + readerOptions.setMetadataIoStats(metadataIoStats_); + auto veloxReader = + std::make_unique(std::move(input), readerOptions); + EXPECT_EQ(veloxReader->numberOfRows(), 1); + auto veloxRowType = veloxReader->rowType(); + EXPECT_EQ(*veloxRowType, *schema); + + auto arrowBufferReader = std::make_shared<::arrow::io::BufferReader>( + std::make_shared<::arrow::Buffer>( + reinterpret_cast(sinkData.data()), sinkData.size())); + + auto fileReader = parquet::arrow::ParquetFileReader::open(arrowBufferReader); + auto metadata = fileReader->metadata(); + auto* descr = metadata->schema(); + auto* root = descr->groupNode(); + + ASSERT_EQ(root->fieldCount(), 4); + + // Top-level field IDs. 
+ EXPECT_EQ(root->field(0)->fieldId(), 10); + EXPECT_EQ(root->field(1)->fieldId(), 20); + EXPECT_EQ(root->field(2)->fieldId(), 30); + EXPECT_EQ(root->field(3)->fieldId(), 40); + + using GroupNode = parquet::arrow::schema::GroupNode; + auto* b = static_cast(root->field(1).get()); + EXPECT_EQ(b->field(0)->fieldId(), 21); + EXPECT_EQ(b->field(1)->fieldId(), 22); + + auto* c = static_cast(root->field(2).get()); + auto* listEntries = c->field(0).get(); + auto* listGroup = static_cast(listEntries); + auto* element = listGroup->field(0).get(); + EXPECT_EQ(element->fieldId(), 31); + + auto* m = static_cast(root->field(3).get()); + auto* keyValue = m->field(0).get(); + auto* keyValueGroup = static_cast(keyValue); + EXPECT_EQ(keyValueGroup->field(0)->fieldId(), 41); + EXPECT_EQ(keyValueGroup->field(1)->fieldId(), 42); +} + } // namespace int main(int argc, char** argv) { diff --git a/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp b/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp index 674c99300f7..757472a58ee 100644 --- a/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp +++ b/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp @@ -677,6 +677,11 @@ void Statistics::__set_distinct_count(const int64_t val) { __isset.distinct_count = true; } +void Statistics::__set_nan_count(const int64_t val) { + this->nan_count = val; + __isset.nan_count = true; +} + void Statistics::__set_max_value(const std::string& val) { this->max_value = val; __isset.max_value = true; @@ -820,6 +825,7 @@ void swap(Statistics& a, Statistics& b) { swap(a.min, b.min); swap(a.null_count, b.null_count); swap(a.distinct_count, b.distinct_count); + swap(a.nan_count, b.nan_count); swap(a.max_value, b.max_value); swap(a.min_value, b.min_value); swap(a.__isset, b.__isset); @@ -830,6 +836,7 @@ Statistics::Statistics(const Statistics& other0) { min = other0.min; null_count = other0.null_count; distinct_count = other0.distinct_count; + nan_count = other0.nan_count; max_value = other0.max_value; min_value = 
other0.min_value; __isset = other0.__isset; @@ -839,6 +846,7 @@ Statistics& Statistics::operator=(const Statistics& other1) { min = other1.min; null_count = other1.null_count; distinct_count = other1.distinct_count; + nan_count = other1.nan_count; max_value = other1.max_value; min_value = other1.min_value; __isset = other1.__isset; diff --git a/velox/dwio/parquet/thrift/ParquetThriftTypes.h b/velox/dwio/parquet/thrift/ParquetThriftTypes.h index 0fd63e83cc0..1e5bd41b98b 100644 --- a/velox/dwio/parquet/thrift/ParquetThriftTypes.h +++ b/velox/dwio/parquet/thrift/ParquetThriftTypes.h @@ -466,12 +466,14 @@ typedef struct _Statistics__isset { min(false), null_count(false), distinct_count(false), + nan_count(false), max_value(false), min_value(false) {} bool max : 1; bool min : 1; bool null_count : 1; bool distinct_count : 1; + bool nan_count : 1; bool max_value : 1; bool min_value : 1; } _Statistics__isset; @@ -489,6 +491,7 @@ class Statistics : public virtual apache::thrift::TBase { min(), null_count(0), distinct_count(0), + nan_count(0), max_value(), min_value() {} @@ -516,6 +519,15 @@ class Statistics : public virtual apache::thrift::TBase { * count of distinct values occurring */ int64_t distinct_count; + + /** + * count of NaN values occurring. + * Note: This is a Velox extension to the Parquet format. The upstream + * Parquet community is considering adding official support for this field. + * See: https://github.com/apache/parquet-format/pull/514 + */ + int64_t nan_count; + /** * Min and max values for the column, determined by its ColumnOrder. 
* @@ -535,6 +547,8 @@ class Statistics : public virtual apache::thrift::TBase { void __set_distinct_count(const int64_t val); + void __set_nan_count(const int64_t val); + void __set_max_value(const std::string& val); void __set_min_value(const std::string& val); @@ -556,6 +570,10 @@ class Statistics : public virtual apache::thrift::TBase { return false; else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) return false; + if (__isset.nan_count != rhs.__isset.nan_count) + return false; + else if (__isset.nan_count && !(nan_count == rhs.nan_count)) + return false; if (__isset.max_value != rhs.__isset.max_value) return false; else if (__isset.max_value && !(max_value == rhs.max_value)) diff --git a/velox/dwio/parquet/writer/Writer.cpp b/velox/dwio/parquet/writer/Writer.cpp index dbe05bff050..c93caea3801 100644 --- a/velox/dwio/parquet/writer/Writer.cpp +++ b/velox/dwio/parquet/writer/Writer.cpp @@ -25,7 +25,7 @@ #include "velox/common/base/Pointers.h" #include "velox/common/config/Config.h" #include "velox/common/testutil/TestValue.h" -#include "velox/core/QueryConfig.h" + #include "velox/dwio/parquet/writer/arrow/ArrowSchema.h" #include "velox/dwio/parquet/writer/arrow/Properties.h" #include "velox/dwio/parquet/writer/arrow/Writer.h" @@ -38,22 +38,25 @@ using facebook::velox::parquet::arrow::Compression; using facebook::velox::parquet::arrow::WriterProperties; using facebook::velox::parquet::arrow::arrow::FileWriter; -// Utility for buffering Arrow output with a DataBuffer. +// Utility for buffering Arrow output with a DataBuffer, with automatic +// flushing when the buffer size exceeds a configured threshold. class ArrowDataBufferSink : public ::arrow::io::OutputStream { public: /// @param growRatio Growth factor used when invoking the reserve() method of /// DataSink, thereby helping to minimize frequent memcpy operations. + /// @param flushThreshold Threshold for flushing data to the underlying sink. 
ArrowDataBufferSink( std::unique_ptr sink, memory::MemoryPool& pool, - double growRatio) - : sink_(std::move(sink)), growRatio_(growRatio), buffer_(pool) {} + double growRatio, + int64_t flushThreshold) + : sink_(std::move(sink)), + growRatio_(growRatio), + flushThreshold_(flushThreshold), + buffer_(pool) {} ::arrow::Status Write(const std::shared_ptr<::arrow::Buffer>& data) override { - auto requestCapacity = buffer_.size() + data->size(); - if (requestCapacity > buffer_.capacity()) { - buffer_.reserve(growRatio_ * (requestCapacity)); - } + ARROW_RETURN_NOT_OK(ensureCapacity(data->size())); buffer_.append( buffer_.size(), reinterpret_cast(data->data()), @@ -62,10 +65,7 @@ class ArrowDataBufferSink : public ::arrow::io::OutputStream { } ::arrow::Status Write(const void* data, int64_t nbytes) override { - auto requestCapacity = buffer_.size() + nbytes; - if (requestCapacity > buffer_.capacity()) { - buffer_.reserve(growRatio_ * (requestCapacity)); - } + ARROW_RETURN_NOT_OK(ensureCapacity(nbytes)); buffer_.append(buffer_.size(), reinterpret_cast(data), nbytes); return ::arrow::Status::OK(); } @@ -96,20 +96,29 @@ class ArrowDataBufferSink : public ::arrow::io::OutputStream { } private: + ::arrow::Status ensureCapacity(int64_t bytesToWrite) { + auto requestCapacity = buffer_.size() + bytesToWrite; + if (requestCapacity > flushThreshold_) { + ARROW_RETURN_NOT_OK(Flush()); + requestCapacity = bytesToWrite; + } + + if (requestCapacity > buffer_.capacity()) { + buffer_.reserve(growRatio_ * (requestCapacity)); + } + return ::arrow::Status::OK(); + } + std::unique_ptr sink_; const double growRatio_; + const int64_t flushThreshold_; dwio::common::DataBuffer buffer_; int64_t bytesFlushed_ = 0; }; struct ArrowContext { std::unique_ptr writer; - std::shared_ptr<::arrow::Schema> schema; std::shared_ptr properties; - uint64_t stagingRows = 0; - int64_t stagingBytes = 0; - // columns, Arrays - std::vector>> stagingChunks; }; Compression::type getArrowParquetCompression( @@ -160,6 
+169,8 @@ std::shared_ptr getArrowParquetWriterOptions( facebook::velox::parquet::arrow::DEFAULT_WRITE_BATCH_SIZE)); properties = properties->maxRowGroupLength( static_cast(flushPolicy->rowsInRowGroup())); + properties = properties->maxRowGroupBytes( + static_cast(flushPolicy->bytesInRowGroup())); properties = properties->codecOptions(options.codecOptions); if (options.enableStoreDecimalAsInteger.value_or(true)) { properties = properties->enableStoreDecimalAsInteger(); @@ -363,11 +374,6 @@ Writer::Writer( RowTypePtr schema) : pool_(std::move(pool)), generalPool_{pool_->addLeafChild(".general")}, - stream_( - std::make_shared( - std::move(sink), - *generalPool_, - options.bufferGrowRatio)), arrowContext_(std::make_shared()), schema_(std::move(schema)) { validateSchemaRecursive(schema_, options.parquetFieldIds); @@ -377,6 +383,11 @@ Writer::Writer( } else { flushPolicy_ = std::make_unique(); } + stream_ = std::make_shared( + std::move(sink), + *generalPool_, + options.bufferGrowRatio, + flushPolicy_->bytesInRowGroup()); options_.timestampUnit = static_cast(options.parquetWriteTimestampUnit.value_or( TimestampPrecision::kNanoseconds)); @@ -389,6 +400,8 @@ Writer::Writer( writeInt96AsTimestamp_ = options.writeInt96AsTimestamp; arrowMemoryPool_ = options.arrowMemoryPool; parquetFieldIds_ = std::move(options.parquetFieldIds); + dataFileStats_ = std::make_shared(); + statsCollector_ = options.fileStatsCollector; } Writer::Writer( @@ -404,62 +417,9 @@ Writer::Writer( folly::to(folly::Random::rand64()))), std::move(schema)} {} -void Writer::flush() { - if (arrowContext_->stagingRows > 0) { - if (!arrowContext_->writer) { - ArrowWriterProperties::Builder builder; - if (writeInt96AsTimestamp_) { - builder.enableDeprecatedInt96Timestamps(); - } - auto arrowProperties = builder.build(); - PARQUET_ASSIGN_OR_THROW( - arrowContext_->writer, - FileWriter::open( - *arrowContext_->schema.get(), - arrowMemoryPool_.get(), - stream_, - arrowContext_->properties, - arrowProperties)); - } 
- - auto fields = arrowContext_->schema->fields(); - std::vector> chunks; - for (int colIdx = 0; colIdx < fields.size(); colIdx++) { - auto dataType = fields.at(colIdx)->type(); - auto chunk = - ::arrow::ChunkedArray::Make( - std::move(arrowContext_->stagingChunks.at(colIdx)), dataType) - .ValueOrDie(); - chunks.push_back(chunk); - } - auto table = ::arrow::Table::Make( - arrowContext_->schema, - std::move(chunks), - static_cast(arrowContext_->stagingRows)); - PARQUET_THROW_NOT_OK(arrowContext_->writer->writeTable( - *table, static_cast(flushPolicy_->rowsInRowGroup()))); - PARQUET_THROW_NOT_OK(stream_->Flush()); - for (auto& chunk : arrowContext_->stagingChunks) { - chunk.clear(); - } - arrowContext_->stagingRows = 0; - arrowContext_->stagingBytes = 0; - } -} - -dwio::common::StripeProgress getStripeProgress( - uint64_t stagingRows, - int64_t stagingBytes) { - return dwio::common::StripeProgress{ - .stripeRowCount = stagingRows, .stripeSizeEstimate = stagingBytes}; -} +void Writer::flush() {} /** - * This method would cache input `ColumnarBatch` to make the size of row group - * big. It would flush when: - * - the cached numRows bigger than `rowsInRowGroup_` - * - the cached bytes bigger than `bytesInRowGroup_` - * * This method assumes each input `ColumnarBatch` have same schema. 
*/ void Writer::write(const VectorPtr& data) { @@ -498,28 +458,23 @@ void Writer::write(const VectorPtr& data) { PARQUET_ASSIGN_OR_THROW( auto recordBatch, ::arrow::ImportRecordBatch(&array, ::arrow::schema(newFields))); - if (!arrowContext_->schema) { - arrowContext_->schema = recordBatch->schema(); - for (int colIdx = 0; colIdx < arrowContext_->schema->num_fields(); - colIdx++) { - arrowContext_->stagingChunks.push_back( - std::vector>()); - } - } - auto bytes = data->estimateFlatSize(); - auto numRows = data->size(); - if (flushPolicy_->shouldFlush(getStripeProgress( - arrowContext_->stagingRows, arrowContext_->stagingBytes))) { - flush(); - } - - for (int colIdx = 0; colIdx < recordBatch->num_columns(); colIdx++) { - arrowContext_->stagingChunks.at(colIdx).push_back( - recordBatch->column(colIdx)); - } - arrowContext_->stagingRows += numRows; - arrowContext_->stagingBytes += bytes; + if (!arrowContext_->writer) { + ArrowWriterProperties::Builder builder; + if (writeInt96AsTimestamp_) { + builder.enableDeprecatedInt96Timestamps(); + } + auto arrowProperties = builder.build(); + PARQUET_ASSIGN_OR_THROW( + arrowContext_->writer, + FileWriter::open( + *recordBatch->schema(), + ::arrow::default_memory_pool(), + stream_, + arrowContext_->properties, + arrowProperties)); + } + (void)arrowContext_->writer->writeRecordBatch(*recordBatch); } bool Writer::isCodecAvailable(common::CompressionKind compression) { @@ -537,15 +492,18 @@ std::unique_ptr Writer::close() { std::unique_ptr parquetFileMetadata; if (arrowContext_->writer) { PARQUET_THROW_NOT_OK(arrowContext_->writer->close()); - parquetFileMetadata = std::make_unique( - arrowContext_->writer->metadata()); + if (statsCollector_) { + auto fileMetadata = arrowContext_->writer->metadata(); + parquetFileMetadata = std::make_unique( + fileMetadata); + statsCollector_->collectStats( + static_cast(&fileMetadata), dataFileStats_); + } arrowContext_->writer.reset(); } - - PARQUET_THROW_NOT_OK(stream_->Close()); - - 
arrowContext_->stagingChunks.clear(); - + if (stream_ && !stream_->closed()) { + PARQUET_THROW_NOT_OK(stream_->Close()); + } return parquetFileMetadata; } diff --git a/velox/dwio/parquet/writer/arrow/ColumnWriter.h b/velox/dwio/parquet/writer/arrow/ColumnWriter.h index c20652e941c..26edc89dfc7 100644 --- a/velox/dwio/parquet/writer/arrow/ColumnWriter.h +++ b/velox/dwio/parquet/writer/arrow/ColumnWriter.h @@ -211,6 +211,9 @@ class PARQUET_EXPORT ColumnWriter { const ::arrow::Array& leafArray, ArrowWriteContext* ctx, bool leafFieldNullable) = 0; + + /// \brief Estimated size of the values that are not written to a page yet. + virtual int64_t estimatedBufferedValueBytes() const = 0; }; // API to write values to a single column. This is the main client facing API. @@ -270,7 +273,7 @@ class TypedColumnWriter : public ColumnWriter { int64_t validBitsOffset, const T* values) = 0; - // Estimated size of the values that are not written to a page yet + // Estimated size of the values that are not written to a page yet. 
virtual int64_t estimatedBufferedValueBytes() const = 0; }; diff --git a/velox/dwio/parquet/writer/arrow/FileWriter.cpp b/velox/dwio/parquet/writer/arrow/FileWriter.cpp index 3e2e49e6e8f..2e2d912c0ab 100644 --- a/velox/dwio/parquet/writer/arrow/FileWriter.cpp +++ b/velox/dwio/parquet/writer/arrow/FileWriter.cpp @@ -73,6 +73,12 @@ int64_t RowGroupWriter::totalCompressedBytesWritten() const { return contents_->totalCompressedBytesWritten(); } +int64_t RowGroupWriter::totalBufferedBytes() const { + return contents_->totalCompressedBytes() + + contents_->totalCompressedBytesWritten() + + contents_->estimatedBufferedValueBytes(); +} + bool RowGroupWriter::buffered() const { return contents_->buffered(); } @@ -267,6 +273,20 @@ class RowGroupSerializer : public RowGroupWriter::Contents { return totalCompressedBytesWritten; } + int64_t estimatedBufferedValueBytes() const override { + if (closed_) { + return 0; + } + int64_t estimatedBufferedValueBytes = 0; + for (size_t i = 0; i < columnWriters_.size(); i++) { + if (columnWriters_[i]) { + estimatedBufferedValueBytes += + columnWriters_[i]->estimatedBufferedValueBytes(); + } + } + return estimatedBufferedValueBytes; + } + bool buffered() const override { return bufferedRowGroup_; } diff --git a/velox/dwio/parquet/writer/arrow/FileWriter.h b/velox/dwio/parquet/writer/arrow/FileWriter.h index 5e466929769..cc597078d41 100644 --- a/velox/dwio/parquet/writer/arrow/FileWriter.h +++ b/velox/dwio/parquet/writer/arrow/FileWriter.h @@ -53,12 +53,14 @@ class PARQUET_EXPORT RowGroupWriter { virtual int currentColumn() const = 0; virtual void close() = 0; - /// \brief Total uncompressed bytes written by the page writer + /// \brief Total uncompressed bytes written by the page writer. virtual int64_t totalBytesWritten() const = 0; - /// \brief Total bytes still compressed but not written by the page writer + /// \brief Total bytes still compressed but not written by the page writer. 
virtual int64_t totalCompressedBytes() const = 0; - /// \brief Total compressed bytes written by the page writer + /// \brief Total compressed bytes written by the page writer. virtual int64_t totalCompressedBytesWritten() const = 0; + /// \brief Estimated size of the values that are not written to a page yet. + virtual int64_t estimatedBufferedValueBytes() const = 0; virtual bool buffered() const = 0; }; @@ -98,8 +100,11 @@ class PARQUET_EXPORT RowGroupWriter { /// \brief Total bytes still compressed but not written by the page writer. /// It will always return 0 from the SerializedPageWriter. int64_t totalCompressedBytes() const; - /// \brief Total compressed bytes written by the page writer + /// \brief Total compressed bytes written by the page writer. int64_t totalCompressedBytesWritten() const; + /// \brief Including compressed bytes in page writer and uncompressed data + /// value buffer. + int64_t totalBufferedBytes() const; /// Returns whether the current RowGroupWriter is in the buffered mode and is /// created by calling ParquetFileWriter::appendBufferedRowGroup(). bool buffered() const; diff --git a/velox/dwio/parquet/writer/arrow/Metadata.cpp b/velox/dwio/parquet/writer/arrow/Metadata.cpp index b62d9ecea85..e02b12d00a4 100644 --- a/velox/dwio/parquet/writer/arrow/Metadata.cpp +++ b/velox/dwio/parquet/writer/arrow/Metadata.cpp @@ -102,11 +102,11 @@ static std::shared_ptr makeTypedColumnStats( metadata.num_values - stats.null_count, stats.null_count, stats.distinct_count, + stats.__isset.nan_count ? stats.nan_count : 0, stats.__isset.max_value || stats.__isset.min_value, stats.__isset.null_count, stats.__isset.distinct_count, - false, - 0); + stats.__isset.nan_count); } // Default behavior. return makeStatistics( @@ -116,11 +116,11 @@ static std::shared_ptr makeTypedColumnStats( metadata.num_values - stats.null_count, stats.null_count, stats.distinct_count, + stats.__isset.nan_count ? 
stats.nan_count : 0, stats.__isset.max || stats.__isset.min, stats.__isset.null_count, stats.__isset.distinct_count, - false, - 0); + stats.__isset.nan_count); } std::shared_ptr makeColumnStats( @@ -1019,8 +1019,8 @@ class FileMetaData::FileMetaDataImpl { // Set NaN counts from the builder (called during Finish) // This stores total NaN counts per field ID across all row groups. void setNaNCounts( - std::unordered_map> nan_counts) { - fieldNanCounts_ = std::move(nan_counts); + std::unordered_map> nanCounts) { + fieldNanCounts_ = std::move(nanCounts); } // Get total NaN count for a specific field ID across all row groups. diff --git a/velox/dwio/parquet/writer/arrow/Properties.h b/velox/dwio/parquet/writer/arrow/Properties.h index 60cd17ff426..f3224c3236b 100644 --- a/velox/dwio/parquet/writer/arrow/Properties.h +++ b/velox/dwio/parquet/writer/arrow/Properties.h @@ -26,6 +26,7 @@ #include "arrow/io/caching.h" #include "arrow/type.h" +#include "arrow/util/logging.h" #include "arrow/util/type_fwd.h" #include "velox/dwio/parquet/writer/arrow/Encryption.h" #include "velox/dwio/parquet/writer/arrow/Exception.h" @@ -211,6 +212,7 @@ static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = kDefaultDataPageSize; static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024; static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 1024 * 1024; +static constexpr int64_t DEFAULT_MAX_ROW_GROUP_BYTES = 128 * 1024 * 1024; static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true; static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096; static constexpr Encoding::type DEFAULT_ENCODING = Encoding::kUnknown; @@ -321,6 +323,7 @@ class PARQUET_EXPORT WriterProperties { dictionaryPagesizeLimit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT), writeBatchSize_(DEFAULT_WRITE_BATCH_SIZE), maxRowGroupLength_(DEFAULT_MAX_ROW_GROUP_LENGTH), + maxRowGroupBytes_(DEFAULT_MAX_ROW_GROUP_BYTES), pagesize_(kDefaultDataPageSize), version_(ParquetVersion::PARQUET_2_6), 
dataPageVersion_(ParquetDataPageVersion::V1), @@ -391,10 +394,22 @@ class PARQUET_EXPORT WriterProperties { /// Specify the max number of rows to put in a single row group. /// Default 1Mi rows. Builder* maxRowGroupLength(int64_t maxRowGroupLength) { + ARROW_CHECK_GT(maxRowGroupLength, 0) + << "maxRowGroupLength must be positive"; maxRowGroupLength_ = maxRowGroupLength; return this; } + /// Specify the max bytes to put in a single row group. + /// The size is estimated based on encoded and compressed data. + /// Default 128MB. + Builder* maxRowGroupBytes(int64_t maxRowGroupBytes) { + ARROW_CHECK_GT(maxRowGroupBytes, 0) + << "maxRowGroupBytes must be positive"; + maxRowGroupBytes_ = maxRowGroupBytes; + return this; + } + /// Specify the data page size. /// Default 1MB. Builder* dataPagesize(int64_t pgSize) { @@ -434,8 +449,8 @@ class PARQUET_EXPORT WriterProperties { /// \brief Define the encoding that is used when we don't utilise /// dictionary encoding. // - /// This either applies if dictionary encoding is disabled or if we - /// fallback because the dictionary grew too large. + /// This either applies if dictionary encoding is disabled or if we fallback + /// because the dictionary grew too large. Builder* encoding(Encoding::type encodingType) { if (encodingType == Encoding::kPlainDictionary || encodingType == Encoding::kRleDictionary) { @@ -450,8 +465,8 @@ class PARQUET_EXPORT WriterProperties { /// \brief Define the encoding that is used when we don't utilise /// dictionary encoding. // - /// This either applies if dictionary encoding is disabled or if we - /// fallback because the dictionary grew too large. + /// This either applies if dictionary encoding is disabled or if we fallback + /// because the dictionary grew too large. 
Builder* encoding(const std::string& path, Encoding::type encodingType) { if (encodingType == Encoding::kPlainDictionary || encodingType == Encoding::kRleDictionary) { @@ -466,8 +481,8 @@ class PARQUET_EXPORT WriterProperties { /// \brief Define the encoding that is used when we don't utilise /// dictionary encoding. // - /// This either applies if dictionary encoding is disabled or if we - /// fallback because the dictionary grew too large. + /// This either applies if dictionary encoding is disabled or if we fallback + /// because the dictionary grew too large. Builder* encoding( const std::shared_ptr& path, Encoding::type encodingType) { @@ -503,37 +518,37 @@ class PARQUET_EXPORT WriterProperties { return this->compression(path->toDotString(), codec); } - /// \brief Specify the default compression level for the compressor - /// in every column. In case a column does not have an explicitly - /// specified compression level, the default one would be used. + /// \brief Specify the default compression level for the compressor in + /// every column. In case a column does not have an explicitly specified + /// compression level, the default one would be used. /// - /// The provided compression level is compressor specific. The user - /// would have to familiarize oneself with the available levels for - /// the selected compressor. If the compressor does not allow for - /// selecting different compression levels, calling this function - /// would not have any effect. Parquet and Arrow do not validate the - /// passed compression level. If no level is selected by the user or - /// if the special std::numeric_limits::min() value is passed, - /// then Arrow selects the compression level. + /// The provided compression level is compressor specific. The user would + /// have to familiarize oneself with the available levels for the selected + /// compressor. 
If the compressor does not allow for selecting different + /// compression levels, calling this function would not have any effect. + /// Parquet and Arrow do not validate the passed compression level. If no + /// level is selected by the user or if the special + /// std::numeric_limits::min() value is passed, then Arrow selects the + /// compression level. /// - /// If other compressor-specific options need to be set in addition - /// to the compression level, use the codec_options method. + /// If other compressor-specific options need to be set in addition to the + /// compression level, use the codec_options method. Builder* compressionLevel(int compressionLevel) { defaultColumnProperties_.setCompressionLevel(compressionLevel); return this; } - /// \brief Specify a compression level for the compressor for the - /// column described by path. + /// \brief Specify a compression level for the compressor for the column + /// described by path. /// - /// The provided compression level is compressor specific. The user - /// would have to familiarize oneself with the available levels for - /// the selected compressor. If the compressor does not allow for - /// selecting different compression levels, calling this function - /// would not have any effect. Parquet and Arrow do not validate the - /// passed compression level. If no level is selected by the user or - /// if the special std::numeric_limits::min() value is passed, - /// then Arrow selects the compression level. + /// The provided compression level is compressor specific. The user would + /// have to familiarize oneself with the available levels for the selected + /// compressor. If the compressor does not allow for selecting different + /// compression levels, calling this function would not have any effect. + /// Parquet and Arrow do not validate the passed compression level. 
If no + /// level is selected by the user or if the special + /// std::numeric_limits::min() value is passed, then Arrow selects the + /// compression level. Builder* compressionLevel(const std::string& path, int compressionLevel) { if (!codecOptions_[path]) { codecOptions_[path] = std::make_shared(); @@ -542,17 +557,17 @@ class PARQUET_EXPORT WriterProperties { return this; } - /// \brief Specify a compression level for the compressor for the - /// column described by path. + /// \brief Specify a compression level for the compressor for the column + /// described by path. /// - /// The provided compression level is compressor specific. The user - /// would have to familiarize oneself with the available levels for - /// the selected compressor. If the compressor does not allow for - /// selecting different compression levels, calling this function - /// would not have any effect. Parquet and Arrow do not validate the - /// passed compression level. If no level is selected by the user or - /// if the special std::numeric_limits::min() value is passed, - /// then Arrow selects the compression level. + /// The provided compression level is compressor specific. The user would + /// have to familiarize oneself with the available levels for the selected + /// compressor. If the compressor does not allow for selecting different + /// compression levels, calling this function would not have any effect. + /// Parquet and Arrow do not validate the passed compression level. If no + /// level is selected by the user or if the special + /// std::numeric_limits::min() value is passed, then Arrow selects the + /// compression level. Builder* compressionLevel( const std::shared_ptr& path, int compressionLevel) { @@ -562,15 +577,15 @@ class PARQUET_EXPORT WriterProperties { /// \brief Specify the default codec options for the compressor in /// every column. /// - /// The codec options allow configuring the compression level as - /// well as other codec-specific options. 
+ /// The codec options allow configuring the compression level as well + /// as other codec-specific options. Builder* codecOptions(const std::shared_ptr& codecOptions) { defaultColumnProperties_.setCodecOptions(codecOptions); return this; } - /// \brief Specify the codec options for the compressor for the - /// column described by path. + /// \brief Specify the codec options for the compressor for the column + /// described by path. Builder* codecOptions( const std::string& path, const std::shared_ptr& codecOptions) { @@ -578,8 +593,8 @@ class PARQUET_EXPORT WriterProperties { return this; } - /// \brief Specify the codec options for the compressor for the - /// column described by path. + /// \brief Specify the codec options for the compressor for the column + /// described by path. Builder* codecOptions( const std::shared_ptr& path, const std::shared_ptr& codecOptions) { @@ -646,38 +661,35 @@ class PARQUET_EXPORT WriterProperties { return this->disableStatistics(path->toDotString()); } - /// Allow decimals with 1 <= precision <= 18 to be stored as - /// integers. + /// Allow decimals with 1 <= precision <= 18 to be stored as integers. /// - /// In Parquet, DECIMAL can be stored in any of the following - /// physical types: + /// In Parquet, DECIMAL can be stored in any of the following physical + /// types: /// - Int32: For 1 <= precision <= 9. /// - Int64: For 10 <= precision <= 18. /// - Fixed_len_byte_array: Precision is limited by the array size. - /// Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 - /// digits. - /// - Binary: Precision is unlimited. The minimum number of bytes to - /// store + /// Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + /// - Binary: Precision is unlimited. The minimum number of bytes to store /// the unscaled value is used. /// /// By default, this is DISABLED and all decimal types annotate. /// Fixed_len_byte_array. 
/// - /// When enabled, the C++ writer will use following physical types - /// to store decimals: + /// When enabled, the C++ writer will use following physical types to store + /// decimals: /// - Int32: For 1 <= precision <= 9. /// - Int64: For 10 <= precision <= 18. /// - Fixed_len_byte_array: For precision > 18. /// - /// As a consequence, decimal columns stored in integer types are - /// more compact. + /// As a consequence, decimal columns stored in integer types are more + /// compact. Builder* enableStoreDecimalAsInteger() { storeDecimalAsInteger_ = true; return this; } - /// Disable decimal logical type with 1 <= precision <= 18 to be - /// stored as integer physical type. + /// Disable decimal logical type with 1 <= precision <= 18 to be stored as + /// integer physical type. /// /// Default disabled. Builder* disableStoreDecimalAsInteger() { @@ -685,13 +697,12 @@ class PARQUET_EXPORT WriterProperties { return this; } - /// Enable writing page index in general for all columns. Default - /// disabled. + /// Enable writing page index in general for all columns. Default disabled. /// - /// Writing statistics to the page index disables the old method of - /// writing statistics to each data page header. The page index - /// makes filtering more efficient than the page header, as it - /// gathers all the statistics for a Parquet file in a single place, + /// Writing statistics to the page index disables the old method of writing + /// statistics to each data page header. + /// The page index makes filtering more efficient than the page header, as + /// it gathers all the statistics for a Parquet file in a single place, /// avoiding scattered I/O. /// /// Please check the link below for more details: @@ -701,36 +712,35 @@ class PARQUET_EXPORT WriterProperties { return this; } - /// Disable writing page index in general for all columns. Default - /// disabled. + /// Disable writing page index in general for all columns. Default disabled. 
Builder* disableWritePageIndex() { defaultColumnProperties_.setPageIndexEnabled(false); return this; } - /// Enable writing page index for column specified by `path`. - /// Default disabled. + /// Enable writing page index for column specified by `path`. Default + /// disabled. Builder* enableWritePageIndex(const std::string& path) { pageIndexEnabled_[path] = true; return this; } - /// Enable writing page index for column specified by `path`. - /// Default disabled. + /// Enable writing page index for column specified by `path`. Default + /// disabled. Builder* enableWritePageIndex( const std::shared_ptr& path) { return this->enableWritePageIndex(path->toDotString()); } - /// Disable writing page index for column specified by `path`. - /// Default disabled. + /// Disable writing page index for column specified by `path`. Default + /// disabled. Builder* disableWritePageIndex(const std::string& path) { pageIndexEnabled_[path] = false; return this; } - /// Disable writing page index for column specified by `path`. - /// Default disabled. + /// Disable writing page index for column specified by `path`. Default + /// disabled. Builder* disableWritePageIndex( const std::shared_ptr& path) { return this->disableWritePageIndex(path->toDotString()); @@ -766,6 +776,7 @@ class PARQUET_EXPORT WriterProperties { dictionaryPagesizeLimit_, writeBatchSize_, maxRowGroupLength_, + maxRowGroupBytes_, pagesize_, version_, createdBy_, @@ -783,6 +794,7 @@ class PARQUET_EXPORT WriterProperties { int64_t dictionaryPagesizeLimit_; int64_t writeBatchSize_; int64_t maxRowGroupLength_; + int64_t maxRowGroupBytes_; int64_t pagesize_; ParquetVersion::type version_; ParquetDataPageVersion dataPageVersion_; @@ -795,8 +807,7 @@ class PARQUET_EXPORT WriterProperties { // If empty, there is no sorting columns. std::vector sortingColumns_; - // Settings used for each column unless overridden in any of the - // maps below. + // Settings used for each column unless overridden in any of the maps below. 
ColumnProperties defaultColumnProperties_; std::unordered_map encodings_; std::unordered_map codecs_; @@ -823,6 +834,9 @@ class PARQUET_EXPORT WriterProperties { return maxRowGroupLength_; } + inline int64_t maxRowGroupBytes() const { + return maxRowGroupBytes_; + } inline int64_t dataPagesize() const { return pagesize_; } @@ -944,6 +958,7 @@ class PARQUET_EXPORT WriterProperties { int64_t dictionaryPagesizeLimit, int64_t writeBatchSize, int64_t maxRowGroupLength, + int64_t maxRowGroupBytes, int64_t pagesize, ParquetVersion::type version, const std::string& createdBy, @@ -958,6 +973,7 @@ class PARQUET_EXPORT WriterProperties { dictionaryPagesizeLimit_(dictionaryPagesizeLimit), writeBatchSize_(writeBatchSize), maxRowGroupLength_(maxRowGroupLength), + maxRowGroupBytes_(maxRowGroupBytes), pagesize_(pagesize), parquetDataPageVersion_(dataPageVersion), parquetVersion_(version), @@ -973,6 +989,7 @@ class PARQUET_EXPORT WriterProperties { int64_t dictionaryPagesizeLimit_; int64_t writeBatchSize_; int64_t maxRowGroupLength_; + int64_t maxRowGroupBytes_; int64_t pagesize_; ParquetDataPageVersion parquetDataPageVersion_; ParquetVersion::type parquetVersion_; @@ -1010,8 +1027,7 @@ class PARQUET_EXPORT ArrowReaderProperties { cacheOptions_(::arrow::io::CacheOptions::Defaults()), coerceInt96TimestampUnit_(::arrow::TimeUnit::NANO) {} - /// \brief Set whether to use the IO thread pool to parse columns in - /// parallel. + /// \brief Set whether to use the IO thread pool to parse columns in parallel. /// /// Default is false. void setUseThreads(bool useThreads) { @@ -1022,8 +1038,7 @@ class PARQUET_EXPORT ArrowReaderProperties { return useThreads_; } - /// \brief Set whether to read a particular column as dictionary - /// encoded. + /// \brief Set whether to read a particular column as dictionary encoded. 
/// /// If the file metadata contains a serialized Arrow schema, then this /// is only supported for columns with a Parquet physical type of @@ -1134,18 +1149,16 @@ class PARQUET_EXPORT ArrowWriterProperties { executor_(NULLPTR) {} virtual ~Builder() = default; - /// \brief Disable writing legacy int96 timestamps (default - /// disabled). + /// \brief Disable writing legacy int96 timestamps (default disabled). Builder* disableDeprecatedInt96Timestamps() { writeTimestampsAsInt96_ = false; return this; } - /// \brief Enable writing legacy int96 timestamps (default - /// disabled). + /// \brief Enable writing legacy int96 timestamps (default disabled). /// - /// May be turned on to write timestamps compatible with older - /// Parquet writers. This takes precedent over coerceTimestamps. + /// May be turned on to write timestamps compatible with older Parquet + /// writers. This takes precedent over coerceTimestamps. Builder* enableDeprecatedInt96Timestamps() { writeTimestampsAsInt96_ = true; return this; @@ -1169,27 +1182,25 @@ class PARQUET_EXPORT ArrowWriterProperties { return this; } - /// \brief Disallow loss of data when truncating timestamps - /// (default). + /// \brief Disallow loss of data when truncating timestamps (default). Builder* disallowTruncatedTimestamps() { truncatedTimestampsAllowed_ = false; return this; } - /// \brief EXPERIMENTAL: Write binary serialized Arrow schema to the - /// file, to enable certain read options (like "read_dictionary") to - /// be set automatically. + /// \brief EXPERIMENTAL: Write binary serialized Arrow schema to the file, + /// to enable certain read options (like "read_dictionary") to be set + /// automatically. Builder* storeSchema() { storeSchema_ = true; return this; } - /// \brief When enabled, will not preserve Arrow field names for - /// list types. + /// \brief When enabled, will not preserve Arrow field names for list types. /// - /// Instead of using the field names Arrow uses for the values array - /// of. 
List types (default "item"), will use "element", as is - /// specified in. The Parquet spec. + /// Instead of using the field names Arrow uses for the values array of. + /// List types (default "item"), will use "element", as is specified in. + /// The Parquet spec. /// /// This is enabled by default. Builder* enableCompliantNestedTypes() { @@ -1279,21 +1290,19 @@ class PARQUET_EXPORT ArrowWriterProperties { return storeSchema_; } - /// \brief Enable nested type naming according to the parquet - /// specification. + /// \brief Enable nested type naming according to the parquet specification. /// - /// Older versions of arrow wrote out field names for nested lists - /// based on the name of the field. According to the Parquet - /// specification they should always be "element". + /// Older versions of arrow wrote out field names for nested lists based on + /// the name of the field. According to the Parquet specification they + /// should always be "element". bool compliantNestedTypes() const { return compliantNestedTypes_; } - /// \brief The underlying engine version to use when writing Arrow - /// data. + /// \brief The underlying engine version to use when writing Arrow data. /// - /// V2 is currently the latest V1 is considered deprecated but left - /// in. Place in case there are bugs detected in V2. + /// V2 is currently the latest V1 is considered deprecated but left in. + /// Place in case there are bugs detected in V2. EngineVersion engineVersion() const { return engineVersion_; } @@ -1339,8 +1348,8 @@ class PARQUET_EXPORT ArrowWriterProperties { ::arrow::internal::Executor* executor_; }; -/// \brief State object used for writing Arrow data directly to a -/// Parquet. Column chunk. API possibly not stable. +/// \brief State object used for writing Arrow data directly to a Parquet. +/// Column chunk. API possibly not stable. 
struct ArrowWriteContext { ArrowWriteContext(MemoryPool* memoryPool, ArrowWriterProperties* properties) : memoryPool(memoryPool), @@ -1358,8 +1367,8 @@ struct ArrowWriteContext { MemoryPool* memoryPool; const ArrowWriterProperties* properties; - // Buffer used for storing the data of an array converted to the - // physical type. As expected by parquet-cpp. + // Buffer used for storing the data of an array converted to the physical + // type. As expected by parquet-cpp. std::shared_ptr dataBuffer; // We use the shared ownership of this buffer. diff --git a/velox/dwio/parquet/writer/arrow/Statistics.cpp b/velox/dwio/parquet/writer/arrow/Statistics.cpp index 3d88fc41d33..83530e8b7a1 100644 --- a/velox/dwio/parquet/writer/arrow/Statistics.cpp +++ b/velox/dwio/parquet/writer/arrow/Statistics.cpp @@ -40,7 +40,6 @@ #include "velox/dwio/parquet/writer/arrow/Platform.h" #include "velox/dwio/parquet/writer/arrow/Schema.h" #include "velox/dwio/parquet/writer/arrow/StringTruncation.h" - #include "velox/type/DecimalUtil.h" #include "velox/type/HugeInt.h" @@ -607,11 +606,11 @@ class TypedStatisticsImpl : public TypedStatistics { int64_t numValues, int64_t nullCount, int64_t distinctCount, + int64_t nanCount, bool hasMinMax, bool hasNullCount, bool hasDistinctCount, bool hasNaNCount, - int64_t nanCount, MemoryPool* pool) : TypedStatisticsImpl(descr, pool) { TypedStatisticsImpl::incrementNumValues(numValues); @@ -813,6 +812,63 @@ class TypedStatisticsImpl : public TypedStatistics { return encodeMin(); } + std::string MinValue() const override { + if constexpr (std::is_same_v) { + if (descr_->logicalType()->isDecimal()) { + return encodeDecimalToBigEndian(min_); + } + } + if constexpr (std::is_same_v) { + return encodeDecimalToBigEndian(min_); + } + if constexpr (std::is_same_v) { + // TODO: 16 is default value. See DEFAULT_WRITE_METRICS_MODE_DEFAULT in + // org.apache.iceberg.TableProperties. Need to support this table + // property. 
+ const auto truncatedMin = truncateUtf8( + std::string_view( + reinterpret_cast(min_.ptr), min_.len), + 16); + std::string s; + this->plainEncode( + ByteArray( + truncatedMin.size(), + reinterpret_cast(truncatedMin.data())), + &s); + return s; + } + return encodeMin(); + } + + std::string MaxValue() const override { + if constexpr (std::is_same_v) { + if (descr_->logicalType()->isDecimal()) { + return encodeDecimalToBigEndian(max_); + } + } + if constexpr (std::is_same_v) { + return encodeDecimalToBigEndian(max_); + } + if constexpr (std::is_same_v) { + const auto truncatedMaxOpt = roundUpUtf8( + std::string_view( + reinterpret_cast(max_.ptr), max_.len), + 16); + // If roundUpUtf8 returns nullopt, fall back to the original max value + const auto& truncatedMax = + truncatedMaxOpt.has_value() ? truncatedMaxOpt.value() + : std::string(reinterpret_cast(max_.ptr), max_.len); + std::string s; + this->plainEncode( + ByteArray( + truncatedMax.size(), + reinterpret_cast(truncatedMax.data())), + &s); + return s; + } + return encodeMax(); + } + std::optional icebergUpperBoundExclusive( int32_t truncateTo) const override { if constexpr (std::is_same_v) { @@ -870,7 +926,7 @@ class TypedStatisticsImpl : public TypedStatistics { s.setDistinctCount(this->distinctCount()); } if (hasNanCount_) { - s.set_nan_count(nanCount_); + s.setNanCount(nanCount_); } return s; } @@ -901,6 +957,16 @@ class TypedStatisticsImpl : public TypedStatistics { return comparator_->compare(min_, typedOther->min_) ? true : false; } + bool CompareMax(const Statistics& other) const override { + auto typedStats = dynamic_cast*>(&other); + return comparator_->compare(max_, typedStats->max_) ? false : true; + } + + bool CompareMin(const Statistics& other) const override { + auto typedStats = dynamic_cast*>(&other); + return comparator_->compare(min_, typedStats->min_) ? 
true : false; + } + private: const ColumnDescriptor* descr_; bool hasMinMax_ = false; @@ -1261,11 +1327,11 @@ std::shared_ptr Statistics::make( numValues, encodedStats->nullCount, encodedStats->distinctCount, + encodedStats->nanCount, encodedStats->hasMin && encodedStats->hasMax, encodedStats->hasNullCount, encodedStats->hasDistinctCount, encodedStats->hasNanCount, - encodedStats->nanCount, pool); } @@ -1276,11 +1342,11 @@ std::shared_ptr Statistics::make( int64_t numValues, int64_t nullCount, int64_t distinctCount, + int64_t nanCount, bool hasMinMax, bool hasNullCount, bool hasDistinctCount, bool hasNaNCount, - int64_t nanCount, ::arrow::MemoryPool* pool) { #define MAKE_STATS(CAP_TYPE, KLASS) \ case Type::CAP_TYPE: \ @@ -1291,11 +1357,11 @@ std::shared_ptr Statistics::make( numValues, \ nullCount, \ distinctCount, \ + nanCount, \ hasMinMax, \ hasNullCount, \ hasDistinctCount, \ hasNaNCount, \ - nanCount, \ pool) switch (descr->physicalType()) { diff --git a/velox/dwio/parquet/writer/arrow/Statistics.h b/velox/dwio/parquet/writer/arrow/Statistics.h index 64295487359..56d949142ca 100644 --- a/velox/dwio/parquet/writer/arrow/Statistics.h +++ b/velox/dwio/parquet/writer/arrow/Statistics.h @@ -170,7 +170,7 @@ class PARQUET_EXPORT EncodedStatistics { } bool isSet() const { - return hasMin || hasMax || hasNullCount || hasDistinctCount; + return hasMin || hasMax || hasNullCount || hasDistinctCount || hasNanCount; } bool isSigned() const { @@ -205,7 +205,7 @@ class PARQUET_EXPORT EncodedStatistics { return *this; } - EncodedStatistics& set_nan_count(int64_t value) { + EncodedStatistics& setNanCount(int64_t value) { nanCount = value; hasNanCount = true; return *this; @@ -245,11 +245,11 @@ class PARQUET_EXPORT Statistics { int64_t numValues, int64_t nullCount, int64_t distinctCount, + int64_t nanCount, bool hasMinMax, bool hasNullCount, bool hasDistinctCount, bool hasNaNCount, - int64_t nanCount, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); // Helper function 
to convert EncodedStatistics to Statistics. @@ -333,6 +333,11 @@ class PARQUET_EXPORT Statistics { /// bound can be computed. virtual std::optional icebergUpperBoundExclusive( int32_t truncateTo) const = 0; + /// \brief Compatible minimum value with iceberg + virtual std::string MinValue() const = 0; + + /// \brief Compatible maximum value with iceberg + virtual std::string MaxValue() const = 0; /// \brief The finalized encoded form of the statistics for transport. virtual EncodedStatistics encode() = 0; @@ -355,6 +360,24 @@ class PARQUET_EXPORT Statistics { /// \param[in] other The Statistics object to compare against. virtual bool minLessThan(const Statistics& other) const = 0; + /// \brief Return true if this object is greater than other + virtual bool CompareMax(const Statistics& other) const = 0; + + /// \brief Return true if this object is less than other + virtual bool CompareMin(const Statistics& other) const = 0; + + static std::shared_ptr CompareAndGetMax( + const std::shared_ptr& stats1, + const std::shared_ptr& stats2) { + return stats1->CompareMax(*stats2) ? stats1 : stats2; + } + + static std::shared_ptr CompareAndGetMin( + const std::shared_ptr& stats1, + const std::shared_ptr& stats2) { + return stats1->CompareMin(*stats2) ? 
stats1 : stats2; + } + protected: static std::shared_ptr make( Type::type physicalType, @@ -476,11 +499,11 @@ std::shared_ptr> makeStatistics( int64_t numValues, int64_t nullCount, int64_t distinctCount, + int64_t nanCount, bool hasMinMax, bool hasNullCount, bool hasDistinctCount, bool hasNaNCount, - int64_t nanCount, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { return std::static_pointer_cast>(Statistics::make( descr, @@ -489,11 +512,11 @@ std::shared_ptr> makeStatistics( numValues, nullCount, distinctCount, + nanCount, hasMinMax, hasNullCount, hasDistinctCount, hasNaNCount, - nanCount, pool)); } diff --git a/velox/dwio/parquet/writer/arrow/ThriftInternal.h b/velox/dwio/parquet/writer/arrow/ThriftInternal.h index 2ce0e151741..cf553c15d92 100644 --- a/velox/dwio/parquet/writer/arrow/ThriftInternal.h +++ b/velox/dwio/parquet/writer/arrow/ThriftInternal.h @@ -362,31 +362,34 @@ static inline facebook::velox::parquet::thrift::SortingColumn toThrift( static inline facebook::velox::parquet::thrift::Statistics toThrift( const EncodedStatistics& stats) { - facebook::velox::parquet::thrift::Statistics Statistics; + facebook::velox::parquet::thrift::Statistics statistics; if (stats.hasMin) { - Statistics.__set_min_value(stats.min()); + statistics.__set_min_value(stats.min()); // If the order is SIGNED, then the old min value must be set too. // This for backward compatibility. if (stats.isSigned()) { - Statistics.__set_min(stats.min()); + statistics.__set_min(stats.min()); } } if (stats.hasMax) { - Statistics.__set_max_value(stats.max()); + statistics.__set_max_value(stats.max()); // If the order is SIGNED, then the old max value must be set too. // This for backward compatibility. 
if (stats.isSigned()) { - Statistics.__set_max(stats.max()); + statistics.__set_max(stats.max()); } } if (stats.hasNullCount) { - Statistics.__set_null_count(stats.nullCount); + statistics.__set_null_count(stats.nullCount); } if (stats.hasDistinctCount) { - Statistics.__set_distinct_count(stats.distinctCount); + statistics.__set_distinct_count(stats.distinctCount); + } + if (stats.hasNanCount) { + statistics.__set_nan_count(stats.nanCount); } - return Statistics; + return statistics; } static inline facebook::velox::parquet::thrift::AesGcmV1 toAesGcmV1Thrift( diff --git a/velox/dwio/parquet/writer/arrow/Writer.cpp b/velox/dwio/parquet/writer/arrow/Writer.cpp index 1d078d8bb7b..3d092115278 100644 --- a/velox/dwio/parquet/writer/arrow/Writer.cpp +++ b/velox/dwio/parquet/writer/arrow/Writer.cpp @@ -394,9 +394,7 @@ class FileWriterImpl : public FileWriter { Status writeTable(const Table& table, int64_t chunkSize) override { RETURN_NOT_OK(table.Validate()); - if (chunkSize <= 0 && table.num_rows() > 0) { - return Status::Invalid("chunk size per row_group must be greater than 0"); - } else if (!table.schema()->Equals(*schema_, false)) { + if (!table.schema()->Equals(*schema_, false)) { return Status::Invalid( "table schema does not match this writer's. table:'", table.schema()->ToString(), @@ -406,6 +404,19 @@ class FileWriterImpl : public FileWriter { } else if (chunkSize > this->properties().maxRowGroupLength()) { chunkSize = this->properties().maxRowGroupLength(); } + // maxRowGroupBytes is applied only after the row group has accumulated + // data. 
+ if (rowGroupWriter_ != nullptr && rowGroupWriter_->numRows() > 0) { + double avgRowSize = rowGroupWriter_->totalBufferedBytes() * 1.0 / + rowGroupWriter_->numRows(); + chunkSize = std::min( + chunkSize, + static_cast( + this->properties().maxRowGroupBytes() / avgRowSize)); + } + if (chunkSize <= 0 && table.num_rows() > 0) { + return Status::Invalid("rows per row_group must be greater than 0"); + } auto writeRowGroup = [&](int64_t offset, int64_t size) { RETURN_NOT_OK(newRowGroup(size)); @@ -443,11 +454,7 @@ class FileWriterImpl : public FileWriter { return Status::OK(); } - // Max number of rows allowed in a row group. - const int64_t maxRowGroupLength = this->properties().maxRowGroupLength(); - - if (rowGroupWriter_ == nullptr || !rowGroupWriter_->buffered() || - rowGroupWriter_->numRows() >= maxRowGroupLength) { + if (rowGroupWriter_ == nullptr || !rowGroupWriter_->buffered()) { RETURN_NOT_OK(newBufferedRowGroup()); } @@ -488,16 +495,29 @@ class FileWriterImpl : public FileWriter { return Status::OK(); }; + // Max number of rows allowed in a row group. + const int64_t maxRowGroupLength = this->properties().maxRowGroupLength(); + // Max number of bytes allowed in a row group. + const int64_t maxRowGroupBytes = this->properties().maxRowGroupBytes(); + int64_t offset = 0; while (offset < batch.num_rows()) { - const int64_t batchSize = std::min( - maxRowGroupLength - rowGroupWriter_->numRows(), - batch.num_rows() - offset); - RETURN_NOT_OK(writeBatch(offset, batchSize)); - offset += batchSize; - - // Flush current row group if it is full. 
- if (rowGroupWriter_->numRows() >= maxRowGroupLength) { + int64_t groupRows = rowGroupWriter_->numRows(); + int64_t batchSize = + std::min(maxRowGroupLength - groupRows, batch.num_rows() - offset); + if (groupRows > 0) { + int64_t bufferedBytes = rowGroupWriter_->totalBufferedBytes(); + double avgRowSize = bufferedBytes * 1.0 / groupRows; + batchSize = std::min( + batchSize, + static_cast( + (maxRowGroupBytes - bufferedBytes) / avgRowSize)); + } + if (batchSize > 0) { + RETURN_NOT_OK(writeBatch(offset, batchSize)); + offset += batchSize; + } else if (offset < batch.num_rows()) { + // Current row group is full, write remaining rows in a new group. RETURN_NOT_OK(newBufferedRowGroup()); } } diff --git a/velox/dwio/parquet/writer/arrow/Writer.h b/velox/dwio/parquet/writer/arrow/Writer.h index 840d11cbc4e..6e64fc9d9ee 100644 --- a/velox/dwio/parquet/writer/arrow/Writer.h +++ b/velox/dwio/parquet/writer/arrow/Writer.h @@ -138,9 +138,9 @@ class PARQUET_EXPORT FileWriter { /// Multiple RecordBatches can be written into the same row group through this /// method. /// - /// WriterProperties.maxRowGroupLength() is respected and a new - /// row group will be created if the current row group exceeds the - /// limit. + /// WriterProperties::maxRowGroupLength() and + /// WriterProperties::maxRowGroupBytes() are respected and a new row group + /// will be created if the current row group exceeds the limits. /// /// Batches get flushed to the output stream once newBufferedRowGroup() /// or close() is called. diff --git a/velox/dwio/parquet/writer/arrow/tests/StatisticsTest.cpp b/velox/dwio/parquet/writer/arrow/tests/StatisticsTest.cpp index 008d95d3df9..f4afbfb2d7f 100644 --- a/velox/dwio/parquet/writer/arrow/tests/StatisticsTest.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/StatisticsTest.cpp @@ -345,11 +345,12 @@ class TestStatistics : public PrimitiveTypedTest { this->values_.size(), 0, // nullCount. 0, // distinctCount. + 0, true, // hasMinMax. true, // hasNullCount. 
true, // hasDistinctCount. - false, // hasNaNCount. - 0); // nanCount. + false // hasNaNCount. + ); auto statistics3 = makeStatistics(this->schema_.column(0)); std::vector validBits( @@ -615,11 +616,12 @@ void TestStatistics::testMinMaxEncode() { this->values_.size(), 0, // nullCount 0, // distinctCount + 0, true, // hasMinMax true, // hasNullCount true, // hasDistinctCount - false, // hasNaNCount - 0); // nanCount + false // hasNaNCount + ); ASSERT_EQ(encodedMin, statistics2->encodeMin()); ASSERT_EQ(encodedMax, statistics2->encodeMax()); diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index b5c876ee57b..2cfc1ea6465 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -288,17 +288,22 @@ StringView Timestamp::tmToStringView( *writePosition++ = ':'; writePosition += appendDigits(tmValue.tm_min, 2, writePosition); - // Second. - *writePosition++ = ':'; - writePosition += appendDigits(tmValue.tm_sec, 2, writePosition); - if (options.precision == TimestampToStringOptions::Precision::kMilliseconds) { nanos /= 1'000'000; } else if ( options.precision == TimestampToStringOptions::Precision::kMicroseconds) { nanos /= 1'000; } - if (options.skipTrailingZeros && nanos == 0) { + + // Second. + const bool shouldSkipSeconds = + options.skipTrailingZeroSeconds && tmValue.tm_sec == 0 && nanos == 0; + if (!shouldSkipSeconds) { + *writePosition++ = ':'; + writePosition += appendDigits(tmValue.tm_sec, 2, writePosition); + } + + if ((options.skipTrailingZeros && nanos == 0) || shouldSkipSeconds) { return StringView(startPosition, writePosition - startPosition); } diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index 73748c6ba6f..51b8002b1f7 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -49,6 +49,12 @@ struct TimestampToStringOptions { /// '2000-01-01 12:21:56.129000' becomes '2000-01-01 12:21:56.129'. bool skipTrailingZeros = false; + /// Whether to skip trailing zeros of second part. E.g. 
when true, + /// '2000-01-01 12:21:00' becomes '2000-01-01 12:21'. + /// '2000-01-01 12:21:00.000' becomes '2000-01-01 12:21'. + /// '2000-01-01 12:21:00.123' will not be impacted by this option. + bool skipTrailingZeroSeconds = false; + /// Whether padding zeros are added when the digits of year is less than 4. /// E.g. when true, '1-01-01 05:17:32.000' becomes '0001-01-01 05:17:32.000', /// '-03-24 13:20:00.000' becomes '0000-03-24 13:20:00.000', and '-1-11-29 diff --git a/velox/type/tests/TimestampTest.cpp b/velox/type/tests/TimestampTest.cpp index 290bc79a0b2..754421303ea 100644 --- a/velox/type/tests/TimestampTest.cpp +++ b/velox/type/tests/TimestampTest.cpp @@ -580,5 +580,44 @@ TEST(TimestampTest, skipTrailingZeros) { "0384-01-01 08:00:00.7266"); } +TEST(TimestampTest, skipTrailingZeroSeconds) { + TimestampToStringOptions options = { + .precision = TimestampToStringOptions::Precision::kMicroseconds, + .skipTrailingZeros = true, + .skipTrailingZeroSeconds = true, + .zeroPaddingYear = true, + .dateTimeSeparator = ' ', + }; + + ASSERT_EQ( + timestampToString(Timestamp(-946684800, 0), options), "1940-01-02 00:00"); + ASSERT_EQ(timestampToString(Timestamp(0, 0), options), "1970-01-01 00:00"); + ASSERT_EQ(timestampToString(Timestamp(0, 365), options), "1970-01-01 00:00"); + ASSERT_EQ( + timestampToString(Timestamp(0, 65873), options), + "1970-01-01 00:00:00.000065"); + ASSERT_EQ( + timestampToString(Timestamp(94668480000, 0), options), + "4969-12-04 00:00"); + ASSERT_EQ( + timestampToString(Timestamp(946729316, 129999999), options), + "2000-01-01 12:21:56.129999"); + ASSERT_EQ( + timestampToString(Timestamp(946729316, 129900000), options), + "2000-01-01 12:21:56.1299"); + ASSERT_EQ( + timestampToString(Timestamp(946729316, 129000000), options), + "2000-01-01 12:21:56.129"); + ASSERT_EQ( + timestampToString(Timestamp(946729316, 0), options), + "2000-01-01 12:21:56"); + ASSERT_EQ( + timestampToString(Timestamp(946729316, 129001000), options), + "2000-01-01 
12:21:56.129001"); + ASSERT_EQ( + timestampToString(Timestamp(-50049331200, 726600000), options), + "0384-01-01 08:00:00.7266"); +} + } // namespace } // namespace facebook::velox