To run this benchmark for spark-3.4:
- * ./gradlew -DsparkVersions=3.4 :iceberg-spark:iceberg-spark-extensions-3.4_2.12:jmh
- * -PjmhIncludeRegex=TaskGroupPlanningBenchmark
- * -PjmhOutputPath=benchmark/iceberg-task-group-planning-benchmark.txt
- *
- */
-@Fork(1)
-@State(Scope.Benchmark)
-@Warmup(iterations = 3)
-@Measurement(iterations = 5)
-@Timeout(time = 30, timeUnit = TimeUnit.MINUTES)
-@BenchmarkMode(Mode.SingleShotTime)
-public class TaskGroupPlanningBenchmark {
-
- private static final String TABLE_NAME = "test_table";
- private static final String PARTITION_COLUMN = "ss_ticket_number";
-
- private static final int NUM_PARTITIONS = 150;
- private static final int NUM_REAL_DATA_FILES_PER_PARTITION = 5;
- private static final int NUM_REPLICA_DATA_FILES_PER_PARTITION = 50_000;
- private static final int NUM_DELETE_FILES_PER_PARTITION = 25;
- private static final int NUM_ROWS_PER_DATA_FILE = 150;
-
- private final Configuration hadoopConf = new Configuration();
- private SparkSession spark;
- private Table table;
-
- private List fileTasks;
-
- @Setup
- public void setupBenchmark() throws NoSuchTableException, ParseException {
- setupSpark();
- initTable();
- initDataAndDeletes();
- loadFileTasks();
- }
-
- @TearDown
- public void tearDownBenchmark() {
- dropTable();
- tearDownSpark();
- }
-
- @Benchmark
- @Threads(1)
- public void planTaskGroups(Blackhole blackhole) {
- SparkReadConf readConf = new SparkReadConf(spark, table, ImmutableMap.of());
- List> taskGroups =
- TableScanUtil.planTaskGroups(
- fileTasks,
- readConf.splitSize(),
- readConf.splitLookback(),
- readConf.splitOpenFileCost());
-
- long rowsCount = 0L;
- for (ScanTaskGroup taskGroup : taskGroups) {
- rowsCount += taskGroup.estimatedRowsCount();
- }
- blackhole.consume(rowsCount);
-
- long filesCount = 0L;
- for (ScanTaskGroup taskGroup : taskGroups) {
- filesCount += taskGroup.filesCount();
- }
- blackhole.consume(filesCount);
-
- long sizeBytes = 0L;
- for (ScanTaskGroup taskGroup : taskGroups) {
- sizeBytes += taskGroup.sizeBytes();
- }
- blackhole.consume(sizeBytes);
- }
-
- private void loadFileTasks() {
- table.refresh();
-
- try (CloseableIterable fileTasksIterable = table.newBatchScan().planFiles()) {
- this.fileTasks = Lists.newArrayList(fileTasksIterable);
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
-
- private DataFile loadAddedDataFile() {
- table.refresh();
-
- Iterable addedDataFiles = SnapshotChanges.builderFor(table).build().addedDataFiles();
- return Iterables.getOnlyElement(addedDataFiles);
- }
-
- private DeleteFile loadAddedDeleteFile() {
- table.refresh();
-
- Iterable addedDeleteFiles =
- SnapshotChanges.builderFor(table).build().addedDeleteFiles();
- return Iterables.getOnlyElement(addedDeleteFiles);
- }
-
- private void initDataAndDeletes() throws NoSuchTableException {
- Schema schema = table.schema();
- PartitionSpec spec = table.spec();
- LocationProvider locations = table.locationProvider();
-
- for (int partitionOrdinal = 0; partitionOrdinal < NUM_PARTITIONS; partitionOrdinal++) {
- Dataset inputDF =
- randomDataDF(schema, NUM_ROWS_PER_DATA_FILE)
- .drop(PARTITION_COLUMN)
- .withColumn(PARTITION_COLUMN, lit(partitionOrdinal));
-
- for (int fileOrdinal = 0; fileOrdinal < NUM_REAL_DATA_FILES_PER_PARTITION; fileOrdinal++) {
- appendAsFile(inputDF);
- }
-
- DataFile dataFile = loadAddedDataFile();
-
- sql(
- "DELETE FROM %s WHERE ss_item_sk IS NULL AND %s = %d",
- TABLE_NAME, PARTITION_COLUMN, partitionOrdinal);
-
- DeleteFile deleteFile = loadAddedDeleteFile();
-
- AppendFiles append = table.newFastAppend();
-
- for (int fileOrdinal = 0; fileOrdinal < NUM_REPLICA_DATA_FILES_PER_PARTITION; fileOrdinal++) {
- String replicaFileName = UUID.randomUUID() + "-replica.parquet";
- DataFile replicaDataFile =
- DataFiles.builder(spec)
- .copy(dataFile)
- .withPath(locations.newDataLocation(spec, dataFile.partition(), replicaFileName))
- .build();
- append.appendFile(replicaDataFile);
- }
-
- append.commit();
-
- RowDelta rowDelta = table.newRowDelta();
-
- for (int fileOrdinal = 0; fileOrdinal < NUM_DELETE_FILES_PER_PARTITION; fileOrdinal++) {
- String replicaFileName = UUID.randomUUID() + "-replica.parquet";
- DeleteFile replicaDeleteFile =
- FileMetadata.deleteFileBuilder(spec)
- .copy(deleteFile)
- .withPath(locations.newDataLocation(spec, deleteFile.partition(), replicaFileName))
- .build();
- rowDelta.addDeletes(replicaDeleteFile);
- }
-
- rowDelta.commit();
- }
- }
-
- private void appendAsFile(Dataset df) throws NoSuchTableException {
- df.coalesce(1).writeTo(TABLE_NAME).append();
- }
-
- private Dataset randomDataDF(Schema schema, int numRows) {
- Iterable rows = RandomData.generateSpark(schema, numRows, 0);
- JavaSparkContext context = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD rowRDD = context.parallelize(Lists.newArrayList(rows));
- StructType rowSparkType = SparkSchemaUtil.convert(schema);
- return spark.internalCreateDataFrame(JavaRDD.toRDD(rowRDD), rowSparkType, false);
- }
-
- private void setupSpark() {
- this.spark =
- SparkSession.builder()
- .config(TestBase.DISABLE_UI)
- .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
- .config("spark.sql.extensions", IcebergSparkSessionExtensions.class.getName())
- .config("spark.sql.catalog.spark_catalog", SparkSessionCatalog.class.getName())
- .config("spark.sql.catalog.spark_catalog.type", "hadoop")
- .config("spark.sql.catalog.spark_catalog.warehouse", newWarehouseDir())
- .master("local[*]")
- .getOrCreate();
- }
-
- private void tearDownSpark() {
- spark.stop();
- }
-
- private void initTable() throws NoSuchTableException, ParseException {
- sql(
- "CREATE TABLE %s ( "
- + " `ss_sold_date_sk` INT, "
- + " `ss_sold_time_sk` INT, "
- + " `ss_item_sk` INT, "
- + " `ss_customer_sk` STRING, "
- + " `ss_cdemo_sk` STRING, "
- + " `ss_hdemo_sk` STRING, "
- + " `ss_addr_sk` STRING, "
- + " `ss_store_sk` STRING, "
- + " `ss_promo_sk` STRING, "
- + " `ss_ticket_number` INT, "
- + " `ss_quantity` STRING, "
- + " `ss_wholesale_cost` STRING, "
- + " `ss_list_price` STRING, "
- + " `ss_sales_price` STRING, "
- + " `ss_ext_discount_amt` STRING, "
- + " `ss_ext_sales_price` STRING, "
- + " `ss_ext_wholesale_cost` STRING, "
- + " `ss_ext_list_price` STRING, "
- + " `ss_ext_tax` STRING, "
- + " `ss_coupon_amt` STRING, "
- + " `ss_net_paid` STRING, "
- + " `ss_net_paid_inc_tax` STRING, "
- + " `ss_net_profit` STRING "
- + ")"
- + "USING iceberg "
- + "PARTITIONED BY (%s) "
- + "TBLPROPERTIES ("
- + " '%s' '%b',"
- + " '%s' '%s',"
- + " '%s' '%d')",
- TABLE_NAME,
- PARTITION_COLUMN,
- TableProperties.MANIFEST_MERGE_ENABLED,
- false,
- TableProperties.DELETE_MODE,
- RowLevelOperationMode.MERGE_ON_READ.modeName(),
- TableProperties.FORMAT_VERSION,
- 2);
-
- this.table = Spark3Util.loadIcebergTable(spark, TABLE_NAME);
- }
-
- private void dropTable() {
- sql("DROP TABLE IF EXISTS %s PURGE", TABLE_NAME);
- }
-
- private String newWarehouseDir() {
- return hadoopConf.get("hadoop.tmp.dir") + UUID.randomUUID();
- }
-
- @FormatMethod
- private void sql(@FormatString String query, Object... args) {
- spark.sql(String.format(query, args));
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/jmh/java/org/apache/iceberg/spark/UpdateProjectionBenchmark.java b/spark/v3.4/spark-extensions/src/jmh/java/org/apache/iceberg/spark/UpdateProjectionBenchmark.java
deleted file mode 100644
index caa23625fc44..000000000000
--- a/spark/v3.4/spark-extensions/src/jmh/java/org/apache/iceberg/spark/UpdateProjectionBenchmark.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark;
-
-import static org.apache.spark.sql.functions.current_date;
-import static org.apache.spark.sql.functions.date_add;
-import static org.apache.spark.sql.functions.expr;
-
-import com.google.errorprone.annotations.FormatMethod;
-import com.google.errorprone.annotations.FormatString;
-import java.util.UUID;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.iceberg.DistributionMode;
-import org.apache.iceberg.RowLevelOperationMode;
-import org.apache.iceberg.Table;
-import org.apache.iceberg.TableProperties;
-import org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
-import org.apache.spark.sql.catalyst.parser.ParseException;
-import org.apache.spark.sql.internal.SQLConf;
-import org.apache.spark.sql.types.StructType;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.BenchmarkMode;
-import org.openjdk.jmh.annotations.Fork;
-import org.openjdk.jmh.annotations.Measurement;
-import org.openjdk.jmh.annotations.Mode;
-import org.openjdk.jmh.annotations.Scope;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-import org.openjdk.jmh.annotations.TearDown;
-import org.openjdk.jmh.annotations.Threads;
-import org.openjdk.jmh.annotations.Warmup;
-
-@Fork(1)
-@State(Scope.Benchmark)
-@Warmup(iterations = 3)
-@Measurement(iterations = 5)
-@BenchmarkMode(Mode.SingleShotTime)
-public class UpdateProjectionBenchmark {
-
- private static final String TABLE_NAME = "test_table";
- private static final int NUM_FILES = 5;
- private static final int NUM_ROWS_PER_FILE = 1_000_000;
-
- private final Configuration hadoopConf = new Configuration();
- private SparkSession spark;
- private long originalSnapshotId;
-
- @Setup
- public void setupBenchmark() throws NoSuchTableException, ParseException {
- setupSpark();
- initTable();
- appendData();
-
- Table table = Spark3Util.loadIcebergTable(spark, TABLE_NAME);
- this.originalSnapshotId = table.currentSnapshot().snapshotId();
- }
-
- @TearDown
- public void tearDownBenchmark() {
- tearDownSpark();
- dropTable();
- }
-
- @Benchmark
- @Threads(1)
- public void copyOnWriteUpdate10Percent() {
- runBenchmark(RowLevelOperationMode.COPY_ON_WRITE, 0.1);
- }
-
- @Benchmark
- @Threads(1)
- public void copyOnWriteUpdate30Percent() {
- runBenchmark(RowLevelOperationMode.COPY_ON_WRITE, 0.3);
- }
-
- @Benchmark
- @Threads(1)
- public void copyOnWriteUpdate75Percent() {
- runBenchmark(RowLevelOperationMode.COPY_ON_WRITE, 0.75);
- }
-
- @Benchmark
- @Threads(1)
- public void mergeOnRead10Percent() {
- runBenchmark(RowLevelOperationMode.MERGE_ON_READ, 0.1);
- }
-
- @Benchmark
- @Threads(1)
- public void mergeOnReadUpdate30Percent() {
- runBenchmark(RowLevelOperationMode.MERGE_ON_READ, 0.3);
- }
-
- @Benchmark
- @Threads(1)
- public void mergeOnReadUpdate75Percent() {
- runBenchmark(RowLevelOperationMode.MERGE_ON_READ, 0.75);
- }
-
- private void runBenchmark(RowLevelOperationMode mode, double updatePercentage) {
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES ('%s' '%s')",
- TABLE_NAME, TableProperties.UPDATE_MODE, mode.modeName());
-
- int mod = (int) (NUM_ROWS_PER_FILE / (NUM_ROWS_PER_FILE * updatePercentage));
-
- sql(
- "UPDATE %s "
- + "SET intCol = intCol + 10, dateCol = date_add(dateCol, 1) "
- + "WHERE mod(id, %d) = 0",
- TABLE_NAME, mod);
-
- sql(
- "CALL system.rollback_to_snapshot(table => '%s', snapshot_id => %dL)",
- TABLE_NAME, originalSnapshotId);
- }
-
- private void setupSpark() {
- this.spark =
- SparkSession.builder()
- .config(TestBase.DISABLE_UI)
- .config("spark.sql.extensions", IcebergSparkSessionExtensions.class.getName())
- .config("spark.sql.catalog.spark_catalog", SparkSessionCatalog.class.getName())
- .config("spark.sql.catalog.spark_catalog.type", "hadoop")
- .config("spark.sql.catalog.spark_catalog.warehouse", newWarehouseDir())
- .config(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED().key(), "false")
- .config(SQLConf.ADAPTIVE_EXECUTION_ENABLED().key(), "false")
- .config(SQLConf.SHUFFLE_PARTITIONS().key(), "2")
- .master("local")
- .getOrCreate();
- }
-
- private void tearDownSpark() {
- spark.stop();
- }
-
- private void initTable() {
- sql(
- "CREATE TABLE %s ( "
- + " id LONG, intCol INT, floatCol FLOAT, doubleCol DOUBLE, "
- + " decimalCol DECIMAL(20, 5), dateCol DATE, timestampCol TIMESTAMP, "
- + " stringCol STRING)"
- + "USING iceberg "
- + "TBLPROPERTIES ("
- + " '%s' '%s',"
- + " '%s' '%d',"
- + " '%s' '%d')",
- TABLE_NAME,
- TableProperties.UPDATE_DISTRIBUTION_MODE,
- DistributionMode.NONE.modeName(),
- TableProperties.SPLIT_OPEN_FILE_COST,
- Integer.MAX_VALUE,
- TableProperties.FORMAT_VERSION,
- 2);
-
- sql("ALTER TABLE %s WRITE ORDERED BY id", TABLE_NAME);
- }
-
- private void dropTable() {
- sql("DROP TABLE IF EXISTS %s PURGE", TABLE_NAME);
- }
-
- private void appendData() throws NoSuchTableException {
- for (int fileNum = 1; fileNum <= NUM_FILES; fileNum++) {
- Dataset inputDF =
- spark
- .range(NUM_ROWS_PER_FILE)
- .withColumn("intCol", expr("CAST(id AS INT)"))
- .withColumn("floatCol", expr("CAST(id AS FLOAT)"))
- .withColumn("doubleCol", expr("CAST(id AS DOUBLE)"))
- .withColumn("decimalCol", expr("CAST(id AS DECIMAL(20, 5))"))
- .withColumn("dateCol", date_add(current_date(), fileNum))
- .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)"))
- .withColumn("stringCol", expr("CAST(dateCol AS STRING)"));
- appendAsFile(inputDF);
- }
- }
-
- private void appendAsFile(Dataset df) throws NoSuchTableException {
- // ensure the schema is precise (including nullability)
- StructType sparkSchema = spark.table(TABLE_NAME).schema();
- spark.createDataFrame(df.rdd(), sparkSchema).coalesce(1).writeTo(TABLE_NAME).append();
- }
-
- private String newWarehouseDir() {
- return hadoopConf.get("hadoop.tmp.dir") + UUID.randomUUID();
- }
-
- @FormatMethod
- private void sql(@FormatString String query, Object... args) {
- spark.sql(String.format(query, args));
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4 b/spark/v3.4/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4
deleted file mode 100644
index b962699d9b47..000000000000
--- a/spark/v3.4/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- * This file is an adaptation of Presto's and Spark's grammar files.
- */
-
-grammar IcebergSqlExtensions;
-
-@lexer::members {
- /**
- * Verify whether current token is a valid decimal token (which contains dot).
- * Returns true if the character that follows the token is not a digit or letter or underscore.
- *
- * For example:
- * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
- * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
- * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
- * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
- * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
- * which is not a digit or letter or underscore.
- */
- public boolean isValidDecimal() {
- int nextChar = _input.LA(1);
- if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
- nextChar == '_') {
- return false;
- } else {
- return true;
- }
- }
-
- /**
- * This method will be called when we see '/*' and try to match it as a bracketed comment.
- * If the next character is '+', it should be parsed as hint later, and we cannot match
- * it as a bracketed comment.
- *
- * Returns true if the next character is '+'.
- */
- public boolean isHint() {
- int nextChar = _input.LA(1);
- if (nextChar == '+') {
- return true;
- } else {
- return false;
- }
- }
-}
-
-singleStatement
- : statement EOF
- ;
-
-statement
- : CALL multipartIdentifier '(' (callArgument (',' callArgument)*)? ')' #call
- | ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS name=identifier)? #addPartitionField
- | ALTER TABLE multipartIdentifier DROP PARTITION FIELD transform #dropPartitionField
- | ALTER TABLE multipartIdentifier REPLACE PARTITION FIELD transform WITH transform (AS name=identifier)? #replacePartitionField
- | ALTER TABLE multipartIdentifier WRITE writeSpec #setWriteDistributionAndOrdering
- | ALTER TABLE multipartIdentifier SET IDENTIFIER_KW FIELDS fieldList #setIdentifierFields
- | ALTER TABLE multipartIdentifier DROP IDENTIFIER_KW FIELDS fieldList #dropIdentifierFields
- | ALTER TABLE multipartIdentifier createReplaceBranchClause #createOrReplaceBranch
- | ALTER TABLE multipartIdentifier createReplaceTagClause #createOrReplaceTag
- | ALTER TABLE multipartIdentifier DROP BRANCH (IF EXISTS)? identifier #dropBranch
- | ALTER TABLE multipartIdentifier DROP TAG (IF EXISTS)? identifier #dropTag
- ;
-
-createReplaceTagClause
- : (CREATE OR)? REPLACE TAG identifier tagOptions
- | CREATE TAG (IF NOT EXISTS)? identifier tagOptions
- ;
-
-createReplaceBranchClause
- : (CREATE OR)? REPLACE BRANCH identifier branchOptions
- | CREATE BRANCH (IF NOT EXISTS)? identifier branchOptions
- ;
-
-tagOptions
- : (AS OF VERSION snapshotId)? (refRetain)?
- ;
-
-branchOptions
- : (AS OF VERSION snapshotId)? (refRetain)? (snapshotRetention)?
- ;
-
-snapshotRetention
- : WITH SNAPSHOT RETENTION minSnapshotsToKeep
- | WITH SNAPSHOT RETENTION maxSnapshotAge
- | WITH SNAPSHOT RETENTION minSnapshotsToKeep maxSnapshotAge
- ;
-
-refRetain
- : RETAIN number timeUnit
- ;
-
-maxSnapshotAge
- : number timeUnit
- ;
-
-minSnapshotsToKeep
- : number SNAPSHOTS
- ;
-
-writeSpec
- : (writeDistributionSpec | writeOrderingSpec)*
- ;
-
-writeDistributionSpec
- : DISTRIBUTED BY PARTITION
- ;
-
-writeOrderingSpec
- : LOCALLY? ORDERED BY order
- | UNORDERED
- ;
-
-callArgument
- : expression #positionalArgument
- | identifier '=>' expression #namedArgument
- ;
-
-singleOrder
- : order EOF
- ;
-
-order
- : fields+=orderField (',' fields+=orderField)*
- | '(' fields+=orderField (',' fields+=orderField)* ')'
- ;
-
-orderField
- : transform direction=(ASC | DESC)? (NULLS nullOrder=(FIRST | LAST))?
- ;
-
-transform
- : multipartIdentifier #identityTransform
- | transformName=identifier
- '(' arguments+=transformArgument (',' arguments+=transformArgument)* ')' #applyTransform
- ;
-
-transformArgument
- : multipartIdentifier
- | constant
- ;
-
-expression
- : constant
- | stringMap
- | stringArray
- ;
-
-constant
- : number #numericLiteral
- | booleanValue #booleanLiteral
- | STRING+ #stringLiteral
- | identifier STRING #typeConstructor
- ;
-
-stringMap
- : MAP '(' constant (',' constant)* ')'
- ;
-
-stringArray
- : ARRAY '(' constant (',' constant)* ')'
- ;
-
-booleanValue
- : TRUE | FALSE
- ;
-
-number
- : MINUS? EXPONENT_VALUE #exponentLiteral
- | MINUS? DECIMAL_VALUE #decimalLiteral
- | MINUS? INTEGER_VALUE #integerLiteral
- | MINUS? BIGINT_LITERAL #bigIntLiteral
- | MINUS? SMALLINT_LITERAL #smallIntLiteral
- | MINUS? TINYINT_LITERAL #tinyIntLiteral
- | MINUS? DOUBLE_LITERAL #doubleLiteral
- | MINUS? FLOAT_LITERAL #floatLiteral
- | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
- ;
-
-multipartIdentifier
- : parts+=identifier ('.' parts+=identifier)*
- ;
-
-identifier
- : IDENTIFIER #unquotedIdentifier
- | quotedIdentifier #quotedIdentifierAlternative
- | nonReserved #unquotedIdentifier
- ;
-
-quotedIdentifier
- : BACKQUOTED_IDENTIFIER
- ;
-
-fieldList
- : fields+=multipartIdentifier (',' fields+=multipartIdentifier)*
- ;
-
-nonReserved
- : ADD | ALTER | AS | ASC | BRANCH | BY | CALL | CREATE | DAYS | DESC | DROP | EXISTS | FIELD | FIRST | HOURS | IF | LAST | NOT | NULLS | OF | OR | ORDERED | PARTITION | TABLE | WRITE
- | DISTRIBUTED | LOCALLY | MINUTES | MONTHS | UNORDERED | REPLACE | RETAIN | VERSION | WITH | IDENTIFIER_KW | FIELDS | SET | SNAPSHOT | SNAPSHOTS
- | TAG | TRUE | FALSE
- | MAP
- ;
-
-snapshotId
- : number
- ;
-
-numSnapshots
- : number
- ;
-
-timeUnit
- : DAYS
- | HOURS
- | MINUTES
- ;
-
-ADD: 'ADD';
-ALTER: 'ALTER';
-AS: 'AS';
-ASC: 'ASC';
-BRANCH: 'BRANCH';
-BY: 'BY';
-CALL: 'CALL';
-DAYS: 'DAYS';
-DESC: 'DESC';
-DISTRIBUTED: 'DISTRIBUTED';
-DROP: 'DROP';
-EXISTS: 'EXISTS';
-FIELD: 'FIELD';
-FIELDS: 'FIELDS';
-FIRST: 'FIRST';
-HOURS: 'HOURS';
-IF : 'IF';
-LAST: 'LAST';
-LOCALLY: 'LOCALLY';
-MINUTES: 'MINUTES';
-MONTHS: 'MONTHS';
-CREATE: 'CREATE';
-NOT: 'NOT';
-NULLS: 'NULLS';
-OF: 'OF';
-OR: 'OR';
-ORDERED: 'ORDERED';
-PARTITION: 'PARTITION';
-REPLACE: 'REPLACE';
-RETAIN: 'RETAIN';
-RETENTION: 'RETENTION';
-IDENTIFIER_KW: 'IDENTIFIER';
-SET: 'SET';
-SNAPSHOT: 'SNAPSHOT';
-SNAPSHOTS: 'SNAPSHOTS';
-TABLE: 'TABLE';
-TAG: 'TAG';
-UNORDERED: 'UNORDERED';
-VERSION: 'VERSION';
-WITH: 'WITH';
-WRITE: 'WRITE';
-
-TRUE: 'TRUE';
-FALSE: 'FALSE';
-
-MAP: 'MAP';
-ARRAY: 'ARRAY';
-
-PLUS: '+';
-MINUS: '-';
-
-STRING
- : '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
- | '"' ( ~('"'|'\\') | ('\\' .) )* '"'
- ;
-
-BIGINT_LITERAL
- : DIGIT+ 'L'
- ;
-
-SMALLINT_LITERAL
- : DIGIT+ 'S'
- ;
-
-TINYINT_LITERAL
- : DIGIT+ 'Y'
- ;
-
-INTEGER_VALUE
- : DIGIT+
- ;
-
-EXPONENT_VALUE
- : DIGIT+ EXPONENT
- | DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
- ;
-
-DECIMAL_VALUE
- : DECIMAL_DIGITS {isValidDecimal()}?
- ;
-
-FLOAT_LITERAL
- : DIGIT+ EXPONENT? 'F'
- | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
- ;
-
-DOUBLE_LITERAL
- : DIGIT+ EXPONENT? 'D'
- | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
- ;
-
-BIGDECIMAL_LITERAL
- : DIGIT+ EXPONENT? 'BD'
- | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
- ;
-
-IDENTIFIER
- : (LETTER | DIGIT | '_')+
- ;
-
-BACKQUOTED_IDENTIFIER
- : '`' ( ~'`' | '``' )* '`'
- ;
-
-fragment DECIMAL_DIGITS
- : DIGIT+ '.' DIGIT*
- | '.' DIGIT+
- ;
-
-fragment EXPONENT
- : 'E' [+-]? DIGIT+
- ;
-
-fragment DIGIT
- : [0-9]
- ;
-
-fragment LETTER
- : [A-Z]
- ;
-
-SIMPLE_COMMENT
- : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
- ;
-
-BRACKETED_COMMENT
- : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
- ;
-
-WS
- : [ \r\n\t]+ -> channel(HIDDEN)
- ;
-
-// Catch-all for anything we can't recognize.
-// We use this to be able to ignore and recover all the text
-// when splitting statements with DelimiterLexer
-UNRECOGNIZED
- : .
- ;
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/iceberg/spark/extensions/IcebergSparkSessionExtensions.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/iceberg/spark/extensions/IcebergSparkSessionExtensions.scala
deleted file mode 100644
index c1f77da66f9f..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/iceberg/spark/extensions/IcebergSparkSessionExtensions.scala
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions
-
-import org.apache.spark.sql.SparkSessionExtensions
-import org.apache.spark.sql.catalyst.analysis.AlignedRowLevelIcebergCommandCheck
-import org.apache.spark.sql.catalyst.analysis.AlignRowLevelCommandAssignments
-import org.apache.spark.sql.catalyst.analysis.CheckMergeIntoTableConditions
-import org.apache.spark.sql.catalyst.analysis.CheckViews
-import org.apache.spark.sql.catalyst.analysis.MergeIntoIcebergTableResolutionCheck
-import org.apache.spark.sql.catalyst.analysis.ProcedureArgumentCoercion
-import org.apache.spark.sql.catalyst.analysis.ResolveMergeIntoTableReferences
-import org.apache.spark.sql.catalyst.analysis.ResolveProcedures
-import org.apache.spark.sql.catalyst.analysis.ResolveViews
-import org.apache.spark.sql.catalyst.analysis.RewriteMergeIntoTable
-import org.apache.spark.sql.catalyst.analysis.RewriteMergeIntoTableForRowLineage
-import org.apache.spark.sql.catalyst.analysis.RewriteUpdateTable
-import org.apache.spark.sql.catalyst.analysis.RewriteUpdateTableForRowLineage
-import org.apache.spark.sql.catalyst.optimizer.ExtendedReplaceNullWithFalseInPredicate
-import org.apache.spark.sql.catalyst.optimizer.ExtendedSimplifyConditionalsInPredicate
-import org.apache.spark.sql.catalyst.optimizer.RemoveRowLineageOutputFromOriginalTable
-import org.apache.spark.sql.catalyst.optimizer.ReplaceStaticInvoke
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser
-import org.apache.spark.sql.execution.datasources.v2.ExtendedDataSourceV2Strategy
-import org.apache.spark.sql.execution.datasources.v2.ExtendedV2Writes
-import org.apache.spark.sql.execution.datasources.v2.ReplaceRewrittenRowLevelCommand
-import org.apache.spark.sql.execution.datasources.v2.RowLevelCommandScanRelationPushDown
-import org.apache.spark.sql.execution.dynamicpruning.RowLevelCommandDynamicPruning
-
-class IcebergSparkSessionExtensions extends (SparkSessionExtensions => Unit) {
-
- override def apply(extensions: SparkSessionExtensions): Unit = {
- // parser extensions
- extensions.injectParser { case (_, parser) => new IcebergSparkSqlExtensionsParser(parser) }
-
- // analyzer extensions
- extensions.injectResolutionRule { spark => ResolveProcedures(spark) }
- extensions.injectResolutionRule { spark => ResolveViews(spark) }
- extensions.injectResolutionRule { spark => ResolveMergeIntoTableReferences(spark) }
- extensions.injectResolutionRule { _ => CheckMergeIntoTableConditions }
- extensions.injectResolutionRule { _ => ProcedureArgumentCoercion }
- extensions.injectResolutionRule { _ => AlignRowLevelCommandAssignments }
- extensions.injectResolutionRule { _ => RewriteUpdateTableForRowLineage }
- extensions.injectResolutionRule { _ => RewriteMergeIntoTableForRowLineage }
- extensions.injectResolutionRule { _ => RewriteUpdateTable }
- extensions.injectResolutionRule { _ => RewriteMergeIntoTable }
- extensions.injectCheckRule { _ => CheckViews }
- extensions.injectCheckRule { _ => MergeIntoIcebergTableResolutionCheck }
- extensions.injectCheckRule { _ => AlignedRowLevelIcebergCommandCheck }
-
- // optimizer extensions
- extensions.injectOptimizerRule { _ => ExtendedSimplifyConditionalsInPredicate }
- extensions.injectOptimizerRule { _ => ExtendedReplaceNullWithFalseInPredicate }
- extensions.injectOptimizerRule { _ => ReplaceStaticInvoke }
- extensions.injectOptimizerRule { _ => RemoveRowLineageOutputFromOriginalTable }
- // pre-CBO rules run only once and the order of the rules is important
- // - dynamic filters should be added before replacing commands with rewrite plans
- // - scans must be planned before building writes
- extensions.injectPreCBORule { _ => RowLevelCommandScanRelationPushDown }
- extensions.injectPreCBORule { _ => ExtendedV2Writes }
- extensions.injectPreCBORule { spark => RowLevelCommandDynamicPruning(spark) }
- extensions.injectPreCBORule { _ => ReplaceRewrittenRowLevelCommand }
-
- // planner extensions
- extensions.injectPlannerStrategy { spark => ExtendedDataSourceV2Strategy(spark) }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignRowLevelCommandAssignments.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignRowLevelCommandAssignments.scala
deleted file mode 100644
index c70ded5ed283..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignRowLevelCommandAssignments.scala
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * A rule that aligns assignments in UPDATE and MERGE operations.
- *
- * Note that this rule must be run before rewriting row-level commands.
- */
-object AlignRowLevelCommandAssignments extends Rule[LogicalPlan] with AssignmentAlignmentSupport {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case u: UpdateIcebergTable if u.resolved && !u.aligned =>
- u.copy(assignments = alignAssignments(u.table, u.assignments))
-
- case m: MergeIntoIcebergTable if m.resolved && !m.aligned =>
- val alignedMatchedActions = m.matchedActions.map {
- case u @ UpdateAction(_, assignments) =>
- u.copy(assignments = alignAssignments(m.targetTable, assignments))
- case d: DeleteAction =>
- d
- case _ =>
- throw new AnalysisException("Matched actions can only contain UPDATE or DELETE")
- }
-
- val alignedNotMatchedActions = m.notMatchedActions.map {
- case i @ InsertAction(_, assignments) =>
- // check no nested columns are present
- val refs = assignments.map(_.key).map(AssignmentUtils.toAssignmentRef)
- refs.foreach { ref =>
- if (ref.size > 1) {
- throw new AnalysisException(
- "Nested fields are not supported inside INSERT clauses of MERGE operations: " +
- s"${ref.mkString("`", "`.`", "`")}")
- }
- }
-
- val colNames = refs.map(_.head)
-
- // check there are no duplicates
- val duplicateColNames = colNames.groupBy(identity).collect {
- case (name, matchingNames) if matchingNames.size > 1 => name
- }
-
- if (duplicateColNames.nonEmpty) {
- throw new AnalysisException(
- s"Duplicate column names inside INSERT clause: ${duplicateColNames.mkString(", ")}")
- }
-
- // reorder assignments by the target table column order
- val assignmentMap = colNames.zip(assignments).toMap
- i.copy(assignments = alignInsertActionAssignments(m.targetTable, assignmentMap))
-
- case _ =>
- throw new AnalysisException("Not matched actions can only contain INSERT")
- }
-
- m.copy(matchedActions = alignedMatchedActions, notMatchedActions = alignedNotMatchedActions)
- }
-
- private def alignInsertActionAssignments(
- targetTable: LogicalPlan,
- assignmentMap: Map[String, Assignment]): Seq[Assignment] = {
-
- val resolver = conf.resolver
-
- targetTable.output.map { targetAttr =>
- val assignment = assignmentMap
- .find { case (name, _) => resolver(name, targetAttr.name) }
- .map { case (_, assignment) => assignment }
-
- if (assignment.isEmpty) {
- throw new AnalysisException(
- s"Cannot find column '${targetAttr.name}' of the target table among " +
- s"the INSERT columns: ${assignmentMap.keys.mkString(", ")}. " +
- "INSERT clauses must provide values for all columns of the target table.")
- }
-
- val key = assignment.get.key
- val value = castIfNeeded(targetAttr, assignment.get.value, resolver, Seq(targetAttr.name))
- AssignmentUtils.handleCharVarcharLimits(Assignment(key, value))
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignedRowLevelIcebergCommandCheck.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignedRowLevelIcebergCommandCheck.scala
deleted file mode 100644
index 38021fda29f4..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignedRowLevelIcebergCommandCheck.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-
-object AlignedRowLevelIcebergCommandCheck extends (LogicalPlan => Unit) {
-
- override def apply(plan: LogicalPlan): Unit = {
- plan foreach {
- case m: MergeIntoIcebergTable if !m.aligned =>
- throw new AnalysisException(s"Could not align Iceberg MERGE INTO: $m")
- case u: UpdateIcebergTable if !u.aligned =>
- throw new AnalysisException(s"Could not align Iceberg UPDATE: $u")
- case _ => // OK
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentSupport.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentSupport.scala
deleted file mode 100644
index 0b53075a73ed..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentSupport.scala
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils._
-import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.spark.sql.catalyst.expressions.CreateNamedStruct
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.GetStructField
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.StructField
-import org.apache.spark.sql.types.StructType
-import scala.collection.compat.immutable.ArraySeq
-import scala.collection.mutable
-
-trait AssignmentAlignmentSupport extends CastSupport {
-
- self: SQLConfHelper =>
-
- private case class ColumnUpdate(ref: Seq[String], expr: Expression)
-
- /**
- * Aligns assignments to match table columns.
- *
- * This method processes and reorders given assignments so that each target column gets
- * an expression it should be set to. If a column does not have a matching assignment,
- * it will be set to its current value. For example, if one passes a table with columns c1, c2
- * and an assignment c2 = 1, this method will return c1 = c1, c2 = 1.
- *
- * This method also handles updates to nested columns. If there is an assignment to a particular
- * nested field, this method will construct a new struct with one field updated
- * preserving other fields that have not been modified. For example, if one passes a table with
- * columns c1, c2 where c2 is a struct with fields n1 and n2 and an assignment c2.n2 = 1,
- * this method will return c1 = c1, c2 = struct(c2.n1, 1).
- *
- * @param table a target table
- * @param assignments assignments to align
- * @return aligned assignments that match table columns
- */
- protected def alignAssignments(
- table: LogicalPlan,
- assignments: Seq[Assignment]): Seq[Assignment] = {
-
- val columnUpdates = assignments.map(a => ColumnUpdate(toAssignmentRef(a.key), a.value))
- val outputExprs = applyUpdates(table.output, columnUpdates)
- outputExprs.zip(table.output).map { case (expr, attr) =>
- handleCharVarcharLimits(Assignment(attr, expr))
- }
- }
-
- private def applyUpdates(
- cols: Seq[NamedExpression],
- updates: Seq[ColumnUpdate],
- resolver: Resolver = conf.resolver,
- namePrefix: Seq[String] = Nil): Seq[Expression] = {
-
- // iterate through columns at the current level and find which column updates match
- cols.map { col =>
- // find matches for this column or any of its children
- val prefixMatchedUpdates = updates.filter(a => resolver(a.ref.head, col.name))
- prefixMatchedUpdates match {
- // if there is no exact match and no match for children, return the column as is
- case updates if updates.isEmpty =>
- col
-
- // if there is an exact match, return the assigned expression
- case Seq(update) if isExactMatch(update, col, resolver) =>
- castIfNeeded(col, update.expr, resolver, namePrefix :+ col.name)
-
- // if there are matches only for children
- case updates if !hasExactMatch(updates, col, resolver) =>
- col.dataType match {
- case StructType(fields) =>
- // build field expressions
- val fieldExprs = fields.zipWithIndex.map { case (field, ordinal) =>
- Alias(GetStructField(col, ordinal, Some(field.name)), field.name)()
- }
-
- // recursively apply this method on nested fields
- val newUpdates = updates.map(u => u.copy(ref = u.ref.tail))
- val updatedFieldExprs = applyUpdates(
- ArraySeq.unsafeWrapArray(fieldExprs),
- newUpdates,
- resolver,
- namePrefix :+ col.name)
-
- // construct a new struct with updated field expressions
- toNamedStruct(ArraySeq.unsafeWrapArray(fields), updatedFieldExprs)
-
- case otherType =>
- val colName = (namePrefix :+ col.name).mkString(".")
- throw new AnalysisException(
- "Updating nested fields is only supported for StructType " +
- s"but $colName is of type $otherType")
- }
-
- // if there are conflicting updates, throw an exception
- // there are two illegal scenarios:
- // - multiple updates to the same column
- // - updates to a top-level struct and its nested fields (e.g., a.b and a.b.c)
- case updates if hasExactMatch(updates, col, resolver) =>
- val conflictingCols = updates.map(u => (namePrefix ++ u.ref).mkString("."))
- throw new AnalysisException(
- "Updates are in conflict for these columns: " +
- conflictingCols.distinct.mkString(", "))
- }
- }
- }
-
- private def toNamedStruct(fields: Seq[StructField], fieldExprs: Seq[Expression]): Expression = {
- val namedStructExprs = fields.zip(fieldExprs).flatMap { case (field, expr) =>
- Seq(Literal(field.name), expr)
- }
- CreateNamedStruct(namedStructExprs)
- }
-
- private def hasExactMatch(
- updates: Seq[ColumnUpdate],
- col: NamedExpression,
- resolver: Resolver): Boolean = {
-
- updates.exists(assignment => isExactMatch(assignment, col, resolver))
- }
-
- private def isExactMatch(
- update: ColumnUpdate,
- col: NamedExpression,
- resolver: Resolver): Boolean = {
-
- update.ref match {
- case Seq(namePart) if resolver(namePart, col.name) => true
- case _ => false
- }
- }
-
- protected def castIfNeeded(
- tableAttr: NamedExpression,
- expr: Expression,
- resolver: Resolver,
- colPath: Seq[String]): Expression = {
-
- val storeAssignmentPolicy = conf.storeAssignmentPolicy
-
- // run the type check and catch type errors
- storeAssignmentPolicy match {
- case StoreAssignmentPolicy.STRICT | StoreAssignmentPolicy.ANSI =>
- if (expr.nullable && !tableAttr.nullable) {
- throw new AnalysisException(
- s"Cannot write nullable values to non-null column '${tableAttr.name}'")
- }
-
- // use byName = true to catch cases when struct field names don't match
- // e.g. a struct with fields (a, b) is assigned as a struct with fields (a, c) or (b, a)
- val errors = new mutable.ArrayBuffer[String]()
- val canWrite = DataType.canWrite(
- expr.dataType,
- tableAttr.dataType,
- byName = true,
- resolver,
- tableAttr.name,
- storeAssignmentPolicy,
- err => errors += err)
-
- if (!canWrite) {
- throw new AnalysisException(
- s"Cannot write incompatible data:\n- ${errors.mkString("\n- ")}")
- }
-
- case _ => // OK
- }
-
- storeAssignmentPolicy match {
- case _ if tableAttr.dataType.sameType(expr.dataType) =>
- expr
- case StoreAssignmentPolicy.ANSI =>
- val cast =
- Cast(expr, tableAttr.dataType, Option(conf.sessionLocalTimeZone), ansiEnabled = true)
- cast.setTagValue(Cast.BY_TABLE_INSERTION, ())
- TableOutputResolver.checkCastOverflowInTableInsert(cast, colPath.quoted)
- case _ =>
- Cast(expr, tableAttr.dataType, Option(conf.sessionLocalTimeZone))
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckMergeIntoTableConditions.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckMergeIntoTableConditions.scala
deleted file mode 100644
index 079a0eee234d..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckMergeIntoTableConditions.scala
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * A rule that checks MERGE operations contain only supported conditions.
- *
- * Note that this rule must be run in the resolution batch before Spark executes CheckAnalysis.
- * Otherwise, CheckAnalysis will throw a less descriptive error.
- */
-object CheckMergeIntoTableConditions extends Rule[LogicalPlan] {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case m: MergeIntoIcebergTable if m.resolved =>
- checkMergeIntoCondition("SEARCH", m.mergeCondition)
-
- val actions = m.matchedActions ++ m.notMatchedActions
- actions.foreach {
- case DeleteAction(Some(cond)) => checkMergeIntoCondition("DELETE", cond)
- case UpdateAction(Some(cond), _) => checkMergeIntoCondition("UPDATE", cond)
- case InsertAction(Some(cond), _) => checkMergeIntoCondition("INSERT", cond)
- case _ => // OK
- }
-
- m
- }
-
- private def checkMergeIntoCondition(condName: String, cond: Expression): Unit = {
- if (!cond.deterministic) {
- throw new AnalysisException(
- s"Non-deterministic functions are not supported in $condName conditions of " +
- s"MERGE operations: ${cond.sql}")
- }
-
- if (SubqueryExpression.hasSubquery(cond)) {
- throw new AnalysisException(
- s"Subqueries are not supported in conditions of MERGE operations. " +
- s"Found a subquery in the $condName condition: ${cond.sql}")
- }
-
- if (cond.find(_.isInstanceOf[AggregateExpression]).isDefined) {
- throw new AnalysisException(
- s"Agg functions are not supported in $condName conditions of MERGE operations: " + {
- cond.sql
- })
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
deleted file mode 100644
index 549aefaae28f..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.AlterViewAs
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
-import org.apache.spark.sql.catalyst.plans.logical.View
-import org.apache.spark.sql.catalyst.plans.logical.views.CreateIcebergView
-import org.apache.spark.sql.catalyst.plans.logical.views.ResolvedV2View
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.util.SchemaUtils
-
-object CheckViews extends (LogicalPlan => Unit) {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override def apply(plan: LogicalPlan): Unit = {
- plan foreach {
- case CreateIcebergView(
- resolvedIdent @ ResolvedIdentifier(_: ViewCatalog, _),
- _,
- query,
- columnAliases,
- _,
- _,
- _,
- _,
- _,
- replace,
- _,
- _) =>
- verifyColumnCount(resolvedIdent, columnAliases, query)
- SchemaUtils.checkColumnNameDuplication(
- query.schema.fieldNames.toIndexedSeq,
- SQLConf.get.resolver)
- if (replace) {
- val viewIdent: Seq[String] =
- resolvedIdent.catalog.name() +: resolvedIdent.identifier.asMultipartIdentifier
- checkCyclicViewReference(viewIdent, query, Seq(viewIdent))
- }
-
- case AlterViewAs(ResolvedV2View(_, _), _, _) =>
- throw new AnalysisException(
- "ALTER VIEW AS is not supported. Use CREATE OR REPLACE VIEW instead")
-
- case _ => // OK
- }
- }
-
- private def verifyColumnCount(
- ident: ResolvedIdentifier,
- columns: Seq[String],
- query: LogicalPlan): Unit = {
- if (columns.nonEmpty) {
- if (columns.length > query.output.length) {
- throw new AnalysisException(
- String.format(
- "Cannot create view %s.%s, the reason is not enough data columns:\n" +
- "View columns: %s\n" +
- "Data columns: %s",
- ident.catalog.name(),
- ident.identifier,
- columns.mkString(", "),
- query.output.map(c => c.name).mkString(", ")))
- } else if (columns.length < query.output.length) {
- throw new AnalysisException(
- String.format(
- "Cannot create view %s.%s, the reason is too many data columns:\n" +
- "View columns: %s\n" +
- "Data columns: %s",
- ident.catalog.name(),
- ident.identifier,
- columns.mkString(", "),
- query.output.map(c => c.name).mkString(", ")))
- }
- }
- }
-
- private def checkCyclicViewReference(
- viewIdent: Seq[String],
- plan: LogicalPlan,
- cyclePath: Seq[Seq[String]]): Unit = {
- plan match {
- case sub @ SubqueryAlias(_, Project(_, _)) =>
- val currentViewIdent: Seq[String] = sub.identifier.qualifier :+ sub.identifier.name
- checkIfRecursiveView(viewIdent, currentViewIdent, cyclePath, sub.children)
- case v1View: View =>
- val currentViewIdent: Seq[String] = v1View.desc.identifier.nameParts
- checkIfRecursiveView(viewIdent, currentViewIdent, cyclePath, v1View.children)
- case _ =>
- plan.children.foreach(child => checkCyclicViewReference(viewIdent, child, cyclePath))
- }
-
- plan.expressions.flatMap(_.flatMap {
- case e: SubqueryExpression =>
- checkCyclicViewReference(viewIdent, e.plan, cyclePath)
- None
- case _ => None
- })
- }
-
- private def checkIfRecursiveView(
- viewIdent: Seq[String],
- currentViewIdent: Seq[String],
- cyclePath: Seq[Seq[String]],
- children: Seq[LogicalPlan]): Unit = {
- val newCyclePath = cyclePath :+ currentViewIdent
- if (currentViewIdent == viewIdent) {
- throw new AnalysisException(
- String.format(
- "Recursive cycle in view detected: %s (cycle: %s)",
- viewIdent.asIdentifier,
- newCyclePath.map(p => p.mkString(".")).mkString(" -> ")))
- } else {
- children.foreach { c =>
- checkCyclicViewReference(viewIdent, c, newCyclePath)
- }
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/MergeIntoIcebergTableResolutionCheck.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/MergeIntoIcebergTableResolutionCheck.scala
deleted file mode 100644
index 3cbf27e6453b..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/MergeIntoIcebergTableResolutionCheck.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable
-
-object MergeIntoIcebergTableResolutionCheck extends (LogicalPlan => Unit) {
-
- override def apply(plan: LogicalPlan): Unit = {
- plan foreach {
- case m: UnresolvedMergeIntoIcebergTable =>
- throw new AnalysisException(s"Could not resolve Iceberg MERGE INTO statement: $m")
- case _ => // OK
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ProcedureArgumentCoercion.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ProcedureArgumentCoercion.scala
deleted file mode 100644
index aadc332181e2..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ProcedureArgumentCoercion.scala
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.spark.sql.catalyst.plans.logical.Call
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-
-object ProcedureArgumentCoercion extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case c @ Call(procedure, args) if c.resolved =>
- val params = procedure.parameters
-
- val newArgs = args.zipWithIndex.map { case (arg, index) =>
- val param = params(index)
- val paramType = param.dataType
- val argType = arg.dataType
-
- if (paramType != argType && !Cast.canUpCast(argType, paramType)) {
- throw new AnalysisException(
- s"Wrong arg type for ${param.name}: cannot cast $argType to $paramType")
- }
-
- if (paramType != argType) {
- Cast(arg, paramType)
- } else {
- arg
- }
- }
-
- if (newArgs != args) {
- c.copy(args = newArgs)
- } else {
- c
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMergeIntoTableReferences.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMergeIntoTableReferences.scala
deleted file mode 100644
index 0c64636a000d..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMergeIntoTableReferences.scala
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertStarAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.UpdateStarAction
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * A resolution rule similar to ResolveReferences in Spark but handles Iceberg MERGE operations.
- */
-case class ResolveMergeIntoTableReferences(spark: SparkSession) extends Rule[LogicalPlan] {
-
- private lazy val analyzer: Analyzer = spark.sessionState.analyzer
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
- case m @ UnresolvedMergeIntoIcebergTable(targetTable, sourceTable, context)
- if targetTable.resolved && sourceTable.resolved && m.duplicateResolved =>
-
- val resolvedMatchedActions = context.matchedActions.map {
- case DeleteAction(cond) =>
- val resolvedCond = cond.map(resolveCond("DELETE", _, m))
- DeleteAction(resolvedCond)
-
- case UpdateAction(cond, assignments) =>
- val resolvedCond = cond.map(resolveCond("UPDATE", _, m))
- // the update action can access columns from both target and source tables
- val resolvedAssignments =
- resolveAssignments(assignments, m, resolveValuesWithSourceOnly = false)
- UpdateAction(resolvedCond, resolvedAssignments)
-
- case UpdateStarAction(updateCondition) =>
- val resolvedUpdateCondition = updateCondition.map(resolveCond("UPDATE", _, m))
- val assignments = targetTable.output.map { attr =>
- Assignment(attr, UnresolvedAttribute(Seq(attr.name)))
- }
- // for UPDATE *, the value must be from the source table
- val resolvedAssignments =
- resolveAssignments(assignments, m, resolveValuesWithSourceOnly = true)
- UpdateAction(resolvedUpdateCondition, resolvedAssignments)
-
- case _ =>
- throw new AnalysisException("Matched actions can only contain UPDATE or DELETE")
- }
-
- val resolvedNotMatchedActions = context.notMatchedActions.map {
- case InsertAction(cond, assignments) =>
- // the insert action is used when not matched, so its condition and value can only
- // access columns from the source table
- val resolvedCond = cond.map(resolveCond("INSERT", _, Project(Nil, m.sourceTable)))
- val resolvedAssignments =
- resolveAssignments(assignments, m, resolveValuesWithSourceOnly = true)
- InsertAction(resolvedCond, resolvedAssignments)
-
- case InsertStarAction(cond) =>
- // the insert action is used when not matched, so its condition and value can only
- // access columns from the source table
- val resolvedCond = cond.map(resolveCond("INSERT", _, Project(Nil, m.sourceTable)))
- val assignments = targetTable.output.map { attr =>
- Assignment(attr, UnresolvedAttribute(Seq(attr.name)))
- }
- val resolvedAssignments =
- resolveAssignments(assignments, m, resolveValuesWithSourceOnly = true)
- InsertAction(resolvedCond, resolvedAssignments)
-
- case _ =>
- throw new AnalysisException("Not matched actions can only contain INSERT")
- }
-
- val resolvedMergeCondition = resolveCond("SEARCH", context.mergeCondition, m)
-
- MergeIntoIcebergTable(
- targetTable,
- sourceTable,
- mergeCondition = resolvedMergeCondition,
- matchedActions = resolvedMatchedActions,
- notMatchedActions = resolvedNotMatchedActions)
- }
-
- private def resolveCond(condName: String, cond: Expression, plan: LogicalPlan): Expression = {
- val resolvedCond = analyzer.resolveExpressionByPlanChildren(cond, plan)
-
- val unresolvedAttrs = resolvedCond.references.filter(!_.resolved)
- if (unresolvedAttrs.nonEmpty) {
- throw new AnalysisException(
- s"Cannot resolve ${unresolvedAttrs.map(_.sql).mkString("[", ",", "]")} in $condName condition " +
- s"of MERGE operation given input columns: ${plan.inputSet.toSeq.map(_.sql).mkString("[", ",", "]")}")
- }
-
- resolvedCond
- }
-
- // copied from ResolveReferences in Spark
- private def resolveAssignments(
- assignments: Seq[Assignment],
- mergeInto: UnresolvedMergeIntoIcebergTable,
- resolveValuesWithSourceOnly: Boolean): Seq[Assignment] = {
- assignments.map { assign =>
- val resolvedKey = assign.key match {
- case c if !c.resolved =>
- resolveMergeExprOrFail(c, Project(Nil, mergeInto.targetTable))
- case o => o
- }
- val resolvedValue = assign.value match {
- // The update values may contain target and/or source references.
- case c if !c.resolved =>
- if (resolveValuesWithSourceOnly) {
- resolveMergeExprOrFail(c, Project(Nil, mergeInto.sourceTable))
- } else {
- resolveMergeExprOrFail(c, mergeInto)
- }
- case o => o
- }
- Assignment(resolvedKey, resolvedValue)
- }
- }
-
- // copied from ResolveReferences in Spark
- private def resolveMergeExprOrFail(e: Expression, p: LogicalPlan): Expression = {
- val resolved = analyzer.resolveExpressionByPlanChildren(e, p)
- resolved.references.filter(!_.resolved).foreach { a =>
- // Note: This will throw error only on unresolved attribute issues,
- // not other resolution errors like mismatched data types.
- val cols = p.inputSet.toSeq.map(_.sql).mkString(", ")
- throw new AnalysisException(s"cannot resolve ${a.sql} in MERGE command given columns [$cols]")
- }
- resolved
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala
deleted file mode 100644
index c2d13bfa021e..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import java.util.Locale
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.plans.logical.Call
-import org.apache.spark.sql.catalyst.plans.logical.CallArgument
-import org.apache.spark.sql.catalyst.plans.logical.CallStatement
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.NamedArgument
-import org.apache.spark.sql.catalyst.plans.logical.PositionalArgument
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.connector.catalog.CatalogPlugin
-import org.apache.spark.sql.connector.catalog.LookupCatalog
-import org.apache.spark.sql.connector.iceberg.catalog.ProcedureCatalog
-import org.apache.spark.sql.connector.iceberg.catalog.ProcedureParameter
-import scala.collection.Seq
-
-case class ResolveProcedures(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog {
-
- protected lazy val catalogManager: CatalogManager = spark.sessionState.catalogManager
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case CallStatement(CatalogAndIdentifier(catalog, ident), args) =>
- val procedure = catalog.asProcedureCatalog.loadProcedure(ident)
-
- val params = procedure.parameters
- val normalizedParams = normalizeParams(params)
- validateParams(normalizedParams)
-
- val normalizedArgs = normalizeArgs(args)
- Call(procedure, args = buildArgExprs(normalizedParams, normalizedArgs).toSeq)
- }
-
- private def validateParams(params: Seq[ProcedureParameter]): Unit = {
- // should not be any duplicate param names
- val duplicateParamNames = params.groupBy(_.name).collect {
- case (name, matchingParams) if matchingParams.length > 1 => name
- }
-
- if (duplicateParamNames.nonEmpty) {
- throw new AnalysisException(
- s"Duplicate parameter names: ${duplicateParamNames.mkString("[", ",", "]")}")
- }
-
- // optional params should be at the end
- params.sliding(2).foreach {
- case Seq(previousParam, currentParam) if !previousParam.required && currentParam.required =>
- throw new AnalysisException(
- s"Optional parameters must be after required ones but $currentParam is after $previousParam")
- case _ =>
- }
- }
-
- private def buildArgExprs(
- params: Seq[ProcedureParameter],
- args: Seq[CallArgument]): Seq[Expression] = {
-
- // build a map of declared parameter names to their positions
- val nameToPositionMap = params.map(_.name).zipWithIndex.toMap
-
- // build a map of parameter names to args
- val nameToArgMap = buildNameToArgMap(params, args, nameToPositionMap)
-
- // verify all required parameters are provided
- val missingParamNames = params.filter(_.required).collect {
- case param if !nameToArgMap.contains(param.name) => param.name
- }
-
- if (missingParamNames.nonEmpty) {
- throw new AnalysisException(
- s"Missing required parameters: ${missingParamNames.mkString("[", ",", "]")}")
- }
-
- val argExprs = new Array[Expression](params.size)
-
- nameToArgMap.foreach { case (name, arg) =>
- val position = nameToPositionMap(name)
- argExprs(position) = arg.expr
- }
-
- // assign nulls to optional params that were not set
- params.foreach {
- case p if !p.required && !nameToArgMap.contains(p.name) =>
- val position = nameToPositionMap(p.name)
- argExprs(position) = Literal.create(null, p.dataType)
- case _ =>
- }
-
- argExprs
- }
-
- private def buildNameToArgMap(
- params: Seq[ProcedureParameter],
- args: Seq[CallArgument],
- nameToPositionMap: Map[String, Int]): Map[String, CallArgument] = {
-
- val containsNamedArg = args.exists(_.isInstanceOf[NamedArgument])
- val containsPositionalArg = args.exists(_.isInstanceOf[PositionalArgument])
-
- if (containsNamedArg && containsPositionalArg) {
- throw new AnalysisException("Named and positional arguments cannot be mixed")
- }
-
- if (containsNamedArg) {
- buildNameToArgMapUsingNames(args, nameToPositionMap)
- } else {
- buildNameToArgMapUsingPositions(args, params)
- }
- }
-
- private def buildNameToArgMapUsingNames(
- args: Seq[CallArgument],
- nameToPositionMap: Map[String, Int]): Map[String, CallArgument] = {
-
- val namedArgs = args.asInstanceOf[Seq[NamedArgument]]
-
- val validationErrors = namedArgs.groupBy(_.name).collect {
- case (name, matchingArgs) if matchingArgs.size > 1 => s"Duplicate procedure argument: $name"
- case (name, _) if !nameToPositionMap.contains(name) => s"Unknown argument: $name"
- }
-
- if (validationErrors.nonEmpty) {
- throw new AnalysisException(
- s"Could not build name to arg map: ${validationErrors.mkString(", ")}")
- }
-
- namedArgs.map(arg => arg.name -> arg).toMap
- }
-
- private def buildNameToArgMapUsingPositions(
- args: Seq[CallArgument],
- params: Seq[ProcedureParameter]): Map[String, CallArgument] = {
-
- if (args.size > params.size) {
- throw new AnalysisException("Too many arguments for procedure")
- }
-
- args.zipWithIndex.map { case (arg, position) =>
- val param = params(position)
- param.name -> arg
- }.toMap
- }
-
- private def normalizeParams(params: Seq[ProcedureParameter]): Seq[ProcedureParameter] = {
- params.map {
- case param if param.required =>
- val normalizedName = param.name.toLowerCase(Locale.ROOT)
- ProcedureParameter.required(normalizedName, param.dataType)
- case param =>
- val normalizedName = param.name.toLowerCase(Locale.ROOT)
- ProcedureParameter.optional(normalizedName, param.dataType)
- }
- }
-
- private def normalizeArgs(args: Seq[CallArgument]): Seq[CallArgument] = {
- args.map {
- case a @ NamedArgument(name, _) => a.copy(name = name.toLowerCase(Locale.ROOT))
- case other => other
- }
- }
-
- implicit class CatalogHelper(plugin: CatalogPlugin) {
- def asProcedureCatalog: ProcedureCatalog = plugin match {
- case procedureCatalog: ProcedureCatalog =>
- procedureCatalog
- case _ =>
- throw new AnalysisException(s"Cannot use catalog ${plugin.name}: not a ProcedureCatalog")
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
deleted file mode 100644
index 4f7e2b4d0f24..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.FunctionIdentifier
-import org.apache.spark.sql.catalyst.analysis.ViewUtil.IcebergViewHelper
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.expressions.UpCast
-import org.apache.spark.sql.catalyst.parser.ParseException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
-import org.apache.spark.sql.catalyst.plans.logical.views.CreateIcebergView
-import org.apache.spark.sql.catalyst.plans.logical.views.ResolvedV2View
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.connector.catalog.LookupCatalog
-import org.apache.spark.sql.connector.catalog.View
-import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.types.MetadataBuilder
-
-case class ResolveViews(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- protected lazy val catalogManager: CatalogManager = spark.sessionState.catalogManager
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case u @ UnresolvedRelation(nameParts, _, _)
- if catalogManager.v1SessionCatalog.isTempView(nameParts) =>
- u
-
- case u @ UnresolvedRelation(parts @ CatalogAndIdentifier(catalog, ident), _, _) =>
- ViewUtil
- .loadView(catalog, ident)
- .map(createViewRelation(parts, _))
- .getOrElse(u)
-
- case u @ UnresolvedTableOrView(CatalogAndIdentifier(catalog, ident), _, _) =>
- ViewUtil
- .loadView(catalog, ident)
- .map(_ => ResolvedV2View(catalog.asViewCatalog, ident))
- .getOrElse(u)
-
- case c @ CreateIcebergView(
- ResolvedIdentifier(_, _),
- _,
- query,
- columnAliases,
- columnComments,
- _,
- _,
- _,
- _,
- _,
- _,
- _) if query.resolved && !c.rewritten =>
- val aliased = aliasColumns(query, columnAliases, columnComments)
- c.copy(
- query = aliased,
- queryColumnNames = query.schema.fieldNames.toIndexedSeq,
- rewritten = true)
- }
-
- private def aliasColumns(
- plan: LogicalPlan,
- columnAliases: Seq[String],
- columnComments: Seq[Option[String]]): LogicalPlan = {
- if (columnAliases.isEmpty || columnAliases.length != plan.output.length) {
- plan
- } else {
- val projectList = plan.output.zipWithIndex.map { case (attr, pos) =>
- if (columnComments.apply(pos).isDefined) {
- val meta =
- new MetadataBuilder().putString("comment", columnComments.apply(pos).get).build()
- Alias(attr, columnAliases.apply(pos))(explicitMetadata = Some(meta))
- } else {
- Alias(attr, columnAliases.apply(pos))()
- }
- }
- Project(projectList, plan)
- }
- }
-
- private def createViewRelation(nameParts: Seq[String], view: View): LogicalPlan = {
- val parsed = parseViewText(nameParts.quoted, view.query)
-
- // Apply any necessary rewrites to preserve correct resolution
- val viewCatalogAndNamespace: Seq[String] = view.currentCatalog +: view.currentNamespace.toSeq
- val rewritten = rewriteIdentifiers(parsed, viewCatalogAndNamespace);
-
- // Apply the field aliases and column comments
- // This logic differs from how Spark handles views in SessionCatalog.fromCatalogTable.
- // This is more strict because it doesn't allow resolution by field name.
- val aliases = view.schema.fields.zipWithIndex.map { case (expected, pos) =>
- val attr = GetColumnByOrdinal(pos, expected.dataType)
- Alias(UpCast(attr, expected.dataType), expected.name)(explicitMetadata =
- Some(expected.metadata))
- }.toIndexedSeq
-
- SubqueryAlias(nameParts, Project(aliases, rewritten))
- }
-
- private def parseViewText(name: String, viewText: String): LogicalPlan = {
- val origin = Origin(objectType = Some("VIEW"), objectName = Some(name))
-
- try {
- CurrentOrigin.withOrigin(origin) {
- spark.sessionState.sqlParser.parseQuery(viewText)
- }
- } catch {
- case _: ParseException =>
- throw QueryCompilationErrors.invalidViewText(viewText, name)
- }
- }
-
- private def rewriteIdentifiers(
- plan: LogicalPlan,
- catalogAndNamespace: Seq[String]): LogicalPlan = {
- // Substitute CTEs and Unresolved Ordinals within the view, then rewrite unresolved functions and relations
- qualifyTableIdentifiers(
- qualifyFunctionIdentifiers(
- SubstituteUnresolvedOrdinals.apply(CTESubstitution.apply(plan)),
- catalogAndNamespace),
- catalogAndNamespace)
- }
-
- private def qualifyFunctionIdentifiers(
- plan: LogicalPlan,
- catalogAndNamespace: Seq[String]): LogicalPlan = plan transformExpressions {
- case u @ UnresolvedFunction(Seq(name), _, _, _, _) =>
- if (!isBuiltinFunction(name)) {
- u.copy(nameParts = catalogAndNamespace :+ name)
- } else {
- u
- }
- case u @ UnresolvedFunction(parts, _, _, _, _) if !isCatalog(parts.head) =>
- u.copy(nameParts = catalogAndNamespace.head +: parts)
- }
-
- /**
- * Qualify table identifiers with default catalog and namespace if necessary.
- */
- private def qualifyTableIdentifiers(
- child: LogicalPlan,
- catalogAndNamespace: Seq[String]): LogicalPlan =
- child transform {
- case u @ UnresolvedRelation(Seq(table), _, _) =>
- u.copy(multipartIdentifier = catalogAndNamespace :+ table)
- case u @ UnresolvedRelation(parts, _, _) if !isCatalog(parts.head) =>
- u.copy(multipartIdentifier = catalogAndNamespace.head +: parts)
- case other =>
- other.transformExpressions { case subquery: SubqueryExpression =>
- subquery.withNewPlan(qualifyTableIdentifiers(subquery.plan, catalogAndNamespace))
- }
- }
-
- private def isCatalog(name: String): Boolean = {
- catalogManager.isCatalogRegistered(name)
- }
-
- private def isBuiltinFunction(name: String): Boolean = {
- catalogManager.v1SessionCatalog.isBuiltinFunction(FunctionIdentifier(name))
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
deleted file mode 100644
index 7ac9a1c6c856..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.IsNotNull
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
-import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
-import org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils
-import org.apache.spark.sql.catalyst.plans.FullOuter
-import org.apache.spark.sql.catalyst.plans.Inner
-import org.apache.spark.sql.catalyst.plans.LeftAnti
-import org.apache.spark.sql.catalyst.plans.LeftOuter
-import org.apache.spark.sql.catalyst.plans.RightOuter
-import org.apache.spark.sql.catalyst.plans.logical.AppendData
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.HintInfo
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.Join
-import org.apache.spark.sql.catalyst.plans.logical.JoinHint
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeAction
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.MergeRows
-import org.apache.spark.sql.catalyst.plans.logical.NO_BROADCAST_HASH
-import org.apache.spark.sql.catalyst.plans.logical.NoStatsUnaryNode
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.NamedReference
-import org.apache.spark.sql.connector.write.RowLevelOperation.Command.MERGE
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.connector.write.SupportsDelta
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.IntegerType
-import org.apache.spark.sql.util.CaseInsensitiveStringMap
-
-/**
- * Assigns a rewrite plan for v2 tables that support rewriting data to handle MERGE statements.
- *
- * This rule assumes the commands have been fully resolved and all assignments have been aligned.
- * That's why it must be run after AlignRowLevelCommandAssignments.
- */
-object RewriteMergeIntoTable extends RewriteRowLevelIcebergCommand with PredicateHelper {
-
- private final val ROW_FROM_SOURCE = "__row_from_source"
- private final val ROW_FROM_TARGET = "__row_from_target"
- private final val ROW_ID = "__row_id"
-
- private final val ROW_FROM_SOURCE_REF = FieldReference(ROW_FROM_SOURCE)
- private final val ROW_FROM_TARGET_REF = FieldReference(ROW_FROM_TARGET)
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case m @ MergeIntoIcebergTable(
- aliasedTable,
- source,
- cond,
- matchedActions,
- notMatchedActions,
- None)
- if m.resolved && m.aligned && matchedActions.isEmpty && notMatchedActions.size == 1 =>
-
- EliminateSubqueryAliases(aliasedTable) match {
- case r: DataSourceV2Relation =>
- // NOT MATCHED conditions may only refer to columns in source so they can be pushed down
- val insertAction = notMatchedActions.head.asInstanceOf[InsertAction]
- val filteredSource = insertAction.condition match {
- case Some(insertCond) => Filter(insertCond, source)
- case None => source
- }
-
- // when there are no MATCHED actions, use a left anti join to remove any matching rows
- // and switch to using a regular append instead of a row-level merge
- // only unmatched source rows that match the condition are appended to the table
- val joinPlan = Join(filteredSource, r, LeftAnti, Some(cond), JoinHint.NONE)
-
- val outputExprs = insertAction.assignments.map(_.value)
- val outputColNames = r.output.map(_.name)
- val outputCols = outputExprs.zip(outputColNames).map { case (expr, name) =>
- Alias(expr, name)()
- }
- val project = Project(outputCols, joinPlan)
-
- AppendData.byPosition(r, project)
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
-
- case m @ MergeIntoIcebergTable(
- aliasedTable,
- source,
- cond,
- matchedActions,
- notMatchedActions,
- None) if m.resolved && m.aligned && matchedActions.isEmpty =>
-
- EliminateSubqueryAliases(aliasedTable) match {
- case r: DataSourceV2Relation =>
- // when there are no MATCHED actions, use a left anti join to remove any matching rows
- // and switch to using a regular append instead of a row-level merge
- // only unmatched source rows that match action conditions are appended to the table
- val joinPlan = Join(source, r, LeftAnti, Some(cond), JoinHint.NONE)
-
- val notMatchedConditions = notMatchedActions.map(actionCondition)
- val notMatchedOutputs = notMatchedActions.map(notMatchedActionOutput(_, Nil))
-
- // merge rows as there are multiple not matched actions
- val mergeRows = MergeRows(
- isSourceRowPresent = TrueLiteral,
- isTargetRowPresent = FalseLiteral,
- matchedConditions = Nil,
- matchedOutputs = Nil,
- notMatchedConditions = notMatchedConditions,
- notMatchedOutputs = notMatchedOutputs,
- targetOutput = Nil,
- performCardinalityCheck = false,
- emitNotMatchedTargetRows = false,
- output = buildMergeRowsOutput(Nil, notMatchedOutputs, r.output),
- joinPlan)
-
- AppendData.byPosition(r, mergeRows)
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
-
- case m @ MergeIntoIcebergTable(
- aliasedTable,
- source,
- cond,
- matchedActions,
- notMatchedActions,
- None) if m.resolved && m.aligned =>
-
- EliminateSubqueryAliases(aliasedTable) match {
- case r @ DataSourceV2Relation(tbl: SupportsRowLevelOperations, _, _, _, _) =>
- val table = buildOperationTable(tbl, MERGE, CaseInsensitiveStringMap.empty())
- val rewritePlan = table.operation match {
- case _: SupportsDelta =>
- buildWriteDeltaPlan(r, table, source, cond, matchedActions, notMatchedActions)
- case _ =>
- buildReplaceDataPlan(r, table, source, cond, matchedActions, notMatchedActions)
- }
-
- m.copy(rewritePlan = Some(rewritePlan))
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- private def buildReplaceDataPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- source: LogicalPlan,
- cond: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction]): ReplaceIcebergData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a scan relation and include all required metadata columns
- val readRelation = buildRelationWithAttrs(relation, operationTable, metadataAttrs)
- val readAttrs = readRelation.output
-
- val performCardinalityCheck = isCardinalityCheckNeeded(matchedActions)
-
- // project an extra column to check if a target row exists after the join
- // if needed, project a synthetic row ID to perform the cardinality check
- val rowFromTarget = Alias(TrueLiteral, ROW_FROM_TARGET)()
- val targetTableProjExprs = if (performCardinalityCheck) {
- val rowId = Alias(MonotonicallyIncreasingID(), ROW_ID)()
- readAttrs ++ Seq(rowFromTarget, rowId)
- } else {
- readAttrs :+ rowFromTarget
- }
- val targetTableProj = Project(targetTableProjExprs, readRelation)
-
- // project an extra column to check if a source row exists after the join
- val rowFromSource = Alias(TrueLiteral, ROW_FROM_SOURCE)()
- val sourceTableProjExprs = source.output :+ rowFromSource
- val sourceTableProj = Project(sourceTableProjExprs, source)
-
- // use left outer join if there is no NOT MATCHED action, unmatched source rows can be discarded
- // use full outer join in all other cases, unmatched source rows may be needed
- // disable broadcasts for the target table to perform the cardinality check
- val joinType = if (notMatchedActions.isEmpty) LeftOuter else FullOuter
- val joinHint = JoinHint(leftHint = Some(HintInfo(Some(NO_BROADCAST_HASH))), rightHint = None)
- val joinPlan =
- Join(NoStatsUnaryNode(targetTableProj), sourceTableProj, joinType, Some(cond), joinHint)
-
- val matchedConditions = matchedActions.map(actionCondition)
- val matchedOutputs = matchedActions.map(matchedActionOutput(_, metadataAttrs))
-
- val notMatchedConditions = notMatchedActions.map(actionCondition)
- val notMatchedOutputs = notMatchedActions.map(notMatchedActionOutput(_, metadataAttrs))
-
- val rowFromSourceAttr = resolveAttrRef(ROW_FROM_SOURCE_REF, joinPlan)
- val rowFromTargetAttr = resolveAttrRef(ROW_FROM_TARGET_REF, joinPlan)
-
- val mergeRows = MergeRows(
- isSourceRowPresent = IsNotNull(rowFromSourceAttr),
- isTargetRowPresent =
- if (notMatchedActions.isEmpty) TrueLiteral else IsNotNull(rowFromTargetAttr),
- matchedConditions = matchedConditions,
- matchedOutputs = matchedOutputs,
- notMatchedConditions = notMatchedConditions,
- notMatchedOutputs = notMatchedOutputs,
- targetOutput = readAttrs,
- performCardinalityCheck = performCardinalityCheck,
- emitNotMatchedTargetRows = true,
- output = buildMergeRowsOutput(matchedOutputs, notMatchedOutputs :+ readAttrs, readAttrs),
- joinPlan)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceIcebergData(writeRelation, mergeRows, relation)
- }
-
- // build a rewrite plan for sources that support row deltas
- private def buildWriteDeltaPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- source: LogicalPlan,
- cond: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction]): WriteIcebergDelta = {
-
- // resolve all needed attrs (e.g. row ID and any required metadata attrs)
- val rowAttrs = relation.output
- val rowIdAttrs = resolveRowIdAttrs(relation, operationTable.operation)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a scan relation and include all required metadata columns
- val readRelation = buildRelationWithAttrs(relation, operationTable, rowIdAttrs ++ metadataAttrs)
- val readAttrs = readRelation.output
-
- val (targetCond, joinCond) = splitMergeCond(cond, readRelation)
-
- val performCardinalityCheck = isCardinalityCheckNeeded(matchedActions)
-
- // project an extra column to check if a target row exists after the join
- // if needed, project a synthetic row ID to perform the cardinality check
- val rowFromTarget = Alias(TrueLiteral, ROW_FROM_TARGET)()
- val targetTableProjExprs = if (performCardinalityCheck) {
- val rowId = Alias(MonotonicallyIncreasingID(), ROW_ID)()
- readAttrs ++ Seq(rowFromTarget, rowId)
- } else {
- readAttrs :+ rowFromTarget
- }
- val targetTableProj = Project(targetTableProjExprs, Filter(targetCond, readRelation))
-
- // project an extra column to check if a source row exists after the join
- val sourceTableProjExprs = source.output :+ Alias(TrueLiteral, ROW_FROM_SOURCE)()
- val sourceTableProj = Project(sourceTableProjExprs, source)
-
- // use inner join if there is no NOT MATCHED action, unmatched source rows can be discarded
- // use right outer join in all other cases, unmatched source rows may be needed
- // also disable broadcasts for the target table to perform the cardinality check
- val joinType = if (notMatchedActions.isEmpty) Inner else RightOuter
- val joinHint = JoinHint(leftHint = Some(HintInfo(Some(NO_BROADCAST_HASH))), rightHint = None)
- val joinPlan =
- Join(NoStatsUnaryNode(targetTableProj), sourceTableProj, joinType, Some(joinCond), joinHint)
-
- val metadataReadAttrs = readAttrs.filterNot(relation.outputSet.contains)
-
- val matchedConditions = matchedActions.map(actionCondition)
- val matchedOutputs = matchedActions.map { action =>
- matchedDeltaActionOutput(action, rowAttrs, rowIdAttrs, metadataReadAttrs)
- }
-
- val notMatchedConditions = notMatchedActions.map(actionCondition)
- val notMatchedOutputs = notMatchedActions.map { action =>
- notMatchedDeltaActionOutput(action, metadataReadAttrs)
- }
-
- val operationTypeAttr = AttributeReference(OPERATION_COLUMN, IntegerType, nullable = false)()
- val rowFromSourceAttr = resolveAttrRef(ROW_FROM_SOURCE_REF, joinPlan)
- val rowFromTargetAttr = resolveAttrRef(ROW_FROM_TARGET_REF, joinPlan)
-
- // merged rows must contain values for the operation type and all read attrs
- val mergeRowsOutput =
- buildMergeRowsOutput(matchedOutputs, notMatchedOutputs, operationTypeAttr +: readAttrs)
-
- val mergeRows = MergeRows(
- isSourceRowPresent = IsNotNull(rowFromSourceAttr),
- isTargetRowPresent =
- if (notMatchedActions.isEmpty) TrueLiteral else IsNotNull(rowFromTargetAttr),
- matchedConditions = matchedConditions,
- matchedOutputs = matchedOutputs,
- notMatchedConditions = notMatchedConditions,
- notMatchedOutputs = notMatchedOutputs,
- // only needed if emitting unmatched target rows
- targetOutput = Nil,
- performCardinalityCheck = performCardinalityCheck,
- emitNotMatchedTargetRows = false,
- output = mergeRowsOutput,
- joinPlan)
-
- // build a plan to write the row delta to the table
- val writeRelation = relation.copy(table = operationTable)
- val projections = buildDeltaProjections(mergeRows, rowAttrs, rowIdAttrs, metadataAttrs)
- WriteIcebergDelta(writeRelation, mergeRows, relation, projections)
- }
-
- private def actionCondition(action: MergeAction): Expression = {
- action.condition.getOrElse(TrueLiteral)
- }
-
- private def matchedActionOutput(
- clause: MergeAction,
- metadataAttrs: Seq[Attribute]): Seq[Seq[Expression]] = {
-
- clause match {
- case u: UpdateAction =>
- Seq(u.assignments.map(_.value) ++ metadataAttrs)
-
- case _: DeleteAction =>
- Nil
-
- case other =>
- throw new AnalysisException(s"Unexpected WHEN MATCHED action: $other")
- }
- }
-
- private def notMatchedActionOutput(
- clause: MergeAction,
- metadataAttrs: Seq[Attribute]): Seq[Expression] = {
-
- clause match {
- case i: InsertAction =>
- i.assignments.map(_.value) ++ metadataAttrs.map(attr => Literal(null, attr.dataType))
-
- case other =>
- throw new AnalysisException(s"Unexpected WHEN NOT MATCHED action: $other")
- }
- }
-
- private def matchedDeltaActionOutput(
- action: MergeAction,
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): Seq[Seq[Expression]] = {
-
- action match {
- case u: UpdateAction =>
- val delete = deltaDeleteOutput(rowAttrs, rowIdAttrs, metadataAttrs)
- val insert = deltaInsertOutput(u.assignments.map(_.value), metadataAttrs)
- Seq(delete, insert)
-
- case _: DeleteAction =>
- val delete = deltaDeleteOutput(rowAttrs, rowIdAttrs, metadataAttrs)
- Seq(delete)
-
- case other =>
- throw new AnalysisException(s"Unexpected WHEN MATCHED action: $other")
- }
- }
-
- private def notMatchedDeltaActionOutput(
- action: MergeAction,
- metadataAttrs: Seq[Attribute]): Seq[Expression] = {
-
- action match {
- case i: InsertAction =>
- deltaInsertOutput(i.assignments.map(_.value), metadataAttrs)
-
- case other =>
- throw new AnalysisException(s"Unexpected WHEN NOT MATCHED action: $other")
- }
- }
-
- private def buildMergeRowsOutput(
- matchedOutputs: Seq[Seq[Seq[Expression]]],
- notMatchedOutputs: Seq[Seq[Expression]],
- attrs: Seq[Attribute]): Seq[Attribute] = {
-
- // collect all outputs from matched and not matched actions (ignoring actions that discard rows)
- val outputs = matchedOutputs.flatten.filter(_.nonEmpty) ++ notMatchedOutputs.filter(_.nonEmpty)
- buildMergingOutput(outputs, attrs)
- }
-
- private def isCardinalityCheckNeeded(actions: Seq[MergeAction]): Boolean = actions match {
- case Seq(DeleteAction(None)) => false
- case _ => true
- }
-
- private def resolveAttrRef(ref: NamedReference, plan: LogicalPlan): AttributeReference = {
- V2ExpressionUtils.resolveRef[AttributeReference](ref, plan)
- }
-
- private def buildDeltaProjections(
- mergeRows: MergeRows,
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): WriteDeltaProjections = {
-
- val outputs = mergeRows.matchedOutputs.flatten ++ mergeRows.notMatchedOutputs
- buildDeltaProjections(mergeRows, outputs, rowAttrs, rowIdAttrs, metadataAttrs)
- }
-
- // splits the MERGE condition into a predicate that references columns only from the target table,
- // which can be pushed down, and a predicate used as a join condition to find matches
- private def splitMergeCond(
- cond: Expression,
- targetTable: LogicalPlan): (Expression, Expression) = {
-
- val (targetPredicates, joinPredicates) = splitConjunctivePredicates(cond)
- .partition(_.references.subsetOf(targetTable.outputSet))
- val targetCond = targetPredicates.reduceOption(And).getOrElse(TrueLiteral)
- val joinCond = joinPredicates.reduceOption(And).getOrElse(TrueLiteral)
- (targetCond, joinCond)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTableForRowLineage.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTableForRowLineage.scala
deleted file mode 100644
index 1dbc171f5dbd..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTableForRowLineage.scala
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-
-object RewriteMergeIntoTableForRowLineage extends RewriteOperationForRowLineage {
-
- override def apply(plan: LogicalPlan): LogicalPlan = {
- plan.resolveOperators {
- case m @ MergeIntoIcebergTable(_, _, _, matchedActions, _, _)
- if m.resolved && m.aligned &&
- matchedActions.nonEmpty &&
- shouldUpdatePlan(m.targetTable) =>
- updateMergeIntoForRowLineage(m)
- }
- }
-
- protected def updateMergeIntoForRowLineage(mergeIntoTable: MergeIntoIcebergTable): LogicalPlan = {
- EliminateSubqueryAliases(mergeIntoTable.targetTable) match {
- case r: DataSourceV2Relation =>
- val matchedActions = mergeIntoTable.matchedActions
- val notMatchedActions = mergeIntoTable.notMatchedActions
- val (rowId, lastUpdatedSequenceNumber) = findRowLineageAttributes(r.metadataOutput).get
-
- val matchedAssignmentsForLineage = matchedActions.map {
- case UpdateAction(cond, assignments) =>
- UpdateAction(
- cond,
- assignments ++ Seq(
- Assignment(rowId, rowId),
- Assignment(lastUpdatedSequenceNumber, Literal(null))))
-
- case deleteAction => deleteAction
- }
-
- val notMatchedActionsForLineage = notMatchedActions.map {
- case InsertAction(cond, assignments) =>
- InsertAction(
- cond,
- assignments ++ Seq(
- Assignment(rowId, Literal(null)),
- Assignment(lastUpdatedSequenceNumber, Literal(null))))
- }
-
- val tableWithLineage = r.copy(output = r.output ++ Seq(rowId, lastUpdatedSequenceNumber))
- mergeIntoTable.copy(
- targetTable = tableWithLineage,
- matchedActions = matchedAssignmentsForLineage,
- notMatchedActions = notMatchedActionsForLineage)
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteOperationForRowLineage.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteOperationForRowLineage.scala
deleted file mode 100644
index 538e04264778..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteOperationForRowLineage.scala
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.iceberg.MetadataColumns
-import org.apache.iceberg.TableUtil
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util.METADATA_COL_ATTR_KEY
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.MetadataBuilder
-
-trait RewriteOperationForRowLineage extends RewriteRowLevelIcebergCommand {
-
- protected val ROW_ID_ATTRIBUTE_NAME = MetadataColumns.ROW_ID.name()
- protected val LAST_UPDATED_SEQUENCE_NUMBER_ATTRIBUTE_NAME =
- MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.name()
-
- // The plan should only be updated if the underlying Iceberg table supports row lineage AND
- // lineage attributes are not already on the output of operation which indicates the rule already ran
- protected def shouldUpdatePlan(table: LogicalPlan): Boolean = {
- val supportsRowLineage = EliminateSubqueryAliases(table) match {
- case r: DataSourceV2Relation =>
- r.table match {
- case sparkTable: SparkTable =>
- TableUtil.supportsRowLineage(sparkTable.table())
- }
- case _ => false
- }
-
- val rowIdAbsentFromOutput = !table.output.exists(_.name == ROW_ID_ATTRIBUTE_NAME)
-
- supportsRowLineage && rowIdAbsentFromOutput
- }
-
- protected def findRowLineageAttributes(
- expressions: Seq[Expression]): Option[(AttributeReference, AttributeReference)] = {
- val rowIdAttr = expressions.collectFirst {
- case attr: AttributeReference
- if isMetadataColumn(attr) && attr.name == ROW_ID_ATTRIBUTE_NAME =>
- attr
- }
-
- val lastUpdatedAttr = expressions.collectFirst {
- case attr: AttributeReference
- if isMetadataColumn(attr) && attr.name == LAST_UPDATED_SEQUENCE_NUMBER_ATTRIBUTE_NAME =>
- attr
- }
-
- // Treat row lineage columns as data columns by removing the metadata attribute
- // This works around the logic in ExposesMetadataColumns,
- // which prevents surfacing other metadata columns when a single metadata column is in the output
- (rowIdAttr, lastUpdatedAttr) match {
- case (Some(rowId), Some(lastUpdated)) =>
- Some((removeMetadataColumnAttribute(rowId), removeMetadataColumnAttribute(lastUpdated)))
- case _ => None
- }
- }
-
- protected def removeMetadataColumnAttribute(attr: AttributeReference): AttributeReference = {
- attr.withMetadata(
- new MetadataBuilder()
- .withMetadata(attr.metadata)
- .remove(METADATA_COL_ATTR_KEY)
- .build())
- }
-
- private def isMetadataColumn(attributeReference: AttributeReference): Boolean = {
- attributeReference.metadata.contains(METADATA_COL_ATTR_KEY)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelIcebergCommand.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelIcebergCommand.scala
deleted file mode 100644
index f40fc16e8ce1..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelIcebergCommand.scala
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.ProjectingInternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.write.RowLevelOperation
-import org.apache.spark.sql.connector.write.SupportsDelta
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.StructField
-import org.apache.spark.sql.types.StructType
-
-trait RewriteRowLevelIcebergCommand extends RewriteRowLevelCommand {
-
- // override as the existing Spark method does not work for UPDATE and MERGE
- protected override def buildWriteDeltaProjections(
- plan: LogicalPlan,
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): WriteDeltaProjections = {
-
- val rowProjection = if (rowAttrs.nonEmpty) {
- Some(newLazyProjection(plan, rowAttrs))
- } else {
- None
- }
-
- val rowIdProjection = newLazyProjection(plan, rowIdAttrs)
-
- val metadataProjection = if (metadataAttrs.nonEmpty) {
- Some(newLazyProjection(plan, metadataAttrs))
- } else {
- None
- }
-
- WriteDeltaProjections(rowProjection, rowIdProjection, metadataProjection)
- }
-
- // the projection is done by name, ignoring expr IDs
- private def newLazyProjection(
- plan: LogicalPlan,
- projectedAttrs: Seq[Attribute]): ProjectingInternalRow = {
-
- val projectedOrdinals = projectedAttrs.map(attr => plan.output.indexWhere(_.name == attr.name))
- val schema = StructType.fromAttributes(projectedOrdinals.map(plan.output(_)))
- ProjectingInternalRow(schema, projectedOrdinals)
- }
-
- protected def buildDeltaProjections(
- plan: LogicalPlan,
- outputs: Seq[Seq[Expression]],
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): WriteDeltaProjections = {
-
- val insertAndUpdateOutputs = outputs.filterNot(_.head == Literal(DELETE_OPERATION))
- val updateAndDeleteOutputs = outputs.filterNot(_.head == Literal(INSERT_OPERATION))
-
- val rowProjection = if (rowAttrs.nonEmpty) {
- Some(newLazyProjection(insertAndUpdateOutputs, plan.output, rowAttrs))
- } else {
- None
- }
-
- val rowIdProjection = newLazyProjection(updateAndDeleteOutputs, plan.output, rowIdAttrs)
-
- val metadataProjection = if (metadataAttrs.nonEmpty) {
- Some(newLazyProjection(updateAndDeleteOutputs, plan.output, metadataAttrs))
- } else {
- None
- }
-
- WriteDeltaProjections(rowProjection, rowIdProjection, metadataProjection)
- }
-
- // the projection is done by name, ignoring expr IDs
- private def newLazyProjection(
- outputs: Seq[Seq[Expression]],
- outputAttrs: Seq[Attribute],
- projectedAttrs: Seq[Attribute]): ProjectingInternalRow = {
-
- val projectedOrdinals = projectedAttrs.map(attr => outputAttrs.indexWhere(_.name == attr.name))
-
- val structFields = projectedAttrs.zip(projectedOrdinals).map { case (attr, ordinal) =>
- // output attr is nullable if at least one output projection may produce null for that attr
- // but row ID and metadata attrs are projected only for update/delete records and
- // row attrs are projected only in insert/update records
- // that's why the projection schema must rely only on relevant outputs
- // instead of blindly inheriting the output attr nullability
- val nullable = outputs.exists(output => output(ordinal).nullable)
- StructField(attr.name, attr.dataType, nullable, attr.metadata)
- }
- val schema = StructType(structFields)
-
- ProjectingInternalRow(schema, projectedOrdinals)
- }
-
- protected def deltaDeleteOutput(
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): Seq[Expression] = {
- val deleteRowValues = buildDeltaDeleteRowValues(rowAttrs, rowIdAttrs)
- Seq(Literal(DELETE_OPERATION)) ++ deleteRowValues ++ metadataAttrs
- }
-
- protected def deltaInsertOutput(
- rowValues: Seq[Expression],
- metadataAttrs: Seq[Attribute]): Seq[Expression] = {
- val metadataValues = metadataAttrs.map(attr => Literal(null, attr.dataType))
- Seq(Literal(INSERT_OPERATION)) ++ rowValues ++ metadataValues
- }
-
- private def buildDeltaDeleteRowValues(
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute]): Seq[Expression] = {
-
- // nullify all row attrs that are not part of the row ID
- val rowIdAttSet = AttributeSet(rowIdAttrs)
- rowAttrs.map {
- case attr if rowIdAttSet.contains(attr) => attr
- case attr => Literal(null, attr.dataType)
- }
- }
-
- protected def buildMergingOutput(
- outputs: Seq[Seq[Expression]],
- attrs: Seq[Attribute]): Seq[Attribute] = {
-
- // build a correct nullability map for output attributes
- // an attribute is nullable if at least one output may produce null
- val nullabilityMap = attrs.indices.map { index =>
- index -> outputs.exists(output => output(index).nullable)
- }.toMap
-
- attrs.zipWithIndex.map { case (attr, index) =>
- AttributeReference(attr.name, attr.dataType, nullabilityMap(index))()
- }
- }
-
- protected def resolveRowIdAttrs(
- relation: DataSourceV2Relation,
- operation: RowLevelOperation): Seq[AttributeReference] = {
-
- operation match {
- case supportsDelta: SupportsDelta =>
- val rowIdAttrs =
- V2ExpressionUtils.resolveRefs[AttributeReference](supportsDelta.rowId.toSeq, relation)
-
- val nullableRowIdAttrs = rowIdAttrs.filter(_.nullable)
- if (nullableRowIdAttrs.nonEmpty) {
- throw new AnalysisException(s"Row ID attrs cannot be nullable: $nullableRowIdAttrs")
- }
-
- rowIdAttrs
-
- case other =>
- throw new AnalysisException(s"Operation $other does not support deltas")
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
deleted file mode 100644
index 664f4d8da6cb..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.EqualNullSafe
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.If
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.Union
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateRows
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.write.RowLevelOperation.Command.UPDATE
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.connector.write.SupportsDelta
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.IntegerType
-import org.apache.spark.sql.util.CaseInsensitiveStringMap
-
-/**
- * Assigns a rewrite plan for v2 tables that support rewriting data to handle UPDATE statements.
- *
- * This rule assumes the commands have been fully resolved and all assignments have been aligned.
- * That's why it must be run after AlignRowLevelCommandAssignments.
- *
- * This rule also must be run in the same batch with DeduplicateRelations in Spark.
- */
-object RewriteUpdateTable extends RewriteRowLevelIcebergCommand {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case u @ UpdateIcebergTable(aliasedTable, assignments, cond, None) if u.resolved && u.aligned =>
- EliminateSubqueryAliases(aliasedTable) match {
- case r @ DataSourceV2Relation(tbl: SupportsRowLevelOperations, _, _, _, _) =>
- val table = buildOperationTable(tbl, UPDATE, CaseInsensitiveStringMap.empty())
- val updateCond = cond.getOrElse(Literal.TrueLiteral)
- val rewritePlan = table.operation match {
- case _: SupportsDelta =>
- buildWriteDeltaPlan(r, table, assignments, updateCond)
- case _ if SubqueryExpression.hasSubquery(updateCond) =>
- buildReplaceDataWithUnionPlan(r, table, assignments, updateCond)
- case _ =>
- buildReplaceDataPlan(r, table, assignments, updateCond)
- }
- UpdateIcebergTable(r, assignments, cond, Some(rewritePlan))
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- // if the condition does NOT contain a subquery
- private def buildReplaceDataPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- assignments: Seq[Assignment],
- cond: Expression): ReplaceIcebergData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a read relation and include all required metadata columns
- val readRelation = buildRelationWithAttrs(relation, operationTable, metadataAttrs)
-
- // build a plan with updated and copied over records
- val updatedAndRemainingRowsPlan = buildUpdateProjection(readRelation, assignments, cond)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceIcebergData(writeRelation, updatedAndRemainingRowsPlan, relation)
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- // if the condition contains a subquery
- private def buildReplaceDataWithUnionPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- assignments: Seq[Assignment],
- cond: Expression): ReplaceIcebergData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a read relation and include all required metadata columns
- // the same read relation will be used to read records that must be updated and be copied over
- // DeduplicateRelations will take care of duplicated attr IDs
- val readRelation = buildRelationWithAttrs(relation, operationTable, metadataAttrs)
-
- // build a plan for records that match the cond and should be updated
- val matchedRowsPlan = Filter(cond, readRelation)
- val updatedRowsPlan = buildUpdateProjection(matchedRowsPlan, assignments)
-
- // build a plan for records that did not match the cond but had to be copied over
- val remainingRowFilter = Not(EqualNullSafe(cond, Literal.TrueLiteral))
- val remainingRowsPlan = Filter(remainingRowFilter, readRelation)
-
- // new state is a union of updated and copied over records
- val updatedAndRemainingRowsPlan = Union(updatedRowsPlan, remainingRowsPlan)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceIcebergData(writeRelation, updatedAndRemainingRowsPlan, relation)
- }
-
- // build a rewrite plan for sources that support row deltas
- private def buildWriteDeltaPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- assignments: Seq[Assignment],
- cond: Expression): WriteIcebergDelta = {
-
- // resolve all needed attrs (e.g. row ID and any required metadata attrs)
- val rowAttrs = relation.output
- val rowIdAttrs = resolveRowIdAttrs(relation, operationTable.operation)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a scan relation and include all required metadata columns
- val readRelation = buildRelationWithAttrs(relation, operationTable, rowIdAttrs ++ metadataAttrs)
- val readAttrs = readRelation.output
- val metadataReadAttrs = readAttrs.filterNot(relation.outputSet.contains)
-
- // build a plan for updated records that match the cond
- val matchedRowsPlan = Filter(cond, readRelation)
- val updatedRowsPlan =
- updateRows(matchedRowsPlan, assignments, readAttrs, rowAttrs, rowIdAttrs, metadataReadAttrs)
-
- // build a plan to write the row delta to the table
- val writeRelation = relation.copy(table = operationTable)
- val projections = buildDeltaProjections(updatedRowsPlan, rowAttrs, rowIdAttrs, metadataAttrs)
- WriteIcebergDelta(writeRelation, updatedRowsPlan, relation, projections)
- }
-
- private def updateRows(
- matchedRowsPlan: LogicalPlan,
- assignments: Seq[Assignment],
- readAttrs: Seq[Attribute],
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): UpdateRows = {
-
- val delete = deltaDeleteOutput(rowAttrs, rowIdAttrs, metadataAttrs)
- val insert = deltaInsertOutput(assignments.map(_.value), metadataAttrs)
- val outputs = Seq(delete, insert)
- val operationTypeAttr = AttributeReference(OPERATION_COLUMN, IntegerType, nullable = false)()
- val updateRowsOutput = buildMergingOutput(outputs, operationTypeAttr +: readAttrs)
- UpdateRows(delete, insert, updateRowsOutput, matchedRowsPlan)
- }
-
- private def buildDeltaProjections(
- updateRows: UpdateRows,
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): WriteDeltaProjections = {
-
- val outputs = Seq(updateRows.deleteOutput, updateRows.insertOutput)
- buildDeltaProjections(updateRows, outputs, rowAttrs, rowIdAttrs, metadataAttrs)
- }
-
- // this method assumes the assignments have been already aligned before
- // the condition passed to this method may be different from the UPDATE condition
- private def buildUpdateProjection(
- plan: LogicalPlan,
- assignments: Seq[Assignment],
- cond: Expression = Literal.TrueLiteral): LogicalPlan = {
-
- // TODO: avoid executing the condition for each column
-
- // the plan output may include metadata columns that are not modified
- // that's why the number of assignments may not match the number of plan output columns
-
- val assignedValues = assignments.map(_.value)
- val updatedValues = plan.output.zipWithIndex.map { case (attr, index) =>
- if (index < assignments.size) {
- val assignedExpr = assignedValues(index)
- val updatedValue = If(cond, assignedExpr, attr)
- Alias(updatedValue, attr.name)()
- } else {
- attr
- }
- }
-
- Project(updatedValues, plan)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTableForRowLineage.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTableForRowLineage.scala
deleted file mode 100644
index 34f4de6bb3e3..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTableForRowLineage.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-
-object RewriteUpdateTableForRowLineage extends RewriteOperationForRowLineage {
-
- override def apply(plan: LogicalPlan): LogicalPlan = {
- plan resolveOperators {
- case updateTable @ UpdateIcebergTable(_, _, _, _) if shouldUpdatePlan(updateTable.table) =>
- updatePlanWithRowLineage(updateTable)
- }
- }
-
- private def updatePlanWithRowLineage(updateTable: UpdateIcebergTable): LogicalPlan = {
- EliminateSubqueryAliases(updateTable.table) match {
- case r @ DataSourceV2Relation(_: SupportsRowLevelOperations, _, _, _, _) =>
- val lineageAttributes = findRowLineageAttributes(r.metadataOutput).get
- val (rowId, lastUpdatedSequence) = (
- removeMetadataColumnAttribute(lineageAttributes._1),
- removeMetadataColumnAttribute(lineageAttributes._2))
-
- val lineageAssignments = updateTable.assignments ++
- Seq(Assignment(lastUpdatedSequence, Literal(null)), Assignment(rowId, rowId))
-
- val tableWithLineage = r.copy(output = r.output ++ Seq(rowId, lastUpdatedSequence))
- updateTable.copy(table = tableWithLineage, assignments = lineageAssignments)
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala
deleted file mode 100644
index e5cad5ee10bc..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.analysis.ViewUtil.IcebergViewHelper
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.CreateView
-import org.apache.spark.sql.catalyst.plans.logical.DropView
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.ShowViews
-import org.apache.spark.sql.catalyst.plans.logical.View
-import org.apache.spark.sql.catalyst.plans.logical.views.CreateIcebergView
-import org.apache.spark.sql.catalyst.plans.logical.views.DropIcebergView
-import org.apache.spark.sql.catalyst.plans.logical.views.ResolvedV2View
-import org.apache.spark.sql.catalyst.plans.logical.views.ShowIcebergViews
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.UNRESOLVED_FUNCTION
-import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.connector.catalog.LookupCatalog
-import scala.collection.mutable
-
-/**
- * ResolveSessionCatalog exits early for some v2 View commands,
- * thus they are pre-substituted here and then handled in ResolveViews
- */
-case class RewriteViewCommands(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- protected lazy val catalogManager: CatalogManager = spark.sessionState.catalogManager
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
- case DropView(ResolvedIdent(resolved), ifExists) =>
- DropIcebergView(resolved, ifExists)
-
- case CreateView(
- ResolvedIdent(resolved),
- userSpecifiedColumns,
- comment,
- properties,
- Some(queryText),
- query,
- allowExisting,
- replace) =>
- val q = CTESubstitution.apply(query)
- verifyTemporaryObjectsDontExist(resolved, q)
- CreateIcebergView(
- child = resolved,
- queryText = queryText,
- query = q,
- columnAliases = userSpecifiedColumns.map(_._1),
- columnComments = userSpecifiedColumns.map(_._2.orElse(Option.empty)),
- comment = comment,
- properties = properties,
- allowExisting = allowExisting,
- replace = replace)
-
- case view @ ShowViews(UnresolvedNamespace(Seq()), pattern, output) =>
- if (ViewUtil.isViewCatalog(catalogManager.currentCatalog)) {
- ShowIcebergViews(
- ResolvedNamespace(
- catalogManager.currentCatalog,
- catalogManager.currentNamespace.toIndexedSeq),
- pattern,
- output)
- } else {
- view
- }
-
- case ShowViews(UnresolvedNamespace(CatalogAndNamespace(catalog, ns)), pattern, output)
- if ViewUtil.isViewCatalog(catalog) =>
- ShowIcebergViews(ResolvedNamespace(catalog, ns), pattern, output)
-
- // needs to be done here instead of in ResolveViews, so that a V2 view can be resolved before the Analyzer
- // tries to resolve it, which would result in an error, saying that V2 views aren't supported
- case u @ UnresolvedView(ResolvedView(resolved), _, _, _) =>
- ViewUtil
- .loadView(resolved.catalog, resolved.identifier)
- .map(_ => ResolvedV2View(resolved.catalog.asViewCatalog, resolved.identifier))
- .getOrElse(u)
- }
-
- private def isTempView(nameParts: Seq[String]): Boolean = {
- catalogManager.v1SessionCatalog.isTempView(nameParts)
- }
-
- private def isTempFunction(nameParts: Seq[String]): Boolean = {
- if (nameParts.size > 1) {
- return false
- }
- catalogManager.v1SessionCatalog.isTemporaryFunction(nameParts.asFunctionIdentifier)
- }
-
- object ResolvedIdent {
- def unapply(unresolved: UnresolvedIdentifier): Option[ResolvedIdentifier] = unresolved match {
- case UnresolvedIdentifier(nameParts, true) if isTempView(nameParts) =>
- None
-
- case UnresolvedIdentifier(CatalogAndIdentifier(catalog, ident), _)
- if ViewUtil.isViewCatalog(catalog) =>
- Some(ResolvedIdentifier(catalog, ident))
-
- case _ =>
- None
- }
- }
-
- /**
- * Permanent views are not allowed to reference temp objects
- */
- private def verifyTemporaryObjectsDontExist(
- identifier: ResolvedIdentifier,
- child: LogicalPlan): Unit = {
- val tempViews = collectTemporaryViews(child)
- if (tempViews.nonEmpty) {
- throw invalidRefToTempObject(
- identifier,
- tempViews.map(v => v.quoted).mkString("[", ", ", "]"),
- "view")
- }
-
- val tempFunctions = collectTemporaryFunctions(child)
- if (tempFunctions.nonEmpty) {
- throw invalidRefToTempObject(identifier, tempFunctions.mkString("[", ", ", "]"), "function")
- }
- }
-
- private def invalidRefToTempObject(
- ident: ResolvedIdentifier,
- tempObjectNames: String,
- tempObjectType: String) = {
- new AnalysisException(
- String.format(
- "Cannot create view %s.%s that references temporary %s: %s",
- ident.catalog.name(),
- ident.identifier,
- tempObjectType,
- tempObjectNames))
- }
-
- /**
- * Collect all temporary views and return the identifiers separately
- */
- private def collectTemporaryViews(child: LogicalPlan): Seq[Seq[String]] = {
- def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = {
- child.flatMap {
- case unresolved: UnresolvedRelation if isTempView(unresolved.multipartIdentifier) =>
- Seq(unresolved.multipartIdentifier)
- case view: View if view.isTempView => Seq(view.desc.identifier.nameParts)
- case plan =>
- plan.expressions.flatMap(_.flatMap {
- case e: SubqueryExpression => collectTempViews(e.plan)
- case _ => Seq.empty
- })
- }.distinct
- }
-
- collectTempViews(child)
- }
-
- private object ResolvedView {
- def unapply(identifier: Seq[String]): Option[ResolvedV2View] = identifier match {
- case nameParts if isTempView(nameParts) =>
- None
-
- case CatalogAndIdentifier(catalog, ident) if ViewUtil.isViewCatalog(catalog) =>
- ViewUtil
- .loadView(catalog, ident)
- .flatMap(_ => Some(ResolvedV2View(catalog.asViewCatalog, ident)))
-
- case _ =>
- None
- }
- }
-
- /**
- * Collect the names of all temporary functions.
- */
- private def collectTemporaryFunctions(child: LogicalPlan): Seq[String] = {
- val tempFunctions = new mutable.HashSet[String]()
- child.resolveExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_FUNCTION)) {
- case f @ UnresolvedFunction(nameParts, _, _, _, _) if isTempFunction(nameParts) =>
- tempFunctions += nameParts.head
- f
- case e: SubqueryExpression =>
- tempFunctions ++= collectTemporaryFunctions(e.plan)
- e
- }
- tempFunctions.toSeq
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewUtil.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewUtil.scala
deleted file mode 100644
index a7188837c51e..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewUtil.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.connector.catalog.CatalogPlugin
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.View
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.errors.QueryCompilationErrors
-
-object ViewUtil {
- def loadView(catalog: CatalogPlugin, ident: Identifier): Option[View] = catalog match {
- case viewCatalog: ViewCatalog =>
- try {
- Option(viewCatalog.loadView(ident))
- } catch {
- case _: NoSuchViewException => None
- }
- case _ => None
- }
-
- def isViewCatalog(catalog: CatalogPlugin): Boolean = {
- catalog.isInstanceOf[ViewCatalog]
- }
-
- implicit class IcebergViewHelper(plugin: CatalogPlugin) {
- def asViewCatalog: ViewCatalog = plugin match {
- case viewCatalog: ViewCatalog =>
- viewCatalog
- case _ =>
- throw QueryCompilationErrors.missingCatalogAbilityError(plugin, "views")
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/AssignmentUtils.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/AssignmentUtils.scala
deleted file mode 100644
index 89c0630556b3..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/AssignmentUtils.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.expressions
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.types.DataType
-
-object AssignmentUtils extends SQLConfHelper {
-
- /**
- * Checks whether assignments are aligned and match table columns.
- *
- * @param table a target table
- * @param assignments assignments to check
- * @return true if the assignments are aligned
- */
- def aligned(table: LogicalPlan, assignments: Seq[Assignment]): Boolean = {
- val sameSize = table.output.size == assignments.size
- sameSize && table.output.zip(assignments).forall { case (attr, assignment) =>
- val key = assignment.key
- val value = assignment.value
- val refsEqual = toAssignmentRef(attr)
- .zip(toAssignmentRef(key))
- .forall { case (attrRef, keyRef) => conf.resolver(attrRef, keyRef) }
-
- refsEqual &&
- DataType.equalsIgnoreCompatibleNullability(value.dataType, attr.dataType) &&
- (attr.nullable || !value.nullable)
- }
- }
-
- def toAssignmentRef(expr: Expression): Seq[String] = expr match {
- case attr: AttributeReference =>
- Seq(attr.name)
- case Alias(child, _) =>
- toAssignmentRef(child)
- case GetStructField(child, _, Some(name)) =>
- toAssignmentRef(child) :+ name
- case other: ExtractValue =>
- throw new AnalysisException(s"Updating nested fields is only supported for structs: $other")
- case other =>
- throw new AnalysisException(s"Cannot convert to a reference, unsupported expression: $other")
- }
-
- def handleCharVarcharLimits(assignment: Assignment): Assignment = {
- val key = assignment.key
- val value = assignment.value
-
- val rawKeyType = key.transform { case attr: AttributeReference =>
- CharVarcharUtils
- .getRawType(attr.metadata)
- .map(attr.withDataType)
- .getOrElse(attr)
- }.dataType
-
- if (CharVarcharUtils.hasCharVarchar(rawKeyType)) {
- val newKey = key.transform { case attr: AttributeReference =>
- CharVarcharUtils.cleanAttrMetadata(attr)
- }
- val newValue = CharVarcharUtils.stringLengthCheck(value, rawKeyType)
- Assignment(newKey, newValue)
- } else {
- assignment
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedReplaceNullWithFalseInPredicate.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedReplaceNullWithFalseInPredicate.scala
deleted file mode 100644
index ad11f927f142..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedReplaceNullWithFalseInPredicate.scala
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.optimizer
-
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.CaseWhen
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.If
-import org.apache.spark.sql.catalyst.expressions.In
-import org.apache.spark.sql.catalyst.expressions.InSet
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.expressions.Or
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertStarAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeAction
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateStarAction
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.INSET
-import org.apache.spark.sql.catalyst.trees.TreePattern.NULL_LITERAL
-import org.apache.spark.sql.catalyst.trees.TreePattern.TRUE_OR_FALSE_LITERAL
-import org.apache.spark.sql.types.BooleanType
-import org.apache.spark.util.Utils
-
-/**
- * A rule similar to ReplaceNullWithFalseInPredicate in Spark but applies to Iceberg row-level commands.
- */
-object ExtendedReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] {
-
- override def apply(plan: LogicalPlan): LogicalPlan =
- plan.transformWithPruning(_.containsAnyPattern(NULL_LITERAL, TRUE_OR_FALSE_LITERAL, INSET)) {
-
- case u @ UpdateIcebergTable(_, _, Some(cond), _) =>
- u.copy(condition = Some(replaceNullWithFalse(cond)))
-
- case m @ MergeIntoIcebergTable(_, _, mergeCond, matchedActions, notMatchedActions, _) =>
- m.copy(
- mergeCondition = replaceNullWithFalse(mergeCond),
- matchedActions = replaceNullWithFalse(matchedActions),
- notMatchedActions = replaceNullWithFalse(notMatchedActions))
- }
-
- /**
- * Recursively traverse the Boolean-type expression to replace
- * `Literal(null, BooleanType)` with `FalseLiteral`, if possible.
- *
- * Note that `transformExpressionsDown` can not be used here as we must stop as soon as we hit
- * an expression that is not [[CaseWhen]], [[If]], [[And]], [[Or]] or
- * `Literal(null, BooleanType)`.
- */
- private def replaceNullWithFalse(e: Expression): Expression = e match {
- case Literal(null, BooleanType) =>
- FalseLiteral
- // In SQL, the `Not(IN)` expression evaluates as follows:
- // `NULL not in (1)` -> NULL
- // `NULL not in (1, NULL)` -> NULL
- // `1 not in (1, NULL)` -> false
- // `1 not in (2, NULL)` -> NULL
- // In predicate, NULL is equal to false, so we can simplify them to false directly.
- case Not(In(value, list)) if (value +: list).exists(isNullLiteral) =>
- FalseLiteral
- case Not(InSet(value, list)) if isNullLiteral(value) || list.contains(null) =>
- FalseLiteral
-
- case And(left, right) =>
- And(replaceNullWithFalse(left), replaceNullWithFalse(right))
- case Or(left, right) =>
- Or(replaceNullWithFalse(left), replaceNullWithFalse(right))
- case cw: CaseWhen if cw.dataType == BooleanType =>
- val newBranches = cw.branches.map { case (cond, value) =>
- replaceNullWithFalse(cond) -> replaceNullWithFalse(value)
- }
- val newElseValue = cw.elseValue.map(replaceNullWithFalse).getOrElse(FalseLiteral)
- CaseWhen(newBranches, newElseValue)
- case i @ If(pred, trueVal, falseVal) if i.dataType == BooleanType =>
- If(replaceNullWithFalse(pred), replaceNullWithFalse(trueVal), replaceNullWithFalse(falseVal))
- case e if e.dataType == BooleanType =>
- e
- case e =>
- val message = "Expected a Boolean type expression in replaceNullWithFalse, " +
- s"but got the type `${e.dataType.catalogString}` in `${e.sql}`."
- if (Utils.isTesting) {
- throw new IllegalArgumentException(message)
- } else {
- logWarning(message)
- e
- }
- }
-
- private def isNullLiteral(e: Expression): Boolean = e match {
- case Literal(null, _) => true
- case _ => false
- }
-
- private def replaceNullWithFalse(mergeActions: Seq[MergeAction]): Seq[MergeAction] = {
- mergeActions.map {
- case u @ UpdateAction(Some(cond), _) => u.copy(condition = Some(replaceNullWithFalse(cond)))
- case u @ UpdateStarAction(Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond)))
- case d @ DeleteAction(Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond)))
- case i @ InsertAction(Some(cond), _) => i.copy(condition = Some(replaceNullWithFalse(cond)))
- case i @ InsertStarAction(Some(cond)) => i.copy(condition = Some(replaceNullWithFalse(cond)))
- case other => other
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedSimplifyConditionalsInPredicate.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedSimplifyConditionalsInPredicate.scala
deleted file mode 100644
index ba5f6c8dfbbf..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedSimplifyConditionalsInPredicate.scala
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.optimizer
-
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.CaseWhen
-import org.apache.spark.sql.catalyst.expressions.Coalesce
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.If
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
-import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.expressions.Or
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.CASE_WHEN
-import org.apache.spark.sql.catalyst.trees.TreePattern.IF
-import org.apache.spark.sql.types.BooleanType
-
-/**
- * A rule similar to SimplifyConditionalsInPredicate in Spark but applies to Iceberg row-level commands.
- */
-object ExtendedSimplifyConditionalsInPredicate extends Rule[LogicalPlan] {
-
- override def apply(plan: LogicalPlan): LogicalPlan =
- plan.transformWithPruning(_.containsAnyPattern(CASE_WHEN, IF)) {
-
- case u @ UpdateIcebergTable(_, _, Some(cond), _) =>
- u.copy(condition = Some(simplifyConditional(cond)))
-
- case m @ MergeIntoIcebergTable(_, _, mergeCond, matchedActions, notMatchedActions, _) =>
- m.copy(
- mergeCondition = simplifyConditional(mergeCond),
- matchedActions = simplifyConditional(matchedActions),
- notMatchedActions = simplifyConditional(notMatchedActions))
- }
-
- private def simplifyConditional(e: Expression): Expression = e match {
- case And(left, right) => And(simplifyConditional(left), simplifyConditional(right))
- case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right))
- case If(cond, trueValue, FalseLiteral) => And(cond, trueValue)
- case If(cond, trueValue, TrueLiteral) => Or(Not(Coalesce(Seq(cond, FalseLiteral))), trueValue)
- case If(cond, FalseLiteral, falseValue) =>
- And(Not(Coalesce(Seq(cond, FalseLiteral))), falseValue)
- case If(cond, TrueLiteral, falseValue) => Or(cond, falseValue)
- case CaseWhen(
- Seq((cond, trueValue)),
- Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) =>
- And(cond, trueValue)
- case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) =>
- Or(Not(Coalesce(Seq(cond, FalseLiteral))), trueValue)
- case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) =>
- And(Not(Coalesce(Seq(cond, FalseLiteral))), elseValue)
- case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) =>
- Or(cond, elseValue)
- case e if e.dataType == BooleanType => e
- case e =>
- assert(
- e.dataType != BooleanType,
- "Expected a Boolean type expression in ExtendedSimplifyConditionalsInPredicate, " +
- s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.")
- e
- }
-
- private def simplifyConditional(mergeActions: Seq[MergeAction]): Seq[MergeAction] = {
- mergeActions.map {
- case u @ UpdateAction(Some(cond), _) => u.copy(condition = Some(simplifyConditional(cond)))
- case u @ UpdateStarAction(Some(cond)) => u.copy(condition = Some(simplifyConditional(cond)))
- case d @ DeleteAction(Some(cond)) => d.copy(condition = Some(simplifyConditional(cond)))
- case i @ InsertAction(Some(cond), _) => i.copy(condition = Some(simplifyConditional(cond)))
- case i @ InsertStarAction(Some(cond)) => i.copy(condition = Some(simplifyConditional(cond)))
- case other => other
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRowLineageOutputFromOriginalTable.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRowLineageOutputFromOriginalTable.scala
deleted file mode 100644
index b6af8ae799e3..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRowLineageOutputFromOriginalTable.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.optimizer
-
-import org.apache.iceberg.MetadataColumns
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-
-/**
- * RemoveRowLineageOutputFromOriginalTable removes row lineage outputs from Dsv2 write's
- * originalTable so that downstream behaviors like relation caching just work, without having to
- * modify physical planning strategies.
- */
-object RemoveRowLineageOutputFromOriginalTable extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = {
- plan.transform {
- case writeDelta @ WriteIcebergDelta(_, _, originalTable, _, _) =>
- writeDelta.copy(originalTable = removeRowLineageOutput(originalTable))
- case replaceData @ ReplaceIcebergData(_, _, originalTable, _) =>
- replaceData.copy(originalTable = removeRowLineageOutput(originalTable))
- }
- }
-
- private def removeRowLineageOutput(table: NamedRelation): DataSourceV2Relation = {
- table match {
- case dsv2Relation @ DataSourceV2Relation(_, _, _, _, _) =>
- dsv2Relation.copy(output = dsv2Relation.output.filterNot(attr =>
- attr.name == MetadataColumns.ROW_ID.name() ||
- attr.name == MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.name()))
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceStaticInvoke.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceStaticInvoke.scala
deleted file mode 100644
index a2a68d864b65..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceStaticInvoke.scala
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.optimizer
-
-import org.apache.iceberg.spark.functions.SparkFunctions
-import org.apache.spark.sql.catalyst.expressions.ApplyFunctionExpression
-import org.apache.spark.sql.catalyst.expressions.BinaryComparison
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.In
-import org.apache.spark.sql.catalyst.expressions.InSet
-import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.Join
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.BINARY_COMPARISON
-import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND
-import org.apache.spark.sql.catalyst.trees.TreePattern.FILTER
-import org.apache.spark.sql.catalyst.trees.TreePattern.IN
-import org.apache.spark.sql.catalyst.trees.TreePattern.INSET
-import org.apache.spark.sql.catalyst.trees.TreePattern.JOIN
-import org.apache.spark.sql.connector.catalog.functions.ScalarFunction
-import org.apache.spark.sql.types.StructField
-import org.apache.spark.sql.types.StructType
-
-/**
- * Spark analyzes the Iceberg system function to {@link StaticInvoke} which could not be pushed
- * down to datasource. This rule will replace {@link StaticInvoke} to
- * {@link ApplyFunctionExpression} for Iceberg system function in a filter condition.
- */
-object ReplaceStaticInvoke extends Rule[LogicalPlan] {
-
- override def apply(plan: LogicalPlan): LogicalPlan =
- plan.transformWithPruning(_.containsAnyPattern(COMMAND, FILTER, JOIN)) {
- case join @ Join(_, _, _, Some(cond), _) =>
- replaceStaticInvoke(join, cond, newCond => join.copy(condition = Some(newCond)))
-
- case filter @ Filter(cond, _) =>
- replaceStaticInvoke(filter, cond, newCond => filter.copy(condition = newCond))
- }
-
- private def replaceStaticInvoke[T <: LogicalPlan](
- node: T,
- condition: Expression,
- copy: Expression => T): T = {
- val newCondition = replaceStaticInvoke(condition)
- if (newCondition fastEquals condition) node else copy(newCondition)
- }
-
- private def replaceStaticInvoke(condition: Expression): Expression = {
- condition.transformWithPruning(_.containsAnyPattern(BINARY_COMPARISON, IN, INSET)) {
- case in @ In(value: StaticInvoke, _) if canReplace(value) =>
- in.copy(value = replaceStaticInvoke(value))
-
- case in @ InSet(value: StaticInvoke, _) if canReplace(value) =>
- in.copy(child = replaceStaticInvoke(value))
-
- case c @ BinaryComparison(left: StaticInvoke, right) if canReplace(left) && right.foldable =>
- c.withNewChildren(Seq(replaceStaticInvoke(left), right))
-
- case c @ BinaryComparison(left, right: StaticInvoke) if canReplace(right) && left.foldable =>
- c.withNewChildren(Seq(left, replaceStaticInvoke(right)))
- }
- }
-
- private def replaceStaticInvoke(invoke: StaticInvoke): Expression = {
- // Adaptive from `resolveV2Function` in org.apache.spark.sql.catalyst.analysis.ResolveFunctions
- val unbound = SparkFunctions.loadFunctionByClass(invoke.staticObject)
- if (unbound == null) {
- return invoke
- }
-
- val inputType = StructType(invoke.arguments.zipWithIndex.map { case (exp, pos) =>
- StructField(s"_$pos", exp.dataType, exp.nullable)
- })
-
- val bound =
- try {
- unbound.bind(inputType)
- } catch {
- case _: Exception =>
- return invoke
- }
-
- if (bound.inputTypes().length != invoke.arguments.length) {
- return invoke
- }
-
- bound match {
- case scalarFunc: ScalarFunction[_] =>
- ApplyFunctionExpression(scalarFunc, invoke.arguments)
- case _ => invoke
- }
- }
-
- @inline
- private def canReplace(invoke: StaticInvoke): Boolean = {
- invoke.functionName == ScalarFunction.MAGIC_METHOD_NAME && !invoke.foldable
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala
deleted file mode 100644
index 1fb0641c00c8..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.parser.extensions
-
-import java.util.Locale
-import org.antlr.v4.runtime._
-import org.antlr.v4.runtime.atn.PredictionMode
-import org.antlr.v4.runtime.misc.Interval
-import org.antlr.v4.runtime.misc.ParseCancellationException
-import org.antlr.v4.runtime.tree.TerminalNodeImpl
-import org.apache.iceberg.common.DynConstructors
-import org.apache.iceberg.spark.ExtendedParser
-import org.apache.iceberg.spark.ExtendedParser.RawOrderField
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.procedures.SparkProcedures
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.FunctionIdentifier
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.analysis.RewriteViewCommands
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser.NonReservedContext
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser.QuotedIdentifierContext
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoContext
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoTable
-import org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateTable
-import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.execution.command.ExplainCommand
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.VariableSubstitution
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.StructType
-import scala.jdk.CollectionConverters._
-import scala.util.Try
-
-class IcebergSparkSqlExtensionsParser(delegate: ParserInterface)
- extends ParserInterface
- with ExtendedParser {
-
- import IcebergSparkSqlExtensionsParser._
-
- private lazy val substitutor = substitutorCtor.newInstance(SQLConf.get)
- private lazy val astBuilder = new IcebergSqlExtensionsAstBuilder(delegate)
-
- /**
- * Parse a string to a DataType.
- */
- override def parseDataType(sqlText: String): DataType = {
- delegate.parseDataType(sqlText)
- }
-
- /**
- * Parse a string to a raw DataType without CHAR/VARCHAR replacement.
- */
- def parseRawDataType(sqlText: String): DataType = throw new UnsupportedOperationException()
-
- /**
- * Parse a string to an Expression.
- */
- override def parseExpression(sqlText: String): Expression = {
- delegate.parseExpression(sqlText)
- }
-
- /**
- * Parse a string to a TableIdentifier.
- */
- override def parseTableIdentifier(sqlText: String): TableIdentifier = {
- delegate.parseTableIdentifier(sqlText)
- }
-
- /**
- * Parse a string to a FunctionIdentifier.
- */
- override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = {
- delegate.parseFunctionIdentifier(sqlText)
- }
-
- /**
- * Parse a string to a multi-part identifier.
- */
- override def parseMultipartIdentifier(sqlText: String): Seq[String] = {
- delegate.parseMultipartIdentifier(sqlText)
- }
-
- /**
- * Creates StructType for a given SQL string, which is a comma separated list of field
- * definitions which will preserve the correct Hive metadata.
- */
- override def parseTableSchema(sqlText: String): StructType = {
- delegate.parseTableSchema(sqlText)
- }
-
- override def parseSortOrder(sqlText: String): java.util.List[RawOrderField] = {
- val fields = parse(sqlText) { parser => astBuilder.visitSingleOrder(parser.singleOrder()) }
- fields.map { field =>
- val (term, direction, order) = field
- new RawOrderField(term, direction, order)
- }.asJava
- }
-
- /**
- * Parse a string to a LogicalPlan.
- */
- override def parsePlan(sqlText: String): LogicalPlan = {
- val sqlTextAfterSubstitution = substitutor.substitute(sqlText)
- if (isIcebergCommand(sqlTextAfterSubstitution)) {
- parse(sqlTextAfterSubstitution) { parser => astBuilder.visit(parser.singleStatement()) }
- .asInstanceOf[LogicalPlan]
- } else {
- val parsedPlan = RewriteViewCommands(SparkSession.active).apply(delegate.parsePlan(sqlText))
- parsedPlan match {
- case e: ExplainCommand =>
- e.copy(logicalPlan = replaceRowLevelCommands(e.logicalPlan))
- case p =>
- replaceRowLevelCommands(p)
- }
- }
- }
-
- private def replaceRowLevelCommands(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsDown {
- case UpdateTable(UnresolvedIcebergTable(aliasedTable), assignments, condition) =>
- UpdateIcebergTable(aliasedTable, assignments, condition)
-
- case MergeIntoTable(
- UnresolvedIcebergTable(aliasedTable),
- source,
- cond,
- matchedActions,
- notMatchedActions,
- Nil) =>
- // cannot construct MergeIntoIcebergTable right away as MERGE operations require special resolution
- // that's why the condition and actions must be hidden from the regular resolution rules in Spark
- // see ResolveMergeIntoTableReferences for details
- val context = MergeIntoContext(cond, matchedActions, notMatchedActions)
- UnresolvedMergeIntoIcebergTable(aliasedTable, source, context)
-
- case MergeIntoTable(UnresolvedIcebergTable(_), _, _, _, _, notMatchedBySourceActions)
- if notMatchedBySourceActions.nonEmpty =>
- throw new AnalysisException("Iceberg does not support WHEN NOT MATCHED BY SOURCE clause")
- }
-
- object UnresolvedIcebergTable {
-
- def unapply(plan: LogicalPlan): Option[LogicalPlan] = {
- EliminateSubqueryAliases(plan) match {
- case UnresolvedRelation(multipartIdentifier, _, _) if isIcebergTable(multipartIdentifier) =>
- Some(plan)
- case _ =>
- None
- }
- }
-
- private def isIcebergTable(multipartIdent: Seq[String]): Boolean = {
- val catalogAndIdentifier =
- Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava)
- catalogAndIdentifier.catalog match {
- case tableCatalog: TableCatalog =>
- Try(tableCatalog.loadTable(catalogAndIdentifier.identifier))
- .map(isIcebergTable)
- .getOrElse(false)
-
- case _ =>
- false
- }
- }
-
- private def isIcebergTable(table: Table): Boolean = table match {
- case _: SparkTable => true
- case _ => false
- }
- }
-
- private def isIcebergCommand(sqlText: String): Boolean = {
- val normalized = sqlText
- .toLowerCase(Locale.ROOT)
- .trim()
- // Strip simple SQL comments that terminate a line, e.g. comments starting with `--` .
- .replaceAll("--.*?\\n", " ")
- // Strip newlines.
- .replaceAll("\\s+", " ")
- // Strip comments of the form /* ... */. This must come after stripping newlines so that
- // comments that span multiple lines are caught.
- .replaceAll("/\\*.*?\\*/", " ")
- // Strip backtick then `system`.`ancestors_of` changes to system.ancestors_of
- .replaceAll("`", "")
- .trim()
- isIcebergProcedure(normalized) || (normalized
- .startsWith("alter table") && (normalized.contains("add partition field") ||
- normalized.contains("drop partition field") ||
- normalized.contains("replace partition field") ||
- normalized.contains("write ordered by") ||
- normalized.contains("write locally ordered by") ||
- normalized.contains("write distributed by") ||
- normalized.contains("write unordered") ||
- normalized.contains("set identifier fields") ||
- normalized.contains("drop identifier fields") ||
- isSnapshotRefDdl(normalized)))
- }
-
- // All builtin Iceberg procedures are under the 'system' namespace
- private def isIcebergProcedure(normalized: String): Boolean = {
- normalized.startsWith("call") &&
- SparkProcedures.names().asScala.map("system." + _).exists(normalized.contains)
- }
-
- private def isSnapshotRefDdl(normalized: String): Boolean = {
- normalized.contains("create branch") ||
- normalized.contains("replace branch") ||
- normalized.contains("create tag") ||
- normalized.contains("replace tag") ||
- normalized.contains("drop branch") ||
- normalized.contains("drop tag")
- }
-
- protected def parse[T](command: String)(toResult: IcebergSqlExtensionsParser => T): T = {
- val lexer = new IcebergSqlExtensionsLexer(
- new UpperCaseCharStream(CharStreams.fromString(command)))
- lexer.removeErrorListeners()
- lexer.addErrorListener(IcebergParseErrorListener)
-
- val tokenStream = new CommonTokenStream(lexer)
- val parser = new IcebergSqlExtensionsParser(tokenStream)
- parser.addParseListener(IcebergSqlExtensionsPostProcessor)
- parser.removeErrorListeners()
- parser.addErrorListener(IcebergParseErrorListener)
-
- // https://github.com/antlr/antlr4/issues/192#issuecomment-15238595
- // Save a great deal of time on correct inputs by using a two-stage parsing strategy.
- try {
- try {
- // first, try parsing with potentially faster SLL mode and BailErrorStrategy
- parser.setErrorHandler(new BailErrorStrategy)
- parser.getInterpreter.setPredictionMode(PredictionMode.SLL)
- toResult(parser)
- } catch {
- case _: ParseCancellationException =>
- // if we fail, parse with LL mode with DefaultErrorStrategy
- tokenStream.seek(0) // rewind input stream
- parser.reset()
-
- // Try Again.
- parser.setErrorHandler(new DefaultErrorStrategy)
- parser.getInterpreter.setPredictionMode(PredictionMode.LL)
- toResult(parser)
- }
- } catch {
- case e: IcebergParseException if e.command.isDefined =>
- throw e
- case e: IcebergParseException =>
- throw e.withCommand(command)
- case e: AnalysisException =>
- val position = Origin(e.line, e.startPosition)
- throw new IcebergParseException(Option(command), e.message, position, position)
- }
- }
-
- override def parseQuery(sqlText: String): LogicalPlan = {
- parsePlan(sqlText)
- }
-}
-
-object IcebergSparkSqlExtensionsParser {
- private val substitutorCtor: DynConstructors.Ctor[VariableSubstitution] =
- DynConstructors
- .builder()
- .impl(classOf[VariableSubstitution])
- .impl(classOf[VariableSubstitution], classOf[SQLConf])
- .build()
-}
-
-/* Copied from Apache Spark's to avoid dependency on Spark Internals */
-class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream {
- override def consume(): Unit = wrapped.consume
- override def getSourceName(): String = wrapped.getSourceName
- override def index(): Int = wrapped.index
- override def mark(): Int = wrapped.mark
- override def release(marker: Int): Unit = wrapped.release(marker)
- override def seek(where: Int): Unit = wrapped.seek(where)
- override def size(): Int = wrapped.size
-
- override def getText(interval: Interval): String = wrapped.getText(interval)
-
- // scalastyle:off
- override def LA(i: Int): Int = {
- val la = wrapped.LA(i)
- if (la == 0 || la == IntStream.EOF) la
- else Character.toUpperCase(la)
- }
- // scalastyle:on
-}
-
-/**
- * The post-processor validates & cleans-up the parse tree during the parse process.
- */
-case object IcebergSqlExtensionsPostProcessor extends IcebergSqlExtensionsBaseListener {
-
- /** Remove the back ticks from an Identifier. */
- override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
- replaceTokenByIdentifier(ctx, 1) { token =>
- // Remove the double back ticks in the string.
- token.setText(token.getText.replace("``", "`"))
- token
- }
- }
-
- /** Treat non-reserved keywords as Identifiers. */
- override def exitNonReserved(ctx: NonReservedContext): Unit = {
- replaceTokenByIdentifier(ctx, 0)(identity)
- }
-
- private def replaceTokenByIdentifier(ctx: ParserRuleContext, stripMargins: Int)(
- f: CommonToken => CommonToken = identity): Unit = {
- val parent = ctx.getParent
- parent.removeLastChild()
- val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
- val newToken = new CommonToken(
- new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
- IcebergSqlExtensionsParser.IDENTIFIER,
- token.getChannel,
- token.getStartIndex + stripMargins,
- token.getStopIndex - stripMargins)
- parent.addChild(new TerminalNodeImpl(f(newToken)))
- }
-}
-
-/* Partially copied from Apache Spark's Parser to avoid dependency on Spark Internals */
-case object IcebergParseErrorListener extends BaseErrorListener {
- override def syntaxError(
- recognizer: Recognizer[_, _],
- offendingSymbol: scala.Any,
- line: Int,
- charPositionInLine: Int,
- msg: String,
- e: RecognitionException): Unit = {
- val (start, stop) = offendingSymbol match {
- case token: CommonToken =>
- val start = Origin(Some(line), Some(token.getCharPositionInLine))
- val length = token.getStopIndex - token.getStartIndex + 1
- val stop = Origin(Some(line), Some(token.getCharPositionInLine + length))
- (start, stop)
- case _ =>
- val start = Origin(Some(line), Some(charPositionInLine))
- (start, start)
- }
- throw new IcebergParseException(None, msg, start, stop)
- }
-}
-
-/**
- * Copied from Apache Spark
- * A [[ParseException]] is an [[AnalysisException]] that is thrown during the parse process. It
- * contains fields and an extended error message that make reporting and diagnosing errors easier.
- */
-class IcebergParseException(
- val command: Option[String],
- message: String,
- val start: Origin,
- val stop: Origin)
- extends AnalysisException(message, start.line, start.startPosition) {
-
- def this(message: String, ctx: ParserRuleContext) = {
- this(
- Option(IcebergParserUtils.command(ctx)),
- message,
- IcebergParserUtils.position(ctx.getStart),
- IcebergParserUtils.position(ctx.getStop))
- }
-
- override def getMessage: String = {
- val builder = new StringBuilder
- builder ++= "\n" ++= message
- start match {
- case Origin(Some(l), Some(p), Some(_), Some(_), Some(_), Some(_), Some(_)) =>
- builder ++= s"(line $l, pos $p)\n"
- command.foreach { cmd =>
- val (above, below) = cmd.split("\n").splitAt(l)
- builder ++= "\n== SQL ==\n"
- above.foreach(builder ++= _ += '\n')
- builder ++= (0 until p).map(_ => "-").mkString("") ++= "^^^\n"
- below.foreach(builder ++= _ += '\n')
- }
- case _ =>
- command.foreach { cmd =>
- builder ++= "\n== SQL ==\n" ++= cmd
- }
- }
- builder.toString
- }
-
- def withCommand(cmd: String): IcebergParseException = {
- new IcebergParseException(Option(cmd), message, start, stop)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSqlExtensionsAstBuilder.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSqlExtensionsAstBuilder.scala
deleted file mode 100644
index f87733e6b79b..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSqlExtensionsAstBuilder.scala
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.parser.extensions
-
-import java.util.Locale
-import java.util.concurrent.TimeUnit
-import org.antlr.v4.runtime._
-import org.antlr.v4.runtime.misc.Interval
-import org.antlr.v4.runtime.tree.ParseTree
-import org.antlr.v4.runtime.tree.TerminalNode
-import org.apache.iceberg.DistributionMode
-import org.apache.iceberg.NullOrder
-import org.apache.iceberg.SortDirection
-import org.apache.iceberg.expressions.Term
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergParserUtils.withOrigin
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser._
-import org.apache.spark.sql.catalyst.plans.logical.AddPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.BranchOptions
-import org.apache.spark.sql.catalyst.plans.logical.CallArgument
-import org.apache.spark.sql.catalyst.plans.logical.CallStatement
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceBranch
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceTag
-import org.apache.spark.sql.catalyst.plans.logical.DropBranch
-import org.apache.spark.sql.catalyst.plans.logical.DropIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.DropPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.DropTag
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.NamedArgument
-import org.apache.spark.sql.catalyst.plans.logical.PositionalArgument
-import org.apache.spark.sql.catalyst.plans.logical.ReplacePartitionField
-import org.apache.spark.sql.catalyst.plans.logical.SetIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.SetWriteDistributionAndOrdering
-import org.apache.spark.sql.catalyst.plans.logical.TagOptions
-import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.connector.expressions
-import org.apache.spark.sql.connector.expressions.ApplyTransform
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.LiteralValue
-import org.apache.spark.sql.connector.expressions.Transform
-import scala.jdk.CollectionConverters._
-
-class IcebergSqlExtensionsAstBuilder(delegate: ParserInterface)
- extends IcebergSqlExtensionsBaseVisitor[AnyRef] {
-
- private def toBuffer[T](list: java.util.List[T]): scala.collection.mutable.Buffer[T] =
- list.asScala
- private def toSeq[T](list: java.util.List[T]): Seq[T] = toBuffer(list).toSeq
-
- /**
- * Create a [[CallStatement]] for a stored procedure call.
- */
- override def visitCall(ctx: CallContext): CallStatement = withOrigin(ctx) {
- val name = toSeq(ctx.multipartIdentifier.parts).map(_.getText)
- val args = toSeq(ctx.callArgument).map(typedVisit[CallArgument])
- CallStatement(name, args)
- }
-
- /**
- * Create an ADD PARTITION FIELD logical command.
- */
- override def visitAddPartitionField(ctx: AddPartitionFieldContext): AddPartitionField =
- withOrigin(ctx) {
- AddPartitionField(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- typedVisit[Transform](ctx.transform),
- Option(ctx.name).map(_.getText))
- }
-
- /**
- * Create a DROP PARTITION FIELD logical command.
- */
- override def visitDropPartitionField(ctx: DropPartitionFieldContext): DropPartitionField =
- withOrigin(ctx) {
- DropPartitionField(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- typedVisit[Transform](ctx.transform))
- }
-
- /**
- * Create a CREATE OR REPLACE BRANCH logical command.
- */
- override def visitCreateOrReplaceBranch(
- ctx: CreateOrReplaceBranchContext): CreateOrReplaceBranch = withOrigin(ctx) {
- val createOrReplaceBranchClause = ctx.createReplaceBranchClause()
-
- val branchName = createOrReplaceBranchClause.identifier()
- val branchOptionsContext = Option(createOrReplaceBranchClause.branchOptions())
- val snapshotId = branchOptionsContext
- .flatMap(branchOptions => Option(branchOptions.snapshotId()))
- .map(_.getText.toLong)
- val snapshotRetention =
- branchOptionsContext.flatMap(branchOptions => Option(branchOptions.snapshotRetention()))
- val minSnapshotsToKeep = snapshotRetention
- .flatMap(retention => Option(retention.minSnapshotsToKeep()))
- .map(minSnapshots => minSnapshots.number().getText.toLong)
- val maxSnapshotAgeMs = snapshotRetention
- .flatMap(retention => Option(retention.maxSnapshotAge()))
- .map(retention =>
- TimeUnit
- .valueOf(retention.timeUnit().getText.toUpperCase(Locale.ENGLISH))
- .toMillis(retention.number().getText.toLong))
- val branchRetention =
- branchOptionsContext.flatMap(branchOptions => Option(branchOptions.refRetain()))
- val branchRefAgeMs = branchRetention.map(retain =>
- TimeUnit
- .valueOf(retain.timeUnit().getText.toUpperCase(Locale.ENGLISH))
- .toMillis(retain.number().getText.toLong))
- val create = createOrReplaceBranchClause.CREATE() != null
- val replace = ctx.createReplaceBranchClause().REPLACE() != null
- val ifNotExists = createOrReplaceBranchClause.EXISTS() != null
-
- val branchOptions =
- BranchOptions(snapshotId, minSnapshotsToKeep, maxSnapshotAgeMs, branchRefAgeMs)
-
- CreateOrReplaceBranch(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- branchName.getText,
- branchOptions,
- create,
- replace,
- ifNotExists)
- }
-
- /**
- * Create an CREATE OR REPLACE TAG logical command.
- */
- override def visitCreateOrReplaceTag(ctx: CreateOrReplaceTagContext): CreateOrReplaceTag =
- withOrigin(ctx) {
- val createTagClause = ctx.createReplaceTagClause()
-
- val tagName = createTagClause.identifier().getText
-
- val tagOptionsContext = Option(createTagClause.tagOptions())
- val snapshotId = tagOptionsContext
- .flatMap(tagOptions => Option(tagOptions.snapshotId()))
- .map(_.getText.toLong)
- val tagRetain = tagOptionsContext.flatMap(tagOptions => Option(tagOptions.refRetain()))
- val tagRefAgeMs = tagRetain.map(retain =>
- TimeUnit
- .valueOf(retain.timeUnit().getText.toUpperCase(Locale.ENGLISH))
- .toMillis(retain.number().getText.toLong))
- val tagOptions = TagOptions(snapshotId, tagRefAgeMs)
-
- val create = createTagClause.CREATE() != null
- val replace = createTagClause.REPLACE() != null
- val ifNotExists = createTagClause.EXISTS() != null
-
- CreateOrReplaceTag(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- tagName,
- tagOptions,
- create,
- replace,
- ifNotExists)
- }
-
- /**
- * Create an DROP BRANCH logical command.
- */
- override def visitDropBranch(ctx: DropBranchContext): DropBranch = withOrigin(ctx) {
- DropBranch(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- ctx.identifier().getText,
- ctx.EXISTS() != null)
- }
-
- /**
- * Create an DROP TAG logical command.
- */
- override def visitDropTag(ctx: DropTagContext): DropTag = withOrigin(ctx) {
- DropTag(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- ctx.identifier().getText,
- ctx.EXISTS() != null)
- }
-
- /**
- * Create an REPLACE PARTITION FIELD logical command.
- */
- override def visitReplacePartitionField(
- ctx: ReplacePartitionFieldContext): ReplacePartitionField = withOrigin(ctx) {
- ReplacePartitionField(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- typedVisit[Transform](ctx.transform(0)),
- typedVisit[Transform](ctx.transform(1)),
- Option(ctx.name).map(_.getText))
- }
-
- /**
- * Create an SET IDENTIFIER FIELDS logical command.
- */
- override def visitSetIdentifierFields(ctx: SetIdentifierFieldsContext): SetIdentifierFields =
- withOrigin(ctx) {
- SetIdentifierFields(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- toSeq(ctx.fieldList.fields).map(_.getText))
- }
-
- /**
- * Create an DROP IDENTIFIER FIELDS logical command.
- */
- override def visitDropIdentifierFields(ctx: DropIdentifierFieldsContext): DropIdentifierFields =
- withOrigin(ctx) {
- DropIdentifierFields(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- toSeq(ctx.fieldList.fields).map(_.getText))
- }
-
- /**
- * Create a [[SetWriteDistributionAndOrdering]] for changing the write distribution and ordering.
- */
- override def visitSetWriteDistributionAndOrdering(
- ctx: SetWriteDistributionAndOrderingContext): SetWriteDistributionAndOrdering = {
-
- val tableName = typedVisit[Seq[String]](ctx.multipartIdentifier)
-
- val (distributionSpec, orderingSpec) = toDistributionAndOrderingSpec(ctx.writeSpec)
-
- if (distributionSpec == null && orderingSpec == null) {
- throw new AnalysisException(
- "ALTER TABLE has no changes: missing both distribution and ordering clauses")
- }
-
- val distributionMode = if (distributionSpec != null) {
- Some(DistributionMode.HASH)
- } else if (orderingSpec.UNORDERED != null) {
- Some(DistributionMode.NONE)
- } else if (orderingSpec.LOCALLY() != null) {
- None
- } else {
- Some(DistributionMode.RANGE)
- }
-
- val ordering = if (orderingSpec != null && orderingSpec.order != null) {
- toSeq(orderingSpec.order.fields).map(typedVisit[(Term, SortDirection, NullOrder)])
- } else {
- Seq.empty
- }
-
- SetWriteDistributionAndOrdering(tableName, distributionMode, ordering)
- }
-
- private def toDistributionAndOrderingSpec(
- writeSpec: WriteSpecContext): (WriteDistributionSpecContext, WriteOrderingSpecContext) = {
-
- if (writeSpec.writeDistributionSpec.size > 1) {
- throw new AnalysisException("ALTER TABLE contains multiple distribution clauses")
- }
-
- if (writeSpec.writeOrderingSpec.size > 1) {
- throw new AnalysisException("ALTER TABLE contains multiple ordering clauses")
- }
-
- val distributionSpec = toBuffer(writeSpec.writeDistributionSpec).headOption.orNull
- val orderingSpec = toBuffer(writeSpec.writeOrderingSpec).headOption.orNull
-
- (distributionSpec, orderingSpec)
- }
-
- /**
- * Create an order field.
- */
- override def visitOrderField(ctx: OrderFieldContext): (Term, SortDirection, NullOrder) = {
- val term = Spark3Util.toIcebergTerm(typedVisit[Transform](ctx.transform))
- val direction = Option(ctx.ASC)
- .map(_ => SortDirection.ASC)
- .orElse(Option(ctx.DESC).map(_ => SortDirection.DESC))
- .getOrElse(SortDirection.ASC)
- val nullOrder = Option(ctx.FIRST)
- .map(_ => NullOrder.NULLS_FIRST)
- .orElse(Option(ctx.LAST).map(_ => NullOrder.NULLS_LAST))
- .getOrElse(
- if (direction == SortDirection.ASC) NullOrder.NULLS_FIRST else NullOrder.NULLS_LAST)
- (term, direction, nullOrder)
- }
-
- /**
- * Create an IdentityTransform for a column reference.
- */
- override def visitIdentityTransform(ctx: IdentityTransformContext): Transform = withOrigin(ctx) {
- IdentityTransform(FieldReference(typedVisit[Seq[String]](ctx.multipartIdentifier())))
- }
-
- /**
- * Create a named Transform from argument expressions.
- */
- override def visitApplyTransform(ctx: ApplyTransformContext): Transform = withOrigin(ctx) {
- val args = toSeq(ctx.arguments).map(typedVisit[expressions.Expression])
- ApplyTransform(ctx.transformName.getText, args)
- }
-
- /**
- * Create a transform argument from a column reference or a constant.
- */
- override def visitTransformArgument(ctx: TransformArgumentContext): expressions.Expression =
- withOrigin(ctx) {
- val reference = Option(ctx.multipartIdentifier())
- .map(typedVisit[Seq[String]])
- .map(FieldReference(_))
- val literal = Option(ctx.constant)
- .map(visitConstant)
- .map(lit => LiteralValue(lit.value, lit.dataType))
- reference
- .orElse(literal)
- .getOrElse(throw new IcebergParseException(s"Invalid transform argument", ctx))
- }
-
- /**
- * Return a multi-part identifier as Seq[String].
- */
- override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] =
- withOrigin(ctx) {
- toSeq(ctx.parts).map(_.getText)
- }
-
- override def visitSingleOrder(ctx: SingleOrderContext): Seq[(Term, SortDirection, NullOrder)] =
- withOrigin(ctx) {
- toSeq(ctx.order.fields).map(typedVisit[(Term, SortDirection, NullOrder)])
- }
-
- /**
- * Create a positional argument in a stored procedure call.
- */
- override def visitPositionalArgument(ctx: PositionalArgumentContext): CallArgument =
- withOrigin(ctx) {
- val expr = typedVisit[Expression](ctx.expression)
- PositionalArgument(expr)
- }
-
- /**
- * Create a named argument in a stored procedure call.
- */
- override def visitNamedArgument(ctx: NamedArgumentContext): CallArgument = withOrigin(ctx) {
- val name = ctx.identifier.getText
- val expr = typedVisit[Expression](ctx.expression)
- NamedArgument(name, expr)
- }
-
- override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) {
- visit(ctx.statement).asInstanceOf[LogicalPlan]
- }
-
- def visitConstant(ctx: ConstantContext): Literal = {
- delegate.parseExpression(ctx.getText).asInstanceOf[Literal]
- }
-
- override def visitExpression(ctx: ExpressionContext): Expression = {
- // reconstruct the SQL string and parse it using the main Spark parser
- // while we can avoid the logic to build Spark expressions, we still have to parse them
- // we cannot call ctx.getText directly since it will not render spaces correctly
- // that's why we need to recurse down the tree in reconstructSqlString
- val sqlString = reconstructSqlString(ctx)
- delegate.parseExpression(sqlString)
- }
-
- private def reconstructSqlString(ctx: ParserRuleContext): String = {
- toBuffer(ctx.children)
- .map {
- case c: ParserRuleContext => reconstructSqlString(c)
- case t: TerminalNode => t.getText
- }
- .mkString(" ")
- }
-
- private def typedVisit[T](ctx: ParseTree): T = {
- ctx.accept(this).asInstanceOf[T]
- }
-}
-
-/* Partially copied from Apache Spark's Parser to avoid dependency on Spark Internals */
-object IcebergParserUtils {
-
- private[sql] def withOrigin[T](ctx: ParserRuleContext)(f: => T): T = {
- val current = CurrentOrigin.get
- CurrentOrigin.set(position(ctx.getStart))
- try {
- f
- } finally {
- CurrentOrigin.set(current)
- }
- }
-
- private[sql] def position(token: Token): Origin = {
- val opt = Option(token)
- Origin(opt.map(_.getLine), opt.map(_.getCharPositionInLine))
- }
-
- /** Get the command which created the token. */
- private[sql] def command(ctx: ParserRuleContext): String = {
- val stream = ctx.getStart.getInputStream
- stream.getText(Interval.of(0, stream.size() - 1))
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/planning/RewrittenRowLevelCommand.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/planning/RewrittenRowLevelCommand.scala
deleted file mode 100644
index 623251d1222c..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/planning/RewrittenRowLevelCommand.scala
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.planning
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
-
-/**
- * An extractor for operations such as DELETE and MERGE that require rewriting data.
- *
- * This class extracts the following entities:
- * - the row-level command (such as DeleteFromIcebergTable);
- * - the read relation in the rewrite plan that can be either DataSourceV2Relation or
- * DataSourceV2ScanRelation depending on whether the planning has already happened;
- * - the current rewrite plan.
- */
-object RewrittenRowLevelCommand {
- type ReturnType = (RowLevelCommand, LogicalPlan, LogicalPlan)
-
- def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
- case c: RowLevelCommand if c.rewritePlan.nonEmpty =>
- val rewritePlan = c.rewritePlan.get
-
- // both ReplaceData and WriteDelta reference a write relation
- // but the corresponding read relation should be at the bottom of the write plan
- // both the write and read relations will share the same RowLevelOperationTable object
- // that's why it is safe to use reference equality to find the needed read relation
-
- val allowScanDuplication = c match {
- // group-based updates that rely on the union approach may have multiple identical scans
- case _: UpdateIcebergTable if rewritePlan.isInstanceOf[ReplaceIcebergData] => true
- case _ => false
- }
-
- rewritePlan match {
- case rd @ ReplaceIcebergData(DataSourceV2Relation(table, _, _, _, _), query, _, _) =>
- val readRelation = findReadRelation(table, query, allowScanDuplication)
- readRelation.map((c, _, rd))
- case wd @ WriteIcebergDelta(DataSourceV2Relation(table, _, _, _, _), query, _, _, _) =>
- val readRelation = findReadRelation(table, query, allowScanDuplication)
- readRelation.map((c, _, wd))
- case _ =>
- None
- }
-
- case _ =>
- None
- }
-
- private def findReadRelation(
- table: Table,
- plan: LogicalPlan,
- allowScanDuplication: Boolean): Option[LogicalPlan] = {
-
- val readRelations = plan.collect {
- case r: DataSourceV2Relation if r.table eq table => r
- case r: DataSourceV2ScanRelation if r.relation.table eq table => r
- }
-
- // in some cases, the optimizer replaces the v2 read relation with a local relation
- // for example, there is no reason to query the table if the condition is always false
- // that's why it is valid not to find the corresponding v2 read relation
-
- readRelations match {
- case relations if relations.isEmpty =>
- None
-
- case Seq(relation) =>
- Some(relation)
-
- case Seq(relation1: DataSourceV2Relation, relation2: DataSourceV2Relation)
- if allowScanDuplication && (relation1.table eq relation2.table) =>
- Some(relation1)
-
- case Seq(relation1: DataSourceV2ScanRelation, relation2: DataSourceV2ScanRelation)
- if allowScanDuplication && (relation1.scan eq relation2.scan) =>
- Some(relation1)
-
- case Seq(relation1, relation2) if allowScanDuplication =>
- throw new AnalysisException(s"Row-level read relations don't match: $relation1, $relation2")
-
- case relations if allowScanDuplication =>
- throw new AnalysisException(s"Expected up to two row-level read relations: $relations")
-
- case relations =>
- throw new AnalysisException(s"Expected only one row-level read relation: $relations")
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AddPartitionField.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AddPartitionField.scala
deleted file mode 100644
index 0a830dbd4f6a..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AddPartitionField.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class AddPartitionField(table: Seq[String], transform: Transform, name: Option[String])
- extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"AddPartitionField ${table.quoted} ${name.map(n => s"$n=").getOrElse("")}${transform.describe}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/BranchOptions.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/BranchOptions.scala
deleted file mode 100644
index 15b908300213..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/BranchOptions.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-case class BranchOptions(
- snapshotId: Option[Long],
- numSnapshots: Option[Long],
- snapshotRetain: Option[Long],
- snapshotRefRetain: Option[Long])
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala
deleted file mode 100644
index e80715e57ba1..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.connector.iceberg.catalog.Procedure
-
-case class Call(procedure: Procedure, args: Seq[Expression]) extends LeafCommand {
- override lazy val output: Seq[Attribute] = procedure.outputType.toAttributes
-
- override def simpleString(maxFields: Int): String = {
- s"Call${truncatedString(output.toSeq, "[", ", ", "]", maxFields)} ${procedure.description}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceBranch.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceBranch.scala
deleted file mode 100644
index 6900f6e8cc50..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceBranch.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class CreateOrReplaceBranch(
- table: Seq[String],
- branch: String,
- branchOptions: BranchOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean)
- extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"CreateOrReplaceBranch branch: ${branch} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceTag.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceTag.scala
deleted file mode 100644
index 957c68e7a540..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceTag.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class CreateOrReplaceTag(
- table: Seq[String],
- tag: String,
- tagOptions: TagOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean)
- extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"CreateOrReplaceTag tag: ${tag} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropBranch.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropBranch.scala
deleted file mode 100644
index ed4f1f512b85..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropBranch.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class DropBranch(table: Seq[String], branch: String, ifExists: Boolean) extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropBranch branch: ${branch} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropIdentifierFields.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropIdentifierFields.scala
deleted file mode 100644
index 1a91806280b3..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropIdentifierFields.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class DropIdentifierFields(table: Seq[String], fields: Seq[String]) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropIdentifierFields ${table.quoted} (${fields.quoted})"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropPartitionField.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropPartitionField.scala
deleted file mode 100644
index ec952c8c7118..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropPartitionField.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class DropPartitionField(table: Seq[String], transform: Transform) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropPartitionField ${table.quoted} ${transform.describe}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropTag.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropTag.scala
deleted file mode 100644
index da69ca0383a1..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropTag.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class DropTag(table: Seq[String], tag: String, ifExists: Boolean) extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropTag tag: ${tag} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeIntoIcebergTable.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeIntoIcebergTable.scala
deleted file mode 100644
index 9e2734ab1f68..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeIntoIcebergTable.scala
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-case class MergeIntoIcebergTable(
- targetTable: LogicalPlan,
- sourceTable: LogicalPlan,
- mergeCondition: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction],
- rewritePlan: Option[LogicalPlan] = None)
- extends RowLevelCommand {
-
- lazy val aligned: Boolean = {
- val matchedActionsAligned = matchedActions.forall {
- case UpdateAction(_, assignments) =>
- AssignmentUtils.aligned(targetTable, assignments)
- case _: DeleteAction =>
- true
- case _ =>
- false
- }
-
- val notMatchedActionsAligned = notMatchedActions.forall {
- case InsertAction(_, assignments) =>
- AssignmentUtils.aligned(targetTable, assignments)
- case _ =>
- false
- }
-
- matchedActionsAligned && notMatchedActionsAligned
- }
-
- def condition: Option[Expression] = Some(mergeCondition)
-
- override def children: Seq[LogicalPlan] = if (rewritePlan.isDefined) {
- targetTable :: sourceTable :: rewritePlan.get :: Nil
- } else {
- targetTable :: sourceTable :: Nil
- }
-
- override def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand = {
- copy(rewritePlan = Some(newRewritePlan))
- }
-
- override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[LogicalPlan]): MergeIntoIcebergTable = {
-
- newChildren match {
- case Seq(newTarget, newSource) =>
- copy(targetTable = newTarget, sourceTable = newSource, rewritePlan = None)
- case Seq(newTarget, newSource, newRewritePlan) =>
- copy(targetTable = newTarget, sourceTable = newSource, rewritePlan = Some(newRewritePlan))
- case _ =>
- throw new IllegalArgumentException(
- "MergeIntoIcebergTable expects either two or three children")
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeRows.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeRows.scala
deleted file mode 100644
index d808737d1048..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeRows.scala
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.util.truncatedString
-
-case class MergeRows(
- isSourceRowPresent: Expression,
- isTargetRowPresent: Expression,
- matchedConditions: Seq[Expression],
- matchedOutputs: Seq[Seq[Seq[Expression]]],
- notMatchedConditions: Seq[Expression],
- notMatchedOutputs: Seq[Seq[Expression]],
- targetOutput: Seq[Expression],
- performCardinalityCheck: Boolean,
- emitNotMatchedTargetRows: Boolean,
- output: Seq[Attribute],
- child: LogicalPlan)
- extends UnaryNode {
-
- require(targetOutput.nonEmpty || !emitNotMatchedTargetRows)
-
- override lazy val producedAttributes: AttributeSet = {
- AttributeSet(output.filterNot(attr => inputSet.contains(attr)))
- }
-
- override lazy val references: AttributeSet = child.outputSet
-
- override def simpleString(maxFields: Int): String = {
- s"MergeRows${truncatedString(output, "[", ", ", "]", maxFields)}"
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
- copy(child = newChild)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NoStatsUnaryNode.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NoStatsUnaryNode.scala
deleted file mode 100644
index 936873466721..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NoStatsUnaryNode.scala
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class NoStatsUnaryNode(child: LogicalPlan) extends UnaryNode {
- override def output: Seq[Attribute] = child.output
- override def stats: Statistics = Statistics(Long.MaxValue)
-
- override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
- copy(child = newChild)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplaceIcebergData.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplaceIcebergData.scala
deleted file mode 100644
index 6648193229df..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplaceIcebergData.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.connector.write.Write
-import org.apache.spark.sql.types.DataType
-
-/**
- * Replace data in an existing table.
- */
-case class ReplaceIcebergData(
- table: NamedRelation,
- query: LogicalPlan,
- originalTable: NamedRelation,
- write: Option[Write] = None)
- extends V2WriteCommandLike {
-
- override lazy val references: AttributeSet = query.outputSet
- override lazy val stringArgs: Iterator[Any] = Iterator(table, query, write)
-
- // the incoming query may include metadata columns
- lazy val dataInput: Seq[Attribute] = {
- val tableAttrNames = table.output.map(_.name)
- query.output.filter(attr => tableAttrNames.exists(conf.resolver(_, attr.name)))
- }
-
- override def outputResolved: Boolean = {
- assert(
- table.resolved && query.resolved,
- "`outputResolved` can only be called when `table` and `query` are both resolved.")
-
- // take into account only incoming data columns and ignore metadata columns in the query
- // they will be discarded after the logical write is built in the optimizer
- // metadata columns may be needed to request a correct distribution or ordering
- // but are not passed back to the data source during writes
-
- table.skipSchemaResolution || (dataInput.size == table.output.size &&
- dataInput.zip(table.output).forall { case (inAttr, outAttr) =>
- val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType)
- // names and types must match, nullability must be compatible
- inAttr.name == outAttr.name &&
- DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) &&
- (outAttr.nullable || !inAttr.nullable)
- })
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): ReplaceIcebergData = {
- copy(query = newChild)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplacePartitionField.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplacePartitionField.scala
deleted file mode 100644
index c2525369e7c7..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplacePartitionField.scala
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class ReplacePartitionField(
- table: Seq[String],
- transformFrom: Transform,
- transformTo: Transform,
- name: Option[String])
- extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"ReplacePartitionField ${table.quoted} ${transformFrom.describe} " +
- s"with ${name.map(n => s"$n=").getOrElse("")}${transformTo.describe}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RowLevelCommand.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RowLevelCommand.scala
deleted file mode 100644
index ffddce9f3e0e..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RowLevelCommand.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-trait RowLevelCommand extends Command with SupportsSubquery {
- def condition: Option[Expression]
- def rewritePlan: Option[LogicalPlan]
- def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SetIdentifierFields.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SetIdentifierFields.scala
deleted file mode 100644
index 8cd2c0ddad05..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SetIdentifierFields.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class SetIdentifierFields(table: Seq[String], fields: Seq[String]) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"SetIdentifierFields ${table.quoted} (${fields.quoted})"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TagOptions.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TagOptions.scala
deleted file mode 100644
index 6afe1478d747..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TagOptions.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-case class TagOptions(snapshotId: Option[Long], snapshotRefRetain: Option[Long])
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UnresolvedMergeIntoIcebergTable.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UnresolvedMergeIntoIcebergTable.scala
deleted file mode 100644
index 1ecb727007db..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UnresolvedMergeIntoIcebergTable.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-/**
- * A node that hides the MERGE condition and actions from regular Spark resolution.
- */
-case class UnresolvedMergeIntoIcebergTable(
- targetTable: LogicalPlan,
- sourceTable: LogicalPlan,
- context: MergeIntoContext)
- extends BinaryCommand {
-
- def duplicateResolved: Boolean = targetTable.outputSet.intersect(sourceTable.outputSet).isEmpty
-
- override def left: LogicalPlan = targetTable
- override def right: LogicalPlan = sourceTable
-
- override protected def withNewChildrenInternal(
- newLeft: LogicalPlan,
- newRight: LogicalPlan): LogicalPlan = {
- copy(targetTable = newLeft, sourceTable = newRight)
- }
-}
-
-case class MergeIntoContext(
- mergeCondition: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction])
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateIcebergTable.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateIcebergTable.scala
deleted file mode 100644
index 58aef899d639..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateIcebergTable.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-case class UpdateIcebergTable(
- table: LogicalPlan,
- assignments: Seq[Assignment],
- condition: Option[Expression],
- rewritePlan: Option[LogicalPlan] = None)
- extends RowLevelCommand {
-
- lazy val aligned: Boolean = AssignmentUtils.aligned(table, assignments)
-
- override def children: Seq[LogicalPlan] = if (rewritePlan.isDefined) {
- table :: rewritePlan.get :: Nil
- } else {
- table :: Nil
- }
-
- override def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand = {
- copy(rewritePlan = Some(newRewritePlan))
- }
-
- override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[LogicalPlan]): UpdateIcebergTable = {
- if (newChildren.size == 1) {
- copy(table = newChildren.head, rewritePlan = None)
- } else {
- require(newChildren.size == 2, "UpdateTable expects either one or two children")
- val Seq(newTable, newRewritePlan) = newChildren.take(2)
- copy(table = newTable, rewritePlan = Some(newRewritePlan))
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateRows.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateRows.scala
deleted file mode 100644
index 268150e6b086..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateRows.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.util.truncatedString
-
-case class UpdateRows(
- deleteOutput: Seq[Expression],
- insertOutput: Seq[Expression],
- output: Seq[Attribute],
- child: LogicalPlan)
- extends UnaryNode {
-
- override lazy val producedAttributes: AttributeSet = {
- AttributeSet(output.filterNot(attr => inputSet.contains(attr)))
- }
-
- override def simpleString(maxFields: Int): String = {
- s"UpdateRows${truncatedString(output, "[", ", ", "]", maxFields)}"
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
- copy(child = newChild)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/V2WriteCommandLike.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/V2WriteCommandLike.scala
deleted file mode 100644
index e34d1e2daa05..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/V2WriteCommandLike.scala
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-
-// a node similar to V2WriteCommand in Spark but does not extend Command
-// as ReplaceData and WriteDelta that extend this trait are nested within other commands
-trait V2WriteCommandLike extends UnaryNode {
- def table: NamedRelation
- def query: LogicalPlan
- def outputResolved: Boolean
-
- override lazy val resolved: Boolean = table.resolved && query.resolved && outputResolved
-
- override def child: LogicalPlan = query
- override def output: Seq[Attribute] = Seq.empty
- override def producedAttributes: AttributeSet = outputSet
- // Commands are eagerly executed. They will be converted to LocalRelation after the DataFrame
- // is created. That said, the statistics of a command is useless. Here we just return a dummy
- // statistics to avoid unnecessary statistics calculation of command's children.
- override def stats: Statistics = Statistics.DUMMY
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/WriteIcebergDelta.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/WriteIcebergDelta.scala
deleted file mode 100644
index 66ece84f883e..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/WriteIcebergDelta.scala
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
-import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils.OPERATION_COLUMN
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.write.DeltaWrite
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.connector.write.SupportsDelta
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.IntegerType
-import org.apache.spark.sql.types.StructField
-
-/**
- * Writes a delta of rows to an existing table.
- */
-case class WriteIcebergDelta(
- table: NamedRelation,
- query: LogicalPlan,
- originalTable: NamedRelation,
- projections: WriteDeltaProjections,
- write: Option[DeltaWrite] = None)
- extends V2WriteCommandLike {
-
- override protected lazy val stringArgs: Iterator[Any] = Iterator(table, query, write)
-
- private def operationResolved: Boolean = {
- val attr = query.output.head
- attr.name == OPERATION_COLUMN && attr.dataType == IntegerType && !attr.nullable
- }
-
- private def operation: SupportsDelta = {
- EliminateSubqueryAliases(table) match {
- case DataSourceV2Relation(RowLevelOperationTable(_, operation), _, _, _, _) =>
- operation match {
- case supportsDelta: SupportsDelta =>
- supportsDelta
- case _ =>
- throw new AnalysisException(s"Operation $operation is not a delta operation")
- }
- case _ =>
- throw new AnalysisException(s"Cannot retrieve row-level operation from $table")
- }
- }
-
- private def rowAttrsResolved: Boolean = {
- table.skipSchemaResolution || (projections.rowProjection match {
- case Some(projection) =>
- table.output.size == projection.schema.size &&
- projection.schema.zip(table.output).forall { case (field, outAttr) =>
- isCompatible(field, outAttr)
- }
- case None =>
- true
- })
- }
-
- private def rowIdAttrsResolved: Boolean = {
- val rowIdAttrs =
- V2ExpressionUtils.resolveRefs[AttributeReference](operation.rowId.toSeq, originalTable)
-
- projections.rowIdProjection.schema.forall { field =>
- rowIdAttrs.exists(rowIdAttr => isCompatible(field, rowIdAttr))
- }
- }
-
- private def metadataAttrsResolved: Boolean = {
- projections.metadataProjection match {
- case Some(projection) =>
- val metadataAttrs = V2ExpressionUtils.resolveRefs[AttributeReference](
- operation.requiredMetadataAttributes.toSeq,
- originalTable)
-
- projection.schema.forall { field =>
- metadataAttrs.exists(metadataAttr => isCompatible(field, metadataAttr))
- }
- case None =>
- true
- }
- }
-
- private def isCompatible(projectionField: StructField, outAttr: NamedExpression): Boolean = {
- val inType = CharVarcharUtils.getRawType(projectionField.metadata).getOrElse(outAttr.dataType)
- val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType)
- // names and types must match, nullability must be compatible
- projectionField.name == outAttr.name &&
- DataType.equalsIgnoreCompatibleNullability(inType, outType) &&
- (outAttr.nullable || !projectionField.nullable)
- }
-
- override def outputResolved: Boolean = {
- assert(
- table.resolved && query.resolved,
- "`outputResolved` can only be called when `table` and `query` are both resolved.")
-
- operationResolved && rowAttrsResolved && rowIdAttrsResolved && metadataAttrsResolved
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): WriteIcebergDelta = {
- copy(query = newChild)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
deleted file mode 100644
index 9986b30c45ed..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-/**
- * A CALL statement, as parsed from SQL.
- */
-case class CallStatement(name: Seq[String], args: Seq[CallArgument]) extends LeafParsedStatement
-
-/**
- * An argument in a CALL statement.
- */
-sealed trait CallArgument {
- def expr: Expression
-}
-
-/**
- * An argument in a CALL statement identified by name.
- */
-case class NamedArgument(name: String, expr: Expression) extends CallArgument
-
-/**
- * An argument in a CALL statement identified by position.
- */
-case class PositionalArgument(expr: Expression) extends CallArgument
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
deleted file mode 100644
index 84a00a4a9a88..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical.views
-
-import org.apache.spark.sql.catalyst.analysis.AnalysisContext
-import org.apache.spark.sql.catalyst.plans.logical.AnalysisOnlyCommand
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-
-// Align Iceberg's CreateIcebergView with Spark’s CreateViewCommand by extending AnalysisOnlyCommand.
-// The command’s children are analyzed then hidden, so the optimizer/planner won’t traverse the view body.
-case class CreateIcebergView(
- child: LogicalPlan,
- queryText: String,
- query: LogicalPlan,
- columnAliases: Seq[String],
- columnComments: Seq[Option[String]],
- queryColumnNames: Seq[String] = Seq.empty,
- comment: Option[String],
- properties: Map[String, String],
- allowExisting: Boolean,
- replace: Boolean,
- rewritten: Boolean = false,
- isAnalyzed: Boolean = false)
- extends AnalysisOnlyCommand {
-
- override def childrenToAnalyze: Seq[LogicalPlan] = child :: query :: Nil
-
- override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = {
- copy(isAnalyzed = true)
- }
-
- override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
- assert(!isAnalyzed)
- copy(child = newChildren.head, query = newChildren.last)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/DropIcebergView.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/DropIcebergView.scala
deleted file mode 100644
index 092b6b33fb0c..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/DropIcebergView.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical.views
-
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.UnaryCommand
-
-case class DropIcebergView(child: LogicalPlan, ifExists: Boolean) extends UnaryCommand {
- override protected def withNewChildInternal(newChild: LogicalPlan): DropIcebergView =
- copy(child = newChild)
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/ResolvedV2View.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/ResolvedV2View.scala
deleted file mode 100644
index 4d384e857703..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/ResolvedV2View.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical.views
-
-import org.apache.spark.sql.catalyst.analysis.LeafNodeWithoutStats
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-
-case class ResolvedV2View(catalog: ViewCatalog, identifier: Identifier)
- extends LeafNodeWithoutStats {
- override def output: Seq[Attribute] = Nil
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/ShowIcebergViews.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/ShowIcebergViews.scala
deleted file mode 100644
index cbfe23d94cbe..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/ShowIcebergViews.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.catalyst.plans.logical.views
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.ShowViews
-import org.apache.spark.sql.catalyst.plans.logical.UnaryCommand
-
-case class ShowIcebergViews(
- namespace: LogicalPlan,
- pattern: Option[String],
- override val output: Seq[Attribute] = ShowViews.getOutputAttrs)
- extends UnaryCommand {
- override def child: LogicalPlan = namespace
-
- override protected def withNewChildInternal(newChild: LogicalPlan): ShowIcebergViews =
- copy(namespace = newChild)
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AddPartitionFieldExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AddPartitionFieldExec.scala
deleted file mode 100644
index e28dcfb194b6..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AddPartitionFieldExec.scala
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class AddPartitionFieldExec(
- catalog: TableCatalog,
- ident: Identifier,
- transform: Transform,
- name: Option[String])
- extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- iceberg.table
- .updateSpec()
- .addField(name.orNull, Spark3Util.toIcebergTerm(transform))
- .commit()
-
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot add partition field to non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"AddPartitionField ${catalog.name}.${ident.quoted} ${name.map(n => s"$n=").getOrElse("")}${transform.describe}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewSetPropertiesExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewSetPropertiesExec.scala
deleted file mode 100644
index d6630e51ff5a..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewSetPropertiesExec.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.connector.catalog.ViewChange
-
-case class AlterV2ViewSetPropertiesExec(
- catalog: ViewCatalog,
- ident: Identifier,
- properties: Map[String, String])
- extends LeafV2CommandExec {
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- val changes = properties.map { case (property, value) =>
- ViewChange.setProperty(property, value)
- }.toSeq
-
- catalog.alterView(ident, changes: _*)
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"AlterV2ViewSetProperties: ${ident}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewUnsetPropertiesExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewUnsetPropertiesExec.scala
deleted file mode 100644
index 7ebf0b9f7085..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewUnsetPropertiesExec.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.connector.catalog.ViewChange
-
-case class AlterV2ViewUnsetPropertiesExec(
- catalog: ViewCatalog,
- ident: Identifier,
- propertyKeys: Seq[String],
- ifExists: Boolean)
- extends LeafV2CommandExec {
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- if (!ifExists) {
- propertyKeys.filterNot(catalog.loadView(ident).properties.containsKey).foreach { property =>
- throw new AnalysisException(s"Cannot remove property that is not set: '$property'")
- }
- }
-
- val changes = propertyKeys.map(ViewChange.removeProperty)
- catalog.alterView(ident, changes: _*)
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"AlterV2ViewUnsetProperties: ${ident}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CallExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CallExec.scala
deleted file mode 100644
index a12277502521..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CallExec.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.connector.iceberg.catalog.Procedure
-import scala.collection.compat.immutable.ArraySeq
-
-case class CallExec(output: Seq[Attribute], procedure: Procedure, input: InternalRow)
- extends LeafV2CommandExec {
-
- override protected def run(): Seq[InternalRow] = {
- ArraySeq.unsafeWrapArray(procedure.call(input))
- }
-
- override def simpleString(maxFields: Int): String = {
- s"CallExec${truncatedString(output, "[", ", ", "]", maxFields)} ${procedure.description}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceBranchExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceBranchExec.scala
deleted file mode 100644
index baf985f53a22..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceBranchExec.scala
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.BranchOptions
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class CreateOrReplaceBranchExec(
- catalog: TableCatalog,
- ident: Identifier,
- branch: String,
- branchOptions: BranchOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean)
- extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val snapshotId: java.lang.Long = branchOptions.snapshotId
- .orElse(Option(iceberg.table.currentSnapshot()).map(_.snapshotId()))
- .map(java.lang.Long.valueOf)
- .orNull
-
- val manageSnapshots = iceberg.table().manageSnapshots()
- val refExists = null != iceberg.table().refs().get(branch)
-
- def safeCreateBranch(): Unit = {
- if (snapshotId == null) {
- manageSnapshots.createBranch(branch)
- } else {
- manageSnapshots.createBranch(branch, snapshotId)
- }
- }
-
- if (create && replace && !refExists) {
- safeCreateBranch()
- } else if (replace) {
- Preconditions.checkArgument(
- snapshotId != null,
- "Cannot complete replace branch operation on %s, main has no snapshot",
- ident)
- manageSnapshots.replaceBranch(branch, snapshotId)
- } else {
- if (refExists && ifNotExists) {
- return Nil
- }
-
- safeCreateBranch()
- }
-
- if (branchOptions.numSnapshots.nonEmpty) {
- manageSnapshots.setMinSnapshotsToKeep(branch, branchOptions.numSnapshots.get.toInt)
- }
-
- if (branchOptions.snapshotRetain.nonEmpty) {
- manageSnapshots.setMaxSnapshotAgeMs(branch, branchOptions.snapshotRetain.get)
- }
-
- if (branchOptions.snapshotRefRetain.nonEmpty) {
- manageSnapshots.setMaxRefAgeMs(branch, branchOptions.snapshotRefRetain.get)
- }
-
- manageSnapshots.commit()
-
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot create or replace branch on non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"CreateOrReplace branch: $branch for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceTagExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceTagExec.scala
deleted file mode 100644
index e486892614cb..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceTagExec.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.TagOptions
-import org.apache.spark.sql.connector.catalog._
-
-case class CreateOrReplaceTagExec(
- catalog: TableCatalog,
- ident: Identifier,
- tag: String,
- tagOptions: TagOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean)
- extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val snapshotId: java.lang.Long = tagOptions.snapshotId
- .orElse(Option(iceberg.table.currentSnapshot()).map(_.snapshotId()))
- .map(java.lang.Long.valueOf)
- .orNull
-
- Preconditions.checkArgument(
- snapshotId != null,
- "Cannot complete create or replace tag operation on %s, main has no snapshot",
- ident)
-
- val manageSnapshot = iceberg.table.manageSnapshots()
- val refExists = null != iceberg.table().refs().get(tag)
-
- if (create && replace && !refExists) {
- manageSnapshot.createTag(tag, snapshotId)
- } else if (replace) {
- manageSnapshot.replaceTag(tag, snapshotId)
- } else {
- if (refExists && ifNotExists) {
- return Nil
- }
-
- manageSnapshot.createTag(tag, snapshotId)
- }
-
- if (tagOptions.snapshotRefRetain.nonEmpty) {
- manageSnapshot.setMaxRefAgeMs(tag, tagOptions.snapshotRefRetain.get)
- }
-
- manageSnapshot.commit()
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot create tag to non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"Create tag: $tag for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala
deleted file mode 100644
index c9d404765845..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.SupportsReplaceView
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.NoSuchViewException
-import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.types.StructType
-import scala.jdk.CollectionConverters._
-
-case class CreateV2ViewExec(
- catalog: ViewCatalog,
- ident: Identifier,
- queryText: String,
- viewSchema: StructType,
- columnAliases: Seq[String],
- columnComments: Seq[Option[String]],
- queryColumnNames: Seq[String],
- comment: Option[String],
- properties: Map[String, String],
- allowExisting: Boolean,
- replace: Boolean)
- extends LeafV2CommandExec {
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- val currentCatalogName = session.sessionState.catalogManager.currentCatalog.name
- val currentCatalog =
- if (!catalog.name().equals(currentCatalogName)) currentCatalogName else null
- val currentNamespace = session.sessionState.catalogManager.currentNamespace
-
- val engineVersion = "Spark " + org.apache.spark.SPARK_VERSION
- val newProperties = properties ++
- comment.map(ViewCatalog.PROP_COMMENT -> _) ++
- Map(
- ViewCatalog.PROP_CREATE_ENGINE_VERSION -> engineVersion,
- ViewCatalog.PROP_ENGINE_VERSION -> engineVersion)
-
- if (replace) {
- // CREATE OR REPLACE VIEW
- catalog match {
- case c: SupportsReplaceView =>
- try {
- replaceView(c, currentCatalog, currentNamespace, newProperties)
- } catch {
- // view might have been concurrently dropped during replace
- case _: NoSuchViewException =>
- replaceView(c, currentCatalog, currentNamespace, newProperties)
- }
- case _ =>
- if (catalog.viewExists(ident)) {
- catalog.dropView(ident)
- }
-
- createView(currentCatalog, currentNamespace, newProperties)
- }
- } else {
- try {
- // CREATE VIEW [IF NOT EXISTS]
- createView(currentCatalog, currentNamespace, newProperties)
- } catch {
- case _: ViewAlreadyExistsException if allowExisting => // Ignore
- }
- }
-
- Nil
- }
-
- private def replaceView(
- supportsReplaceView: SupportsReplaceView,
- currentCatalog: String,
- currentNamespace: Array[String],
- newProperties: Map[String, String]) = {
- supportsReplaceView.replaceView(
- ident,
- queryText,
- currentCatalog,
- currentNamespace,
- viewSchema,
- queryColumnNames.toArray,
- columnAliases.toArray,
- columnComments.map(c => c.orNull).toArray,
- newProperties.asJava)
- }
-
- private def createView(
- currentCatalog: String,
- currentNamespace: Array[String],
- newProperties: Map[String, String]) = {
- catalog.createView(
- ident,
- queryText,
- currentCatalog,
- currentNamespace,
- viewSchema,
- queryColumnNames.toArray,
- columnAliases.toArray,
- columnComments.map(c => c.orNull).toArray,
- newProperties.asJava)
- }
-
- override def simpleString(maxFields: Int): String = {
- s"CreateV2ViewExec: ${ident}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeV2ViewExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeV2ViewExec.scala
deleted file mode 100644
index f25ac8e3c3dc..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeV2ViewExec.scala
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.escapeSingleQuotedString
-import org.apache.spark.sql.connector.catalog.View
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.execution.LeafExecNode
-import scala.jdk.CollectionConverters._
-
-case class DescribeV2ViewExec(output: Seq[Attribute], view: View, isExtended: Boolean)
- extends V2CommandExec
- with LeafExecNode {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override protected def run(): Seq[InternalRow] = {
- if (isExtended) {
- (describeSchema :+ emptyRow) ++ describeExtended
- } else {
- describeSchema
- }
- }
-
- private def describeSchema: Seq[InternalRow] =
- view.schema().map { column =>
- toCatalystRow(column.name, column.dataType.simpleString, column.getComment().getOrElse(""))
- }
-
- private def emptyRow: InternalRow = toCatalystRow("", "", "")
-
- private def describeExtended: Seq[InternalRow] = {
- val outputColumns = view.queryColumnNames.mkString("[", ", ", "]")
- val properties: Map[String, String] =
- view.properties.asScala.toMap -- ViewCatalog.RESERVED_PROPERTIES.asScala
- val viewCatalogAndNamespace: Seq[String] = view.name.split("\\.").take(2)
- val viewProperties = properties.toSeq
- .sortBy(_._1)
- .map { case (key, value) =>
- s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
- }
- .mkString("[", ", ", "]")
-
- // omitting view text here because it is shown as
- // part of SHOW CREATE TABLE and can result in weird formatting in the DESCRIBE output
- toCatalystRow("# Detailed View Information", "", "") ::
- toCatalystRow("Comment", view.properties.getOrDefault(ViewCatalog.PROP_COMMENT, ""), "") ::
- toCatalystRow("View Catalog and Namespace", viewCatalogAndNamespace.quoted, "") ::
- toCatalystRow("View Query Output Columns", outputColumns, "") ::
- toCatalystRow("View Properties", viewProperties, "") ::
- toCatalystRow(
- "Created By",
- view.properties.getOrDefault(ViewCatalog.PROP_CREATE_ENGINE_VERSION, ""),
- "") ::
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DescribeV2ViewExec"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropBranchExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropBranchExec.scala
deleted file mode 100644
index e7d9c7b70d82..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropBranchExec.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class DropBranchExec(
- catalog: TableCatalog,
- ident: Identifier,
- branch: String,
- ifExists: Boolean)
- extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val ref = iceberg.table().refs().get(branch)
- if (ref != null || !ifExists) {
- iceberg.table().manageSnapshots().removeBranch(branch).commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot drop branch on non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropBranch branch: ${branch} for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIdentifierFieldsExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIdentifierFieldsExec.scala
deleted file mode 100644
index 87b18594d573..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIdentifierFieldsExec.scala
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions
-import org.apache.iceberg.relocated.com.google.common.collect.Sets
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class DropIdentifierFieldsExec(catalog: TableCatalog, ident: Identifier, fields: Seq[String])
- extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val schema = iceberg.table.schema
- val identifierFieldNames = Sets.newHashSet(schema.identifierFieldNames)
-
- for (name <- fields) {
- Preconditions.checkArgument(
- schema.findField(name) != null,
- "Cannot complete drop identifier fields operation: field %s not found",
- name)
- Preconditions.checkArgument(
- identifierFieldNames.contains(name),
- "Cannot complete drop identifier fields operation: %s is not an identifier field",
- name)
- identifierFieldNames.remove(name)
- }
-
- iceberg.table
- .updateSchema()
- .setIdentifierFields(identifierFieldNames)
- .commit();
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot drop identifier fields in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropIdentifierFields ${catalog.name}.${ident.quoted} (${fields.quoted})";
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropPartitionFieldExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropPartitionFieldExec.scala
deleted file mode 100644
index db43263e0e66..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropPartitionFieldExec.scala
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class DropPartitionFieldExec(catalog: TableCatalog, ident: Identifier, transform: Transform)
- extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val schema = iceberg.table.schema
- transform match {
- case IdentityTransform(FieldReference(parts))
- if parts.size == 1 && schema.findField(parts.head) == null =>
- // the name is not present in the Iceberg schema, so it must be a partition field name, not a column name
- iceberg.table
- .updateSpec()
- .removeField(parts.head)
- .commit()
-
- case _ =>
- iceberg.table
- .updateSpec()
- .removeField(Spark3Util.toIcebergTerm(transform))
- .commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot drop partition field in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropPartitionField ${catalog.name}.${ident.quoted} ${transform.describe}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTagExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTagExec.scala
deleted file mode 100644
index 79b4a1525591..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTagExec.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class DropTagExec(catalog: TableCatalog, ident: Identifier, tag: String, ifExists: Boolean)
- extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val ref = iceberg.table().refs().get(tag)
- if (ref != null || !ifExists) {
- iceberg.table().manageSnapshots().removeTag(tag).commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot drop tag on non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropTag tag: ${tag} for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropV2ViewExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropV2ViewExec.scala
deleted file mode 100644
index 6dd1188b78e8..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropV2ViewExec.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.NoSuchViewException
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-
-case class DropV2ViewExec(catalog: ViewCatalog, ident: Identifier, ifExists: Boolean)
- extends LeafV2CommandExec {
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- val dropped = catalog.dropView(ident)
- if (!dropped && !ifExists) {
- throw new NoSuchViewException(ident)
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropV2View: ${ident}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Implicits.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Implicits.scala
deleted file mode 100644
index c960505a519e..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Implicits.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-
-/**
- * A class similar to DataSourceV2Implicits in Spark but contains custom implicit helpers.
- */
-object ExtendedDataSourceV2Implicits {
- implicit class TableHelper(table: Table) {
- def asRowLevelOperationTable: RowLevelOperationTable = {
- table match {
- case rowLevelOperationTable: RowLevelOperationTable =>
- rowLevelOperationTable
- case _ =>
- throw new AnalysisException(s"Table ${table.name} is not a row-level operation table")
- }
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
deleted file mode 100644
index 53def0e95250..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.SparkCatalog
-import org.apache.iceberg.spark.SparkSessionCatalog
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.Strategy
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.ResolvedIdentifier
-import org.apache.spark.sql.catalyst.analysis.ResolvedNamespace
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.plans.logical.AddPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.Call
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceBranch
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceTag
-import org.apache.spark.sql.catalyst.plans.logical.DescribeRelation
-import org.apache.spark.sql.catalyst.plans.logical.DropBranch
-import org.apache.spark.sql.catalyst.plans.logical.DropIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.DropPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.DropTag
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeRows
-import org.apache.spark.sql.catalyst.plans.logical.NoStatsUnaryNode
-import org.apache.spark.sql.catalyst.plans.logical.OrderAwareCoalesce
-import org.apache.spark.sql.catalyst.plans.logical.RenameTable
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.ReplacePartitionField
-import org.apache.spark.sql.catalyst.plans.logical.SetIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.SetViewProperties
-import org.apache.spark.sql.catalyst.plans.logical.SetWriteDistributionAndOrdering
-import org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable
-import org.apache.spark.sql.catalyst.plans.logical.ShowTableProperties
-import org.apache.spark.sql.catalyst.plans.logical.UnsetViewProperties
-import org.apache.spark.sql.catalyst.plans.logical.UpdateRows
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.catalyst.plans.logical.views.CreateIcebergView
-import org.apache.spark.sql.catalyst.plans.logical.views.DropIcebergView
-import org.apache.spark.sql.catalyst.plans.logical.views.ResolvedV2View
-import org.apache.spark.sql.catalyst.plans.logical.views.ShowIcebergViews
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.execution.OrderAwareCoalesceExec
-import org.apache.spark.sql.execution.SparkPlan
-import scala.jdk.CollectionConverters._
-
-case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy with PredicateHelper {
-
- override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
- case c @ Call(procedure, args) =>
- val input = buildInternalRow(args)
- CallExec(c.output, procedure, input) :: Nil
-
- case AddPartitionField(IcebergCatalogAndIdentifier(catalog, ident), transform, name) =>
- AddPartitionFieldExec(catalog, ident, transform, name) :: Nil
-
- case CreateOrReplaceBranch(
- IcebergCatalogAndIdentifier(catalog, ident),
- branch,
- branchOptions,
- create,
- replace,
- ifNotExists) =>
- CreateOrReplaceBranchExec(
- catalog,
- ident,
- branch,
- branchOptions,
- create,
- replace,
- ifNotExists) :: Nil
-
- case CreateOrReplaceTag(
- IcebergCatalogAndIdentifier(catalog, ident),
- tag,
- tagOptions,
- create,
- replace,
- ifNotExists) =>
- CreateOrReplaceTagExec(catalog, ident, tag, tagOptions, create, replace, ifNotExists) :: Nil
-
- case DropBranch(IcebergCatalogAndIdentifier(catalog, ident), branch, ifExists) =>
- DropBranchExec(catalog, ident, branch, ifExists) :: Nil
-
- case DropTag(IcebergCatalogAndIdentifier(catalog, ident), tag, ifExists) =>
- DropTagExec(catalog, ident, tag, ifExists) :: Nil
-
- case DropPartitionField(IcebergCatalogAndIdentifier(catalog, ident), transform) =>
- DropPartitionFieldExec(catalog, ident, transform) :: Nil
-
- case ReplacePartitionField(
- IcebergCatalogAndIdentifier(catalog, ident),
- transformFrom,
- transformTo,
- name) =>
- ReplacePartitionFieldExec(catalog, ident, transformFrom, transformTo, name) :: Nil
-
- case SetIdentifierFields(IcebergCatalogAndIdentifier(catalog, ident), fields) =>
- SetIdentifierFieldsExec(catalog, ident, fields) :: Nil
-
- case DropIdentifierFields(IcebergCatalogAndIdentifier(catalog, ident), fields) =>
- DropIdentifierFieldsExec(catalog, ident, fields) :: Nil
-
- case SetWriteDistributionAndOrdering(
- IcebergCatalogAndIdentifier(catalog, ident),
- distributionMode,
- ordering) =>
- SetWriteDistributionAndOrderingExec(catalog, ident, distributionMode, ordering) :: Nil
-
- case ReplaceIcebergData(_: DataSourceV2Relation, query, r: DataSourceV2Relation, Some(write)) =>
- // refresh the cache using the original relation
- ReplaceDataExec(planLater(query), refreshCache(r), write) :: Nil
-
- case WriteIcebergDelta(
- _: DataSourceV2Relation,
- query,
- r: DataSourceV2Relation,
- projs,
- Some(write)) =>
- // refresh the cache using the original relation
- WriteDeltaExec(planLater(query), refreshCache(r), projs, write) :: Nil
-
- case MergeRows(
- isSourceRowPresent,
- isTargetRowPresent,
- matchedConditions,
- matchedOutputs,
- notMatchedConditions,
- notMatchedOutputs,
- targetOutput,
- performCardinalityCheck,
- emitNotMatchedTargetRows,
- output,
- child) =>
-
- MergeRowsExec(
- isSourceRowPresent,
- isTargetRowPresent,
- matchedConditions,
- matchedOutputs,
- notMatchedConditions,
- notMatchedOutputs,
- targetOutput,
- performCardinalityCheck,
- emitNotMatchedTargetRows,
- output,
- planLater(child)) :: Nil
-
- case UpdateRows(deleteOutput, insertOutput, output, child) =>
- UpdateRowsExec(deleteOutput, insertOutput, output, planLater(child)) :: Nil
-
- case NoStatsUnaryNode(child) =>
- planLater(child) :: Nil
-
- case OrderAwareCoalesce(numPartitions, coalescer, child) =>
- OrderAwareCoalesceExec(numPartitions, coalescer, planLater(child)) :: Nil
-
- case RenameTable(ResolvedV2View(oldCatalog: ViewCatalog, oldIdent), newName, isView @ true) =>
- val newIdent = Spark3Util.catalogAndIdentifier(spark, newName.toList.asJava)
- if (oldCatalog.name != newIdent.catalog().name()) {
- throw new AnalysisException(
- s"Cannot move view between catalogs: from=${oldCatalog.name} and to=${newIdent.catalog().name()}")
- }
- RenameV2ViewExec(oldCatalog, oldIdent, newIdent.identifier()) :: Nil
-
- case DropIcebergView(ResolvedIdentifier(viewCatalog: ViewCatalog, ident), ifExists) =>
- DropV2ViewExec(viewCatalog, ident, ifExists) :: Nil
-
- case CreateIcebergView(
- ResolvedIdentifier(viewCatalog: ViewCatalog, ident),
- queryText,
- query,
- columnAliases,
- columnComments,
- queryColumnNames,
- comment,
- properties,
- allowExisting,
- replace,
- _,
- _) =>
- CreateV2ViewExec(
- catalog = viewCatalog,
- ident = ident,
- queryText = queryText,
- columnAliases = columnAliases,
- columnComments = columnComments,
- queryColumnNames = queryColumnNames,
- viewSchema = query.schema,
- comment = comment,
- properties = properties,
- allowExisting = allowExisting,
- replace = replace) :: Nil
-
- case DescribeRelation(ResolvedV2View(catalog, ident), _, isExtended, output) =>
- DescribeV2ViewExec(output, catalog.loadView(ident), isExtended) :: Nil
-
- case ShowTableProperties(ResolvedV2View(catalog, ident), propertyKey, output) =>
- ShowV2ViewPropertiesExec(output, catalog.loadView(ident), propertyKey) :: Nil
-
- case ShowIcebergViews(ResolvedNamespace(catalog: ViewCatalog, namespace), pattern, output) =>
- ShowV2ViewsExec(output, catalog, namespace, pattern) :: Nil
-
- case ShowCreateTable(ResolvedV2View(catalog, ident), _, output) =>
- ShowCreateV2ViewExec(output, catalog.loadView(ident)) :: Nil
-
- case SetViewProperties(ResolvedV2View(catalog, ident), properties) =>
- AlterV2ViewSetPropertiesExec(catalog, ident, properties) :: Nil
-
- case UnsetViewProperties(ResolvedV2View(catalog, ident), propertyKeys, ifExists) =>
- AlterV2ViewUnsetPropertiesExec(catalog, ident, propertyKeys, ifExists) :: Nil
-
- case _ => Nil
- }
-
- private def buildInternalRow(exprs: Seq[Expression]): InternalRow = {
- val values = new Array[Any](exprs.size)
- for (index <- exprs.indices) {
- values(index) = exprs(index).eval()
- }
- new GenericInternalRow(values)
- }
-
- private def refreshCache(r: DataSourceV2Relation)(): Unit = {
- spark.sharedState.cacheManager.recacheByPlan(spark, r)
- }
-
- private object IcebergCatalogAndIdentifier {
- def unapply(identifier: Seq[String]): Option[(TableCatalog, Identifier)] = {
- val catalogAndIdentifier = Spark3Util.catalogAndIdentifier(spark, identifier.asJava)
- catalogAndIdentifier.catalog match {
- case icebergCatalog: SparkCatalog =>
- Some((icebergCatalog, catalogAndIdentifier.identifier))
- case icebergCatalog: SparkSessionCatalog[_] =>
- Some((icebergCatalog, catalogAndIdentifier.identifier))
- case _ =>
- None
- }
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedV2Writes.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedV2Writes.scala
deleted file mode 100644
index 98b93b476bc9..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedV2Writes.scala
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import java.util.Optional
-import java.util.UUID
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.write.DeltaWriteBuilder
-import org.apache.spark.sql.connector.write.LogicalWriteInfoImpl
-import org.apache.spark.sql.connector.write.WriteBuilder
-import org.apache.spark.sql.types.StructType
-
-/**
- * A rule that is inspired by V2Writes in Spark but supports Iceberg specific plans.
- */
-object ExtendedV2Writes extends Rule[LogicalPlan] with PredicateHelper {
-
- import DataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- case rd @ ReplaceIcebergData(r: DataSourceV2Relation, query, _, None) =>
- val rowSchema = StructType.fromAttributes(rd.dataInput)
- val writeBuilder = newWriteBuilder(r.table, rowSchema, Map.empty)
- val write = writeBuilder.build()
- val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog)
- rd.copy(write = Some(write), query = Project(rd.dataInput, newQuery))
-
- case wd @ WriteIcebergDelta(r: DataSourceV2Relation, query, _, projections, None) =>
- val deltaWriteBuilder = newDeltaWriteBuilder(r.table, Map.empty, projections)
- val deltaWrite = deltaWriteBuilder.build()
- val newQuery = DistributionAndOrderingUtils.prepareQuery(deltaWrite, query, r.funCatalog)
- wd.copy(write = Some(deltaWrite), query = newQuery)
- }
-
- private def newWriteBuilder(
- table: Table,
- rowSchema: StructType,
- writeOptions: Map[String, String],
- queryId: String = UUID.randomUUID().toString): WriteBuilder = {
-
- val info = LogicalWriteInfoImpl(queryId, rowSchema, writeOptions.asOptions)
- table.asWritable.newWriteBuilder(info)
- }
-
- private def newDeltaWriteBuilder(
- table: Table,
- writeOptions: Map[String, String],
- projections: WriteDeltaProjections,
- queryId: String = UUID.randomUUID().toString): DeltaWriteBuilder = {
-
- val rowSchema = projections.rowProjection.map(_.schema).getOrElse(StructType(Nil))
- val rowIdSchema = projections.rowIdProjection.schema
- val metadataSchema = projections.metadataProjection.map(_.schema)
-
- val info = LogicalWriteInfoImpl(
- queryId,
- rowSchema,
- writeOptions.asOptions,
- Optional.of(rowIdSchema),
- Optional.ofNullable(metadataSchema.orNull))
-
- val writeBuilder = table.asWritable.newWriteBuilder(info)
- assert(writeBuilder.isInstanceOf[DeltaWriteBuilder], s"$writeBuilder must be DeltaWriteBuilder")
- writeBuilder.asInstanceOf[DeltaWriteBuilder]
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
deleted file mode 100644
index a183b1903c38..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.SparkException
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.BasePredicate
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Projection
-import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
-import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.UnaryExecNode
-import org.roaringbitmap.longlong.Roaring64Bitmap
-
-case class MergeRowsExec(
- isSourceRowPresent: Expression,
- isTargetRowPresent: Expression,
- matchedConditions: Seq[Expression],
- matchedOutputs: Seq[Seq[Seq[Expression]]],
- notMatchedConditions: Seq[Expression],
- notMatchedOutputs: Seq[Seq[Expression]],
- targetOutput: Seq[Expression],
- performCardinalityCheck: Boolean,
- emitNotMatchedTargetRows: Boolean,
- output: Seq[Attribute],
- child: SparkPlan)
- extends UnaryExecNode {
-
- private final val ROW_ID = "__row_id"
-
- @transient override lazy val producedAttributes: AttributeSet = {
- AttributeSet(output.filterNot(attr => inputSet.contains(attr)))
- }
-
- @transient override lazy val references: AttributeSet = child.outputSet
-
- override def simpleString(maxFields: Int): String = {
- s"MergeRowsExec${truncatedString(output, "[", ", ", "]", maxFields)}"
- }
-
- override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
- copy(child = newChild)
- }
-
- protected override def doExecute(): RDD[InternalRow] = {
- child.execute().mapPartitions(processPartition)
- }
-
- private def createProjection(exprs: Seq[Expression]): UnsafeProjection = {
- UnsafeProjection.create(exprs, child.output)
- }
-
- private def createPredicate(expr: Expression): BasePredicate = {
- GeneratePredicate.generate(expr, child.output)
- }
-
- // This method is responsible for processing a input row to emit the resultant row with an
- // additional column that indicates whether the row is going to be included in the final
- // output of merge or not.
- // 1. Found a target row for which there is no corresponding source row (join condition not met)
- // - Only project the target columns if we need to output unchanged rows (group-based commands)
- // 2. Found a source row for which there is no corresponding target row (join condition not met)
- // - Apply the not matched actions (i.e INSERT actions) if non match conditions are met.
- // 3. Found a source row for which there is a corresponding target row (join condition met)
- // - Apply the matched actions (i.e DELETE or UPDATE actions) if match conditions are met.
- private def processPartition(rowIterator: Iterator[InternalRow]): Iterator[InternalRow] = {
- val isSourceRowPresentPred = createPredicate(isSourceRowPresent)
- val isTargetRowPresentPred = createPredicate(isTargetRowPresent)
-
- val matchedActions = matchedConditions.zip(matchedOutputs).map { case (cond, outputs) =>
- outputs match {
- case Seq(output1, output2) =>
- Split(createPredicate(cond), createProjection(output1), createProjection(output2))
- case Seq(output) =>
- Project(createPredicate(cond), createProjection(output))
- case Nil =>
- Project(createPredicate(cond), EmptyProjection)
- }
- }
-
- val notMatchedActions = notMatchedConditions.zip(notMatchedOutputs).map { case (cond, output) =>
- Project(createPredicate(cond), createProjection(output))
- }
-
- val projectTargetCols = createProjection(targetOutput)
-
- val cardinalityCheck = if (performCardinalityCheck) {
- val rowIdOrdinal = child.output.indexWhere(attr => conf.resolver(attr.name, ROW_ID))
- assert(rowIdOrdinal != -1, "Cannot find row ID attr")
- BitmapCardinalityCheck(rowIdOrdinal)
- } else {
- EmptyCardinalityCheck
- }
-
- val mergeIterator = if (matchedActions.exists(_.isInstanceOf[Split])) {
- new SplittingMergeRowIterator(
- rowIterator,
- cardinalityCheck,
- isTargetRowPresentPred,
- matchedActions,
- notMatchedActions)
- } else {
- new MergeRowIterator(
- rowIterator,
- cardinalityCheck,
- isTargetRowPresentPred,
- isSourceRowPresentPred,
- projectTargetCols,
- matchedActions.asInstanceOf[Seq[Project]],
- notMatchedActions)
- }
-
- // null indicates a record must be discarded
- mergeIterator.filter(_ != null)
- }
-
- trait Action {
- def cond: BasePredicate
- }
-
- case class Project(cond: BasePredicate, proj: Projection) extends Action {
- def apply(row: InternalRow): InternalRow = proj.apply(row)
- }
-
- case class Split(cond: BasePredicate, proj: Projection, otherProj: Projection) extends Action {
- def projectRow(row: InternalRow): InternalRow = proj.apply(row)
- def projectExtraRow(row: InternalRow): InternalRow = otherProj.apply(row)
- }
-
- object EmptyProjection extends Projection {
- override def apply(row: InternalRow): InternalRow = null
- }
-
- class MergeRowIterator(
- private val rowIterator: Iterator[InternalRow],
- private val cardinalityCheck: CardinalityCheck,
- private val isTargetRowPresentPred: BasePredicate,
- private val isSourceRowPresentPred: BasePredicate,
- private val targetTableProj: Projection,
- private val matchedActions: Seq[Project],
- private val notMatchedActions: Seq[Project])
- extends Iterator[InternalRow] {
-
- override def hasNext: Boolean = rowIterator.hasNext
-
- override def next(): InternalRow = {
- val row = rowIterator.next()
-
- val isSourceRowPresent = isSourceRowPresentPred.eval(row)
- val isTargetRowPresent = isTargetRowPresentPred.eval(row)
-
- if (isTargetRowPresent && isSourceRowPresent) {
- cardinalityCheck.execute(row)
- applyMatchedActions(row)
- } else if (isSourceRowPresent) {
- applyNotMatchedActions(row)
- } else if (emitNotMatchedTargetRows && isTargetRowPresent) {
- targetTableProj.apply(row)
- } else {
- null
- }
- }
-
- private def applyMatchedActions(row: InternalRow): InternalRow = {
- for (action <- matchedActions) {
- if (action.cond.eval(row)) {
- return action.apply(row)
- }
- }
-
- if (emitNotMatchedTargetRows) targetTableProj.apply(row) else null
- }
-
- private def applyNotMatchedActions(row: InternalRow): InternalRow = {
- for (action <- notMatchedActions) {
- if (action.cond.eval(row)) {
- return action.apply(row)
- }
- }
-
- null
- }
- }
-
- /**
- * An iterator that splits updates into deletes and inserts.
- *
- * Each input row that represents an update becomes two output rows: a delete and an insert.
- */
- class SplittingMergeRowIterator(
- private val rowIterator: Iterator[InternalRow],
- private val cardinalityCheck: CardinalityCheck,
- private val isTargetRowPresentPred: BasePredicate,
- private val matchedActions: Seq[Action],
- private val notMatchedActions: Seq[Project])
- extends Iterator[InternalRow] {
-
- var cachedExtraRow: InternalRow = _
-
- override def hasNext: Boolean = cachedExtraRow != null || rowIterator.hasNext
-
- override def next(): InternalRow = {
- if (cachedExtraRow != null) {
- val extraRow = cachedExtraRow
- cachedExtraRow = null
- return extraRow
- }
-
- val row = rowIterator.next()
-
- // it should be OK to just check if the target row exists
- // as this iterator is only used for delta-based row-level plans
- // that are rewritten using an inner or right outer join
- if (isTargetRowPresentPred.eval(row)) {
- cardinalityCheck.execute(row)
- applyMatchedActions(row)
- } else {
- applyNotMatchedActions(row)
- }
- }
-
- private def applyMatchedActions(row: InternalRow): InternalRow = {
- for (action <- matchedActions) {
- if (action.cond.eval(row)) {
- action match {
- case split: Split =>
- cachedExtraRow = split.projectExtraRow(row)
- return split.projectRow(row)
- case project: Project =>
- return project.apply(row)
- }
- }
- }
-
- null
- }
-
- private def applyNotMatchedActions(row: InternalRow): InternalRow = {
- for (action <- notMatchedActions) {
- if (action.cond.eval(row)) {
- return action.apply(row)
- }
- }
-
- null
- }
- }
-
- sealed trait CardinalityCheck {
-
- def execute(inputRow: InternalRow): Unit
-
- protected def fail(): Unit = {
- throw new SparkException(
- "The ON search condition of the MERGE statement matched a single row from " +
- "the target table with multiple rows of the source table. This could result " +
- "in the target row being operated on more than once with an update or delete " +
- "operation and is not allowed.")
- }
- }
-
- object EmptyCardinalityCheck extends CardinalityCheck {
- def execute(inputRow: InternalRow): Unit = {}
- }
-
- case class BitmapCardinalityCheck(rowIdOrdinal: Int) extends CardinalityCheck {
- private val matchedRowIds = new Roaring64Bitmap()
-
- override def execute(inputRow: InternalRow): Unit = {
- val currentRowId = inputRow.getLong(rowIdOrdinal)
- if (matchedRowIds.contains(currentRowId)) {
- fail()
- }
- matchedRowIds.add(currentRowId)
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameV2ViewExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameV2ViewExec.scala
deleted file mode 100644
index 5dada1cab0bb..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameV2ViewExec.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-
-case class RenameV2ViewExec(catalog: ViewCatalog, oldIdent: Identifier, newIdent: Identifier)
- extends LeafV2CommandExec {
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.renameView(oldIdent, newIdent)
-
- Seq.empty
- }
-
- override def simpleString(maxFields: Int): String = {
- s"RenameV2View ${oldIdent} to {newIdent}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceDataExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceDataExec.scala
deleted file mode 100644
index 135c194a60eb..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceDataExec.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.connector.write.Write
-import org.apache.spark.sql.execution.SparkPlan
-
-/**
- * Physical plan node to replace data in existing tables.
- */
-case class ReplaceDataExec(query: SparkPlan, refreshCache: () => Unit, write: Write)
- extends V2ExistingTableWriteExec {
-
- override lazy val references: AttributeSet = query.outputSet
- override lazy val stringArgs: Iterator[Any] = Iterator(query, write)
-
- override protected def withNewChildInternal(newChild: SparkPlan): ReplaceDataExec = {
- copy(query = newChild)
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplacePartitionFieldExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplacePartitionFieldExec.scala
deleted file mode 100644
index 00b998c49e83..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplacePartitionFieldExec.scala
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class ReplacePartitionFieldExec(
- catalog: TableCatalog,
- ident: Identifier,
- transformFrom: Transform,
- transformTo: Transform,
- name: Option[String])
- extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val schema = iceberg.table.schema
- transformFrom match {
- case IdentityTransform(FieldReference(parts))
- if parts.size == 1 && schema.findField(parts.head) == null =>
- // the name is not present in the Iceberg schema, so it must be a partition field name, not a column name
- iceberg.table
- .updateSpec()
- .removeField(parts.head)
- .addField(name.orNull, Spark3Util.toIcebergTerm(transformTo))
- .commit()
-
- case _ =>
- iceberg.table
- .updateSpec()
- .removeField(Spark3Util.toIcebergTerm(transformFrom))
- .addField(name.orNull, Spark3Util.toIcebergTerm(transformTo))
- .commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot replace partition field in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"ReplacePartitionField ${catalog.name}.${ident.quoted} ${transformFrom.describe} " +
- s"with ${name.map(n => s"$n=").getOrElse("")}${transformTo.describe}"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceRewrittenRowLevelCommand.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceRewrittenRowLevelCommand.scala
deleted file mode 100644
index 8025f1a072be..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceRewrittenRowLevelCommand.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * Replaces operations such as DELETE and MERGE with the corresponding rewrite plans.
- */
-object ReplaceRewrittenRowLevelCommand extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- case c: RowLevelCommand if c.rewritePlan.isDefined =>
- c.rewritePlan.get
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RowLevelCommandScanRelationPushDown.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RowLevelCommandScanRelationPushDown.scala
deleted file mode 100644
index 130d1e1bcd3c..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RowLevelCommandScanRelationPushDown.scala
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.planning.RewrittenRowLevelCommand
-import org.apache.spark.sql.catalyst.planning.ScanOperation
-import org.apache.spark.sql.catalyst.plans.logical.Join
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.NoStatsUnaryNode
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.plans.logical.WriteIcebergDelta
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.connector.expressions.filter.Predicate
-import org.apache.spark.sql.connector.read.ScanBuilder
-import org.apache.spark.sql.execution.datasources.DataSourceStrategy
-import org.apache.spark.sql.sources.Filter
-import org.apache.spark.sql.types.StructType
-
-object RowLevelCommandScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
- import ExtendedDataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- // use native Spark planning for delta-based plans
- // unlike other commands, these plans have filters that can be pushed down directly
- case RewrittenRowLevelCommand(
- command,
- _: DataSourceV2Relation,
- rewritePlan: WriteIcebergDelta) =>
- val newRewritePlan = V2ScanRelationPushDown.apply(rewritePlan)
- command.withNewRewritePlan(newRewritePlan)
-
- // group-based MERGE operations are rewritten as joins and may be planned in a special way
- // the join condition is the MERGE condition and can be pushed into the source
- // this allows us to remove completely pushed down predicates from the join condition
- case UnplannedGroupBasedMergeOperation(
- command,
- rd: ReplaceIcebergData,
- join @ Join(_, _, _, Some(joinCond), _),
- relation: DataSourceV2Relation) =>
-
- val table = relation.table.asRowLevelOperationTable
- val scanBuilder = table.newScanBuilder(relation.options)
-
- val (pushedFilters, newJoinCond) = pushMergeFilters(joinCond, relation, scanBuilder)
- val pushedFiltersStr = if (pushedFilters.isLeft) {
- pushedFilters.left.get.mkString(", ")
- } else {
- pushedFilters.right.get.mkString(", ")
- }
-
- val (scan, output) = PushDownUtils.pruneColumns(scanBuilder, relation, relation.output, Nil)
-
- logInfo(s"""
- |Pushing MERGE operators to ${relation.name}
- |Pushed filters: $pushedFiltersStr
- |Original JOIN condition: $joinCond
- |New JOIN condition: $newJoinCond
- |Output: ${output.mkString(", ")}
- """.stripMargin)
-
- val newRewritePlan = rd transformDown {
- case j: Join if j eq join =>
- j.copy(condition = newJoinCond)
- case r: DataSourceV2Relation if r.table eq table =>
- DataSourceV2ScanRelation(r, scan, PushDownUtils.toOutputAttrs(scan.readSchema(), r))
- }
-
- command.withNewRewritePlan(newRewritePlan)
-
- // push down the filter from the command condition instead of the filter in the rewrite plan,
- // which may be negated for copy-on-write DELETE and UPDATE operations
- case RewrittenRowLevelCommand(command, relation: DataSourceV2Relation, rewritePlan) =>
- val table = relation.table.asRowLevelOperationTable
- val scanBuilder = table.newScanBuilder(relation.options)
-
- val (pushedFilters, remainingFilters) = command.condition match {
- case Some(cond) => pushFilters(cond, scanBuilder, relation.output)
- case None => (Left(Nil), Nil)
- }
-
- val pushedFiltersStr = if (pushedFilters.isLeft) {
- pushedFilters.left.get.mkString(", ")
- } else {
- pushedFilters.right.get.mkString(", ")
- }
-
- val (scan, output) = PushDownUtils.pruneColumns(scanBuilder, relation, relation.output, Nil)
-
- logInfo(s"""
- |Pushing operators to ${relation.name}
- |Pushed filters: $pushedFiltersStr
- |Filters that were not pushed: ${remainingFilters.mkString(",")}
- |Output: ${output.mkString(", ")}
- """.stripMargin)
-
- // replace DataSourceV2Relation with DataSourceV2ScanRelation for the row operation table
- // there may be multiple read relations for UPDATEs that rely on the UNION approach
- val newRewritePlan = rewritePlan transform {
- case r: DataSourceV2Relation if r.table eq table =>
- DataSourceV2ScanRelation(r, scan, toOutputAttrs(scan.readSchema(), r))
- }
-
- command.withNewRewritePlan(newRewritePlan)
- }
-
- private def pushFilters(
- cond: Expression,
- scanBuilder: ScanBuilder,
- tableAttrs: Seq[AttributeReference])
- : (Either[Seq[Filter], Seq[Predicate]], Seq[Expression]) = {
-
- val tableAttrSet = AttributeSet(tableAttrs)
- val filters = splitConjunctivePredicates(cond).filter(_.references.subsetOf(tableAttrSet))
- val normalizedFilters = DataSourceStrategy.normalizeExprs(filters, tableAttrs)
- val (_, normalizedFiltersWithoutSubquery) =
- normalizedFilters.partition(SubqueryExpression.hasSubquery)
-
- PushDownUtils.pushFilters(scanBuilder, normalizedFiltersWithoutSubquery)
- }
-
- // splits the join condition into predicates and tries to push down each predicate into the scan
- // completely pushed down predicates are removed from the join condition
- // joinCond can't have subqueries as it is validated by the rule that rewrites MERGE as a join
- private def pushMergeFilters(
- joinCond: Expression,
- relation: DataSourceV2Relation,
- scanBuilder: ScanBuilder): (Either[Seq[Filter], Seq[Predicate]], Option[Expression]) = {
-
- val (tableFilters, commonFilters) =
- splitConjunctivePredicates(joinCond).partition(_.references.subsetOf(relation.outputSet))
- val normalizedTableFilters = DataSourceStrategy.normalizeExprs(tableFilters, relation.output)
- val (pushedFilters, postScanFilters) =
- PushDownUtils.pushFilters(scanBuilder, normalizedTableFilters)
- val newJoinCond = (commonFilters ++ postScanFilters).reduceLeftOption(And)
-
- (pushedFilters, newJoinCond)
- }
-
- private def toOutputAttrs(
- schema: StructType,
- relation: DataSourceV2Relation): Seq[AttributeReference] = {
- val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap
- val cleaned = CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)
- cleaned.toAttributes.map {
- // keep the attribute id during transformation
- a => a.withExprId(nameToAttr(a.name).exprId)
- }
- }
-}
-
-object UnplannedGroupBasedMergeOperation {
- type ReturnType = (RowLevelCommand, ReplaceIcebergData, Join, DataSourceV2Relation)
-
- def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
- case m @ MergeIntoIcebergTable(_, _, _, _, _, Some(rewritePlan)) =>
- rewritePlan match {
- case rd @ ReplaceIcebergData(DataSourceV2Relation(table, _, _, _, _), query, _, _) =>
- val joinsAndRelations = query.collect {
- case j @ Join(
- NoStatsUnaryNode(
- ScanOperation(_, pushDownFilters, pushUpFilters, r: DataSourceV2Relation)),
- _,
- _,
- _,
- _) if pushUpFilters.isEmpty && pushDownFilters.isEmpty && r.table.eq(table) =>
- j -> r
- }
-
- joinsAndRelations match {
- case Seq((join, relation)) =>
- Some(m, rd, join, relation)
- case _ =>
- None
- }
-
- case _ =>
- None
- }
-
- case _ =>
- None
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetIdentifierFieldsExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetIdentifierFieldsExec.scala
deleted file mode 100644
index 50c53473ab60..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetIdentifierFieldsExec.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import scala.jdk.CollectionConverters._
-
-case class SetIdentifierFieldsExec(catalog: TableCatalog, ident: Identifier, fields: Seq[String])
- extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- iceberg.table
- .updateSchema()
- .setIdentifierFields(fields.asJava)
- .commit();
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot set identifier fields in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"SetIdentifierFields ${catalog.name}.${ident.quoted} (${fields.quoted})";
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetWriteDistributionAndOrderingExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetWriteDistributionAndOrderingExec.scala
deleted file mode 100644
index 9a10949d5e9e..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetWriteDistributionAndOrderingExec.scala
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.DistributionMode
-import org.apache.iceberg.NullOrder
-import org.apache.iceberg.SortDirection
-import org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE
-import org.apache.iceberg.expressions.Term
-import org.apache.iceberg.spark.SparkUtil
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.CatalogV2Implicits
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class SetWriteDistributionAndOrderingExec(
- catalog: TableCatalog,
- ident: Identifier,
- distributionMode: Option[DistributionMode],
- sortOrder: Seq[(Term, SortDirection, NullOrder)])
- extends LeafV2CommandExec {
-
- import CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val txn = iceberg.table.newTransaction()
-
- val orderBuilder = txn.replaceSortOrder().caseSensitive(SparkUtil.caseSensitive(session))
- sortOrder.foreach {
- case (term, SortDirection.ASC, nullOrder) =>
- orderBuilder.asc(term, nullOrder)
- case (term, SortDirection.DESC, nullOrder) =>
- orderBuilder.desc(term, nullOrder)
- }
- orderBuilder.commit()
-
- distributionMode.foreach { mode =>
- txn
- .updateProperties()
- .set(WRITE_DISTRIBUTION_MODE, mode.modeName())
- .commit()
- }
-
- txn.commitTransaction()
-
- case table =>
- throw new UnsupportedOperationException(
- s"Cannot set write order of non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- val tableIdent = s"${catalog.name}.${ident.quoted}"
- val order = sortOrder
- .map { case (term, direction, nullOrder) =>
- s"$term $direction $nullOrder"
- }
- .mkString(", ")
- s"SetWriteDistributionAndOrdering $tableIdent $distributionMode $order"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateV2ViewExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateV2ViewExec.scala
deleted file mode 100644
index 07ac4aeda8fb..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateV2ViewExec.scala
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.escapeSingleQuotedString
-import org.apache.spark.sql.connector.catalog.View
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.execution.LeafExecNode
-import scala.jdk.CollectionConverters._
-
-case class ShowCreateV2ViewExec(output: Seq[Attribute], view: View)
- extends V2CommandExec
- with LeafExecNode {
-
- override protected def run(): Seq[InternalRow] = {
- val builder = new StringBuilder
- builder ++= s"CREATE VIEW ${view.name} "
- showColumns(view, builder)
- showComment(view, builder)
- showProperties(view, builder)
- builder ++= s"AS\n${view.query}\n"
-
- Seq(toCatalystRow(builder.toString))
- }
-
- private def showColumns(view: View, builder: StringBuilder): Unit = {
- val columns = concatByMultiLines(
- view
- .schema()
- .fields
- .map(x => s"${x.name}${x.getComment().map(c => s" COMMENT '$c'").getOrElse("")}"))
- builder ++= columns
- }
-
- private def showComment(view: View, builder: StringBuilder): Unit = {
- Option(view.properties.get(ViewCatalog.PROP_COMMENT))
- .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
- .foreach(builder.append)
- }
-
- private def showProperties(view: View, builder: StringBuilder): Unit = {
- val showProps = view.properties.asScala.toMap -- ViewCatalog.RESERVED_PROPERTIES.asScala
- if (showProps.nonEmpty) {
- val props = conf.redactOptions(showProps).toSeq.sortBy(_._1).map { case (key, value) =>
- s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
- }
-
- builder ++= "TBLPROPERTIES "
- builder ++= concatByMultiLines(props)
- }
- }
-
- private def concatByMultiLines(iter: Iterable[String]): String = {
- iter.mkString("(\n ", ",\n ", ")\n")
- }
-
- override def simpleString(maxFields: Int): String = {
- s"ShowCreateV2ViewExec"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowV2ViewPropertiesExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowV2ViewPropertiesExec.scala
deleted file mode 100644
index ace43eb6c07b..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowV2ViewPropertiesExec.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.View
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.execution.LeafExecNode
-import scala.jdk.CollectionConverters._
-
-case class ShowV2ViewPropertiesExec(output: Seq[Attribute], view: View, propertyKey: Option[String])
- extends V2CommandExec
- with LeafExecNode {
-
- override protected def run(): Seq[InternalRow] = {
- propertyKey match {
- case Some(p) =>
- val propValue = properties.getOrElse(p, s"View ${view.name()} does not have property: $p")
- Seq(toCatalystRow(p, propValue))
- case None =>
- properties.map { case (k, v) =>
- toCatalystRow(k, v)
- }.toSeq
- }
- }
-
- private def properties = {
- view.properties.asScala.toMap -- ViewCatalog.RESERVED_PROPERTIES.asScala
- }
-
- override def simpleString(maxFields: Int): String = {
- s"ShowV2ViewPropertiesExec"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowV2ViewsExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowV2ViewsExec.scala
deleted file mode 100644
index 83a1f84aecba..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowV2ViewsExec.scala
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.StringUtils
-import org.apache.spark.sql.connector.catalog.ViewCatalog
-import org.apache.spark.sql.execution.LeafExecNode
-import scala.collection.mutable.ArrayBuffer
-
-case class ShowV2ViewsExec(
- output: Seq[Attribute],
- catalog: ViewCatalog,
- namespace: Seq[String],
- pattern: Option[String])
- extends V2CommandExec
- with LeafExecNode {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override protected def run(): Seq[InternalRow] = {
- val rows = new ArrayBuffer[InternalRow]()
-
- // handle GLOBAL VIEWS
- val globalTemp = session.sessionState.catalog.globalTempViewManager.database
- if (namespace.nonEmpty && globalTemp == namespace.head) {
- pattern
- .map(p => session.sessionState.catalog.globalTempViewManager.listViewNames(p))
- .getOrElse(session.sessionState.catalog.globalTempViewManager.listViewNames("*"))
- .map(name => rows += toCatalystRow(globalTemp, name, true))
- } else {
- val views = catalog.listViews(namespace: _*)
- views.map { view =>
- if (pattern.map(StringUtils.filterPattern(Seq(view.name()), _).nonEmpty).getOrElse(true)) {
- rows += toCatalystRow(view.namespace().quoted, view.name(), false)
- }
- }
- }
-
- // include TEMP VIEWS
- pattern
- .map(p => session.sessionState.catalog.listLocalTempViews(p))
- .getOrElse(session.sessionState.catalog.listLocalTempViews("*"))
- .map(v => rows += toCatalystRow(v.database.toArray.quoted, v.table, true))
-
- rows.toSeq
- }
-
- override def simpleString(maxFields: Int): String = {
- s"ShowV2ViewsExec"
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/UpdateRowsExec.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/UpdateRowsExec.scala
deleted file mode 100644
index 9fd0dcd8c067..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/UpdateRowsExec.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.UnaryExecNode
-
-case class UpdateRowsExec(
- deleteOutput: Seq[Expression],
- insertOutput: Seq[Expression],
- output: Seq[Attribute],
- child: SparkPlan)
- extends UnaryExecNode {
-
- @transient override lazy val producedAttributes: AttributeSet = {
- AttributeSet(output.filterNot(attr => inputSet.contains(attr)))
- }
-
- override def simpleString(maxFields: Int): String = {
- s"UpdateRowsExec${truncatedString(output, "[", ", ", "]", maxFields)}"
- }
-
- override protected def doExecute(): RDD[InternalRow] = {
- child.execute().mapPartitions(processPartition)
- }
-
- override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
- copy(child = newChild)
- }
-
- private def processPartition(rowIterator: Iterator[InternalRow]): Iterator[InternalRow] = {
- val deleteProj = createProjection(deleteOutput)
- val insertProj = createProjection(insertOutput)
- new UpdateAsDeleteAndInsertRowIterator(rowIterator, deleteProj, insertProj)
- }
-
- private def createProjection(exprs: Seq[Expression]): UnsafeProjection = {
- UnsafeProjection.create(exprs, child.output)
- }
-
- class UpdateAsDeleteAndInsertRowIterator(
- private val inputRows: Iterator[InternalRow],
- private val deleteProj: UnsafeProjection,
- private val insertProj: UnsafeProjection)
- extends Iterator[InternalRow] {
-
- var cachedInsertRow: InternalRow = _
-
- override def hasNext: Boolean = cachedInsertRow != null || inputRows.hasNext
-
- override def next(): InternalRow = {
- if (cachedInsertRow != null) {
- val insertRow = cachedInsertRow
- cachedInsertRow = null
- return insertRow
- }
-
- val row = inputRows.next()
- cachedInsertRow = insertProj.apply(row)
- deleteProj.apply(row)
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelCommandDynamicPruning.scala b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelCommandDynamicPruning.scala
deleted file mode 100644
index 6766ad338b9f..000000000000
--- a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelCommandDynamicPruning.scala
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.sql.execution.dynamicpruning
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeMap
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.DynamicPruningSubquery
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils
-import org.apache.spark.sql.catalyst.planning.RewrittenRowLevelCommand
-import org.apache.spark.sql.catalyst.plans.LeftSemi
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.Join
-import org.apache.spark.sql.catalyst.plans.logical.JoinHint
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceIcebergData
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.plans.logical.Sort
-import org.apache.spark.sql.catalyst.plans.logical.Subquery
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
-import org.apache.spark.sql.catalyst.trees.TreePattern.SORT
-import org.apache.spark.sql.connector.read.SupportsRuntimeV2Filtering
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
-import org.apache.spark.sql.execution.datasources.v2.ExtendedDataSourceV2Implicits
-import scala.collection.compat.immutable.ArraySeq
-
-/**
- * A rule that adds a runtime filter for row-level commands.
- *
- * Note that only group-based rewrite plans (i.e. ReplaceData) are taken into account.
- * Row-based rewrite plans are subject to usual runtime filtering.
- */
-case class RowLevelCommandDynamicPruning(spark: SparkSession)
- extends Rule[LogicalPlan]
- with PredicateHelper {
-
- import ExtendedDataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- // apply special dynamic filtering only for plans that don't support deltas
- case RewrittenRowLevelCommand(
- command: RowLevelCommand,
- DataSourceV2ScanRelation(_, scan: SupportsRuntimeV2Filtering, _, _, _),
- rewritePlan: ReplaceIcebergData)
- if conf.dynamicPartitionPruningEnabled && isCandidate(command) =>
-
- // use reference equality to find exactly the required scan relations
- val newRewritePlan = rewritePlan transformUp {
- case r: DataSourceV2ScanRelation if r.scan eq scan =>
- // use the original table instance that was loaded for this row-level operation
- // in order to leverage a regular batch scan in the group filter query
- val originalTable = r.relation.table.asRowLevelOperationTable.table
- val relation = r.relation.copy(table = originalTable)
- val matchingRowsPlan = buildMatchingRowsPlan(relation, command)
-
- val filterAttrs = ArraySeq.unsafeWrapArray(scan.filterAttributes)
- val buildKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, matchingRowsPlan)
- val pruningKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, r)
- val dynamicPruningCond = buildDynamicPruningCond(matchingRowsPlan, buildKeys, pruningKeys)
-
- Filter(dynamicPruningCond, r)
- }
-
- // always optimize dynamic filtering subqueries for row-level commands as it is important
- // to rewrite introduced predicates as joins because Spark recently stopped optimizing
- // dynamic subqueries to facilitate broadcast reuse
- command.withNewRewritePlan(optimizeSubquery(newRewritePlan))
- }
-
- private def isCandidate(command: RowLevelCommand): Boolean = command.condition match {
- case Some(cond) if cond != Literal.TrueLiteral => true
- case _ => false
- }
-
- private def buildMatchingRowsPlan(
- relation: DataSourceV2Relation,
- command: RowLevelCommand): LogicalPlan = {
-
- // construct a filtering plan with the original scan relation
- val matchingRowsPlan = command match {
- case u: UpdateIcebergTable =>
- // UPDATEs with subqueries are rewritten using a UNION with two identical scan relations
- // the analyzer clones of them and assigns fresh expr IDs so that attributes don't collide
- // this rule assigns dynamic filters to both scan relations based on the update condition
- // the condition always refers to the original expr IDs and must be transformed
- // see RewriteUpdateTable for more details
- val attrMap = buildAttrMap(u.table.output, relation.output)
- val transformedCond = u.condition.get transform {
- case attr: AttributeReference if attrMap.contains(attr) => attrMap(attr)
- }
- Filter(transformedCond, relation)
-
- case m: MergeIntoIcebergTable =>
- Join(relation, m.sourceTable, LeftSemi, Some(m.mergeCondition), JoinHint.NONE)
- }
-
- // clone the original relation in the filtering plan and assign new expr IDs to avoid conflicts
- matchingRowsPlan transformUpWithNewOutput {
- case r: DataSourceV2Relation if r eq relation =>
- val oldOutput = r.output
- val newOutput = oldOutput.map(_.newInstance())
- r.copy(output = newOutput) -> oldOutput.zip(newOutput)
- }
- }
-
- private def buildDynamicPruningCond(
- matchingRowsPlan: LogicalPlan,
- buildKeys: Seq[Attribute],
- pruningKeys: Seq[Attribute]): Expression = {
-
- val buildQuery = Project(buildKeys, matchingRowsPlan)
- val dynamicPruningSubqueries = pruningKeys.zipWithIndex.map { case (key, index) =>
- DynamicPruningSubquery(key, buildQuery, buildKeys, index, onlyInBroadcast = false)
- }
- dynamicPruningSubqueries.reduce(And)
- }
-
- private def buildAttrMap(
- tableAttrs: Seq[Attribute],
- scanAttrs: Seq[Attribute]): AttributeMap[Attribute] = {
-
- val resolver = conf.resolver
- val attrMapping = tableAttrs.flatMap { tableAttr =>
- scanAttrs
- .find(scanAttr => resolver(scanAttr.name, tableAttr.name))
- .map(scanAttr => tableAttr -> scanAttr)
- }
- AttributeMap(attrMapping)
- }
-
- // borrowed from OptimizeSubqueries in Spark
- private def optimizeSubquery(plan: LogicalPlan): LogicalPlan =
- plan.transformAllExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) {
- case s: SubqueryExpression =>
- val Subquery(newPlan, _) = spark.sessionState.optimizer.execute(Subquery.fromExpression(s))
- // At this point we have an optimized subquery plan that we are going to attach
- // to this subquery expression. Here we can safely remove any top level sort
- // in the plan as tuples produced by a subquery are un-ordered.
- s.withNewPlan(removeTopLevelSort(newPlan))
- }
-
- // borrowed from OptimizeSubqueries in Spark
- private def removeTopLevelSort(plan: LogicalPlan): LogicalPlan = {
- if (!plan.containsPattern(SORT)) {
- return plan
- }
- plan match {
- case Sort(_, _, child) => child
- case Project(fields, child) => Project(fields, removeTopLevelSort(child))
- case other => other
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/TestExtendedParser.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/TestExtendedParser.java
deleted file mode 100644
index ef4f0090292c..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/TestExtendedParser.java
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import java.lang.reflect.Field;
-import java.util.Collections;
-import java.util.List;
-import org.apache.iceberg.NullOrder;
-import org.apache.iceberg.SortDirection;
-import org.apache.iceberg.expressions.Term;
-import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.catalyst.parser.AbstractSqlParser;
-import org.apache.spark.sql.catalyst.parser.AstBuilder;
-import org.apache.spark.sql.catalyst.parser.ParserInterface;
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-public class TestExtendedParser {
-
- private static SparkSession spark;
- private static final String SQL_PARSER_FIELD = "sqlParser";
- private ParserInterface originalParser;
-
- @BeforeAll
- public static void before() {
- spark =
- SparkSession.builder()
- .master("local")
- .appName("TestExtendedParser")
- .config(TestBase.DISABLE_UI)
- .getOrCreate();
- }
-
- @AfterAll
- public static void after() {
- if (spark != null) {
- spark.stop();
- }
- }
-
- @BeforeEach
- public void saveOriginalParser() throws Exception {
- Class> clazz = spark.sessionState().getClass();
- Field parserField = null;
- while (clazz != null && parserField == null) {
- try {
- parserField = clazz.getDeclaredField(SQL_PARSER_FIELD);
- } catch (NoSuchFieldException e) {
- clazz = clazz.getSuperclass();
- }
- }
- parserField.setAccessible(true);
- originalParser = (ParserInterface) parserField.get(spark.sessionState());
- }
-
- @AfterEach
- public void restoreOriginalParser() throws Exception {
- setSessionStateParser(spark.sessionState(), originalParser);
- }
-
- /**
- * Tests that the Iceberg extended SQL parser can correctly parse a sort order string and return
- * the expected RawOrderField.
- *
- * @throws Exception if reflection access fails
- */
- @Test
- public void testParseSortOrderWithRealIcebergExtendedParser() throws Exception {
- ParserInterface origParser = null;
- Class> clazz = spark.sessionState().getClass();
- while (clazz != null && origParser == null) {
- try {
- Field parserField = clazz.getDeclaredField(SQL_PARSER_FIELD);
- parserField.setAccessible(true);
- origParser = (ParserInterface) parserField.get(spark.sessionState());
- } catch (NoSuchFieldException e) {
- clazz = clazz.getSuperclass();
- }
- }
- assertThat(origParser).isNotNull();
-
- IcebergSparkSqlExtensionsParser icebergParser = new IcebergSparkSqlExtensionsParser(origParser);
-
- setSessionStateParser(spark.sessionState(), icebergParser);
-
- List fields =
- ExtendedParser.parseSortOrder(spark, "id ASC NULLS FIRST");
-
- assertThat(fields).isNotEmpty();
- ExtendedParser.RawOrderField first = fields.get(0);
- assertThat(first.direction()).isEqualTo(SortDirection.ASC);
- assertThat(first.nullOrder()).isEqualTo(NullOrder.NULLS_FIRST);
- }
-
- /**
- * Tests that parseSortOrder can find and use an ExtendedParser that is wrapped inside another
- * ParserInterface implementation.
- *
- * @throws Exception if reflection access fails
- */
- @Test
- public void testParseSortOrderFindsNestedExtendedParser() throws Exception {
- ExtendedParser icebergParser = mock(ExtendedParser.class);
-
- ExtendedParser.RawOrderField field =
- new ExtendedParser.RawOrderField(
- mock(Term.class), SortDirection.ASC, NullOrder.NULLS_FIRST);
- List expected = Collections.singletonList(field);
-
- when(icebergParser.parseSortOrder("id ASC NULLS FIRST")).thenReturn(expected);
-
- ParserInterface wrapper = new WrapperParser(icebergParser);
-
- setSessionStateParser(spark.sessionState(), wrapper);
-
- List result =
- ExtendedParser.parseSortOrder(spark, "id ASC NULLS FIRST");
- assertThat(result).isSameAs(expected);
-
- verify(icebergParser).parseSortOrder("id ASC NULLS FIRST");
- }
-
- /**
- * Tests that parseSortOrder throws an exception if no ExtendedParser instance can be found in the
- * parser chain.
- *
- * @throws Exception if reflection access fails
- */
- @Test
- public void testParseSortOrderThrowsWhenNoExtendedParserFound() throws Exception {
- ParserInterface dummy = mock(ParserInterface.class);
- setSessionStateParser(spark.sessionState(), dummy);
-
- assertThatThrownBy(() -> ExtendedParser.parseSortOrder(spark, "id ASC"))
- .isInstanceOf(IllegalStateException.class)
- .hasMessageContaining("Iceberg ExtendedParser");
- }
-
- /**
- * Tests that parseSortOrder can find an ExtendedParser in a parent class field of the parser.
- *
- * @throws Exception if reflection access fails
- */
- @Test
- public void testParseSortOrderFindsExtendedParserInParentClassField() throws Exception {
- ExtendedParser icebergParser = mock(ExtendedParser.class);
- ExtendedParser.RawOrderField field =
- new ExtendedParser.RawOrderField(
- mock(Term.class), SortDirection.ASC, NullOrder.NULLS_FIRST);
- List expected = Collections.singletonList(field);
- when(icebergParser.parseSortOrder("id ASC NULLS FIRST")).thenReturn(expected);
- ParserInterface parser = new GrandChildParser(icebergParser);
- setSessionStateParser(spark.sessionState(), parser);
-
- List result =
- ExtendedParser.parseSortOrder(spark, "id ASC NULLS FIRST");
- assertThat(result).isSameAs(expected);
- verify(icebergParser).parseSortOrder("id ASC NULLS FIRST");
- }
-
- private static void setSessionStateParser(Object sessionState, ParserInterface parser)
- throws Exception {
- Class> clazz = sessionState.getClass();
- Field targetField = null;
- while (clazz != null && targetField == null) {
- try {
- targetField = clazz.getDeclaredField(SQL_PARSER_FIELD);
- } catch (NoSuchFieldException e) {
- clazz = clazz.getSuperclass();
- }
- }
- if (targetField == null) {
- throw new IllegalStateException(
- "No suitable sqlParser field found in sessionState class hierarchy!");
- }
- targetField.setAccessible(true);
- targetField.set(sessionState, parser);
- }
-
- private static class WrapperParser extends AbstractSqlParser {
- private final ParserInterface delegate;
- private String name;
-
- WrapperParser(ParserInterface delegate) {
- this.delegate = delegate;
- this.name = "delegate";
- }
-
- public ParserInterface getDelegate() {
- return delegate;
- }
-
- @Override
- public AstBuilder astBuilder() {
- return null;
- }
- }
-
- private static class ChildParser extends WrapperParser {
- ChildParser(ParserInterface parent) {
- super(parent);
- }
- }
-
- private static class GrandChildParser extends ChildParser {
- GrandChildParser(ParserInterface parent) {
- super(parent);
- }
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java
deleted file mode 100644
index 8918dfec6584..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import java.util.Objects;
-
-public class Employee {
- private Integer id;
- private String dep;
-
- public Employee() {}
-
- public Employee(Integer id, String dep) {
- this.id = id;
- this.dep = dep;
- }
-
- public Integer getId() {
- return id;
- }
-
- public void setId(Integer id) {
- this.id = id;
- }
-
- public String getDep() {
- return dep;
- }
-
- public void setDep(String dep) {
- this.dep = dep;
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- } else if (other == null || getClass() != other.getClass()) {
- return false;
- }
-
- Employee employee = (Employee) other;
- return Objects.equals(id, employee.id) && Objects.equals(dep, employee.dep);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(id, dep);
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/ExtensionsTestBase.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/ExtensionsTestBase.java
deleted file mode 100644
index 834640e24328..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/ExtensionsTestBase.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTOREURIS;
-
-import java.net.InetAddress;
-import java.util.Random;
-import java.util.concurrent.ThreadLocalRandom;
-import org.apache.iceberg.CatalogUtil;
-import org.apache.iceberg.hive.HiveCatalog;
-import org.apache.iceberg.hive.TestHiveMetastore;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.spark.CatalogTestBase;
-import org.apache.iceberg.spark.TestBase;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.internal.SQLConf;
-import org.junit.jupiter.api.BeforeAll;
-
-public abstract class ExtensionsTestBase extends CatalogTestBase {
-
- private static final Random RANDOM = ThreadLocalRandom.current();
-
- @BeforeAll
- public static void startMetastoreAndSpark() {
- TestBase.metastore = new TestHiveMetastore();
- metastore.start();
- TestBase.hiveConf = metastore.hiveConf();
-
- TestBase.spark.close();
-
- TestBase.spark =
- SparkSession.builder()
- .master("local[2]")
- .config("spark.driver.host", InetAddress.getLoopbackAddress().getHostAddress())
- .config("spark.testing", "true")
- .config(SQLConf.PARTITION_OVERWRITE_MODE().key(), "dynamic")
- .config("spark.sql.extensions", IcebergSparkSessionExtensions.class.getName())
- .config("spark.hadoop." + METASTOREURIS.varname, hiveConf.get(METASTOREURIS.varname))
- .config("spark.sql.shuffle.partitions", "4")
- .config("spark.sql.hive.metastorePartitionPruningFallbackOnException", "true")
- .config("spark.sql.legacy.respectNullabilityInTextDatasetConversion", "true")
- .config(
- SQLConf.ADAPTIVE_EXECUTION_ENABLED().key(), String.valueOf(RANDOM.nextBoolean()))
- .config(TestBase.DISABLE_UI)
- .enableHiveSupport()
- .getOrCreate();
-
- TestBase.sparkContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
- TestBase.catalog =
- (HiveCatalog)
- CatalogUtil.loadCatalog(
- HiveCatalog.class.getName(), "hive", ImmutableMap.of(), hiveConf);
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/ProcedureUtil.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/ProcedureUtil.java
deleted file mode 100644
index de4acd74a7ed..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/ProcedureUtil.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.UUID;
-import org.apache.iceberg.ImmutableGenericPartitionStatisticsFile;
-import org.apache.iceberg.PartitionStatisticsFile;
-import org.apache.iceberg.io.FileIO;
-import org.apache.iceberg.io.PositionOutputStream;
-
-public class ProcedureUtil {
-
- private ProcedureUtil() {}
-
- static PartitionStatisticsFile writePartitionStatsFile(
- long snapshotId, String statsLocation, FileIO fileIO) {
- PositionOutputStream positionOutputStream;
- try {
- positionOutputStream = fileIO.newOutputFile(statsLocation).create();
- positionOutputStream.close();
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
-
- return ImmutableGenericPartitionStatisticsFile.builder()
- .snapshotId(snapshotId)
- .fileSizeInBytes(42L)
- .path(statsLocation)
- .build();
- }
-
- static String statsFileLocation(String tableLocation) {
- String statsFileName = "stats-file-" + UUID.randomUUID();
- return tableLocation.replaceFirst("file:", "") + "/metadata/" + statsFileName;
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkPlanUtil.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkPlanUtil.java
deleted file mode 100644
index 830d07d86eab..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkPlanUtil.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import static scala.collection.JavaConverters.seqAsJavaListConverter;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.function.Predicate;
-import java.util.stream.Collectors;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.spark.sql.catalyst.expressions.Expression;
-import org.apache.spark.sql.execution.CommandResultExec;
-import org.apache.spark.sql.execution.SparkPlan;
-import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper;
-import org.apache.spark.sql.execution.datasources.v2.BatchScanExec;
-import scala.PartialFunction;
-import scala.collection.Seq;
-
-public class SparkPlanUtil {
-
- private static final AdaptiveSparkPlanHelper SPARK_HELPER = new AdaptiveSparkPlanHelper() {};
-
- private SparkPlanUtil() {}
-
- public static List collectLeaves(SparkPlan plan) {
- return toJavaList(SPARK_HELPER.collectLeaves(actualPlan(plan)));
- }
-
- public static List collectBatchScans(SparkPlan plan) {
- List leaves = collectLeaves(plan);
- return leaves.stream()
- .filter(scan -> scan instanceof BatchScanExec)
- .collect(Collectors.toList());
- }
-
- private static SparkPlan actualPlan(SparkPlan plan) {
- if (plan instanceof CommandResultExec) {
- return ((CommandResultExec) plan).commandPhysicalPlan();
- } else {
- return plan;
- }
- }
-
- public static List collectExprs(
- SparkPlan sparkPlan, Predicate predicate) {
- Seq> seq =
- SPARK_HELPER.collect(
- sparkPlan,
- new PartialFunction>() {
- @Override
- public List apply(SparkPlan plan) {
- List exprs = Lists.newArrayList();
-
- for (Expression expr : toJavaList(plan.expressions())) {
- exprs.addAll(collectExprs(expr, predicate));
- }
-
- return exprs;
- }
-
- @Override
- public boolean isDefinedAt(SparkPlan plan) {
- return true;
- }
- });
- return toJavaList(seq).stream().flatMap(Collection::stream).collect(Collectors.toList());
- }
-
- private static List collectExprs(
- Expression expression, Predicate predicate) {
- Seq seq =
- expression.collect(
- new PartialFunction() {
- @Override
- public Expression apply(Expression expr) {
- return expr;
- }
-
- @Override
- public boolean isDefinedAt(Expression expr) {
- return predicate.test(expr);
- }
- });
- return toJavaList(seq);
- }
-
- private static List toJavaList(Seq seq) {
- return seqAsJavaListConverter(seq).asJava();
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java
deleted file mode 100644
index 893f9931cfa2..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import static org.apache.iceberg.DataOperations.DELETE;
-import static org.apache.iceberg.DataOperations.OVERWRITE;
-import static org.apache.iceberg.PlanningMode.DISTRIBUTED;
-import static org.apache.iceberg.PlanningMode.LOCAL;
-import static org.apache.iceberg.SnapshotSummary.ADDED_DELETE_FILES_PROP;
-import static org.apache.iceberg.SnapshotSummary.ADDED_DVS_PROP;
-import static org.apache.iceberg.SnapshotSummary.ADDED_FILES_PROP;
-import static org.apache.iceberg.SnapshotSummary.ADD_POS_DELETE_FILES_PROP;
-import static org.apache.iceberg.SnapshotSummary.CHANGED_PARTITION_COUNT_PROP;
-import static org.apache.iceberg.SnapshotSummary.DELETED_FILES_PROP;
-import static org.apache.iceberg.TableProperties.DATA_PLANNING_MODE;
-import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT;
-import static org.apache.iceberg.TableProperties.DELETE_PLANNING_MODE;
-import static org.apache.iceberg.TableProperties.FORMAT_VERSION;
-import static org.apache.iceberg.TableProperties.ORC_VECTORIZATION_ENABLED;
-import static org.apache.iceberg.TableProperties.PARQUET_VECTORIZATION_ENABLED;
-import static org.apache.iceberg.TableProperties.SPARK_WRITE_PARTITIONED_FANOUT_ENABLED;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_HASH;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_NONE;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_RANGE;
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.stream.Collectors;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.FileFormat;
-import org.apache.iceberg.Files;
-import org.apache.iceberg.Parameter;
-import org.apache.iceberg.ParameterizedTestExtension;
-import org.apache.iceberg.Parameters;
-import org.apache.iceberg.PlanningMode;
-import org.apache.iceberg.RowLevelOperationMode;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.SnapshotRef;
-import org.apache.iceberg.Table;
-import org.apache.iceberg.TableProperties;
-import org.apache.iceberg.data.GenericRecord;
-import org.apache.iceberg.data.parquet.GenericParquetWriter;
-import org.apache.iceberg.deletes.DeleteGranularity;
-import org.apache.iceberg.io.DataWriter;
-import org.apache.iceberg.io.OutputFile;
-import org.apache.iceberg.parquet.Parquet;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.spark.SparkCatalog;
-import org.apache.iceberg.spark.SparkSessionCatalog;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoder;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
-import org.apache.spark.sql.execution.SparkPlan;
-import org.junit.jupiter.api.extension.ExtendWith;
-
-@ExtendWith(ParameterizedTestExtension.class)
-public abstract class SparkRowLevelOperationsTestBase extends ExtensionsTestBase {
-
- private static final Random RANDOM = ThreadLocalRandom.current();
-
- @Parameter(index = 3)
- protected FileFormat fileFormat;
-
- @Parameter(index = 4)
- protected boolean vectorized;
-
- @Parameter(index = 5)
- protected String distributionMode;
-
- @Parameter(index = 6)
- protected boolean fanoutEnabled;
-
- @Parameter(index = 7)
- protected String branch;
-
- @Parameter(index = 8)
- protected PlanningMode planningMode;
-
- @Parameter(index = 9)
- protected int formatVersion;
-
- @Parameters(
- name =
- "catalogName = {0}, implementation = {1}, config = {2},"
- + " format = {3}, vectorized = {4}, distributionMode = {5},"
- + " fanout = {6}, branch = {7}, planningMode = {8}, formatVersion = {9}")
- public static Object[][] parameters() {
- return new Object[][] {
- {
- "testhive",
- SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default"),
- FileFormat.ORC,
- true,
- WRITE_DISTRIBUTION_MODE_NONE,
- true,
- SnapshotRef.MAIN_BRANCH,
- LOCAL,
- 2
- },
- {
- "testhive",
- SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default"),
- FileFormat.PARQUET,
- true,
- WRITE_DISTRIBUTION_MODE_NONE,
- false,
- "test",
- DISTRIBUTED,
- 2
- },
- {
- "testhadoop",
- SparkCatalog.class.getName(),
- ImmutableMap.of("type", "hadoop"),
- FileFormat.PARQUET,
- RANDOM.nextBoolean(),
- WRITE_DISTRIBUTION_MODE_HASH,
- true,
- null,
- LOCAL,
- 2
- },
- {
- "spark_catalog",
- SparkSessionCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default",
- "clients", "1",
- "parquet-enabled", "false",
- "cache-enabled",
- "false" // Spark will delete tables using v1, leaving the cache out of sync
- ),
- FileFormat.AVRO,
- false,
- WRITE_DISTRIBUTION_MODE_RANGE,
- false,
- "test",
- DISTRIBUTED,
- 2
- },
- {
- "testhadoop",
- SparkCatalog.class.getName(),
- ImmutableMap.of("type", "hadoop"),
- FileFormat.PARQUET,
- true,
- WRITE_DISTRIBUTION_MODE_HASH,
- true,
- null,
- LOCAL,
- 3
- },
- {
- "testhadoop",
- SparkCatalog.class.getName(),
- ImmutableMap.of("type", "hadoop"),
- FileFormat.PARQUET,
- false,
- WRITE_DISTRIBUTION_MODE_HASH,
- true,
- null,
- LOCAL,
- 3
- },
- {
- "spark_catalog",
- SparkSessionCatalog.class.getName(),
- ImmutableMap.of(
- "type",
- "hive",
- "default-namespace",
- "default",
- "clients",
- "1",
- "parquet-enabled",
- "false",
- "cache-enabled",
- "false" // Spark will delete tables using v1, leaving the cache out of sync
- ),
- FileFormat.AVRO,
- false,
- WRITE_DISTRIBUTION_MODE_RANGE,
- false,
- "test",
- DISTRIBUTED,
- 3
- },
- };
- }
-
- protected abstract Map extraTableProperties();
-
- protected void initTable() {
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES('%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s')",
- tableName,
- DEFAULT_FILE_FORMAT,
- fileFormat,
- WRITE_DISTRIBUTION_MODE,
- distributionMode,
- SPARK_WRITE_PARTITIONED_FANOUT_ENABLED,
- String.valueOf(fanoutEnabled),
- DATA_PLANNING_MODE,
- planningMode.modeName(),
- DELETE_PLANNING_MODE,
- planningMode.modeName(),
- FORMAT_VERSION,
- formatVersion);
-
- switch (fileFormat) {
- case PARQUET:
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES('%s' '%b')",
- tableName, PARQUET_VECTORIZATION_ENABLED, vectorized);
- break;
- case ORC:
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES('%s' '%b')",
- tableName, ORC_VECTORIZATION_ENABLED, vectorized);
- break;
- case AVRO:
- assertThat(vectorized).isFalse();
- break;
- }
-
- Map props = extraTableProperties();
- props.forEach(
- (prop, value) -> {
- sql("ALTER TABLE %s SET TBLPROPERTIES('%s' '%s')", tableName, prop, value);
- });
- }
-
- protected void createAndInitTable(String schema) {
- createAndInitTable(schema, null);
- }
-
- protected void createAndInitTable(String schema, String jsonData) {
- createAndInitTable(schema, "", jsonData);
- }
-
- protected void createAndInitTable(String schema, String partitioning, String jsonData) {
- sql("CREATE TABLE %s (%s) USING iceberg %s", tableName, schema, partitioning);
- initTable();
-
- if (jsonData != null) {
- try {
- Dataset ds = toDS(schema, jsonData);
- ds.coalesce(1).writeTo(tableName).append();
- createBranchIfNeeded();
- } catch (NoSuchTableException e) {
- throw new RuntimeException("Failed to write data", e);
- }
- }
- }
-
- protected void append(String table, String jsonData) {
- append(table, null, jsonData);
- }
-
- protected void append(String table, String schema, String jsonData) {
- try {
- Dataset ds = toDS(schema, jsonData);
- ds.coalesce(1).writeTo(table).append();
- } catch (NoSuchTableException e) {
- throw new RuntimeException("Failed to write data", e);
- }
- }
-
- protected void createOrReplaceView(String name, String jsonData) {
- createOrReplaceView(name, null, jsonData);
- }
-
- protected void createOrReplaceView(String name, String schema, String jsonData) {
- Dataset ds = toDS(schema, jsonData);
- ds.createOrReplaceTempView(name);
- }
-
- protected void createOrReplaceView(String name, List data, Encoder encoder) {
- spark.createDataset(data, encoder).createOrReplaceTempView(name);
- }
-
- private Dataset toDS(String schema, String jsonData) {
- List jsonRows =
- Arrays.stream(jsonData.split("\n"))
- .filter(str -> !str.trim().isEmpty())
- .collect(Collectors.toList());
- Dataset jsonDS = spark.createDataset(jsonRows, Encoders.STRING());
-
- if (schema != null) {
- return spark.read().schema(schema).json(jsonDS);
- } else {
- return spark.read().json(jsonDS);
- }
- }
-
- protected void validateDelete(
- Snapshot snapshot, String changedPartitionCount, String deletedDataFiles) {
- validateSnapshot(snapshot, DELETE, changedPartitionCount, deletedDataFiles, null, null);
- }
-
- protected void validateCopyOnWrite(
- Snapshot snapshot,
- String changedPartitionCount,
- String deletedDataFiles,
- String addedDataFiles) {
- String operation = null == addedDataFiles && null != deletedDataFiles ? DELETE : OVERWRITE;
- validateSnapshot(
- snapshot, operation, changedPartitionCount, deletedDataFiles, null, addedDataFiles);
- }
-
- protected void validateMergeOnRead(
- Snapshot snapshot,
- String changedPartitionCount,
- String addedDeleteFiles,
- String addedDataFiles) {
- String operation = null == addedDataFiles && null != addedDeleteFiles ? DELETE : OVERWRITE;
- validateSnapshot(
- snapshot, operation, changedPartitionCount, null, addedDeleteFiles, addedDataFiles);
- }
-
- protected void validateSnapshot(
- Snapshot snapshot,
- String operation,
- String changedPartitionCount,
- String deletedDataFiles,
- String addedDeleteFiles,
- String addedDataFiles) {
- assertThat(snapshot.operation()).as("Operation must match").isEqualTo(operation);
- validateProperty(snapshot, CHANGED_PARTITION_COUNT_PROP, changedPartitionCount);
- validateProperty(snapshot, DELETED_FILES_PROP, deletedDataFiles);
- validateProperty(snapshot, ADDED_DELETE_FILES_PROP, addedDeleteFiles);
- validateProperty(snapshot, ADDED_FILES_PROP, addedDataFiles);
- if (formatVersion >= 3) {
- validateProperty(snapshot, ADDED_DVS_PROP, addedDeleteFiles);
- assertThat(snapshot.summary()).doesNotContainKey(ADD_POS_DELETE_FILES_PROP);
- }
- }
-
- protected void validateProperty(Snapshot snapshot, String property, Set expectedValues) {
- String actual = snapshot.summary().get(property);
- assertThat(actual)
- .as(
- "Snapshot property "
- + property
- + " has unexpected value, actual = "
- + actual
- + ", expected one of : "
- + String.join(",", expectedValues))
- .isIn(expectedValues);
- }
-
- protected void validateProperty(Snapshot snapshot, String property, String expectedValue) {
- if (null == expectedValue) {
- assertThat(snapshot.summary()).doesNotContainKey(property);
- } else {
- assertThat(snapshot.summary())
- .as("Snapshot property " + property + " has unexpected value.")
- .containsEntry(property, expectedValue);
- }
- }
-
- protected void sleep(long millis) {
- try {
- Thread.sleep(millis);
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- }
-
- protected DataFile writeDataFile(Table table, List records) {
- try {
- OutputFile file =
- Files.localOutput(
- temp.resolve(fileFormat.addExtension(UUID.randomUUID().toString())).toFile());
-
- DataWriter dataWriter =
- Parquet.writeData(file)
- .forTable(table)
- .createWriterFunc(GenericParquetWriter::create)
- .overwrite()
- .build();
-
- try {
- for (GenericRecord record : records) {
- dataWriter.write(record);
- }
- } finally {
- dataWriter.close();
- }
-
- return dataWriter.toDataFile();
-
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
-
- @Override
- protected String commitTarget() {
- return branch == null ? tableName : String.format("%s.branch_%s", tableName, branch);
- }
-
- @Override
- protected String selectTarget() {
- return branch == null ? tableName : String.format("%s VERSION AS OF '%s'", tableName, branch);
- }
-
- protected void createBranchIfNeeded() {
- if (branch != null && !branch.equals(SnapshotRef.MAIN_BRANCH)) {
- sql("ALTER TABLE %s CREATE BRANCH %s", tableName, branch);
- }
- }
-
- // ORC currently does not support vectorized reads with deletes
- protected boolean supportsVectorization() {
- return vectorized && (isParquet() || isCopyOnWrite());
- }
-
- private boolean isParquet() {
- return fileFormat.equals(FileFormat.PARQUET);
- }
-
- private boolean isCopyOnWrite() {
- return extraTableProperties().containsValue(RowLevelOperationMode.COPY_ON_WRITE.modeName());
- }
-
- protected void assertAllBatchScansVectorized(SparkPlan plan) {
- List batchScans = SparkPlanUtil.collectBatchScans(plan);
- assertThat(batchScans).hasSizeGreaterThan(0).allMatch(SparkPlan::supportsColumnar);
- }
-
- protected void createTableWithDeleteGranularity(
- String schema, String partitionedBy, DeleteGranularity deleteGranularity) {
- createAndInitTable(schema, partitionedBy, null /* empty */);
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES ('%s' '%s')",
- tableName, TableProperties.DELETE_GRANULARITY, deleteGranularity);
- }
-}
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
deleted file mode 100644
index eb9bd976b81e..000000000000
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
+++ /dev/null
@@ -1,1481 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import static org.assertj.core.api.Assumptions.assumeThat;
-
-import java.io.File;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.SchemaBuilder;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.DatumWriter;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.HasTableOperations;
-import org.apache.iceberg.ManifestFiles;
-import org.apache.iceberg.ManifestReader;
-import org.apache.iceberg.Parameter;
-import org.apache.iceberg.ParameterizedTestExtension;
-import org.apache.iceberg.Parameters;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Table;
-import org.apache.iceberg.TableProperties;
-import org.apache.iceberg.io.FileIO;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
-import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.spark.Spark3Util;
-import org.apache.iceberg.spark.SparkCatalogConfig;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
-import org.apache.spark.sql.catalyst.parser.ParseException;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-import org.joda.time.DateTime;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.TestTemplate;
-import org.junit.jupiter.api.extension.ExtendWith;
-
-@ExtendWith(ParameterizedTestExtension.class)
-public class TestAddFilesProcedure extends ExtensionsTestBase {
-
- @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}, formatVersion = {3}")
- public static Object[][] parameters() {
- return new Object[][] {
- {
- SparkCatalogConfig.HIVE.catalogName(),
- SparkCatalogConfig.HIVE.implementation(),
- SparkCatalogConfig.HIVE.properties(),
- 1
- },
- {
- SparkCatalogConfig.HADOOP.catalogName(),
- SparkCatalogConfig.HADOOP.implementation(),
- SparkCatalogConfig.HADOOP.properties(),
- 2
- },
- {
- SparkCatalogConfig.SPARK.catalogName(),
- SparkCatalogConfig.SPARK.implementation(),
- SparkCatalogConfig.SPARK.properties(),
- 2
- }
- };
- }
-
- @Parameter(index = 3)
- private int formatVersion;
-
- private final String sourceTableName = "source_table";
- private File fileTableDir;
-
- @BeforeEach
- public void setupTempDirs() {
- fileTableDir = temp.toFile();
- }
-
- @AfterEach
- public void dropTables() {
- sql("DROP TABLE IF EXISTS %s PURGE", sourceTableName);
- sql("DROP TABLE IF EXISTS %s", tableName);
- }
-
- @TestTemplate
- public void addDataUnpartitioned() {
- createUnpartitionedFileTable("parquet");
-
- createIcebergTable("id Integer, name String, dept String, subdept String");
-
- List