Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,13 @@
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

/** A base writer factory to be extended by query engine integrations. */
/**
* A base writer factory to be extended by query engine integrations.
*
* @deprecated deprecated as of version 1.11.0 and will be removed in 1.12.0. Use {@link
Comment thread
pvary marked this conversation as resolved.
Outdated
* RegistryBasedFileWriterFactory}
*/
@Deprecated
public abstract class BaseFileWriterFactory<T> implements FileWriterFactory<T>, Serializable {
private final Table table;
private final FileFormat dataFileFormat;
Expand Down Expand Up @@ -75,13 +81,6 @@ protected BaseFileWriterFactory(
this.positionDeleteRowSchema = null;
}

/**
* @deprecated This constructor is deprecated as of version 1.11.0 and will be removed in 1.12.0.
* Position deletes that include row data are no longer supported. Use {@link
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Position deletes that include row data are no longer supported

is this still true? i noticed its missing in the new deprecation message

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the deprecation comment here because this method was introduced in the current release cycle, before we decided to deprecate the entire class. Now that the class itself is deprecated, and the replacement class doesn’t provide a way to set the position‑delete row‑data schema, the constuctor is effectively deprecated as well. However, since there’s no new equivalent method, we don’t really have a suitable place to add a dedicated deprecation message.

* #BaseFileWriterFactory(Table, FileFormat, Schema, SortOrder, FileFormat, int[], Schema,
* SortOrder, Map)} instead.
*/
@Deprecated
protected BaseFileWriterFactory(
Table table,
FileFormat dataFileFormat,
Expand Down
170 changes: 147 additions & 23 deletions data/src/main/java/org/apache/iceberg/data/GenericFileWriterFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,36 @@
import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT;
import static org.apache.iceberg.TableProperties.DELETE_DEFAULT_FILE_FORMAT;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Map;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.data.avro.DataWriter;
import org.apache.iceberg.data.orc.GenericOrcWriter;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.deletes.PositionDeleteWriter;
import org.apache.iceberg.encryption.EncryptedOutputFile;
import org.apache.iceberg.formats.FormatModelRegistry;
import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class GenericFileWriterFactory extends BaseFileWriterFactory<Record> {
public class GenericFileWriterFactory extends RegistryBasedFileWriterFactory<Record, Schema> {
private static final Logger LOG = LoggerFactory.getLogger(GenericFileWriterFactory.class);

private Table table;
private FileFormat format;
private Schema positionDeleteRowSchema;

GenericFileWriterFactory(
Table table,
Expand All @@ -50,13 +65,16 @@ public class GenericFileWriterFactory extends BaseFileWriterFactory<Record> {
super(
table,
dataFileFormat,
Record.class,
dataSchema,
dataSortOrder,
deleteFileFormat,
equalityFieldIds,
equalityDeleteRowSchema,
equalityDeleteSortOrder,
ImmutableMap.of());
ImmutableMap.of(),
null,
null);
}

/**
Expand All @@ -80,14 +98,19 @@ public class GenericFileWriterFactory extends BaseFileWriterFactory<Record> {
super(
table,
dataFileFormat,
Record.class,
dataSchema,
dataSortOrder,
deleteFileFormat,
equalityFieldIds,
equalityDeleteRowSchema,
equalityDeleteSortOrder,
positionDeleteRowSchema,
writerProperties);
writerProperties,
null,
null);
this.table = table;
this.format = dataFileFormat;
this.positionDeleteRowSchema = positionDeleteRowSchema;
}

/**
Expand All @@ -107,62 +130,163 @@ public class GenericFileWriterFactory extends BaseFileWriterFactory<Record> {
super(
table,
dataFileFormat,
Record.class,
dataSchema,
dataSortOrder,
deleteFileFormat,
equalityFieldIds,
equalityDeleteRowSchema,
equalityDeleteSortOrder,
positionDeleteRowSchema);
ImmutableMap.of(),
dataSchema,
equalityDeleteRowSchema);
this.table = table;
this.format = dataFileFormat;
this.positionDeleteRowSchema = positionDeleteRowSchema;
}

static Builder builderFor(Table table) {
return new Builder(table);
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
Comment thread
pvary marked this conversation as resolved.
Outdated
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configureDataWrite(Avro.DataWriteBuilder builder) {
builder.createWriterFunc(DataWriter::create);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configureEqualityDelete(Avro.DeleteWriteBuilder builder) {
builder.createWriterFunc(DataWriter::create);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configurePositionDelete(Avro.DeleteWriteBuilder builder) {
builder.createWriterFunc(DataWriter::create);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configureDataWrite(Parquet.DataWriteBuilder builder) {
builder.createWriterFunc(GenericParquetWriter::create);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configureEqualityDelete(Parquet.DeleteWriteBuilder builder) {
builder.createWriterFunc(GenericParquetWriter::create);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configurePositionDelete(Parquet.DeleteWriteBuilder builder) {
builder.createWriterFunc(GenericParquetWriter::create);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configureDataWrite(ORC.DataWriteBuilder builder) {
builder.createWriterFunc(GenericOrcWriter::buildWriter);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configureEqualityDelete(ORC.DeleteWriteBuilder builder) {
builder.createWriterFunc(GenericOrcWriter::buildWriter);
throwUnsupportedOperationException();
}

@Override
/**
* @deprecated Since 1.10.0, will be removed in 1.11.0. It won't be called starting 1.10.0 as the
* configuration is done by the {@link FormatModelRegistry}.
*/
@Deprecated
protected void configurePositionDelete(ORC.DeleteWriteBuilder builder) {
builder.createWriterFunc(GenericOrcWriter::buildWriter);
throwUnsupportedOperationException();
}

private void throwUnsupportedOperationException() {
throw new UnsupportedOperationException(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should those methods be no-ops instead? Otherwise users will hit this exception when calling those methods. Typically when we deprecate something we keep/maintain the functionality without failing immediately

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These methods were originally defined in the abstract BaseFileWriterFactory so that subclasses, such as GenericFileWriterFactory could implement them. BaseFileWriterFactory also relied on them to configure writers. The new RegistryBasedFileWriterFactory neither needs nor uses these methods, so any external calls to them would now represent a potential source of errors.

I considered three options:

  1. Remove the methods – This causes compile‑time failures when subclasses use @Override.
  2. Keep the methods but throw an exception – This fails fast but breaks callers that still rely on the old configuration path.
  3. Keep the methods as no‑ops – This avoids failures but silently ignores calls, which can mask bugs.

Both options (2) and (3) behave unpredictably if someone overrides newDataWriter, newEqualityDeleteWriter, or newPositionDeleteWriter and continues to rely on the old configure... methods: option (2) throws, option (3) silently discards the call.
After thinking through the trade‑offs, the best approach seems to be keeping the methods as-is and marking them as deprecated, leaving their behavior unchanged. This provides a clear migration signal without introducing new failure modes.

"Method is deprecated and should not be called. "
+ "Configuration is already done by the registry.");
}

@Override
public PositionDeleteWriter<Record> newPositionDeleteWriter(
EncryptedOutputFile file, PartitionSpec spec, StructLike partition) {
if (positionDeleteRowSchema == null) {
return super.newPositionDeleteWriter(file, spec, partition);
} else {
LOG.info(
Comment thread
pvary marked this conversation as resolved.
Outdated
"Deprecated feature used. Position delete row schema is used to create the position delete writer.");
MetricsConfig metricsConfig =
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this part of the code actually mimic

public PositionDeleteWriter<T> newPositionDeleteWriter(
to maintain existing behavior? Because right now I think it's a copy of
public PositionDeleteWriter<Record> newPosDeleteWriter(

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally misunderstood this comment.
Fixed now. Please check

table != null
? MetricsConfig.forPositionDelete(table)
: MetricsConfig.fromProperties(ImmutableMap.of());

try {
return switch (format) {
case AVRO ->
Avro.writeDeletes(file)
.createWriterFunc(DataWriter::create)
.withPartition(partition)
.overwrite()
.rowSchema(positionDeleteRowSchema)
.withSpec(spec)
.withKeyMetadata(file.keyMetadata())
.buildPositionWriter();
case ORC ->
ORC.writeDeletes(file)
.createWriterFunc(GenericOrcWriter::buildWriter)
.withPartition(partition)
.overwrite()
.rowSchema(positionDeleteRowSchema)
.withSpec(spec)
.withKeyMetadata(file.keyMetadata())
.buildPositionWriter();
case PARQUET ->
Parquet.writeDeletes(file)
.createWriterFunc(GenericParquetWriter::create)
.withPartition(partition)
.overwrite()
.metricsConfig(metricsConfig)
Comment thread
pvary marked this conversation as resolved.
Outdated
.rowSchema(positionDeleteRowSchema)
.withSpec(spec)
.withKeyMetadata(file.keyMetadata())
.buildPositionWriter();
default ->
throw new UnsupportedOperationException(
"Cannot write pos-deletes for unsupported file format: " + format);
};
} catch (IOException e) {
throw new UncheckedIOException("Failed to create new position delete writer", e);
}
}
}

public static class Builder {
Expand Down
Loading
Loading