@@ -53,6 +53,9 @@
 import org.apache.iceberg.connect.events.Event;
 import org.apache.iceberg.connect.events.StartCommit;
 import org.apache.iceberg.connect.events.TableReference;
+import org.apache.iceberg.exceptions.CleanableFailure;
+import org.apache.iceberg.exceptions.CommitFailedException;
+import org.apache.iceberg.exceptions.CommitStateUnknownException;
 import org.apache.iceberg.exceptions.NoSuchTableException;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Streams;
@@ -150,12 +153,28 @@ protected boolean receive(Envelope envelope) {
   private void commit(boolean partialCommit) {
     try {
       doCommit(partialCommit);
-    } catch (Exception e) {
+    } catch (CommitFailedException | CommitStateUnknownException e) {
Review comment:

There are two different issues here, and both look bad.

CommitStateUnknownException should not be retried blindly. Its own Javadoc says we don’t know whether the commit succeeded, and retrying can create duplicates. Since the files stay in commitBuffer, the next cycle can commit the same files again. If the first commit actually landed, we get duplicate rows. This should be fatal and require a manual check of the table state.

CommitFailedException is also not something we should swallow. That is exactly the bug from #15878. A WARN + “retry next cycle” is not enough here: Kafka offsets may already be flushed, and on rebalance the in-memory commit state can be lost. Then the data is gone and the operator never sees a real failure.

Flink lets CommitFailedException propagate. I think we should do the same here.
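A minimal sketch of the behavior this comment proposes, using hypothetical stand-in exception classes (not the real Iceberg types, and not the PR's code): the unknown-state case becomes fatal with a pointer to manual verification, and the failed-commit case is simply not caught, so it propagates as in Flink.

```java
class CommitPolicyModel {
  // Hypothetical stand-ins for Iceberg's CommitStateUnknownException and
  // CommitFailedException; illustrative only.
  static class CommitStateUnknown extends RuntimeException {
    CommitStateUnknown(String msg) { super(msg); }
  }

  static class CommitFailed extends RuntimeException {
    CommitFailed(String msg) { super(msg); }
  }

  static void commit(Runnable doCommit) {
    try {
      doCommit.run();
    } catch (CommitStateUnknown e) {
      // Retrying the buffered files could write duplicate rows if the first
      // commit actually landed, so fail fast instead of retrying blindly.
      throw new IllegalStateException("fatal: verify table state manually", e);
    }
    // CommitFailed is deliberately NOT caught here: it propagates to the
    // caller, mirroring how Flink lets CommitFailedException surface.
  }
}
```

The key design point is that neither branch logs-and-continues; both paths make the failure visible to the caller.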

       LOG.warn(
-          "Coordinator {} failed to commit for commit {}, will try again next cycle",
-          taskId,
+          "Commit {} failed, will retry on next cycle: {}",
           commitState.currentCommitId(),
+          e.getMessage(),
           e);
+    } catch (RuntimeException e) {
+      if (e instanceof CleanableFailure) {
Review comment:

CleanableFailure doesn’t mean “safe to retry.”

It only means that uncommitted metadata can be cleaned up.

This branch also catches things that are clearly not retryable:

  • ForbiddenException / 403
  • BadRequestException / 400
  • NotAuthorizedException / 401
  • ValidationException

So a bad credential could keep retrying forever at WARN, with no clear signal to the operator.

If we want retry behavior here, I’d make it explicit: list the retryable exception types instead of using CleanableFailure as the check.
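A sketch of the explicit allow-list this comment suggests, with hypothetical stand-in exception types (the real list would name concrete Iceberg exception classes, and which classes belong in it is exactly the design decision being proposed). The point is that an unlisted failure such as a 403 can never loop forever at WARN.

```java
import java.util.Set;

class RetryClassifier {
  // Hypothetical stand-in exception types for illustration only.
  static class ServiceUnavailable extends RuntimeException {} // e.g. transient 5xx
  static class Forbidden extends RuntimeException {}          // e.g. HTTP 403
  static class Validation extends RuntimeException {}         // bad request state

  // Explicit allow-list: only the named types are ever retried. Anything
  // else (auth failures, validation errors) falls through as fatal.
  private static final Set<Class<? extends RuntimeException>> RETRYABLE =
      Set.of(ServiceUnavailable.class);

  static boolean isRetryable(RuntimeException e) {
    return RETRYABLE.stream().anyMatch(c -> c.isInstance(e));
  }
}
```

Compared to the `instanceof CleanableFailure` check, an allow-list makes the retry policy auditable: adding a type to the retry set is a deliberate, reviewable change.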

+        LOG.warn(
+            "Commit {} failed, will retry on next cycle: {}",
+            commitState.currentCommitId(),
+            e.getMessage(),
+            e);
+      } else {
+        LOG.error(
+            "Commit {} failed fatally for task {}: {}",
+            commitState.currentCommitId(),
+            taskId,
+            e.getMessage(),
+            e);
+        throw e;
+      }
     } finally {
       commitState.endCurrentCommit();
     }
@@ -19,12 +19,16 @@
 package org.apache.iceberg.connect.channel;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
 
 import java.time.OffsetDateTime;
 import java.util.List;
 import java.util.UUID;
+import org.apache.iceberg.AppendFiles;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DataFiles;
 import org.apache.iceberg.DataOperations;
@@ -45,6 +49,8 @@
 import org.apache.iceberg.connect.events.StartCommit;
 import org.apache.iceberg.connect.events.TableReference;
 import org.apache.iceberg.connect.events.TopicPartitionOffset;
+import org.apache.iceberg.exceptions.CommitFailedException;
+import org.apache.iceberg.exceptions.CommitStateUnknownException;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types.StructType;
@@ -135,14 +141,50 @@ public void testCommitError() {
         .withRecordCount(5)
         .build();
 
-    coordinatorTest(ImmutableList.of(badDataFile), ImmutableList.of(), null);
+    assertThatThrownBy(
Review comment:

coordinatorTest() calls coordinator.process() directly, so it skips the real production path.

In prod the flow is:

CoordinatorThread.run() catches the exception → marks terminated = true → next CommitterImpl.save() calls processControlEvents() → throws NotRunningException.

This test doesn’t cover that. It would still pass even if CoordinatorThread swallowed the failure.

I think we need an end-to-end test that goes through CoordinatorThread + CommitterImpl.
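The production flow described above can be modeled in a few lines, using made-up class and method names that only mirror the roles of CoordinatorThread, CommitterImpl.save(), and NotRunningException (this is not Iceberg code): the worker thread records the failure, and the next save() surfaces it.

```java
class CoordinatorThreadModel {
  // Stand-in for org.apache.iceberg.connect.channel's NotRunningException.
  static class NotRunningException extends RuntimeException {
    NotRunningException(String msg) { super(msg); }
  }

  private volatile boolean terminated = false;

  // Models CoordinatorThread.run(): a RuntimeException from the commit cycle
  // marks the coordinator terminated instead of being silently dropped.
  void runOneCycle(Runnable commitCycle) {
    Thread t = new Thread(() -> {
      try {
        commitCycle.run();
      } catch (RuntimeException e) {
        terminated = true;
      }
    });
    t.start();
    try {
      t.join();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }

  // Models CommitterImpl.save() -> processControlEvents(): once the
  // coordinator thread has died, the next save() must raise the failure.
  void save() {
    if (terminated) {
      throw new NotRunningException("coordinator is terminated");
    }
  }
}
```

An end-to-end test built on this shape would fail if the thread swallowed the exception without setting the terminated flag, which is exactly the gap the comment points out.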

+            () -> coordinatorTest(ImmutableList.of(badDataFile), ImmutableList.of(), null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Cannot find partition spec");
 
     // no commit messages sent
     assertThat(producer.history()).hasSize(1);
 
     assertThat(table.snapshots()).isEmpty();
   }
+
+  @Test
+  public void testCommitFailedExceptionSwallowed() {
Review comment:

The comment says “Verify issue #15878”, but the test still expects CommitFailedException to be ignored.

That was the bug in #15878: the commit failure was swallowed and data was lost.

To verify the fix, the test should assert that the next CommitterImpl.save() throws NotRunningException, meaning the task moved to FAILED and the operator can see the failure.

+    // Verify issue #15878: a CommitFailedException from the catalog (e.g., Glue concurrent
+    // update) is logged and swallowed so the coordinator retries on the next cycle, rather
+    // than killing the task permanently.
+    Table spiedTable = spy(table);
+    AppendFiles spiedAppend = spy(table.newAppend());
+    doThrow(new CommitFailedException("Glue detected concurrent update"))
+        .when(spiedAppend)
+        .commit();
+    when(spiedTable.newAppend()).thenReturn(spiedAppend);
+    when(catalog.loadTable(TABLE_IDENTIFIER)).thenReturn(spiedTable);
+
+    // Should not throw -- CommitFailedException is retryable
+    coordinatorTest(
+        ImmutableList.of(EventTestUtil.createDataFile()), ImmutableList.of(), EventTestUtil.now());
+  }
+
+  @Test
+  public void testCommitStateUnknownExceptionSwallowed() {
+    Table spiedTable = spy(table);
+    AppendFiles spiedAppend = spy(table.newAppend());
+    doThrow(new CommitStateUnknownException(new RuntimeException("connection reset")))
+        .when(spiedAppend)
+        .commit();
+    when(spiedTable.newAppend()).thenReturn(spiedAppend);
+    when(catalog.loadTable(TABLE_IDENTIFIER)).thenReturn(spiedTable);
+
+    // Should not throw -- CommitStateUnknownException is retryable
+    coordinatorTest(
+        ImmutableList.of(EventTestUtil.createDataFile()), ImmutableList.of(), EventTestUtil.now());
+  }
 
   private void assertCommitTable(int idx, UUID commitId, OffsetDateTime ts) {
     byte[] bytes = producer.history().get(idx).value();
     Event commitTable = AvroUtil.decode(bytes);
@@ -289,7 +331,8 @@ public void testCoordinatorCommittedOffsetValidation() {
     Snapshot firstSnapshot = table.currentSnapshot();
     assertThat(firstSnapshot.summary()).containsEntry(OFFSETS_SNAPSHOT_PROP, "{\"0\":7}");
 
-    // Trigger commit to the table
+    // Trigger commit to the table; the coordinator detects stale offsets via the
+    // ValidationException (a CleanableFailure), logs a warning, and retries next cycle.
     coordinatorTest(
         ImmutableList.of(EventTestUtil.createDataFile()), ImmutableList.of(), EventTestUtil.now());
 