From 30a24e7508c11a4387d493f1dba5d6861bbd5e87 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 15 May 2026 22:05:31 -0700 Subject: [PATCH 1/5] -DtestParallelism=auto + split modules --- .github/workflows/spark-ci.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 5b13ae9bc463..37f747a6d001 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -81,6 +81,9 @@ jobs: jvm: [17, 21] spark: ['3.4', '3.5', '4.0', '4.1'] scala: ['2.12', '2.13'] + # Split iceberg-spark and iceberg-spark-extensions/-runtime into + # separate jobs so they run concurrently rather than serially. + module: ['spark', 'spark-extensions-and-runtime'] exclude: # Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369) # Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831) @@ -108,15 +111,22 @@ jobs: with: tool-cache: false - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: | + - name: Run iceberg-spark tests + if: matrix.module == 'spark' + run: | ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DflinkVersions= -DkafkaVersions= \ :iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala }}:check \ + -Pquick=true -x javadoc -DtestParallelism=auto + - name: Run iceberg-spark-extensions and iceberg-spark-runtime tests + if: matrix.module == 'spark-extensions-and-runtime' + run: | + ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DflinkVersions= -DkafkaVersions= \ :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala }}:check \ :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala }}:check \ - -Pquick=true -x javadoc + -Pquick=true -x javadoc -DtestParallelism=auto - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: failure() with: - name: test logs + name: test logs (${{ matrix.jvm }}, ${{ matrix.spark }}, ${{ matrix.scala }}, ${{ matrix.module }}) path: | **/build/testlogs From d485f3483feda8bc9daefc0d98dbf04fbf5e5a03 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 15 May 2026 22:15:51 -0700 Subject: [PATCH 2/5] max-parallel: 20 --- .github/workflows/spark-ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 37f747a6d001..df96dbea9940 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -76,7 +76,9 @@ jobs: spark-tests: runs-on: ubuntu-24.04 strategy: - max-parallel: 15 + # 20 is the Apache infra policy ceiling (infra.apache.org/github-actions-policy.html). + # Keep matrix <= 20 jobs; exceeding it queues a second wave and slows CI. + max-parallel: 20 matrix: jvm: [17, 21] spark: ['3.4', '3.5', '4.0', '4.1'] From d908d38c835e2e4e411ed34f6d674c49c1847cb4 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 15 May 2026 22:50:28 -0700 Subject: [PATCH 3/5] refactor --- .github/workflows/spark-ci.yml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index df96dbea9940..0bd84527e722 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -83,9 +83,9 @@ jobs: jvm: [17, 21] spark: ['3.4', '3.5', '4.0', '4.1'] scala: ['2.12', '2.13'] - # Split iceberg-spark and iceberg-spark-extensions/-runtime into - # separate jobs so they run concurrently rather than serially. - module: ['spark', 'spark-extensions-and-runtime'] + # Split iceberg-spark (core) from iceberg-spark-extensions/-runtime + # so they run as concurrent jobs rather than serially in one job. + tests: [core, extensions] exclude: # Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369) # Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831) @@ -113,19 +113,15 @@ jobs: with: tool-cache: false - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - name: Run iceberg-spark tests - if: matrix.module == 'spark' + - name: Run tests run: | + if [[ "${{ matrix.tests }}" == "core" ]]; then + projects=":iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala }}:check" + else + projects=":iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala }}:check :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala }}:check" + fi ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DflinkVersions= -DkafkaVersions= \ - :iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala }}:check \ - -Pquick=true -x javadoc -DtestParallelism=auto - - name: Run iceberg-spark-extensions and iceberg-spark-runtime tests - if: matrix.module == 'spark-extensions-and-runtime' - run: | - ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DflinkVersions= -DkafkaVersions= \ - :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala }}:check \ - :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala }}:check \ - -Pquick=true -x javadoc -DtestParallelism=auto + $projects -Pquick=true -x javadoc -DtestParallelism=auto - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: failure() with: From fdefde6ba5bcccb2a815bbfa6aa4e165be3f8c99 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sat, 16 May 2026 09:42:31 -0700 Subject: [PATCH 4/5] fix --- .github/workflows/spark-ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 0bd84527e722..260493f3c9cb 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -75,6 +75,7 @@ concurrency: jobs: spark-tests: runs-on: ubuntu-24.04 + timeout-minutes: 90 strategy: # 20 is the Apache infra policy ceiling (infra.apache.org/github-actions-policy.html). # Keep matrix <= 20 jobs; exceeding it queues a second wave and slows CI. @@ -125,6 +126,7 @@ jobs: - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: failure() with: - name: test logs (${{ matrix.jvm }}, ${{ matrix.spark }}, ${{ matrix.scala }}, ${{ matrix.module }}) + name: test logs (${{ matrix.jvm }}, ${{ matrix.spark }}, ${{ matrix.scala }}, ${{ matrix.tests }}) + retention-days: 7 path: | **/build/testlogs From 3183505adf129584139389413d7aba2752cf94f0 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sat, 16 May 2026 14:03:12 -0400 Subject: [PATCH 5/5] pr review --- .github/workflows/spark-ci.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 260493f3c9cb..15991785ca7f 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-24.04 timeout-minutes: 90 strategy: - # 20 is the Apache infra policy ceiling (infra.apache.org/github-actions-policy.html). + # 20 is the Apache infra policy ceiling (https://infra.apache.org/github-actions-policy.html). # Keep matrix <= 20 jobs; exceeding it queues a second wave and slows CI. max-parallel: 20 matrix: @@ -110,9 +110,6 @@ jobs: with: # Read-only: java-ci's build-checks (17) is the global canonical writer. cache-read-only: true - - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - with: - tool-cache: false - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - name: Run tests run: |