diff --git a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt index acdf8d9b51737..ed854dbf4ad64 100644 --- a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt @@ -2,81 +2,81 @@ Boolean decode ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor RLE readBooleans decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cold reader, trueRatio=0.0 0 0 0 25171.1 0.0 1.0X -reused reader, trueRatio=0.0 0 0 0 25171.1 0.0 1.0X -cold reader, trueRatio=0.1 1 1 0 755.2 1.3 0.0X -reused reader, trueRatio=0.1 1 1 0 754.1 1.3 0.0X -cold reader, trueRatio=0.5 1 1 0 835.6 1.2 0.0X -reused reader, trueRatio=0.5 1 1 0 833.3 1.2 0.0X -cold reader, trueRatio=0.9 1 1 0 753.3 1.3 0.0X -reused reader, trueRatio=0.9 1 1 0 753.6 1.3 0.0X -cold reader, trueRatio=1.0 0 0 0 25165.0 0.0 1.0X -reused reader, trueRatio=1.0 0 0 0 25183.2 0.0 1.0X +cold reader, trueRatio=0.0 0 0 0 5521.0 0.2 1.0X +reused reader, trueRatio=0.0 0 0 0 3516.0 0.3 0.6X +cold reader, trueRatio=0.1 2 2 0 616.4 1.6 0.1X +reused reader, trueRatio=0.1 2 2 0 587.4 1.7 0.1X +cold reader, trueRatio=0.5 2 2 0 674.2 1.5 0.1X +reused reader, trueRatio=0.5 2 2 0 632.9 1.6 0.1X +cold reader, trueRatio=0.9 2 2 0 615.6 1.6 0.1X +reused reader, trueRatio=0.9 2 2 0 582.0 1.7 0.1X +cold reader, trueRatio=1.0 0 0 0 2541.1 0.4 0.5X +reused reader, trueRatio=1.0 0 0 0 3524.8 0.3 0.6X ================================================================================================ Integer decode ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor RLE readIntegers dictionary-id decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PACKED cold, bitWidth=4 2 2 0 478.0 2.1 1.0X -PACKED reused, bitWidth=4 2 2 0 475.7 2.1 1.0X -RLE, bitWidth=4 0 0 0 4508.2 0.2 9.4X -PACKED cold, bitWidth=8 2 2 0 518.0 1.9 1.1X -PACKED reused, bitWidth=8 2 2 0 515.7 1.9 1.1X -RLE, bitWidth=8 0 0 0 4508.9 0.2 9.4X -PACKED cold, bitWidth=12 8 8 0 136.1 7.3 0.3X -PACKED reused, bitWidth=12 8 8 0 136.1 7.3 0.3X -RLE, bitWidth=12 0 0 0 4503.7 0.2 9.4X -PACKED cold, bitWidth=20 3 3 0 353.3 2.8 0.7X -PACKED reused, bitWidth=20 3 3 0 352.1 2.8 0.7X -RLE, bitWidth=20 0 0 0 4508.9 0.2 9.4X +PACKED cold, bitWidth=4 2 2 0 486.4 2.1 1.0X +PACKED reused, bitWidth=4 2 2 0 441.2 2.3 0.9X +RLE, bitWidth=4 0 0 0 4514.6 0.2 9.3X +PACKED cold, bitWidth=8 2 2 0 524.5 1.9 1.1X +PACKED reused, bitWidth=8 2 2 0 521.4 1.9 1.1X +RLE, bitWidth=8 0 0 0 4501.4 0.2 9.3X +PACKED cold, bitWidth=12 3 3 0 413.5 2.4 0.9X +PACKED reused, bitWidth=12 3 3 0 412.8 2.4 0.8X +RLE, bitWidth=12 0 0 0 4511.1 0.2 9.3X +PACKED cold, bitWidth=20 3 3 0 354.2 2.8 0.7X +PACKED reused, bitWidth=20 3 3 0 353.8 2.8 0.7X +RLE, bitWidth=20 0 0 0 4511.5 0.2 9.3X ================================================================================================ Nullable batch decode with def-level materialization ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor Nullable batch with def-levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -nullRatio=0.0, n/a 0 0 0 6502.7 0.2 1.0X -nullRatio=0.1, random 8 9 0 123.7 8.1 0.0X -nullRatio=0.1, clustered 6 6 0 169.7 5.9 0.0X -nullRatio=0.3, random 12 12 0 87.3 11.5 0.0X -nullRatio=0.3, clustered 6 6 0 168.0 6.0 0.0X -nullRatio=0.5, random 13 13 0 79.8 12.5 0.0X -nullRatio=0.5, clustered 6 6 0 171.7 5.8 0.0X -nullRatio=0.9, random 8 8 0 136.0 7.4 0.0X -nullRatio=0.9, clustered 6 6 0 181.6 5.5 0.0X -nullRatio=1.0, random 0 0 0 5072.8 0.2 0.8X +nullRatio=0.0, n/a 0 0 0 6566.4 0.2 1.0X +nullRatio=0.1, random 9 9 0 115.6 8.6 0.0X +nullRatio=0.1, clustered 7 7 0 159.3 6.3 0.0X +nullRatio=0.3, random 13 13 0 80.5 12.4 0.0X +nullRatio=0.3, clustered 7 7 0 159.4 6.3 0.0X +nullRatio=0.5, random 15 15 0 70.5 14.2 0.0X +nullRatio=0.5, clustered 7 7 0 160.9 6.2 0.0X +nullRatio=0.9, random 9 9 0 122.8 8.1 0.0X +nullRatio=0.9, clustered 6 6 0 169.6 5.9 0.0X +nullRatio=1.0, random 0 0 0 5078.2 0.2 0.8X ================================================================================================ Nullable batch decode without def-level materialization ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor Nullable batch without def-levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -nullRatio=0.0, n/a 0 0 0 11512.6 0.1 1.0X -nullRatio=0.1, random 7 7 0 144.6 6.9 0.0X -nullRatio=0.1, clustered 5 5 0 197.2 5.1 0.0X -nullRatio=0.3, random 11 11 0 99.6 10.0 0.0X -nullRatio=0.3, clustered 6 6 0 189.8 5.3 0.0X -nullRatio=0.5, random 12 12 0 89.0 11.2 0.0X -nullRatio=0.5, clustered 5 5 0 194.7 5.1 0.0X -nullRatio=0.9, random 7 7 0 151.9 6.6 0.0X +nullRatio=0.0, n/a 0 0 0 11527.9 0.1 1.0X +nullRatio=0.1, random 8 8 0 138.1 7.2 0.0X +nullRatio=0.1, clustered 6 6 0 188.9 5.3 0.0X +nullRatio=0.3, random 11 11 0 97.9 10.2 0.0X +nullRatio=0.3, clustered 6 6 0 188.5 5.3 0.0X +nullRatio=0.5, random 12 12 0 87.8 11.4 0.0X +nullRatio=0.5, clustered 5 6 0 190.9 5.2 0.0X +nullRatio=0.9, random 7 7 1 151.4 6.6 0.0X nullRatio=0.9, clustered 5 5 0 200.5 5.0 0.0X -nullRatio=1.0, random 0 0 0 11945.0 0.1 1.0X +nullRatio=1.0, random 0 0 0 11898.7 0.1 1.0X diff --git a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt index 2b245cb206303..f07b581719e3a 100644 --- a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt +++ b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt @@ -2,81 +2,81 @@ Boolean decode ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor RLE readBooleans decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cold reader, trueRatio=0.0 0 0 0 60530.9 0.0 1.0X -reused reader, trueRatio=0.0 0 0 0 56821.1 0.0 0.9X -cold reader, trueRatio=0.1 1 1 0 709.0 1.4 0.0X -reused reader, trueRatio=0.1 1 1 0 708.2 1.4 0.0X -cold reader, trueRatio=0.5 1 1 0 747.9 1.3 0.0X -reused reader, trueRatio=0.5 1 1 0 746.5 1.3 0.0X -cold reader, trueRatio=0.9 1 1 0 709.0 1.4 0.0X -reused reader, trueRatio=0.9 1 2 0 705.6 1.4 0.0X -cold reader, trueRatio=1.0 0 0 0 59164.7 0.0 1.0X -reused reader, trueRatio=1.0 0 0 0 58832.7 0.0 1.0X +cold reader, trueRatio=0.0 0 0 0 6390.0 0.2 1.0X +reused reader, trueRatio=0.0 1 1 0 1610.3 0.6 0.3X +cold reader, trueRatio=0.1 2 2 0 645.6 1.5 0.1X +reused reader, trueRatio=0.1 2 2 0 645.3 1.5 0.1X +cold reader, trueRatio=0.5 1 1 0 713.9 1.4 0.1X +reused reader, trueRatio=0.5 1 1 0 712.9 1.4 0.1X +cold reader, trueRatio=0.9 2 2 0 645.5 1.5 0.1X +reused reader, trueRatio=0.9 2 2 0 644.8 1.6 0.1X +cold reader, trueRatio=1.0 1 1 0 1618.8 0.6 0.3X +reused reader, trueRatio=1.0 1 1 0 1735.1 0.6 0.3X ================================================================================================ Integer decode ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor RLE readIntegers dictionary-id decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PACKED cold, bitWidth=4 2 2 0 517.0 1.9 1.0X -PACKED reused, bitWidth=4 2 2 0 518.3 1.9 1.0X -RLE, bitWidth=4 0 0 0 18145.5 0.1 35.1X -PACKED cold, bitWidth=8 2 2 0 570.0 1.8 1.1X -PACKED reused, bitWidth=8 2 2 0 556.9 1.8 1.1X -RLE, bitWidth=8 0 0 0 18098.2 0.1 35.0X -PACKED cold, bitWidth=12 2 2 0 454.5 2.2 0.9X -PACKED reused, bitWidth=12 2 2 0 453.0 2.2 0.9X -RLE, bitWidth=12 0 0 0 18164.4 0.1 35.1X -PACKED cold, bitWidth=20 3 3 0 374.6 2.7 0.7X -PACKED reused, bitWidth=20 3 3 0 374.2 2.7 0.7X -RLE, bitWidth=20 0 0 0 18462.1 0.1 35.7X +PACKED cold, bitWidth=4 2 2 0 513.5 1.9 1.0X +PACKED reused, bitWidth=4 2 2 0 514.2 1.9 1.0X +RLE, bitWidth=4 0 0 0 15609.6 0.1 30.4X +PACKED cold, bitWidth=8 2 2 0 572.9 1.7 1.1X +PACKED reused, bitWidth=8 2 2 0 572.2 1.7 1.1X +RLE, bitWidth=8 0 0 0 15616.6 0.1 30.4X +PACKED cold, bitWidth=12 2 2 0 456.1 2.2 0.9X +PACKED reused, bitWidth=12 2 2 0 448.3 2.2 0.9X +RLE, bitWidth=12 0 0 0 15630.6 0.1 30.4X +PACKED cold, bitWidth=20 3 3 0 373.1 2.7 0.7X +PACKED reused, bitWidth=20 3 3 0 368.4 2.7 0.7X +RLE, bitWidth=20 0 0 0 18706.5 0.1 36.4X ================================================================================================ Nullable batch decode with def-level materialization ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor Nullable batch with def-levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -nullRatio=0.0, n/a 0 0 0 6625.9 0.2 1.0X -nullRatio=0.1, random 9 9 0 121.8 8.2 0.0X -nullRatio=0.1, clustered 6 6 0 170.3 5.9 0.0X -nullRatio=0.3, random 13 13 0 83.8 11.9 0.0X -nullRatio=0.3, clustered 6 6 0 170.4 5.9 0.0X -nullRatio=0.5, random 14 14 0 75.8 13.2 0.0X -nullRatio=0.5, clustered 6 6 0 172.1 5.8 0.0X -nullRatio=0.9, random 8 8 0 131.0 7.6 0.0X -nullRatio=0.9, clustered 6 6 0 181.7 5.5 0.0X -nullRatio=1.0, random 0 0 0 8305.2 0.1 1.3X +nullRatio=0.0, n/a 0 0 0 6516.5 0.2 1.0X +nullRatio=0.1, random 9 9 0 116.1 8.6 0.0X +nullRatio=0.1, clustered 6 6 0 168.5 5.9 0.0X +nullRatio=0.3, random 13 13 0 82.9 12.1 0.0X +nullRatio=0.3, clustered 6 6 0 168.4 5.9 0.0X +nullRatio=0.5, random 14 14 0 74.7 13.4 0.0X +nullRatio=0.5, clustered 6 6 0 170.8 5.9 0.0X +nullRatio=0.9, random 8 8 0 130.6 7.7 0.0X +nullRatio=0.9, clustered 6 6 0 180.1 5.6 0.0X +nullRatio=1.0, random 0 0 0 8343.6 0.1 1.3X ================================================================================================ Nullable batch decode without def-level materialization ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure AMD EPYC 7763 64-Core Processor Nullable batch without def-levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -nullRatio=0.0, n/a 0 0 0 11869.8 0.1 1.0X -nullRatio=0.1, random 7 7 0 149.1 6.7 0.0X -nullRatio=0.1, clustered 5 5 0 208.1 4.8 0.0X -nullRatio=0.3, random 10 10 0 101.1 9.9 0.0X -nullRatio=0.3, clustered 5 5 0 206.6 4.8 0.0X -nullRatio=0.5, random 12 12 0 90.7 11.0 0.0X -nullRatio=0.5, clustered 5 5 0 206.4 4.8 0.0X -nullRatio=0.9, random 7 7 0 160.2 6.2 0.0X -nullRatio=0.9, clustered 5 5 0 213.5 4.7 0.0X -nullRatio=1.0, random 0 0 0 11957.2 0.1 1.0X +nullRatio=0.0, n/a 0 0 0 11799.7 0.1 1.0X +nullRatio=0.1, random 7 7 0 148.3 6.7 0.0X +nullRatio=0.1, clustered 5 5 0 208.4 4.8 0.0X +nullRatio=0.3, random 10 11 0 100.5 9.9 0.0X +nullRatio=0.3, clustered 5 5 0 205.4 4.9 0.0X +nullRatio=0.5, random 12 12 0 90.4 11.1 0.0X +nullRatio=0.5, clustered 5 5 0 205.3 4.9 0.0X +nullRatio=0.9, random 7 7 0 158.2 6.3 0.0X +nullRatio=0.9, clustered 5 5 0 212.0 4.7 0.0X +nullRatio=1.0, random 0 0 0 12119.2 0.1 1.0X diff --git a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt index 10def40f3e1e8..786b1eb6e8a26 100644 --- a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt +++ b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt @@ -2,81 +2,81 @@ Boolean decode ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +AMD EPYC 9V74 80-Core Processor RLE readBooleans decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cold reader, trueRatio=0.0 0 0 0 47464.1 0.0 1.0X -reused reader, trueRatio=0.0 0 0 0 47485.6 0.0 1.0X -cold reader, trueRatio=0.1 1 1 0 894.1 1.1 0.0X -reused reader, trueRatio=0.1 1 1 0 897.4 1.1 0.0X -cold reader, trueRatio=0.5 1 1 0 1027.8 1.0 0.0X -reused reader, trueRatio=0.5 1 1 0 1029.3 1.0 0.0X -cold reader, trueRatio=0.9 1 1 0 893.8 1.1 0.0X -reused reader, trueRatio=0.9 1 1 0 896.6 1.1 0.0X -cold reader, trueRatio=1.0 0 0 0 47421.1 0.0 1.0X -reused reader, trueRatio=1.0 0 0 0 47485.6 0.0 1.0X +cold reader, trueRatio=0.0 0 0 0 84229.7 0.0 1.0X +reused reader, trueRatio=0.0 0 0 0 84501.2 0.0 1.0X +cold reader, trueRatio=0.1 1 1 0 869.0 1.2 0.0X +reused reader, trueRatio=0.1 1 1 0 869.0 1.2 0.0X +cold reader, trueRatio=0.5 1 1 0 1002.7 1.0 0.0X +reused reader, trueRatio=0.5 1 1 0 1004.9 1.0 0.0X +cold reader, trueRatio=0.9 1 1 0 861.0 1.2 0.0X +reused reader, trueRatio=0.9 1 1 0 861.4 1.2 0.0X +cold reader, trueRatio=1.0 0 0 0 84774.5 0.0 1.0X +reused reader, trueRatio=1.0 0 0 0 84918.7 0.0 1.0X ================================================================================================ Integer decode ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +AMD EPYC 9V74 80-Core Processor RLE readIntegers dictionary-id decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PACKED cold, bitWidth=4 2 2 0 488.1 2.0 1.0X -PACKED reused, bitWidth=4 2 2 0 485.9 2.1 1.0X -RLE, bitWidth=4 0 0 0 19116.1 0.1 39.2X -PACKED cold, bitWidth=8 2 2 0 482.1 2.1 1.0X -PACKED reused, bitWidth=8 2 2 0 479.7 2.1 1.0X -RLE, bitWidth=8 0 0 0 18567.1 0.1 38.0X -PACKED cold, bitWidth=12 3 3 0 362.1 2.8 0.7X -PACKED reused, bitWidth=12 3 3 0 361.3 2.8 0.7X -RLE, bitWidth=12 0 0 0 19126.9 0.1 39.2X -PACKED cold, bitWidth=20 3 3 0 308.6 3.2 0.6X -PACKED reused, bitWidth=20 3 3 0 306.5 3.3 0.6X -RLE, bitWidth=20 0 0 0 19074.4 0.1 39.1X +PACKED cold, bitWidth=4 2 2 0 463.9 2.2 1.0X +PACKED reused, bitWidth=4 2 2 0 463.1 2.2 1.0X +RLE, bitWidth=4 0 0 0 18397.4 0.1 39.7X +PACKED cold, bitWidth=8 2 2 0 464.9 2.2 1.0X +PACKED reused, bitWidth=8 2 2 0 461.7 2.2 1.0X +RLE, bitWidth=8 0 0 0 18524.4 0.1 39.9X +PACKED cold, bitWidth=12 3 3 1 335.9 3.0 0.7X +PACKED reused, bitWidth=12 3 3 1 335.1 3.0 0.7X +RLE, bitWidth=12 0 0 0 18504.8 0.1 39.9X +PACKED cold, bitWidth=20 4 4 0 283.1 3.5 0.6X +PACKED reused, bitWidth=20 4 4 0 282.5 3.5 0.6X +RLE, bitWidth=20 0 0 0 18381.2 0.1 39.6X ================================================================================================ Nullable batch decode with def-level materialization ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +AMD EPYC 9V74 80-Core Processor Nullable batch with def-levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -nullRatio=0.0, n/a 0 0 0 6441.9 0.2 1.0X -nullRatio=0.1, random 9 9 0 113.0 8.9 0.0X -nullRatio=0.1, clustered 7 7 0 144.8 6.9 0.0X -nullRatio=0.3, random 13 13 0 78.9 12.7 0.0X -nullRatio=0.3, clustered 7 7 0 156.6 6.4 0.0X -nullRatio=0.5, random 15 15 0 71.9 13.9 0.0X -nullRatio=0.5, clustered 7 7 0 159.2 6.3 0.0X -nullRatio=0.9, random 8 8 0 124.8 8.0 0.0X -nullRatio=0.9, clustered 6 6 0 171.3 5.8 0.0X -nullRatio=1.0, random 0 0 0 8031.7 0.1 1.2X +nullRatio=0.0, n/a 0 0 0 5905.5 0.2 1.0X +nullRatio=0.1, random 10 10 0 108.8 9.2 0.0X +nullRatio=0.1, clustered 7 7 0 151.3 6.6 0.0X +nullRatio=0.3, random 14 14 0 76.3 13.1 0.0X +nullRatio=0.3, clustered 7 7 0 153.7 6.5 0.0X +nullRatio=0.5, random 15 15 1 70.2 14.2 0.0X +nullRatio=0.5, clustered 7 7 0 157.9 6.3 0.0X +nullRatio=0.9, random 8 8 0 126.3 7.9 0.0X +nullRatio=0.9, clustered 6 6 0 173.9 5.7 0.0X +nullRatio=1.0, random 0 0 0 7335.0 0.1 1.2X ================================================================================================ Nullable batch decode without def-level materialization ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +AMD EPYC 9V74 80-Core Processor Nullable batch without def-levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -nullRatio=0.0, n/a 0 0 0 11037.9 0.1 1.0X -nullRatio=0.1, random 8 8 0 139.2 7.2 0.0X -nullRatio=0.1, clustered 6 6 0 190.1 5.3 0.0X -nullRatio=0.3, random 11 11 0 96.7 10.3 0.0X -nullRatio=0.3, clustered 6 6 0 188.5 5.3 0.0X -nullRatio=0.5, random 12 12 0 86.9 11.5 0.0X -nullRatio=0.5, clustered 6 6 0 188.0 5.3 0.0X -nullRatio=0.9, random 7 7 0 149.4 6.7 0.0X -nullRatio=0.9, clustered 5 5 0 197.9 5.1 0.0X -nullRatio=1.0, random 0 0 0 11675.8 0.1 1.1X +nullRatio=0.0, n/a 0 0 0 9240.0 0.1 1.0X +nullRatio=0.1, random 8 8 0 132.8 7.5 0.0X +nullRatio=0.1, clustered 6 6 0 187.2 5.3 0.0X +nullRatio=0.3, random 12 12 0 90.9 11.0 0.0X +nullRatio=0.3, clustered 6 6 0 188.0 5.3 0.0X +nullRatio=0.5, random 13 13 0 82.6 12.1 0.0X +nullRatio=0.5, clustered 5 6 1 191.2 5.2 0.0X +nullRatio=0.9, random 7 7 0 149.6 6.7 0.0X +nullRatio=0.9, clustered 5 5 0 206.6 4.8 0.0X +nullRatio=1.0, random 0 0 0 9886.6 0.1 1.1X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala index 008b18cdcab16..35cdd925f8e53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala @@ -158,6 +158,16 @@ object VectorizedRleValuesReaderBenchmark extends BenchmarkBase { val bytes = encodeRle( packedFriendlyBooleans(NUM_ROWS, trueRatio), bitWidth = 1) + // Pre-warm the cold-reader code path (fresh reader + initFromPage + readBooleans) + // so the class methods are fully JIT-compiled before benchmark.run() measures. + // The first case in a group otherwise pays for tiered-compilation transitions on + // sub-millisecond iterations, producing noise between runs. + (0 until 3).foreach { _ => + val r = new VectorizedRleValuesReader(1, false) + r.initFromPage(NUM_ROWS, toInputStream(bytes)) + r.readBooleans(NUM_ROWS, vec, 0) + } + benchmark.addCase(f"cold reader, trueRatio=${trueRatio}%.1f") { _ => val reader = new VectorizedRleValuesReader(1, false) reader.initFromPage(NUM_ROWS, toInputStream(bytes)) @@ -189,6 +199,13 @@ object VectorizedRleValuesReaderBenchmark extends BenchmarkBase { val packedBytes = encodeRle( packedFriendlyDictIds(NUM_ROWS, bitWidth), bitWidth) + // See `runBooleanBenchmark` for why the cold-reader path is pre-warmed here. + (0 until 3).foreach { _ => + val r = new VectorizedRleValuesReader(bitWidth, false) + r.initFromPage(NUM_ROWS, toInputStream(packedBytes)) + r.readIntegers(NUM_ROWS, vec, 0) + } + benchmark.addCase(s"PACKED cold, bitWidth=$bitWidth") { _ => val reader = new VectorizedRleValuesReader(bitWidth, false) reader.initFromPage(NUM_ROWS, toInputStream(packedBytes))