diff --git a/src/main/java/io/airlift/slice/FnvHash.java b/src/main/java/io/airlift/slice/FnvHash.java index 66b4f854..9a8b4031 100644 --- a/src/main/java/io/airlift/slice/FnvHash.java +++ b/src/main/java/io/airlift/slice/FnvHash.java @@ -30,11 +30,13 @@ private FnvHash() public static int hash32(Slice data) { - int hash = FNV_32_OFFSET_BASIS; + byte[] base = data.byteArray(); + int offset = data.byteArrayOffset(); + int length = data.length(); - for (int i = 0; i < data.length(); ++i) { - byte dataByte = data.getByte(i); - hash ^= dataByte; + int hash = FNV_32_OFFSET_BASIS; + for (int i = 0; i < length; i++) { + hash ^= base[offset + i]; hash *= FNV_32_PRIME; } @@ -43,11 +45,13 @@ public static int hash32(Slice data) public static long hash64(Slice data) { - long hash = FNV_64_OFFSET_BASIS; + byte[] base = data.byteArray(); + int offset = data.byteArrayOffset(); + int length = data.length(); - for (int i = 0; i < data.length(); ++i) { - byte dataByte = data.getByte(i); - hash ^= dataByte; + long hash = FNV_64_OFFSET_BASIS; + for (int i = 0; i < length; i++) { + hash ^= base[offset + i]; hash *= FNV_64_PRIME; } diff --git a/src/main/java/io/airlift/slice/Murmur3Hash128.java b/src/main/java/io/airlift/slice/Murmur3Hash128.java index a6467910..d77d2a16 100644 --- a/src/main/java/io/airlift/slice/Murmur3Hash128.java +++ b/src/main/java/io/airlift/slice/Murmur3Hash128.java @@ -15,6 +15,10 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; + public final class Murmur3Hash128 { private static final long C1 = 0x87c37b91114253d5L; @@ -22,6 +26,8 @@ public final class Murmur3Hash128 private static final long DEFAULT_SEED = 0; + private static final VarHandle LONG_HANDLE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN); + private Murmur3Hash128() {} public static Slice hash(Slice data) @@ -37,18 +43,18 @@ public static Slice hash(Slice data, int offset, int length) @SuppressFBWarnings("SF_SWITCH_NO_DEFAULT") public static Slice hash(long seed, Slice data, int offset, int length) { - final int fastLimit = offset + length - (2 * SizeOf.SIZE_OF_LONG) + 1; + byte[] base = data.byteArray(); + int baseOffset = data.byteArrayOffset() + offset; long h1 = seed; long h2 = seed; - int current = offset; + int current = 0; + int fastLimit = length - 15; while (current < fastLimit) { - long k1 = data.getLong(current); - current += SizeOf.SIZE_OF_LONG; - - long k2 = data.getLong(current); - current += SizeOf.SIZE_OF_LONG; + long k1 = (long) LONG_HANDLE.get(base, baseOffset + current); + long k2 = (long) LONG_HANDLE.get(base, baseOffset + current + 8); + current += 16; k1 *= C1; k1 = Long.rotateLeft(k1, 31); @@ -71,22 +77,23 @@ public static Slice hash(long seed, Slice data, int offset, int length) long k1 = 0; long k2 = 0; + int tailBase = baseOffset + current; switch (length & 15) { case 15: - k2 ^= ((long) data.getUnsignedByte(current + 14)) << 48; + k2 ^= ((long) base[tailBase + 14] & 0xFF) << 48; case 14: - k2 ^= ((long) data.getUnsignedByte(current + 13)) << 40; + k2 ^= ((long) base[tailBase + 13] & 0xFF) << 40; case 13: - k2 ^= ((long) data.getUnsignedByte(current + 12)) << 32; + k2 ^= ((long) base[tailBase + 12] & 0xFF) << 32; case 12: - k2 ^= ((long) data.getUnsignedByte(current + 11)) << 24; + k2 ^= ((long) base[tailBase + 11] & 0xFF) << 24; case 11: - k2 ^= ((long) data.getUnsignedByte(current + 10)) << 16; + k2 ^= ((long) base[tailBase + 10] & 0xFF) << 16; case 10: - k2 ^= ((long) data.getUnsignedByte(current + 9)) << 8; + k2 ^= ((long) base[tailBase + 9] & 0xFF) << 8; case 9: - k2 ^= ((long) data.getUnsignedByte(current + 8)) << 0; + k2 ^= ((long) base[tailBase + 8] & 0xFF); k2 *= C2; k2 = Long.rotateLeft(k2, 33); @@ -94,21 +101,21 @@ public static Slice hash(long seed, Slice data, int offset, int length) h2 ^= k2; case 8: - k1 ^= ((long) data.getUnsignedByte(current + 7)) << 56; + k1 ^= ((long) base[tailBase + 7] & 0xFF) << 56; case 7: - k1 ^= ((long) data.getUnsignedByte(current + 6)) << 48; + k1 ^= ((long) base[tailBase + 6] & 0xFF) << 48; case 6: - k1 ^= ((long) data.getUnsignedByte(current + 5)) << 40; + k1 ^= ((long) base[tailBase + 5] & 0xFF) << 40; case 5: - k1 ^= ((long) data.getUnsignedByte(current + 4)) << 32; + k1 ^= ((long) base[tailBase + 4] & 0xFF) << 32; case 4: - k1 ^= ((long) data.getUnsignedByte(current + 3)) << 24; + k1 ^= ((long) base[tailBase + 3] & 0xFF) << 24; case 3: - k1 ^= ((long) data.getUnsignedByte(current + 2)) << 16; + k1 ^= ((long) base[tailBase + 2] & 0xFF) << 16; case 2: - k1 ^= ((long) data.getUnsignedByte(current + 1)) << 8; + k1 ^= ((long) base[tailBase + 1] & 0xFF) << 8; case 1: - k1 ^= ((long) data.getUnsignedByte(current + 0)) << 0; + k1 ^= ((long) base[tailBase] & 0xFF); k1 *= C1; k1 = Long.rotateLeft(k1, 31); @@ -151,18 +158,18 @@ public static long hash64(Slice data, int offset, int length) @SuppressFBWarnings({"SF_SWITCH_NO_DEFAULT", "SF_SWITCH_FALLTHROUGH"}) public static long hash64(long seed, Slice data, int offset, int length) { - final int fastLimit = offset + length - (2 * SizeOf.SIZE_OF_LONG) + 1; + byte[] base = data.byteArray(); + int baseOffset = data.byteArrayOffset() + offset; long h1 = seed; long h2 = seed; - int current = offset; + int current = 0; + int fastLimit = length - 15; while (current < fastLimit) { - long k1 = data.getLong(current); - current += SizeOf.SIZE_OF_LONG; - - long k2 = data.getLong(current); - current += SizeOf.SIZE_OF_LONG; + long k1 = (long) LONG_HANDLE.get(base, baseOffset + current); + long k2 = (long) LONG_HANDLE.get(base, baseOffset + current + 8); + current += 16; k1 *= C1; k1 = Long.rotateLeft(k1, 31); @@ -185,22 +192,23 @@ public static long hash64(long seed, Slice data, int offset, int length) long k1 = 0; long k2 = 0; + int tailBase = baseOffset + current; switch (length & 15) { case 15: - k2 ^= ((long) data.getUnsignedByte(current + 14)) << 48; + k2 ^= ((long) base[tailBase + 14] & 0xFF) << 48; case 14: - k2 ^= ((long) data.getUnsignedByte(current + 13)) << 40; + k2 ^= ((long) base[tailBase + 13] & 0xFF) << 40; case 13: - k2 ^= ((long) data.getUnsignedByte(current + 12)) << 32; + k2 ^= ((long) base[tailBase + 12] & 0xFF) << 32; case 12: - k2 ^= ((long) data.getUnsignedByte(current + 11)) << 24; + k2 ^= ((long) base[tailBase + 11] & 0xFF) << 24; case 11: - k2 ^= ((long) data.getUnsignedByte(current + 10)) << 16; + k2 ^= ((long) base[tailBase + 10] & 0xFF) << 16; case 10: - k2 ^= ((long) data.getUnsignedByte(current + 9)) << 8; + k2 ^= ((long) base[tailBase + 9] & 0xFF) << 8; case 9: - k2 ^= ((long) data.getUnsignedByte(current + 8)) << 0; + k2 ^= ((long) base[tailBase + 8] & 0xFF); k2 *= C2; k2 = Long.rotateLeft(k2, 33); @@ -208,21 +216,21 @@ public static long hash64(long seed, Slice data, int offset, int length) h2 ^= k2; case 8: - k1 ^= ((long) data.getUnsignedByte(current + 7)) << 56; + k1 ^= ((long) base[tailBase + 7] & 0xFF) << 56; case 7: - k1 ^= ((long) data.getUnsignedByte(current + 6)) << 48; + k1 ^= ((long) base[tailBase + 6] & 0xFF) << 48; case 6: - k1 ^= ((long) data.getUnsignedByte(current + 5)) << 40; + k1 ^= ((long) base[tailBase + 5] & 0xFF) << 40; case 5: - k1 ^= ((long) data.getUnsignedByte(current + 4)) << 32; + k1 ^= ((long) base[tailBase + 4] & 0xFF) << 32; case 4: - k1 ^= ((long) data.getUnsignedByte(current + 3)) << 24; + k1 ^= ((long) base[tailBase + 3] & 0xFF) << 24; case 3: - k1 ^= ((long) data.getUnsignedByte(current + 2)) << 16; + k1 ^= ((long) base[tailBase + 2] & 0xFF) << 16; case 2: - k1 ^= ((long) data.getUnsignedByte(current + 1)) << 8; + k1 ^= ((long) base[tailBase + 1] & 0xFF) << 8; case 1: - k1 ^= ((long) data.getUnsignedByte(current + 0)) << 0; + k1 ^= ((long) base[tailBase] & 0xFF); k1 *= C1; k1 = Long.rotateLeft(k1, 31); diff --git a/src/main/java/io/airlift/slice/SpookyHashV2.java b/src/main/java/io/airlift/slice/SpookyHashV2.java index 4c161898..a1d6da82 100644 --- a/src/main/java/io/airlift/slice/SpookyHashV2.java +++ b/src/main/java/io/airlift/slice/SpookyHashV2.java @@ -13,7 +13,10 @@ */ package io.airlift.slice; -import static io.airlift.slice.SizeOf.SIZE_OF_LONG; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; + import static java.lang.Long.rotateLeft; /** @@ -24,6 +27,9 @@ public class SpookyHashV2 private static final long MAGIC_CONSTANT = 0xDEAD_BEEF_DEAD_BEEFL; private static final int SHORT_THRESHOLD = 192; + private static final VarHandle LONG_HANDLE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN); + private static final VarHandle INT_HANDLE = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN); + private SpookyHashV2() { } @@ -31,10 +37,10 @@ private SpookyHashV2() public static long hash64(Slice data, int offset, int length, long seed) { if (length < SHORT_THRESHOLD) { - return shortHash64(data, offset, length, seed); + return shortHash64(data.byteArray(), data.byteArrayOffset() + offset, length, seed); } - return longHash64(data, offset, length, seed); + return longHash64(data.byteArray(), data.byteArrayOffset() + offset, length, seed); } public static int hash32(Slice data, int offset, int length, long seed) @@ -42,7 +48,7 @@ public static int hash32(Slice data, int offset, int length, long seed) return (int) hash64(data, offset, length, seed); } - private static long shortHash64(Slice data, int offset, int length, long seed) + private static long shortHash64(byte[] base, int offset, int length, long seed) { int limit = offset + length; @@ -53,10 +59,8 @@ private static long shortHash64(Slice data, int offset, int length, long seed) int current = offset; while (current <= limit - 32) { - h2 += data.getLong(current); - current += SIZE_OF_LONG; - h3 += data.getLong(current); - current += SIZE_OF_LONG; + h2 += (long) LONG_HANDLE.get(base, current); + h3 += (long) LONG_HANDLE.get(base, current + 8); // mix h2 = rotateLeft(h2, 50); @@ -96,22 +100,17 @@ private static long shortHash64(Slice data, int offset, int length, long seed) h1 += h2; h3 ^= h1; - h0 += data.getLong(current); - current += SIZE_OF_LONG; - - h1 += data.getLong(current); - current += SIZE_OF_LONG; + h0 += (long) LONG_HANDLE.get(base, current + 16); + h1 += (long) LONG_HANDLE.get(base, current + 24); + current += 32; } int remainder = limit - current; if (remainder >= 16) { - h2 += data.getLong(current); - current += SIZE_OF_LONG; - remainder -= SIZE_OF_LONG; - - h3 += data.getLong(current); - current += SIZE_OF_LONG; - remainder -= SIZE_OF_LONG; + h2 += (long) LONG_HANDLE.get(base, current); + h3 += (long) LONG_HANDLE.get(base, current + 8); + current += 16; + remainder -= 16; // mix h2 = rotateLeft(h2, 50); @@ -156,39 +155,39 @@ private static long shortHash64(Slice data, int offset, int length, long seed) h3 += ((long) length) << 56; switch (remainder) { case 15: - h3 += (data.getByte(current + 14) & 0xFFL) << 48; + h3 += (base[current + 14] & 0xFFL) << 48; case 14: - h3 += (data.getByte(current + 13) & 0xFFL) << 40; + h3 += (base[current + 13] & 0xFFL) << 40; case 13: - h3 += (data.getByte(current + 12) & 0xFFL) << 32; + h3 += (base[current + 12] & 0xFFL) << 32; case 12: - h3 += data.getUnsignedInt(current + 8); - h2 += data.getLong(current); + h3 += (int) INT_HANDLE.get(base, current + 8) & 0xFFFFFFFFL; + h2 += (long) LONG_HANDLE.get(base, current); break; case 11: - h3 += (data.getByte(current + 10) & 0xFFL) << 16; + h3 += (base[current + 10] & 0xFFL) << 16; case 10: - h3 += (data.getByte(current + 9) & 0xFFL) << 8; + h3 += (base[current + 9] & 0xFFL) << 8; case 9: - h3 += (data.getByte(current + 8) & 0xFFL); + h3 += (base[current + 8] & 0xFFL); case 8: - h2 += data.getLong(current); + h2 += (long) LONG_HANDLE.get(base, current); break; case 7: - h2 += (data.getByte(current + 6) & 0xFFL) << 48; + h2 += (base[current + 6] & 0xFFL) << 48; case 6: - h2 += (data.getByte(current + 5) & 0xFFL) << 40; + h2 += (base[current + 5] & 0xFFL) << 40; case 5: - h2 += (data.getByte(current + 4) & 0xFFL) << 32; + h2 += (base[current + 4] & 0xFFL) << 32; case 4: - h2 += data.getUnsignedInt(current); + h2 += (int) INT_HANDLE.get(base, current) & 0xFFFFFFFFL; break; case 3: - h2 += (data.getByte(current + 2) & 0xFFL) << 16; + h2 += (base[current + 2] & 0xFFL) << 16; case 2: - h2 += (data.getByte(current + 1) & 0xFFL) << 8; + h2 += (base[current + 1] & 0xFFL) << 8; case 1: - h2 += (data.getByte(current) & 0xFFL); + h2 += (base[current] & 0xFFL); break; case 0: h2 += MAGIC_CONSTANT; @@ -236,7 +235,7 @@ private static long shortHash64(Slice data, int offset, int length, long seed) return h0; } - private static long longHash64(Slice data, int offset, int length, long seed) + private static long longHash64(byte[] base, int offset, int length, long seed) { int limit = offset + length; @@ -254,144 +253,134 @@ private static long longHash64(Slice data, int offset, int length, long seed) long h11 = MAGIC_CONSTANT; int current = offset; - while (current <= limit - 12 * SIZE_OF_LONG) { - h0 += data.getLong(current); - current += SIZE_OF_LONG; + while (current <= limit - 96) { + h0 += (long) LONG_HANDLE.get(base, current); h2 ^= h10; h11 ^= h0; h0 = rotateLeft(h0, 11); h11 += h1; - h1 += data.getLong(current); - current += SIZE_OF_LONG; + h1 += (long) LONG_HANDLE.get(base, current + 8); h3 ^= h11; h0 ^= h1; h1 = rotateLeft(h1, 32); h0 += h2; - h2 += data.getLong(current); - current += SIZE_OF_LONG; + h2 += (long) LONG_HANDLE.get(base, current + 16); h4 ^= h0; h1 ^= h2; h2 = rotateLeft(h2, 43); h1 += h3; - h3 += data.getLong(current); - current += SIZE_OF_LONG; + h3 += (long) LONG_HANDLE.get(base, current + 24); h5 ^= h1; h2 ^= h3; h3 = rotateLeft(h3, 31); h2 += h4; - h4 += data.getLong(current); - current += SIZE_OF_LONG; + h4 += (long) LONG_HANDLE.get(base, current + 32); h6 ^= h2; h3 ^= h4; h4 = rotateLeft(h4, 17); h3 += h5; - h5 += data.getLong(current); - current += SIZE_OF_LONG; + h5 += (long) LONG_HANDLE.get(base, current + 40); h7 ^= h3; h4 ^= h5; h5 = rotateLeft(h5, 28); h4 += h6; - h6 += data.getLong(current); - current += SIZE_OF_LONG; + h6 += (long) LONG_HANDLE.get(base, current + 48); h8 ^= h4; h5 ^= h6; h6 = rotateLeft(h6, 39); h5 += h7; - h7 += data.getLong(current); - current += SIZE_OF_LONG; + h7 += (long) LONG_HANDLE.get(base, current + 56); h9 ^= h5; h6 ^= h7; h7 = rotateLeft(h7, 57); h6 += h8; - h8 += data.getLong(current); - current += SIZE_OF_LONG; + h8 += (long) LONG_HANDLE.get(base, current + 64); h10 ^= h6; h7 ^= h8; h8 = rotateLeft(h8, 55); h7 += h9; - h9 += data.getLong(current); - current += SIZE_OF_LONG; + h9 += (long) LONG_HANDLE.get(base, current + 72); h11 ^= h7; h8 ^= h9; h9 = rotateLeft(h9, 54); h8 += h10; - h10 += data.getLong(current); - current += SIZE_OF_LONG; + h10 += (long) LONG_HANDLE.get(base, current + 80); h0 ^= h8; h9 ^= h10; h10 = rotateLeft(h10, 22); h9 += h11; - h11 += data.getLong(current); - current += SIZE_OF_LONG; + h11 += (long) LONG_HANDLE.get(base, current + 88); h1 ^= h9; h10 ^= h11; h11 = rotateLeft(h11, 46); h10 += h0; + + current += 96; } int remaining = limit - current; - int sequences = remaining / SIZE_OF_LONG; + int sequences = remaining / 8; // handle remaining whole 8-byte sequences switch (sequences) { case 11: - h10 += data.getLong(current + 10 * SIZE_OF_LONG); + h10 += (long) LONG_HANDLE.get(base, current + 80); case 10: - h9 += data.getLong(current + 9 * SIZE_OF_LONG); + h9 += (long) LONG_HANDLE.get(base, current + 72); case 9: - h8 += data.getLong(current + 8 * SIZE_OF_LONG); + h8 += (long) LONG_HANDLE.get(base, current + 64); case 8: - h7 += data.getLong(current + 7 * SIZE_OF_LONG); + h7 += (long) LONG_HANDLE.get(base, current + 56); case 7: - h6 += data.getLong(current + 6 * SIZE_OF_LONG); + h6 += (long) LONG_HANDLE.get(base, current + 48); case 6: - h5 += data.getLong(current + 5 * SIZE_OF_LONG); + h5 += (long) LONG_HANDLE.get(base, current + 40); case 5: - h4 += data.getLong(current + 4 * SIZE_OF_LONG); + h4 += (long) LONG_HANDLE.get(base, current + 32); case 4: - h3 += data.getLong(current + 3 * SIZE_OF_LONG); + h3 += (long) LONG_HANDLE.get(base, current + 24); case 3: - h2 += data.getLong(current + 2 * SIZE_OF_LONG); + h2 += (long) LONG_HANDLE.get(base, current + 16); case 2: - h1 += data.getLong(current + SIZE_OF_LONG); + h1 += (long) LONG_HANDLE.get(base, current + 8); case 1: - h0 += data.getLong(current); + h0 += (long) LONG_HANDLE.get(base, current); case 0: break; default: throw new AssertionError("Unexpected value for sequences: " + sequences); } - current += SIZE_OF_LONG * sequences; + current += 8 * sequences; // read the last sequence of 0-7 bytes long last = 0; switch (limit - current) { case 7: - last |= (data.getByte(current + 6) & 0xFFL) << 48; + last |= (base[current + 6] & 0xFFL) << 48; case 6: - last |= (data.getByte(current + 5) & 0xFFL) << 40; + last |= (base[current + 5] & 0xFFL) << 40; case 5: - last |= (data.getByte(current + 4) & 0xFFL) << 32; + last |= (base[current + 4] & 0xFFL) << 32; case 4: - last |= (data.getByte(current + 3) & 0xFFL) << 24; + last |= (base[current + 3] & 0xFFL) << 24; case 3: - last |= (data.getByte(current + 2) & 0xFFL) << 16; + last |= (base[current + 2] & 0xFFL) << 16; case 2: - last |= (data.getByte(current + 1) & 0xFFL) << 8; + last |= (base[current + 1] & 0xFFL) << 8; case 1: - last |= (data.getByte(current) & 0xFFL); + last |= (base[current] & 0xFFL); case 0: break; default: