diff --git a/src/main/java/io/airlift/slice/DynamicSliceOutput.java b/src/main/java/io/airlift/slice/DynamicSliceOutput.java
index debb8d60..581b7521 100644
--- a/src/main/java/io/airlift/slice/DynamicSliceOutput.java
+++ b/src/main/java/io/airlift/slice/DynamicSliceOutput.java
@@ -83,7 +83,7 @@ public int writableBytes()
     public void writeByte(int value)
     {
         slice = Slices.ensureSize(slice, size + SIZE_OF_BYTE);
-        slice.setByte(size, value);
+        slice.setByteUnchecked(size, value);
         size += SIZE_OF_BYTE;
     }
 
@@ -91,7 +91,7 @@ public void writeByte(int value)
     public void writeShort(int value)
     {
         slice = Slices.ensureSize(slice, size + SIZE_OF_SHORT);
-        slice.setShort(size, value);
+        slice.setShortUnchecked(size, value);
         size += SIZE_OF_SHORT;
     }
 
@@ -99,7 +99,7 @@ public void writeShort(int value)
     public void writeInt(int value)
     {
         slice = Slices.ensureSize(slice, size + SIZE_OF_INT);
-        slice.setInt(size, value);
+        slice.setIntUnchecked(size, value);
         size += SIZE_OF_INT;
     }
 
@@ -107,7 +107,7 @@ public void writeInt(int value)
     public void writeLong(long value)
     {
         slice = Slices.ensureSize(slice, size + SIZE_OF_LONG);
-        slice.setLong(size, value);
+        slice.setLongUnchecked(size, value);
         size += SIZE_OF_LONG;
     }
 
@@ -115,7 +115,7 @@ public void writeLong(long value)
     public void writeFloat(float value)
     {
         slice = Slices.ensureSize(slice, size + SIZE_OF_FLOAT);
-        slice.setFloat(size, value);
+        slice.setFloatUnchecked(size, value);
         size += SIZE_OF_FLOAT;
     }
 
@@ -123,7 +123,7 @@ public void writeFloat(float value)
     public void writeDouble(double value)
     {
         slice = Slices.ensureSize(slice, size + SIZE_OF_DOUBLE);
-        slice.setDouble(size, value);
+        slice.setDoubleUnchecked(size, value);
         size += SIZE_OF_DOUBLE;
     }
 
diff --git a/src/main/java/io/airlift/slice/Slice.java b/src/main/java/io/airlift/slice/Slice.java
index f3091035..7fd12b28 100644
--- a/src/main/java/io/airlift/slice/Slice.java
+++ b/src/main/java/io/airlift/slice/Slice.java
@@ -35,6 +35,7 @@
 import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
 import static io.airlift.slice.SizeOf.instanceSize;
 import static io.airlift.slice.SizeOf.sizeOf;
+import static java.lang.Long.numberOfTrailingZeros;
 import static java.lang.invoke.MethodHandles.byteArrayViewVarHandle;
 import static java.nio.ByteOrder.LITTLE_ENDIAN;
 import static java.nio.charset.StandardCharsets.UTF_8;
@@ -199,7 +200,8 @@ public void clear()
 
     public void clear(int offset, int length)
     {
-        Arrays.fill(base, baseOffset, baseOffset + size, (byte) 0);
+        checkFromIndexSize(offset, length, length());
+        Arrays.fill(base, baseOffset + offset, baseOffset + offset + length, (byte) 0);
     }
 
     /**
@@ -779,6 +781,11 @@ void setLongUnchecked(int index, long value)
     public void setFloat(int index, float value)
     {
         checkFromIndexSize(index, SIZE_OF_FLOAT, length());
+        setFloatUnchecked(index, value);
+    }
+
+    void setFloatUnchecked(int index, float value)
+    {
         FLOAT_HANDLE.set(base, baseOffset + index, value);
     }
 
@@ -792,6 +799,11 @@ public void setFloat(int index, float value)
     public void setDouble(int index, double value)
     {
         checkFromIndexSize(index, SIZE_OF_DOUBLE, length());
+        setDoubleUnchecked(index, value);
+    }
+
+    void setDoubleUnchecked(int index, double value)
+    {
         DOUBLE_HANDLE.set(base, baseOffset + index, value);
     }
 
@@ -1085,9 +1097,33 @@ public int indexOfByte(int b)
 
     public int indexOfByte(byte b)
     {
-        for (int i = 0; i < size; i++) {
-            if (getByteUnchecked(i) == b) {
-                return i;
+        return indexOfByte(b, 0, size);
+    }
+
+    /**
+     * Returns the index of the first occurrence of {@code b} at or after
+     * {@code offset} and before {@code size}, or -1 if there is none.
+     */
+    private int indexOfByte(byte b, int offset, int size)
+    {
+        // Replicate the byte into all eight lanes of a long
+        long pattern = (b & 0xFFL) * 0x01010101_01010101L;
+
+        // Scan eight bytes at a time: a lane of xor is zero exactly where the byte matches
+        for (; offset < size - 7; offset += 8) {
+            long value = getLongUnchecked(offset);
+            long xor = value ^ pattern;
+            long hasZero = (xor - 0x01010101_01010101L) & ~xor & 0x80808080_80808080L;
+            if (hasZero != 0) {
+                // The lowest marker bit is the first match, assuming getLongUnchecked reads little-endian
+                return offset + (numberOfTrailingZeros(hasZero) >>> 3);
+            }
+        }
+
+        // Check the remaining tail (fewer than eight bytes) one byte at a time
+        for (; offset < size; offset++) {
+            if (getByteUnchecked(offset) == b) {
+                return offset;
             }
         }
         return -1;
@@ -1150,7 +1186,7 @@ public int indexOf(Slice pattern, int offset)
             }
 
             // Try fast match of head and the rest
-            if (value == head && equalsUnchecked(index, pattern, 0, pattern.length())) {
+            if (value == head && equalsUnchecked(index, pattern.byteArray(), pattern.byteArrayOffset(), pattern.length())) {
                 return index;
             }
 
@@ -1160,6 +1196,63 @@ public int indexOf(Slice pattern, int offset)
         return -1;
     }
 
+    /**
+     * Returns the index of the last occurrence of the pattern within this slice.
+     * If the pattern is not found -1 is returned. If pattern is empty, the
+     * length of this slice is returned.
+     */
+    public int lastIndexOf(Slice slice)
+    {
+        return lastIndexOf(slice, size);
+    }
+
+    /**
+     * Returns the index of the last occurrence of the pattern within this slice,
+     * searching backward starting at the given offset.
+     * If the pattern is not found -1 is returned. If pattern is empty, the
+     * offset is returned.
+     */
+    public int lastIndexOf(Slice pattern, int offset)
+    {
+        if (size == 0 && pattern.size == 0) {
+            return 0;
+        }
+
+        if (size == 0 || offset < 0) {
+            return -1;
+        }
+
+        if (pattern.length() == 0) {
+            return Math.min(offset, size);
+        }
+
+        // Clamp offset to the last valid position
+        int lastValidIndex = size - pattern.length();
+        int index = Math.min(offset, lastValidIndex);
+        if (index < 0) {
+            return -1;
+        }
+
+        byte firstByte = pattern.getByteUnchecked(0);
+        while (index >= 0) {
+            // seek to first byte match
+            while (index > 0 && getByteUnchecked(index) != firstByte) {
+                index--;
+            }
+            if (getByteUnchecked(index) != firstByte) {
+                break;
+            }
+
+            if (equalsUnchecked(index, pattern.byteArray(), pattern.byteArrayOffset(), pattern.length())) {
+                return index;
+            }
+
+            index--;
+        }
+
+        return -1;
+    }
+
     int indexOfBruteForce(Slice pattern, int offset)
     {
         if (size == 0 || offset >= size || offset < 0) {
@@ -1173,16 +1266,13 @@ int indexOfBruteForce(Slice pattern, int offset)
         byte firstByte = pattern.getByteUnchecked(0);
         int lastValidIndex = size - pattern.length();
         int index = offset;
-        while (true) {
-            // seek to first byte match
-            while (index < lastValidIndex && getByteUnchecked(index) != firstByte) {
-                index++;
-            }
-            if (index > lastValidIndex) {
+        while (index <= lastValidIndex) {
+            index = indexOfByte(firstByte, index, lastValidIndex + 1);
+            if (index < 0) {
                 break;
             }
 
-            if (equalsUnchecked(index, pattern, 0, pattern.length())) {
+            if (equalsUnchecked(index, pattern.byteArray(), pattern.byteArrayOffset(), pattern.length())) {
                 return index;
             }
 
@@ -1248,7 +1338,7 @@ public boolean equals(Object o)
             return false;
         }
 
-        return equalsUnchecked(0, that, 0, length());
+        return equalsUnchecked(0, that.byteArray(), that.byteArrayOffset(), length());
     }
 
     /**
@@ -1293,18 +1383,36 @@ public boolean equals(int offset, int length, Slice that, int otherOffset, int o
         checkFromIndexSize(offset, length, length());
         checkFromIndexSize(otherOffset, otherLength, that.length());
 
+        return equalsUnchecked(offset, that.byteArray(), that.byteArrayOffset() + otherOffset, length);
+    }
+
+    /**
+     * Compares a portion of this slice with a portion of the given byte array.
+     * Returns {@code false} if the two lengths differ.
+     *
+     * @throws IndexOutOfBoundsException if either range is out of bounds
+     */
+    public boolean equals(int offset, int length, byte[] that, int otherOffset, int otherLength)
+    {
+        if (length != otherLength) {
+            return false;
+        }
+
+        checkFromIndexSize(offset, length, length());
+        checkFromIndexSize(otherOffset, otherLength, that.length);
+
         return equalsUnchecked(offset, that, otherOffset, length);
     }
 
-    boolean equalsUnchecked(int offset, Slice that, int otherOffset, int length)
+    boolean equalsUnchecked(int offset, byte[] that, int otherOffset, int length)
     {
         return Arrays.equals(
                 base,
                 baseOffset + offset,
                 baseOffset + offset + length,
-                that.base,
-                that.baseOffset + otherOffset,
-                that.baseOffset + otherOffset + length);
+                that,
+                otherOffset,
+                otherOffset + length);
     }
 
     /**
diff --git a/src/main/java/io/airlift/slice/SliceUtf8.java b/src/main/java/io/airlift/slice/SliceUtf8.java
index cf642ad6..c2ac510d 100644
--- a/src/main/java/io/airlift/slice/SliceUtf8.java
+++ b/src/main/java/io/airlift/slice/SliceUtf8.java
@@ -1068,8 +1068,9 @@ public static int setCodePointAt(int codePoint, Slice utf8, int position)
         }
         if (codePoint < 0x800) {
             // 110x_xxxx 10xx_xxxx
-            utf8.setByte(position, 0b1100_0000 | (codePoint >>> 6));
-            utf8.setByte(position + 1, 0b1000_0000 | (codePoint & 0b0011_1111));
+            checkFromIndexSize(position, 2, utf8.length());
+            utf8.setByteUnchecked(position, 0b1100_0000 | (codePoint >>> 6));
+            utf8.setByteUnchecked(position + 1, 0b1000_0000 | (codePoint & 0b0011_1111));
             return 2;
         }
         if (MIN_SURROGATE <= codePoint && codePoint <= MAX_SURROGATE) {
@@ -1077,17 +1078,19 @@ public static int setCodePointAt(int codePoint, Slice utf8, int position)
         }
         if (codePoint < 0x1_0000) {
             // 1110_xxxx 10xx_xxxx 10xx_xxxx
-            utf8.setByte(position, 0b1110_0000 | ((codePoint >>> 12) & 0b0000_1111));
-            utf8.setByte(position + 1, 0b1000_0000 | ((codePoint >>> 6) & 0b0011_1111));
-            utf8.setByte(position + 2, 0b1000_0000 | (codePoint & 0b0011_1111));
+            checkFromIndexSize(position, 3, utf8.length());
+            utf8.setByteUnchecked(position, 0b1110_0000 | ((codePoint >>> 12) & 0b0000_1111));
+            utf8.setByteUnchecked(position + 1, 0b1000_0000 | ((codePoint >>> 6) & 0b0011_1111));
+            utf8.setByteUnchecked(position + 2, 0b1000_0000 | (codePoint & 0b0011_1111));
             return 3;
         }
         if (codePoint < 0x11_0000) {
             // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
-            utf8.setByte(position, 0b1111_0000 | ((codePoint >>> 18) & 0b0000_0111));
-            utf8.setByte(position + 1, 0b1000_0000 | ((codePoint >>> 12) & 0b0011_1111));
-            utf8.setByte(position + 2, 0b1000_0000 | ((codePoint >>> 6) & 0b0011_1111));
-            utf8.setByte(position + 3, 0b1000_0000 | (codePoint & 0b0011_1111));
+            checkFromIndexSize(position, 4, utf8.length());
+            utf8.setByteUnchecked(position, 0b1111_0000 | ((codePoint >>> 18) & 0b0000_0111));
+            utf8.setByteUnchecked(position + 1, 0b1000_0000 | ((codePoint >>> 12) & 0b0011_1111));
+            utf8.setByteUnchecked(position + 2, 0b1000_0000 | ((codePoint >>> 6) & 0b0011_1111));
+            utf8.setByteUnchecked(position + 3, 0b1000_0000 | (codePoint & 0b0011_1111));
             return 4;
         }
         // Per RFC3629, UTF-8 is limited to 4 bytes, so more bytes are illegal
diff --git a/src/test/java/io/airlift/slice/BenchmarkSlice.java b/src/test/java/io/airlift/slice/BenchmarkSlice.java
index 6679ed49..22061c45 100644
--- a/src/test/java/io/airlift/slice/BenchmarkSlice.java
+++ b/src/test/java/io/airlift/slice/BenchmarkSlice.java
@@ -50,7 +50,7 @@ public Object compareTo(BenchmarkData data)
     @Benchmark
     public Object equalsUnchecked(BenchmarkData data)
     {
-        return data.slice1.equalsUnchecked(0, data.slice2, 0, data.slice1.length());
+        return data.slice1.equalsUnchecked(0, data.slice2.byteArray(), data.slice2.byteArrayOffset(), data.slice1.length());
     }
 
     @Benchmark
diff --git a/src/test/java/io/airlift/slice/TestSlice.java b/src/test/java/io/airlift/slice/TestSlice.java
index e53ee939..a3f1d663 100644
--- a/src/test/java/io/airlift/slice/TestSlice.java
+++ b/src/test/java/io/airlift/slice/TestSlice.java
@@ -163,6 +163,7 @@ private static void assertSlicesEquals(Slice slice, Slice other)
         assertThat(slice.compareTo(0, size, other, 0, size)).isEqualTo(0);
         for (int i = 0; i < slice.length(); i++) {
             assertThat(slice.equals(i, size - i, other, i, size - i)).isTrue();
+            assertThat(slice.equals(i, size - i, other.byteArray(), other.byteArrayOffset() + i, size - i)).isTrue();
             assertThat(slice.hashCode(i, size - i)).isEqualTo(other.hashCode(i, size - i));
             assertThat(slice.compareTo(i, size - i, other, i, size - i)).isEqualTo(0);
         }
@@ -664,6 +665,20 @@ public void testBytesSlice()
         }
     }
 
+    @Test
+    public void testClear()
+    {
+        byte[] bytes = new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8};
+
+        Slice slice = Slices.wrappedBuffer(bytes);
+
+        assertThat(slice.getByte(1)).isEqualTo((byte) 1);
+        assertThat(slice.getByte(2)).isEqualTo((byte) 2);
+        slice.clear(1, 1);
+        assertThat(slice.getByte(1)).isEqualTo((byte) 0);
+        assertThat(slice.getByte(2)).isEqualTo((byte) 2);
+    }
+
     private void assertBytesSlice(Slice slice, int index)
     {
         // fill slice with FF
@@ -830,6 +845,21 @@ public static void assertIndexOf(Slice data, Slice pattern, int offset, int expe
         assertThat(data.indexOfBruteForce(pattern, offset)).isEqualTo(expected);
     }
 
+    private static void assertLastIndexOf(Slice data, Slice pattern, int expected)
+    {
+        assertLastIndexOf(data, pattern, data.length(), expected);
+    }
+
+    private static void assertLastIndexOf(Slice data, Slice pattern, int offset, int expected)
+    {
+        assertThat(data.lastIndexOf(pattern, offset))
+                .describedAs("Slice '%s'.lastIndexOf('%s', %d)".formatted(data.toStringUtf8(), pattern.toStringUtf8(), offset))
+                .isEqualTo(expected);
+        assertThat(data.toStringUtf8().lastIndexOf(pattern.toStringUtf8(), offset))
+                .describedAs("String '%s'.lastIndexOf('%s', %d)".formatted(data.toStringUtf8(), pattern.toStringUtf8(), offset))
+                .isEqualTo(expected);
+    }
+
     public static void assertIndexOf(Slice data, Slice pattern)
     {
         int index;
@@ -857,10 +887,53 @@ public static void assertIndexOf(Slice data, Slice pattern)
         assertThat(bruteForce).isEqualTo(indexOf);
     }
 
+    @Test
+    public void testLastIndexOf()
+    {
+        // no match
+        assertLastIndexOf(utf8Slice("no-match-bigger"), utf8Slice("test"), -1);
+        assertLastIndexOf(utf8Slice("no"), utf8Slice("test"), -1);
+
+        // exact match
+        assertLastIndexOf(utf8Slice("test"), utf8Slice("test"), 0);
+
+        // match at start
+        assertLastIndexOf(utf8Slice("test-start"), utf8Slice("test"), 0);
+
+        // match at end
+        assertLastIndexOf(utf8Slice("end-test"), utf8Slice("test"), 4);
+
+        // match in middle
+        assertLastIndexOf(utf8Slice("a-test-middle"), utf8Slice("test"), 2);
+
+        // multiple matches - should return last
+        assertLastIndexOf(utf8Slice("this-test-is-a-test"), utf8Slice("test"), 15);
+
+        // empty pattern
+        assertLastIndexOf(utf8Slice("test"), EMPTY_SLICE, 4);
+
+        // empty data
+        assertLastIndexOf(EMPTY_SLICE, EMPTY_SLICE, 0);
+        assertLastIndexOf(EMPTY_SLICE, utf8Slice("test"), -1);
+
+        // pattern larger than data
+        assertLastIndexOf(utf8Slice("ab"), utf8Slice("abc"), -1);
+
+        // with offset
+        assertLastIndexOf(utf8Slice("this-test-is-a-test"), utf8Slice("test"), 14, 5);
+        assertLastIndexOf(utf8Slice("this-test-is-a-test"), utf8Slice("test"), 5, 5);
+        assertLastIndexOf(utf8Slice("this-test-is-a-test"), utf8Slice("test"), 4, -1);
+        assertLastIndexOf(utf8Slice("test"), utf8Slice("no"), -1, -1);
+
+        // empty pattern with offset
+        assertLastIndexOf(utf8Slice("test"), EMPTY_SLICE, 2, 2);
+        assertLastIndexOf(utf8Slice("test"), EMPTY_SLICE, 10, 4);
+    }
+
     @Test
     public void testIndexOfByte()
     {
-        Slice slice = utf8Slice("apple");
+        Slice slice = utf8Slice("appleappleappleappleappleappleappleappleappleappleappleappleappleappleapple!");
 
         assertThat(slice.indexOfByte((byte) 'a')).isEqualTo(0);
         assertThat(slice.indexOfByte((byte) 'p')).isEqualTo(1);
@@ -871,6 +944,7 @@ public void testIndexOfByte()
         assertThat(slice.indexOfByte('p')).isEqualTo(1);
         assertThat(slice.indexOfByte('e')).isEqualTo(4);
         assertThat(slice.indexOfByte('x')).isEqualTo(-1);
+        assertThat(slice.indexOfByte('!')).isEqualTo(slice.length() - 1);
 
         assertThatThrownBy(() -> slice.indexOfByte(-1)).isInstanceOf(IllegalArgumentException.class);
         assertThatThrownBy(() -> slice.indexOfByte(-123)).isInstanceOf(IllegalArgumentException.class);