-
Notifications
You must be signed in to change notification settings - Fork 3.2k
GCM encryption stream #3231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GCM encryption stream #3231
Changes from 12 commits
8a0c7a7
c4a9378
51a5b6e
9364788
07579ee
16192f5
23567e0
08983e1
3445297
91305e9
82fc5b0
375ef20
cf8827f
de84a93
7cdba0e
a64f4f4
89f3015
fcb1ba0
792a7ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.encryption; | ||
|
|
||
| import org.apache.iceberg.io.InputFile; | ||
| import org.apache.iceberg.io.SeekableInputStream; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
|
|
||
| public class AesGcmInputFile implements InputFile { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not implement
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right. This would be the result of the |
||
| private final InputFile sourceFile; | ||
| private final byte[] dataKey; | ||
| private final byte[] fileAADPrefix; | ||
| private long plaintextLength; | ||
|
|
||
| public AesGcmInputFile(InputFile sourceFile, byte[] dataKey, byte[] fileAADPrefix) { | ||
| this.sourceFile = sourceFile; | ||
| this.dataKey = dataKey; | ||
| this.fileAADPrefix = fileAADPrefix; | ||
| this.plaintextLength = -1; | ||
| } | ||
|
|
||
| @Override | ||
| public long getLength() { | ||
| if (plaintextLength == -1) { | ||
| // Presumes all streams use hard-coded plaintext block size. | ||
| plaintextLength = AesGcmInputStream.calculatePlaintextLength(sourceFile.getLength()); | ||
| } | ||
|
|
||
| return plaintextLength; | ||
| } | ||
|
|
||
| @Override | ||
| public SeekableInputStream newStream() { | ||
| long ciphertextLength = sourceFile.getLength(); | ||
| Preconditions.checkState( | ||
| ciphertextLength >= Ciphers.GCM_STREAM_HEADER_LENGTH, | ||
| "Invalid encrypted stream: %d is shorter than the GCM stream header length", | ||
| ciphertextLength); | ||
| return new AesGcmInputStream(sourceFile.newStream(), ciphertextLength, dataKey, fileAADPrefix); | ||
| } | ||
|
|
||
| @Override | ||
| public String location() { | ||
| return sourceFile.location(); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean exists() { | ||
| return sourceFile.exists(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,264 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.encryption; | ||
|
|
||
| import java.io.EOFException; | ||
| import java.io.IOException; | ||
| import java.nio.ByteBuffer; | ||
| import java.nio.ByteOrder; | ||
| import org.apache.iceberg.io.IOUtil; | ||
| import org.apache.iceberg.io.SeekableInputStream; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
|
|
||
| public class AesGcmInputStream extends SeekableInputStream { | ||
| private final SeekableInputStream sourceStream; | ||
| private final byte[] fileAADPrefix; | ||
| private final Ciphers.AesGcmDecryptor decryptor; | ||
| private final byte[] cipherBlockBuffer; | ||
| private final long numBlocks; | ||
| private final int lastCipherBlockSize; | ||
| private final long plainStreamSize; | ||
|
|
||
| private long plainStreamPosition; | ||
| private long currentPlainBlockIndex; | ||
| private byte[] currentPlainBlock; | ||
| private int currentPlainBlockSize; | ||
| private byte[] singleByte; | ||
|
|
||
| AesGcmInputStream( | ||
| SeekableInputStream sourceStream, long sourceLength, byte[] aesKey, byte[] fileAADPrefix) { | ||
| this.sourceStream = sourceStream; | ||
| this.fileAADPrefix = fileAADPrefix; | ||
| this.decryptor = new Ciphers.AesGcmDecryptor(aesKey); | ||
| this.cipherBlockBuffer = new byte[Ciphers.CIPHER_BLOCK_SIZE]; | ||
|
|
||
| this.plainStreamPosition = 0; | ||
| this.currentPlainBlockIndex = -1; | ||
| this.currentPlainBlock = null; | ||
| this.currentPlainBlockSize = 0; | ||
|
|
||
| long streamLength = sourceLength - Ciphers.GCM_STREAM_HEADER_LENGTH; | ||
| long numFullBlocks = Math.toIntExact(streamLength / Ciphers.CIPHER_BLOCK_SIZE); | ||
| long cipherFullBlockLength = numFullBlocks * Ciphers.CIPHER_BLOCK_SIZE; | ||
| int cipherBytesInLastBlock = Math.toIntExact(streamLength - cipherFullBlockLength); | ||
| boolean fullBlocksOnly = (0 == cipherBytesInLastBlock); | ||
| this.numBlocks = fullBlocksOnly ? numFullBlocks : numFullBlocks + 1; | ||
| this.lastCipherBlockSize = | ||
| fullBlocksOnly ? Ciphers.CIPHER_BLOCK_SIZE : cipherBytesInLastBlock; // never 0 | ||
|
|
||
| long lastPlainBlockSize = | ||
| (long) lastCipherBlockSize - Ciphers.NONCE_LENGTH - Ciphers.GCM_TAG_LENGTH; | ||
| this.plainStreamSize = | ||
| numFullBlocks * Ciphers.PLAIN_BLOCK_SIZE + (fullBlocksOnly ? 0 : lastPlainBlockSize); | ||
| this.singleByte = new byte[1]; | ||
| } | ||
|
|
||
| private void validateHeader() throws IOException { | ||
| byte[] headerBytes = new byte[Ciphers.GCM_STREAM_HEADER_LENGTH]; | ||
| IOUtil.readFully(sourceStream, headerBytes, 0, headerBytes.length); | ||
|
rdblue marked this conversation as resolved.
|
||
|
|
||
| Preconditions.checkState( | ||
| Ciphers.GCM_STREAM_MAGIC.equals(ByteBuffer.wrap(headerBytes, 0, 4)), | ||
| "Invalid GCM stream: magic does not match AGS1"); | ||
|
|
||
| int plainBlockSize = ByteBuffer.wrap(headerBytes, 4, 4).order(ByteOrder.LITTLE_ENDIAN).getInt(); | ||
| Preconditions.checkState( | ||
| plainBlockSize == Ciphers.PLAIN_BLOCK_SIZE, | ||
| "Invalid GCM stream: block size %d != %d", | ||
| plainBlockSize, | ||
| Ciphers.PLAIN_BLOCK_SIZE); | ||
| } | ||
|
|
||
| @Override | ||
| public int available() { | ||
| long maxAvailable = plainStreamSize - plainStreamPosition; | ||
| // See InputStream.available contract | ||
| if (maxAvailable >= Integer.MAX_VALUE) { | ||
| return Integer.MAX_VALUE; | ||
| } else { | ||
| return (int) maxAvailable; | ||
| } | ||
| } | ||
|
|
||
| private int availableInCurrentBlock() { | ||
| if (currentPlainBlockIndex < 0) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I was refactoring, there was a bug that I fixed when the block for Here's a diff that does what I'm talking about and still passes tests: diff --git a/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java b/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java
index a63134f31d..88e9b36c25 100644
--- a/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java
+++ b/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java
@@ -97,7 +97,7 @@ public class AesGcmInputStream extends SeekableInputStream {
}
private int availableInCurrentBlock() {
- if (currentPlainBlockIndex < 0) {
+ if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
return 0;
}
@@ -130,10 +130,6 @@ public class AesGcmInputStream extends SeekableInputStream {
remainingBytesToRead -= bytesToCopy;
resultBufferOffset += bytesToCopy;
this.plainStreamPosition += bytesToCopy;
- if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
- // invalidate the current block
- this.currentPlainBlockIndex = -1;
- }
} else if (available() > 0) {
decryptBlock(blockIndex(plainStreamPosition));
@@ -157,10 +153,6 @@ public class AesGcmInputStream extends SeekableInputStream {
}
this.plainStreamPosition = newPos;
- if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
- // invalidate the current block
- this.currentPlainBlockIndex = -1;
- }
}
@Override
@@ -177,10 +169,6 @@ public class AesGcmInputStream extends SeekableInputStream {
}
this.plainStreamPosition += n;
- if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
- // invalidate the current block
- this.currentPlainBlockIndex = -1;
- }
return n;
}
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SGTM. Applied this change. |
||
| return 0; | ||
| } | ||
|
|
||
| return currentPlainBlockSize - offsetInBlock(plainStreamPosition); | ||
| } | ||
|
|
||
| @Override | ||
| public int read(byte[] b, int off, int len) throws IOException { | ||
| Preconditions.checkArgument(len >= 0, "Invalid read length: " + len); | ||
|
|
||
| if (available() <= 0 && len > 0) { | ||
| throw new EOFException(); | ||
| } | ||
|
|
||
| if (len == 0) { | ||
| return 0; | ||
| } | ||
|
|
||
| int totalBytesRead = 0; | ||
| int resultBufferOffset = off; | ||
| int remainingBytesToRead = len; | ||
|
|
||
| while (remainingBytesToRead > 0) { | ||
| int availableInBlock = availableInCurrentBlock(); | ||
| if (availableInBlock > 0) { | ||
| int bytesToCopy = Math.min(availableInBlock, remainingBytesToRead); | ||
| int offsetInBlock = offsetInBlock(plainStreamPosition); | ||
| System.arraycopy(currentPlainBlock, offsetInBlock, b, resultBufferOffset, bytesToCopy); | ||
| totalBytesRead += bytesToCopy; | ||
| remainingBytesToRead -= bytesToCopy; | ||
| resultBufferOffset += bytesToCopy; | ||
| this.plainStreamPosition += bytesToCopy; | ||
| if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) { | ||
| // invalidate the current block | ||
| this.currentPlainBlockIndex = -1; | ||
| } | ||
|
|
||
| } else if (available() > 0) { | ||
| decryptBlock(blockIndex(plainStreamPosition)); | ||
|
|
||
| } else { | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| // return -1 for EOF | ||
| return totalBytesRead > 0 ? totalBytesRead : -1; | ||
| } | ||
|
|
||
| @Override | ||
| public void seek(long newPos) throws IOException { | ||
| if (newPos < 0) { | ||
| throw new IOException("Invalid position: " + newPos); | ||
| } else if (newPos > plainStreamSize) { | ||
| throw new EOFException( | ||
| "Invalid position: " + newPos + " > stream length, " + plainStreamSize); | ||
| } | ||
|
|
||
| this.plainStreamPosition = newPos; | ||
| if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) { | ||
| // invalidate the current block | ||
| this.currentPlainBlockIndex = -1; | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public long skip(long n) { | ||
| if (n <= 0) { | ||
| return 0; | ||
| } | ||
|
|
||
| long bytesLeftInStream = plainStreamSize - plainStreamPosition; | ||
| if (n > bytesLeftInStream) { | ||
| // skip the rest of the stream | ||
| this.plainStreamPosition = plainStreamSize; | ||
| return bytesLeftInStream; | ||
| } | ||
|
|
||
| this.plainStreamPosition += n; | ||
| if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) { | ||
| // invalidate the current block | ||
| this.currentPlainBlockIndex = -1; | ||
| } | ||
|
|
||
| return n; | ||
|
rdblue marked this conversation as resolved.
|
||
| } | ||
|
|
||
| @Override | ||
| public long getPos() throws IOException { | ||
| return plainStreamPosition; | ||
| } | ||
|
|
||
| @Override | ||
| public int read() throws IOException { | ||
| int read = read(singleByte); | ||
| if (read == -1) { | ||
| return -1; | ||
| } | ||
|
|
||
| return singleByte[0]; | ||
| } | ||
|
|
||
| @Override | ||
| public void close() throws IOException { | ||
| sourceStream.close(); | ||
| this.currentPlainBlock = null; | ||
| } | ||
|
|
||
| private void decryptBlock(long blockIndex) throws IOException { | ||
| if (blockIndex == currentPlainBlockIndex) { | ||
| return; | ||
| } | ||
|
|
||
| long blockPositionInStream = blockOffset(blockIndex); | ||
| if (sourceStream.getPos() != blockPositionInStream) { | ||
| if (sourceStream.getPos() == 0) { | ||
| validateHeader(); | ||
| } | ||
|
|
||
| sourceStream.seek(blockPositionInStream); | ||
| } | ||
|
|
||
| boolean isLastBlock = blockIndex == numBlocks - 1; | ||
| int cipherBlockSize = isLastBlock ? lastCipherBlockSize : Ciphers.CIPHER_BLOCK_SIZE; | ||
| IOUtil.readFully(sourceStream, cipherBlockBuffer, 0, cipherBlockSize); | ||
|
|
||
| // TODO: the AAD should probably use a long block index. | ||
| byte[] blockAAD = Ciphers.streamBlockAAD(fileAADPrefix, Math.toIntExact(blockIndex)); | ||
| this.currentPlainBlock = decryptor.decrypt(cipherBlockBuffer, 0, cipherBlockSize, blockAAD); | ||
| this.currentPlainBlockSize = cipherBlockSize - Ciphers.NONCE_LENGTH - Ciphers.GCM_TAG_LENGTH; | ||
| this.currentPlainBlockIndex = blockIndex; | ||
| } | ||
|
|
||
| private static long blockIndex(long plainPosition) { | ||
| return plainPosition / Ciphers.PLAIN_BLOCK_SIZE; | ||
| } | ||
|
|
||
| private static int offsetInBlock(long plainPosition) { | ||
| return Math.toIntExact(plainPosition % Ciphers.PLAIN_BLOCK_SIZE); | ||
| } | ||
|
|
||
| private static long blockOffset(long blockIndex) { | ||
| return blockIndex * Ciphers.CIPHER_BLOCK_SIZE + Ciphers.GCM_STREAM_HEADER_LENGTH; | ||
| } | ||
|
|
||
| static long calculatePlaintextLength(long sourceLength) { | ||
| long streamLength = sourceLength - Ciphers.GCM_STREAM_HEADER_LENGTH; | ||
|
|
||
| if (streamLength == 0) { | ||
| return 0; | ||
| } | ||
|
|
||
| long numberOfFullBlocks = streamLength / Ciphers.CIPHER_BLOCK_SIZE; | ||
| long fullBlockSize = numberOfFullBlocks * Ciphers.CIPHER_BLOCK_SIZE; | ||
| long cipherBytesInLastBlock = streamLength - fullBlockSize; | ||
| boolean fullBlocksOnly = (0 == cipherBytesInLastBlock); | ||
| long plainBytesInLastBlock = | ||
| fullBlocksOnly | ||
| ? 0 | ||
| : (cipherBytesInLastBlock - Ciphers.NONCE_LENGTH - Ciphers.GCM_TAG_LENGTH); | ||
|
|
||
| return (numberOfFullBlocks * Ciphers.PLAIN_BLOCK_SIZE) + plainBytesInLastBlock; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this needed? Can we just leave it as it was or is it now failing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, the CI is failing unfortunately, since util classes are now required to have private constructors.