-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Fix precision loss when dealing with JSON numbers containing decimal point #28882
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
803aff5
d8134ab
f8c0ada
39f44de
3d467c5
4297e9a
ad69d22
a2dd63c
70032a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,6 @@ | |
| import com.google.common.primitives.SignedBytes; | ||
| import io.airlift.slice.Slice; | ||
| import io.airlift.slice.SliceOutput; | ||
| import io.airlift.slice.Slices; | ||
| import io.trino.spi.TrinoException; | ||
| import io.trino.spi.block.ArrayBlockBuilder; | ||
| import io.trino.spi.block.Block; | ||
|
|
@@ -81,6 +80,7 @@ | |
| import static com.fasterxml.jackson.core.JsonToken.START_ARRAY; | ||
| import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; | ||
| import static com.google.common.base.Verify.verify; | ||
| import static io.airlift.slice.Slices.utf8Slice; | ||
| import static io.trino.plugin.base.util.JsonUtils.jsonFactoryBuilder; | ||
| import static io.trino.spi.StandardErrorCode.INVALID_CAST_ARGUMENT; | ||
| import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; | ||
|
|
@@ -696,12 +696,8 @@ public static Slice currentTokenAsVarchar(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> Slices.utf8Slice(parser.getText()); | ||
| // Avoidance of loss of precision does not seem to be possible here because of Jackson implementation. | ||
| case VALUE_NUMBER_FLOAT -> DoubleOperators.castToVarchar(UNBOUNDED_LENGTH, parser.getDoubleValue()); | ||
| // An alternative is calling getLongValue and then BigintOperators.castToVarchar. | ||
| // It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers. | ||
| case VALUE_NUMBER_INT -> Slices.utf8Slice(parser.getText()); | ||
| case VALUE_STRING, FIELD_NAME -> utf8Slice(parser.getText()); | ||
| case VALUE_NUMBER_INT, VALUE_NUMBER_FLOAT -> utf8Slice(parser.getDecimalValue().toString()); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The returned text might be longer than double.toString we used to use.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this actually have to go through Git DoubleValue or can you just fetch the raw text out of the JsonParser and avoid the big decimal code entirelly?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Different JSON syntaxes encode same number:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory, json values are supposed to be normalized, so it shouldn't matter.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a benchmark and posted results: |
||
| case VALUE_TRUE -> BooleanOperators.castToVarchar(UNBOUNDED_LENGTH, true); | ||
| case VALUE_FALSE -> BooleanOperators.castToVarchar(UNBOUNDED_LENGTH, false); | ||
| default -> throw new JsonCastException(format("Unexpected token when cast to %s: %s", StandardTypes.VARCHAR, parser.getText())); | ||
|
|
@@ -713,7 +709,7 @@ public static Long currentTokenAsBigint(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBigint(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBigint(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> DoubleOperators.castToLong(parser.getDoubleValue()); | ||
| case VALUE_NUMBER_INT -> parser.getLongValue(); | ||
| case VALUE_TRUE -> BooleanOperators.castToBigint(true); | ||
|
|
@@ -727,7 +723,7 @@ public static Long currentTokenAsInteger(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToInteger(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToInteger(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> DoubleOperators.castToInteger(parser.getDoubleValue()); | ||
| case VALUE_NUMBER_INT -> (long) toIntExact(parser.getLongValue()); | ||
| case VALUE_TRUE -> BooleanOperators.castToInteger(true); | ||
|
|
@@ -741,7 +737,7 @@ public static Long currentTokenAsSmallint(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToSmallint(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToSmallint(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> DoubleOperators.castToSmallint(parser.getDoubleValue()); | ||
| case VALUE_NUMBER_INT -> (long) Shorts.checkedCast(parser.getLongValue()); | ||
| case VALUE_TRUE -> BooleanOperators.castToSmallint(true); | ||
|
|
@@ -755,7 +751,7 @@ public static Long currentTokenAsTinyint(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToTinyint(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToTinyint(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> DoubleOperators.castToTinyint(parser.getDoubleValue()); | ||
| case VALUE_NUMBER_INT -> (long) SignedBytes.checkedCast(parser.getLongValue()); | ||
| case VALUE_TRUE -> BooleanOperators.castToTinyint(true); | ||
|
|
@@ -769,7 +765,7 @@ public static Double currentTokenAsDouble(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToDouble(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToDouble(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> parser.getDoubleValue(); | ||
| // An alternative is calling getLongValue and then BigintOperators.castToDouble. | ||
| // It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers. | ||
|
|
@@ -785,7 +781,7 @@ public static Long currentTokenAsReal(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToFloat(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToFloat(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> (long) floatToRawIntBits(parser.getFloatValue()); | ||
| // An alternative is calling getLongValue and then BigintOperators.castToReal. | ||
| // It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers. | ||
|
|
@@ -801,7 +797,7 @@ public static Boolean currentTokenAsBoolean(JsonParser parser) | |
| { | ||
| return switch (parser.currentToken()) { | ||
| case VALUE_NULL -> null; | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBoolean(Slices.utf8Slice(parser.getText())); | ||
| case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBoolean(utf8Slice(parser.getText())); | ||
| case VALUE_NUMBER_FLOAT -> DoubleOperators.castToBoolean(parser.getDoubleValue()); | ||
| case VALUE_NUMBER_INT -> BigintOperators.castToBoolean(parser.getLongValue()); | ||
| case VALUE_TRUE -> true; | ||
|
|
@@ -907,7 +903,7 @@ static BlockBuilderAppender createBlockBuilderAppender(Type type) | |
| if (type instanceof JsonType) { | ||
| return (parser, blockBuilder) -> { | ||
| String json = JSON_MAPPED_UNORDERED.writeValueAsString(parser.readValueAsTree()); | ||
| JSON.writeSlice(blockBuilder, Slices.utf8Slice(json)); | ||
| JSON.writeSlice(blockBuilder, utf8Slice(json)); | ||
| }; | ||
| } | ||
| if (type instanceof ArrayType arrayType) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.