Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.function.BoundSignature;
import io.trino.spi.function.FunctionMetadata;
import io.trino.spi.function.Signature;
Expand Down Expand Up @@ -53,7 +52,7 @@ public class JsonToArrayCast
extends SqlScalarFunction
{
public static final JsonToArrayCast JSON_TO_ARRAY = new JsonToArrayCast();
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToArrayCast.class, "toArray", ArrayType.class, BlockBuilderAppender.class, ConnectorSession.class, Slice.class);
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToArrayCast.class, "toArray", ArrayType.class, BlockBuilderAppender.class, Slice.class);
Comment thread
findepi marked this conversation as resolved.

private static final JsonMapper JSON_MAPPER = new JsonMapper(createJsonFactory());

Expand Down Expand Up @@ -86,7 +85,7 @@ protected SpecializedSqlScalarFunction specialize(BoundSignature boundSignature)
}

@UsedByGeneratedCode
public static Block toArray(ArrayType arrayType, BlockBuilderAppender arrayAppender, ConnectorSession connectorSession, Slice json)
public static Block toArray(ArrayType arrayType, BlockBuilderAppender arrayAppender, Slice json)
{
try (JsonParser jsonParser = createJsonParser(JSON_MAPPER, json)) {
jsonParser.nextToken();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.block.SqlMap;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.function.BoundSignature;
import io.trino.spi.function.FunctionMetadata;
import io.trino.spi.function.Signature;
Expand Down Expand Up @@ -56,7 +55,7 @@ public class JsonToMapCast
extends SqlScalarFunction
{
public static final JsonToMapCast JSON_TO_MAP = new JsonToMapCast();
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToMapCast.class, "toMap", MapType.class, BlockBuilderAppender.class, ConnectorSession.class, Slice.class);
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToMapCast.class, "toMap", MapType.class, BlockBuilderAppender.class, Slice.class);

private static final JsonMapper JSON_MAPPER = new JsonMapper(createJsonFactory());

Expand Down Expand Up @@ -90,7 +89,7 @@ protected SpecializedSqlScalarFunction specialize(BoundSignature boundSignature)
}

@UsedByGeneratedCode
public static SqlMap toMap(MapType mapType, BlockBuilderAppender mapAppender, ConnectorSession connectorSession, Slice json)
public static SqlMap toMap(MapType mapType, BlockBuilderAppender mapAppender, Slice json)
{
try (JsonParser jsonParser = createJsonParser(JSON_MAPPER, json)) {
jsonParser.nextToken();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.block.SqlRow;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.function.BoundSignature;
import io.trino.spi.function.FunctionMetadata;
import io.trino.spi.function.Signature;
Expand Down Expand Up @@ -54,7 +53,7 @@ public class JsonToRowCast
extends SqlScalarFunction
{
public static final JsonToRowCast JSON_TO_ROW = new JsonToRowCast();
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender.class, ConnectorSession.class, Slice.class);
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender.class, Slice.class);

private static final JsonMapper JSON_MAPPER = new JsonMapper(createJsonFactory());

Expand Down Expand Up @@ -91,11 +90,7 @@ protected SpecializedSqlScalarFunction specialize(BoundSignature boundSignature)
}

@UsedByGeneratedCode
public static SqlRow toRow(
RowType rowType,
BlockBuilderAppender rowAppender,
ConnectorSession connectorSession,
Slice json)
public static SqlRow toRow(RowType rowType, BlockBuilderAppender rowAppender, Slice json)
{
try (JsonParser jsonParser = createJsonParser(JSON_MAPPER, json)) {
jsonParser.nextToken();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
import static io.trino.operator.scalar.JsonStringToMapCast.JSON_STRING_TO_MAP_NAME;
import static io.trino.operator.scalar.JsonStringToRowCast.JSON_STRING_TO_ROW_NAME;

/**
* Replaces certain {@code CAST(json_parse(x) AS T)} with functions logically
* implementing {@code CAST(a_json AS T)} along with validation that input is
* well-formed JSON. This avoids cost of validation and canonicalization done
* by {@code json_parse}.
*/
public class SpecializeCastWithJsonParse
implements IrOptimizerRule
{
Expand Down
26 changes: 11 additions & 15 deletions core/trino-main/src/main/java/io/trino/util/JsonUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import com.google.common.primitives.SignedBytes;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
import io.trino.spi.TrinoException;
import io.trino.spi.block.ArrayBlockBuilder;
import io.trino.spi.block.Block;
Expand Down Expand Up @@ -81,6 +80,7 @@
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static com.google.common.base.Verify.verify;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.base.util.JsonUtils.jsonFactoryBuilder;
import static io.trino.spi.StandardErrorCode.INVALID_CAST_ARGUMENT;
import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
Expand Down Expand Up @@ -696,12 +696,8 @@ public static Slice currentTokenAsVarchar(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> Slices.utf8Slice(parser.getText());
// Avoidance of loss of precision does not seem to be possible here because of Jackson implementation.
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToVarchar(UNBOUNDED_LENGTH, parser.getDoubleValue());
// An alternative is calling getLongValue and then BigintOperators.castToVarchar.
// It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers.
case VALUE_NUMBER_INT -> Slices.utf8Slice(parser.getText());
case VALUE_STRING, FIELD_NAME -> utf8Slice(parser.getText());
case VALUE_NUMBER_INT, VALUE_NUMBER_FLOAT -> utf8Slice(parser.getDecimalValue().toString());
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The returned text might be longer than double.toString we used to use.
This can cause query failures when casting json to varchar(n) for certain n values.
I think it's fine. The format of returned string is something that is not guaranteed to be stable.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this actually have to go through Git DoubleValue or can you just fetch the raw text out of the JsonParser and avoid the big decimal code entirelly?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Different JSON syntaxes encode same number: 100.0, 100.000, 1e2, 10e1, 00000000100.0.
I don't expect JSON number to VARCHAR cast to simply "leak" JSON internal syntax.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory, json values are supposed to be normalized, so it shouldn't matter.
in practice, SpecializeCastWithJsonParse assumes the cast normalizes the values.
Query result could depend on whether SpecializeCastWithJsonParse kicks in or not. This feels wrong.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

case VALUE_TRUE -> BooleanOperators.castToVarchar(UNBOUNDED_LENGTH, true);
case VALUE_FALSE -> BooleanOperators.castToVarchar(UNBOUNDED_LENGTH, false);
default -> throw new JsonCastException(format("Unexpected token when cast to %s: %s", StandardTypes.VARCHAR, parser.getText()));
Expand All @@ -713,7 +709,7 @@ public static Long currentTokenAsBigint(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBigint(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBigint(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToLong(parser.getDoubleValue());
case VALUE_NUMBER_INT -> parser.getLongValue();
case VALUE_TRUE -> BooleanOperators.castToBigint(true);
Expand All @@ -727,7 +723,7 @@ public static Long currentTokenAsInteger(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToInteger(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToInteger(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToInteger(parser.getDoubleValue());
case VALUE_NUMBER_INT -> (long) toIntExact(parser.getLongValue());
case VALUE_TRUE -> BooleanOperators.castToInteger(true);
Expand All @@ -741,7 +737,7 @@ public static Long currentTokenAsSmallint(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToSmallint(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToSmallint(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToSmallint(parser.getDoubleValue());
case VALUE_NUMBER_INT -> (long) Shorts.checkedCast(parser.getLongValue());
case VALUE_TRUE -> BooleanOperators.castToSmallint(true);
Expand All @@ -755,7 +751,7 @@ public static Long currentTokenAsTinyint(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToTinyint(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToTinyint(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToTinyint(parser.getDoubleValue());
case VALUE_NUMBER_INT -> (long) SignedBytes.checkedCast(parser.getLongValue());
case VALUE_TRUE -> BooleanOperators.castToTinyint(true);
Expand All @@ -769,7 +765,7 @@ public static Double currentTokenAsDouble(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToDouble(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToDouble(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> parser.getDoubleValue();
// An alternative is calling getLongValue and then BigintOperators.castToDouble.
// It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers.
Expand All @@ -785,7 +781,7 @@ public static Long currentTokenAsReal(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToFloat(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToFloat(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> (long) floatToRawIntBits(parser.getFloatValue());
// An alternative is calling getLongValue and then BigintOperators.castToReal.
// It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers.
Expand All @@ -801,7 +797,7 @@ public static Boolean currentTokenAsBoolean(JsonParser parser)
{
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBoolean(Slices.utf8Slice(parser.getText()));
case VALUE_STRING, FIELD_NAME -> VarcharOperators.castToBoolean(utf8Slice(parser.getText()));
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToBoolean(parser.getDoubleValue());
case VALUE_NUMBER_INT -> BigintOperators.castToBoolean(parser.getLongValue());
case VALUE_TRUE -> true;
Expand Down Expand Up @@ -907,7 +903,7 @@ static BlockBuilderAppender createBlockBuilderAppender(Type type)
if (type instanceof JsonType) {
return (parser, blockBuilder) -> {
String json = JSON_MAPPED_UNORDERED.writeValueAsString(parser.readValueAsTree());
JSON.writeSlice(blockBuilder, Slices.utf8Slice(json));
JSON.writeSlice(blockBuilder, utf8Slice(json));
};
}
if (type instanceof ArrayType arrayType) {
Expand Down
Loading