diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralRelBuilder.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralRelBuilder.java new file mode 100644 index 000000000..6a86de507 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralRelBuilder.java @@ -0,0 +1,110 @@ +/** + * Copyright 2023-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Preconditions; + +import org.apache.calcite.plan.Context; +import org.apache.calcite.plan.Contexts; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.Pair; + +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_AGGREGATE_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_EXCHANGE_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_FILTER_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_JOIN_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_MATCH_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_PROJECT_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_REPEAT_UNION_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SET_OP_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SNAPSHOT_FACTORY; +import static 
org.apache.calcite.rel.core.RelFactories.DEFAULT_SORT_EXCHANGE_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SORT_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SPOOL_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_TABLE_SCAN_FACTORY; +import static org.apache.calcite.rel.core.RelFactories.DEFAULT_VALUES_FACTORY; + + +/** + * CoralRelBuilder overrides {@link #rename} to handle {@link CoralUncollect} nodes. + * Instead of wrapping CoralUncollect with a Project RelNode, it rebuilds the CoralUncollect + * using {@link CoralUncollect#copy(org.apache.calcite.rel.type.RelDataType)} to set the rowType. + * + *
<p>
This avoids an extra and unnecessary (SELECT ... FROM ... AS ...) wrapper in unparsed SQL. + * + *
<p>
This class was previously named {@code HiveRelBuilder} and has been renamed to better + * reflect that it is used across all Coral translation targets. + */ +public class CoralRelBuilder extends RelBuilder { + protected CoralRelBuilder(Context context, RelOptCluster cluster, RelOptSchema relOptSchema) { + super(context, cluster, relOptSchema); + } + + public static RelBuilder create(FrameworkConfig config) { + return Frameworks.withPrepare(config, (cluster, relOptSchema, rootSchema, statement) -> { + cluster = RelOptCluster.create(cluster.getPlanner(), + new RexBuilder(new CoralJavaTypeFactoryImpl(cluster.getTypeFactory().getTypeSystem()))); + return new CoralRelBuilder(config.getContext(), cluster, relOptSchema); + }); + } + + public static RelBuilderFactory proto(final Context context) { + return (cluster, schema) -> new CoralRelBuilder(context, cluster, schema); + } + + public static final RelBuilderFactory LOGICAL_BUILDER = + CoralRelBuilder.proto(Contexts.of(DEFAULT_PROJECT_FACTORY, DEFAULT_FILTER_FACTORY, DEFAULT_JOIN_FACTORY, + DEFAULT_SORT_FACTORY, DEFAULT_EXCHANGE_FACTORY, DEFAULT_SORT_EXCHANGE_FACTORY, DEFAULT_AGGREGATE_FACTORY, + DEFAULT_MATCH_FACTORY, DEFAULT_SET_OP_FACTORY, DEFAULT_VALUES_FACTORY, DEFAULT_TABLE_SCAN_FACTORY, + DEFAULT_SNAPSHOT_FACTORY, DEFAULT_SPOOL_FACTORY, DEFAULT_REPEAT_UNION_FACTORY)); + + @Override + public RelBuilder rename(List fieldNames) { + final List oldFieldNames = peek().getRowType().getFieldNames(); + Preconditions.checkArgument(fieldNames.size() <= oldFieldNames.size(), "More names than fields"); + final List newFieldNames = new ArrayList<>(oldFieldNames); + for (int i = 0; i < fieldNames.size(); i++) { + final String s = fieldNames.get(i); + if (s != null) { + newFieldNames.set(i, s); + } + } + if (oldFieldNames.equals(newFieldNames)) { + return this; + } + if (peek() instanceof Values) { + final Values v = (Values) build(); + final RelDataTypeFactory.Builder b = getTypeFactory().builder(); + for (Pair p : 
Pair.zip(newFieldNames, v.getRowType().getFieldList())) { + b.add(p.left, p.right.getType()); + } + return values(v.tuples, b.build()); + } + if (peek() instanceof CoralUncollect) { + final CoralUncollect v = (CoralUncollect) build(); + final RelDataTypeFactory.Builder b = getTypeFactory().builder(); + for (Pair p : Pair.zip(newFieldNames, v.getRowType().getFieldList())) { + b.add(p.left, p.right.getType()); + } + push(v.copy(b.build())); + return this; + } + + return project(fields(), newFieldNames, true); + } +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralTypeSystem.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralTypeSystem.java new file mode 100644 index 000000000..164806505 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralTypeSystem.java @@ -0,0 +1,202 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeSystemImpl; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; + + +/** + * Coral's type system configuration for Calcite, defining precision and scale defaults + * compatible with Hive/Spark SQL semantics. + * + *
<p>
This class was previously named {@code HiveTypeSystem} and has been renamed to better + * reflect that it is a dialect-agnostic Calcite type system configuration used across + * all Coral translation targets (Hive, Spark, Trino, etc.). + */ +public class CoralTypeSystem extends RelDataTypeSystemImpl { + private static final int MAX_DECIMAL_PRECISION = 38; + private static final int MAX_DECIMAL_SCALE = 38; + private static final int DEFAULT_DECIMAL_PRECISION = 10; + private static final int MAX_CHAR_PRECISION = Integer.MAX_VALUE; + private static final int DEFAULT_VARCHAR_PRECISION = 65535; + private static final int DEFAULT_CHAR_PRECISION = 255; + private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE; + private static final int MAX_TIMESTAMP_PRECISION = 9; + private static final int DEFAULT_TINYINT_PRECISION = 3; + private static final int DEFAULT_SMALLINT_PRECISION = 5; + private static final int DEFAULT_INTEGER_PRECISION = 10; + private static final int DEFAULT_BIGINT_PRECISION = 19; + + @Override + public int getMaxScale(SqlTypeName typeName) { + switch (typeName) { + case DECIMAL: + return getMaxNumericScale(); + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case INTERVAL_YEAR_MONTH: + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return SqlTypeName.MAX_INTERVAL_FRACTIONAL_SECOND_PRECISION; + default: + return -1; + } + } + + @Override + public int getDefaultPrecision(SqlTypeName typeName) { + switch (typeName) { + case BINARY: + case VARBINARY: + case TIME: + case TIMESTAMP: + return RelDataType.PRECISION_NOT_SPECIFIED; + case CHAR: + return DEFAULT_CHAR_PRECISION; + case VARCHAR: + return DEFAULT_VARCHAR_PRECISION; + case DECIMAL: + return DEFAULT_DECIMAL_PRECISION; + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case 
INTERVAL_YEAR_MONTH: + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION; + case TINYINT: + return DEFAULT_TINYINT_PRECISION; + case SMALLINT: + return DEFAULT_SMALLINT_PRECISION; + case INTEGER: + return DEFAULT_INTEGER_PRECISION; + case BIGINT: + return DEFAULT_BIGINT_PRECISION; + default: + return -1; + } + } + + @Override + public int getMaxPrecision(SqlTypeName typeName) { + switch (typeName) { + case DECIMAL: + return getMaxNumericPrecision(); + case VARCHAR: + case CHAR: + return MAX_CHAR_PRECISION; + case VARBINARY: + case BINARY: + return MAX_BINARY_PRECISION; + case TIME: + case TIMESTAMP: + return MAX_TIMESTAMP_PRECISION; + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case INTERVAL_YEAR_MONTH: + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return SqlTypeName.MAX_INTERVAL_START_PRECISION; + default: + return -1; + } + } + + @Override + public int getMaxNumericScale() { + return MAX_DECIMAL_SCALE; + } + + @Override + public int getMaxNumericPrecision() { + return MAX_DECIMAL_PRECISION; + } + + @Override + public RelDataType deriveSumType(RelDataTypeFactory typeFactory, RelDataType argumentType) { + switch (argumentType.getSqlTypeName()) { + case TINYINT: + case SMALLINT: + return nullableType(typeFactory, SqlTypeName.INTEGER); + case INTEGER: + case BIGINT: + return nullableType(typeFactory, SqlTypeName.BIGINT); + case REAL: + case FLOAT: + case DOUBLE: + return nullableType(typeFactory, SqlTypeName.DOUBLE); + case DECIMAL: + return nullableType(typeFactory, SqlTypeName.DECIMAL); + 
default: + return argumentType; + } + } + + @Override + public boolean shouldConvertRaggedUnionTypesToVarying() { + return true; + } + + @Override + public boolean isSchemaCaseSensitive() { + return false; + } + + @Override + public RelDataType deriveDecimalDivideType(RelDataTypeFactory typeFactory, RelDataType type1, RelDataType type2) { + if (SqlTypeUtil.isExactNumeric(type1) && SqlTypeUtil.isExactNumeric(type2)) { + if (SqlTypeUtil.isDecimal(type1) || SqlTypeUtil.isDecimal(type2)) { + return super.deriveDecimalDivideType(typeFactory, type1, type2); + } else { + return nullableType(typeFactory, SqlTypeName.DOUBLE); + } + } + return null; + } + + @Override + public RelDataType deriveDecimalMultiplyType(RelDataTypeFactory typeFactory, RelDataType type1, RelDataType type2) { + if (SqlTypeUtil.isExactNumeric(type1) && SqlTypeUtil.isExactNumeric(type2)) { + if (SqlTypeUtil.isDecimal(type1) || SqlTypeUtil.isDecimal(type2)) { + return super.deriveDecimalMultiplyType(typeFactory, type1, type2); + } else if (SqlTypeUtil.isBigint(type1) || SqlTypeUtil.isBigint(type2)) { + return nullableType(typeFactory, SqlTypeName.BIGINT); + } + } + return null; + } + + private RelDataType nullableType(RelDataTypeFactory typeFactory, SqlTypeName typeName) { + return typeFactory.createTypeWithNullability(typeFactory.createSqlType(typeName), true); + } +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralUncollect.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralUncollect.java new file mode 100644 index 000000000..15a7e5280 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralUncollect.java @@ -0,0 +1,90 @@ +/** + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.common; + +import java.util.List; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelInput; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.sql.SqlUnnestOperator; +import org.apache.calcite.sql.type.MapSqlType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * RelNode to represent Coral's Uncollect (UNNEST/LATERAL VIEW EXPLODE) semantics. + * + *
<p>
Coral semantics differ from Calcite Uncollect in handling operand types array(struct). + * For array(struct), Calcite flattens internal struct to multiple columns whereas Coral + * returns single column of struct type (matching Hive/Spark behavior). + * + *
<p>
This class was previously named {@code HiveUncollect} and has been renamed to better + * reflect that it is used across all Coral translation targets. + */ +public class CoralUncollect extends Uncollect { + private static final Logger LOGGER = LoggerFactory.getLogger(CoralUncollect.class); + + public CoralUncollect(RelOptCluster cluster, RelTraitSet traitSet, RelNode input, boolean withOrdinality) { + super(cluster, traitSet, input, withOrdinality); + } + + public CoralUncollect(RelInput input) { + super(input); + } + + @Override + public RelNode copy(RelTraitSet traitSet, RelNode input) { + assert traitSet.containsIfApplicable(Convention.NONE); + CoralUncollect result = new CoralUncollect(getCluster(), traitSet, input, withOrdinality); + result.rowType = this.rowType; + return result; + } + + /** + * Create a copy of this CoralUncollect object with the specified rowType. + * @param rowType rowType of the newly created CoralUncollect object + * @return a new CoralUncollect object + */ + public RelNode copy(RelDataType rowType) { + assert traitSet.containsIfApplicable(Convention.NONE); + CoralUncollect result = new CoralUncollect(getCluster(), traitSet, input, withOrdinality); + result.rowType = rowType; + return result; + } + + @Override + protected RelDataType deriveRowType() { + if (rowType != null) { + return rowType; + } + RelDataType inputType = input.getRowType(); + assert inputType.isStruct() : inputType + " is not a struct"; + final List fields = inputType.getFieldList(); + final RelDataTypeFactory.Builder builder = input.getCluster().getTypeFactory().builder(); + for (RelDataTypeField field : fields) { + if (field.getType() instanceof MapSqlType) { + builder.add(SqlUnnestOperator.MAP_KEY_COLUMN_NAME, field.getType().getKeyType()); + builder.add(SqlUnnestOperator.MAP_VALUE_COLUMN_NAME, field.getType().getValueType()); + } else { + RelDataType ret = field.getType().getComponentType(); + builder.add(field.getName(), ret); + } + } + if (withOrdinality) { + 
builder.add(SqlUnnestOperator.ORDINALITY_COLUMN_NAME, SqlTypeName.INTEGER); + } + return builder.build(); + } +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveRelBuilder.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveRelBuilder.java index 09deaa1e2..9d12be695 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveRelBuilder.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveRelBuilder.java @@ -1,127 +1,42 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ package com.linkedin.coral.common; -import java.util.ArrayList; -import java.util.List; - -import com.google.common.base.Preconditions; - import org.apache.calcite.plan.Context; -import org.apache.calcite.plan.Contexts; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptSchema; -import org.apache.calcite.rel.core.Values; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.tools.FrameworkConfig; -import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; -import org.apache.calcite.util.Pair; - -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_AGGREGATE_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_EXCHANGE_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_FILTER_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_JOIN_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_MATCH_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_PROJECT_FACTORY; -import static 
org.apache.calcite.rel.core.RelFactories.DEFAULT_REPEAT_UNION_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SET_OP_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SNAPSHOT_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SORT_EXCHANGE_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SORT_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_SPOOL_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_TABLE_SCAN_FACTORY; -import static org.apache.calcite.rel.core.RelFactories.DEFAULT_VALUES_FACTORY; /** - * HiveRelBuilder overrides {@link #rename} method. - * Instead of wrapping round HiveUncollect with a Project RelNode, it tries to rebuild - * HiveUncollect by calling {@link com.linkedin.coral.common.HiveUncollect#copy(org.apache.calcite.rel.type.RelDataType)} - * which sets the rowType. - * - * The benefit of eliminating the Project RelNode is that it avoids an extra and unnecessary - * (SELECT ... FROM ... AS ...) wrapper in the unparsed SQL queries. For example, in Trino, - * this allows us to generate "FROM ... CROSS JOIN UNNEST(...)" instead of - * "FROM ... CROSS JOIN (SELECT ... FROM UNNEST(...))". + * @deprecated Use {@link CoralRelBuilder} instead. This class has been renamed to better reflect + * that it is used across all Coral translation targets, not just Hive. + * This class will be removed in a future release. */ -public class HiveRelBuilder extends RelBuilder { +@Deprecated +public class HiveRelBuilder extends CoralRelBuilder { private HiveRelBuilder(Context context, RelOptCluster cluster, RelOptSchema relOptSchema) { super(context, cluster, relOptSchema); } + /** @deprecated Use {@link CoralRelBuilder#create(FrameworkConfig)} instead. 
*/ + @Deprecated public static RelBuilder create(FrameworkConfig config) { - return Frameworks.withPrepare(config, (cluster, relOptSchema, rootSchema, statement) -> { - cluster = RelOptCluster.create(cluster.getPlanner(), - new RexBuilder(new CoralJavaTypeFactoryImpl(cluster.getTypeFactory().getTypeSystem()))); - return new HiveRelBuilder(config.getContext(), cluster, relOptSchema); - }); + return CoralRelBuilder.create(config); } - /** Creates a {@link RelBuilderFactory}, a partially-created RelBuilder. - * Just add a {@link RelOptCluster} and a {@link RelOptSchema} - * - * Note that this function creates a HiveRelBuilder instead of a RelBuilder as in its parent. - * */ + /** @deprecated Use {@link CoralRelBuilder#proto(Context)} instead. */ + @Deprecated public static RelBuilderFactory proto(final Context context) { - return (cluster, schema) -> new HiveRelBuilder(context, cluster, schema); + return CoralRelBuilder.proto(context); } - /** - * Note that this static variable is created with HiveRelBuilder.proto instead of RelBuilder.proto as in its parent. - */ - public static final RelBuilderFactory LOGICAL_BUILDER = - HiveRelBuilder.proto(Contexts.of(DEFAULT_PROJECT_FACTORY, DEFAULT_FILTER_FACTORY, DEFAULT_JOIN_FACTORY, - DEFAULT_SORT_FACTORY, DEFAULT_EXCHANGE_FACTORY, DEFAULT_SORT_EXCHANGE_FACTORY, DEFAULT_AGGREGATE_FACTORY, - DEFAULT_MATCH_FACTORY, DEFAULT_SET_OP_FACTORY, DEFAULT_VALUES_FACTORY, DEFAULT_TABLE_SCAN_FACTORY, - DEFAULT_SNAPSHOT_FACTORY, DEFAULT_SPOOL_FACTORY, DEFAULT_REPEAT_UNION_FACTORY)); - - /** Almost the same as the parent method except the handling of HiveUncollect. - * See also {@link org.apache.calcite.tools.RelBuilder#rename} for details. 
- * - * @param fieldNames List of desired field names; may contain null values or - * have fewer fields than the current row type - */ - @Override - public RelBuilder rename(List fieldNames) { - final List oldFieldNames = peek().getRowType().getFieldNames(); - Preconditions.checkArgument(fieldNames.size() <= oldFieldNames.size(), "More names than fields"); - final List newFieldNames = new ArrayList<>(oldFieldNames); - for (int i = 0; i < fieldNames.size(); i++) { - final String s = fieldNames.get(i); - if (s != null) { - newFieldNames.set(i, s); - } - } - if (oldFieldNames.equals(newFieldNames)) { - return this; - } - if (peek() instanceof Values) { - // Special treatment for VALUES. Re-build it rather than add a project. - final Values v = (Values) build(); - final RelDataTypeFactory.Builder b = getTypeFactory().builder(); - for (Pair p : Pair.zip(newFieldNames, v.getRowType().getFieldList())) { - b.add(p.left, p.right.getType()); - } - return values(v.tuples, b.build()); - } - if (peek() instanceof HiveUncollect) { - // Special treatment for HiveUncollect. Re-build it rather than add a project. - final HiveUncollect v = (HiveUncollect) build(); - final RelDataTypeFactory.Builder b = getTypeFactory().builder(); - for (Pair p : Pair.zip(newFieldNames, v.getRowType().getFieldList())) { - b.add(p.left, p.right.getType()); - } - push(v.copy(b.build())); - return this; - } - - return project(fields(), newFieldNames, true); - } + /** @deprecated Use {@link CoralRelBuilder#LOGICAL_BUILDER} instead. 
*/ + @Deprecated + public static final RelBuilderFactory LOGICAL_BUILDER = CoralRelBuilder.LOGICAL_BUILDER; } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java index d6c7b0f2e..88bc93889 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java @@ -5,208 +5,12 @@ */ package com.linkedin.coral.common; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeSystemImpl; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.sql.type.SqlTypeUtil; - -// Precision and scale values copied from Hive source code -public class HiveTypeSystem extends RelDataTypeSystemImpl { - // TODO: This should come from type system; Currently there is no definition - // in type system for this. - private static final int MAX_DECIMAL_PRECISION = 38; - private static final int MAX_DECIMAL_SCALE = 38; - private static final int DEFAULT_DECIMAL_PRECISION = 10; - // STRING type in Hive is represented as VARCHAR with precision Integer.MAX_VALUE. - // In turn, the max VARCHAR precision should be 65535. However, the value is not - // used for validation, but rather only internally by the optimizer to know the max - // precision supported by the system. Thus, no VARCHAR precision should fall between - // 65535 and Integer.MAX_VALUE; the check for VARCHAR precision is done in Hive. 
- private static final int MAX_CHAR_PRECISION = Integer.MAX_VALUE; - private static final int DEFAULT_VARCHAR_PRECISION = 65535; - private static final int DEFAULT_CHAR_PRECISION = 255; - private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE; - private static final int MAX_TIMESTAMP_PRECISION = 9; - private static final int DEFAULT_TINYINT_PRECISION = 3; - private static final int DEFAULT_SMALLINT_PRECISION = 5; - private static final int DEFAULT_INTEGER_PRECISION = 10; - private static final int DEFAULT_BIGINT_PRECISION = 19; - - @Override - public int getMaxScale(SqlTypeName typeName) { - switch (typeName) { - case DECIMAL: - return getMaxNumericScale(); - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - return SqlTypeName.MAX_INTERVAL_FRACTIONAL_SECOND_PRECISION; - default: - return -1; - } - } - - @Override - public int getDefaultPrecision(SqlTypeName typeName) { - switch (typeName) { - // Hive will always require user to specify exact sizes for char, varchar; - // Binary doesn't need any sizes; Decimal has the default of 10. 
- case BINARY: - case VARBINARY: - case TIME: - case TIMESTAMP: - return RelDataType.PRECISION_NOT_SPECIFIED; - case CHAR: - return DEFAULT_CHAR_PRECISION; - case VARCHAR: - return DEFAULT_VARCHAR_PRECISION; - case DECIMAL: - return DEFAULT_DECIMAL_PRECISION; - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION; - case TINYINT: - return DEFAULT_TINYINT_PRECISION; - case SMALLINT: - return DEFAULT_SMALLINT_PRECISION; - case INTEGER: - return DEFAULT_INTEGER_PRECISION; - case BIGINT: - return DEFAULT_BIGINT_PRECISION; - default: - return -1; - } - } - - @Override - public int getMaxPrecision(SqlTypeName typeName) { - switch (typeName) { - case DECIMAL: - return getMaxNumericPrecision(); - case VARCHAR: - case CHAR: - return MAX_CHAR_PRECISION; - case VARBINARY: - case BINARY: - return MAX_BINARY_PRECISION; - case TIME: - case TIMESTAMP: - return MAX_TIMESTAMP_PRECISION; - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - return SqlTypeName.MAX_INTERVAL_START_PRECISION; - default: - return -1; - } - } - - @Override - public int getMaxNumericScale() { - return MAX_DECIMAL_SCALE; - } - - @Override - public int getMaxNumericPrecision() { - return MAX_DECIMAL_PRECISION; - } - - @Override - public RelDataType deriveSumType(RelDataTypeFactory typeFactory, RelDataType argumentType) { - switch (argumentType.getSqlTypeName()) { - case TINYINT: - case SMALLINT: - 
return nullableType(typeFactory, SqlTypeName.INTEGER); - case INTEGER: - case BIGINT: - return nullableType(typeFactory, SqlTypeName.BIGINT); - case REAL: - case FLOAT: - case DOUBLE: - return nullableType(typeFactory, SqlTypeName.DOUBLE); - case DECIMAL: - return nullableType(typeFactory, SqlTypeName.DECIMAL); - default: - return argumentType; - } - } - - @Override - public boolean shouldConvertRaggedUnionTypesToVarying() { - return true; - } - - @Override - public boolean isSchemaCaseSensitive() { - return false; - } - - /** - * This method needs to be overridden to make sure that the "/" operator returns {@link org.apache.calcite.sql.type.ReturnTypes#DOUBLE} - * if neither of the operands is decimal type, which is compatible with the expected data type in Hive/Spark. - */ - @Override - public RelDataType deriveDecimalDivideType(RelDataTypeFactory typeFactory, RelDataType type1, RelDataType type2) { - if (SqlTypeUtil.isExactNumeric(type1) && SqlTypeUtil.isExactNumeric(type2)) { - if (SqlTypeUtil.isDecimal(type1) || SqlTypeUtil.isDecimal(type2)) { - return super.deriveDecimalDivideType(typeFactory, type1, type2); - } else { - return nullableType(typeFactory, SqlTypeName.DOUBLE); - } - } - return null; - } - - /** - * This override makes sure that the multiply operator, "*", returns {@link org.apache.calcite.sql.type.ReturnTypes#BIGINT} - * if neither of the operands is decimal type, which is compatible with the expected data type in Hive/Spark. 
- */ - @Override - public RelDataType deriveDecimalMultiplyType(RelDataTypeFactory typeFactory, RelDataType type1, RelDataType type2) { - if (SqlTypeUtil.isExactNumeric(type1) && SqlTypeUtil.isExactNumeric(type2)) { - if (SqlTypeUtil.isDecimal(type1) || SqlTypeUtil.isDecimal(type2)) { - return super.deriveDecimalMultiplyType(typeFactory, type1, type2); - } else if (SqlTypeUtil.isBigint(type1) || SqlTypeUtil.isBigint(type2)) { - return nullableType(typeFactory, SqlTypeName.BIGINT); - } - } - return null; - } - - private RelDataType nullableType(RelDataTypeFactory typeFactory, SqlTypeName typeName) { - return typeFactory.createTypeWithNullability(typeFactory.createSqlType(typeName), true); - } +/** + * @deprecated Use {@link CoralTypeSystem} instead. This class has been renamed to better reflect + * that it is a dialect-agnostic Calcite type system configuration. + * This class will be removed in a future release. + */ +@Deprecated +public class HiveTypeSystem extends CoralTypeSystem { } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveUncollect.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveUncollect.java index ff7a1d1b1..e509398e7 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveUncollect.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveUncollect.java @@ -1,36 +1,23 @@ /** - * Copyright 2018-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. 
*/ package com.linkedin.coral.common; -import java.util.List; - -import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelInput; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.sql.SqlUnnestOperator; -import org.apache.calcite.sql.type.MapSqlType; -import org.apache.calcite.sql.type.SqlTypeName; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** - * RelNode to represent Hive's Uncollect semantics. - * Hive semantics differ from Calcite Uncollect in handling operand types array(struct). - * For array(struct), Calcite flattens internal struct to multiple columns whereas Hive - * returns single column of struct type + * @deprecated Use {@link CoralUncollect} instead. This class has been renamed to better reflect + * that it is used across all Coral translation targets, not just Hive. + * This class will be removed in a future release. 
*/ -public class HiveUncollect extends Uncollect { - private static final Logger LOGGER = LoggerFactory.getLogger(HiveUncollect.class); +@Deprecated +public class HiveUncollect extends CoralUncollect { public HiveUncollect(RelOptCluster cluster, RelTraitSet traitSet, RelNode input, boolean withOrdinality) { super(cluster, traitSet, input, withOrdinality); @@ -39,50 +26,4 @@ public HiveUncollect(RelOptCluster cluster, RelTraitSet traitSet, RelNode input, public HiveUncollect(RelInput input) { super(input); } - - @Override - public RelNode copy(RelTraitSet traitSet, RelNode input) { - assert traitSet.containsIfApplicable(Convention.NONE); - HiveUncollect result = new HiveUncollect(getCluster(), traitSet, input, withOrdinality); - // copy the rowType as well - result.rowType = this.rowType; - return result; - } - - /** - * Create a copy of this HiveUncollect object with the specified rowType. - * @param rowType rowType of the newly created HiveUncollect object - * @return a new HiveUncollect object - */ - public RelNode copy(RelDataType rowType) { - assert traitSet.containsIfApplicable(Convention.NONE); - HiveUncollect result = new HiveUncollect(getCluster(), traitSet, input, withOrdinality); - // set the rowType of the new HiveUncollect object - result.rowType = rowType; - return result; - } - - @Override - protected RelDataType deriveRowType() { - if (rowType != null) { - return rowType; - } - RelDataType inputType = input.getRowType(); - assert inputType.isStruct() : inputType + " is not a struct"; - final List fields = inputType.getFieldList(); - final RelDataTypeFactory.Builder builder = input.getCluster().getTypeFactory().builder(); - for (RelDataTypeField field : fields) { - if (field.getType() instanceof MapSqlType) { - builder.add(SqlUnnestOperator.MAP_KEY_COLUMN_NAME, field.getType().getKeyType()); - builder.add(SqlUnnestOperator.MAP_VALUE_COLUMN_NAME, field.getType().getValueType()); - } else { - RelDataType ret = field.getType().getComponentType(); - 
builder.add(field.getName(), ret); - } - } - if (withOrdinality) { - builder.add(SqlUnnestOperator.ORDINALITY_COLUMN_NAME, SqlTypeName.INTEGER); - } - return builder.build(); - } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreDbSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreDbSchema.java new file mode 100644 index 000000000..858ebd89c --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreDbSchema.java @@ -0,0 +1,104 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.*; + +import static com.google.common.base.Preconditions.checkNotNull; + + +/** + * Adaptor from a local metastore catalog providing database and table names + * to Calcite {@link Schema}. + * + *
+ * <p>
This class is a replacement for {@link HiveDbSchema} to work with localMetastore in coral-spark-plan module. + */ +public class LocalMetastoreDbSchema implements Schema { + + private final Map>> localMetastore; + private final String dbName; + + public LocalMetastoreDbSchema(Map>> localMetastore, String dbName) { + checkNotNull(localMetastore); + checkNotNull(dbName); + this.localMetastore = localMetastore; + this.dbName = dbName; + } + + @Override + public Table getTable(String tableName) { + if (localMetastore.containsKey(dbName) && localMetastore.get(dbName).containsKey(tableName)) { + return new LocalMetastoreHiveTable(tableName, localMetastore.get(dbName).get(tableName)); + } + return null; + } + + @Override + public Set getTableNames() { + return ImmutableSet.copyOf(localMetastore.get(dbName).keySet()); + } + + @Override + public RelProtoDataType getType(String s) { + return null; + } + + @Override + public Set getTypeNames() { + return null; + } + + @Override + public Collection getFunctions(String name) { + return ImmutableList.of(); + } + + @Override + public Set getFunctionNames() { + return ImmutableSet.of(); + } + + /** + * A DB does not have subschema + * @param name name of the schema + * @return Calcite schema + */ + @Override + public Schema getSubSchema(String name) { + return null; + } + + @Override + public Set getSubSchemaNames() { + return ImmutableSet.of(); + } + + @Override + public Expression getExpression(SchemaPlus parentSchema, String name) { + return null; + } + + @Override + public boolean isMutable() { + return true; + } + + // TODO: This needs to be snapshot of current state of catalog + @Override + public Schema snapshot(SchemaVersion schemaVersion) { + return this; + } +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveDbSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveDbSchema.java index 236c58151..0e2240e53 100644 --- 
a/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveDbSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveDbSchema.java @@ -1,104 +1,21 @@ /** - * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ package com.linkedin.coral.common; -import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.Set; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; - -import org.apache.calcite.linq4j.tree.Expression; -import org.apache.calcite.rel.type.RelProtoDataType; -import org.apache.calcite.schema.*; - -import static com.google.common.base.Preconditions.checkNotNull; /** - * This class is a replacement for {@link HiveDbSchema} to work with localMetastore in coral-spark-plan module - * - * Adaptor from Hive catalog providing database and table names - * to Calcite {@link Schema} + * @deprecated Use {@link LocalMetastoreDbSchema} instead. This class will be removed in a future release. 
*/ -public class LocalMetastoreHiveDbSchema implements Schema { - - private final Map>> localMetastore; - private final String dbName; +@Deprecated +public class LocalMetastoreHiveDbSchema extends LocalMetastoreDbSchema { public LocalMetastoreHiveDbSchema(Map>> localMetastore, String dbName) { - checkNotNull(localMetastore); - checkNotNull(dbName); - this.localMetastore = localMetastore; - this.dbName = dbName; - } - - @Override - public Table getTable(String tableName) { - if (localMetastore.containsKey(dbName) && localMetastore.get(dbName).containsKey(tableName)) { - return new LocalMetastoreHiveTable(tableName, localMetastore.get(dbName).get(tableName)); - } - return null; - } - - @Override - public Set getTableNames() { - return ImmutableSet.copyOf(localMetastore.get(dbName).keySet()); - } - - @Override - public RelProtoDataType getType(String s) { - return null; - } - - @Override - public Set getTypeNames() { - return null; - } - - @Override - public Collection getFunctions(String name) { - return ImmutableList.of(); - } - - @Override - public Set getFunctionNames() { - return ImmutableSet.of(); - } - - /** - * A Hive DB does not have subschema - * @param name name of the schema - * @return Calcite schema - */ - @Override - public Schema getSubSchema(String name) { - return null; - } - - @Override - public Set getSubSchemaNames() { - return ImmutableSet.of(); - } - - @Override - public Expression getExpression(SchemaPlus parentSchema, String name) { - return null; - } - - @Override - public boolean isMutable() { - return true; - } - - // TODO: This needs to be snapshot of current state of catalog - @Override - public Schema snapshot(SchemaVersion schemaVersion) { - return this; + super(localMetastore, dbName); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveSchema.java index 42c6890da..a5ca61bb6 100644 --- 
a/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreHiveSchema.java @@ -1,112 +1,21 @@ /** - * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ package com.linkedin.coral.common; -import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.Set; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; - -import org.apache.calcite.linq4j.tree.Expression; -import org.apache.calcite.rel.type.RelProtoDataType; -import org.apache.calcite.schema.*; - -import static com.google.common.base.Preconditions.checkNotNull; /** - * This class is a replacement for {@link HiveSchema} to work with localMetastore in coral-spark-plan module - * - * Adaptor from Hive catalog providing database and table names - * to Calcite {@link Schema}. This class represents the "root" schema - * that holds all hive databases as subschema and no tables. + * @deprecated Use {@link LocalMetastoreSchema} instead. This class will be removed in a future release. */ -public class LocalMetastoreHiveSchema implements Schema { - - private final Map>> localMetastore; +@Deprecated +public class LocalMetastoreHiveSchema extends LocalMetastoreSchema { - /** - * Create HiveSchema using input metastore client to read hive catalog - * @param localMetastore map like a Hive metastore client - */ public LocalMetastoreHiveSchema(Map>> localMetastore) { - this.localMetastore = checkNotNull(localMetastore); - } - - /** - * This always returns null as root hive schema does not have tables. 
- * @param name name of the table - * @return get calcite table representation - */ - @Override - public Table getTable(String name) { - return null; - } - - @Override - public Set getTableNames() { - return ImmutableSet.of(); - } - - @Override - public RelProtoDataType getType(String s) { - return null; - } - - @Override - public Set getTypeNames() { - return null; - } - - @Override - public Collection getFunctions(String name) { - return ImmutableList.of(); - } - - @Override - public Set getFunctionNames() { - return ImmutableSet.of(); - } - - @Override - public Schema getSubSchema(String name) { - if (localMetastore.containsKey(name)) { - return new LocalMetastoreHiveDbSchema(localMetastore, name); - } - return null; - } - - @Override - public Set getSubSchemaNames() { - return ImmutableSet.copyOf(localMetastore.keySet()); - } - - @Override - public Expression getExpression(SchemaPlus parentSchema, String name) { - return null; - } - - @Override - public boolean isMutable() { - return true; - } - - // TODO: This needs to be snapshot of current state of catalog - @Override - public Schema snapshot(SchemaVersion schemaVersion) { - return this; - } - - /** - * @return Hive metastore client, because we don't have MSC in this class, return null for now - */ - public HiveMetastoreClient getHiveMetastoreClient() { - return null; + super(localMetastore); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreSchema.java new file mode 100644 index 000000000..8c23dc5e5 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/LocalMetastoreSchema.java @@ -0,0 +1,112 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.common; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.*; + +import static com.google.common.base.Preconditions.checkNotNull; + + +/** + * Adaptor from a local metastore catalog providing database and table names + * to Calcite {@link Schema}. This class represents the "root" schema + * that holds all databases as subschema and no tables. + * + *
+ * <p>
This class is a replacement for {@link HiveSchema} to work with localMetastore in coral-spark-plan module. + */ +public class LocalMetastoreSchema implements Schema { + + private final Map>> localMetastore; + + /** + * Create LocalMetastoreSchema using input metastore map to read catalog + * @param localMetastore map like a Hive metastore client + */ + public LocalMetastoreSchema(Map>> localMetastore) { + this.localMetastore = checkNotNull(localMetastore); + } + + /** + * This always returns null as root schema does not have tables. + * @param name name of the table + * @return get calcite table representation + */ + @Override + public Table getTable(String name) { + return null; + } + + @Override + public Set getTableNames() { + return ImmutableSet.of(); + } + + @Override + public RelProtoDataType getType(String s) { + return null; + } + + @Override + public Set getTypeNames() { + return null; + } + + @Override + public Collection getFunctions(String name) { + return ImmutableList.of(); + } + + @Override + public Set getFunctionNames() { + return ImmutableSet.of(); + } + + @Override + public Schema getSubSchema(String name) { + if (localMetastore.containsKey(name)) { + return new LocalMetastoreDbSchema(localMetastore, name); + } + return null; + } + + @Override + public Set getSubSchemaNames() { + return ImmutableSet.copyOf(localMetastore.keySet()); + } + + @Override + public Expression getExpression(SchemaPlus parentSchema, String name) { + return null; + } + + @Override + public boolean isMutable() { + return true; + } + + // TODO: This needs to be snapshot of current state of catalog + @Override + public Schema snapshot(SchemaVersion schemaVersion) { + return this; + } + + /** + * @return Hive metastore client, because we don't have MSC in this class, return null for now + */ + public HiveMetastoreClient getHiveMetastoreClient() { + return null; + } +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java 
b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index f76150684..09aa56bed 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -93,7 +93,7 @@ protected ToRelConverter(@Nonnull HiveMetastoreClient hiveMetastoreClient) { // if the service uses its own service loader (see Trino) new Driver(); config = Frameworks.newConfigBuilder().convertletTable(convertletTable).defaultSchema(schemaPlus) - .typeSystem(new HiveTypeSystem()).traitDefs((List) null).operatorTable(getOperatorTable()) + .typeSystem(new CoralTypeSystem()).traitDefs((List) null).operatorTable(getOperatorTable()) .programs(Programs.ofRules(Programs.RULE_SET)).build(); } @@ -117,7 +117,7 @@ protected ToRelConverter(@Nonnull CoralCatalog coralCatalog) { // if the service uses its own service loader (see Trino) new Driver(); config = Frameworks.newConfigBuilder().convertletTable(convertletTable).defaultSchema(schemaPlus) - .typeSystem(new HiveTypeSystem()).traitDefs((List) null).operatorTable(getOperatorTable()) + .typeSystem(new CoralTypeSystem()).traitDefs((List) null).operatorTable(getOperatorTable()) .programs(Programs.ofRules(Programs.RULE_SET)).build(); } @@ -133,7 +133,7 @@ protected ToRelConverter(Map>> localMetaStore) // if the service uses its own service loader (see Trino) new Driver(); config = Frameworks.newConfigBuilder().convertletTable(convertletTable).defaultSchema(schemaPlus) - .typeSystem(new HiveTypeSystem()).traitDefs((List) null).operatorTable(getOperatorTable()) + .typeSystem(new CoralTypeSystem()).traitDefs((List) null).operatorTable(getOperatorTable()) .programs(Programs.ofRules(Programs.RULE_SET)).build(); } @@ -272,7 +272,7 @@ protected RelBuilder getRelBuilder() { // 2. 
Converted expression is harder to validate for correctness(because it appears different from input) if (relBuilder == null) { Hook.REL_BUILDER_SIMPLIFY.add(Hook.propertyJ(false)); - relBuilder = HiveRelBuilder.create(config); + relBuilder = CoralRelBuilder.create(config); } return relBuilder; } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToHiveTypeStringConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToHiveTypeStringConverter.java index 78f45b0f7..dfcbe2367 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToHiveTypeStringConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToHiveTypeStringConverter.java @@ -5,166 +5,26 @@ */ package com.linkedin.coral.common.utils; -import java.util.ArrayList; -import java.util.List; - import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rel.type.RelRecordType; -import org.apache.calcite.sql.type.ArraySqlType; -import org.apache.calcite.sql.type.MapSqlType; /** - * Transforms a RelDataType to a Hive type string such that it is parseable and semantically correct. - * Some example of Hive type string for a RelDataType are as follows: - * - * Example 1: - * RelDataType: - * struct(s1:integer,s2:varchar) - * Hive Type String: - * struct<s1:int,s2:string> - * - * Example 2: - * RelDataType: - * map(varchar,struct(s1:integer,s2:varchar)) - * Hive Type String: - * map<string,struct<s1:int,s2:string>> - * - * Example 3: - * RelDataType: - * array(struct(s1:integer,s2:varchar)) - * Hive Type String: - * array<struct<s1:int,s2:string>> + * @deprecated Use {@link RelDataTypeToTypeStringConverter} instead. This class will be removed in a future release. 
*/ +@Deprecated public class RelDataTypeToHiveTypeStringConverter { private RelDataTypeToHiveTypeStringConverter() { } public RelDataTypeToHiveTypeStringConverter(boolean convertUnionTypes) { - this.convertUnionTypes = convertUnionTypes; + new RelDataTypeToTypeStringConverter(convertUnionTypes); } - /** - * If true, Coral will convert single uniontypes back to Hive's native uniontype representation. This is necessary - * because some engines have readers that unwrap Hive single uniontypes to just the underlying data type, causing - * the loss of information that the column was originally a uniontype in Hive. This can be problematic when calling - * the `coalesce_struct` UDF on such columns, as they are expected to be treated as uniontypes. Retaining the - * original uniontype record and passing it into `coalesce_struct` ensures correct handling. - * - * Example: - * RelDataType: - * struct(tag:integer,field0:varchar) - * Hive Type String: - * uniontype<string> - */ - private static boolean convertUnionTypes = false; - /** * @param relDataType a given RelDataType - * @return a syntactically and semantically correct Hive type string for relDataType + * @return a syntactically and semantically correct type string for relDataType */ public static String convertRelDataType(RelDataType relDataType) { - switch (relDataType.getSqlTypeName()) { - case ROW: - return buildStructDataTypeString((RelRecordType) relDataType); - case ARRAY: - return buildArrayDataTypeString((ArraySqlType) relDataType); - case MAP: - return buildMapDataTypeString((MapSqlType) relDataType); - case INTEGER: - return "int"; - case SMALLINT: - return "smallint"; - case TINYINT: - return "tinyint"; - case BIGINT: - return "bigint"; - case DOUBLE: - return "double"; - case REAL: - case FLOAT: - return "float"; - case BOOLEAN: - return "boolean"; - case CHAR: - case VARCHAR: - return "string"; - case DATE: - return "date"; - case TIME: - case TIMESTAMP: - return "timestamp"; - case BINARY: - case 
VARBINARY: - case OTHER: - return "binary"; - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - case INTERVAL_MONTH: - case INTERVAL_YEAR: - case INTERVAL_YEAR_MONTH: - return "interval"; - case NULL: - return "null"; - default: - throw new RuntimeException(String.format( - "Unhandled RelDataType %s in Converter from RelDataType to Hive DataType", relDataType.getSqlTypeName())); - } - } - - /** - * Build a Hive struct/row string with format: - * row<[field_name]:[field_type],...> - * @param relRecordType a given struct RelDataType - * @return a string that represents the given relRecordType - */ - private static String buildStructDataTypeString(RelRecordType relRecordType) { - List structFieldStrings = new ArrayList<>(); - - // Convert single uniontypes as structs back to native Hive representation - if (convertUnionTypes && relRecordType.getFieldList().size() == 2 - && relRecordType.getFieldList().get(0).getName().equals("tag") - && relRecordType.getFieldList().get(1).getName().equals("field0")) { - return String.format("uniontype<%s>", convertRelDataType(relRecordType.getFieldList().get(1).getType())); - } - - for (RelDataTypeField fieldRelDataType : relRecordType.getFieldList()) { - structFieldStrings - .add(String.format("%s:%s", fieldRelDataType.getName(), convertRelDataType(fieldRelDataType.getType()))); - } - String subFieldsString = String.join(",", structFieldStrings); - return String.format("struct<%s>", subFieldsString); - } - - /** - * Build a Hive array string with format: - * array<[field_type]> - * @param arraySqlType a given array RelDataType - * @return a string that represents the given arraySqlType - */ - private static String buildArrayDataTypeString(ArraySqlType arraySqlType) { - String elementDataTypeString = 
convertRelDataType(arraySqlType.getComponentType()); - return String.format("array<%s>", elementDataTypeString); - } - - /** - * Build a Hive map string with format: - * map<[key_type],[value_type]> - * @param mapSqlType a given map RelDataType - * @return a string that represents the given mapSqlType - */ - private static String buildMapDataTypeString(MapSqlType mapSqlType) { - String keyDataTypeString = convertRelDataType(mapSqlType.getKeyType()); - String valueDataTypeString = convertRelDataType(mapSqlType.getValueType()); - return String.format("map<%s,%s>", keyDataTypeString, valueDataTypeString); + return RelDataTypeToTypeStringConverter.convertRelDataType(relDataType); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToTypeStringConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToTypeStringConverter.java new file mode 100644 index 000000000..3f78fa7c4 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/utils/RelDataTypeToTypeStringConverter.java @@ -0,0 +1,170 @@ +/** + * Copyright 2022-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common.utils; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rel.type.RelRecordType; +import org.apache.calcite.sql.type.ArraySqlType; +import org.apache.calcite.sql.type.MapSqlType; + + +/** + * Transforms a RelDataType to a type string such that it is parseable and semantically correct. 
+ * Some example of type strings for a RelDataType are as follows: + * + * Example 1: + * RelDataType: + * struct(s1:integer,s2:varchar) + * Type String: + * struct<s1:int,s2:string> + * + * Example 2: + * RelDataType: + * map(varchar,struct(s1:integer,s2:varchar)) + * Type String: + * map<string,struct<s1:int,s2:string>> + * + * Example 3: + * RelDataType: + * array(struct(s1:integer,s2:varchar)) + * Type String: + * array<struct<s1:int,s2:string>> + */ +public class RelDataTypeToTypeStringConverter { + private RelDataTypeToTypeStringConverter() { + } + + public RelDataTypeToTypeStringConverter(boolean convertUnionTypes) { + this.convertUnionTypes = convertUnionTypes; + } + + /** + * If true, Coral will convert single uniontypes back to native uniontype representation. This is necessary + * because some engines have readers that unwrap single uniontypes to just the underlying data type, causing + * the loss of information that the column was originally a uniontype. This can be problematic when calling + * the `coalesce_struct` UDF on such columns, as they are expected to be treated as uniontypes. Retaining the + * original uniontype record and passing it into `coalesce_struct` ensures correct handling. 
+ * + * Example: + * RelDataType: + * struct(tag:integer,field0:varchar) + * Type String: + * uniontype<string> + */ + private static boolean convertUnionTypes = false; + + /** + * @param relDataType a given RelDataType + * @return a syntactically and semantically correct type string for relDataType + */ + public static String convertRelDataType(RelDataType relDataType) { + switch (relDataType.getSqlTypeName()) { + case ROW: + return buildStructDataTypeString((RelRecordType) relDataType); + case ARRAY: + return buildArrayDataTypeString((ArraySqlType) relDataType); + case MAP: + return buildMapDataTypeString((MapSqlType) relDataType); + case INTEGER: + return "int"; + case SMALLINT: + return "smallint"; + case TINYINT: + return "tinyint"; + case BIGINT: + return "bigint"; + case DOUBLE: + return "double"; + case REAL: + case FLOAT: + return "float"; + case BOOLEAN: + return "boolean"; + case CHAR: + case VARCHAR: + return "string"; + case DATE: + return "date"; + case TIME: + case TIMESTAMP: + return "timestamp"; + case BINARY: + case VARBINARY: + case OTHER: + return "binary"; + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + case INTERVAL_MONTH: + case INTERVAL_YEAR: + case INTERVAL_YEAR_MONTH: + return "interval"; + case NULL: + return "null"; + default: + throw new RuntimeException(String.format( + "Unhandled RelDataType %s in Converter from RelDataType to DataType", relDataType.getSqlTypeName())); + } + } + + /** + * Build a struct/row string with format: + * row<[field_name]:[field_type],...> + * @param relRecordType a given struct RelDataType + * @return a string that represents the given relRecordType + */ + private static String buildStructDataTypeString(RelRecordType relRecordType) { + List structFieldStrings = new ArrayList<>(); + + // Convert 
single uniontypes as structs back to native representation + if (convertUnionTypes && relRecordType.getFieldList().size() == 2 + && relRecordType.getFieldList().get(0).getName().equals("tag") + && relRecordType.getFieldList().get(1).getName().equals("field0")) { + return String.format("uniontype<%s>", convertRelDataType(relRecordType.getFieldList().get(1).getType())); + } + + for (RelDataTypeField fieldRelDataType : relRecordType.getFieldList()) { + structFieldStrings + .add(String.format("%s:%s", fieldRelDataType.getName(), convertRelDataType(fieldRelDataType.getType()))); + } + String subFieldsString = String.join(",", structFieldStrings); + return String.format("struct<%s>", subFieldsString); + } + + /** + * Build an array string with format: + * array<[field_type]> + * @param arraySqlType a given array RelDataType + * @return a string that represents the given arraySqlType + */ + private static String buildArrayDataTypeString(ArraySqlType arraySqlType) { + String elementDataTypeString = convertRelDataType(arraySqlType.getComponentType()); + return String.format("array<%s>", elementDataTypeString); + } + + /** + * Build a map string with format: + * map<[key_type],[value_type]> + * @param mapSqlType a given map RelDataType + * @return a string that represents the given mapSqlType + */ + private static String buildMapDataTypeString(MapSqlType mapSqlType) { + String keyDataTypeString = convertRelDataType(mapSqlType.getKeyType()); + String valueDataTypeString = convertRelDataType(mapSqlType.getValueType()); + return String.format("map<%s,%s>", keyDataTypeString, valueDataTypeString); + } +}