diff --git a/coral-catalog-hive/build.gradle b/coral-catalog-hive/build.gradle new file mode 100644 index 000000000..64d61abd9 --- /dev/null +++ b/coral-catalog-hive/build.gradle @@ -0,0 +1,13 @@ +apply plugin: 'java-library' + +dependencies { + api project(':coral-catalog-spi') + + api(deps.'hive'.'hive-metastore') { + exclude group: 'com.linkedin.metastore-autometrics', module: 'autometrics-reporter' + exclude group: 'com.linkedin.metastore-audit', module: 'metastore-audit-logging' + exclude group: 'org.apache.avro', module: 'avro-tools' + } + + api deps.'hadoop'.'hadoop-common' +} diff --git a/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteTableAdapter.java b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteTableAdapter.java new file mode 100644 index 000000000..40b969df4 --- /dev/null +++ b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteTableAdapter.java @@ -0,0 +1,250 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.catalog.hive; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; + +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.linkedin.coral.common.types.CoralDataType; +import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter; +import com.linkedin.coral.common.types.StructField; +import com.linkedin.coral.common.types.StructType; + + +/** + * Calcite adapter for Hive tables, bridging Hive metadata to Calcite's ScannableTable interface. 
+ * + * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + */ +public class HiveCalciteTableAdapter implements ScannableTable { + + private static final Logger LOG = LoggerFactory.getLogger(HiveCalciteTableAdapter.class); + protected final org.apache.hadoop.hive.metastore.api.Table hiveTable; + private Deserializer deserializer; + + static final String TBLPROPERTIES_FUNCTIONS_KEY = "functions"; + static final String TBLPROPERTIES_DEPENDENCIES_KEY = "dependencies"; + + private static final Splitter tblpropertiesSplitter = + Splitter.on(Pattern.compile("\\s+")).omitEmptyStrings().trimResults(); + + private static final Splitter.MapSplitter functionsKeyValueSplitter = + tblpropertiesSplitter.withKeyValueSeparator(Splitter.on(":").limit(2)); + + /** + * Constructor to create bridge from hive table to calcite table. + * @param hiveTable Hive table + */ + public HiveCalciteTableAdapter(org.apache.hadoop.hive.metastore.api.Table hiveTable) { + Preconditions.checkNotNull(hiveTable); + this.hiveTable = hiveTable; + } + + /** + * Constructor accepting HiveTable for unified catalog integration. + * @param coralTable HiveTable from catalog + */ + public HiveCalciteTableAdapter(HiveTable coralTable) { + Preconditions.checkNotNull(coralTable); + this.hiveTable = coralTable.getHiveTable(); + } + + public Map getDaliFunctionParams() { + checkDaliTable(); + final String functionsValue = hiveTable.getParameters().get(TBLPROPERTIES_FUNCTIONS_KEY); + Map params = new HashMap<>(); + if (functionsValue != null) { + params = functionsKeyValueSplitter.split(functionsValue); + } + return params; + } + + public List getDaliUdfDependencies() { + checkDaliTable(); + final String propertyValue = hiveTable.getParameters().get(TBLPROPERTIES_DEPENDENCIES_KEY); + if (propertyValue != null) { + return tblpropertiesSplitter.splitToList(propertyValue).stream() + .map(s -> s.toLowerCase().startsWith("ivy://") ? 
s : "ivy://" + s).collect(Collectors.toList()); + } + return ImmutableList.of(); + } + + public boolean isDaliTable() { + return hiveTable.getOwner().equalsIgnoreCase("daliview"); + } + + private void checkDaliTable() { + // FIXME: this fails unit test right now + // Preconditions.checkState(isDaliTable()); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + RelDataType hiveType = getRowTypeFromHiveType(typeFactory); + + try { + RelDataType coralType = getRowTypeFromCoralType(typeFactory); + + if (!RelOptUtil.areRowTypesEqual(hiveType, coralType, false)) { + LOG.warn("Hive and Coral type conversion mismatch for table {}.{}. Hive: {}, Coral: {}", hiveTable.getDbName(), + hiveTable.getTableName(), hiveType, coralType); + } + } catch (Exception e) { + LOG.warn("Coral type validation failed for table {}.{}. Proceeding with Hive type. Error: {}", + hiveTable.getDbName(), hiveTable.getTableName(), e.getMessage(), e); + } + + return hiveType; + } + + private RelDataType getRowTypeFromCoralType(RelDataTypeFactory typeFactory) { + StructType structType = (StructType) getCoralSchema(); + return CoralTypeToRelDataTypeConverter.convert(structType, typeFactory); + } + + private RelDataType getRowTypeFromHiveType(RelDataTypeFactory typeFactory) { + final List cols = getColumns(); + final List fieldTypes = new ArrayList<>(cols.size()); + final List fieldNames = new ArrayList<>(cols.size()); + final Iterable allCols = Iterables.concat(cols, hiveTable.getPartitionKeys()); + + allCols.forEach(col -> { + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); + final RelDataType relType = TypeConverter.convert(typeInfo, typeFactory); + final String colName = col.getName(); + if (!fieldNames.contains(colName)) { + fieldNames.add(colName); + fieldTypes.add(relType); + } + }); + + return typeFactory.createStructType(fieldTypes, fieldNames); + } + + public CoralDataType getCoralSchema() { + final List cols = getColumns(); + 
final List fields = new ArrayList<>(); + final List fieldNames = new ArrayList<>(); + + final Iterable allCols = Iterables.concat(cols, hiveTable.getPartitionKeys()); + + for (FieldSchema col : allCols) { + final String colName = col.getName(); + + if (!fieldNames.contains(colName)) { + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); + final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo); + + fields.add(StructField.of(colName, coralType)); + fieldNames.add(colName); + } + } + + return StructType.of(fields, true); + } + + private List getColumns() { + StorageDescriptor sd = hiveTable.getSd(); + String serDeLib = getSerializationLib(); + if (serDeLib == null || serDeLib.isEmpty()) { + return sd.getCols(); + } else { + try { + return MetaStoreUtils.getFieldsFromDeserializer(hiveTable.getTableName(), getDeserializer()); + } catch (Exception e) { + LOG.warn("Failed to get columns using deserializer: {}", e.getMessage()); + return sd.getCols(); + } + } + } + + private String getSerializationLib() { + return hiveTable.getSd().getSerdeInfo().getSerializationLib(); + } + + private Deserializer getDeserializer() { + if (deserializer == null) { + deserializer = getDeserializerFromMetaStore(); + } + return deserializer; + } + + private Deserializer getDeserializerFromMetaStore() { + try { + return MetaStoreUtils.getDeserializer(new Configuration(false), hiveTable, false); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public Statistic getStatistic() { + return Statistics.UNKNOWN; + } + + @Override + public Schema.TableType getJdbcTableType() { + TableType tableType = Enum.valueOf(TableType.class, hiveTable.getTableType()); + switch (tableType) { + case VIRTUAL_VIEW: + return Schema.TableType.VIEW; + case MANAGED_TABLE: + return Schema.TableType.TABLE; + case INDEX_TABLE: + return Schema.TableType.INDEX; + default: + throw new RuntimeException("Unknown table type: " + 
hiveTable.getTableType()); + } + } + + @Override + public boolean isRolledUp(String s) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg(String s, SqlCall sqlCall, SqlNode sqlNode, + CalciteConnectionConfig calciteConnectionConfig) { + return true; + } + + @Override + public Enumerable scan(DataContext dataContext) { + throw new RuntimeException("Calcite runtime is not supported"); + } +} diff --git a/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteTableAdapterFactory.java b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteTableAdapterFactory.java new file mode 100644 index 000000000..5911761a0 --- /dev/null +++ b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteTableAdapterFactory.java @@ -0,0 +1,43 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.hive; + +import java.util.List; + +import org.apache.calcite.schema.Table; + +import com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory; +import com.linkedin.coral.common.catalog.CoralTable; +import com.linkedin.coral.common.catalog.TableType; + + +/** + * SPI implementation of {@link CoralCalciteTableAdapterFactory} for Hive tables. + * + *

This factory creates the appropriate Calcite table adapter based on whether + * the Hive table is a physical table or a view: + *

    + *
+ *   <li>Views → {@link HiveCalciteViewAdapter}</li>
+ *   <li>Tables → {@link HiveCalciteTableAdapter}</li>
+ */ +public class HiveCalciteTableAdapterFactory implements CoralCalciteTableAdapterFactory { + + @Override + public boolean supports(CoralTable coralTable) { + return coralTable instanceof HiveTable; + } + + @Override + public Table createAdapter(CoralTable coralTable, List schemaPath) { + HiveTable hiveTable = (HiveTable) coralTable; + if (hiveTable.tableType() == TableType.VIEW) { + return new HiveCalciteViewAdapter(hiveTable, schemaPath); + } else { + return new HiveCalciteTableAdapter(hiveTable); + } + } +} diff --git a/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteViewAdapter.java b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteViewAdapter.java new file mode 100644 index 000000000..8222137e5 --- /dev/null +++ b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveCalciteViewAdapter.java @@ -0,0 +1,63 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.hive; + +import java.util.List; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.schema.TranslatableTable; +import org.apache.hadoop.hive.metastore.api.Table; + +import com.linkedin.coral.com.google.common.base.Throwables; +import com.linkedin.coral.com.google.common.collect.ImmutableList; + + +/** + * Calcite adapter for Hive views, extending HiveCalciteTableAdapter with TranslatableTable support + * for recursive expansion of view definitions. + * + * @see HiveCalciteTableAdapter + * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + */ +public class HiveCalciteViewAdapter extends HiveCalciteTableAdapter implements TranslatableTable { + private final List schemaPath; + + /** + * Constructor to create bridge from hive table to calcite table. 
+ * + * @param hiveTable Hive table + * @param schemaPath Calcite schema path + */ + public HiveCalciteViewAdapter(Table hiveTable, List schemaPath) { + super(hiveTable); + this.schemaPath = schemaPath; + } + + /** + * Constructor accepting HiveTable for unified catalog integration. + * + * @param coralTable HiveTable from catalog + * @param schemaPath Calcite schema path + */ + public HiveCalciteViewAdapter(HiveTable coralTable, List schemaPath) { + super(coralTable); + this.schemaPath = schemaPath; + } + + @Override + public RelNode toRel(RelOptTable.ToRelContext relContext, RelOptTable relOptTable) { + try { + RelRoot root = relContext.expandView(relOptTable.getRowType(), hiveTable.getViewExpandedText(), schemaPath, + ImmutableList.of(hiveTable.getTableName())); + return root.rel; + } catch (Exception e) { + Throwables.propagateIfInstanceOf(e, RuntimeException.class); + throw new RuntimeException("Error while parsing view definition", e); + } + } +} diff --git a/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveTable.java b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveTable.java new file mode 100644 index 000000000..3cc1f8ca6 --- /dev/null +++ b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveTable.java @@ -0,0 +1,99 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.catalog.hive; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Iterables; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.linkedin.coral.common.catalog.CoralTable; +import com.linkedin.coral.common.catalog.TableType; +import com.linkedin.coral.common.types.CoralDataType; +import com.linkedin.coral.common.types.StructField; +import com.linkedin.coral.common.types.StructType; + +import static com.google.common.base.Preconditions.*; + + +/** + * Implementation of {@link CoralTable} interface for Hive tables. + * This class wraps a Hive metastore Table object and provides + * a unified CoralTable API for accessing table metadata. + */ +public class HiveTable implements CoralTable { + + private final Table table; + + /** + * Creates a new HiveTable wrapping the given Hive table. + * + * @param table Hive metastore Table object (must not be null) + */ + public HiveTable(Table table) { + this.table = checkNotNull(table, "Hive table cannot be null"); + } + + @Override + public String name() { + return table.getDbName() + "." + table.getTableName(); + } + + @Override + public Map properties() { + return table.getParameters() != null ? table.getParameters() : Collections.emptyMap(); + } + + @Override + public TableType tableType() { + String hiveTableType = table.getTableType(); + if (hiveTableType != null && hiveTableType.toUpperCase().contains("VIEW")) { + return TableType.VIEW; + } + return TableType.TABLE; + } + + /** + * INTERNAL API + * @deprecated This method is for internal use only and will be removed in a future release. + * Do not depend on this API. 
+ * + * @return Hive metastore Table object + */ + public org.apache.hadoop.hive.metastore.api.Table getHiveTable() { + return table; + } + + @Override + public CoralDataType getSchema() { + final List cols = table.getSd() != null ? table.getSd().getCols() : Collections.emptyList(); + final List fields = new ArrayList<>(); + final List fieldNames = new ArrayList<>(); + + final Iterable allCols = Iterables.concat(cols, table.getPartitionKeys()); + + for (FieldSchema col : allCols) { + final String colName = col.getName(); + + if (!fieldNames.contains(colName)) { + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); + final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo); + + fields.add(StructField.of(colName, coralType)); + fieldNames.add(colName); + } + } + + return StructType.of(fields, true); + } +} diff --git a/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveToCoralTypeConverter.java b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveToCoralTypeConverter.java new file mode 100644 index 000000000..6e71b3c88 --- /dev/null +++ b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/HiveToCoralTypeConverter.java @@ -0,0 +1,132 @@ +/** + * Copyright 2024-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.hive; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.serde2.typeinfo.*; + +import com.linkedin.coral.common.types.*; + + +/** + * Converts Hive TypeInfo objects to Coral data types. + * This enables integration between Hive's type system and Coral's type system. + */ +public final class HiveToCoralTypeConverter { + + private HiveToCoralTypeConverter() { + } + + /** + * Converts a Hive TypeInfo to a Coral data type. 
+ * @param typeInfo the Hive type to convert + * @return the corresponding Coral data type + */ + public static CoralDataType convert(TypeInfo typeInfo) { + if (typeInfo == null) { + throw new IllegalArgumentException("TypeInfo cannot be null"); + } + + switch (typeInfo.getCategory()) { + case PRIMITIVE: + return convertPrimitive((PrimitiveTypeInfo) typeInfo); + case LIST: + return convertList((ListTypeInfo) typeInfo); + case MAP: + return convertMap((MapTypeInfo) typeInfo); + case STRUCT: + return convertStruct((StructTypeInfo) typeInfo); + case UNION: + return convertUnion((UnionTypeInfo) typeInfo); + default: + throw new UnsupportedOperationException("Unsupported type category: " + typeInfo.getCategory()); + } + } + + private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) { + boolean nullable = true; + + switch (type.getPrimitiveCategory()) { + case BOOLEAN: + return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable); + case BYTE: + return PrimitiveType.of(CoralTypeKind.TINYINT, nullable); + case SHORT: + return PrimitiveType.of(CoralTypeKind.SMALLINT, nullable); + case INT: + return PrimitiveType.of(CoralTypeKind.INT, nullable); + case LONG: + return PrimitiveType.of(CoralTypeKind.BIGINT, nullable); + case FLOAT: + return PrimitiveType.of(CoralTypeKind.FLOAT, nullable); + case DOUBLE: + return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable); + case STRING: + return PrimitiveType.of(CoralTypeKind.STRING, nullable); + case DATE: + return PrimitiveType.of(CoralTypeKind.DATE, nullable); + case TIMESTAMP: + return TimestampType.of(TimestampType.PRECISION_NOT_SPECIFIED, nullable); + case BINARY: + return BinaryType.of(BinaryType.LENGTH_UNBOUNDED, nullable); + case DECIMAL: + DecimalTypeInfo decimalType = (DecimalTypeInfo) type; + return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable); + case VARCHAR: + VarcharTypeInfo varcharType = (VarcharTypeInfo) type; + return VarcharType.of(varcharType.getLength(), nullable); + case CHAR: + 
CharTypeInfo charType = (CharTypeInfo) type; + return CharType.of(charType.getLength(), nullable); + case VOID: + return PrimitiveType.of(CoralTypeKind.NULL, true); + case UNKNOWN: + return PrimitiveType.of(CoralTypeKind.STRING, true); + default: + throw new UnsupportedOperationException("Unsupported primitive type: " + type.getPrimitiveCategory()); + } + } + + private static CoralDataType convertList(ListTypeInfo listType) { + CoralDataType elementType = convert(listType.getListElementTypeInfo()); + return ArrayType.of(elementType, true); + } + + private static CoralDataType convertMap(MapTypeInfo mapType) { + CoralDataType keyType = convert(mapType.getMapKeyTypeInfo()); + CoralDataType valueType = convert(mapType.getMapValueTypeInfo()); + return MapType.of(keyType, valueType, true); + } + + private static CoralDataType convertStruct(StructTypeInfo structType) { + List fieldNames = structType.getAllStructFieldNames(); + List fieldTypeInfos = structType.getAllStructFieldTypeInfos(); + + List fields = new ArrayList<>(); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + CoralDataType fieldType = convert(fieldTypeInfos.get(i)); + fields.add(StructField.of(fieldNames.get(i), fieldType)); + } + + return StructType.of(fields, true); + } + + private static CoralDataType convertUnion(UnionTypeInfo unionType) { + List memberTypes = unionType.getAllUnionObjectTypeInfos(); + + List fields = new ArrayList<>(); + fields.add(StructField.of("tag", PrimitiveType.of(CoralTypeKind.INT, true))); + + for (int i = 0; i < memberTypes.size(); i++) { + CoralDataType fieldType = convert(memberTypes.get(i)); + fields.add(StructField.of("field" + i, fieldType)); + } + + return StructType.of(fields, true); + } +} diff --git a/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/TypeConverter.java b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/TypeConverter.java new file mode 100644 index 000000000..76114f21a --- /dev/null +++ 
b/coral-catalog-hive/src/main/java/com/linkedin/coral/catalog/hive/TypeConverter.java @@ -0,0 +1,225 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.hive; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.*; + + +/** + * Copied from org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter + */ + +public class TypeConverter { + + private TypeConverter() { + + } + + public static RelDataType convert(TypeInfo typeInfo, RelDataTypeFactory relTypeFactory) { + switch (typeInfo.getCategory()) { + case PRIMITIVE: + return convert((PrimitiveTypeInfo) typeInfo, relTypeFactory); + case LIST: + return convert((ListTypeInfo) typeInfo, relTypeFactory); + case MAP: + return convert((MapTypeInfo) typeInfo, relTypeFactory); + case STRUCT: + return convert((StructTypeInfo) typeInfo, relTypeFactory); + case UNION: + return convert((UnionTypeInfo) typeInfo, relTypeFactory); + default: + throw new RuntimeException("Unknown type category: " + typeInfo.getCategory()); + } + } + + public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) { + RelDataType convertedType = null; + + switch (type.getPrimitiveCategory()) { + case VOID: + convertedType = dtFactory.createSqlType(SqlTypeName.NULL); + break; + case BOOLEAN: + convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN); + break; + case BYTE: + convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT); + break; + case SHORT: + 
convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT); + break; + case INT: + convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER); + break; + case LONG: + convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT); + break; + case FLOAT: + convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT); + break; + case DOUBLE: + convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE); + break; + case STRING: + convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE); + break; + case DATE: + convertedType = dtFactory.createSqlType(SqlTypeName.DATE); + break; + case TIMESTAMP: + convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP); + break; + case BINARY: + convertedType = dtFactory.createSqlType(SqlTypeName.BINARY); + break; + case DECIMAL: + DecimalTypeInfo dtInf = (DecimalTypeInfo) type; + convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale()); + break; + case VARCHAR: + convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()); + break; + case CHAR: + convertedType = dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()); + break; + case UNKNOWN: + convertedType = dtFactory.createSqlType(SqlTypeName.OTHER); + break; + default: + throw new RuntimeException("Unknown primitive type category: " + type.getPrimitiveCategory()); + } + + if (null == convertedType) { + throw new RuntimeException("Unsupported Type : " + type.getTypeName()); + } + + return dtFactory.createTypeWithNullability(convertedType, true); + } + + public static RelDataType convert(ListTypeInfo lstType, RelDataTypeFactory dtFactory) { + RelDataType elemType = convert(lstType.getListElementTypeInfo(), dtFactory); + RelDataType arrayType = dtFactory.createArrayType(elemType, -1); + return dtFactory.createTypeWithNullability(arrayType, true); + } + + public static RelDataType convert(MapTypeInfo mapType, RelDataTypeFactory dtFactory) { + RelDataType 
keyType = convert(mapType.getMapKeyTypeInfo(), dtFactory); + RelDataType valueType = convert(mapType.getMapValueTypeInfo(), dtFactory); + RelDataType type = dtFactory.createMapType(keyType, valueType); + return dtFactory.createTypeWithNullability(type, true); + } + + public static RelDataType convert(StructTypeInfo structType, final RelDataTypeFactory dtFactory) { + List fTypes = new ArrayList<>(structType.getAllStructFieldTypeInfos().size()); + for (TypeInfo ti : structType.getAllStructFieldTypeInfos()) { + fTypes.add(convert(ti, dtFactory)); + } + RelDataType rowType = dtFactory.createStructType(fTypes, structType.getAllStructFieldNames()); + // TODO: Return nullable record type. + // All types in Hive are effectively nullable since the data is injected from external source. + // Calcite does not support nullable record type and since we don't create our own type factory + // ... we've problem! The call below only makes the fields of the struct nullable which is + // not the same as nullable struct. + return dtFactory.createTypeWithNullability(rowType, true); + } + + // Mimic the StructTypeInfo conversion to convert a UnionTypeInfo to the corresponding RelDataType + // The schema of output Struct conforms to https://github.com/trinodb/trino/pull/3483 + // except we adopted "integer" for the type of "tag" field instead of "tinyint" in the Trino patch + // for compatibility with other platforms that Iceberg currently doesn't support tinyint type. 
+ + // Note: this is subject to change in the future pending on the discussion in + // https://mail-archives.apache.org/mod_mbox/iceberg-dev/202112.mbox/browser + public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) { + List fTypes = unionType.getAllUnionObjectTypeInfos().stream() + .map(typeInfo -> convert(typeInfo, dtFactory)).collect(Collectors.toList()); + List fNames = IntStream.range(0, unionType.getAllUnionObjectTypeInfos().size()).mapToObj(i -> "field" + i) + .collect(Collectors.toList()); + fTypes.add(0, dtFactory.createSqlType(SqlTypeName.INTEGER)); + fNames.add(0, "tag"); + + RelDataType rowType = dtFactory.createStructType(fTypes, fNames); + return dtFactory.createTypeWithNullability(rowType, true); + } + + public static TypeInfo convert(RelDataType rType) { + if (rType.isStruct()) { + return convertStructType(rType); + } else if (rType.getComponentType() != null) { + return convertListType(rType); + } else if (rType.getKeyType() != null) { + return convertMapType(rType); + } else { + return convertPrimtiveType(rType); + } + } + + public static TypeInfo convertStructType(RelDataType rType) { + List fTypes = rType.getFieldList().stream().map(f -> convert(f.getType())).collect(Collectors.toList()); + List fNames = rType.getFieldNames(); + return TypeInfoFactory.getStructTypeInfo(fNames, fTypes); + } + + public static TypeInfo convertMapType(RelDataType rType) { + return TypeInfoFactory.getMapTypeInfo(convert(rType.getKeyType()), convert(rType.getValueType())); + } + + public static TypeInfo convertListType(RelDataType rType) { + return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType())); + } + + public static TypeInfo convertPrimtiveType(RelDataType rType) { + switch (rType.getSqlTypeName()) { + case BOOLEAN: + return TypeInfoFactory.booleanTypeInfo; + case TINYINT: + return TypeInfoFactory.byteTypeInfo; + case SMALLINT: + return TypeInfoFactory.shortTypeInfo; + case INTEGER: + return 
TypeInfoFactory.intTypeInfo; + case BIGINT: + return TypeInfoFactory.longTypeInfo; + case FLOAT: + return TypeInfoFactory.floatTypeInfo; + case DOUBLE: + return TypeInfoFactory.doubleTypeInfo; + case DATE: + return TypeInfoFactory.dateTypeInfo; + case TIMESTAMP: + return TypeInfoFactory.timestampTypeInfo; + case BINARY: + return TypeInfoFactory.binaryTypeInfo; + case DECIMAL: + return TypeInfoFactory.getDecimalTypeInfo(rType.getPrecision(), rType.getScale()); + case VARCHAR: + if (rType.getPrecision() == Integer.MAX_VALUE) { + return TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + } else { + return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision()); + } + case CHAR: + if (rType.getPrecision() > HiveChar.MAX_CHAR_LENGTH) { + return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision()); + } else { + return TypeInfoFactory.getCharTypeInfo(rType.getPrecision()); + } + case OTHER: + default: + return TypeInfoFactory.voidTypeInfo; + } + } +} diff --git a/coral-catalog-hive/src/main/resources/META-INF/services/com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory b/coral-catalog-hive/src/main/resources/META-INF/services/com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory new file mode 100644 index 000000000..99c22cacf --- /dev/null +++ b/coral-catalog-hive/src/main/resources/META-INF/services/com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory @@ -0,0 +1 @@ +com.linkedin.coral.catalog.hive.HiveCalciteTableAdapterFactory diff --git a/coral-catalog-iceberg/build.gradle b/coral-catalog-iceberg/build.gradle new file mode 100644 index 000000000..f74ad16dc --- /dev/null +++ b/coral-catalog-iceberg/build.gradle @@ -0,0 +1,20 @@ +apply plugin: 'java-library' + +dependencies { + api project(':coral-catalog-spi') + + api deps.'linkedin-iceberg'.'iceberg-api' + api deps.'linkedin-iceberg'.'iceberg-core' + + // Temporary: needed for IcebergHiveTableConverter bridge code (issue #575) + 
implementation(deps.'linkedin-iceberg'.'iceberg-hive-metastore') { + exclude group: 'org.apache.hive', module: 'hive-metastore' + exclude group: 'org.apache.hadoop', module: 'hadoop-common' + } + implementation(deps.'hive'.'hive-metastore') { + exclude group: 'com.linkedin.metastore-autometrics', module: 'autometrics-reporter' + exclude group: 'com.linkedin.metastore-audit', module: 'metastore-audit-logging' + exclude group: 'org.apache.avro', module: 'avro-tools' + } + implementation deps.'hadoop'.'hadoop-common' +} diff --git a/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergCalciteTableAdapter.java b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergCalciteTableAdapter.java new file mode 100644 index 000000000..c7e6705de --- /dev/null +++ b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergCalciteTableAdapter.java @@ -0,0 +1,79 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.catalog.iceberg; + +import com.google.common.base.Preconditions; + +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; + +import com.linkedin.coral.common.catalog.TableType; +import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter; +import com.linkedin.coral.common.types.StructType; + + +/** + * Calcite adapter for Apache Iceberg tables, bridging Iceberg metadata to Calcite's ScannableTable interface. + * + *

Uses two-stage conversion: Iceberg -> Coral -> Calcite. + * + * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + */ +public class IcebergCalciteTableAdapter implements ScannableTable { + + private final IcebergTable coralTable; + + /** + * Creates IcebergCalciteTableAdapter from IcebergTable. + * + * @param coralTable IcebergTable from catalog + */ + public IcebergCalciteTableAdapter(IcebergTable coralTable) { + Preconditions.checkNotNull(coralTable); + this.coralTable = coralTable; + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + StructType structType = (StructType) coralTable.getSchema(); + return CoralTypeToRelDataTypeConverter.convert(structType, typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.UNKNOWN; + } + + @Override + public Schema.TableType getJdbcTableType() { + return coralTable.tableType() == TableType.VIEW ? Schema.TableType.VIEW : Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg(String column, SqlCall call, SqlNode parent, + CalciteConnectionConfig config) { + return true; + } + + @Override + public Enumerable scan(DataContext root) { + throw new RuntimeException("Calcite runtime execution is not supported"); + } +} diff --git a/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergCalciteTableAdapterFactory.java b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergCalciteTableAdapterFactory.java new file mode 100644 index 000000000..a8b4e6074 --- /dev/null +++ b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergCalciteTableAdapterFactory.java @@ -0,0 +1,32 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.catalog.iceberg; + +import java.util.List; + +import org.apache.calcite.schema.Table; + +import com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory; +import com.linkedin.coral.common.catalog.CoralTable; + + +/** + * SPI implementation of {@link CoralCalciteTableAdapterFactory} for Iceberg tables. + * + *

This factory creates {@link IcebergCalciteTableAdapter} for Iceberg tables. + */ +public class IcebergCalciteTableAdapterFactory implements CoralCalciteTableAdapterFactory { + + @Override + public boolean supports(CoralTable coralTable) { + return coralTable instanceof IcebergTable; + } + + @Override + public Table createAdapter(CoralTable coralTable, List schemaPath) { + return new IcebergCalciteTableAdapter((IcebergTable) coralTable); + } +} diff --git a/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergHiveTableConverter.java b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergHiveTableConverter.java new file mode 100644 index 000000000..8386e09c8 --- /dev/null +++ b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergHiveTableConverter.java @@ -0,0 +1,79 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.iceberg; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.hive.HiveSchemaUtil; + + +/** + * Utility class to convert Iceberg datasets to Hive Table objects for backward compatibility. + * + *

TEMPORARY BRIDGE CODE: This converter exists as a temporary workaround and will be removed + * once the refactoring in issue #575 is complete. + * + * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + */ +public class IcebergHiveTableConverter { + + private IcebergHiveTableConverter() { + } + + /** + * Converts IcebergTable to a Hive Table object for backward compatibility with function resolution. + * + *

NOTE: This is temporary glue code that will be removed after + * issue #575 is resolved. + * + * @param icebergCoralTable Iceberg coral table to convert + * @return Hive Table object with complete metadata and schema + */ + public static Table toHiveTable(IcebergTable icebergCoralTable) { + org.apache.iceberg.Table icebergTable = icebergCoralTable.getIcebergTable(); + + String fullName = icebergCoralTable.name(); + String dbName; + String tableName; + int dotIndex = fullName.indexOf('.'); + if (dotIndex > 0) { + dbName = fullName.substring(0, dotIndex); + tableName = fullName.substring(dotIndex + 1); + } else { + dbName = "default"; + tableName = fullName; + } + + StorageDescriptor storageDescriptor = new StorageDescriptor(); + SerDeInfo serDeInfo = new SerDeInfo(); + + Map hiveParameters = new HashMap<>(icebergCoralTable.properties()); + + Map serdeParams = new HashMap<>(); + if (hiveParameters.containsKey("avro.schema.literal")) { + serdeParams.put("avro.schema.literal", hiveParameters.get("avro.schema.literal")); + } + serDeInfo.setParameters(serdeParams); + storageDescriptor.setSerdeInfo(serDeInfo); + + try { + storageDescriptor.setCols(HiveSchemaUtil.convert(icebergTable.schema())); + } catch (Exception e) { + storageDescriptor.setCols(new ArrayList<>()); + } + + Table hiveTable = new Table(tableName, dbName, StringUtils.EMPTY, 0, 0, 0, storageDescriptor, new ArrayList<>(), + hiveParameters, StringUtils.EMPTY, StringUtils.EMPTY, "MANAGED_TABLE"); + + return hiveTable; + } +} diff --git a/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergTable.java b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergTable.java new file mode 100644 index 000000000..6fba549ae --- /dev/null +++ b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergTable.java @@ -0,0 +1,71 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. 
+ * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.iceberg; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.iceberg.Table; + +import com.linkedin.coral.common.catalog.CoralTable; +import com.linkedin.coral.common.catalog.TableType; +import com.linkedin.coral.common.types.CoralDataType; + +import static com.google.common.base.Preconditions.*; + + +/** + * Implementation of {@link CoralTable} interface for Apache Iceberg tables. + * This class wraps an Iceberg Table object and provides a unified + * CoralTable API for accessing table metadata. + */ +public class IcebergTable implements CoralTable { + + private final Table table; + + /** + * Creates a new IcebergTable wrapping the given Iceberg table. + * + * @param table Iceberg Table object (must not be null) + */ + public IcebergTable(Table table) { + this.table = checkNotNull(table, "Iceberg table cannot be null"); + } + + @Override + public String name() { + return table.name(); + } + + @Override + public Map properties() { + if (table.properties() != null) { + return new HashMap<>(table.properties()); + } + return new HashMap<>(); + } + + @Override + public TableType tableType() { + return TableType.TABLE; + } + + /** + * INTERNAL API + * @deprecated This method is for internal use only and will be removed in a future release. + * Do not depend on this API. 
+ * + * @return Iceberg Table object + */ + public org.apache.iceberg.Table getIcebergTable() { + return table; + } + + @Override + public CoralDataType getSchema() { + return IcebergToCoralTypeConverter.convert(table.schema()); + } +} diff --git a/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergToCoralTypeConverter.java b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergToCoralTypeConverter.java new file mode 100644 index 000000000..dba766b6b --- /dev/null +++ b/coral-catalog-iceberg/src/main/java/com/linkedin/coral/catalog/iceberg/IcebergToCoralTypeConverter.java @@ -0,0 +1,127 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.catalog.iceberg; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +import com.linkedin.coral.common.types.*; + + +/** + * Converts Iceberg Schema and Types to Coral data types. + * This is the first stage of the two-stage conversion: Iceberg -> Coral -> Calcite. + */ +public class IcebergToCoralTypeConverter { + + private IcebergToCoralTypeConverter() { + } + + /** + * Converts Iceberg Schema to Coral StructType. + * + * @param icebergSchema Iceberg table schema + * @return StructType representing the Iceberg schema in Coral type system + */ + public static StructType convert(Schema icebergSchema) { + List columns = icebergSchema.columns(); + List fields = new ArrayList<>(columns.size()); + + for (Types.NestedField field : columns) { + CoralDataType fieldType = convert(field.type(), field.isOptional()); + fields.add(StructField.of(field.name(), fieldType)); + } + + return StructType.of(fields, true); + } + + /** + * Converts Iceberg Type to Coral CoralDataType. 
+ * + * @param icebergType Iceberg type + * @param nullable Whether this type instance is nullable + * @return CoralDataType representing the Iceberg type + */ + public static CoralDataType convert(Type icebergType, boolean nullable) { + Type.TypeID typeId = icebergType.typeId(); + + switch (typeId) { + case STRUCT: + return convertStruct((Types.StructType) icebergType, nullable); + case LIST: + return convertList((Types.ListType) icebergType, nullable); + case MAP: + return convertMap((Types.MapType) icebergType, nullable); + default: + return convertPrimitive(icebergType, nullable); + } + } + + private static CoralDataType convertPrimitive(Type icebergType, boolean nullable) { + Type.TypeID typeId = icebergType.typeId(); + + switch (typeId) { + case BOOLEAN: + return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable); + case INTEGER: + return PrimitiveType.of(CoralTypeKind.INT, nullable); + case LONG: + return PrimitiveType.of(CoralTypeKind.BIGINT, nullable); + case FLOAT: + return PrimitiveType.of(CoralTypeKind.FLOAT, nullable); + case DOUBLE: + return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable); + case DATE: + return PrimitiveType.of(CoralTypeKind.DATE, nullable); + case TIME: + return PrimitiveType.of(CoralTypeKind.TIME, nullable); + case TIMESTAMP: + return TimestampType.of(6, nullable); + case STRING: + return VarcharType.of(Integer.MAX_VALUE, nullable); + case UUID: + return CharType.of(36, nullable); + case FIXED: + Types.FixedType fixedType = (Types.FixedType) icebergType; + return BinaryType.of(fixedType.length(), nullable); + case BINARY: + return BinaryType.of(BinaryType.LENGTH_UNBOUNDED, nullable); + case DECIMAL: + Types.DecimalType decimalType = (Types.DecimalType) icebergType; + return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable); + default: + throw new UnsupportedOperationException( + "Unsupported Iceberg primitive type: " + icebergType + " (TypeID: " + typeId + ")"); + } + } + + private static ArrayType 
convertList(Types.ListType listType, boolean nullable) { + CoralDataType elementType = convert(listType.elementType(), listType.isElementOptional()); + return ArrayType.of(elementType, nullable); + } + + private static MapType convertMap(Types.MapType mapType, boolean nullable) { + CoralDataType keyType = convert(mapType.keyType(), false); + CoralDataType valueType = convert(mapType.valueType(), mapType.isValueOptional()); + return MapType.of(keyType, valueType, nullable); + } + + private static StructType convertStruct(Types.StructType structType, boolean nullable) { + List fields = structType.fields(); + List coralFields = new ArrayList<>(fields.size()); + + for (Types.NestedField field : fields) { + CoralDataType fieldType = convert(field.type(), field.isOptional()); + coralFields.add(StructField.of(field.name(), fieldType)); + } + + return StructType.of(coralFields, nullable); + } +} diff --git a/coral-catalog-iceberg/src/main/resources/META-INF/services/com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory b/coral-catalog-iceberg/src/main/resources/META-INF/services/com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory new file mode 100644 index 000000000..dbc348059 --- /dev/null +++ b/coral-catalog-iceberg/src/main/resources/META-INF/services/com.linkedin.coral.common.catalog.CoralCalciteTableAdapterFactory @@ -0,0 +1 @@ +com.linkedin.coral.catalog.iceberg.IcebergCalciteTableAdapterFactory diff --git a/coral-catalog-spi/build.gradle b/coral-catalog-spi/build.gradle new file mode 100644 index 000000000..e79598967 --- /dev/null +++ b/coral-catalog-spi/build.gradle @@ -0,0 +1,12 @@ +apply plugin: 'java-library' + +dependencies { + api(deps.'linkedin-calcite-core') { + artifact { + name = 'calcite-core' + extension = 'jar' + type = 'jar' + classifier = 'shaded' + } + } +} diff --git a/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCalciteTableAdapterFactory.java 
b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCalciteTableAdapterFactory.java new file mode 100644 index 000000000..20f99590e --- /dev/null +++ b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCalciteTableAdapterFactory.java @@ -0,0 +1,54 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common.catalog; + +import java.util.List; + +import org.apache.calcite.schema.Table; + + +/** + * SPI (Service Provider Interface) for creating Calcite table adapters from {@link CoralTable} instances. + * + *

Implementations of this interface are discovered at runtime via {@link java.util.ServiceLoader}, + * allowing format-specific adapters (Hive, Iceberg, etc.) to be loaded without requiring their + * classes on the compile-time classpath of the consuming module. + * + *

Each implementation should handle a specific {@link CoralTable} subtype and create the appropriate + * Calcite {@link Table} adapter for it. + * + * @see CoralCalciteTableAdapterRegistry Registry that discovers and dispatches to factories + */ +public interface CoralCalciteTableAdapterFactory { + + /** + * Returns whether this factory can create an adapter for the given CoralTable. + * + * @param coralTable The table to check + * @return true if this factory supports the given table type + */ + boolean supports(CoralTable coralTable); + + /** + * Creates a Calcite {@link Table} adapter for the given CoralTable. + * + * @param coralTable The table to create an adapter for + * @param schemaPath The Calcite schema path for the table + * @return A Calcite Table adapter + */ + Table createAdapter(CoralTable coralTable, List schemaPath); + + /** + * Returns the priority of this factory. Lower values indicate higher priority. + * When multiple factories support the same CoralTable type, the one with the + * lowest priority value is used. + * + * @return Priority value (default 100) + */ + default int priority() { + return 100; + } +} diff --git a/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCalciteTableAdapterRegistry.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCalciteTableAdapterRegistry.java new file mode 100644 index 000000000..3635e23f0 --- /dev/null +++ b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCalciteTableAdapterRegistry.java @@ -0,0 +1,77 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.common.catalog; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.ServiceLoader; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.stream.Collectors; + +import org.apache.calcite.schema.Table; + + +/** + * Registry that discovers {@link CoralCalciteTableAdapterFactory} implementations via + * {@link ServiceLoader} and dispatches table adapter creation to the appropriate factory. + * + *

Factories are ordered by {@link CoralCalciteTableAdapterFactory#priority()} (lowest first). + * The first factory that {@link CoralCalciteTableAdapterFactory#supports(CoralTable) supports} + * a given CoralTable is used to create the adapter. + * + *

In environments with complex classloader hierarchies (e.g., Spark), factories can also + * be registered explicitly via {@link #register(CoralCalciteTableAdapterFactory)}. + */ +public final class CoralCalciteTableAdapterRegistry { + + private static final CopyOnWriteArrayList EXPLICIT_FACTORIES = + new CopyOnWriteArrayList<>(); + + private CoralCalciteTableAdapterRegistry() { + } + + /** + * Explicitly registers a factory. Use this in environments where ServiceLoader + * discovery may not work (e.g., complex classloader hierarchies in Spark). + * + * @param factory The factory to register + */ + public static void register(CoralCalciteTableAdapterFactory factory) { + EXPLICIT_FACTORIES.addIfAbsent(factory); + } + + /** + * Creates a Calcite {@link Table} adapter for the given CoralTable by finding an appropriate factory. + * + * @param coralTable The table to create an adapter for + * @param schemaPath The Calcite schema path + * @return A Calcite Table adapter + * @throws UnsupportedOperationException if no factory supports the given table type + */ + public static Table createAdapter(CoralTable coralTable, List schemaPath) { + List allFactories = getAllFactories(); + for (CoralCalciteTableAdapterFactory factory : allFactories) { + if (factory.supports(coralTable)) { + return factory.createAdapter(coralTable, schemaPath); + } + } + throw new UnsupportedOperationException( + "No CoralCalciteTableAdapterFactory found for table type: " + coralTable.getClass().getName()); + } + + private static List getAllFactories() { + List factories = new ArrayList<>(EXPLICIT_FACTORIES); + + ServiceLoader loader = ServiceLoader.load(CoralCalciteTableAdapterFactory.class); + for (CoralCalciteTableAdapterFactory factory : loader) { + factories.add(factory); + } + + return factories.stream().sorted(Comparator.comparingInt(CoralCalciteTableAdapterFactory::priority)) + .collect(Collectors.toList()); + } +} diff --git 
a/coral-common/src/main/java/com/linkedin/coral/common/catalog/CoralCatalog.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCatalog.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/catalog/CoralCatalog.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralCatalog.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/catalog/CoralTable.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralTable.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/catalog/CoralTable.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/CoralTable.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/catalog/TableType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/TableType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/catalog/TableType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/catalog/TableType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/ArrayType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/ArrayType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/ArrayType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/ArrayType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/BinaryType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/BinaryType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/BinaryType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/BinaryType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/CharType.java 
b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CharType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/CharType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CharType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralDataType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CoralDataType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/CoralDataType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CoralDataType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/DecimalType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/DecimalType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/DecimalType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/DecimalType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/MapType.java 
b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/MapType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/MapType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/MapType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/PrimitiveType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/PrimitiveType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/PrimitiveType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/PrimitiveType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/StructField.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/StructField.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/StructField.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/StructField.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/StructType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/StructType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/StructType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/StructType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/TimestampType.java similarity index 100% rename from coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/TimestampType.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/VarcharType.java b/coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/VarcharType.java similarity index 100% rename from 
coral-common/src/main/java/com/linkedin/coral/common/types/VarcharType.java rename to coral-catalog-spi/src/main/java/com/linkedin/coral/common/types/VarcharType.java diff --git a/coral-common/build.gradle b/coral-common/build.gradle index d1c2058ab..19a302e9a 100644 --- a/coral-common/build.gradle +++ b/coral-common/build.gradle @@ -10,19 +10,25 @@ dependencies { } } - api(deps.'hive'.'hive-metastore') { + // New catalog SPI and implementation modules + api project(':coral-catalog-spi') + api project(':coral-catalog-hive') + api project(':coral-catalog-iceberg') + + // Hive/Hadoop/Iceberg dependencies are now transitively provided by the catalog modules. + // These are kept as implementation deps for the deprecated wrapper classes still in coral-common + // (HiveMetastoreClient, HiveMscAdapter, HiveSchema, HiveDbSchema, etc.). + implementation(deps.'hive'.'hive-metastore') { exclude group: 'com.linkedin.metastore-autometrics', module: 'autometrics-reporter' exclude group: 'com.linkedin.metastore-audit', module: 'metastore-audit-logging' - // avro-tools brings in whole bunch of hadoop classes causing duplicates and conflicts exclude group: 'org.apache.avro', module: 'avro-tools' } - api deps.'hadoop'.'hadoop-common' + implementation deps.'hadoop'.'hadoop-common' - // LinkedIn Iceberg dependencies - api deps.'linkedin-iceberg'.'iceberg-api' - api deps.'linkedin-iceberg'.'iceberg-core' - api(deps.'linkedin-iceberg'.'iceberg-hive-metastore') { + implementation deps.'linkedin-iceberg'.'iceberg-api' + implementation deps.'linkedin-iceberg'.'iceberg-core' + implementation(deps.'linkedin-iceberg'.'iceberg-hive-metastore') { exclude group: 'org.apache.hive', module: 'hive-metastore' exclude group: 'org.apache.hadoop', module: 'hadoop-common' } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java index 37b967a63..5d906382a 100644 --- 
a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java @@ -17,13 +17,11 @@ import org.apache.calcite.rel.type.RelProtoDataType; import org.apache.calcite.schema.*; +import com.linkedin.coral.common.catalog.CoralCalciteTableAdapterRegistry; import com.linkedin.coral.common.catalog.CoralCatalog; import com.linkedin.coral.common.catalog.CoralTable; -import com.linkedin.coral.common.catalog.HiveTable; -import com.linkedin.coral.common.catalog.IcebergTable; import static com.google.common.base.Preconditions.checkNotNull; -import static com.linkedin.coral.common.catalog.TableType.VIEW; /** @@ -33,13 +31,9 @@ * {@link CoralCatalog} to Calcite's {@link Schema} interface. It represents a specific database/namespace * and dispatches table lookups to the appropriate format-specific implementations. * - *

Multi-format Dispatch: This class automatically dispatches to the correct table implementation - * based on the underlying table format: - *

    - *
  • {@link IcebergCalciteTableAdapter} for Iceberg tables ({@link IcebergTable})
  • - *
  • {@link HiveCalciteTableAdapter} for Hive tables ({@link HiveTable})
  • - *
  • {@link HiveCalciteViewAdapter} for Hive views ({@link HiveTable} with VIEW type)
  • - *
+ *

Multi-format Dispatch: This class uses the SPI-based + * {@link CoralCalciteTableAdapterRegistry} to dispatch table lookups to the appropriate + * format-specific adapter factories discovered at runtime via {@link java.util.ServiceLoader}. * *

Relationship to HiveDbSchema: *

    @@ -77,12 +71,8 @@ public class CoralDatabaseSchema implements Schema { /** * Returns a Calcite Table for the specified table name. * - *

    This method performs format-aware dispatch: - *

      - *
    • Iceberg tables → {@link IcebergCalciteTableAdapter}
    • - *
    • Hive views → {@link HiveCalciteViewAdapter}
    • - *
    • Hive tables → {@link HiveCalciteTableAdapter}
    • - *
    + *

    This method uses the SPI-based {@link CoralCalciteTableAdapterRegistry} to dispatch + * to the appropriate format-specific adapter factory. * * @param name Table name * @return Calcite Table implementation, or null if table doesn't exist @@ -94,20 +84,8 @@ public Table getTable(String name) { return null; } - // Dispatch based on CoralTable implementation type - if (coralTable instanceof IcebergTable) { - return new IcebergCalciteTableAdapter((IcebergTable) coralTable); - } else if (coralTable instanceof HiveTable) { - HiveTable hiveTable = (HiveTable) coralTable; - // Check if it's a view - if (hiveTable.tableType() == VIEW) { - return new HiveCalciteViewAdapter(hiveTable, ImmutableList.of(CoralRootSchema.ROOT_SCHEMA, dbName)); - } else { - return new HiveCalciteTableAdapter(hiveTable); - } - } - - return null; + return CoralCalciteTableAdapterRegistry.createAdapter(coralTable, + ImmutableList.of(CoralRootSchema.ROOT_SCHEMA, dbName)); } /** diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteTableAdapter.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteTableAdapter.java index c49af6f69..b450da1e4 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteTableAdapter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteTableAdapter.java @@ -5,346 +5,29 @@ */ package com.linkedin.coral.common; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -import com.google.common.base.Preconditions; -import com.google.common.base.Splitter; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; - -import org.apache.calcite.DataContext; -import org.apache.calcite.config.CalciteConnectionConfig; -import org.apache.calcite.linq4j.Enumerable; -import org.apache.calcite.plan.RelOptUtil; -import 
org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.schema.ScannableTable; -import org.apache.calcite.schema.Schema; -import org.apache.calcite.schema.Statistic; -import org.apache.calcite.schema.Statistics; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlNode; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.linkedin.coral.common.catalog.HiveTable; -import com.linkedin.coral.common.types.CoralDataType; -import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter; -import com.linkedin.coral.common.types.StructField; -import com.linkedin.coral.common.types.StructType; /** - * Calcite adapter for Hive tables, bridging Hive metadata to Calcite's ScannableTable interface. - * - *

    This adapter converts Hive {@link org.apache.hadoop.hive.metastore.api.Table} representations - * into Calcite's {@link ScannableTable}, enabling Hive tables to be queried through Calcite's - * SQL processing engine. - * - *

    Integration with ParseTreeBuilder and HiveFunctionResolver: This class provides critical - * Dali UDF metadata extraction methods ({@link #getDaliFunctionParams()} and {@link #getDaliUdfDependencies()}) - * that are tightly coupled to {@link org.apache.hadoop.hive.metastore.api.Table} and used by: - *

      - *
    • {@code HiveFunctionResolver} - for resolving Dali function names to implementing classes
    • - *
    • {@code ParseTreeBuilder} - for parsing view definitions with UDF metadata
    • - *
    - * - *

    These components currently require {@link org.apache.hadoop.hive.metastore.api.Table} and cannot - * work directly with {@link com.linkedin.coral.common.catalog.CoralTable}. This tight coupling is being - * addressed in issue #575, which will refactor - * these APIs to accept {@code CoralTable} instead, enabling multi-format support (Hive, Iceberg, etc.). - * - *

    Implementing this as a ScannableTable, instead of Table, is hacky approach to make calcite - * correctly generate relational algebra. This will have to go away gradually. - * - * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveCalciteTableAdapter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ -public class HiveCalciteTableAdapter implements ScannableTable { - - private static final Logger LOG = LoggerFactory.getLogger(HiveCalciteTableAdapter.class); - protected final org.apache.hadoop.hive.metastore.api.Table hiveTable; - private Deserializer deserializer; - - /** - * Any functions the user registers during view creation should also be - * specified in the table properties of the created view under the key - * {@value #TBLPROPERTIES_FUNCTIONS_KEY} - * - * e.g 'functions' = 'f:c1 g:c2' - */ - static final String TBLPROPERTIES_FUNCTIONS_KEY = "functions"; - - /** - * Any dependencies the user adds during view creation can be - * specified in the table properties of the created view under the key - * {@value #TBLPROPERTIES_DEPENDENCIES_KEY} or under - * [fn].{@value #TBLPROPERTIES_DEPENDENCIES_KEY} for function specific - * dependencies - * - * e.g 'dependencies' = 'o1:m1:v1 o2:m2:v2?transitive=false' - */ - static final String TBLPROPERTIES_DEPENDENCIES_KEY = "dependencies"; - - private static final Splitter tblpropertiesSplitter = - Splitter.on(Pattern.compile("\\s+")).omitEmptyStrings().trimResults(); - - private static final Splitter.MapSplitter functionsKeyValueSplitter = - tblpropertiesSplitter.withKeyValueSeparator(Splitter.on(":").limit(2)); +@Deprecated +public class HiveCalciteTableAdapter extends com.linkedin.coral.catalog.hive.HiveCalciteTableAdapter { /** - * Constructor to create bridge from hive table to calcite table - * @param hiveTable Hive table + * @deprecated Use {@link 
com.linkedin.coral.catalog.hive.HiveCalciteTableAdapter#HiveCalciteTableAdapter(org.apache.hadoop.hive.metastore.api.Table)} instead. */ + @Deprecated public HiveCalciteTableAdapter(org.apache.hadoop.hive.metastore.api.Table hiveTable) { - Preconditions.checkNotNull(hiveTable); - this.hiveTable = hiveTable; + super(hiveTable); } /** - * Constructor accepting HiveCoralTable for unified catalog integration. - * @param coralTable HiveCoralTable from catalog + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveCalciteTableAdapter#HiveCalciteTableAdapter(com.linkedin.coral.catalog.hive.HiveTable)} instead. */ + @Deprecated public HiveCalciteTableAdapter(HiveTable coralTable) { - Preconditions.checkNotNull(coralTable); - this.hiveTable = coralTable.getHiveTable(); - } - - /** - * Get dali function params from table TBLPROPERTIES clause parameters. - * The 'functions' parameter in TBLPROPERTIES clause is a whitespace-separated list of function base name - * used in the view, followed by colon(:), followed by the corresponding full class names. Example: - * 'functions' = 'func_1:com.linkedin.Func1 func_2:com.linkedin.Func2' - * @return returns a mapping of function name to class name as stored in the - * {@code functions} parameter key of table parameters - */ - public Map getDaliFunctionParams() { - checkDaliTable(); - final String functionsValue = hiveTable.getParameters().get(TBLPROPERTIES_FUNCTIONS_KEY); - Map params = new HashMap<>(); - if (functionsValue != null) { - params = functionsKeyValueSplitter.split(functionsValue); - } - return params; - } - - /** - * Get Dali UDF dependencies from the "dependencies" Hive table property. - * The 'dependencies' parameter in TBLPROPERTIES clause is a whitespace-separated list of the ivy coordinates - * of the artifacts a UDF requires. 
Example: - * 'dependencies' = 'ivy://com.linkedin.foo:foo1:0.0.1 ivy://com.linkedin.foo:foo2:0.0.1' - * @return returns a string list of the ivy coordinates as stored in the - * {@code dependencies} table property. The return value is null if - * {@code dependencies} is not set. - */ - public List getDaliUdfDependencies() { - checkDaliTable(); - final String propertyValue = hiveTable.getParameters().get(TBLPROPERTIES_DEPENDENCIES_KEY); - if (propertyValue != null) { - return tblpropertiesSplitter.splitToList(propertyValue).stream() - .map(s -> s.toLowerCase().startsWith("ivy://") ? s : "ivy://" + s).collect(Collectors.toList()); - } - return ImmutableList.of(); - } - - public boolean isDaliTable() { - return hiveTable.getOwner().equalsIgnoreCase("daliview"); - } - - private void checkDaliTable() { - // FIXME: this fails unit test right now - // Preconditions.checkState(isDaliTable()); - } - - /** - * Returns the row type (schema) for this table. - * - * Current behavior (validation/shadow mode): - * - Always returns the legacy Hive → Calcite direct conversion - * - Validates against the new Hive → Coral → Calcite two-stage conversion - * - Logs warnings if conversions don't match or if validation fails - * - * This allows safe validation of the new conversion path in production - * before switching to use it as the primary path. 
- * - * @param typeFactory Calcite type factory - * @return RelDataType representing the table schema - */ - @Override - public RelDataType getRowType(RelDataTypeFactory typeFactory) { - // Always compute and return the legacy Hive direct conversion (production path) - RelDataType hiveType = getRowTypeFromHiveType(typeFactory); - - // Validate against new two-stage Coral conversion (shadow/validation mode) - try { - RelDataType coralType = getRowTypeFromCoralType(typeFactory); - - // Compare using structural equality (not reference equality) - if (!RelOptUtil.areRowTypesEqual(hiveType, coralType, false)) { - LOG.warn("Hive and Coral type conversion mismatch for table {}.{}. Hive: {}, Coral: {}", hiveTable.getDbName(), - hiveTable.getTableName(), hiveType, coralType); - } - } catch (Exception e) { - // Log validation failure but continue with Hive type (zero production impact) - LOG.warn("Coral type validation failed for table {}.{}. Proceeding with Hive type. Error: {}", - hiveTable.getDbName(), hiveTable.getTableName(), e.getMessage(), e); - } - - // Always return the battle-tested Hive conversion result - return hiveType; - } - - /** - * Two-stage conversion: Hive → Coral → Calcite. - * This is the preferred path when using CoralCatalog. - */ - private RelDataType getRowTypeFromCoralType(RelDataTypeFactory typeFactory) { - // Step 1: Hive → Coral - StructType structType = (StructType) getCoralSchema(); - - // Step 2: Coral → Calcite - return CoralTypeToRelDataTypeConverter.convert(structType, typeFactory); - } - - /** - * Direct conversion: Hive → Calcite. - * This is the legacy path for backward compatibility. 
- */ - private RelDataType getRowTypeFromHiveType(RelDataTypeFactory typeFactory) { - final List cols = getColumns(); - final List fieldTypes = new ArrayList<>(cols.size()); - final List fieldNames = new ArrayList<>(cols.size()); - final Iterable allCols = Iterables.concat(cols, hiveTable.getPartitionKeys()); - - allCols.forEach(col -> { - final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); - final RelDataType relType = TypeConverter.convert(typeInfo, typeFactory); - final String colName = col.getName(); - if (!fieldNames.contains(colName)) { - fieldNames.add(colName); - fieldTypes.add(relType); - } - }); - - return typeFactory.createStructType(fieldTypes, fieldNames); - } - - /** - * Returns the table schema in Coral type system. - * This includes both regular columns (from StorageDescriptor) and partition columns. - * Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter. - * - * @return StructType representing the full table schema (columns + partitions) - */ - public CoralDataType getCoralSchema() { - final List cols = getColumns(); - final List fields = new ArrayList<>(); - final List fieldNames = new ArrayList<>(); - - // Combine regular columns and partition keys (same as HiveCalciteTableAdapter.getRowType) - final Iterable allCols = Iterables.concat(cols, hiveTable.getPartitionKeys()); - - for (FieldSchema col : allCols) { - final String colName = col.getName(); - - // Skip duplicate columns (partition keys might overlap with regular columns) - if (!fieldNames.contains(colName)) { - // Convert Hive type string to TypeInfo, then to CoralDataType - final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); - final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo); - - fields.add(StructField.of(colName, coralType)); - fieldNames.add(colName); - } - } - - // Return struct type representing the table schema - // Table-level struct is nullable (Hive convention) - return 
StructType.of(fields, true); - } - - private List getColumns() { - StorageDescriptor sd = hiveTable.getSd(); - String serDeLib = getSerializationLib(); - if (serDeLib == null || serDeLib.isEmpty()) { - // views don't have serde library - return sd.getCols(); - } else { - try { - return MetaStoreUtils.getFieldsFromDeserializer(hiveTable.getTableName(), getDeserializer()); - } catch (Exception e) { - // if there is an exception like failing to get the deserializer or failing to get columns using deserializer, - // we use sd.getCols() to avoid throwing exception - LOG.warn("Failed to get columns using deserializer: {}", e.getMessage()); - return sd.getCols(); - } - } - } - - private String getSerializationLib() { - return hiveTable.getSd().getSerdeInfo().getSerializationLib(); - } - - private Deserializer getDeserializer() { - if (deserializer == null) { - deserializer = getDeserializerFromMetaStore(); - } - return deserializer; - } - - private Deserializer getDeserializerFromMetaStore() { - try { - return MetaStoreUtils.getDeserializer(new Configuration(false), hiveTable, false); - } catch (Throwable e) { // Catch Throwable here because it may throw Exception or Error - throw new RuntimeException(e); - } - } - - @Override - public Statistic getStatistic() { - return Statistics.UNKNOWN; - } - - @Override - public Schema.TableType getJdbcTableType() { - TableType tableType = Enum.valueOf(TableType.class, hiveTable.getTableType()); - switch (tableType) { - case VIRTUAL_VIEW: - return Schema.TableType.VIEW; - case MANAGED_TABLE: - return Schema.TableType.TABLE; - case INDEX_TABLE: - return Schema.TableType.INDEX; - default: - throw new RuntimeException("Unknown table type: " + hiveTable.getTableType()); - } - } - - @Override - public boolean isRolledUp(String s) { - return false; - } - - @Override - public boolean rolledUpColumnValidInsideAgg(String s, SqlCall sqlCall, SqlNode sqlNode, - CalciteConnectionConfig calciteConnectionConfig) { - return true; - } - - @Override 
- public Enumerable scan(DataContext dataContext) { - throw new RuntimeException("Calcite runtime is not supported"); + super(coralTable.getHiveTable()); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteViewAdapter.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteViewAdapter.java index a129a8d13..f43906933 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteViewAdapter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveCalciteViewAdapter.java @@ -7,70 +7,31 @@ import java.util.List; -import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.schema.TranslatableTable; import org.apache.hadoop.hive.metastore.api.Table; -import com.linkedin.coral.com.google.common.base.Throwables; -import com.linkedin.coral.com.google.common.collect.ImmutableList; import com.linkedin.coral.common.catalog.HiveTable; /** - * Calcite adapter for Hive views, extending HiveCalciteTableAdapter with TranslatableTable support - * for recursive expansion of view definitions. - * - *

    This adapter enables Calcite to process Hive views by implementing the TranslatableTable interface, - * which allows the view definition to be expanded and converted into a relational algebra tree. - * - *

    Integration with ParseTreeBuilder: This class inherits the Dali UDF metadata extraction - * methods from {@link HiveCalciteTableAdapter}, which are used by {@code ParseTreeBuilder} and - * {@code HiveFunctionResolver} for parsing view definitions with custom functions. These components - * are currently tightly coupled to {@link org.apache.hadoop.hive.metastore.api.Table} and cannot - * work directly with {@link com.linkedin.coral.common.catalog.CoralTable}. - * - *

    This coupling is being addressed in issue #575, - * which will refactor these APIs to accept {@code CoralTable} instead, enabling multi-format support. - * - * @see HiveCalciteTableAdapter - * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveCalciteViewAdapter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ -public class HiveCalciteViewAdapter extends HiveCalciteTableAdapter implements TranslatableTable { - private final List schemaPath; +@Deprecated +public class HiveCalciteViewAdapter extends com.linkedin.coral.catalog.hive.HiveCalciteViewAdapter { /** - * Constructor to create bridge from hive table to calcite table - * - * @param hiveTable Hive table - * @param schemaPath Calcite schema path + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveCalciteViewAdapter#HiveCalciteViewAdapter(Table, List)} instead. */ + @Deprecated public HiveCalciteViewAdapter(Table hiveTable, List schemaPath) { - super(hiveTable); - this.schemaPath = schemaPath; + super(hiveTable, schemaPath); } /** - * Constructor accepting HiveCoralTable for unified catalog integration. - * - * @param coralTable HiveCoralTable from catalog - * @param schemaPath Calcite schema path + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveCalciteViewAdapter#HiveCalciteViewAdapter(com.linkedin.coral.catalog.hive.HiveTable, List)} instead. 
*/ + @Deprecated public HiveCalciteViewAdapter(HiveTable coralTable, List schemaPath) { - super(coralTable); - this.schemaPath = schemaPath; - } - - @Override - public RelNode toRel(RelOptTable.ToRelContext relContext, RelOptTable relOptTable) { - try { - RelRoot root = relContext.expandView(relOptTable.getRowType(), hiveTable.getViewExpandedText(), schemaPath, - ImmutableList.of(hiveTable.getTableName())); - return root.rel; - } catch (Exception e) { - Throwables.propagateIfInstanceOf(e, RuntimeException.class); - throw new RuntimeException("Error while parsing view definition", e); - } + super(coralTable.getHiveTable(), schemaPath); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java index 0b25b5048..ee0186c35 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java @@ -5,139 +5,29 @@ */ package com.linkedin.coral.common; -import java.util.ArrayList; -import java.util.List; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.*; - -import com.linkedin.coral.common.types.*; +import com.linkedin.coral.common.types.CoralDataType; /** - * Converts Hive TypeInfo objects to Coral data types. - * This enables integration between Hive's type system and Coral's type system. + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveToCoralTypeConverter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ +@Deprecated public final class HiveToCoralTypeConverter { private HiveToCoralTypeConverter() { - // Utility class - prevent instantiation } /** * Converts a Hive TypeInfo to a Coral data type. 
* @param typeInfo the Hive type to convert * @return the corresponding Coral data type + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveToCoralTypeConverter#convert(TypeInfo)} instead. */ + @Deprecated public static CoralDataType convert(TypeInfo typeInfo) { - if (typeInfo == null) { - throw new IllegalArgumentException("TypeInfo cannot be null"); - } - - switch (typeInfo.getCategory()) { - case PRIMITIVE: - return convertPrimitive((PrimitiveTypeInfo) typeInfo); - case LIST: - return convertList((ListTypeInfo) typeInfo); - case MAP: - return convertMap((MapTypeInfo) typeInfo); - case STRUCT: - return convertStruct((StructTypeInfo) typeInfo); - case UNION: - return convertUnion((UnionTypeInfo) typeInfo); - default: - throw new UnsupportedOperationException("Unsupported type category: " + typeInfo.getCategory()); - } - } - - private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) { - boolean nullable = true; // Hive types are generally nullable - - switch (type.getPrimitiveCategory()) { - case BOOLEAN: - return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable); - case BYTE: - return PrimitiveType.of(CoralTypeKind.TINYINT, nullable); - case SHORT: - return PrimitiveType.of(CoralTypeKind.SMALLINT, nullable); - case INT: - return PrimitiveType.of(CoralTypeKind.INT, nullable); - case LONG: - return PrimitiveType.of(CoralTypeKind.BIGINT, nullable); - case FLOAT: - return PrimitiveType.of(CoralTypeKind.FLOAT, nullable); - case DOUBLE: - return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable); - case STRING: - return PrimitiveType.of(CoralTypeKind.STRING, nullable); - case DATE: - return PrimitiveType.of(CoralTypeKind.DATE, nullable); - case TIMESTAMP: - // Hive TIMESTAMP has no explicit precision (matches TypeConverter behavior) - // Use PRECISION_NOT_SPECIFIED (-1) to match Calcite's behavior - return TimestampType.of(TimestampType.PRECISION_NOT_SPECIFIED, nullable); - case BINARY: - // Hive BINARY is unbounded/variable-length - return 
BinaryType.of(BinaryType.LENGTH_UNBOUNDED, nullable); - case DECIMAL: - DecimalTypeInfo decimalType = (DecimalTypeInfo) type; - return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable); - case VARCHAR: - VarcharTypeInfo varcharType = (VarcharTypeInfo) type; - return VarcharType.of(varcharType.getLength(), nullable); - case CHAR: - CharTypeInfo charType = (CharTypeInfo) type; - return CharType.of(charType.getLength(), nullable); - case VOID: - return PrimitiveType.of(CoralTypeKind.NULL, true); - case UNKNOWN: - return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback - default: - throw new UnsupportedOperationException("Unsupported primitive type: " + type.getPrimitiveCategory()); - } - } - - private static CoralDataType convertList(ListTypeInfo listType) { - CoralDataType elementType = convert(listType.getListElementTypeInfo()); - return ArrayType.of(elementType, true); // Lists are nullable in Hive - } - - private static CoralDataType convertMap(MapTypeInfo mapType) { - CoralDataType keyType = convert(mapType.getMapKeyTypeInfo()); - CoralDataType valueType = convert(mapType.getMapValueTypeInfo()); - return MapType.of(keyType, valueType, true); // Maps are nullable in Hive - } - - private static CoralDataType convertStruct(StructTypeInfo structType) { - List fieldNames = structType.getAllStructFieldNames(); - List fieldTypeInfos = structType.getAllStructFieldTypeInfos(); - - List fields = new ArrayList<>(); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - CoralDataType fieldType = convert(fieldTypeInfos.get(i)); - fields.add(StructField.of(fieldNames.get(i), fieldType)); - } - - return StructType.of(fields, true); // Structs are nullable in Hive - } - - private static CoralDataType convertUnion(UnionTypeInfo unionType) { - // For UNION types, create a struct conforming to Trino's union representation - // Schema: {tag, field0, field1, ..., fieldN} - // See: https://github.com/trinodb/trino/pull/3483 - 
List memberTypes = unionType.getAllUnionObjectTypeInfos(); - - // Create fields: "tag" field first (INTEGER), then "field0", "field1", etc. - List fields = new ArrayList<>(); - - // Add "tag" field (INTEGER) to indicate which union member is active - fields.add(StructField.of("tag", PrimitiveType.of(CoralTypeKind.INT, true))); - - // Add fields for each possible type in the union - for (int i = 0; i < memberTypes.size(); i++) { - CoralDataType fieldType = convert(memberTypes.get(i)); - fields.add(StructField.of("field" + i, fieldType)); - } - - return StructType.of(fields, true); + return com.linkedin.coral.catalog.hive.HiveToCoralTypeConverter.convert(typeInfo); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/IcebergCalciteTableAdapter.java b/coral-common/src/main/java/com/linkedin/coral/common/IcebergCalciteTableAdapter.java index 87a93ad40..933242430 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/IcebergCalciteTableAdapter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/IcebergCalciteTableAdapter.java @@ -5,106 +5,21 @@ */ package com.linkedin.coral.common; -import com.google.common.base.Preconditions; - -import org.apache.calcite.DataContext; -import org.apache.calcite.config.CalciteConnectionConfig; -import org.apache.calcite.linq4j.Enumerable; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.schema.ScannableTable; -import org.apache.calcite.schema.Schema; -import org.apache.calcite.schema.Statistic; -import org.apache.calcite.schema.Statistics; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlNode; - import com.linkedin.coral.common.catalog.IcebergTable; -import com.linkedin.coral.common.catalog.TableType; -import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter; -import com.linkedin.coral.common.types.StructType; /** - * Calcite adapter for Apache Iceberg tables, bridging Iceberg 
metadata to Calcite's ScannableTable interface. - * - *

    This adapter provides native Iceberg schema to Calcite using two-stage conversion: - * Iceberg → Coral → Calcite. - * - *

    This class uses IcebergCoralTable to access Iceberg table metadata and converts - * through the Coral type system for better abstraction and consistency with HiveCalciteTableAdapter. - * - *

    Integration with ParseTreeBuilder and HiveFunctionResolver: While this adapter itself - * doesn't provide Dali UDF methods, Iceberg tables with UDF metadata still need to work with - * {@code ParseTreeBuilder} and {@code HiveFunctionResolver}, which are currently tightly coupled to - * {@link org.apache.hadoop.hive.metastore.api.Table}. The temporary workaround is - * {@link com.linkedin.coral.common.catalog.IcebergHiveTableConverter}, which converts Iceberg tables - * to Hive Table objects for UDF resolution. - * - *

    This coupling is being addressed in issue #575, - * which will refactor {@code ParseTreeBuilder} and {@code HiveFunctionResolver} to accept - * {@link com.linkedin.coral.common.catalog.CoralTable} instead, enabling direct Iceberg support without conversion. - * - * @see com.linkedin.coral.common.catalog.IcebergHiveTableConverter - * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergCalciteTableAdapter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ -public class IcebergCalciteTableAdapter implements ScannableTable { - - private final IcebergTable coralTable; +@Deprecated +public class IcebergCalciteTableAdapter extends com.linkedin.coral.catalog.iceberg.IcebergCalciteTableAdapter { /** - * Creates IcebergCalciteTableAdapter from IcebergCoralTable. - * - * @param coralTable IcebergCoralTable from catalog + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergCalciteTableAdapter#IcebergCalciteTableAdapter(com.linkedin.coral.catalog.iceberg.IcebergTable)} instead. */ + @Deprecated public IcebergCalciteTableAdapter(IcebergTable coralTable) { - Preconditions.checkNotNull(coralTable); - this.coralTable = coralTable; - } - - /** - * Returns the row type (schema) for this Iceberg table. - * - * Uses two-stage conversion: Iceberg → Coral → Calcite. - * This provides a unified type system abstraction across table formats. 
- * - * @param typeFactory Calcite type factory - * @return RelDataType representing the table schema - */ - @Override - public RelDataType getRowType(RelDataTypeFactory typeFactory) { - // Step 1: Iceberg → Coral - StructType structType = (StructType) coralTable.getSchema(); - - // Step 2: Coral → Calcite - return CoralTypeToRelDataTypeConverter.convert(structType, typeFactory); - } - - @Override - public Statistic getStatistic() { - // Future enhancement: Could use Iceberg statistics here - // Iceberg provides rich statistics: row count, file count, size, etc. - return Statistics.UNKNOWN; - } - - @Override - public Schema.TableType getJdbcTableType() { - return coralTable.tableType() == TableType.VIEW ? Schema.TableType.VIEW : Schema.TableType.TABLE; - } - - @Override - public boolean isRolledUp(String column) { - return false; - } - - @Override - public boolean rolledUpColumnValidInsideAgg(String column, SqlCall call, SqlNode parent, - CalciteConnectionConfig config) { - return true; - } - - @Override - public Enumerable scan(DataContext root) { - throw new RuntimeException("Calcite runtime execution is not supported"); + super(coralTable); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/IcebergToCoralTypeConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/IcebergToCoralTypeConverter.java index cdb62fe0b..d03929d47 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/IcebergToCoralTypeConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/IcebergToCoralTypeConverter.java @@ -5,179 +5,36 @@ */ package com.linkedin.coral.common; -import java.util.ArrayList; -import java.util.List; - import org.apache.iceberg.Schema; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; -import com.linkedin.coral.common.types.*; +import com.linkedin.coral.common.types.CoralDataType; +import com.linkedin.coral.common.types.StructType; /** - * Converts Iceberg Schema and Types to Coral 
data types. - * This is the first stage of the two-stage conversion: Iceberg → Coral → Calcite. - * - * This converter provides a unified type system abstraction by converting Iceberg types - * to Coral's intermediate type representation, which can then be converted to Calcite - * RelDataType using {@link com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter}. + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergToCoralTypeConverter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ +@Deprecated public class IcebergToCoralTypeConverter { private IcebergToCoralTypeConverter() { - // Utility class } /** - * Converts Iceberg Schema to Coral StructType. - * - * @param icebergSchema Iceberg table schema - * @return StructType representing the Iceberg schema in Coral type system + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergToCoralTypeConverter#convert(Schema)} instead. */ + @Deprecated public static StructType convert(Schema icebergSchema) { - List columns = icebergSchema.columns(); - List fields = new ArrayList<>(columns.size()); - - for (Types.NestedField field : columns) { - // Convert field type using the main dispatcher - CoralDataType fieldType = convert(field.type(), field.isOptional()); - fields.add(StructField.of(field.name(), fieldType)); - } - - // Table-level struct is nullable (consistent with Hive convention) - return StructType.of(fields, true); + return com.linkedin.coral.catalog.iceberg.IcebergToCoralTypeConverter.convert(icebergSchema); } /** - * Main dispatcher - converts Iceberg Type to Coral CoralDataType based on type category. - * - * @param icebergType Iceberg type - * @param nullable Whether this type instance is nullable - * @return CoralDataType representing the Iceberg type + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergToCoralTypeConverter#convert(Type, boolean)} instead. 
*/ + @Deprecated public static CoralDataType convert(Type icebergType, boolean nullable) { - Type.TypeID typeId = icebergType.typeId(); - - switch (typeId) { - case STRUCT: - return convertStruct((Types.StructType) icebergType, nullable); - case LIST: - return convertList((Types.ListType) icebergType, nullable); - case MAP: - return convertMap((Types.MapType) icebergType, nullable); - default: - // Handle all primitive types - return convertPrimitive(icebergType, nullable); - } - } - - /** - * Converts Iceberg primitive types to Coral CoralDataType. - * Handles all atomic types: BOOLEAN, INTEGER, LONG, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, - * STRING, UUID, FIXED, BINARY, DECIMAL. - * - * @param icebergType Iceberg primitive type - * @param nullable Whether this type instance is nullable - * @return CoralDataType representing the primitive type - */ - private static CoralDataType convertPrimitive(Type icebergType, boolean nullable) { - Type.TypeID typeId = icebergType.typeId(); - - switch (typeId) { - case BOOLEAN: - return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable); - case INTEGER: - return PrimitiveType.of(CoralTypeKind.INT, nullable); - case LONG: - return PrimitiveType.of(CoralTypeKind.BIGINT, nullable); - case FLOAT: - return PrimitiveType.of(CoralTypeKind.FLOAT, nullable); - case DOUBLE: - return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable); - case DATE: - return PrimitiveType.of(CoralTypeKind.DATE, nullable); - case TIME: - return PrimitiveType.of(CoralTypeKind.TIME, nullable); - case TIMESTAMP: - // Iceberg has two timestamp variants: TimestampType.withZone() and TimestampType.withoutZone() - // (see https://github.com/linkedin/iceberg/blob/ebf4776724f346310105a58a6966a69dba2200c1/api/src/main/java/org/apache/iceberg/types/Types.java#L49) - // Both return typeId() == TypeID.TIMESTAMP, so they both reach this case statement. - // - // LIMITATION: Coral's TimestampType currently does NOT distinguish between with/without timezone. 
- // We map both variants to the same TimestampType(precision=6) for now. - // Future enhancement: Add timezone awareness to Coral's type system to preserve this distinction. - // - // Iceberg timestamps always have microsecond precision (6 fractional digits). - return TimestampType.of(6, nullable); - case STRING: - // String in Iceberg maps to VARCHAR with max length - return VarcharType.of(Integer.MAX_VALUE, nullable); - case UUID: - // Represent UUID as CHAR(36) to preserve UUID semantics - return CharType.of(36, nullable); - case FIXED: - Types.FixedType fixedType = (Types.FixedType) icebergType; - // Fixed-length binary - preserve length - return BinaryType.of(fixedType.length(), nullable); - case BINARY: - // Variable-length binary - return BinaryType.of(BinaryType.LENGTH_UNBOUNDED, nullable); - case DECIMAL: - Types.DecimalType decimalType = (Types.DecimalType) icebergType; - return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable); - default: - throw new UnsupportedOperationException( - "Unsupported Iceberg primitive type: " + icebergType + " (TypeID: " + typeId + ")"); - } - } - - /** - * Converts Iceberg ListType to Coral ArrayType. - * - * @param listType Iceberg list type - * @param nullable Whether the list itself is nullable - * @return ArrayType representing the list/array - */ - private static ArrayType convertList(Types.ListType listType, boolean nullable) { - // Recursively convert element type with its nullability - CoralDataType elementType = convert(listType.elementType(), listType.isElementOptional()); - return ArrayType.of(elementType, nullable); - } - - /** - * Converts Iceberg MapType to Coral MapType. 
- * - * @param mapType Iceberg map type - * @param nullable Whether the map itself is nullable - * @return MapType representing the map - */ - private static MapType convertMap(Types.MapType mapType, boolean nullable) { - // Recursively convert key and value types - // Iceberg map keys are always required (not nullable) - CoralDataType keyType = convert(mapType.keyType(), false); - CoralDataType valueType = convert(mapType.valueType(), mapType.isValueOptional()); - - return MapType.of(keyType, valueType, nullable); - } - - /** - * Converts Iceberg StructType to Coral StructType. - * - * @param structType Iceberg struct type - * @param nullable Whether the struct itself is nullable - * @return StructType representing the struct - */ - private static StructType convertStruct(Types.StructType structType, boolean nullable) { - List fields = structType.fields(); - List coralFields = new ArrayList<>(fields.size()); - - for (Types.NestedField field : fields) { - // Recursively convert field type using main dispatcher with field's nullability - CoralDataType fieldType = convert(field.type(), field.isOptional()); - coralFields.add(StructField.of(field.name(), fieldType)); - } - - return StructType.of(coralFields, nullable); + return com.linkedin.coral.catalog.iceberg.IcebergToCoralTypeConverter.convert(icebergType, nullable); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index f76150684..2ff3f6fe8 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -38,11 +38,11 @@ import org.apache.calcite.util.Util; import com.linkedin.coral.com.google.common.annotations.VisibleForTesting; +import com.linkedin.coral.catalog.hive.HiveTable; +import com.linkedin.coral.catalog.iceberg.IcebergHiveTableConverter; +import 
com.linkedin.coral.catalog.iceberg.IcebergTable; import com.linkedin.coral.common.catalog.CoralCatalog; import com.linkedin.coral.common.catalog.CoralTable; -import com.linkedin.coral.common.catalog.HiveTable; -import com.linkedin.coral.common.catalog.IcebergHiveTableConverter; -import com.linkedin.coral.common.catalog.IcebergTable; import static com.google.common.base.Preconditions.checkNotNull; diff --git a/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java index 9e4e47ab5..1ff06f5db 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java @@ -5,221 +5,84 @@ */ package com.linkedin.coral.common; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.*; /** - * Copied from org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter + * @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ - +@Deprecated public class TypeConverter { private TypeConverter() { - } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(TypeInfo, RelDataTypeFactory)} instead. 
*/ + @Deprecated public static RelDataType convert(TypeInfo typeInfo, RelDataTypeFactory relTypeFactory) { - switch (typeInfo.getCategory()) { - case PRIMITIVE: - return convert((PrimitiveTypeInfo) typeInfo, relTypeFactory); - case LIST: - return convert((ListTypeInfo) typeInfo, relTypeFactory); - case MAP: - return convert((MapTypeInfo) typeInfo, relTypeFactory); - case STRUCT: - return convert((StructTypeInfo) typeInfo, relTypeFactory); - case UNION: - return convert((UnionTypeInfo) typeInfo, relTypeFactory); - default: - throw new RuntimeException("Unknown type category: " + typeInfo.getCategory()); - } + return com.linkedin.coral.catalog.hive.TypeConverter.convert(typeInfo, relTypeFactory); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(PrimitiveTypeInfo, RelDataTypeFactory)} instead. */ + @Deprecated public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) { - RelDataType convertedType = null; - - switch (type.getPrimitiveCategory()) { - case VOID: - convertedType = dtFactory.createSqlType(SqlTypeName.NULL); - break; - case BOOLEAN: - convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN); - break; - case BYTE: - convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT); - break; - case SHORT: - convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT); - break; - case INT: - convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER); - break; - case LONG: - convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT); - break; - case FLOAT: - convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT); - break; - case DOUBLE: - convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE); - break; - case STRING: - convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE); - break; - case DATE: - convertedType = dtFactory.createSqlType(SqlTypeName.DATE); - break; - case TIMESTAMP: - convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP); - break; - 
case BINARY: - convertedType = dtFactory.createSqlType(SqlTypeName.BINARY); - break; - case DECIMAL: - DecimalTypeInfo dtInf = (DecimalTypeInfo) type; - convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale()); - break; - case VARCHAR: - convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()); - break; - case CHAR: - convertedType = dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()); - break; - case UNKNOWN: - convertedType = dtFactory.createSqlType(SqlTypeName.OTHER); - break; - default: - throw new RuntimeException("Unknown primitive type category: " + type.getPrimitiveCategory()); - } - - if (null == convertedType) { - throw new RuntimeException("Unsupported Type : " + type.getTypeName()); - } - - return dtFactory.createTypeWithNullability(convertedType, true); + return com.linkedin.coral.catalog.hive.TypeConverter.convert(type, dtFactory); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(ListTypeInfo, RelDataTypeFactory)} instead. */ + @Deprecated public static RelDataType convert(ListTypeInfo lstType, RelDataTypeFactory dtFactory) { - RelDataType elemType = convert(lstType.getListElementTypeInfo(), dtFactory); - RelDataType arrayType = dtFactory.createArrayType(elemType, -1); - return dtFactory.createTypeWithNullability(arrayType, true); + return com.linkedin.coral.catalog.hive.TypeConverter.convert(lstType, dtFactory); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(MapTypeInfo, RelDataTypeFactory)} instead. 
*/ + @Deprecated public static RelDataType convert(MapTypeInfo mapType, RelDataTypeFactory dtFactory) { - RelDataType keyType = convert(mapType.getMapKeyTypeInfo(), dtFactory); - RelDataType valueType = convert(mapType.getMapValueTypeInfo(), dtFactory); - RelDataType type = dtFactory.createMapType(keyType, valueType); - return dtFactory.createTypeWithNullability(type, true); + return com.linkedin.coral.catalog.hive.TypeConverter.convert(mapType, dtFactory); } - public static RelDataType convert(StructTypeInfo structType, final RelDataTypeFactory dtFactory) { - List fTypes = new ArrayList<>(structType.getAllStructFieldTypeInfos().size()); - for (TypeInfo ti : structType.getAllStructFieldTypeInfos()) { - fTypes.add(convert(ti, dtFactory)); - } - RelDataType rowType = dtFactory.createStructType(fTypes, structType.getAllStructFieldNames()); - // TODO: Return nullable record type. - // All types in Hive are effectively nullable since the data is injected from external source. - // Calcite does not support nullable record type and since we don't create our own type factory - // ... we've problem! The call below only makes the fields of the struct nullable which is - // not the same as nullable struct. - return dtFactory.createTypeWithNullability(rowType, true); + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(StructTypeInfo, RelDataTypeFactory)} instead. */ + @Deprecated + public static RelDataType convert(StructTypeInfo structType, RelDataTypeFactory dtFactory) { + return com.linkedin.coral.catalog.hive.TypeConverter.convert(structType, dtFactory); } - // Mimic the StructTypeInfo conversion to convert a UnionTypeInfo to the corresponding RelDataType - // The schema of output Struct conforms to https://github.com/trinodb/trino/pull/3483 - // except we adopted "integer" for the type of "tag" field instead of "tinyint" in the Trino patch - // for compatibility with other platforms that Iceberg currently doesn't support tinyint type. 
- - // Note: this is subject to change in the future pending on the discussion in - // https://mail-archives.apache.org/mod_mbox/iceberg-dev/202112.mbox/browser + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(UnionTypeInfo, RelDataTypeFactory)} instead. */ + @Deprecated public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) { - List fTypes = unionType.getAllUnionObjectTypeInfos().stream() - .map(typeInfo -> convert(typeInfo, dtFactory)).collect(Collectors.toList()); - List fNames = IntStream.range(0, unionType.getAllUnionObjectTypeInfos().size()).mapToObj(i -> "field" + i) - .collect(Collectors.toList()); - fTypes.add(0, dtFactory.createSqlType(SqlTypeName.INTEGER)); - fNames.add(0, "tag"); - - RelDataType rowType = dtFactory.createStructType(fTypes, fNames); - return dtFactory.createTypeWithNullability(rowType, true); + return com.linkedin.coral.catalog.hive.TypeConverter.convert(unionType, dtFactory); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convert(RelDataType)} instead. */ + @Deprecated public static TypeInfo convert(RelDataType rType) { - if (rType.isStruct()) { - return convertStructType(rType); - } else if (rType.getComponentType() != null) { - return convertListType(rType); - } else if (rType.getKeyType() != null) { - return convertMapType(rType); - } else { - return convertPrimtiveType(rType); - } + return com.linkedin.coral.catalog.hive.TypeConverter.convert(rType); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convertStructType(RelDataType)} instead. 
*/ + @Deprecated public static TypeInfo convertStructType(RelDataType rType) { - List fTypes = rType.getFieldList().stream().map(f -> convert(f.getType())).collect(Collectors.toList()); - List fNames = rType.getFieldNames(); - return TypeInfoFactory.getStructTypeInfo(fNames, fTypes); + return com.linkedin.coral.catalog.hive.TypeConverter.convertStructType(rType); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convertMapType(RelDataType)} instead. */ + @Deprecated public static TypeInfo convertMapType(RelDataType rType) { - return TypeInfoFactory.getMapTypeInfo(convert(rType.getKeyType()), convert(rType.getValueType())); + return com.linkedin.coral.catalog.hive.TypeConverter.convertMapType(rType); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convertListType(RelDataType)} instead. */ + @Deprecated public static TypeInfo convertListType(RelDataType rType) { - return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType())); + return com.linkedin.coral.catalog.hive.TypeConverter.convertListType(rType); } + /** @deprecated Use {@link com.linkedin.coral.catalog.hive.TypeConverter#convertPrimtiveType(RelDataType)} instead. 
*/ + @Deprecated public static TypeInfo convertPrimtiveType(RelDataType rType) { - switch (rType.getSqlTypeName()) { - case BOOLEAN: - return TypeInfoFactory.booleanTypeInfo; - case TINYINT: - return TypeInfoFactory.byteTypeInfo; - case SMALLINT: - return TypeInfoFactory.shortTypeInfo; - case INTEGER: - return TypeInfoFactory.intTypeInfo; - case BIGINT: - return TypeInfoFactory.longTypeInfo; - case FLOAT: - return TypeInfoFactory.floatTypeInfo; - case DOUBLE: - return TypeInfoFactory.doubleTypeInfo; - case DATE: - return TypeInfoFactory.dateTypeInfo; - case TIMESTAMP: - return TypeInfoFactory.timestampTypeInfo; - case BINARY: - return TypeInfoFactory.binaryTypeInfo; - case DECIMAL: - return TypeInfoFactory.getDecimalTypeInfo(rType.getPrecision(), rType.getScale()); - case VARCHAR: - if (rType.getPrecision() == Integer.MAX_VALUE) { - return TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); - } else { - return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision()); - } - case CHAR: - if (rType.getPrecision() > HiveChar.MAX_CHAR_LENGTH) { - return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision()); - } else { - return TypeInfoFactory.getCharTypeInfo(rType.getPrecision()); - } - case OTHER: - default: - return TypeInfoFactory.voidTypeInfo; - } + return com.linkedin.coral.catalog.hive.TypeConverter.convertPrimtiveType(rType); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/catalog/HiveTable.java b/coral-common/src/main/java/com/linkedin/coral/common/catalog/HiveTable.java index 6f92e1deb..55ca586cd 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/catalog/HiveTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/catalog/HiveTable.java @@ -5,124 +5,24 @@ */ package com.linkedin.coral.common.catalog; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.Iterables; - -import 
org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; - -import com.linkedin.coral.common.HiveToCoralTypeConverter; -import com.linkedin.coral.common.types.CoralDataType; -import com.linkedin.coral.common.types.StructField; -import com.linkedin.coral.common.types.StructType; - -import static com.google.common.base.Preconditions.*; /** - * Implementation of {@link CoralTable} interface for Hive tables. - * This class wraps a Hive metastore Table object and provides - * a unified CoralTable API for accessing table metadata. + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveTable} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ -public class HiveTable implements CoralTable { - - private final Table table; +@Deprecated +public class HiveTable extends com.linkedin.coral.catalog.hive.HiveTable { /** - * Creates a new HiveCoralTable wrapping the given Hive table. + * Creates a new HiveTable wrapping the given Hive table. * * @param table Hive metastore Table object (must not be null) + * @deprecated Use {@link com.linkedin.coral.catalog.hive.HiveTable#HiveTable(Table)} instead. */ + @Deprecated public HiveTable(Table table) { - this.table = checkNotNull(table, "Hive table cannot be null"); - } - - /** - * Returns the fully qualified table name in the format "database.table". - * - * @return Fully qualified table name - */ - @Override - public String name() { - return table.getDbName() + "." + table.getTableName(); - } - - /** - * Returns the table properties/parameters. - * This includes Hive table properties, SerDe properties, - * and any custom properties set on the table. - * - * @return Map of table properties - */ - @Override - public Map properties() { - return table.getParameters() != null ? 
table.getParameters() : Collections.emptyMap(); - } - - /** - * Returns the table type (TABLE or VIEW). - * Hive table types like MANAGED_TABLE, EXTERNAL_TABLE map to TABLE. - * VIRTUAL_VIEW and MATERIALIZED_VIEW map to VIEW. - * - * @return TableType enum value - */ - @Override - public TableType tableType() { - String hiveTableType = table.getTableType(); - if (hiveTableType != null && hiveTableType.toUpperCase().contains("VIEW")) { - return TableType.VIEW; - } - return TableType.TABLE; - } - - /** - * INTERNAL API - * @deprecated This method is for internal use only and will be removed in a future release. - * Do not depend on this API. - * - * @return Hive metastore Table object - */ - public org.apache.hadoop.hive.metastore.api.Table getHiveTable() { - return table; - } - - /** - * Returns the table schema in Coral type system. - * This includes both regular columns (from StorageDescriptor) and partition columns. - * Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter. - * - * @return StructType representing the full table schema (columns + partitions) - */ - @Override - public CoralDataType getSchema() { - final List cols = table.getSd() != null ? 
table.getSd().getCols() : Collections.emptyList(); - final List fields = new ArrayList<>(); - final List fieldNames = new ArrayList<>(); - - final Iterable allCols = Iterables.concat(cols, table.getPartitionKeys()); - - for (FieldSchema col : allCols) { - final String colName = col.getName(); - - // Skip duplicate columns (partition keys might overlap with regular columns) - if (!fieldNames.contains(colName)) { - // Convert Hive type string to TypeInfo, then to CoralDataType - final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); - final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo); - - fields.add(StructField.of(colName, coralType)); - fieldNames.add(colName); - } - } - - // Return struct type representing the table schema - // Table-level struct is nullable (Hive convention) - return StructType.of(fields, true); + super(table); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergHiveTableConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergHiveTableConverter.java index b72df9d8b..953d912a6 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergHiveTableConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergHiveTableConverter.java @@ -5,115 +5,39 @@ */ package com.linkedin.coral.common.catalog; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.iceberg.hive.HiveSchemaUtil; - /** - * Utility class to convert Iceberg datasets to Hive Table objects for backward compatibility. - * - *

    TEMPORARY BRIDGE CODE: This converter exists as a temporary workaround and will be removed - * once the refactoring in issue #575 is complete. - * - *

    Why this exists: The existing ParseTreeBuilder - * and HiveFunctionResolver are currently tightly coupled - * to Hive's {@code org.apache.hadoop.hive.metastore.api.Table} class and cannot work directly with - * {@link CoralTable} or {@link IcebergTable}. This converter bridges the gap by converting - * Iceberg tables to Hive Table objects for: - *

      - *
    • Dali UDF resolution (extracting "functions" and "dependencies" from table properties)
    • - *
    • Table identification (database name, table name)
    • - *
    • Ownership and permission checks (owner field)
    • - *
    - * - *

    Future: Once ParseTreeBuilder and HiveFunctionResolver are refactored to accept {@link CoralTable} - * instead of Hive Table (see issue #575), - * this converter will no longer be needed and should be removed. IcebergCoralTable will be passable directly. - * - *

    What gets converted: - *

      - *
    • Iceberg schema → Hive columns (via {@code HiveSchemaUtil.convert()})
    • - *
    • All Iceberg table properties → Hive table parameters (including Dali UDF metadata)
    • - *
    • Table metadata (name, owner, timestamps, table type)
    • - *
    • Storage descriptor with SerDe info (for compatibility)
    • - *
    - * - * @see Issue #575: Refactor ParseTreeBuilder to Use CoralTable + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergHiveTableConverter} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ +@Deprecated public class IcebergHiveTableConverter { private IcebergHiveTableConverter() { - // Utility class - prevent instantiation } /** - * Converts IcebergCoralTable to a Hive Table object for backward compatibility with function resolution. - * - *

    NOTE: This is temporary glue code that will be removed after - * issue #575 is resolved. + * Converts IcebergTable to a Hive Table object for backward compatibility. * * @param icebergCoralTable Iceberg coral table to convert - * @return Hive Table object with complete metadata and schema + * @return Hive Table object + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergHiveTableConverter#toHiveTable} instead. */ - public static Table toHiveTable(IcebergTable icebergCoralTable) { - org.apache.iceberg.Table icebergTable = icebergCoralTable.getIcebergTable(); - - // Parse db.table name (format: "dbname.tablename") - String fullName = icebergCoralTable.name(); - String dbName; - String tableName; - int dotIndex = fullName.indexOf('.'); - if (dotIndex > 0) { - dbName = fullName.substring(0, dotIndex); - tableName = fullName.substring(dotIndex + 1); - } else { - // Fallback if no dot (shouldn't happen in practice) - dbName = "default"; - tableName = fullName; - } - - // Convert Iceberg schema to Hive columns using HiveSchemaUtil - StorageDescriptor storageDescriptor = new StorageDescriptor(); - SerDeInfo serDeInfo = new SerDeInfo(); - - // Copy all Iceberg table properties to Hive table parameters - // This includes Dali UDF metadata ("functions", "dependencies") and any other custom properties - Map hiveParameters = new HashMap<>(icebergCoralTable.properties()); - - // Set SerDe parameters (include avro.schema.literal if present) - Map serdeParams = new HashMap<>(); - if (hiveParameters.containsKey("avro.schema.literal")) { - serdeParams.put("avro.schema.literal", hiveParameters.get("avro.schema.literal")); - } - serDeInfo.setParameters(serdeParams); - storageDescriptor.setSerdeInfo(serDeInfo); - - // Convert Iceberg schema to Hive columns - try { - storageDescriptor.setCols(HiveSchemaUtil.convert(icebergTable.schema())); - } catch (Exception e) { - // If schema conversion fails, set empty columns list - // This shouldn't break function resolution 
as it only needs properties - storageDescriptor.setCols(new ArrayList<>()); - } - - // Create Hive Table object with all metadata - Table hiveTable = new Table(tableName, dbName, StringUtils.EMPTY, // owner - 0, // createTime - 0, // lastModifiedTime - 0, // retention - storageDescriptor, new ArrayList<>(), // partition keys - hiveParameters, StringUtils.EMPTY, // viewOriginalText - StringUtils.EMPTY, // viewExpandedText - "MANAGED_TABLE"); // tableType + @Deprecated + public static org.apache.hadoop.hive.metastore.api.Table toHiveTable( + com.linkedin.coral.catalog.iceberg.IcebergTable icebergCoralTable) { + return com.linkedin.coral.catalog.iceberg.IcebergHiveTableConverter.toHiveTable(icebergCoralTable); + } - return hiveTable; + /** + * Converts the deprecated IcebergTable wrapper to a Hive Table object. + * + * @param icebergCoralTable Deprecated IcebergTable wrapper + * @return Hive Table object + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergHiveTableConverter#toHiveTable} instead. 
+ */ + @Deprecated + public static org.apache.hadoop.hive.metastore.api.Table toHiveTable(IcebergTable icebergCoralTable) { + return com.linkedin.coral.catalog.iceberg.IcebergHiveTableConverter.toHiveTable(icebergCoralTable); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergTable.java b/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergTable.java index aacafb9fc..b6ebfc05e 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/catalog/IcebergTable.java @@ -5,90 +5,24 @@ */ package com.linkedin.coral.common.catalog; -import java.util.HashMap; -import java.util.Map; - import org.apache.iceberg.Table; -import com.linkedin.coral.common.IcebergToCoralTypeConverter; -import com.linkedin.coral.common.types.CoralDataType; - -import static com.google.common.base.Preconditions.*; - /** - * Implementation of {@link CoralTable} interface for Apache Iceberg tables. - * This class wraps an Iceberg Table object and provides a unified - * CoralTable API for accessing table metadata. + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergTable} instead. + * This class is retained for backward compatibility and will be removed in a future release. */ -public class IcebergTable implements CoralTable { - - private final Table table; +@Deprecated +public class IcebergTable extends com.linkedin.coral.catalog.iceberg.IcebergTable { /** - * Creates a new IcebergCoralTable wrapping the given Iceberg table. + * Creates a new IcebergTable wrapping the given Iceberg table. * * @param table Iceberg Table object (must not be null) + * @deprecated Use {@link com.linkedin.coral.catalog.iceberg.IcebergTable#IcebergTable(Table)} instead. */ + @Deprecated public IcebergTable(Table table) { - this.table = checkNotNull(table, "Iceberg table cannot be null"); - } - - /** - * Returns the fully qualified table name from Iceberg table. 
- * Uses table.name() which returns the full table identifier. - * - * @return Fully qualified table name - */ - @Override - public String name() { - return table.name(); - } - - /** - * Returns the table properties from Iceberg table metadata. - * This includes properties set on the Iceberg table. - * - * @return Map of table properties - */ - @Override - public Map properties() { - if (table.properties() != null) { - return new HashMap<>(table.properties()); - } - return new HashMap<>(); - } - - /** - * Returns the table type. - * Iceberg tables are always considered physical tables (TABLE type). - * - * @return TableType.TABLE - */ - @Override - public TableType tableType() { - return TableType.TABLE; - } - - /** - * INTERNAL API - * @deprecated This method is for internal use only and will be removed in a future release. - * Do not depend on this API. - * - * @return Iceberg Table object - */ - public org.apache.iceberg.Table getIcebergTable() { - return table; - } - - /** - * Returns the table schema in Coral type system. - * Converts Iceberg schema to Coral types using IcebergToCoralTypeConverter. - * - * @return StructType representing the Iceberg table schema - */ - @Override - public CoralDataType getSchema() { - return IcebergToCoralTypeConverter.convert(table.schema()); + super(table); } } diff --git a/settings.gradle b/settings.gradle index 6991013c5..562e22722 100644 --- a/settings.gradle +++ b/settings.gradle @@ -5,6 +5,9 @@ pluginManagement { } } +include 'coral-catalog-spi' +include 'coral-catalog-hive' +include 'coral-catalog-iceberg' include 'coral-common' include 'coral-dbt' include 'coral-hive'