diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralRexBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralRexBuilder.java new file mode 100644 index 000000000..d2f36aaae --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralRexBuilder.java @@ -0,0 +1,41 @@ +/** + * Copyright 2022-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel; + +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; + + +public class CoralRexBuilder extends RexBuilder { + /** + * Creates a RexBuilder. + * + * @param typeFactory Type factory + */ + public CoralRexBuilder(RelDataTypeFactory typeFactory) { + super(typeFactory); + } + + /** + * CoralRexBuilder overrides this method to make field access case-insensitively, + * because in Hive 1.1, if the base table `t` contains non-lowercase struct field like `s struct(A:string)`, + * the schema of the view `v` based on the base table would become `s struct(a:string)`, + * translation for SQL `SELECT * FROM v WHERE v.s.A='xxx'` will fail with the following exception + * if caseSensitive=true, given Calcite would convert `v.s.A` to `v.s.a` to be aligned with the + * schema of view `v` during the validation phase: + * + * java.lang.AssertionError: Type 'RecordType(VARCHAR(2147483647) A)' has no field 'a' + * + * Setting caseSensitive=false would not cause regression because Calcite doesn't allow + * two struct fields which only differ in casing like `struct(a:string,A:string)`, check + * org.apache.calcite.sql.validate.DelegatingScope.fullyQualify for more info + */ + @Override + public RexNode makeFieldAccess(RexNode expr, String fieldName, boolean caseSensitive) { + return super.makeFieldAccess(expr, fieldName, false); + } +} diff --git 
a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlConformance.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlConformance.java new file mode 100644 index 000000000..8850b2576 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlConformance.java @@ -0,0 +1,41 @@ +/** + * Copyright 2021-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel; + +import org.apache.calcite.sql.validate.SqlConformance; +import org.apache.calcite.sql.validate.SqlConformanceEnum; +import org.apache.calcite.sql.validate.SqlDelegatingConformance; + + +public class CoralSqlConformance extends SqlDelegatingConformance { + + public static final SqlConformance CORAL_SQL = new CoralSqlConformance(); + + /** + * @deprecated Use {@link #CORAL_SQL} instead. + */ + @Deprecated + public static final SqlConformance HIVE_SQL = CORAL_SQL; + + protected CoralSqlConformance() { + super(SqlConformanceEnum.PRAGMATIC_2003); + } + + @Override + public boolean allowNiladicParentheses() { + return true; + } + + @Override + public boolean isSortByAlias() { + return true; + } + + @Override + public boolean isHavingAlias() { + return true; + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlToRelConverter.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlToRelConverter.java new file mode 100644 index 000000000..ced7b68cc --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlToRelConverter.java @@ -0,0 +1,113 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.hive.hive2rel; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlExplainFormat; +import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlUnnestOperator; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql2rel.SqlRexConvertletTable; +import org.apache.calcite.sql2rel.SqlToRelConverter; + +import com.linkedin.coral.common.HiveUncollect; +import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; + + +/** + * Class to convert SQL to Calcite RelNode. This class + * specializes the functionality provided by {@link SqlToRelConverter}. + */ +class CoralSqlToRelConverter extends SqlToRelConverter { + + CoralSqlToRelConverter(RelOptTable.ViewExpander viewExpander, SqlValidator validator, + Prepare.CatalogReader catalogReader, RelOptCluster cluster, SqlRexConvertletTable convertletTable, + Config config) { + super(viewExpander, validator, catalogReader, cluster, convertletTable, config); + } + + // This differs from base class in two ways: + // 1. 
This does not validate the type of converted rel rowType with that of validated node. This is because + // hive is lax in enforcing view schemas. + // 2. This skips calling some methods because (1) those are private, and (2) not required for our usecase + public RelRoot convertQuery(SqlNode query, final boolean needsValidation, final boolean top) { + if (needsValidation) { + query = validator.validate(query); + } + + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(cluster.getMetadataProvider())); + RelNode result = convertQueryRecursive(query, top, null).rel; + RelCollation collation = RelCollations.EMPTY; + + if (SQL2REL_LOGGER.isDebugEnabled()) { + SQL2REL_LOGGER.debug(RelOptUtil.dumpPlan("Plan after converting SqlNode to RelNode", result, + SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES)); + } + + final RelDataType validatedRowType = validator.getValidatedNodeType(query); + return RelRoot.of(result, validatedRowType, query.getKind()).withCollation(collation); + } + + @Override + protected void convertFrom(Blackboard bb, SqlNode from) { + if (from == null) { + super.convertFrom(bb, from); + return; + } + switch (from.getKind()) { + case UNNEST: + convertUnnestFrom(bb, from); + break; + default: + super.convertFrom(bb, from); + break; + } + } + + private void convertUnnestFrom(Blackboard bb, SqlNode from) { + final SqlCall call; + call = (SqlCall) from; + final List nodes = call.getOperandList(); + final SqlUnnestOperator operator = (SqlUnnestOperator) call.getOperator(); + // FIXME: base class calls 'replaceSubqueries for operands here but that's a private + // method. This is not an issue for our usecases with hive but we may need handling in future + final List exprs = new ArrayList<>(); + final List fieldNames = new ArrayList<>(); + for (Ord node : Ord.zip(nodes)) { + exprs.add(bb.convertExpression(node.e)); + // In Hive, "LATERAL VIEW EXPLODE(arr) t" is equivalent to "LATERAL VIEW EXPLODE(arr) t AS col". 
+ // Use the default column name "col" if not specified. + fieldNames.add(node.e.getKind() == SqlKind.AS ? validator.deriveAlias(node.e, node.i) + : CoralSqlUnnestOperator.ARRAY_ELEMENT_COLUMN_NAME); + } + final RelNode input = RelOptUtil.createProject((null != bb.root) ? bb.root : LogicalValues.createOneRow(cluster), + exprs, fieldNames, true); + Uncollect uncollect = + new HiveUncollect(cluster, cluster.traitSetOf(Convention.NONE), input, operator.withOrdinality); + bb.setRoot(uncollect, true); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlValidator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlValidator.java new file mode 100644 index 000000000..b03b585cb --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralSqlValidator.java @@ -0,0 +1,65 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.hive.hive2rel; + +import org.apache.calcite.adapter.java.JavaTypeFactory; +import org.apache.calcite.config.NullCollation; +import org.apache.calcite.prepare.CalciteCatalogReader; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlInsert; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.SqlUtil; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlConformance; +import org.apache.calcite.sql.validate.SqlValidatorImpl; +import org.apache.calcite.sql.validate.SqlValidatorScope; + +import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator; + + +public class CoralSqlValidator extends SqlValidatorImpl { + + public CoralSqlValidator(SqlOperatorTable opTab, CalciteCatalogReader catalogReader, JavaTypeFactory typeFactory, + SqlConformance conformance) { + super(opTab, catalogReader, typeFactory, conformance); + setDefaultNullCollation(NullCollation.LOW); + } + + @Override + protected RelDataType getLogicalSourceRowType(RelDataType sourceRowType, SqlInsert insert) { + final RelDataType superType = super.getLogicalSourceRowType(sourceRowType, insert); + return ((JavaTypeFactory) typeFactory).toSql(superType); + } + + @Override + protected RelDataType getLogicalTargetRowType(RelDataType targetRowType, SqlInsert insert) { + final RelDataType superType = super.getLogicalTargetRowType(targetRowType, insert); + return ((JavaTypeFactory) typeFactory).toSql(superType); + } + + @Override + protected void inferUnknownTypes(RelDataType inferredType, SqlValidatorScope scope, SqlNode node) { + if (SqlUtil.isNullLiteral(node, false)) { + setValidatedNodeType(node, typeFactory.createSqlType(SqlTypeName.NULL)); + return; + } + super.inferUnknownTypes(inferredType, scope, node); + } + + @Override + public SqlNode expand(SqlNode expr, SqlValidatorScope scope) { 
+ if (expr instanceof SqlBasicCall + && ((SqlBasicCall) expr).getOperator().equals(FunctionFieldReferenceOperator.DOT)) { + SqlBasicCall dotCall = (SqlBasicCall) expr; + if (dotCall.operand(0) instanceof SqlBasicCall) { + return expr; + } + } + return super.expand(expr, scope); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralViewExpander.java new file mode 100644 index 000000000..d9d24c4d8 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/CoralViewExpander.java @@ -0,0 +1,51 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel; + +import java.util.List; + +import javax.annotation.Nonnull; + +import com.google.common.base.Preconditions; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.util.Util; + +import com.linkedin.coral.common.FuzzyUnionSqlRewriter; + + +/** + * Class that implements {@link org.apache.calcite.plan.RelOptTable.ViewExpander} + * interface to support expansion of views to relational algebra. + */ +public class CoralViewExpander implements RelOptTable.ViewExpander { + + private final HiveToRelConverter hiveToRelConverter; + /** + * Instantiates a new view expander. 
+ * + * @param hiveToRelConverter Hive to Rel converter + */ + public CoralViewExpander(@Nonnull HiveToRelConverter hiveToRelConverter) { + this.hiveToRelConverter = hiveToRelConverter; + } + + @Override + public RelRoot expandView(RelDataType rowType, String queryString, List schemaPath, List viewPath) { + Preconditions.checkNotNull(viewPath); + Preconditions.checkState(!viewPath.isEmpty()); + + String dbName = Util.last(schemaPath); + String tableName = viewPath.get(0); + + SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) + .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); + return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveRexBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveRexBuilder.java index 941ba122d..337bc7de8 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveRexBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveRexBuilder.java @@ -6,11 +6,13 @@ package com.linkedin.coral.hive.hive2rel; import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexNode; -public class HiveRexBuilder extends RexBuilder { +/** + * @deprecated Use {@link CoralRexBuilder} instead. + */ +@Deprecated +public class HiveRexBuilder extends CoralRexBuilder { /** * Creates a RexBuilder. 
* @@ -19,23 +21,4 @@ public class HiveRexBuilder extends RexBuilder { public HiveRexBuilder(RelDataTypeFactory typeFactory) { super(typeFactory); } - - /** - * HiveRexBuilder overrides this method to make field access case-insensitively, - * because in Hive 1.1, if the base table `t` contains non-lowercase struct field like `s struct(A:string)`, - * the schema of the view `v` based on the base table would become `s struct(a:string)`, - * translation for SQL `SELECT * FROM v WHERE v.s.A='xxx'` will fail with the following exception - * if caseSensitive=true, given Calcite would convert `v.s.A` to `v.s.a` to be aligned with the - * schema of view `v` during the validation phase: - * - * java.lang.AssertionError: Type 'RecordType(VARCHAR(2147483647) A)' has no field 'a' - * - * Setting caseSensitive=false would not cause regression because Calcite doesn't allow - * two struct fields which only differ in casing like `struct(a:string,A:string)`, check - * org.apache.calcite.sql.validate.DelegatingScope.fullyQualify for more info - */ - @Override - public RexNode makeFieldAccess(RexNode expr, String fieldName, boolean caseSensitive) { - return super.makeFieldAccess(expr, fieldName, false); - } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlConformance.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlConformance.java index c6a711576..e048983c9 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlConformance.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlConformance.java @@ -6,30 +6,17 @@ package com.linkedin.coral.hive.hive2rel; import org.apache.calcite.sql.validate.SqlConformance; -import org.apache.calcite.sql.validate.SqlConformanceEnum; -import org.apache.calcite.sql.validate.SqlDelegatingConformance; -public class HiveSqlConformance extends SqlDelegatingConformance { +/** + * @deprecated Use {@link CoralSqlConformance} instead. 
+ */ +@Deprecated +public class HiveSqlConformance extends CoralSqlConformance { - public static final SqlConformance HIVE_SQL = new HiveSqlConformance(); + public static final SqlConformance HIVE_SQL = CoralSqlConformance.CORAL_SQL; private HiveSqlConformance() { - super(SqlConformanceEnum.PRAGMATIC_2003); - } - - @Override - public boolean allowNiladicParentheses() { - return true; - } - - @Override - public boolean isSortByAlias() { - return true; - } - - @Override - public boolean isHavingAlias() { - return true; + super(); } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlToRelConverter.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlToRelConverter.java index 42ee94f45..5039016ca 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlToRelConverter.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlToRelConverter.java @@ -5,109 +5,22 @@ */ package com.linkedin.coral.hive.hive2rel; -import java.util.ArrayList; -import java.util.List; - -import org.apache.calcite.linq4j.Ord; -import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollations; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rel.logical.LogicalValues; -import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlExplainFormat; -import org.apache.calcite.sql.SqlExplainLevel; -import org.apache.calcite.sql.SqlKind; -import 
org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlUnnestOperator; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.SqlRexConvertletTable; -import org.apache.calcite.sql2rel.SqlToRelConverter; - -import com.linkedin.coral.common.HiveUncollect; -import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; /** - * Class to convert Hive SQL to Calcite RelNode. This class - * specializes the functionality provided by {@link SqlToRelConverter}. + * @deprecated Use {@link CoralSqlToRelConverter} instead. */ -class HiveSqlToRelConverter extends SqlToRelConverter { +@Deprecated +class HiveSqlToRelConverter extends CoralSqlToRelConverter { HiveSqlToRelConverter(RelOptTable.ViewExpander viewExpander, SqlValidator validator, Prepare.CatalogReader catalogReader, RelOptCluster cluster, SqlRexConvertletTable convertletTable, Config config) { super(viewExpander, validator, catalogReader, cluster, convertletTable, config); } - - // This differs from base class in two ways: - // 1. This does not validate the type of converted rel rowType with that of validated node. This is because - // hive is lax in enforcing view schemas. - // 2. 
This skips calling some methods because (1) those are private, and (2) not required for our usecase - public RelRoot convertQuery(SqlNode query, final boolean needsValidation, final boolean top) { - if (needsValidation) { - query = validator.validate(query); - } - - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(cluster.getMetadataProvider())); - RelNode result = convertQueryRecursive(query, top, null).rel; - RelCollation collation = RelCollations.EMPTY; - - if (SQL2REL_LOGGER.isDebugEnabled()) { - SQL2REL_LOGGER.debug(RelOptUtil.dumpPlan("Plan after converting SqlNode to RelNode", result, - SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES)); - } - - final RelDataType validatedRowType = validator.getValidatedNodeType(query); - return RelRoot.of(result, validatedRowType, query.getKind()).withCollation(collation); - } - - @Override - protected void convertFrom(Blackboard bb, SqlNode from) { - if (from == null) { - super.convertFrom(bb, from); - return; - } - switch (from.getKind()) { - case UNNEST: - convertUnnestFrom(bb, from); - break; - default: - super.convertFrom(bb, from); - break; - } - } - - private void convertUnnestFrom(Blackboard bb, SqlNode from) { - final SqlCall call; - call = (SqlCall) from; - final List nodes = call.getOperandList(); - final SqlUnnestOperator operator = (SqlUnnestOperator) call.getOperator(); - // FIXME: base class calls 'replaceSubqueries for operands here but that's a private - // method. This is not an issue for our usecases with hive but we may need handling in future - final List exprs = new ArrayList<>(); - final List fieldNames = new ArrayList<>(); - for (Ord node : Ord.zip(nodes)) { - exprs.add(bb.convertExpression(node.e)); - // In Hive, "LATERAL VIEW EXPLODE(arr) t" is equivalent to "LATERAL VIEW EXPLODE(arr) t AS col". - // Use the default column name "col" if not specified. - fieldNames.add(node.e.getKind() == SqlKind.AS ? 
validator.deriveAlias(node.e, node.i) - : CoralSqlUnnestOperator.ARRAY_ELEMENT_COLUMN_NAME); - } - final RelNode input = RelOptUtil.createProject((null != bb.root) ? bb.root : LogicalValues.createOneRow(cluster), - exprs, fieldNames, true); - Uncollect uncollect = - new HiveUncollect(cluster, cluster.traitSetOf(Convention.NONE), input, operator.withOrdinality); - bb.setRoot(uncollect, true); - } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlValidator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlValidator.java index 66ce83bb5..085b76f96 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlValidator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveSqlValidator.java @@ -6,60 +6,19 @@ package com.linkedin.coral.hive.hive2rel; import org.apache.calcite.adapter.java.JavaTypeFactory; -import org.apache.calcite.config.NullCollation; import org.apache.calcite.prepare.CalciteCatalogReader; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlInsert; -import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperatorTable; -import org.apache.calcite.sql.SqlUtil; -import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.validate.SqlConformance; -import org.apache.calcite.sql.validate.SqlValidatorImpl; -import org.apache.calcite.sql.validate.SqlValidatorScope; -import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator; - -public class HiveSqlValidator extends SqlValidatorImpl { +/** + * @deprecated Use {@link CoralSqlValidator} instead. 
+ */ +@Deprecated +public class HiveSqlValidator extends CoralSqlValidator { public HiveSqlValidator(SqlOperatorTable opTab, CalciteCatalogReader catalogReader, JavaTypeFactory typeFactory, SqlConformance conformance) { super(opTab, catalogReader, typeFactory, conformance); - setDefaultNullCollation(NullCollation.LOW); - } - - @Override - protected RelDataType getLogicalSourceRowType(RelDataType sourceRowType, SqlInsert insert) { - final RelDataType superType = super.getLogicalSourceRowType(sourceRowType, insert); - return ((JavaTypeFactory) typeFactory).toSql(superType); - } - - @Override - protected RelDataType getLogicalTargetRowType(RelDataType targetRowType, SqlInsert insert) { - final RelDataType superType = super.getLogicalTargetRowType(targetRowType, insert); - return ((JavaTypeFactory) typeFactory).toSql(superType); - } - - @Override - protected void inferUnknownTypes(RelDataType inferredType, SqlValidatorScope scope, SqlNode node) { - if (SqlUtil.isNullLiteral(node, false)) { - setValidatedNodeType(node, typeFactory.createSqlType(SqlTypeName.NULL)); - return; - } - super.inferUnknownTypes(inferredType, scope, node); - } - - @Override - public SqlNode expand(SqlNode expr, SqlValidatorScope scope) { - if (expr instanceof SqlBasicCall - && ((SqlBasicCall) expr).getOperator().equals(FunctionFieldReferenceOperator.DOT)) { - SqlBasicCall dotCall = (SqlBasicCall) expr; - if (dotCall.operand(0) instanceof SqlBasicCall) { - return expr; - } - } - return super.expand(expr, scope); } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index ac81842a9..89ddc0e3e 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -5,47 +5,21 @@ */ package com.linkedin.coral.hive.hive2rel; -import java.util.List; - import 
javax.annotation.Nonnull; -import com.google.common.base.Preconditions; - -import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.util.Util; - -import com.linkedin.coral.common.FuzzyUnionSqlRewriter; - /** - * Class that implements {@link org.apache.calcite.plan.RelOptTable.ViewExpander} - * interface to support expansion of Hive Views to relational algebra. + * @deprecated Use {@link CoralViewExpander} instead. */ -public class HiveViewExpander implements RelOptTable.ViewExpander { +@Deprecated +public class HiveViewExpander extends CoralViewExpander { - private final HiveToRelConverter hiveToRelConverter; /** * Instantiates a new Hive view expander. * * @param hiveToRelConverter Hive to Rel converter */ public HiveViewExpander(@Nonnull HiveToRelConverter hiveToRelConverter) { - this.hiveToRelConverter = hiveToRelConverter; - } - - @Override - public RelRoot expandView(RelDataType rowType, String queryString, List schemaPath, List viewPath) { - Preconditions.checkNotNull(viewPath); - Preconditions.checkState(!viewPath.isEmpty()); - - String dbName = Util.last(schemaPath); - String tableName = viewPath.get(0); - - SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) - .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); - return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + super(hiveToRelConverter); } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralExplodeOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralExplodeOperator.java new file mode 100644 index 000000000..1cfe9d23d --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralExplodeOperator.java @@ -0,0 +1,63 @@ +/** + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. 
+ * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.functions; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.SqlUnnestOperator; +import org.apache.calcite.sql.type.ArraySqlType; +import org.apache.calcite.sql.type.MapSqlType; +import org.apache.calcite.sql.type.SqlOperandCountRanges; + + +/** + * Calcite operator representation for the explode function. + * {@code explode} supports single array or map as argument and + * returns a row set of single column for array operand, or + * a row set with two columns corresponding to (key, value) for + * map operand type. + */ +public class CoralExplodeOperator extends SqlUnnestOperator { + + public static final CoralExplodeOperator EXPLODE = new CoralExplodeOperator(); + + public static final String ARRAY_ELEMENT_COLUMN_NAME = "col"; + + public CoralExplodeOperator() { + // keep the same as base class 'UNNEST' operator + // Hive has a separate 'posexplode' function for ordinality + super(false); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + RelDataType operandType = callBinding.getOperandType(0); + return operandType instanceof ArraySqlType || operandType instanceof MapSqlType; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.of(1); + } + + @Override + public RelDataType inferReturnType(SqlOperatorBinding opBinding) { + RelDataType operandType = opBinding.getOperandType(0); + final RelDataTypeFactory.Builder builder = opBinding.getTypeFactory().builder(); + if (operandType instanceof ArraySqlType) { + // array type + builder.add(ARRAY_ELEMENT_COLUMN_NAME, 
operandType.getComponentType()); + } else { + // map type + builder.add(MAP_KEY_COLUMN_NAME, operandType.getKeyType()); + builder.add(MAP_VALUE_COLUMN_NAME, operandType.getValueType()); + } + return builder.build(); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralFunction.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralFunction.java new file mode 100644 index 000000000..4c1a58f01 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralFunction.java @@ -0,0 +1,110 @@ +/** + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.functions; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.ImmutableList; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.fun.SqlCase; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.util.Util; + +import com.linkedin.coral.common.functions.Function; + +import static com.google.common.base.Preconditions.*; +import static org.apache.calcite.sql.parser.SqlParserPos.*; + + +public class CoralFunction { + + // Specific instances of CoralFunction to override default behavior + /** + * Instance of cast() function + */ + public static final Function CAST = new Function("cast", SqlStdOperatorTable.CAST) { + @Override + public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { + checkNotNull(operands); + checkArgument(operands.size() == 1); + return super.createCall(null, ImmutableList.of(operands.get(0), function), null); + } + }; + + /** + * {@code CASE} operator + */ + public static 
final Function CASE = new Function("case", SqlStdOperatorTable.CASE) { + @Override + public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { + checkNotNull(operands); + List whenNodes = new ArrayList<>(); + List thenNodes = new ArrayList<>(); + for (int i = 1; i < operands.size() - 1; i += 2) { + whenNodes.add(operands.get(i)); + thenNodes.add(operands.get(i + 1)); + } + // 1 node for case, 2n for when/then nodes, and optionally 1 else node + SqlNode elseNode = operands.size() % 2 == 1 ? SqlLiteral.createNull(ZERO) : Util.last(operands); + return SqlCase.createSwitched(ZERO, operands.get(0), new SqlNodeList(whenNodes, ZERO), + new SqlNodeList(thenNodes, ZERO), elseNode); + } + }; + + public static final Function WHEN = new Function("when", SqlStdOperatorTable.CASE) { + @Override + public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { + checkNotNull(operands); + List whenNodes = new ArrayList<>(); + List thenNodes = new ArrayList<>(); + for (int i = 0; i < operands.size() - 1; i += 2) { + whenNodes.add(operands.get(i)); + thenNodes.add(operands.get(i + 1)); + } + // 2n for when/then nodes, and optionally 1 else node + SqlNode elseNode = operands.size() % 2 == 0 ? SqlLiteral.createNull(ZERO) : Util.last(operands); + return new SqlCase(ZERO, null, new SqlNodeList(whenNodes, ZERO), new SqlNodeList(thenNodes, ZERO), elseNode); + } + }; + + // this handles both between and not_between...it's odd because hive parse tree for between operator is odd! 
+ public static final Function BETWEEN = new Function("between", SqlStdOperatorTable.BETWEEN) { + @Override + public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { + checkNotNull(operands); + checkArgument(operands.size() >= 3 && operands.get(0) instanceof SqlLiteral); + SqlLiteral opType = (SqlLiteral) operands.get(0); + List callParams = operands.subList(1, operands.size()); + if (opType.booleanValue()) { + return SqlStdOperatorTable.NOT_BETWEEN.createCall(ZERO, callParams); + } else { + return SqlStdOperatorTable.BETWEEN.createCall(ZERO, callParams); + } + } + }; + + public static final Function IN = new Function("in", CoralINOperator.IN) { + @Override + public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { + checkState(operands.size() >= 2); + if (operands.get(1) instanceof SqlSelect) { + // for IN subquery use Calcite IN operator. Calcite IN operator + // will turn it into inner join, which not ideal but that's better + // tested. + return SqlStdOperatorTable.IN.createCall(ZERO, operands); + } else { + // For IN whose operand is a list of values, we use custom IN operator {@link CoralINOperator}. + return getSqlOperator().createCall(ZERO, operands); + } + } + }; +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralJsonTupleOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralJsonTupleOperator.java new file mode 100644 index 000000000..28a623341 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralJsonTupleOperator.java @@ -0,0 +1,74 @@ +/** + * Copyright 2021-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.hive.hive2rel.functions; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlFunctionalOperator; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.util.Static; + + +public class CoralJsonTupleOperator extends SqlFunctionalOperator { + public static final CoralJsonTupleOperator JSON_TUPLE = new CoralJsonTupleOperator(); + + public CoralJsonTupleOperator() { + super("json_tuple", SqlKind.OTHER_FUNCTION, 200, true, null, null, null); + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(2); + } + + @Override + protected void checkOperandCount(SqlValidator validator, SqlOperandTypeChecker argType, SqlCall call) { + if (call.operandCount() < 2) { + throw validator.newValidationError(call, Static.RESOURCE.wrongNumOfArguments()); + } + } + + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + // TODO respect throwOnFailure + for (int operand = 0; operand < callBinding.getOperandCount(); operand++) { + RelDataType operandType = callBinding.getOperandType(operand); + if (!operandType.getSqlTypeName().equals(SqlTypeName.VARCHAR)) { + return false; + } + } + return true; + } + + @Override + public RelDataType inferReturnType(SqlOperatorBinding opBinding) { + RelDataTypeFactory.Builder builder = opBinding.getTypeFactory().builder(); + for (int i = 0; i < opBinding.getOperandCount() - 1; i++) { 
+ builder.add("c" + i, SqlTypeName.VARCHAR); + } + return builder.build(); + } + + @Override + public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { + // TODO, see org.apache.calcite.sql.SqlUnnestOperator#unparse + super.unparse(writer, call, leftPrec, rightPrec); + } + + @Override + public boolean isDeterministic() { + return true; + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralNamedStructFunction.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralNamedStructFunction.java new file mode 100644 index 000000000..e37adac84 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralNamedStructFunction.java @@ -0,0 +1,94 @@ +/** + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.functions; + +import java.util.AbstractList; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.SqlUtil; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Static; + +import static com.google.common.base.Preconditions.*; + + +public class CoralNamedStructFunction extends SqlUserDefinedFunction { 
+ public static final CoralNamedStructFunction NAMED_STRUCT = new CoralNamedStructFunction(); + + public CoralNamedStructFunction() { + super(new SqlIdentifier("named_struct", SqlParserPos.ZERO), null, null, null, null, null); + } + + @Override + public RelDataType inferReturnType(final SqlOperatorBinding opBinding) { + checkState(opBinding instanceof SqlCallBinding); + final SqlCallBinding callBinding = (SqlCallBinding) opBinding; + return opBinding.getTypeFactory().createStructType(new AbstractList>() { + @Override + public int size() { + return opBinding.getOperandCount() / 2; + } + + @Override + public Map.Entry get(int index) { + String fieldName = callBinding.operand(2 * index).toString(); + // strip quotes + String fieldNameNoQuotes = fieldName.substring(1, fieldName.length() - 1); + //Comparable colName = opBinding.getOperandLiteralValue(2 * index); + + return Pair.of(fieldNameNoQuotes, opBinding.getOperandType(2 * index + 1)); + } + }); + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.any(); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + List operands = callBinding.operands(); + // check that every even numbered operand is a string literal and odd numbered operands + // can be of any type + for (int i = 0; i < operands.size() - 1; i += 2) { + SqlNode fieldName = callBinding.operand(i); + RelDataType colNameType = callBinding.getValidator().getValidatedNodeType(fieldName); + if (SqlUtil.isNull(fieldName) || !SqlTypeFamily.STRING.contains(colNameType)) { + if (throwOnFailure) { + throw callBinding.newError(Static.RESOURCE.typeNotSupported(colNameType.toString())); + } else { + return false; + } + } + } + return true; + } + + protected void checkOperandCount(SqlValidator validator, SqlOperandTypeChecker argTypeChecker, SqlCall call) { + // Hive allows 0 arguments to named_struct but that causes issues with type inference. 
+ // Disallow for now and we will enable if there is a real use case + if (call.operandCount() > 0 && call.operandCount() % 2 == 0) { + // valid + return; + } + throw validator.newValidationError(call, Static.RESOURCE.wrongNumOfArguments()); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralPosExplodeOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralPosExplodeOperator.java new file mode 100644 index 000000000..dff2c1f13 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralPosExplodeOperator.java @@ -0,0 +1,55 @@ +/** + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.functions; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.SqlUnnestOperator; +import org.apache.calcite.sql.type.ArraySqlType; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlTypeName; + + +/** + * Calcite operator representation for the posexplode function. 
+ * {@code posexplode} supports single array as argument and + * behaves like explode for arrays, but includes the position of items in the original array + */ +public class CoralPosExplodeOperator extends SqlUnnestOperator { + + public static final CoralPosExplodeOperator POS_EXPLODE = new CoralPosExplodeOperator(); + + public static final String ARRAY_ELEMENT_POS_NAME = "pos"; + public static final String ARRAY_ELEMENT_VAL_NAME = "col"; + + public CoralPosExplodeOperator() { + // keep the same as base class 'UNNEST' operator + super(true); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + RelDataType operandType = callBinding.getOperandType(0); + return operandType instanceof ArraySqlType; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.of(1); + } + + @Override + public RelDataType inferReturnType(SqlOperatorBinding opBinding) { + RelDataType operandType = opBinding.getOperandType(0); + final RelDataTypeFactory.Builder builder = opBinding.getTypeFactory().builder(); + builder.add(ARRAY_ELEMENT_VAL_NAME, operandType.getComponentType()); + builder.add(ARRAY_ELEMENT_POS_NAME, SqlTypeName.INTEGER); + return builder.build(); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralRLikeOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralRLikeOperator.java new file mode 100644 index 000000000..69f2bdd23 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/CoralRLikeOperator.java @@ -0,0 +1,60 @@ +/** + * Copyright 2018-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.hive.hive2rel.functions; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlTypeUtil; + + +public class CoralRLikeOperator extends SqlSpecialOperator { + + public static final CoralRLikeOperator RLIKE = new CoralRLikeOperator("RLIKE", false); + public static final CoralRLikeOperator REGEXP = new CoralRLikeOperator("REGEXP", false); + + private final boolean negated; + + /** + * Creates an operator to represent the RLIKE operator as a Calcite operator + * @param name Operator name + * @param negated Whether this is 'NOT LIKE' + */ + public CoralRLikeOperator(String name, boolean negated) { + super(name, SqlKind.OTHER_FUNCTION, 32, false, ReturnTypes.BOOLEAN_NULLABLE, InferTypes.FIRST_KNOWN, + OperandTypes.STRING_SAME_SAME); + this.negated = negated; + } + + public boolean isNegated() { + return negated; + } + + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.of(2); + } + + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + return OperandTypes.STRING_SAME_SAME.checkOperandTypes(callBinding, throwOnFailure) + && SqlTypeUtil.isCharTypeComparable(callBinding, callBinding.operands(), throwOnFailure); + } + + public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { + final SqlWriter.Frame frame = writer.startList("", ""); + call.operand(0).unparse(writer, getLeftPrec(), getRightPrec()); + writer.sep(getName()); + + call.operand(1).unparse(writer, getLeftPrec(), getRightPrec()); + 
writer.endList(frame); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveExplodeOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveExplodeOperator.java index dbfe5834f..c15cdf93f 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveExplodeOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveExplodeOperator.java @@ -5,59 +5,20 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlOperandCountRange; -import org.apache.calcite.sql.SqlOperatorBinding; -import org.apache.calcite.sql.SqlUnnestOperator; -import org.apache.calcite.sql.type.ArraySqlType; -import org.apache.calcite.sql.type.MapSqlType; -import org.apache.calcite.sql.type.SqlOperandCountRanges; - /** - * Calcite operator representation for Hive explode function. - * {@code explode} supports single array or map as argument and - * returns a row set of single column for array operand, or - * a row set with two columns corresponding to (key, value) for - * map operand type. + * @deprecated Use {@link CoralExplodeOperator} instead. */ -public class HiveExplodeOperator extends SqlUnnestOperator { +@Deprecated +public class HiveExplodeOperator extends CoralExplodeOperator { + /** + * @deprecated Use {@link CoralExplodeOperator#EXPLODE} instead. 
+ */ + @Deprecated public static final HiveExplodeOperator EXPLODE = new HiveExplodeOperator(); - public static final String ARRAY_ELEMENT_COLUMN_NAME = "col"; - public HiveExplodeOperator() { - // keep the same as base class 'UNNEST' operator - // Hive has a separate 'posexplode' function for ordinality - super(false); - } - - @Override - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - RelDataType operandType = callBinding.getOperandType(0); - return operandType instanceof ArraySqlType || operandType instanceof MapSqlType; - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.of(1); - } - - @Override - public RelDataType inferReturnType(SqlOperatorBinding opBinding) { - RelDataType operandType = opBinding.getOperandType(0); - final RelDataTypeFactory.Builder builder = opBinding.getTypeFactory().builder(); - if (operandType instanceof ArraySqlType) { - // array type - builder.add(ARRAY_ELEMENT_COLUMN_NAME, operandType.getComponentType()); - } else { - // map type - builder.add(MAP_KEY_COLUMN_NAME, operandType.getKeyType()); - builder.add(MAP_VALUE_COLUMN_NAME, operandType.getValueType()); - } - return builder.build(); + super(); } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java index b80b69b9f..8aa5897ae 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java @@ -5,106 +5,42 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import java.util.ArrayList; -import java.util.List; - -import com.google.common.collect.ImmutableList; - -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlLiteral; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlNodeList; 
-import org.apache.calcite.sql.SqlSelect; -import org.apache.calcite.sql.fun.SqlCase; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.util.Util; - import com.linkedin.coral.common.functions.Function; -import static com.google.common.base.Preconditions.*; -import static org.apache.calcite.sql.parser.SqlParserPos.*; +/** + * @deprecated Use {@link CoralFunction} instead. + */ +@Deprecated +public class HiveFunction extends CoralFunction { -public class HiveFunction { - - // Specific instances of HiveFunction to override default behavior /** - * Instance of cast() function + * @deprecated Use {@link CoralFunction#CAST} instead. */ - public static final Function CAST = new Function("cast", SqlStdOperatorTable.CAST) { - @Override - public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { - checkNotNull(operands); - checkArgument(operands.size() == 1); - return super.createCall(null, ImmutableList.of(operands.get(0), function), null); - } - }; + @Deprecated + public static final Function CAST = CoralFunction.CAST; /** - * Hive {@code CASE} operator + * @deprecated Use {@link CoralFunction#CASE} instead. */ - public static final Function CASE = new Function("case", SqlStdOperatorTable.CASE) { - @Override - public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { - checkNotNull(operands); - List whenNodes = new ArrayList<>(); - List thenNodes = new ArrayList<>(); - for (int i = 1; i < operands.size() - 1; i += 2) { - whenNodes.add(operands.get(i)); - thenNodes.add(operands.get(i + 1)); - } - // 1 node for case, 2n for when/then nodes, and optionally 1 else node - SqlNode elseNode = operands.size() % 2 == 1 ? 
SqlLiteral.createNull(ZERO) : Util.last(operands); - return SqlCase.createSwitched(ZERO, operands.get(0), new SqlNodeList(whenNodes, ZERO), - new SqlNodeList(thenNodes, ZERO), elseNode); - } - }; + @Deprecated + public static final Function CASE = CoralFunction.CASE; - public static final Function WHEN = new Function("when", SqlStdOperatorTable.CASE) { - @Override - public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { - checkNotNull(operands); - List whenNodes = new ArrayList<>(); - List thenNodes = new ArrayList<>(); - for (int i = 0; i < operands.size() - 1; i += 2) { - whenNodes.add(operands.get(i)); - thenNodes.add(operands.get(i + 1)); - } - // 2n for when/then nodes, and optionally 1 else node - SqlNode elseNode = operands.size() % 2 == 0 ? SqlLiteral.createNull(ZERO) : Util.last(operands); - return new SqlCase(ZERO, null, new SqlNodeList(whenNodes, ZERO), new SqlNodeList(thenNodes, ZERO), elseNode); - } - }; + /** + * @deprecated Use {@link CoralFunction#WHEN} instead. + */ + @Deprecated + public static final Function WHEN = CoralFunction.WHEN; - // this handles both between and not_between...it's odd because hive parse tree for between operator is odd! - public static final Function BETWEEN = new Function("between", SqlStdOperatorTable.BETWEEN) { - @Override - public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { - checkNotNull(operands); - checkArgument(operands.size() >= 3 && operands.get(0) instanceof SqlLiteral); - SqlLiteral opType = (SqlLiteral) operands.get(0); - List callParams = operands.subList(1, operands.size()); - if (opType.booleanValue()) { - return SqlStdOperatorTable.NOT_BETWEEN.createCall(ZERO, callParams); - } else { - return SqlStdOperatorTable.BETWEEN.createCall(ZERO, callParams); - } - } - }; + /** + * @deprecated Use {@link CoralFunction#BETWEEN} instead. 
+ */ + @Deprecated + public static final Function BETWEEN = CoralFunction.BETWEEN; - public static final Function IN = new Function("in", CoralINOperator.IN) { - @Override - public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { - checkState(operands.size() >= 2); - if (operands.get(1) instanceof SqlSelect) { - // for IN subquery use Calcite IN operator. Calcite IN operator - // will turn it into inner join, which not ideal but that's better - // tested. - return SqlStdOperatorTable.IN.createCall(ZERO, operands); - } else { - // For IN whose operand is a list of values, we use custom IN operator {@link CoralINOperator}. - return getSqlOperator().createCall(ZERO, operands); - } - } - }; + /** + * @deprecated Use {@link CoralFunction#IN} instead. + */ + @Deprecated + public static final Function IN = CoralFunction.IN; } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveJsonTupleOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveJsonTupleOperator.java index b9954f744..081866500 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveJsonTupleOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveJsonTupleOperator.java @@ -5,70 +5,20 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlFunctionalOperator; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperandCountRange; -import org.apache.calcite.sql.SqlOperatorBinding; -import org.apache.calcite.sql.SqlWriter; -import org.apache.calcite.sql.type.SqlOperandCountRanges; -import org.apache.calcite.sql.type.SqlOperandTypeChecker; -import org.apache.calcite.sql.type.SqlTypeName; -import 
org.apache.calcite.sql.validate.SqlValidator; -import org.apache.calcite.util.Static; +/** + * @deprecated Use {@link CoralJsonTupleOperator} instead. + */ +@Deprecated +public class HiveJsonTupleOperator extends CoralJsonTupleOperator { -public class HiveJsonTupleOperator extends SqlFunctionalOperator { + /** + * @deprecated Use {@link CoralJsonTupleOperator#JSON_TUPLE} instead. + */ + @Deprecated public static final HiveJsonTupleOperator JSON_TUPLE = new HiveJsonTupleOperator(); public HiveJsonTupleOperator() { - super("json_tuple", SqlKind.OTHER_FUNCTION, 200, true, null, null, null); - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.from(2); - } - - @Override - protected void checkOperandCount(SqlValidator validator, SqlOperandTypeChecker argType, SqlCall call) { - if (call.operandCount() < 2) { - throw validator.newValidationError(call, Static.RESOURCE.wrongNumOfArguments()); - } - } - - @Override - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - // TODO respect throwOnFailure - for (int operand = 0; operand < callBinding.getOperandCount(); operand++) { - RelDataType operandType = callBinding.getOperandType(operand); - if (!operandType.getSqlTypeName().equals(SqlTypeName.VARCHAR)) { - return false; - } - } - return true; - } - - @Override - public RelDataType inferReturnType(SqlOperatorBinding opBinding) { - RelDataTypeFactory.Builder builder = opBinding.getTypeFactory().builder(); - for (int i = 0; i < opBinding.getOperandCount() - 1; i++) { - builder.add("c" + i, SqlTypeName.VARCHAR); - } - return builder.build(); - } - - @Override - public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { - // TODO, see org.apache.calcite.sql.SqlUnnestOperator#unparse - super.unparse(writer, call, leftPrec, rightPrec); - } - - @Override - public boolean isDeterministic() { - return true; + super(); } } diff --git 
a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveNamedStructFunction.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveNamedStructFunction.java index 89388f76f..4d0127ff2 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveNamedStructFunction.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveNamedStructFunction.java @@ -5,90 +5,20 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import java.util.AbstractList; -import java.util.List; -import java.util.Map; - -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlOperandCountRange; -import org.apache.calcite.sql.SqlOperatorBinding; -import org.apache.calcite.sql.SqlUtil; -import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.SqlOperandCountRanges; -import org.apache.calcite.sql.type.SqlOperandTypeChecker; -import org.apache.calcite.sql.type.SqlTypeFamily; -import org.apache.calcite.sql.validate.SqlUserDefinedFunction; -import org.apache.calcite.sql.validate.SqlValidator; -import org.apache.calcite.util.Pair; -import org.apache.calcite.util.Static; - -import static com.google.common.base.Preconditions.*; +/** + * @deprecated Use {@link CoralNamedStructFunction} instead. + */ +@Deprecated +public class HiveNamedStructFunction extends CoralNamedStructFunction { -public class HiveNamedStructFunction extends SqlUserDefinedFunction { + /** + * @deprecated Use {@link CoralNamedStructFunction#NAMED_STRUCT} instead. 
+ */ + @Deprecated public static final HiveNamedStructFunction NAMED_STRUCT = new HiveNamedStructFunction(); public HiveNamedStructFunction() { - super(new SqlIdentifier("named_struct", SqlParserPos.ZERO), null, null, null, null, null); - } - - @Override - public RelDataType inferReturnType(final SqlOperatorBinding opBinding) { - checkState(opBinding instanceof SqlCallBinding); - final SqlCallBinding callBinding = (SqlCallBinding) opBinding; - return opBinding.getTypeFactory().createStructType(new AbstractList>() { - @Override - public int size() { - return opBinding.getOperandCount() / 2; - } - - @Override - public Map.Entry get(int index) { - String fieldName = callBinding.operand(2 * index).toString(); - // strip quotes - String fieldNameNoQuotes = fieldName.substring(1, fieldName.length() - 1); - //Comparable colName = opBinding.getOperandLiteralValue(2 * index); - - return Pair.of(fieldNameNoQuotes, opBinding.getOperandType(2 * index + 1)); - } - }); - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.any(); - } - - @Override - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - List operands = callBinding.operands(); - // check that every even numbered operand is a string literal and odd numbered operands - // can be of any type - for (int i = 0; i < operands.size() - 1; i += 2) { - SqlNode fieldName = callBinding.operand(i); - RelDataType colNameType = callBinding.getValidator().getValidatedNodeType(fieldName); - if (SqlUtil.isNull(fieldName) || !SqlTypeFamily.STRING.contains(colNameType)) { - if (throwOnFailure) { - throw callBinding.newError(Static.RESOURCE.typeNotSupported(colNameType.toString())); - } else { - return false; - } - } - } - return true; - } - - protected void checkOperandCount(SqlValidator validator, SqlOperandTypeChecker argTypeChecker, SqlCall call) { - // Hive allows 0 arguments to named_struct but that causes issues with type inference. 
- // Disallow for now and we will enable if there is a real use case - if (call.operandCount() > 0 && call.operandCount() % 2 == 0) { - // valid - return; - } - throw validator.newValidationError(call, Static.RESOURCE.wrongNumOfArguments()); + super(); } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HivePosExplodeOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HivePosExplodeOperator.java index c39d12116..a62bfa604 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HivePosExplodeOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HivePosExplodeOperator.java @@ -5,51 +5,20 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlOperandCountRange; -import org.apache.calcite.sql.SqlOperatorBinding; -import org.apache.calcite.sql.SqlUnnestOperator; -import org.apache.calcite.sql.type.ArraySqlType; -import org.apache.calcite.sql.type.SqlOperandCountRanges; -import org.apache.calcite.sql.type.SqlTypeName; - /** - * Calcite operator representation for Hive posexplode function. - * {@code posexplode} supports single array as argument and - * behaves like explode for arrays, but includes the position of items in the original array + * @deprecated Use {@link CoralPosExplodeOperator} instead. */ -public class HivePosExplodeOperator extends SqlUnnestOperator { +@Deprecated +public class HivePosExplodeOperator extends CoralPosExplodeOperator { + /** + * @deprecated Use {@link CoralPosExplodeOperator#POS_EXPLODE} instead. 
+ */ + @Deprecated public static final HivePosExplodeOperator POS_EXPLODE = new HivePosExplodeOperator(); - public static final String ARRAY_ELEMENT_POS_NAME = "pos"; - public static final String ARRAY_ELEMENT_VAL_NAME = "col"; - public HivePosExplodeOperator() { - // keep the same as base class 'UNNEST' operator - super(true); - } - - @Override - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - RelDataType operandType = callBinding.getOperandType(0); - return operandType instanceof ArraySqlType; - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.of(1); - } - - @Override - public RelDataType inferReturnType(SqlOperatorBinding opBinding) { - RelDataType operandType = opBinding.getOperandType(0); - final RelDataTypeFactory.Builder builder = opBinding.getTypeFactory().builder(); - builder.add(ARRAY_ELEMENT_VAL_NAME, operandType.getComponentType()); - builder.add(ARRAY_ELEMENT_POS_NAME, SqlTypeName.INTEGER); - return builder.build(); + super(); } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveRLikeOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveRLikeOperator.java index 40db40bfb..965c4addb 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveRLikeOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveRLikeOperator.java @@ -5,56 +5,26 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperandCountRange; -import org.apache.calcite.sql.SqlSpecialOperator; -import org.apache.calcite.sql.SqlWriter; -import org.apache.calcite.sql.type.InferTypes; -import org.apache.calcite.sql.type.OperandTypes; -import org.apache.calcite.sql.type.ReturnTypes; -import 
org.apache.calcite.sql.type.SqlOperandCountRanges; -import org.apache.calcite.sql.type.SqlTypeUtil; +/** + * @deprecated Use {@link CoralRLikeOperator} instead. + */ +@Deprecated +public class HiveRLikeOperator extends CoralRLikeOperator { -public class HiveRLikeOperator extends SqlSpecialOperator { - + /** + * @deprecated Use {@link CoralRLikeOperator#RLIKE} instead. + */ + @Deprecated public static final HiveRLikeOperator RLIKE = new HiveRLikeOperator("RLIKE", false); - public static final HiveRLikeOperator REGEXP = new HiveRLikeOperator("REGEXP", false); - - private final boolean negated; /** - * Creates an operator to represent Hive's RLIKE operator as calcite operator - * @param name Operator name - * @param negated Whether this is 'NOT LIKE' + * @deprecated Use {@link CoralRLikeOperator#REGEXP} instead. */ - public HiveRLikeOperator(String name, boolean negated) { - super(name, SqlKind.OTHER_FUNCTION, 32, false, ReturnTypes.BOOLEAN_NULLABLE, InferTypes.FIRST_KNOWN, - OperandTypes.STRING_SAME_SAME); - this.negated = negated; - } - - public boolean isNegated() { - return negated; - } - - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.of(2); - } - - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - return OperandTypes.STRING_SAME_SAME.checkOperandTypes(callBinding, throwOnFailure) - && SqlTypeUtil.isCharTypeComparable(callBinding, callBinding.operands(), throwOnFailure); - } - - public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { - final SqlWriter.Frame frame = writer.startList("", ""); - call.operand(0).unparse(writer, getLeftPrec(), getRightPrec()); - writer.sep(getName()); + @Deprecated + public static final HiveRLikeOperator REGEXP = new HiveRLikeOperator("REGEXP", false); - call.operand(1).unparse(writer, getLeftPrec(), getRightPrec()); - writer.endList(frame); + public HiveRLikeOperator(String name, boolean negated) { + super(name, negated); } } diff 
--git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticCoralFunctionRegistry.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticCoralFunctionRegistry.java new file mode 100644 index 000000000..413ddf659 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticCoralFunctionRegistry.java @@ -0,0 +1,821 @@ +/** + * Copyright 2019-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.functions; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import com.google.common.base.Preconditions; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.*; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; + +import com.linkedin.coral.com.google.common.collect.HashMultimap; +import com.linkedin.coral.com.google.common.collect.ImmutableList; +import com.linkedin.coral.com.google.common.collect.ImmutableMultimap; +import com.linkedin.coral.com.google.common.collect.Multimap; +import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; +import com.linkedin.coral.common.functions.Function; +import com.linkedin.coral.common.functions.FunctionRegistry; +import com.linkedin.coral.common.functions.FunctionReturnTypes; +import com.linkedin.coral.common.functions.GenericProjectFunction; +import 
com.linkedin.coral.common.functions.OperandTypeInference; +import com.linkedin.coral.common.functions.SameOperandTypeExceptFirstOperandChecker; + +import static com.linkedin.coral.hive.hive2rel.functions.CoalesceStructUtility.*; +import static com.linkedin.coral.hive.hive2rel.functions.TimestampFromUnixtime.TIMESTAMP_FROM_UNIXTIME; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.*; +import static org.apache.calcite.sql.fun.SqlStdOperatorTable.*; +import static org.apache.calcite.sql.type.OperandTypes.*; +import static org.apache.calcite.sql.type.ReturnTypes.*; + + +/** + * Static implementation of FunctionRegistry that has a hard-coded list of all + * function names. This has the major disadvantage that user-defined functions are + * not available to the registry without manually adding an entry here and uploading + * a new version of the library. + * TODO: Provide function registry catalog + * + * Note that Coral maintains a copy of StaticCoralFunctionRegistry for read-only use at run time. + * For each individual query, we create a copy of the function registry in a RelContextProvider object. + */ +public class StaticCoralFunctionRegistry implements FunctionRegistry { + + public static final String IS_TEST_MEMBER_ID_CLASS = "com.linkedin.dali.udf.istestmemberid.hive.IsTestMemberId"; + + // TODO: Make this immutable using builder + static final Multimap FUNCTION_MAP = HashMultimap.create(); + + // Used for registering UDTFs, the key is the function name and the value is a list of field names returned by the UDTF + // We need it because we need to know the return field names of UDTF to do the conversion in ParseTreeBuilder.visitLateralViewUDTF + public static final Map> UDTF_RETURN_FIELD_NAME_MAP = new HashMap<>(); + + static { + // NOTE: All function names will be added as lowercase for case-insensitive comparison.
+ // FIXME: This mapping is currently incomplete + // aggregation functions + addFunctionEntry("sum", SUM); + addFunctionEntry("count", COUNT); + addFunctionEntry("avg", AVG); + addFunctionEntry("min", MIN); + addFunctionEntry("max", MAX); + createAddUserDefinedFunction("collect_list", FunctionReturnTypes.ARRAY_OF_ARG0_TYPE, ANY); + createAddUserDefinedFunction("collect_set", FunctionReturnTypes.ARRAY_OF_ARG0_TYPE, ANY); + + // window functions + addFunctionEntry("row_number", ROW_NUMBER); + addFunctionEntry("rank", SqlStdOperatorTable.RANK); // qualification required due to naming conflict + addFunctionEntry("dense_rank", DENSE_RANK); + addFunctionEntry("cume_dist", CUME_DIST); + addFunctionEntry("percent_rank", PERCENT_RANK); + addFunctionEntry("first_value", FIRST_VALUE); + addFunctionEntry("last_value", LAST_VALUE); + addFunctionEntry("nth_value", NTH_VALUE); + addFunctionEntry("lag", LAG); + addFunctionEntry("lead", LEAD); + addFunctionEntry("stddev", STDDEV); + addFunctionEntry("stddev_samp", STDDEV_SAMP); + addFunctionEntry("stddev_pop", STDDEV_POP); + addFunctionEntry("variance", VARIANCE); + addFunctionEntry("var_samp", VAR_SAMP); + addFunctionEntry("var_pop", VAR_POP); + + //addFunctionEntry("in", HiveInOperator.IN); + FUNCTION_MAP.put("in", CoralFunction.IN); + + //addFunctionEntry("in", SqlStdOperatorTable.IN); + + // operators + addFunctionEntry("rlike", CoralRLikeOperator.RLIKE); + addFunctionEntry("regexp", CoralRLikeOperator.REGEXP); + addFunctionEntry("!=", NOT_EQUALS); + addFunctionEntry("==", EQUALS); + + // conditional function + addFunctionEntry("tok_isnull", IS_NULL); + addFunctionEntry("tok_isnotnull", IS_NOT_NULL); + FUNCTION_MAP.put("when", CoralFunction.WHEN); + FUNCTION_MAP.put("case", CoralFunction.CASE); + FUNCTION_MAP.put("between", CoralFunction.BETWEEN); + addFunctionEntry("nullif", NULLIF); + addFunctionEntry("isnull", IS_NULL); + addFunctionEntry("isnotnull", IS_NOT_NULL); + + // TODO: this should be arg1 or arg2 nullable + 
createAddUserDefinedFunction("nvl", ARG0_NULLABLE, and(family(SqlTypeFamily.ANY, SqlTypeFamily.ANY), SAME_SAME)); + + // calcite models 'if' function as CASE operator. We can use CASE but that will cause translation + // to SQL to be odd although correct. So, we add 'if' as UDF + addFunctionEntry("if", + createCalciteUDF("if", FunctionReturnTypes.IF_FUNC_RETURN_TYPE, OperandTypeInference.BOOLEAN_ANY_SAME, + new SameOperandTypeExceptFirstOperandChecker(3, SqlTypeName.BOOLEAN), null)); + + addFunctionEntry("coalesce", COALESCE); + // cast operator + addCastOperatorEntries(); + + // Complex type constructors + addFunctionEntry("array", ARRAY_VALUE_CONSTRUCTOR); + addFunctionEntry("struct", ROW); + addFunctionEntry("map", MAP_VALUE_CONSTRUCTOR); + addFunctionEntry("named_struct", CoralNamedStructFunction.NAMED_STRUCT); + addFunctionEntry("generic_project", GenericProjectFunction.GENERIC_PROJECT); + + // conversion functions + createAddUserDefinedFunction("binary", FunctionReturnTypes.BINARY, + or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); + + // mathematical functions + // we need to define new strategy for hive to allow null operands by default for everything + createAddUserDefinedFunction("pmod", BIGINT, NUMERIC_NUMERIC); + createAddUserDefinedFunction("round", DOUBLE_NULLABLE, + family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER), optionalOrd(1))); + createAddUserDefinedFunction("bround", DOUBLE_NULLABLE, + family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER), optionalOrd(1))); + createAddUserDefinedFunction("floor", BIGINT_FORCE_NULLABLE, family(SqlTypeFamily.NUMERIC)); + createAddUserDefinedFunction("ceil", BIGINT_FORCE_NULLABLE, family(SqlTypeFamily.NUMERIC)); + createAddUserDefinedFunction("ceiling", BIGINT_FORCE_NULLABLE, family(SqlTypeFamily.NUMERIC)); + createAddUserDefinedFunction("rand", DOUBLE_NULLABLE, + family(ImmutableList.of(SqlTypeFamily.INTEGER), optionalOrd(0))); + 
createAddUserDefinedFunction("exp", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("ln", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("log10", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("log2", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("log", DOUBLE_NULLABLE, NUMERIC_NUMERIC); + createAddUserDefinedFunction("pow", DOUBLE_NULLABLE, NUMERIC_NUMERIC); + createAddUserDefinedFunction("power", DOUBLE_NULLABLE, NUMERIC_NUMERIC); + createAddUserDefinedFunction("sqrt", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("hex", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.NUMERIC), family(SqlTypeFamily.BINARY))); + createAddUserDefinedFunction("unhex", FunctionReturnTypes.BINARY, STRING); + createAddUserDefinedFunction("conv", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.EXACT_NUMERIC, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER))); + createAddUserDefinedFunction("abs", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("sin", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("asin", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("cos", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("acos", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("tan", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("atan", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("degrees", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("radians", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("positive", ARG0_NULLABLE, NUMERIC); + createAddUserDefinedFunction("negative", ARG0_NULLABLE, NUMERIC); + createAddUserDefinedFunction("sign", ARG0_NULLABLE, NUMERIC); + createAddUserDefinedFunction("e", DOUBLE, NILADIC); + createAddUserDefinedFunction("pi", DOUBLE, NILADIC); + createAddUserDefinedFunction("factorial", BIGINT_NULLABLE, 
family(SqlTypeFamily.INTEGER)); + createAddUserDefinedFunction("cbrt", DOUBLE_NULLABLE, NUMERIC); + createAddUserDefinedFunction("shiftleft", ARG0_NULLABLE, EXACT_NUMERIC_EXACT_NUMERIC); + createAddUserDefinedFunction("shiftright", ARG0_NULLABLE, EXACT_NUMERIC_EXACT_NUMERIC); + createAddUserDefinedFunction("shiftrightunsigned", ARG0_NULLABLE, EXACT_NUMERIC_EXACT_NUMERIC); + createAddUserDefinedFunction("greatest", ARG0_NULLABLE, SAME_VARIADIC); + createAddUserDefinedFunction("least", ARG0_NULLABLE, SAME_VARIADIC); + createAddUserDefinedFunction("width_bucket", INTEGER_NULLABLE, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER)); + + // string functions + // TODO: operand types are not strictly true since these functions can take null literal + // and most of these entries don't allow null literals. This will work for most common usages + // but it's easy to write HiveQL to make these fail + createAddUserDefinedFunction("ascii", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("base64", FunctionReturnTypes.STRING, BINARY); + createAddUserDefinedFunction("character_length", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("chr", FunctionReturnTypes.STRING, NUMERIC); + createAddUserDefinedFunction("concat", cascade(FunctionReturnTypes.STRING, SqlTypeTransforms.TO_NULLABLE), + SAME_VARIADIC); + // [CORAL-24] Tried setting this to + // or(family(SqlTypeFamily.STRING, SqlTypeFamily.ARRAY), + // and(variadic(SqlOperandCountRanges.from(2)), repeat(SqlOperandCountRanges.from(2), STRING))) + // but calcite's composeable operand checker does not handle variadic operator counts correctly. 
+ createAddUserDefinedFunction("concat_ws", FunctionReturnTypes.STRING, new SqlOperandTypeChecker() { + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + return family(SqlTypeFamily.STRING, SqlTypeFamily.ARRAY).checkOperandTypes(callBinding, throwOnFailure) + || new SameOperandTypeChecker(-1).checkOperandTypes(callBinding, throwOnFailure); + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(2); + } + + @Override + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(STRING, ARRAY|STRING, ...)"; + } + + @Override + public Consistency getConsistency() { + return Consistency.NONE; + } + + @Override + public boolean isOptional(int i) { + return false; + } + }); + + createAddUserDefinedFunction("context_ngrams", LEAST_RESTRICTIVE, + family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)); + createAddUserDefinedFunction("decode", FunctionReturnTypes.STRING, + family(SqlTypeFamily.BINARY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("elt", FunctionReturnTypes.STRING, VARIADIC); + createAddUserDefinedFunction("encode", FunctionReturnTypes.BINARY, STRING_STRING); + createAddUserDefinedFunction("field", ReturnTypes.INTEGER, VARIADIC); + createAddUserDefinedFunction("find_in_set", ReturnTypes.INTEGER, STRING_STRING); + createAddUserDefinedFunction("format_number", FunctionReturnTypes.STRING, NUMERIC_INTEGER); + createAddUserDefinedFunction("get_json_object", FunctionReturnTypes.STRING, STRING_STRING); + createAddUserDefinedFunction("in_file", ReturnTypes.BOOLEAN, STRING_STRING); + createAddUserDefinedFunction("initcap", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("instr", ReturnTypes.INTEGER, STRING_STRING); + createAddUserDefinedFunction("length", INTEGER_NULLABLE, STRING); + createAddUserDefinedFunction("levenshtein", ReturnTypes.INTEGER, STRING_STRING); + 
createAddUserDefinedFunction("locate", FunctionReturnTypes.STRING, + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.INTEGER), optionalOrd(2))); + addFunctionEntry("lower", LOWER); + addFunctionEntry("lcase", LOWER); + addFunctionEntry("translate", TRANSLATE3); + addFunctionEntry("translate3", TRANSLATE3); + createAddUserDefinedFunction("lpad", FunctionReturnTypes.STRING, + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("ltrim", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("ngrams", LEAST_RESTRICTIVE, + family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)); + createAddUserDefinedFunction("octet_length", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("parse_url", FunctionReturnTypes.STRING, + family(Collections.nCopies(3, SqlTypeFamily.STRING), optionalOrd(2))); + createAddUserDefinedFunction("printf", FunctionReturnTypes.STRING, VARIADIC); + createAddUserDefinedFunction("regexp_extract", ARG0, + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.INTEGER), optionalOrd(2))); + createAddUserDefinedFunction("regexp_replace", FunctionReturnTypes.STRING, STRING_STRING_STRING); + createAddUserDefinedFunction("repeat", FunctionReturnTypes.STRING, + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER)); + addFunctionEntry("replace", REPLACE); + createAddUserDefinedFunction("reverse", ARG0, or(STRING, NULLABLE_LITERAL)); + createAddUserDefinedFunction("rpad", FunctionReturnTypes.STRING, + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("rtrim", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("sentences", LEAST_RESTRICTIVE, STRING_STRING_STRING); + createAddUserDefinedFunction("soundex", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("space", FunctionReturnTypes.STRING, 
NUMERIC); + createAddUserDefinedFunction("split", FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR), STRING_STRING); + createAddUserDefinedFunction("str_to_map", FunctionReturnTypes.mapOfType(SqlTypeName.VARCHAR, SqlTypeName.VARCHAR), + family(Collections.nCopies(3, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(1, 2)))); + createAddUserDefinedFunction("substr", FunctionReturnTypes.STRING, + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), optionalOrd(2))); + createAddUserDefinedFunction("substring", FunctionReturnTypes.STRING, + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), optionalOrd(2))); + + createAddUserDefinedFunction("substring_index", FunctionReturnTypes.STRING, STRING_STRING_INTEGER); + createAddUserDefinedFunction("trim", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("unbase64", explicit(SqlTypeName.VARBINARY), or(STRING, NULLABLE_LITERAL)); + addFunctionEntry("upper", UPPER); + addFunctionEntry("ucase", UPPER); + addFunctionEntry("initcap", INITCAP); + createAddUserDefinedFunction("md5", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); + createAddUserDefinedFunction("sha1", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); + createAddUserDefinedFunction("sha", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); + createAddUserDefinedFunction("crc32", BIGINT, or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); + createAddUserDefinedFunction("from_utf8", explicit(SqlTypeName.VARCHAR), or(CHARACTER, BINARY)); + createAddUserDefinedFunction("at_timezone", explicit(SqlTypeName.TIMESTAMP), + family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("with_timezone", explicit(SqlTypeName.TIMESTAMP), + family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.STRING)); + 
createAddUserDefinedFunction("to_unixtime", explicit(SqlTypeName.DOUBLE), family(SqlTypeFamily.TIMESTAMP)); + createAddUserDefinedFunction("from_unixtime_nanos", explicit(SqlTypeName.TIMESTAMP), NUMERIC); + createAddUserDefinedFunction("$canonicalize_hive_timezone_id", explicit(SqlTypeName.VARCHAR), STRING); + + // xpath functions + createAddUserDefinedFunction("xpath", FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR), STRING_STRING); + createAddUserDefinedFunction("xpath_string", FunctionReturnTypes.STRING, STRING_STRING); + createAddUserDefinedFunction("xpath_boolean", ReturnTypes.BOOLEAN, STRING_STRING); + createAddUserDefinedFunction("xpath_short", FunctionReturnTypes.SMALLINT, STRING_STRING); + createAddUserDefinedFunction("xpath_int", ReturnTypes.INTEGER, STRING_STRING); + createAddUserDefinedFunction("xpath_long", BIGINT, STRING_STRING); + createAddUserDefinedFunction("xpath_float", DOUBLE, STRING_STRING); + createAddUserDefinedFunction("xpath_double", DOUBLE, STRING_STRING); + createAddUserDefinedFunction("xpath_number", DOUBLE, STRING_STRING); + + // Date Functions + createAddUserDefinedFunction("from_unixtime", FunctionReturnTypes.STRING, + family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING), optionalOrd(1))); + addFunctionEntry("timestamp_from_unixtime", TIMESTAMP_FROM_UNIXTIME); + createAddUserDefinedFunction("unix_timestamp", BIGINT, + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(0, 1)))); + createAddUserDefinedFunction("to_date", FunctionReturnTypes.STRING, or(STRING, DATETIME)); + createAddUserDefinedFunction("date", DATE, or(STRING, DATETIME)); + createAddUserDefinedFunction("year", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("quarter", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("month", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("day", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("dayofmonth", 
ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("hour", ReturnTypes.INTEGER, or(STRING, DATETIME)); + createAddUserDefinedFunction("minute", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("second", ReturnTypes.INTEGER, STRING); + createAddUserDefinedFunction("weekofyear", ReturnTypes.INTEGER, STRING); + //TODO: add extract UDF + createAddUserDefinedFunction("datediff", ReturnTypes.INTEGER, STRING_STRING); + createAddUserDefinedFunction("date_add", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.DATE, SqlTypeFamily.INTEGER), family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER), + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER))); + + createAddUserDefinedFunction("date_sub", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.DATE, SqlTypeFamily.INTEGER), family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER), + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER))); + createAddUserDefinedFunction("from_utc_timestamp", explicit(SqlTypeName.TIMESTAMP), + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + addFunctionEntry("current_date", CURRENT_DATE); + addFunctionEntry("current_timestamp", CURRENT_TIMESTAMP); + createAddUserDefinedFunction("add_months", FunctionReturnTypes.STRING, + family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER)); + createAddUserDefinedFunction("last_day", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("next_day", FunctionReturnTypes.STRING, STRING_STRING); + createAddUserDefinedFunction("trunc", FunctionReturnTypes.STRING, STRING_STRING); + createAddUserDefinedFunction("months_between", DOUBLE, family(SqlTypeFamily.DATE, SqlTypeFamily.DATE)); + createAddUserDefinedFunction("date_format", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.DATE, SqlTypeFamily.STRING), family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.STRING), + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("to_utc_timestamp", FunctionReturnTypes.STRING, + or(STRING_STRING, 
family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING))); + + // Collection functions + addFunctionEntry("size", CARDINALITY); + createAddUserDefinedFunction("array_contains", ReturnTypes.BOOLEAN, family(SqlTypeFamily.ARRAY, SqlTypeFamily.ANY)); + createAddUserDefinedFunction("map_keys", opBinding -> { + RelDataType operandType = opBinding.getOperandType(0); + RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); + return typeFactory.createArrayType(operandType.getKeyType(), -1); + }, family(SqlTypeFamily.MAP)); + + createAddUserDefinedFunction("map_values", opBinding -> { + RelDataType operandType = opBinding.getOperandType(0); + RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); + return typeFactory.createArrayType(operandType.getValueType(), -1); + }, family(SqlTypeFamily.MAP)); + + createAddUserDefinedFunction("sort_array", ARG0, ARRAY); + + createAddUserDefinedFunction("extract_union", COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY, + or(ANY, family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER))); + createAddUserDefinedFunction("coalesce_struct", COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY, + or(ANY, family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER))); + + // LinkedIn UDFs: Dali stores mapping from UDF name to the implementing Java class as table properties + // in the HCatalog. So, an UDF implementation may be referred by different names by different views. + // We register these UDFs by the implementing class name to create a single entry for each UDF. 
+ createAddUserDefinedFunction("com.linkedin.dali.bug.DummyUdf", FunctionReturnTypes.STRING, or(STRING, ARRAY)); + createAddUserDefinedFunction(IS_TEST_MEMBER_ID_CLASS, ReturnTypes.BOOLEAN, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.CHARACTER)); + createAddUserDefinedFunction("com.linkedin.dali.udf.urnextractor.hive.UrnExtractor", + FunctionReturnTypes.ARRAY_OF_STR_STR_MAP, or(STRING, ARRAY)); + createAddUserDefinedFunction("com.linkedin.udf.aws.ReadJsonUDF", FunctionReturnTypes.STRING, STRING_STRING); + createAddUserDefinedFunction("com.linkedin.udf.hdfs.GetDatasetNameFromPathUDF", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("com.linkedin.dali.udf.isguestmemberid.hive.IsGuestMemberId", ReturnTypes.BOOLEAN, + NUMERIC); + createAddUserDefinedFunction("com.linkedin.dali.udf.watbotcrawlerlookup.hive.WATBotCrawlerLookup", + FunctionReturnTypes.rowOf(ImmutableList.of("iscrawler", "crawlerid"), + ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.VARCHAR)), + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING), + optionalOrd(ImmutableList.of(2, 3)))); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.WatBotCrawlerLookup", + FunctionReturnTypes.rowOf(ImmutableList.of("iscrawler", "crawlerid"), + ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.VARCHAR)), + family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING), + optionalOrd(ImmutableList.of(2, 3)))); + + createAddUserDefinedFunction("com.linkedin.dali.udf.userinterfacelookup.hive.UserInterfaceLookup", + FunctionReturnTypes.STRING, + or(family(Collections.nCopies(8, SqlTypeFamily.STRING)), + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, + SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.dali.udf.portallookup.hive.PortalLookup", 
FunctionReturnTypes.STRING, + STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.dali.udf.useragentparser.hive.UserAgentParser", + FunctionReturnTypes.STRING, STRING_STRING); + createAddUserDefinedFunction("com.linkedin.dali.udf.maplookup.hive.MapLookup", + cascade(FunctionReturnTypes.STRING, SqlTypeTransforms.FORCE_NULLABLE), + family(SqlTypeFamily.MAP, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.dali.udf.monarch.UrnGenerator", FunctionReturnTypes.STRING, VARIADIC); + createAddUserDefinedFunction("com.linkedin.dali.udf.genericlookup.hive.GenericLookup", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.ANY, + SqlTypeFamily.ANY), + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.ANY, + SqlTypeFamily.ANY, SqlTypeFamily.ANY))); + createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.UrnToID", FunctionReturnTypes.STRING, STRING); + + createAddUserDefinedFunction("com.linkedin.dali.udf.date.hive.DateFormatToEpoch", BIGINT_NULLABLE, + STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.dali.udf.date.hive.EpochToDateFormat", FunctionReturnTypes.STRING, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.dali.udf.date.hive.EpochToEpochMilliseconds", BIGINT_NULLABLE, NUMERIC); + createAddUserDefinedFunction("com.linkedin.dali.udf.sanitize.hive.Sanitize", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("org.apache.hadoop.hive.ql.udf.generic.GenericProject", ARG0, + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.GetIdFromUrn", BIGINT, STRING); + createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.GetPermissionsString", + FunctionReturnTypes.STRING, family(SqlTypeFamily.ARRAY)); + 
createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.EpochTimeInSeconds", BIGINT, STRING); + createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.EpochTimeInSecondsNullable", BIGINT_NULLABLE, + STRING); + createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.IsUrnForType", ReturnTypes.BOOLEAN, + STRING_STRING); + createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.PhoneNumberNormalizer", + FunctionReturnTypes.STRING, STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.dali.views.job.udf.GetUUID", FunctionReturnTypes.STRING, BINARY); + createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetOrderUrn", FunctionReturnTypes.STRING, + family(SqlTypeFamily.MAP, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetChooserId", FunctionReturnTypes.STRING, + family(SqlTypeFamily.MAP)); + createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetFamily", FunctionReturnTypes.STRING, + family(SqlTypeFamily.MAP)); + createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetPriceUrnList", + FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR), family(SqlTypeFamily.MAP)); + + final SqlReturnTypeInference hitInfo = FunctionReturnTypes.rowOfInference( + ImmutableList.of("secondarysearchresultinfo", "entityawaresuggestioninfo"), + ImmutableList.of(FunctionReturnTypes.rowOf(ImmutableList.of("vertical"), ImmutableList.of(SqlTypeName.VARCHAR)), + FunctionReturnTypes.rowOfInference(ImmutableList.of("suggestedentities"), + ImmutableList.of(FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR, true))))); + + final SqlReturnTypeInference gridPositionInfo = FunctionReturnTypes.rowOf(ImmutableList.of("row", "column"), + ImmutableList.of(SqlTypeName.INTEGER, SqlTypeName.INTEGER)); + + createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.CreateSearchActionResultUDF", + FunctionReturnTypes.rowOfInference( + 
ImmutableList.of("entityurn", "resulttype", "absoluteposition", "positioninvertical", "iscachehit", + "isanonymized", "hitinfo", "gridposition", "isnamematch", "trackingid"), + ImmutableList.of(FunctionReturnTypes.STRING, FunctionReturnTypes.STRING, INTEGER_NULLABLE, INTEGER_NULLABLE, + ReturnTypes.BOOLEAN, ReturnTypes.BOOLEAN, hitInfo, gridPositionInfo, ReturnTypes.BOOLEAN, + FunctionReturnTypes.BINARY)), + family(SqlTypeFamily.MAP, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.GetActionTypeUDF", FunctionReturnTypes.STRING, + or(STRING_STRING_STRING, STRING_STRING)); + createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.GetTYAHResultTypeUDF", FunctionReturnTypes.STRING, + STRING); + createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.GetVerticalUDF", FunctionReturnTypes.STRING, + or(STRING_STRING_STRING, STRING_STRING)); + createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.IsTYAHSearchResultsUDF", ReturnTypes.BOOLEAN, + STRING); + createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.IsValidKeyUDF", ReturnTypes.BOOLEAN, + or(STRING_STRING_STRING, STRING_STRING)); + createAddUserDefinedFunction("com.linkedin.ds.udf.hive.filter.IsTestMemberId", ReturnTypes.BOOLEAN, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.ds.udf.hive.lookup.PortalLookup", FunctionReturnTypes.STRING, + STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.ds.udf.hive.lookup.UserInterfaceLookup", FunctionReturnTypes.STRING, + family(Collections.nCopies(8, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.ds.udf.hive.lookup.WATBotCrawlerLookup", FunctionReturnTypes + .rowOf(ImmutableList.of("iscrawler", "crawlerid"), ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.VARCHAR)), + or(STRING_STRING_STRING, STRING_STRING)); + createAddUserDefinedFunction("com.linkedin.dwh.udf.hive.lookup.OsLookup", 
+ FunctionReturnTypes.rowOf(ImmutableList.of("os_name", "os_major_version", "os_full_version"), + ImmutableList.of(SqlTypeName.VARCHAR, SqlTypeName.VARCHAR, SqlTypeName.VARCHAR)), + or(STRING_STRING, family(SqlTypeFamily.STRING, SqlTypeFamily.ANY))); + createAddUserDefinedFunction("com.linkedin.dwh.udf.profile.GetProfileUrl", FunctionReturnTypes.STRING, family( + SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.dwh.udf.sessionization.CleanupBrowserId", FunctionReturnTypes.STRING, + STRING); + createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapSfdcProductName", FunctionReturnTypes.STRING, + STRING); + createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapSfdcProductCode", FunctionReturnTypes.STRING, + STRING); + createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapSfdcProductId", FunctionReturnTypes.STRING, + STRING); + createAddUserDefinedFunction("udfs.SeoReferrerTrkUdf", FunctionReturnTypes.STRING, STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.vector.daliview.udf.PresentDataType", FunctionReturnTypes.STRING, + family(SqlTypeFamily.ANY)); + createAddUserDefinedFunction("com.linkedin.vector.daliview.udf.PresentMediaType", FunctionReturnTypes.STRING, + family(SqlTypeFamily.ANY)); + createAddUserDefinedFunction("com.linkedin.vector.daliview.udf.UnifyVideoOrAudioDurationMicroSeconds", BIGINT, + family(SqlTypeFamily.ANY)); + createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.AdClickClassifier", FunctionReturnTypes.rowOf( + ImmutableList.of("clicks", "landingPageClicks", "totalEngagements", "otherEngagements", "likes", "commentLikes", + "comments", "shares", "follows", "oneClickLeadFormOpens", "companyPageClicks", "fullScreenPlays", + "viralClicks", "viralLandingPageClicks", "viralLikes", "viralCommentLikes", "viralComments", "viralShares", + "viralFollows", 
"viralOneClickLeadFormOpens", "viralCompanyPageClicks", "viralFullScreenPlays", + "viralTotalEngagements", "viralOtherEngagements", "adUnitClicks", "actionClicks", "textUrlClicks", "opens", + "cardClicks", "viralCardClicks", "costInUsd", "costInLocalCurrency"), + ImmutableList.of(SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, + SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, + SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, + SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, + SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, + SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, + SqlTypeName.INTEGER, SqlTypeName.DOUBLE, SqlTypeName.DOUBLE)), + family(SqlTypeFamily.INTEGER, SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.MAP, + SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.MAP, SqlTypeFamily.ANY, + SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.UnifiedCampaignType", + FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.ActivityId", BIGINT, family(SqlTypeFamily.MAP)); + createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.AdPlacementClassifier", + FunctionReturnTypes.STRING, family(SqlTypeFamily.INTEGER)); + createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.SponsoredMessageNodeId", ReturnTypes.INTEGER, + family(SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.orbit.emerger.coercerudfs.DynamicsLineOfBusinessCoercer", + FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("com.linkedin.orbit.emerger.coercerudfs.GenerateId", 
FunctionReturnTypes.STRING, + new SqlOperandTypeChecker() { + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + return true; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(1); + } + + @Override + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(ANY, ...)"; + } + + @Override + public Consistency getConsistency() { + return Consistency.NONE; + } + + @Override + public boolean isOptional(int i) { + return false; + } + }); + createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapD365OptionSet", FunctionReturnTypes.STRING, + STRING_STRING_STRING); + + SqlReturnTypeInference getProfileSectionsReturnTypeInference = opBinding -> { + int numArgs = opBinding.getOperandCount(); + Preconditions.checkState(numArgs == 2, "UDF isb.GetProfileSections must take 2 arguments."); + RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); + RelDataType retType = opBinding.getOperandType(0).getValueType(); + return typeFactory.createArrayType(retType, -1); + }; + createAddUserDefinedFunction("isb.GetProfileSections", getProfileSectionsReturnTypeInference, + family(SqlTypeFamily.MAP, SqlTypeFamily.ARRAY)); + + createAddUserDefinedFunction("com.linkedin.recruiter.udf.GetEventOriginUDF", FunctionReturnTypes.STRING, + or(STRING_STRING_STRING, + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.recruiter.udf.QueryRoutingTypeUDF", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("com.linkedin.snapshot.udf.ConstructSnapshotUrnUdf", FunctionReturnTypes.STRING, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.snapshot.udf.SnapshotPurgeEligibleUdf", ReturnTypes.BOOLEAN, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); + + // The following UDFs are already 
defined using Transport UDF. + // The class name is the corresponding Hive UDF. + // We point their class files to the corresponding Spark jar file in TransportableUDFMap. + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.DateFormatToEpoch", BIGINT_NULLABLE, + STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.EpochToDateFormat", FunctionReturnTypes.STRING, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.EpochToEpochMilliseconds", BIGINT_NULLABLE, + NUMERIC); + createAddUserDefinedFunction("com.linkedin.stdudfs.stringudfs.hive.InitCap", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.IsGuestMemberId", ReturnTypes.BOOLEAN, NUMERIC); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.MapLookup", + cascade(FunctionReturnTypes.STRING, SqlTypeTransforms.FORCE_NULLABLE), + family(SqlTypeFamily.MAP, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.PortalLookup", FunctionReturnTypes.STRING, + STRING_STRING); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.Sanitize", FunctionReturnTypes.STRING, STRING); + createAddUserDefinedFunction("com.linkedin.jemslookup.udf.hive.JemsLookup", FunctionReturnTypes.rowOfInference( + ImmutableList.of("jobproductid", "jobproductname", "jobentitlementids", "jobentitlementnameswithnamespace", + "listingtype", "sublistingtype", "istestjob"), + ImmutableList.of(BIGINT, FunctionReturnTypes.STRING, FunctionReturnTypes.arrayOfType(SqlTypeName.BIGINT, true), + FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR, true), FunctionReturnTypes.STRING, + FunctionReturnTypes.STRING, ReturnTypes.BOOLEAN)), + family( + ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + 
createAddUserDefinedFunction("com.linkedin.stdudfs.userinterfacelookup.hive.UserInterfaceLookup", + FunctionReturnTypes.STRING, + or(family(Collections.nCopies(8, SqlTypeFamily.STRING)), + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, + SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.stdudfs.userinterfacelookuptest.hive.UserInterfaceLookupTest", + FunctionReturnTypes.STRING, + or(family(Collections.nCopies(8, SqlTypeFamily.STRING)), + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, + SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.stdudfs.parsing.hive.UserAgentParser", FunctionReturnTypes.STRING, + family(Collections.nCopies(2, SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.stdudfs.parsing.hive.Ip2Str", FunctionReturnTypes.STRING, + or(family(SqlTypeFamily.STRING, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC), family(SqlTypeFamily.STRING))); + createAddUserDefinedFunction("com.linkedin.stdudfs.lookup.hive.BrowserLookup", + FunctionReturnTypes.rowOfInference( + ImmutableList.of("browser_name", "browser_major_version", "browser_full_version"), + ImmutableList.of(FunctionReturnTypes.STRING, FunctionReturnTypes.STRING, FunctionReturnTypes.STRING)), + STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.IsTestMemberId", ReturnTypes.BOOLEAN, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.stdudfs.urnextractor.hive.UrnExtractorFunctionWrapper", opBinding -> { + RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); + return typeFactory.createArrayType(typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), + typeFactory.createSqlType(SqlTypeName.VARCHAR)), -1); + }, 
or(ARRAY, STRING)); + createAddUserDefinedFunction("com.linkedin.stdudfs.hive.daliudfs.UrnExtractorFunctionWrapper", opBinding -> { + RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); + return typeFactory.createArrayType(typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), + typeFactory.createSqlType(SqlTypeName.VARCHAR)), -1); + }, or(ARRAY, STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMemberIdNumeric", BIGINT, + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMemberIdNumericInt", BIGINT, + family(SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMemberIdNumericLong", BIGINT, + family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateAll", ARG0, + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateArray", ARG0, + family(SqlTypeFamily.ARRAY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateArrayEvolve", ARG0, + family(SqlTypeFamily.ARRAY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateStruct", ARG0, + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMap", ARG0, + family(SqlTypeFamily.MAP, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMapEvolve", ARG0, + family(SqlTypeFamily.MAP, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMapKeyEvolve", ARG0, + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMapValEvolve", ARG0, + family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + 
createAddUserDefinedFunction("com.linkedin.jobs.udf.hive.ConvertIndustryCode", FunctionReturnTypes.STRING, STRING); + // This is a Hive Custom UDF which is a simplified version of 'date-converter' package. + // This UDF is not converted to a transport UDF. + createAddUserDefinedFunction("com.linkedin.dali.customudf.date.hive.DateFormatToEpoch", BIGINT_NULLABLE, + STRING_STRING_STRING); + createAddUserDefinedFunction("com.linkedin.policy.decoration.udfs.HasMemberConsent", ReturnTypes.BOOLEAN, + family(SqlTypeFamily.STRING, SqlTypeFamily.ANY, SqlTypeFamily.TIMESTAMP)); + createAddUserDefinedFunction("com.linkedin.policy.decoration.udfs.RedactFieldIf", ARG1, + family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.STRING, SqlTypeFamily.ANY)); + createAddUserDefinedFunction("li_groot_cast_nullability", new OrdinalReturnTypeInferenceV2(1), + family(SqlTypeFamily.ANY, SqlTypeFamily.ANY)); + + createAddUserDefinedFunction("com.linkedin.policy.decoration.udfs.RedactSecondarySchemaFieldIf", ARG1, family( + SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER, SqlTypeFamily.ANY)); + + createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.HasMemberConsentUDF", ReturnTypes.BOOLEAN, + family(SqlTypeFamily.STRING, SqlTypeFamily.ANY, SqlTypeFamily.TIMESTAMP)); + createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactFieldIfUDF", + new OrdinalReturnTypeInferenceV2(1), + family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.STRING, SqlTypeFamily.ANY)); + createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactSecondarySchemaFieldIfUDF", + new OrdinalReturnTypeInferenceV2(1), family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ARRAY, + SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.GetMappedValueUDF", FunctionReturnTypes.STRING, + family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + 
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.ExtractCollectionUDF", + FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR, true), family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); + + // UDTFs + addFunctionEntry("explode", new CoralSqlUnnestOperator(false)); + addFunctionEntry("posexplode", new CoralSqlUnnestOperator(true)); + addFunctionEntry("json_tuple", CoralJsonTupleOperator.JSON_TUPLE); + + // reflect functions + addFunctionEntry("reflect", HiveReflectOperator.REFLECT); + addFunctionEntry("java_method", HiveReflectOperator.REFLECT); + + // Generic UDTFs + createAddUserDefinedTableFunction("com.linkedin.tsar.hive.udf.ToJymbiiScores", + ImmutableList.of("job_urn", "rank", "glmix_score", "global_model_score", "sentinel_score", "job_effect_score", + "member_effect_score"), + ImmutableList.of(SqlTypeName.VARCHAR, SqlTypeName.INTEGER, SqlTypeName.DOUBLE, SqlTypeName.DOUBLE, + SqlTypeName.DOUBLE, SqlTypeName.DOUBLE, SqlTypeName.DOUBLE), + family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY)); + + // Context functions + addFunctionEntry("current_user", CURRENT_USER); + } + + /** + * Returns a list of functions matching given name case-insensitively. This returns empty list if the + * function name is not found. + * @param functionName function name to match + * @return list of matching Functions or empty collection. + */ + @Override + public Collection lookup(String functionName) { + return FUNCTION_MAP.get(functionName.toLowerCase()); + } + + /** + * @return immutable copy of internal function registry + */ + public ImmutableMultimap getRegistry() { + return ImmutableMultimap.copyOf(FUNCTION_MAP); + } + + /** + * Adds the function to registry, the key is lowercase functionName to make lookup case-insensitive. 
+ */ + private static void addFunctionEntry(String functionName, SqlOperator operator) { + FUNCTION_MAP.put(functionName.toLowerCase(), new Function(functionName, operator)); + } + + public static void createAddUserDefinedFunction(String functionName, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeChecker operandTypeChecker) { + addFunctionEntry(functionName, createCalciteUDF(functionName, returnTypeInference, operandTypeChecker)); + } + + public static void createAddUserDefinedFunction(String functionName, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeChecker operandTypeChecker, String dependency) { + String depPrefix = dependency.substring(0, 6).toLowerCase(); + + // TODO: dependency not used. Consider removing it (maybe this method completely). + if (!depPrefix.equals("ivy://")) { + dependency = "ivy://" + dependency; + } + addFunctionEntry(functionName, createCalciteUDF(functionName, returnTypeInference, operandTypeChecker)); + } + + /** + * Adds the generic UDTF, which is almost same as how we register for LinkedIn UDFs except that we need to register + * the return field names in `UDTF_RETURN_FIELD_NAME_MAP` + */ + public static void createAddUserDefinedTableFunction(String functionName, ImmutableList returnFieldNames, + ImmutableList returnFieldTypes, SqlOperandTypeChecker operandTypeChecker) { + // The type of returnFieldTypes can only be ImmutableList or ImmutableList + // ImmutableList is used with FunctionReturnTypes.rowOf(ImmutableList fieldNames, ImmutableList types) + // ImmutableList is used with FunctionReturnTypes.rowOfInference(ImmutableList fieldNames, ImmutableList types) + Preconditions.checkArgument(!returnFieldTypes.isEmpty() && returnFieldTypes.size() == returnFieldNames.size() + && (returnFieldTypes.stream().allMatch(type -> type instanceof SqlTypeName) + || returnFieldTypes.stream().allMatch(type -> type instanceof SqlReturnTypeInference))); + if (returnFieldTypes.get(0) instanceof SqlTypeName) { + 
createAddUserDefinedFunction(functionName, + FunctionReturnTypes.rowOf(returnFieldNames, + ImmutableList + .copyOf(returnFieldTypes.stream().map(type -> (SqlTypeName) type).collect(Collectors.toList()))), + operandTypeChecker); + } else { + createAddUserDefinedFunction(functionName, + FunctionReturnTypes.rowOfInference(returnFieldNames, + ImmutableList.copyOf( + returnFieldTypes.stream().map(type -> (SqlReturnTypeInference) type).collect(Collectors.toList()))), + operandTypeChecker); + } + UDTF_RETURN_FIELD_NAME_MAP.put(functionName, returnFieldNames); + } + + private static SqlOperator createCalciteUDF(String functionName, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, + List paramTypes) { + return new SqlUserDefinedFunction(new SqlIdentifier(functionName, SqlParserPos.ZERO), returnTypeInference, + operandTypeInference, operandTypeChecker, paramTypes, null); + } + + private static SqlOperator createCalciteUDF(String functionName, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeChecker operandTypeChecker) { + return new SqlUserDefinedFunction(new SqlIdentifier(functionName, SqlParserPos.ZERO), returnTypeInference, null, + operandTypeChecker, null, null); + } + + private static SqlOperator createCalciteUDF(String functionName, SqlReturnTypeInference returnTypeInference) { + return createCalciteUDF(functionName, returnTypeInference, null); + } + + private static void addCastOperatorEntries() { + String[] castFunctions = + { "tok_boolean", "tok_int", "tok_string", "tok_double", "tok_float", "tok_bigint", "tok_tinyint", "tok_smallint", "tok_char", "tok_decimal", "tok_varchar", "tok_binary", "tok_date", "tok_timestamp" }; + for (String f : castFunctions) { + FUNCTION_MAP.put(f, CoralFunction.CAST); + } + } + + /** + * Returns a predicate to test if ordinal parameter is optional + * @param ordinal parameter ordinal number + * @return predicate to test if the parameter is 
optional + */ + private static Predicate optionalOrd(final int ordinal) { + return input -> input == ordinal; + } + + private static Predicate optionalOrd(final List ordinals) { + return ordinals::contains; + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java index edeb99fc9..d8efda469 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java @@ -5,817 +5,10 @@ */ package com.linkedin.coral.hive.hive2rel.functions; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -import com.google.common.base.Preconditions; - -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlOperandCountRange; -import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.*; -import org.apache.calcite.sql.validate.SqlUserDefinedFunction; - -import com.linkedin.coral.com.google.common.collect.HashMultimap; -import com.linkedin.coral.com.google.common.collect.ImmutableList; -import com.linkedin.coral.com.google.common.collect.ImmutableMultimap; -import com.linkedin.coral.com.google.common.collect.Multimap; -import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; -import com.linkedin.coral.common.functions.Function; -import com.linkedin.coral.common.functions.FunctionRegistry; -import 
com.linkedin.coral.common.functions.FunctionReturnTypes; -import com.linkedin.coral.common.functions.GenericProjectFunction; -import com.linkedin.coral.common.functions.OperandTypeInference; -import com.linkedin.coral.common.functions.SameOperandTypeExceptFirstOperandChecker; - -import static com.linkedin.coral.hive.hive2rel.functions.CoalesceStructUtility.*; -import static com.linkedin.coral.hive.hive2rel.functions.TimestampFromUnixtime.TIMESTAMP_FROM_UNIXTIME; -import static org.apache.calcite.sql.fun.SqlLibraryOperators.*; -import static org.apache.calcite.sql.fun.SqlStdOperatorTable.*; -import static org.apache.calcite.sql.type.OperandTypes.*; -import static org.apache.calcite.sql.type.ReturnTypes.*; - /** - * Static implementation of HiveFunctionRegistry that has hard-coded list of all - * function names. This has a major disadvantage that the user defined functions are - * not available to the registry without manually adding the entry here and uploading - * a new version of library. - * TODO: Provide function registry catalog - * - * Note that Coral maintains a copy of StaticHiveFunctionRegistry for read only at run time. - * For individual query, we create a copy of function registry in a RelConTextProvider object. + * @deprecated Use {@link StaticCoralFunctionRegistry} instead. 
*/ -public class StaticHiveFunctionRegistry implements FunctionRegistry { - - public static final String IS_TEST_MEMBER_ID_CLASS = "com.linkedin.dali.udf.istestmemberid.hive.IsTestMemberId"; - - // TODO: Make this immutable using builder - static final Multimap FUNCTION_MAP = HashMultimap.create(); - - // Used for registering UDTFs, the key is the function name and the value is a list of field names returned by the UDTF - // We need it because we need to know the return field names of UDTF to do the conversion in ParseTreeBuilder.visitLateralViewUDTF - public static final Map> UDTF_RETURN_FIELD_NAME_MAP = new HashMap<>(); - - static { - // NOTE: All function names will be added as lowercase for case-insensitive comparison. - // FIXME: This mapping is currently incomplete - // aggregation functions - addFunctionEntry("sum", SUM); - addFunctionEntry("count", COUNT); - addFunctionEntry("avg", AVG); - addFunctionEntry("min", MIN); - addFunctionEntry("max", MAX); - createAddUserDefinedFunction("collect_list", FunctionReturnTypes.ARRAY_OF_ARG0_TYPE, ANY); - createAddUserDefinedFunction("collect_set", FunctionReturnTypes.ARRAY_OF_ARG0_TYPE, ANY); - - // window functions - addFunctionEntry("row_number", ROW_NUMBER); - addFunctionEntry("rank", SqlStdOperatorTable.RANK); // qualification required due to naming conflict - addFunctionEntry("dense_rank", DENSE_RANK); - addFunctionEntry("cume_dist", CUME_DIST); - addFunctionEntry("percent_rank", PERCENT_RANK); - addFunctionEntry("first_value", FIRST_VALUE); - addFunctionEntry("last_value", LAST_VALUE); - addFunctionEntry("nth_value", NTH_VALUE); - addFunctionEntry("lag", LAG); - addFunctionEntry("lead", LEAD); - addFunctionEntry("stddev", STDDEV); - addFunctionEntry("stddev_samp", STDDEV_SAMP); - addFunctionEntry("stddev_pop", STDDEV_POP); - addFunctionEntry("variance", VARIANCE); - addFunctionEntry("var_samp", VAR_SAMP); - addFunctionEntry("var_pop", VAR_POP); - - //addFunctionEntry("in", HiveInOperator.IN); - 
FUNCTION_MAP.put("in", HiveFunction.IN); - - //addFunctionEntry("in", SqlStdOperatorTable.IN); - - // operators - addFunctionEntry("rlike", HiveRLikeOperator.RLIKE); - addFunctionEntry("regexp", HiveRLikeOperator.REGEXP); - addFunctionEntry("!=", NOT_EQUALS); - addFunctionEntry("==", EQUALS); - - // conditional function - addFunctionEntry("tok_isnull", IS_NULL); - addFunctionEntry("tok_isnotnull", IS_NOT_NULL); - FUNCTION_MAP.put("when", HiveFunction.WHEN); - FUNCTION_MAP.put("case", HiveFunction.CASE); - FUNCTION_MAP.put("between", HiveFunction.BETWEEN); - addFunctionEntry("nullif", NULLIF); - addFunctionEntry("isnull", IS_NULL); - addFunctionEntry("isnotnull", IS_NOT_NULL); - - // TODO: this should be arg1 or arg2 nullable - createAddUserDefinedFunction("nvl", ARG0_NULLABLE, and(family(SqlTypeFamily.ANY, SqlTypeFamily.ANY), SAME_SAME)); - - // calcite models 'if' function as CASE operator. We can use CASE but that will cause translation - // to SQL to be odd although correct. So, we add 'if' as UDF - addFunctionEntry("if", - createCalciteUDF("if", FunctionReturnTypes.IF_FUNC_RETURN_TYPE, OperandTypeInference.BOOLEAN_ANY_SAME, - new SameOperandTypeExceptFirstOperandChecker(3, SqlTypeName.BOOLEAN), null)); - - addFunctionEntry("coalesce", COALESCE); - // cast operator - addCastOperatorEntries(); - - // Complex type constructors - addFunctionEntry("array", ARRAY_VALUE_CONSTRUCTOR); - addFunctionEntry("struct", ROW); - addFunctionEntry("map", MAP_VALUE_CONSTRUCTOR); - addFunctionEntry("named_struct", HiveNamedStructFunction.NAMED_STRUCT); - addFunctionEntry("generic_project", GenericProjectFunction.GENERIC_PROJECT); - - // conversion functions - createAddUserDefinedFunction("binary", FunctionReturnTypes.BINARY, - or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); - - // mathematical functions - // we need to define new strategy for hive to allow null operands by default for everything - createAddUserDefinedFunction("pmod", BIGINT, NUMERIC_NUMERIC); - 
createAddUserDefinedFunction("round", DOUBLE_NULLABLE, - family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER), optionalOrd(1))); - createAddUserDefinedFunction("bround", DOUBLE_NULLABLE, - family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER), optionalOrd(1))); - createAddUserDefinedFunction("floor", BIGINT_FORCE_NULLABLE, family(SqlTypeFamily.NUMERIC)); - createAddUserDefinedFunction("ceil", BIGINT_FORCE_NULLABLE, family(SqlTypeFamily.NUMERIC)); - createAddUserDefinedFunction("ceiling", BIGINT_FORCE_NULLABLE, family(SqlTypeFamily.NUMERIC)); - createAddUserDefinedFunction("rand", DOUBLE_NULLABLE, - family(ImmutableList.of(SqlTypeFamily.INTEGER), optionalOrd(0))); - createAddUserDefinedFunction("exp", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("ln", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("log10", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("log2", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("log", DOUBLE_NULLABLE, NUMERIC_NUMERIC); - createAddUserDefinedFunction("pow", DOUBLE_NULLABLE, NUMERIC_NUMERIC); - createAddUserDefinedFunction("power", DOUBLE_NULLABLE, NUMERIC_NUMERIC); - createAddUserDefinedFunction("sqrt", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("hex", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.NUMERIC), family(SqlTypeFamily.BINARY))); - createAddUserDefinedFunction("unhex", FunctionReturnTypes.BINARY, STRING); - createAddUserDefinedFunction("conv", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.EXACT_NUMERIC, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER))); - createAddUserDefinedFunction("abs", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("sin", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("asin", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("cos", DOUBLE_NULLABLE, NUMERIC); - 
createAddUserDefinedFunction("acos", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("tan", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("atan", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("degrees", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("radians", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("positive", ARG0_NULLABLE, NUMERIC); - createAddUserDefinedFunction("negative", ARG0_NULLABLE, NUMERIC); - createAddUserDefinedFunction("sign", ARG0_NULLABLE, NUMERIC); - createAddUserDefinedFunction("e", DOUBLE, NILADIC); - createAddUserDefinedFunction("pi", DOUBLE, NILADIC); - createAddUserDefinedFunction("factorial", BIGINT_NULLABLE, family(SqlTypeFamily.INTEGER)); - createAddUserDefinedFunction("cbrt", DOUBLE_NULLABLE, NUMERIC); - createAddUserDefinedFunction("shiftleft", ARG0_NULLABLE, EXACT_NUMERIC_EXACT_NUMERIC); - createAddUserDefinedFunction("shiftright", ARG0_NULLABLE, EXACT_NUMERIC_EXACT_NUMERIC); - createAddUserDefinedFunction("shiftrightunsigned", ARG0_NULLABLE, EXACT_NUMERIC_EXACT_NUMERIC); - createAddUserDefinedFunction("greatest", ARG0_NULLABLE, SAME_VARIADIC); - createAddUserDefinedFunction("least", ARG0_NULLABLE, SAME_VARIADIC); - createAddUserDefinedFunction("width_bucket", INTEGER_NULLABLE, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER)); - - // string functions - // TODO: operand types are not strictly true since these functions can take null literal - // and most of these entries don't allow null literals. 
This will work for most common usages - // but it's easy to write HiveQL to make these fail - createAddUserDefinedFunction("ascii", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("base64", FunctionReturnTypes.STRING, BINARY); - createAddUserDefinedFunction("character_length", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("chr", FunctionReturnTypes.STRING, NUMERIC); - createAddUserDefinedFunction("concat", cascade(FunctionReturnTypes.STRING, SqlTypeTransforms.TO_NULLABLE), - SAME_VARIADIC); - // [CORAL-24] Tried setting this to - // or(family(SqlTypeFamily.STRING, SqlTypeFamily.ARRAY), - // and(variadic(SqlOperandCountRanges.from(2)), repeat(SqlOperandCountRanges.from(2), STRING))) - // but calcite's composeable operand checker does not handle variadic operator counts correctly. - createAddUserDefinedFunction("concat_ws", FunctionReturnTypes.STRING, new SqlOperandTypeChecker() { - @Override - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - return family(SqlTypeFamily.STRING, SqlTypeFamily.ARRAY).checkOperandTypes(callBinding, throwOnFailure) - || new SameOperandTypeChecker(-1).checkOperandTypes(callBinding, throwOnFailure); - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.from(2); - } - - @Override - public String getAllowedSignatures(SqlOperator op, String opName) { - return opName + "(STRING, ARRAY|STRING, ...)"; - } - - @Override - public Consistency getConsistency() { - return Consistency.NONE; - } - - @Override - public boolean isOptional(int i) { - return false; - } - }); - - createAddUserDefinedFunction("context_ngrams", LEAST_RESTRICTIVE, - family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)); - createAddUserDefinedFunction("decode", FunctionReturnTypes.STRING, - family(SqlTypeFamily.BINARY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("elt", FunctionReturnTypes.STRING, VARIADIC); - 
createAddUserDefinedFunction("encode", FunctionReturnTypes.BINARY, STRING_STRING); - createAddUserDefinedFunction("field", ReturnTypes.INTEGER, VARIADIC); - createAddUserDefinedFunction("find_in_set", ReturnTypes.INTEGER, STRING_STRING); - createAddUserDefinedFunction("format_number", FunctionReturnTypes.STRING, NUMERIC_INTEGER); - createAddUserDefinedFunction("get_json_object", FunctionReturnTypes.STRING, STRING_STRING); - createAddUserDefinedFunction("in_file", ReturnTypes.BOOLEAN, STRING_STRING); - createAddUserDefinedFunction("initcap", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("instr", ReturnTypes.INTEGER, STRING_STRING); - createAddUserDefinedFunction("length", INTEGER_NULLABLE, STRING); - createAddUserDefinedFunction("levenshtein", ReturnTypes.INTEGER, STRING_STRING); - createAddUserDefinedFunction("locate", FunctionReturnTypes.STRING, - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.INTEGER), optionalOrd(2))); - addFunctionEntry("lower", LOWER); - addFunctionEntry("lcase", LOWER); - addFunctionEntry("translate", TRANSLATE3); - addFunctionEntry("translate3", TRANSLATE3); - createAddUserDefinedFunction("lpad", FunctionReturnTypes.STRING, - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("ltrim", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("ngrams", LEAST_RESTRICTIVE, - family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)); - createAddUserDefinedFunction("octet_length", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("parse_url", FunctionReturnTypes.STRING, - family(Collections.nCopies(3, SqlTypeFamily.STRING), optionalOrd(2))); - createAddUserDefinedFunction("printf", FunctionReturnTypes.STRING, VARIADIC); - createAddUserDefinedFunction("regexp_extract", ARG0, - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.INTEGER), 
optionalOrd(2))); - createAddUserDefinedFunction("regexp_replace", FunctionReturnTypes.STRING, STRING_STRING_STRING); - createAddUserDefinedFunction("repeat", FunctionReturnTypes.STRING, - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER)); - addFunctionEntry("replace", REPLACE); - createAddUserDefinedFunction("reverse", ARG0, or(STRING, NULLABLE_LITERAL)); - createAddUserDefinedFunction("rpad", FunctionReturnTypes.STRING, - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("rtrim", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("sentences", LEAST_RESTRICTIVE, STRING_STRING_STRING); - createAddUserDefinedFunction("soundex", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("space", FunctionReturnTypes.STRING, NUMERIC); - createAddUserDefinedFunction("split", FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR), STRING_STRING); - createAddUserDefinedFunction("str_to_map", FunctionReturnTypes.mapOfType(SqlTypeName.VARCHAR, SqlTypeName.VARCHAR), - family(Collections.nCopies(3, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(1, 2)))); - createAddUserDefinedFunction("substr", FunctionReturnTypes.STRING, - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), optionalOrd(2))); - createAddUserDefinedFunction("substring", FunctionReturnTypes.STRING, - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), optionalOrd(2))); - - createAddUserDefinedFunction("substring_index", FunctionReturnTypes.STRING, STRING_STRING_INTEGER); - createAddUserDefinedFunction("trim", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("unbase64", explicit(SqlTypeName.VARBINARY), or(STRING, NULLABLE_LITERAL)); - addFunctionEntry("upper", UPPER); - addFunctionEntry("ucase", UPPER); - addFunctionEntry("initcap", INITCAP); - createAddUserDefinedFunction("md5", FunctionReturnTypes.STRING, - 
or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); - createAddUserDefinedFunction("sha1", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); - createAddUserDefinedFunction("sha", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); - createAddUserDefinedFunction("crc32", BIGINT, or(family(SqlTypeFamily.STRING), family(SqlTypeFamily.BINARY))); - createAddUserDefinedFunction("from_utf8", explicit(SqlTypeName.VARCHAR), or(CHARACTER, BINARY)); - createAddUserDefinedFunction("at_timezone", explicit(SqlTypeName.TIMESTAMP), - family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("with_timezone", explicit(SqlTypeName.TIMESTAMP), - family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("to_unixtime", explicit(SqlTypeName.DOUBLE), family(SqlTypeFamily.TIMESTAMP)); - createAddUserDefinedFunction("from_unixtime_nanos", explicit(SqlTypeName.TIMESTAMP), NUMERIC); - createAddUserDefinedFunction("$canonicalize_hive_timezone_id", explicit(SqlTypeName.VARCHAR), STRING); - - // xpath functions - createAddUserDefinedFunction("xpath", FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR), STRING_STRING); - createAddUserDefinedFunction("xpath_string", FunctionReturnTypes.STRING, STRING_STRING); - createAddUserDefinedFunction("xpath_boolean", ReturnTypes.BOOLEAN, STRING_STRING); - createAddUserDefinedFunction("xpath_short", FunctionReturnTypes.SMALLINT, STRING_STRING); - createAddUserDefinedFunction("xpath_int", ReturnTypes.INTEGER, STRING_STRING); - createAddUserDefinedFunction("xpath_long", BIGINT, STRING_STRING); - createAddUserDefinedFunction("xpath_float", DOUBLE, STRING_STRING); - createAddUserDefinedFunction("xpath_double", DOUBLE, STRING_STRING); - createAddUserDefinedFunction("xpath_number", DOUBLE, STRING_STRING); - - // Date Functions - createAddUserDefinedFunction("from_unixtime", FunctionReturnTypes.STRING, - 
family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING), optionalOrd(1))); - addFunctionEntry("timestamp_from_unixtime", TIMESTAMP_FROM_UNIXTIME); - createAddUserDefinedFunction("unix_timestamp", BIGINT, - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(0, 1)))); - createAddUserDefinedFunction("to_date", FunctionReturnTypes.STRING, or(STRING, DATETIME)); - createAddUserDefinedFunction("date", DATE, or(STRING, DATETIME)); - createAddUserDefinedFunction("year", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("quarter", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("month", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("day", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("dayofmonth", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("hour", ReturnTypes.INTEGER, or(STRING, DATETIME)); - createAddUserDefinedFunction("minute", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("second", ReturnTypes.INTEGER, STRING); - createAddUserDefinedFunction("weekofyear", ReturnTypes.INTEGER, STRING); - //TODO: add extract UDF - createAddUserDefinedFunction("datediff", ReturnTypes.INTEGER, STRING_STRING); - createAddUserDefinedFunction("date_add", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.DATE, SqlTypeFamily.INTEGER), family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER), - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER))); - - createAddUserDefinedFunction("date_sub", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.DATE, SqlTypeFamily.INTEGER), family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER), - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER))); - createAddUserDefinedFunction("from_utc_timestamp", explicit(SqlTypeName.TIMESTAMP), - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - addFunctionEntry("current_date", CURRENT_DATE); - addFunctionEntry("current_timestamp", CURRENT_TIMESTAMP); - 
createAddUserDefinedFunction("add_months", FunctionReturnTypes.STRING, - family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER)); - createAddUserDefinedFunction("last_day", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("next_day", FunctionReturnTypes.STRING, STRING_STRING); - createAddUserDefinedFunction("trunc", FunctionReturnTypes.STRING, STRING_STRING); - createAddUserDefinedFunction("months_between", DOUBLE, family(SqlTypeFamily.DATE, SqlTypeFamily.DATE)); - createAddUserDefinedFunction("date_format", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.DATE, SqlTypeFamily.STRING), family(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.STRING), - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("to_utc_timestamp", FunctionReturnTypes.STRING, - or(STRING_STRING, family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING))); - - // Collection functions - addFunctionEntry("size", CARDINALITY); - createAddUserDefinedFunction("array_contains", ReturnTypes.BOOLEAN, family(SqlTypeFamily.ARRAY, SqlTypeFamily.ANY)); - createAddUserDefinedFunction("map_keys", opBinding -> { - RelDataType operandType = opBinding.getOperandType(0); - RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); - return typeFactory.createArrayType(operandType.getKeyType(), -1); - }, family(SqlTypeFamily.MAP)); - - createAddUserDefinedFunction("map_values", opBinding -> { - RelDataType operandType = opBinding.getOperandType(0); - RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); - return typeFactory.createArrayType(operandType.getValueType(), -1); - }, family(SqlTypeFamily.MAP)); - - createAddUserDefinedFunction("sort_array", ARG0, ARRAY); - - createAddUserDefinedFunction("extract_union", COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY, - or(ANY, family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER))); - createAddUserDefinedFunction("coalesce_struct", COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY, - or(ANY, family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER))); 
- - // LinkedIn UDFs: Dali stores mapping from UDF name to the implementing Java class as table properties - // in the HCatalog. So, an UDF implementation may be referred by different names by different views. - // We register these UDFs by the implementing class name to create a single entry for each UDF. - createAddUserDefinedFunction("com.linkedin.dali.bug.DummyUdf", FunctionReturnTypes.STRING, or(STRING, ARRAY)); - createAddUserDefinedFunction(IS_TEST_MEMBER_ID_CLASS, ReturnTypes.BOOLEAN, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.CHARACTER)); - createAddUserDefinedFunction("com.linkedin.dali.udf.urnextractor.hive.UrnExtractor", - FunctionReturnTypes.ARRAY_OF_STR_STR_MAP, or(STRING, ARRAY)); - createAddUserDefinedFunction("com.linkedin.udf.aws.ReadJsonUDF", FunctionReturnTypes.STRING, STRING_STRING); - createAddUserDefinedFunction("com.linkedin.udf.hdfs.GetDatasetNameFromPathUDF", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("com.linkedin.dali.udf.isguestmemberid.hive.IsGuestMemberId", ReturnTypes.BOOLEAN, - NUMERIC); - createAddUserDefinedFunction("com.linkedin.dali.udf.watbotcrawlerlookup.hive.WATBotCrawlerLookup", - FunctionReturnTypes.rowOf(ImmutableList.of("iscrawler", "crawlerid"), - ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.VARCHAR)), - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING), - optionalOrd(ImmutableList.of(2, 3)))); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.WatBotCrawlerLookup", - FunctionReturnTypes.rowOf(ImmutableList.of("iscrawler", "crawlerid"), - ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.VARCHAR)), - family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING), - optionalOrd(ImmutableList.of(2, 3)))); - - createAddUserDefinedFunction("com.linkedin.dali.udf.userinterfacelookup.hive.UserInterfaceLookup", - FunctionReturnTypes.STRING, - 
or(family(Collections.nCopies(8, SqlTypeFamily.STRING)), - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, - SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.dali.udf.portallookup.hive.PortalLookup", FunctionReturnTypes.STRING, - STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.dali.udf.useragentparser.hive.UserAgentParser", - FunctionReturnTypes.STRING, STRING_STRING); - createAddUserDefinedFunction("com.linkedin.dali.udf.maplookup.hive.MapLookup", - cascade(FunctionReturnTypes.STRING, SqlTypeTransforms.FORCE_NULLABLE), - family(SqlTypeFamily.MAP, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.dali.udf.monarch.UrnGenerator", FunctionReturnTypes.STRING, VARIADIC); - createAddUserDefinedFunction("com.linkedin.dali.udf.genericlookup.hive.GenericLookup", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.ANY, - SqlTypeFamily.ANY), - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.ANY, - SqlTypeFamily.ANY, SqlTypeFamily.ANY))); - createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.UrnToID", FunctionReturnTypes.STRING, STRING); - - createAddUserDefinedFunction("com.linkedin.dali.udf.date.hive.DateFormatToEpoch", BIGINT_NULLABLE, - STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.dali.udf.date.hive.EpochToDateFormat", FunctionReturnTypes.STRING, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.dali.udf.date.hive.EpochToEpochMilliseconds", BIGINT_NULLABLE, NUMERIC); - createAddUserDefinedFunction("com.linkedin.dali.udf.sanitize.hive.Sanitize", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("org.apache.hadoop.hive.ql.udf.generic.GenericProject", 
ARG0, - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.GetIdFromUrn", BIGINT, STRING); - createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.GetPermissionsString", - FunctionReturnTypes.STRING, family(SqlTypeFamily.ARRAY)); - createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.EpochTimeInSeconds", BIGINT, STRING); - createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.EpochTimeInSecondsNullable", BIGINT_NULLABLE, - STRING); - createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.IsUrnForType", ReturnTypes.BOOLEAN, - STRING_STRING); - createAddUserDefinedFunction("com.linkedin.dali.view.udf.entityhandles.PhoneNumberNormalizer", - FunctionReturnTypes.STRING, STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.dali.views.job.udf.GetUUID", FunctionReturnTypes.STRING, BINARY); - createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetOrderUrn", FunctionReturnTypes.STRING, - family(SqlTypeFamily.MAP, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetChooserId", FunctionReturnTypes.STRING, - family(SqlTypeFamily.MAP)); - createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetFamily", FunctionReturnTypes.STRING, - family(SqlTypeFamily.MAP)); - createAddUserDefinedFunction("com.linkedin.dali.views.premium.udf.GetPriceUrnList", - FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR), family(SqlTypeFamily.MAP)); - - final SqlReturnTypeInference hitInfo = FunctionReturnTypes.rowOfInference( - ImmutableList.of("secondarysearchresultinfo", "entityawaresuggestioninfo"), - ImmutableList.of(FunctionReturnTypes.rowOf(ImmutableList.of("vertical"), ImmutableList.of(SqlTypeName.VARCHAR)), - FunctionReturnTypes.rowOfInference(ImmutableList.of("suggestedentities"), - ImmutableList.of(FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR, true))))); - - final 
SqlReturnTypeInference gridPositionInfo = FunctionReturnTypes.rowOf(ImmutableList.of("row", "column"), - ImmutableList.of(SqlTypeName.INTEGER, SqlTypeName.INTEGER)); - - createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.CreateSearchActionResultUDF", - FunctionReturnTypes.rowOfInference( - ImmutableList.of("entityurn", "resulttype", "absoluteposition", "positioninvertical", "iscachehit", - "isanonymized", "hitinfo", "gridposition", "isnamematch", "trackingid"), - ImmutableList.of(FunctionReturnTypes.STRING, FunctionReturnTypes.STRING, INTEGER_NULLABLE, INTEGER_NULLABLE, - ReturnTypes.BOOLEAN, ReturnTypes.BOOLEAN, hitInfo, gridPositionInfo, ReturnTypes.BOOLEAN, - FunctionReturnTypes.BINARY)), - family(SqlTypeFamily.MAP, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.GetActionTypeUDF", FunctionReturnTypes.STRING, - or(STRING_STRING_STRING, STRING_STRING)); - createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.GetTYAHResultTypeUDF", FunctionReturnTypes.STRING, - STRING); - createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.GetVerticalUDF", FunctionReturnTypes.STRING, - or(STRING_STRING_STRING, STRING_STRING)); - createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.IsTYAHSearchResultsUDF", ReturnTypes.BOOLEAN, - STRING); - createAddUserDefinedFunction("com.linkedin.dali.views.search.udf.IsValidKeyUDF", ReturnTypes.BOOLEAN, - or(STRING_STRING_STRING, STRING_STRING)); - createAddUserDefinedFunction("com.linkedin.ds.udf.hive.filter.IsTestMemberId", ReturnTypes.BOOLEAN, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.ds.udf.hive.lookup.PortalLookup", FunctionReturnTypes.STRING, - STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.ds.udf.hive.lookup.UserInterfaceLookup", FunctionReturnTypes.STRING, - family(Collections.nCopies(8, SqlTypeFamily.STRING))); - 
createAddUserDefinedFunction("com.linkedin.ds.udf.hive.lookup.WATBotCrawlerLookup", FunctionReturnTypes - .rowOf(ImmutableList.of("iscrawler", "crawlerid"), ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.VARCHAR)), - or(STRING_STRING_STRING, STRING_STRING)); - createAddUserDefinedFunction("com.linkedin.dwh.udf.hive.lookup.OsLookup", - FunctionReturnTypes.rowOf(ImmutableList.of("os_name", "os_major_version", "os_full_version"), - ImmutableList.of(SqlTypeName.VARCHAR, SqlTypeName.VARCHAR, SqlTypeName.VARCHAR)), - or(STRING_STRING, family(SqlTypeFamily.STRING, SqlTypeFamily.ANY))); - createAddUserDefinedFunction("com.linkedin.dwh.udf.profile.GetProfileUrl", FunctionReturnTypes.STRING, family( - SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.dwh.udf.sessionization.CleanupBrowserId", FunctionReturnTypes.STRING, - STRING); - createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapSfdcProductName", FunctionReturnTypes.STRING, - STRING); - createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapSfdcProductCode", FunctionReturnTypes.STRING, - STRING); - createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapSfdcProductId", FunctionReturnTypes.STRING, - STRING); - createAddUserDefinedFunction("udfs.SeoReferrerTrkUdf", FunctionReturnTypes.STRING, STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.vector.daliview.udf.PresentDataType", FunctionReturnTypes.STRING, - family(SqlTypeFamily.ANY)); - createAddUserDefinedFunction("com.linkedin.vector.daliview.udf.PresentMediaType", FunctionReturnTypes.STRING, - family(SqlTypeFamily.ANY)); - createAddUserDefinedFunction("com.linkedin.vector.daliview.udf.UnifyVideoOrAudioDurationMicroSeconds", BIGINT, - family(SqlTypeFamily.ANY)); - createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.AdClickClassifier", FunctionReturnTypes.rowOf( - 
ImmutableList.of("clicks", "landingPageClicks", "totalEngagements", "otherEngagements", "likes", "commentLikes", - "comments", "shares", "follows", "oneClickLeadFormOpens", "companyPageClicks", "fullScreenPlays", - "viralClicks", "viralLandingPageClicks", "viralLikes", "viralCommentLikes", "viralComments", "viralShares", - "viralFollows", "viralOneClickLeadFormOpens", "viralCompanyPageClicks", "viralFullScreenPlays", - "viralTotalEngagements", "viralOtherEngagements", "adUnitClicks", "actionClicks", "textUrlClicks", "opens", - "cardClicks", "viralCardClicks", "costInUsd", "costInLocalCurrency"), - ImmutableList.of(SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, - SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, - SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, - SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, - SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, - SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, SqlTypeName.INTEGER, - SqlTypeName.INTEGER, SqlTypeName.DOUBLE, SqlTypeName.DOUBLE)), - family(SqlTypeFamily.INTEGER, SqlTypeFamily.STRING, SqlTypeFamily.INTEGER, SqlTypeFamily.MAP, - SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.MAP, SqlTypeFamily.ANY, - SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.UnifiedCampaignType", - FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.ActivityId", BIGINT, family(SqlTypeFamily.MAP)); - createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.AdPlacementClassifier", - FunctionReturnTypes.STRING, family(SqlTypeFamily.INTEGER)); - 
createAddUserDefinedFunction("com.linkedin.tscp.reporting.dali.udfs.SponsoredMessageNodeId", ReturnTypes.INTEGER, - family(SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.orbit.emerger.coercerudfs.DynamicsLineOfBusinessCoercer", - FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("com.linkedin.orbit.emerger.coercerudfs.GenerateId", FunctionReturnTypes.STRING, - new SqlOperandTypeChecker() { - @Override - public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - return true; - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.from(1); - } - - @Override - public String getAllowedSignatures(SqlOperator op, String opName) { - return opName + "(ANY, ...)"; - } - - @Override - public Consistency getConsistency() { - return Consistency.NONE; - } - - @Override - public boolean isOptional(int i) { - return false; - } - }); - createAddUserDefinedFunction("com.linkedin.etg.business.common.udfs.MapD365OptionSet", FunctionReturnTypes.STRING, - STRING_STRING_STRING); - - SqlReturnTypeInference getProfileSectionsReturnTypeInference = opBinding -> { - int numArgs = opBinding.getOperandCount(); - Preconditions.checkState(numArgs == 2, "UDF isb.GetProfileSections must take 2 arguments."); - RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); - RelDataType retType = opBinding.getOperandType(0).getValueType(); - return typeFactory.createArrayType(retType, -1); - }; - createAddUserDefinedFunction("isb.GetProfileSections", getProfileSectionsReturnTypeInference, - family(SqlTypeFamily.MAP, SqlTypeFamily.ARRAY)); - - createAddUserDefinedFunction("com.linkedin.recruiter.udf.GetEventOriginUDF", FunctionReturnTypes.STRING, - or(STRING_STRING_STRING, - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.recruiter.udf.QueryRoutingTypeUDF", FunctionReturnTypes.STRING, STRING); 
- createAddUserDefinedFunction("com.linkedin.snapshot.udf.ConstructSnapshotUrnUdf", FunctionReturnTypes.STRING, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.snapshot.udf.SnapshotPurgeEligibleUdf", ReturnTypes.BOOLEAN, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); - - // The following UDFs are already defined using Transport UDF. - // The class name is the corresponding Hive UDF. - // We point their class files to the corresponding Spark jar file in TransportableUDFMap. - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.DateFormatToEpoch", BIGINT_NULLABLE, - STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.EpochToDateFormat", FunctionReturnTypes.STRING, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.EpochToEpochMilliseconds", BIGINT_NULLABLE, - NUMERIC); - createAddUserDefinedFunction("com.linkedin.stdudfs.stringudfs.hive.InitCap", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.IsGuestMemberId", ReturnTypes.BOOLEAN, NUMERIC); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.MapLookup", - cascade(FunctionReturnTypes.STRING, SqlTypeTransforms.FORCE_NULLABLE), - family(SqlTypeFamily.MAP, SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.PortalLookup", FunctionReturnTypes.STRING, - STRING_STRING); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.Sanitize", FunctionReturnTypes.STRING, STRING); - createAddUserDefinedFunction("com.linkedin.jemslookup.udf.hive.JemsLookup", FunctionReturnTypes.rowOfInference( - ImmutableList.of("jobproductid", "jobproductname", "jobentitlementids", "jobentitlementnameswithnamespace", - "listingtype", "sublistingtype", "istestjob"), - ImmutableList.of(BIGINT, 
FunctionReturnTypes.STRING, FunctionReturnTypes.arrayOfType(SqlTypeName.BIGINT, true), - FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR, true), FunctionReturnTypes.STRING, - FunctionReturnTypes.STRING, ReturnTypes.BOOLEAN)), - family( - ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.stdudfs.userinterfacelookup.hive.UserInterfaceLookup", - FunctionReturnTypes.STRING, - or(family(Collections.nCopies(8, SqlTypeFamily.STRING)), - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, - SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.stdudfs.userinterfacelookuptest.hive.UserInterfaceLookupTest", - FunctionReturnTypes.STRING, - or(family(Collections.nCopies(8, SqlTypeFamily.STRING)), - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING, - SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.stdudfs.parsing.hive.UserAgentParser", FunctionReturnTypes.STRING, - family(Collections.nCopies(2, SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.stdudfs.parsing.hive.Ip2Str", FunctionReturnTypes.STRING, - or(family(SqlTypeFamily.STRING, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC), family(SqlTypeFamily.STRING))); - createAddUserDefinedFunction("com.linkedin.stdudfs.lookup.hive.BrowserLookup", - FunctionReturnTypes.rowOfInference( - ImmutableList.of("browser_name", "browser_major_version", "browser_full_version"), - ImmutableList.of(FunctionReturnTypes.STRING, FunctionReturnTypes.STRING, FunctionReturnTypes.STRING)), - STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.stdudfs.daliudfs.hive.IsTestMemberId", ReturnTypes.BOOLEAN, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); 
- createAddUserDefinedFunction("com.linkedin.stdudfs.urnextractor.hive.UrnExtractorFunctionWrapper", opBinding -> { - RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); - return typeFactory.createArrayType(typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), - typeFactory.createSqlType(SqlTypeName.VARCHAR)), -1); - }, or(ARRAY, STRING)); - createAddUserDefinedFunction("com.linkedin.stdudfs.hive.daliudfs.UrnExtractorFunctionWrapper", opBinding -> { - RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); - return typeFactory.createArrayType(typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), - typeFactory.createSqlType(SqlTypeName.VARCHAR)), -1); - }, or(ARRAY, STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMemberIdNumeric", BIGINT, - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMemberIdNumericInt", BIGINT, - family(SqlTypeFamily.INTEGER, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMemberIdNumericLong", BIGINT, - family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateAll", ARG0, - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateArray", ARG0, - family(SqlTypeFamily.ARRAY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateArrayEvolve", ARG0, - family(SqlTypeFamily.ARRAY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateStruct", ARG0, - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMap", ARG0, - family(SqlTypeFamily.MAP, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMapEvolve", ARG0, 
- family(SqlTypeFamily.MAP, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMapKeyEvolve", ARG0, - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.udfs.standard.hive.ObfuscateMapValEvolve", ARG0, - family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.jobs.udf.hive.ConvertIndustryCode", FunctionReturnTypes.STRING, STRING); - // This is a Hive Custom UDF which is a simplified version of 'date-converter' package. - // This UDF is not converted to a transport UDF. - createAddUserDefinedFunction("com.linkedin.dali.customudf.date.hive.DateFormatToEpoch", BIGINT_NULLABLE, - STRING_STRING_STRING); - createAddUserDefinedFunction("com.linkedin.policy.decoration.udfs.HasMemberConsent", ReturnTypes.BOOLEAN, - family(SqlTypeFamily.STRING, SqlTypeFamily.ANY, SqlTypeFamily.TIMESTAMP)); - createAddUserDefinedFunction("com.linkedin.policy.decoration.udfs.RedactFieldIf", ARG1, - family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.STRING, SqlTypeFamily.ANY)); - createAddUserDefinedFunction("li_groot_cast_nullability", new OrdinalReturnTypeInferenceV2(1), - family(SqlTypeFamily.ANY, SqlTypeFamily.ANY)); - - createAddUserDefinedFunction("com.linkedin.policy.decoration.udfs.RedactSecondarySchemaFieldIf", ARG1, family( - SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER, SqlTypeFamily.ANY)); - - createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.HasMemberConsentUDF", ReturnTypes.BOOLEAN, - family(SqlTypeFamily.STRING, SqlTypeFamily.ANY, SqlTypeFamily.TIMESTAMP)); - createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactFieldIfUDF", - new OrdinalReturnTypeInferenceV2(1), - family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.STRING, SqlTypeFamily.ANY)); - createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactSecondarySchemaFieldIfUDF", - new 
OrdinalReturnTypeInferenceV2(1), family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ARRAY, - SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.GetMappedValueUDF", FunctionReturnTypes.STRING, - family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)); - createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.ExtractCollectionUDF", - FunctionReturnTypes.arrayOfType(SqlTypeName.VARCHAR, true), family(SqlTypeFamily.ANY, SqlTypeFamily.STRING)); - - // UDTFs - addFunctionEntry("explode", new CoralSqlUnnestOperator(false)); - addFunctionEntry("posexplode", new CoralSqlUnnestOperator(true)); - addFunctionEntry("json_tuple", HiveJsonTupleOperator.JSON_TUPLE); - - // reflect functions - addFunctionEntry("reflect", HiveReflectOperator.REFLECT); - addFunctionEntry("java_method", HiveReflectOperator.REFLECT); - - // Generic UDTFs - createAddUserDefinedTableFunction("com.linkedin.tsar.hive.udf.ToJymbiiScores", - ImmutableList.of("job_urn", "rank", "glmix_score", "global_model_score", "sentinel_score", "job_effect_score", - "member_effect_score"), - ImmutableList.of(SqlTypeName.VARCHAR, SqlTypeName.INTEGER, SqlTypeName.DOUBLE, SqlTypeName.DOUBLE, - SqlTypeName.DOUBLE, SqlTypeName.DOUBLE, SqlTypeName.DOUBLE), - family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY)); - - // Context functions - addFunctionEntry("current_user", CURRENT_USER); - } - - /** - * Returns a list of functions matching given name case-insensitively. This returns empty list if the - * function name is not found. - * @param functionName function name to match - * @return list of matching HiveFunctions or empty collection. 
- */ - @Override - public Collection lookup(String functionName) { - return FUNCTION_MAP.get(functionName.toLowerCase()); - } - - /** - * @return immutable copy of internal function registry - */ - public ImmutableMultimap getRegistry() { - return ImmutableMultimap.copyOf(FUNCTION_MAP); - } - - /** - * Adds the function to registry, the key is lowercase functionName to make lookup case-insensitive. - */ - private static void addFunctionEntry(String functionName, SqlOperator operator) { - FUNCTION_MAP.put(functionName.toLowerCase(), new Function(functionName, operator)); - } - - public static void createAddUserDefinedFunction(String functionName, SqlReturnTypeInference returnTypeInference, - SqlOperandTypeChecker operandTypeChecker) { - addFunctionEntry(functionName, createCalciteUDF(functionName, returnTypeInference, operandTypeChecker)); - } - - public static void createAddUserDefinedFunction(String functionName, SqlReturnTypeInference returnTypeInference, - SqlOperandTypeChecker operandTypeChecker, String dependency) { - String depPrefix = dependency.substring(0, 6).toLowerCase(); - - // TODO: dependency not used. Consider removing it (maybe this method completely). 
- if (!depPrefix.equals("ivy://")) { - dependency = "ivy://" + dependency; - } - addFunctionEntry(functionName, createCalciteUDF(functionName, returnTypeInference, operandTypeChecker)); - } - - /** - * Adds the generic UDTF, which is almost same as how we register for LinkedIn UDFs except that we need to register - * the return field names in `UDTF_RETURN_FIELD_NAME_MAP` - */ - public static void createAddUserDefinedTableFunction(String functionName, ImmutableList returnFieldNames, - ImmutableList returnFieldTypes, SqlOperandTypeChecker operandTypeChecker) { - // The type of returnFieldTypes can only be ImmutableList or ImmutableList - // ImmutableList is used with FunctionReturnTypes.rowOf(ImmutableList fieldNames, ImmutableList types) - // ImmutableList is used with FunctionReturnTypes.rowOfInference(ImmutableList fieldNames, ImmutableList types) - Preconditions.checkArgument(!returnFieldTypes.isEmpty() && returnFieldTypes.size() == returnFieldNames.size() - && (returnFieldTypes.stream().allMatch(type -> type instanceof SqlTypeName) - || returnFieldTypes.stream().allMatch(type -> type instanceof SqlReturnTypeInference))); - if (returnFieldTypes.get(0) instanceof SqlTypeName) { - createAddUserDefinedFunction(functionName, - FunctionReturnTypes.rowOf(returnFieldNames, - ImmutableList - .copyOf(returnFieldTypes.stream().map(type -> (SqlTypeName) type).collect(Collectors.toList()))), - operandTypeChecker); - } else { - createAddUserDefinedFunction(functionName, - FunctionReturnTypes.rowOfInference(returnFieldNames, - ImmutableList.copyOf( - returnFieldTypes.stream().map(type -> (SqlReturnTypeInference) type).collect(Collectors.toList()))), - operandTypeChecker); - } - UDTF_RETURN_FIELD_NAME_MAP.put(functionName, returnFieldNames); - } - - private static SqlOperator createCalciteUDF(String functionName, SqlReturnTypeInference returnTypeInference, - SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, - List paramTypes) { - return new 
SqlUserDefinedFunction(new SqlIdentifier(functionName, SqlParserPos.ZERO), returnTypeInference, - operandTypeInference, operandTypeChecker, paramTypes, null); - } - - private static SqlOperator createCalciteUDF(String functionName, SqlReturnTypeInference returnTypeInference, - SqlOperandTypeChecker operandTypeChecker) { - return new SqlUserDefinedFunction(new SqlIdentifier(functionName, SqlParserPos.ZERO), returnTypeInference, null, - operandTypeChecker, null, null); - } - - private static SqlOperator createCalciteUDF(String functionName, SqlReturnTypeInference returnTypeInference) { - return createCalciteUDF(functionName, returnTypeInference, null); - } - - private static void addCastOperatorEntries() { - String[] castFunctions = - { "tok_boolean", "tok_int", "tok_string", "tok_double", "tok_float", "tok_bigint", "tok_tinyint", "tok_smallint", "tok_char", "tok_decimal", "tok_varchar", "tok_binary", "tok_date", "tok_timestamp" }; - for (String f : castFunctions) { - FUNCTION_MAP.put(f, HiveFunction.CAST); - } - } - - /** - * Returns a predicate to test if ordinal parameter is optional - * @param ordinal parameter ordinal number - * @return predicate to test if the parameter is optional - */ - private static Predicate optionalOrd(final int ordinal) { - return input -> input == ordinal; - } - - private static Predicate optionalOrd(final List ordinals) { - return ordinals::contains; - } +@Deprecated +public class StaticHiveFunctionRegistry extends StaticCoralFunctionRegistry { } diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/CoralUDFTransformer.java b/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/CoralUDFTransformer.java new file mode 100644 index 000000000..d3280122e --- /dev/null +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/CoralUDFTransformer.java @@ -0,0 +1,78 @@ +/** + * Copyright 2023-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. 
+ * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.spark.transformers; + +import java.net.URI; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlOperator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.linkedin.coral.com.google.common.collect.ImmutableSet; +import com.linkedin.coral.common.transformers.SqlCallTransformer; +import com.linkedin.coral.hive.hive2rel.functions.VersionedSqlUserDefinedFunction; +import com.linkedin.coral.spark.containers.SparkUDFInfo; +import com.linkedin.coral.spark.exceptions.UnsupportedUDFException; + + +/** + * This transformer converts a UDF SqlCall name from the UDF class name (e.g., `com.linkedin.HiveUDF`) + * to the corresponding view-dependent UDF name in the view text. It also adds the UDF information to `sparkUDFInfos`. + * Refer to `CoralSparkTest#testHiveUDFTransformer()` for an example. + */ +public class CoralUDFTransformer extends SqlCallTransformer { + private static final Logger LOG = LoggerFactory.getLogger(CoralUDFTransformer.class); + + /** + * Some LinkedIn UDFs get registered correctly in a SparkSession, and hence a DataFrame is successfully + * created for the views containing those UDFs, but those UDFs fail going forward during the execution phase. + * We cannot use a fallback mechanism for such cases because a DataFrame can be created successfully. + * Because of this, we need to proactively fail during the CoralSpark view analysis phase when we encounter such UDFs, + * so that Spark can fall back to its stable execution. 
+ */ + private static final Set UNSUPPORTED_HIVE_UDFS = + ImmutableSet.of("com.linkedin.dali.udf.userinterfacelookup.hive.UserInterfaceLookup", + "com.linkedin.dali.udf.portallookup.hive.PortalLookup", + // for unit test + "com.linkedin.coral.hive.hive2rel.CoralTestUnsupportedUDF"); + private final Set sparkUDFInfos; + + public CoralUDFTransformer(Set sparkUDFInfos) { + this.sparkUDFInfos = sparkUDFInfos; + } + + @Override + protected boolean condition(SqlCall sqlCall) { + final SqlOperator operator = sqlCall.getOperator(); + final String operatorName = operator.getName(); + return operator instanceof VersionedSqlUserDefinedFunction && operatorName.contains(".") + && !operatorName.equals("."); + } + + @Override + protected SqlCall transform(SqlCall sqlCall) { + final VersionedSqlUserDefinedFunction operator = (VersionedSqlUserDefinedFunction) sqlCall.getOperator(); + final String operatorName = operator.getName(); + if (UNSUPPORTED_HIVE_UDFS.contains(operatorName)) { + throw new UnsupportedUDFException(operatorName); + } + final String originalViewTextFunctionName = operator.getOriginalViewTextFunctionName(); + final List dependencies = operator.getIvyDependencies(); + List listOfUris = dependencies.stream().map(URI::create).collect(Collectors.toList()); + LOG.info("Function: {} is not a Builtin UDF or Transport UDF. 
We fall back to its Hive " + + "function with ivy dependency: {}", operatorName, String.join(",", dependencies)); + final SparkUDFInfo sparkUDFInfo = new SparkUDFInfo(operator.getFunctionClassName(), originalViewTextFunctionName, + listOfUris, SparkUDFInfo.UDFTYPE.HIVE_CUSTOM_UDF); + sparkUDFInfos.add(sparkUDFInfo); + final SqlOperator convertedFunction = + createSqlOperator(originalViewTextFunctionName, operator.getReturnTypeInference()); + return convertedFunction.createCall(sqlCall.getParserPosition(), sqlCall.getOperandList()); + } +} diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java b/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java index ef29d51ec..5071e9bb1 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java @@ -5,74 +5,18 @@ */ package com.linkedin.coral.spark.transformers; -import java.net.URI; -import java.util.List; import java.util.Set; -import java.util.stream.Collectors; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlOperator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.linkedin.coral.com.google.common.collect.ImmutableSet; -import com.linkedin.coral.common.transformers.SqlCallTransformer; -import com.linkedin.coral.hive.hive2rel.functions.VersionedSqlUserDefinedFunction; import com.linkedin.coral.spark.containers.SparkUDFInfo; -import com.linkedin.coral.spark.exceptions.UnsupportedUDFException; /** - * This transformer converts the Hive UDF SqlCall name from the UDF class name (e.g., `com.linkedin.HiveUDF`) - * to the corresponding view-dependent UDF name in the view text. It also adds the UDF information to `sparkUDFInfos`. - * Refer to `CoralSparkTest#testHiveUDFTransformer()` for an example. + * @deprecated Use {@link CoralUDFTransformer} instead. 
This class will be removed in a future release. */ -public class HiveUDFTransformer extends SqlCallTransformer { - private static final Logger LOG = LoggerFactory.getLogger(HiveUDFTransformer.class); - - /** - * Some LinkedIn UDFs get registered correctly in a SparkSession, and hence a DataFrame is successfully - * created for the views containing those UDFs, but those UDFs fail going forward during the execution phase. - * We cannot use a fallback mechanism for such cases because a DataFrame can be created successfully. - * Because of this, we need to proactively fail during the CoralSpark view analysis phase when we encounter such UDFs, - * so that Spark can fall back to its stable execution. - */ - private static final Set UNSUPPORTED_HIVE_UDFS = - ImmutableSet.of("com.linkedin.dali.udf.userinterfacelookup.hive.UserInterfaceLookup", - "com.linkedin.dali.udf.portallookup.hive.PortalLookup", - // for unit test - "com.linkedin.coral.hive.hive2rel.CoralTestUnsupportedUDF"); - private final Set sparkUDFInfos; +@Deprecated +public class HiveUDFTransformer extends CoralUDFTransformer { public HiveUDFTransformer(Set sparkUDFInfos) { - this.sparkUDFInfos = sparkUDFInfos; - } - - @Override - protected boolean condition(SqlCall sqlCall) { - final SqlOperator operator = sqlCall.getOperator(); - final String operatorName = operator.getName(); - return operator instanceof VersionedSqlUserDefinedFunction && operatorName.contains(".") - && !operatorName.equals("."); - } - - @Override - protected SqlCall transform(SqlCall sqlCall) { - final VersionedSqlUserDefinedFunction operator = (VersionedSqlUserDefinedFunction) sqlCall.getOperator(); - final String operatorName = operator.getName(); - if (UNSUPPORTED_HIVE_UDFS.contains(operatorName)) { - throw new UnsupportedUDFException(operatorName); - } - final String originalViewTextFunctionName = operator.getOriginalViewTextFunctionName(); - final List dependencies = operator.getIvyDependencies(); - List listOfUris = 
dependencies.stream().map(URI::create).collect(Collectors.toList()); - LOG.info("Function: {} is not a Builtin UDF or Transport UDF. We fall back to its Hive " - + "function with ivy dependency: {}", operatorName, String.join(",", dependencies)); - final SparkUDFInfo sparkUDFInfo = new SparkUDFInfo(operator.getFunctionClassName(), originalViewTextFunctionName, - listOfUris, SparkUDFInfo.UDFTYPE.HIVE_CUSTOM_UDF); - sparkUDFInfos.add(sparkUDFInfo); - final SqlOperator convertedFunction = - createSqlOperator(originalViewTextFunctionName, operator.getReturnTypeInference()); - return convertedFunction.createCall(sqlCall.getParserPosition(), sqlCall.getOperandList()); + super(sparkUDFInfos); } } diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CoralUDFTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CoralUDFTransformer.java new file mode 100644 index 000000000..b44054770 --- /dev/null +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CoralUDFTransformer.java @@ -0,0 +1,39 @@ +/** + * Copyright 2023-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.trino.rel2trino.transformers; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.parser.SqlParserPos; + +import com.linkedin.coral.common.transformers.SqlCallTransformer; +import com.linkedin.coral.hive.hive2rel.functions.VersionedSqlUserDefinedFunction; + + +/** + * This transformer converts a UDF SqlCall name from the UDF class name to the + * corresponding Trino function name. + * i.e. from `com.linkedin.stdudfs.parsing.hive.Ip2Str` to `ip2str`. 
+ */ +public class CoralUDFTransformer extends SqlCallTransformer { + + @Override + protected boolean condition(SqlCall sqlCall) { + final SqlOperator operator = sqlCall.getOperator(); + final String operatorName = operator.getName(); + return operator instanceof VersionedSqlUserDefinedFunction && operatorName.contains(".") + && !operatorName.equals("."); + } + + @Override + protected SqlCall transform(SqlCall sqlCall) { + final SqlOperator operator = sqlCall.getOperator(); + final String trinoFunctionName = ((VersionedSqlUserDefinedFunction) operator).getShortFunctionName(); + return createSqlOperator(trinoFunctionName, operator.getReturnTypeInference()) + .createCall(new SqlNodeList(sqlCall.getOperandList(), SqlParserPos.ZERO)); + } +} diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/HiveUDFTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/HiveUDFTransformer.java index 318f87a79..4b17f6777 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/HiveUDFTransformer.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/HiveUDFTransformer.java @@ -5,35 +5,10 @@ */ package com.linkedin.coral.trino.rel2trino.transformers; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlNodeList; -import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.parser.SqlParserPos; - -import com.linkedin.coral.common.transformers.SqlCallTransformer; -import com.linkedin.coral.hive.hive2rel.functions.VersionedSqlUserDefinedFunction; - /** - * This transformer converts the Hive UDF SqlCall name from the UDF class name to the - * corresponding Trino function name. - * i.e. from `com.linkedin.stdudfs.parsing.hive.Ip2Str` to `ip2str`. + * @deprecated Use {@link CoralUDFTransformer} instead. This class will be removed in a future release. 
*/ -public class HiveUDFTransformer extends SqlCallTransformer { - - @Override - protected boolean condition(SqlCall sqlCall) { - final SqlOperator operator = sqlCall.getOperator(); - final String operatorName = operator.getName(); - return operator instanceof VersionedSqlUserDefinedFunction && operatorName.contains(".") - && !operatorName.equals("."); - } - - @Override - protected SqlCall transform(SqlCall sqlCall) { - final SqlOperator operator = sqlCall.getOperator(); - final String trinoFunctionName = ((VersionedSqlUserDefinedFunction) operator).getShortFunctionName(); - return createSqlOperator(trinoFunctionName, operator.getReturnTypeInference()) - .createCall(new SqlNodeList(sqlCall.getOperandList(), SqlParserPos.ZERO)); - } +@Deprecated +public class HiveUDFTransformer extends CoralUDFTransformer { }