Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -692,28 +692,29 @@ protected SqlNode visitIdentifier(ASTNode node, ParseContext ctx) {
return new SqlIdentifier(node.getText(), ZERO);
}

/**
 * Converts a Hive string literal node into a Calcite character-string literal.
 *
 * <p>See {@link #unescapeHiveStringLiteral}.
 * We use unescapeHiveStringLiteral to interpret Hive backslash escape sequences,
 * so that patterns like {@code \\d} are correctly stored as {@code \d} in the java object in memory.
 * The escaped literal string representation will be generated when the SqlNode is written to string
 * by the SqlWriter, which can be controlled by the SqlDialect to decide the choice of escaping mechanism.
 *
 * @param node AST node whose text is the literal; it must be at least two characters long
 * @param ctx parse context (not used by this method)
 * @return a char-string {@link SqlLiteral} holding the unescaped literal content
 */
@Override
protected SqlNode visitStringLiteral(ASTNode node, ParseContext ctx) {
  // TODO: Add charset here. UTF-8 is not supported by calcite
  String text = node.getText();
  checkState(text.length() >= 2);
  // Drop the first and last character of the token text (presumably the enclosing quotes)
  // before interpreting the escape sequences.
  String unquoted = text.substring(1, text.length() - 1);
  return SqlLiteral.createCharString(unescapeHiveStringLiteral(unquoted), ZERO);
}

private String removeBackslashBeforeQuotes(String input) {
// matches a \' or \" literal pattern
Pattern pattern = Pattern.compile("\\\\['\"]");
private String unescapeHiveStringLiteral(String input) {
// Handle Hive backslash escape sequences: \\ -> \, \' -> ', \" -> "
Pattern pattern = Pattern.compile("\\\\[\\\\'\"]");
Matcher matcher = pattern.matcher(input);

StringBuffer res = new StringBuffer();
while (matcher.find()) {
String replacement = matcher.group().substring(1);
String matched = matcher.group();
String replacement = Matcher.quoteReplacement(matched.substring(1));
matcher.appendReplacement(res, replacement);
}
matcher.appendTail(res);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static org.apache.calcite.sql.type.OperandTypes.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;


public class HiveToTrinoConverterTest {
Expand Down Expand Up @@ -966,6 +968,55 @@ public void testRegexpTransformation() {
assertEquals(expandedSql, targetSql);
}

@Test
public void testRlikeBackslashEscaping() {
  // Hive uses backslash as an escape character inside string literals, so the Hive literal
  // '\\d' denotes the two characters \d. Trino performs no backslash escaping, so the same
  // two characters are written as '\d'. Hence Hive's '\\d{4}' must be emitted as '\d{4}'.
  final String hiveSql = "SELECT '2022-01-01' RLIKE '^\\\\d{4}-\\\\d{2}-\\\\d{2}$'";
  final String expectedTrinoSql =
      "SELECT \"REGEXP_LIKE\"('2022-01-01', '^\\d{4}-\\d{2}-\\d{2}$')\n" + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")";

  RelToTrinoConverter trinoConverter = TestUtils.getRelToTrinoConverter();
  RelNode hiveRel = TestUtils.getHiveToRelConverter().convertSql(hiveSql);

  assertEquals(trinoConverter.convert(hiveRel), expectedTrinoSql);
}

@Test
public void testRlikeBackslashEscapingWithColumn() {
  // Same backslash-escaping check, but RLIKE is applied to a column reference
  // rather than to a string literal.
  final String hiveSql = "SELECT * FROM test.tableA WHERE a RLIKE '^\\\\d+$'";

  RelToTrinoConverter trinoConverter = TestUtils.getRelToTrinoConverter();
  RelNode hiveRel = TestUtils.getHiveToRelConverter().convertSql(hiveSql);
  String trinoSql = trinoConverter.convert(hiveRel);

  assertTrue(trinoSql.contains("\"REGEXP_LIKE\""));
  // The pattern must appear with a single backslash, and never with the doubled one.
  assertTrue(trinoSql.contains("'^\\d+$'"));
  assertFalse(trinoSql.contains("'^\\\\d+$'"));
}

@Test
public void testRegexpBackslashEscaping() {
  // REGEXP is a synonym for RLIKE, so it must get the same backslash handling.
  final String hiveSql = "SELECT 'hello' REGEXP '^\\\\w+$'";
  final String expectedTrinoSql =
      "SELECT \"REGEXP_LIKE\"('hello', '^\\w+$')\n" + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")";

  RelToTrinoConverter trinoConverter = TestUtils.getRelToTrinoConverter();
  RelNode hiveRel = TestUtils.getHiveToRelConverter().convertSql(hiveSql);

  assertEquals(trinoConverter.convert(hiveRel), expectedTrinoSql);
}

@Test
public void testStringLiteralWithEscapedBackslash() {
  // A plain (non-regex) literal: the Hive SQL text contains doubled backslashes
  // ('path\\to\\file'), and the generated Trino SQL is expected to contain single
  // backslashes ('path\to\file').
  final String hiveSql = "SELECT 'path\\\\to\\\\file'";

  RelToTrinoConverter trinoConverter = TestUtils.getRelToTrinoConverter();
  RelNode hiveRel = TestUtils.getHiveToRelConverter().convertSql(hiveSql);
  String trinoSql = trinoConverter.convert(hiveRel);

  assertTrue(trinoSql.contains("'path\\to\\file'"));
}

@Test
public void testSqlSelectAliasAppenderTransformer() {
// test.tableA(a int, b struct<b1:string>
Expand Down