Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.metastore.tools.schematool;

import com.google.common.collect.ImmutableSet;

import java.sql.SQLException;
import java.util.Set;

/**
* Per-database ignorable DDL error codes for idempotent schema upgrades.
* This is used by {@link IdempotentDDLExecutor} to determine whether a given {@link SQLException}
* can be safely ignored (e.g. because it indicates an object already exists or is already gone).
* <p>
* Use {@link #forDbType(String)} to get the right instance.
*/
public interface DbErrorCodes {

/**
* Decides whether a failure raised while running a DDL statement may be skipped.
*
* @param e the exception to classify
* @return true if the exception can be safely ignored (object already exists or already gone).
*/
boolean isIgnorable(SQLException e);

/**
 * Set-backed {@link DbErrorCodes} implementation.
 * <p>
 * An exception is ignorable when either its {@link SQLException#getSQLState() SQLState} or its
 * vendor error code (rendered with {@code String.valueOf(getErrorCode())}) is contained in
 * {@code duplicateCodes} or {@code missingCodes}.
 */
class Codes implements DbErrorCodes {
  private final Set<String> duplicateCodes;
  private final Set<String> missingCodes;

  /**
   * @param duplicateCodes codes signalling that the object already exists
   * @param missingCodes codes signalling that the object is already gone
   */
  Codes(Set<String> duplicateCodes, Set<String> missingCodes) {
    this.duplicateCodes = duplicateCodes;
    this.missingCodes = missingCodes;
  }

  /**
   * @param e the exception to classify
   * @return true if the SQLState or the stringified error code of {@code e} is a known code
   */
  @Override
  public boolean isIgnorable(SQLException e) {
    // getSQLState() may return null; skip the lookup instead of calling contains(null),
    // which throws NullPointerException on null-hostile sets such as Set.of(...).
    String state = e.getSQLState();
    if (state != null && isKnown(state)) {
      return true;
    }
    return isKnown(String.valueOf(e.getErrorCode()));
  }

  /** Returns true if {@code value} is listed as either a duplicate or a missing code. */
  private boolean isKnown(String value) {
    return duplicateCodes.contains(value) || missingCodes.contains(value);
  }
}

/**
* Ignorable codes for PostgreSQL: the first set lists "already exists" codes, the second set
* lists "does not exist" codes.
* NOTE(review): these look like five-character SQLSTATE values, i.e. they are expected to match
* {@link SQLException#getSQLState()} rather than the numeric error code - confirm against the
* PostgreSQL error-code appendix.
*/
DbErrorCodes POSTGRES = new Codes(
ImmutableSet.of(
"42P07", // duplicate table
"42701", // duplicate column
"42710" // duplicate object (e.g. constraint, index)
),
ImmutableSet.of(
"42P01", // undefined table
"42703", // undefined column
"42704" // undefined object
)
);

/**
* Ignorable codes for Derby: the first set lists "already exists" codes, the second set lists
* "does not exist" codes.
* Every value contains a letter, so it can only match {@link SQLException#getSQLState()},
* never the stringified integer error code.
*/
DbErrorCodes DERBY = new Codes(
ImmutableSet.of(
"X0Y32", // table/view already exists
"X0Y68", // index already exists
"42Z93" // duplicate constraint (same column set already constrained)
),
ImmutableSet.of(
"42Y55", // table/view does not exist
"42X14", // column does not exist
"42X65", // index does not exist
"42X86" // constraint does not exist on table (ALTER TABLE DROP CONSTRAINT)
)
);

/**
* Ignorable codes for MySQL: the first set lists "already exists" codes, the second set lists
* "does not exist" codes.
* NOTE(review): the values are plain decimal numbers, so they are presumably matched through
* {@code String.valueOf(getErrorCode())} - confirm against the MySQL server error reference.
*/
DbErrorCodes MYSQL = new Codes(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might only take care of MySQL and Oracle?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think limiting to MySQL/Oracle would leave other DBs non-idempotent or require DB-specific script rewrites (and procedural SQL in some cases). So it is better to do this for all DBs, but do add more details if you have another idea :)

ImmutableSet.of(
"1050", // ER_TABLE_EXISTS_ERROR: table already exists
"1060", // ER_DUP_FIELDNAME: duplicate column name
"1061" // ER_DUP_KEYNAME: duplicate key name
),
ImmutableSet.of(
"1051", // ER_BAD_TABLE_ERROR: unknown table (DROP TABLE)
"1054", // ER_BAD_FIELD_ERROR: unknown column (CHANGE COLUMN on already-renamed column)
"1091" // ER_CANT_DROP_FIELD_OR_KEY: column or index does not exist (DROP COLUMN/INDEX)
)
);

/**
* Ignorable codes for Oracle: the first set lists "already exists" codes, the second set lists
* "does not exist" codes.
* The values are written without the zero padding of the ORA-NNNNN form because {@link Codes}
* compares them with {@code String.valueOf(getErrorCode())}, which never emits leading zeros.
*/
DbErrorCodes ORACLE = new Codes(
ImmutableSet.of(
"955", // ORA-00955: name already used by an existing object
"957", // ORA-00957: duplicate column name (RENAME COLUMN target already exists)
"1430", // ORA-01430: column being added already exists in table
"2261" // ORA-02261: unique or primary key already exists in the table
),
ImmutableSet.of(
"942", // ORA-00942: table or view does not exist
"904", // ORA-00904: invalid identifier (column does not exist)
"1418", // ORA-01418: specified index does not exist
"2443" // ORA-02443: cannot drop constraint - nonexistent constraint
)
);

/**
* Ignorable codes for Microsoft SQL Server: the first set lists "already exists" codes, the
* second set lists "does not exist" codes.
* NOTE(review): the values are plain decimal numbers, so they are presumably matched through
* {@code String.valueOf(getErrorCode())} - confirm against the SQL Server error-number list.
*/
DbErrorCodes MSSQL = new Codes(
ImmutableSet.of(
"2714", // There is already an object named '...' in the database
"2705", // Column names in each table must be unique (duplicate column)
"1913" // There is already an index named '...' on table '...'
),
ImmutableSet.of(
"3701", // Cannot drop object because it does not exist or you do not have permission
"3728", // DROP CONSTRAINT: not a constraint
"4924", // ALTER TABLE DROP COLUMN: column does not exist
"15248" // sp_rename: parameter @objname is ambiguous or @objtype is wrong (column already renamed)
)
);

/** Fallback used for unrecognized database types: every error is reported as not ignorable. */
DbErrorCodes NOOP = sqlException -> false;

/**
 * Returns the {@link DbErrorCodes} for the given db-type string, or {@link #NOOP} if unrecognized.
 * <p>
 * Matching is case-insensitive. The name is lower-cased with {@link java.util.Locale#ROOT} so the
 * lookup does not depend on the JVM default locale; with a Turkish default locale, for example,
 * {@code "MARIADB".toLowerCase()} yields a dotless i and would no longer match {@code "mariadb"}.
 *
 * @param dbType database type name; may be {@code null}
 * @return the matching instance, or {@link #NOOP} when {@code dbType} is {@code null} or unknown
 */
static DbErrorCodes forDbType(String dbType) {
  if (dbType == null) {
    return NOOP;
  }
  return switch (dbType.toLowerCase(java.util.Locale.ROOT)) {
    case "postgres" -> POSTGRES;
    case "derby", "derby.clean" -> DERBY;
    case "mysql", "mariadb" -> MYSQL;
    case "oracle" -> ORACLE;
    case "mssql" -> MSSQL;
    default -> NOOP;
  };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class HiveSchemaHelper {
public static final String DB_HIVE = "hive";
public static final String DB_MSSQL = "mssql";
public static final String DB_MYSQL = "mysql";
public static final String DB_POSTGRACE = "postgres";
public static final String DB_POSTGRES = "postgres";
public static final String DB_ORACLE = "oracle";
public static final String EMBEDDED_HS2_URL =
"jdbc:hive2://?hive.conf.restricted.list=;hive.security.authorization.sqlstd.confwhitelist=.*;"
Expand Down Expand Up @@ -186,6 +186,18 @@ String buildCommand(String scriptDir, String scriptFile)
*/
String buildCommand(String scriptDir, String scriptFile, boolean fixQuotes)
throws IllegalFormatException, IOException;

/**
* Parse the script and return a list of individual, executable SQL commands.
* NOTE(review): the implementation visible in this file forwards to the three-argument
* overload with {@code fixQuotes = false}; other implementers should presumably do the same.
*
* @param scriptDir directory that contains the script
* @param scriptFile name of the script file inside {@code scriptDir}
* @return the commands parsed from the script
*/
List<String> getExecutableCommands(String scriptDir, String scriptFile)
throws IllegalFormatException, IOException;

/**
* Parse the script and return a list of individual, executable SQL commands.
*
* @param scriptDir directory that contains the script
* @param scriptFile name of the script file inside {@code scriptDir}
* @param fixQuotes whether quote rewriting should be applied to each line; in the implementation
* visible in this file, a backslash-escaped double quote is replaced with the parser's quote
* character when that character differs from the default quote
* @return the commands parsed from the script
*/
List<String> getExecutableCommands(String scriptDir, String scriptFile, boolean fixQuotes)
throws IllegalFormatException, IOException;
}

/**
Expand Down Expand Up @@ -254,58 +266,77 @@ public boolean needsQuotedIdentifier() {
}

@Override
public String buildCommand(
String scriptDir, String scriptFile) throws IllegalFormatException, IOException {
return buildCommand(scriptDir, scriptFile, false);
public List<String> getExecutableCommands(String scriptDir, String scriptFile)
    throws IllegalFormatException, IOException {
  // The two-argument form never asks for quote rewriting.
  final boolean fixQuotes = false;
  return getExecutableCommands(scriptDir, scriptFile, fixQuotes);
}

@Override
public String buildCommand(
String scriptDir, String scriptFile, boolean fixQuotes) throws IllegalFormatException, IOException {
BufferedReader bfReader =
new BufferedReader(new FileReader(scriptDir + File.separatorChar + scriptFile));
String currLine;
StringBuilder sb = new StringBuilder();
String currentCommand = null;
while ((currLine = bfReader.readLine()) != null) {
currLine = currLine.trim();
public List<String> getExecutableCommands(String scriptDir, String scriptFile, boolean fixQuotes)
throws IllegalFormatException, IOException {
List<String> commands = new java.util.ArrayList<>();

if (fixQuotes && !getQuoteCharacter().equals(DEFAULT_QUOTE)) {
currLine = currLine.replace("\\\"", getQuoteCharacter());
}
try (BufferedReader bfReader =
new BufferedReader(new FileReader(scriptDir + File.separatorChar + scriptFile))) {
String currLine;
String currentCommand = null;

if (currLine.isEmpty()) {
continue; // skip empty lines
}
while ((currLine = bfReader.readLine()) != null) {
currLine = fixQuotesFromCurrentLine(fixQuotes, currLine.trim());

if (currLine.isEmpty()) {
continue;
}

if (currentCommand == null) {
currentCommand = currLine;
} else {
currentCommand = currentCommand + " " + currLine;
currentCommand = currentCommand == null ? currLine : currentCommand + " " + currLine;

if (!isPartialCommand(currLine)) {
if (!isNonExecCommand(currentCommand)) {
currentCommand = cleanseCommand(currentCommand);
if (isNestedScript(currentCommand)) {
String currScript = getScriptName(currentCommand);
commands.addAll(getExecutableCommands(scriptDir, currScript, fixQuotes));
} else {
commands.add(currentCommand.trim());
}
}
currentCommand = null;
}
}
if (isPartialCommand(currLine)) {
// if its a partial line, continue collecting the pieces
continue;

if (currentCommand != null && !isNonExecCommand(currentCommand)) {
throw new IllegalArgumentException("Unterminated SQL statement at end of script: " + scriptFile);
}
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getExecutableCommands() drops the final SQL statement if the file ends without a trailing delimiter (currentCommand remains non-null when the reader hits EOF, but it’s never flushed into commands). This changes behavior vs executing the script directly and can cause the last statement in a script to be silently skipped. Consider handling any remaining currentCommand after the loop (including nested-script expansion and cleansing) so EOF also terminates a statement.

Suggested change
}
}
// Handle any remaining command when EOF is reached without a terminating delimiter.
if (currentCommand != null && !isNonExecCommand(currentCommand)) {
currentCommand = cleanseCommand(currentCommand);
if (isNestedScript(currentCommand)) {
String currScript = getScriptName(currentCommand);
commands.addAll(getExecutableCommands(scriptDir, currScript, fixQuotes));
} else {
commands.add(currentCommand.trim());
}
}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We run into this issue when the last executable SQL line doesn't end with a delimiter (a semicolon in most cases).
The current scripts do not have this problem and won't run into this. For future scripts it might be better just to fail fast to indicate that the script is incorrect.
I am adding a check for this and throwing an exception.

}
return commands;
}

// if this is a valid executable command then add it to the buffer
if (!isNonExecCommand(currentCommand)) {
currentCommand = cleanseCommand(currentCommand);
if (isNestedScript(currentCommand)) {
// if this is a nested sql script then flatten it
String currScript = getScriptName(currentCommand);
sb.append(buildCommand(scriptDir, currScript));
} else {
// Now we have a complete statement, process it
// write the line to buffer
sb.append(currentCommand);
if (usingSqlLine) sb.append(";");
sb.append(System.getProperty("line.separator"));
}
/**
 * Applies the optional quote rewrite to a single script line.
 *
 * @param fixQuotes whether quote rewriting was requested
 * @param currLine the line to process
 * @return {@code currLine} unchanged when rewriting is off or the parser's quote character equals
 *     {@code DEFAULT_QUOTE}; otherwise the line with every backslash-escaped double quote replaced
 *     by the parser's quote character
 */
private String fixQuotesFromCurrentLine(boolean fixQuotes, String currLine) {
  // Nothing to rewrite when the caller opted out or the default quote is already in use.
  if (!fixQuotes || getQuoteCharacter().equals(DEFAULT_QUOTE)) {
    return currLine;
  }
  return currLine.replace("\\\"", getQuoteCharacter());
}

@Override
public String buildCommand(String scriptDir, String scriptFile)
    throws IllegalFormatException, IOException {
  // The two-argument form never asks for quote rewriting.
  final boolean fixQuotes = false;
  return buildCommand(scriptDir, scriptFile, fixQuotes);
}

@Override
public String buildCommand(
String scriptDir, String scriptFile, boolean fixQuotes) throws IllegalFormatException, IOException {
List<String> commands = getExecutableCommands(scriptDir, scriptFile, fixQuotes);
StringBuilder sb = new StringBuilder();
for (String cmd : commands) {
sb.append(cmd);
if (usingSqlLine) {
sb.append(";");
}
currentCommand = null;
sb.append(System.lineSeparator());
}
bfReader.close();

return sb.toString();
}

Expand Down Expand Up @@ -581,7 +612,7 @@ public static NestedScriptParser getDbCommandParser(String dbName,
return new MSSQLCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine);
} else if (dbName.equalsIgnoreCase(DB_MYSQL)) {
return new MySqlCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine);
} else if (dbName.equalsIgnoreCase(DB_POSTGRACE)) {
} else if (dbName.equalsIgnoreCase(DB_POSTGRES)) {
return new PostgresCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine);
} else if (dbName.equalsIgnoreCase(DB_ORACLE)) {
return new OracleCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine);
Expand Down
Loading
Loading