-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-28265: Fix JDBC timeout message for hive.query.timeout.seconds #6412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
eec11d2
ab425d3
c5399f3
eaae132
aeb5a7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,8 +19,10 @@ | |
| package org.apache.hive.jdbc; | ||
|
|
||
| import org.apache.commons.lang3.StringUtils; | ||
| import org.apache.hadoop.hive.conf.HiveConf; | ||
| import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; | ||
| import org.apache.hive.jdbc.logs.InPlaceUpdateStream; | ||
| import org.apache.hive.service.cli.HiveSQLException; | ||
| import org.apache.hive.service.cli.RowSet; | ||
| import org.apache.hive.service.cli.RowSetFactory; | ||
| import org.apache.hive.service.rpc.thrift.TCLIService; | ||
|
|
@@ -57,6 +59,9 @@ | |
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
|
|
||
| import static org.apache.hadoop.hive.ql.ErrorMsg.CLIENT_POLLING_OPSTATUS_INTERRUPTED; | ||
|
|
||
|
|
@@ -70,6 +75,10 @@ public class HiveStatement implements java.sql.Statement { | |
|
|
||
| public static final String QUERY_CANCELLED_MESSAGE = "Query was cancelled."; | ||
|
|
||
| /** Last assignment wins if multiple appear (e.g. multi-line script). Uses find(), not full-string match. */ | ||
| private static final Pattern SET_HIVE_QUERY_TIMEOUT_SECONDS = Pattern.compile( | ||
| "(?i)set\\s+hive\\.query\\.timeout\\.seconds\\s*=\\s*([^;\\n]+)"); | ||
|
|
||
| private final HiveConnection connection; | ||
| private TCLIService.Iface client; | ||
| private Optional<TOperationHandle> stmtHandle; | ||
|
|
@@ -298,6 +307,7 @@ public void closeOnCompletion() throws SQLException { | |
| public boolean execute(String sql) throws SQLException { | ||
| runAsyncOnServer(sql); | ||
| TGetOperationStatusResp status = waitForOperationToComplete(); | ||
| trackSessionQueryTimeoutIfSet(sql); | ||
|
|
||
| // The query should be completed by now | ||
| if (!status.isHasResultSet() && stmtHandle.isPresent() && !stmtHandle.get().isHasResultSet()) { | ||
|
|
@@ -398,6 +408,84 @@ private TGetOperationStatusResp waitForResultSetStatus() throws SQLException { | |
| return statusResp; | ||
| } | ||
|
|
||
| /** | ||
| * When {@code SET hive.query.timeout.seconds=...} succeeds, remember the effective value on the | ||
| * connection so {@code TIMEDOUT_STATE} can report it if the server omits {@code errorMessage} | ||
| * (HIVE-28265). | ||
| */ | ||
| private void trackSessionQueryTimeoutIfSet(String sql) { | ||
| if (sql == null) { | ||
| return; | ||
| } | ||
| Matcher m = SET_HIVE_QUERY_TIMEOUT_SECONDS.matcher(sql); | ||
| Long lastSec = null; | ||
| while (m.find()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found this PR interesting. Unfortunately, I have no time to finish this review as I go for a long vacation. But this part made me suspicious as I'm pretty sure we usually don't read Hive this way. My fault, I should start with this one. Just thinking out loud: As I see HiveStatement doesn't contain any reference to Hive Configuration. Creating a hiveConf object is not a top of my mind but I affraid with this way you ignore the actual HiveConf loaded in the Hive Server session. I'm sad that I have no time to debug it out but for me, it looks suspicious. Anyway, good luck with the PR. If you have still have open questions at the end of the next week, I would be happy to help and learn this part of the code.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thank you @InvisibleProgrammer This was reproducable. Could you please let me know any setting or runtime configs, if you feel can fix the issue? Could you also please let me know, how to refactor? As I am new to the community |
||
| try { | ||
| HiveConf conf = new HiveConf(); | ||
| conf.set(HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS.varname, m.group(1).trim()); | ||
| long sec = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS, TimeUnit.SECONDS); | ||
| lastSec = sec; | ||
| } catch (Exception e) { | ||
| LOG.debug("Could not parse session query timeout fragment: {}", m.group(0), e); | ||
| } | ||
| } | ||
| if (lastSec != null) { | ||
| connection.setSessionQueryTimeoutSeconds(lastSec); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * HIVE-28265: Prefer server error text unless it is empty or the known-broken "0 seconds" case; | ||
| * otherwise derive seconds from JDBC {@link #setQueryTimeout(int)} or last session SET. | ||
| */ | ||
| private String sqlTimeoutMessageForTimedOutState(String serverMessage) { | ||
| if (!needsLocalTimeoutMessageForTimedOut(serverMessage)) { | ||
| return stripTrailingQueryIdSuffix(serverMessage); | ||
| } | ||
| long effectiveSec = resolveEffectiveTimeoutSecondsForMessage(); | ||
| if (effectiveSec > 0) { | ||
| return "Query timed out after " + effectiveSec + " seconds"; | ||
| } | ||
| return "Query timed out"; | ||
| } | ||
|
|
||
| /** | ||
| * HS2 wraps many errors via {@link HiveSQLException#getMessage()}, which appends | ||
| * {@code ; Query ID: ...}. Strip that trailer so JDBC timeout text stays stable and matches the | ||
| * base HS2 wording (HIVE-28265). | ||
| */ | ||
| private static String stripTrailingQueryIdSuffix(String msg) { | ||
| if (msg == null) { | ||
| return null; | ||
| } | ||
| String marker = "; " + HiveSQLException.QUERY_ID + ": "; | ||
| int idx = msg.indexOf(marker); | ||
| return idx >= 0 ? msg.substring(0, idx) : msg; | ||
| } | ||
|
|
||
| private boolean needsLocalTimeoutMessageForTimedOut(String timeoutMsg) { | ||
| return StringUtils.isBlank(timeoutMsg) | ||
| || StringUtils.containsIgnoreCase(timeoutMsg, "after 0 seconds"); | ||
| } | ||
|
|
||
| private long resolveEffectiveTimeoutSecondsForMessage() { | ||
| if (queryTimeout > 0) { | ||
| return queryTimeout; | ||
| } | ||
| long tracked = connection.getSessionQueryTimeoutSeconds(); | ||
| if (tracked > 0) { | ||
| return tracked; | ||
| } | ||
| return 0L; | ||
| } | ||
|
|
||
| private SQLException sqlExceptionForCanceledState(TGetOperationStatusResp statusResp) { | ||
| final String errMsg = statusResp.getErrorMessage(); | ||
| final String fullErrMsg = | ||
| (errMsg == null || errMsg.isEmpty()) ? QUERY_CANCELLED_MESSAGE : QUERY_CANCELLED_MESSAGE + " " + errMsg; | ||
| return new SQLException(fullErrMsg, "01000"); | ||
| } | ||
|
|
||
| TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | ||
| TGetOperationStatusResp statusResp = null; | ||
|
|
||
|
|
@@ -436,13 +524,9 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | |
| isLogBeingGenerated = false; | ||
| break; | ||
| case CANCELED_STATE: | ||
| // 01000 -> warning | ||
| final String errMsg = statusResp.getErrorMessage(); | ||
| final String fullErrMsg = | ||
| (errMsg == null || errMsg.isEmpty()) ? QUERY_CANCELLED_MESSAGE : QUERY_CANCELLED_MESSAGE + " " + errMsg; | ||
| throw new SQLException(fullErrMsg, "01000"); | ||
| throw sqlExceptionForCanceledState(statusResp); | ||
| case TIMEDOUT_STATE: | ||
| throw new SQLTimeoutException("Query timed out after " + queryTimeout + " seconds"); | ||
| throw new SQLTimeoutException(sqlTimeoutMessageForTimedOutState(statusResp.getErrorMessage())); | ||
| case ERROR_STATE: | ||
| // Get the error details from the underlying exception | ||
| throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -179,6 +179,11 @@ private void prepare(QueryState queryState) throws HiveSQLException { | |
| try { | ||
| final String queryId = queryState.getQueryId(); | ||
| log.info("Query timed out after: {} seconds. Cancelling the execution now: {}", queryTimeout, queryId); | ||
| setOperationException(new HiveSQLException( | ||
| "Query timed out after " + queryTimeout + " seconds", | ||
| "HYT00", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please share some reference to this error code? The closest thing that I have found is a login timeout for SQL Server: https://learn.microsoft.com/en-us/answers/questions/1348638/sql-state-hyt00-sql-error-code-0-login-timeout-exp
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HYT00 is not a SQL Server–only code. It comes from the ODBC SQLSTATE list. In ODBC 3.x, HYT00 means “Timeout expired” (generic timeout, not specifically “login timeout”). SQL Server’s docs often show up in web search because they map ODBC states, but the same state is used for query / statement timeouts and other “timeout expired” situations when products follow ODBC-style SQLSTATEs. References: ==> Microsoft’s ODBC appendix lists ODBC error / SQLSTATE codes; HYT00 is documented there in the “Timeout expired” family (see Appendix A: ODBC Error Codes — search the page for HYT00). |
||
| 0, | ||
| queryId)); | ||
| SQLOperation.this.cancel(OperationState.TIMEDOUT); | ||
| } catch (HiveSQLException e) { | ||
| log.error("Error cancelling the query after timeout: {} seconds", queryTimeout, e); | ||
|
|
@@ -334,7 +339,9 @@ public Object run() throws HiveSQLException { | |
| runQuery(); | ||
| } catch (HiveSQLException e) { | ||
| // TODO: why do we invent our own error path op top of the one from Future.get? | ||
| setOperationException(e); | ||
| if (getState() != OperationState.TIMEDOUT) { | ||
| setOperationException(e); | ||
| } | ||
| log.error("Error running hive query", e); | ||
| } finally { | ||
| if (!embedded) { | ||
|
|
@@ -353,7 +360,9 @@ public Object run() throws HiveSQLException { | |
| try { | ||
| currentUGI.doAs(doAsAction); | ||
| } catch (Exception e) { | ||
| setOperationException(new HiveSQLException(e)); | ||
| if (getState() != OperationState.TIMEDOUT) { | ||
| setOperationException(new HiveSQLException(e)); | ||
| } | ||
| log.error("Error running hive query as user : {}", currentUGI.getShortUserName(), e); | ||
| } finally { | ||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking out loud: I wonder if a connection can have concurrency issue: I mean, you can have multiple individual connections to Hive, but inside a connection itself, can we have multiple hive statements in parallel?
I have no such use case in my mind, but let me ping Ayush about this question.
@ayushtkn , what do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a single JDBC Connection can be shared across multiple threads, and it is entirely possible to have multiple HiveStatement objects executing concurrently on the same connection (which maps to a single session on the HS2 side).
via Beeline or so maybe not but In Hive Server 2 (HS2), a single JDBC Connection corresponds to a single HS2 Session. You can absolutely execute multiple queries concurrently within the same session by spawning multiple threads on the client side, each using a different HiveStatement created from that single HiveConnection.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thx