-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-28265: Fix JDBC timeout message for hive.query.timeout.seconds #6412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 3 commits
eec11d2
ab425d3
c5399f3
eaae132
aeb5a7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,7 @@ | |
| import org.apache.hive.service.cli.operation.ClassicTableTypeMapping.ClassicTableTypes; | ||
| import org.apache.hive.service.cli.operation.HiveTableTypeMapping; | ||
| import org.apache.hive.service.cli.operation.TableTypeMappingFactory.TableTypeMappings; | ||
| import org.junit.After; | ||
| import org.junit.AfterClass; | ||
| import org.junit.BeforeClass; | ||
| import org.junit.Rule; | ||
|
|
@@ -131,6 +132,33 @@ public class TestJdbcDriver2 { | |
| @Rule public ExpectedException thrown = ExpectedException.none(); | ||
| @Rule public final TestName testName = new TestName(); | ||
|
|
||
| /** | ||
| * {@code SET hive.query.timeout.seconds} applies to the whole HS2 session. Tests such as | ||
| * {@link #testQueryTimeoutMessageUsesHiveConf()} must not leave a short limit on the shared | ||
| * {@link #con}, or unrelated tests will see {@link SQLTimeoutException}. | ||
| */ | ||
| @After | ||
| public void resetHiveSessionQueryTimeout() { | ||
| try { | ||
| if (con == null || con.isClosed()) { | ||
| return; | ||
| } | ||
| try (Statement st = con.createStatement()) { | ||
| st.execute("set hive.query.timeout.seconds=0s"); | ||
| } | ||
| } catch (SQLException e) { | ||
| LOG.warn("Could not reset hive.query.timeout.seconds after {}", testName.getMethodName(), e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * HS2 / {@code HiveStatement} report timeouts as {@code ...timed out after N seconds...}; match | ||
| * {@code N == 1} with flexible whitespace so we do not treat {@code 10} or unrelated digits as {@code 1}. | ||
| */ | ||
| private static boolean isQueryTimedOutAfterOneSecondMessage(String msg) { | ||
| return msg != null && msg.matches("(?is).*timed out after\\s+1\\s+seconds.*"); | ||
| } | ||
|
|
||
| private static Connection getConnection(String prefix, String postfix) throws SQLException { | ||
| Connection con1; | ||
| String connString = "jdbc:hive2:///" + prefix + "?" + conf.getOverlayOptionsAsQueryString() | ||
|
|
@@ -2662,6 +2690,10 @@ public void testQueryTimeout() throws Exception { | |
| fail("Expecting SQLTimeoutException"); | ||
| } catch (SQLTimeoutException e) { | ||
| assertNotNull(e); | ||
| assertTrue("Message should reflect JDBC query timeout (1s): " + e.getMessage(), | ||
|
||
| isQueryTimedOutAfterOneSecondMessage(e.getMessage())); | ||
| assertFalse("Message should not claim 0 seconds: " + e.getMessage(), | ||
|
||
| e.getMessage().contains("after 0 seconds")); | ||
| System.err.println(e.toString()); | ||
| } catch (SQLException e) { | ||
| fail("Expecting SQLTimeoutException, but got SQLException: " + e); | ||
|
|
@@ -2680,6 +2712,37 @@ public void testQueryTimeout() throws Exception { | |
| stmt.close(); | ||
| } | ||
|
|
||
| /** | ||
| * HIVE-28265: hive.query.timeout.seconds drives the server-side timer, but the JDBC client | ||
| * must not report "0 seconds" when Statement#setQueryTimeout was not used. | ||
| */ | ||
| @Test | ||
| public void testQueryTimeoutMessageUsesHiveConf() throws Exception { | ||
| String udfName = SleepMsUDF.class.getName(); | ||
| Statement stmt1 = con.createStatement(); | ||
| stmt1.execute("create temporary function sleepMsUDF as '" + udfName + "'"); | ||
| stmt1.close(); | ||
|
|
||
| Statement stmt = con.createStatement(); | ||
| stmt.execute("set hive.query.timeout.seconds=1s"); | ||
|
|
||
| try { | ||
| stmt.executeQuery("select sleepMsUDF(t1.under_col, 5) as u0, t1.under_col as u1, " | ||
| + "t2.under_col as u2 from " + tableName + " t1 join " + tableName | ||
| + " t2 on t1.under_col = t2.under_col"); | ||
| fail("Expecting SQLTimeoutException"); | ||
| } catch (SQLTimeoutException e) { | ||
| assertNotNull(e); | ||
|
||
| assertTrue("Message should include session timeout (1s): " + e.getMessage(), | ||
| isQueryTimedOutAfterOneSecondMessage(e.getMessage())); | ||
| assertFalse("Message should not claim 0 seconds (HIVE-28265): " + e.getMessage(), | ||
|
||
| e.getMessage().contains("after 0 seconds")); | ||
| } catch (SQLException e) { | ||
| fail("Expecting SQLTimeoutException, but got SQLException: " + e); | ||
| } | ||
| stmt.close(); | ||
| } | ||
|
|
||
| /** | ||
| * Test the non-null value of the Yarn ATS GUID. | ||
| * We spawn 2 threads - one running the query and | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -155,6 +155,7 @@ | |
| import org.apache.thrift.transport.TTransportException; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.function.Supplier; | ||
|
|
||
| /** | ||
|
|
@@ -163,6 +164,12 @@ | |
| */ | ||
| public class HiveConnection implements java.sql.Connection { | ||
| private static final Logger LOG = LoggerFactory.getLogger(HiveConnection.class); | ||
|
|
||
| /** | ||
| * Sentinel: no {@code SET hive.query.timeout.seconds} has been observed on this connection yet. | ||
| */ | ||
| static final long SESSION_QUERY_TIMEOUT_NOT_TRACKED = -1L; | ||
| private final AtomicLong sessionQueryTimeoutSeconds = new AtomicLong(SESSION_QUERY_TIMEOUT_NOT_TRACKED); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thinking out loud: I wonder if a connection can have concurrency issue: I mean, you can have multiple individual connections to Hive, but inside a connection itself, can we have multiple hive statements in parallel? @ayushtkn , what do you think?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a single JDBC Connection can be shared across multiple threads, and it is entirely possible to have multiple HiveStatement objects executing concurrently on the same connection (which maps to a single session on the HS2 side). via Beeline or so maybe not but In Hive Server 2 (HS2), a single JDBC Connection corresponds to a single HS2 Session. You can absolutely execute multiple queries concurrently within the same session by spawning multiple threads on the client side, each using a different HiveStatement created from that single HiveConnection.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thx |
||
| private String jdbcUriString; | ||
| private String host; | ||
| private int port; | ||
|
|
@@ -190,6 +197,21 @@ public class HiveConnection implements java.sql.Connection { | |
|
|
||
| public TCLIService.Iface getClient() { return client; } | ||
|
|
||
| /** | ||
| * Records the effective {@code hive.query.timeout.seconds} (in seconds) after a successful | ||
| * {@code SET hive.query.timeout.seconds=...} on this connection. Used for JDBC timeout messages. | ||
| */ | ||
| void recordSessionQueryTimeoutFromSet(long seconds) { | ||
|
||
| sessionQueryTimeoutSeconds.set(seconds); | ||
| } | ||
|
|
||
| /** | ||
| * @return seconds from the last client-tracked SET, or {@link #SESSION_QUERY_TIMEOUT_NOT_TRACKED} if none | ||
| */ | ||
| long getSessionQueryTimeoutSecondsTracked() { | ||
| return sessionQueryTimeoutSeconds.get(); | ||
| } | ||
|
|
||
| /** | ||
| * Get all direct HiveServer2 URLs from a ZooKeeper based HiveServer2 URL | ||
| * @param zookeeperBasedHS2Url | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| package org.apache.hive.jdbc; | ||
|
|
||
| import org.apache.commons.lang3.StringUtils; | ||
| import org.apache.hadoop.hive.conf.HiveConf; | ||
| import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; | ||
| import org.apache.hive.jdbc.logs.InPlaceUpdateStream; | ||
| import org.apache.hive.service.cli.RowSet; | ||
|
|
@@ -57,6 +58,9 @@ | |
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
|
|
||
| import static org.apache.hadoop.hive.ql.ErrorMsg.CLIENT_POLLING_OPSTATUS_INTERRUPTED; | ||
|
|
||
|
|
@@ -70,6 +74,10 @@ public class HiveStatement implements java.sql.Statement { | |
|
|
||
| public static final String QUERY_CANCELLED_MESSAGE = "Query was cancelled."; | ||
|
|
||
| /** Last assignment wins if multiple appear (e.g. multi-line script). Uses find(), not full-string match. */ | ||
| private static final Pattern SET_HIVE_QUERY_TIMEOUT_SECONDS = Pattern.compile( | ||
| "(?i)set\\s+hive\\.query\\.timeout\\.seconds\\s*=\\s*([^;\\n]+)"); | ||
|
|
||
| private final HiveConnection connection; | ||
| private TCLIService.Iface client; | ||
| private Optional<TOperationHandle> stmtHandle; | ||
|
|
@@ -298,6 +306,7 @@ public void closeOnCompletion() throws SQLException { | |
| public boolean execute(String sql) throws SQLException { | ||
| runAsyncOnServer(sql); | ||
| TGetOperationStatusResp status = waitForOperationToComplete(); | ||
| trackSessionQueryTimeoutIfSet(sql); | ||
|
|
||
| // The query should be completed by now | ||
| if (!status.isHasResultSet() && stmtHandle.isPresent() && !stmtHandle.get().isHasResultSet()) { | ||
|
|
@@ -398,6 +407,70 @@ private TGetOperationStatusResp waitForResultSetStatus() throws SQLException { | |
| return statusResp; | ||
| } | ||
|
|
||
| /** | ||
| * When {@code SET hive.query.timeout.seconds=...} succeeds, remember the effective value on the | ||
| * connection so {@code TIMEDOUT_STATE} can report it if the server omits {@code errorMessage} | ||
| * (HIVE-28265). | ||
| */ | ||
| private void trackSessionQueryTimeoutIfSet(String sql) { | ||
| if (sql == null) { | ||
| return; | ||
| } | ||
| Matcher m = SET_HIVE_QUERY_TIMEOUT_SECONDS.matcher(sql); | ||
| Long lastSec = null; | ||
| while (m.find()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found this PR interesting. Unfortunately, I have no time to finish this review as I go for a long vacation. But this part made me suspicious as I'm pretty sure we usually don't read Hive this way. My fault, I should start with this one. Just thinking out loud: As I see HiveStatement doesn't contain any reference to Hive Configuration. Creating a hiveConf object is not a top of my mind but I affraid with this way you ignore the actual HiveConf loaded in the Hive Server session. I'm sad that I have no time to debug it out but for me, it looks suspicious. Anyway, good luck with the PR. If you have still have open questions at the end of the next week, I would be happy to help and learn this part of the code.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thank you @InvisibleProgrammer This was reproducable. Could you please let me know any setting or runtime configs, if you feel can fix the issue? Could you also please let me know, how to refactor? As I am new to the community |
||
| try { | ||
| HiveConf conf = new HiveConf(); | ||
| conf.set(HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS.varname, m.group(1).trim()); | ||
| long sec = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS, TimeUnit.SECONDS); | ||
| lastSec = sec; | ||
| } catch (Exception e) { | ||
| LOG.debug("Could not parse session query timeout fragment: {}", m.group(0), e); | ||
| } | ||
| } | ||
| if (lastSec != null) { | ||
| connection.recordSessionQueryTimeoutFromSet(lastSec); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * HIVE-28265: Prefer server error text unless it is empty or the known-broken "0 seconds" case; | ||
| * otherwise derive seconds from JDBC {@link #setQueryTimeout(int)} or last session SET. | ||
| */ | ||
| private String sqlTimeoutMessageForTimedOutState(String serverMessage) { | ||
| if (!needsLocalTimeoutMessageForTimedOut(serverMessage)) { | ||
| return serverMessage; | ||
| } | ||
| long effectiveSec = resolveEffectiveTimeoutSecondsForMessage(); | ||
| if (effectiveSec > 0) { | ||
| return "Query timed out after " + effectiveSec + " seconds"; | ||
| } | ||
| return "Query timed out"; | ||
| } | ||
|
|
||
| private boolean needsLocalTimeoutMessageForTimedOut(String timeoutMsg) { | ||
| return StringUtils.isBlank(timeoutMsg) | ||
| || StringUtils.containsIgnoreCase(timeoutMsg, "after 0 seconds"); | ||
| } | ||
|
|
||
| private long resolveEffectiveTimeoutSecondsForMessage() { | ||
| if (queryTimeout > 0) { | ||
| return queryTimeout; | ||
| } | ||
| long tracked = connection.getSessionQueryTimeoutSecondsTracked(); | ||
| if (tracked > 0) { | ||
| return tracked; | ||
| } | ||
| return 0L; | ||
| } | ||
|
|
||
| private SQLException sqlExceptionForCanceledState(TGetOperationStatusResp statusResp) { | ||
| final String errMsg = statusResp.getErrorMessage(); | ||
| final String fullErrMsg = | ||
| (errMsg == null || errMsg.isEmpty()) ? QUERY_CANCELLED_MESSAGE : QUERY_CANCELLED_MESSAGE + " " + errMsg; | ||
| return new SQLException(fullErrMsg, "01000"); | ||
| } | ||
|
|
||
| TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | ||
| TGetOperationStatusResp statusResp = null; | ||
|
|
||
|
|
@@ -436,13 +509,9 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | |
| isLogBeingGenerated = false; | ||
| break; | ||
| case CANCELED_STATE: | ||
| // 01000 -> warning | ||
| final String errMsg = statusResp.getErrorMessage(); | ||
| final String fullErrMsg = | ||
| (errMsg == null || errMsg.isEmpty()) ? QUERY_CANCELLED_MESSAGE : QUERY_CANCELLED_MESSAGE + " " + errMsg; | ||
| throw new SQLException(fullErrMsg, "01000"); | ||
| throw sqlExceptionForCanceledState(statusResp); | ||
| case TIMEDOUT_STATE: | ||
| throw new SQLTimeoutException("Query timed out after " + queryTimeout + " seconds"); | ||
| throw new SQLTimeoutException(sqlTimeoutMessageForTimedOutState(statusResp.getErrorMessage())); | ||
| case ERROR_STATE: | ||
| // Get the error details from the underlying exception | ||
| throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -179,6 +179,11 @@ private void prepare(QueryState queryState) throws HiveSQLException { | |
| try { | ||
| final String queryId = queryState.getQueryId(); | ||
| log.info("Query timed out after: {} seconds. Cancelling the execution now: {}", queryTimeout, queryId); | ||
| setOperationException(new HiveSQLException( | ||
| "Query timed out after " + queryTimeout + " seconds", | ||
| "HYT00", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please share some reference to this error code? The closest thing that I have found is a login timeout for SQL Server: https://learn.microsoft.com/en-us/answers/questions/1348638/sql-state-hyt00-sql-error-code-0-login-timeout-exp
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HYT00 is not a SQL Server–only code. It comes from the ODBC SQLSTATE list. In ODBC 3.x, HYT00 means “Timeout expired” (generic timeout, not specifically “login timeout”). SQL Server’s docs often show up in web search because they map ODBC states, but the same state is used for query / statement timeouts and other “timeout expired” situations when products follow ODBC-style SQLSTATEs. References: ==> Microsoft’s ODBC appendix lists ODBC error / SQLSTATE codes; HYT00 is documented there in the “Timeout expired” family (see Appendix A: ODBC Error Codes — search the page for HYT00). |
||
| 0, | ||
| queryId)); | ||
| SQLOperation.this.cancel(OperationState.TIMEDOUT); | ||
| } catch (HiveSQLException e) { | ||
| log.error("Error cancelling the query after timeout: {} seconds", queryTimeout, e); | ||
|
|
@@ -334,7 +339,9 @@ public Object run() throws HiveSQLException { | |
| runQuery(); | ||
| } catch (HiveSQLException e) { | ||
| // TODO: why do we invent our own error path op top of the one from Future.get? | ||
| setOperationException(e); | ||
| if (getState() != OperationState.TIMEDOUT) { | ||
| setOperationException(e); | ||
| } | ||
| log.error("Error running hive query", e); | ||
| } finally { | ||
| if (!embedded) { | ||
|
|
@@ -353,7 +360,9 @@ public Object run() throws HiveSQLException { | |
| try { | ||
| currentUGI.doAs(doAsAction); | ||
| } catch (Exception e) { | ||
| setOperationException(new HiveSQLException(e)); | ||
| if (getState() != OperationState.TIMEDOUT) { | ||
| setOperationException(new HiveSQLException(e)); | ||
| } | ||
| log.error("Error running hive query as user : {}", currentUGI.getShortUserName(), e); | ||
| } finally { | ||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We know it is 1 sec. And we don't accept any other output in that case.
In my opinion, regex here can be a little bit overkill.
What about something like:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed isQueryTimedOutAfterOneSecondMessage (regex helper).
Positive assertions use assertEquals("…", QUERY_TIMED_OUT_AFTER_1_SECONDS, e.getMessage()) in both timeout-related tests.