Skip to content

Commit ef2c646

Browse files
HIVE-28170: Implement drop stats (#6391)
1 parent 843168b commit ef2c646

20 files changed

Lines changed: 700 additions & 22 deletions

parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ alterTableStatementSuffix
8686
| alterStatementSuffixReplaceBranch
8787
| alterStatementSuffixReplaceTag
8888
| alterStatementSuffixSetWriteOrder
89+
| alterStatementSuffixDropStatsForColumns
8990
;
9091

9192
alterTblPartitionStatementSuffix[boolean partition]
@@ -285,6 +286,13 @@ alterStatementSuffixUpdateStats[boolean partition]
285286
-> ^(TOK_ALTERTABLE_UPDATESTATS tableProperties)
286287
;
287288

289+
// ALTER TABLE suffix that matches the keyword sequence DROP STATISTICS FOR COLUMNS and
// rewrites it to a childless TOK_ALTERTABLE_DROPCOLSTATS node. The rule captures no
// column list and no partition spec.
alterStatementSuffixDropStatsForColumns
290+
@init { gParent.pushMsg("drop statistics for columns", state); }
291+
@after { gParent.popMsg(state); }
292+
: KW_DROP KW_STATISTICS KW_FOR KW_COLUMNS
293+
-> ^(TOK_ALTERTABLE_DROPCOLSTATS)
294+
;
295+
288296
alterStatementChangeColPosition
289297
: first=KW_FIRST|KW_AFTER afterCol=identifier
290298
->{$first != null}? ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION )
@@ -790,3 +798,4 @@ alterForeignKeyWithName
790798
: KW_CONSTRAINT constraintName=identifier KW_FOREIGN KW_KEY fkCols=columnParenthesesList KW_REFERENCES tabName=tableName parCols=columnParenthesesList constraintOptsAlter?
791799
-> ^(TOK_FOREIGN_KEY ^(TOK_CONSTRAINT_NAME $constraintName) $fkCols $tabName $parCols constraintOptsAlter?)
792800
;
801+

parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ TOK_ALTERTABLE_UPDATECOLSTATS;
199199
TOK_ALTERPARTITION_UPDATECOLSTATS;
200200
TOK_ALTERTABLE_UPDATESTATS;
201201
TOK_ALTERPARTITION_UPDATESTATS;
202+
TOK_ALTERTABLE_DROPCOLSTATS;
202203
TOK_TABLE_PARTITION;
203204
TOK_ALTERTABLE_FILEFORMAT;
204205
TOK_ALTERPARTITION_FILEFORMAT;
@@ -2938,4 +2939,4 @@ compactionStatus
29382939
;
29392940
/*
29402941
END SHOW COMPACTIONS statement
2941-
*/
2942+
*/

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ public enum AlterTableType {
7373
COMPACT("compact"),
7474
TRUNCATE("truncate"),
7575
MERGEFILES("merge files"),
76-
UPDATESTATS("update stats"); // Note: used in ColumnStatsUpdateWork, not here.
76+
UPDATESTATS("update stats"), // Note: used in ColumnStatsUpdateWork, not here.
77+
DROP_COL_STATS("drop column stats");
7778

7879
private final String name;
7980

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.hive.ql.ddl.table.misc.columnstats;
20+
21+
import java.util.Map;
22+
23+
import org.apache.hadoop.hive.common.TableName;
24+
import org.apache.hadoop.hive.ql.QueryState;
25+
import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory.DDLType;
26+
import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableAnalyzer;
27+
import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
28+
import org.apache.hadoop.hive.ql.exec.ColumnStatsDropTask;
29+
import org.apache.hadoop.hive.ql.exec.TaskFactory;
30+
import org.apache.hadoop.hive.ql.metadata.Table;
31+
import org.apache.hadoop.hive.ql.parse.ASTNode;
32+
import org.apache.hadoop.hive.ql.parse.HiveParser;
33+
import org.apache.hadoop.hive.ql.parse.SemanticException;
34+
import org.apache.hadoop.hive.ql.plan.ColumnStatsDropWork;
35+
36+
/**
37+
* Analyzer for drop column statistics commands.
* <p>
* Registered for {@code HiveParser.TOK_ALTERTABLE_DROPCOLSTATS}; it plans a single
* {@link ColumnStatsDropTask} built from a {@link ColumnStatsDropWork} for the target table.
38+
*/
39+
@DDLType(types = HiveParser.TOK_ALTERTABLE_DROPCOLSTATS)
40+
public class AlterTableDropColumnStatisticsAnalyzer extends AbstractAlterTableAnalyzer {
41+
public AlterTableDropColumnStatisticsAnalyzer(QueryState queryState) throws SemanticException {
42+
super(queryState);
43+
}
44+
45+
/**
* Validates the target table and adds a {@link ColumnStatsDropTask} to the root tasks.
*
* @param tableName table whose column statistics are to be dropped
* @param partitionSpec partition spec of the statement; only forwarded to
*        {@code addInputsOutputsAlterTable}, the created work carries just the table name
* @param command AST node of the command; not read by this method
* @throws SemanticException if the table is non-native and its storage handler reports
*         {@code canSetColStatistics(table)}; the helpers called here may presumably raise it too
*/
@Override
46+
protected void analyzeCommand(TableName tableName, Map<String, String> partitionSpec, ASTNode command)
47+
throws SemanticException {
48+
Table table = getTable(tableName);
49+
50+
// NOTE(review): this guard fires when canSetColStatistics(table) is TRUE, while the message
// describes tables that do not store stats in the metastore. Presumably "can set" means the
// handler keeps stats itself -- confirm the condition is not inverted.
if (table.isNonNative() && table.getStorageHandler().canSetColStatistics(table)) {
51+
throw new SemanticException("DROP STATISTICS FOR COLUMNS is not supported for non-native table that " +
52+
"doesn't store stats in metastore.");
53+
}
54+
55+
// The work is keyed by table name only; partitionSpec is not passed to it.
ColumnStatsDropWork work = new ColumnStatsDropWork(tableName);
56+
ColumnStatsDropTask task = (ColumnStatsDropTask) TaskFactory.get(work);
57+
58+
// Presumably registers the statement's read/write entities under the DROP_COL_STATS alter
// type -- helper is not visible here, verify against AbstractAlterTableAnalyzer.
addInputsOutputsAlterTable(tableName, partitionSpec, null, AlterTableType.DROP_COL_STATS, false);
59+
60+
rootTasks.add(task);
61+
}
62+
}

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/columnstats/AlterTableUpdateColumnStatistictAnalyzer.java renamed to ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/columnstats/AlterTableUpdateColumnStatisticsAnalyzer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@
3939
* Analyzer for update column statistics commands.
4040
*/
4141
@DDLType(types = {HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS, HiveParser.TOK_ALTERPARTITION_UPDATECOLSTATS})
42-
public class AlterTableUpdateColumnStatistictAnalyzer extends AbstractAlterTableAnalyzer {
43-
public AlterTableUpdateColumnStatistictAnalyzer(QueryState queryState) throws SemanticException {
42+
public class AlterTableUpdateColumnStatisticsAnalyzer extends AbstractAlterTableAnalyzer {
43+
public AlterTableUpdateColumnStatisticsAnalyzer(QueryState queryState) throws SemanticException {
4444
super(queryState);
4545
}
4646

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.hive.ql.exec;
20+
21+
import java.io.Serial;
22+
import org.apache.hadoop.hive.ql.ErrorMsg;
23+
import org.apache.hadoop.hive.ql.plan.ColumnStatsDropWork;
24+
import org.apache.hadoop.hive.ql.plan.api.StageType;
25+
import org.slf4j.Logger;
26+
import org.slf4j.LoggerFactory;
27+
28+
/**
29+
* Task that drops the column statistics of the table named by its {@link ColumnStatsDropWork}. Example:
30+
* ALTER TABLE src_stat DROP STATISTICS FOR COLUMNS;
31+
*/
32+
33+
public class ColumnStatsDropTask extends Task<ColumnStatsDropWork> {
34+
@Serial
35+
private static final long serialVersionUID = 1L;
36+
private static final Logger LOG = LoggerFactory.getLogger(ColumnStatsDropTask.class);
37+
38+
/**
* Calls {@code getHive().deleteColumnStatistics(work.tableName())}.
*
* @return 0 on success; otherwise the error code of {@code ErrorMsg.GENERIC_ERROR}, after the
*         caught exception has been recorded with {@code setException}
*/
@Override
39+
public int execute() {
40+
try {
41+
getHive().deleteColumnStatistics(work.tableName());
42+
return 0;
43+
} catch (Exception e) {
44+
setException(e);
45+
// NOTE(review): the failure is logged at INFO level; consider whether ERROR is intended.
LOG.info("Failed to drop column stats in metastore", e);
46+
return ErrorMsg.GENERIC_ERROR.getErrorCode();
47+
}
48+
}
49+
50+
/** Returns {@code StageType.COLUMNSTATS} as the stage type of this task. */
@Override
51+
public StageType getType() {
52+
return StageType.COLUMNSTATS;
53+
}
54+
55+
/** Returns the fixed display name of this task. */
@Override
56+
public String getName() {
57+
return "COLUMN STATS DROP TASK";
58+
}
59+
}

ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import org.apache.hadoop.hive.ql.io.merge.MergeFileTask;
5555
import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
5656
import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork;
57+
import org.apache.hadoop.hive.ql.plan.ColumnStatsDropWork;
5758
import org.apache.hadoop.hive.ql.plan.ConditionalWork;
5859
import org.apache.hadoop.hive.ql.plan.CopyWork;
5960
import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
@@ -114,6 +115,7 @@ public TaskTuple(Class<T> workClass, Class<? extends Task<T>> taskClass) {
114115
MapredLocalTask.class));
115116
taskvec.add(new TaskTuple<StatsWork>(StatsWork.class, StatsTask.class));
116117
taskvec.add(new TaskTuple<ColumnStatsUpdateWork>(ColumnStatsUpdateWork.class, ColumnStatsUpdateTask.class));
118+
taskvec.add(new TaskTuple<>(ColumnStatsDropWork.class, ColumnStatsDropTask.class));
117119
taskvec.add(new TaskTuple<MergeFileWork>(MergeFileWork.class,
118120
MergeFileTask.class));
119121
taskvec.add(new TaskTuple<DependencyCollectionWork>(DependencyCollectionWork.class,

ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERTABLE_UNARCHIVE;
5656
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERTABLE_UPDATEPARTSTATS;
5757
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERTABLE_UPDATETABLESTATS;
58+
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERTABLE_DROP_COL_STATS;
5859
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERTBLPART_SKEWED_LOCATION;
5960
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERVIEW_AS;
6061
import static org.apache.hadoop.hive.ql.plan.HiveOperation.ALTERVIEW_PROPERTIES;
@@ -145,7 +146,8 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext {
145146
includedOperationSet = Arrays.stream(new HiveOperation[] { LOAD, EXPORT, IMPORT,
146147
CREATEDATABASE, DROPDATABASE, DROPTABLE, MSCK, ALTERTABLE_ADDCOLS, ALTERTABLE_REPLACECOLS,
147148
ALTERTABLE_RENAMECOL, ALTERTABLE_RENAMEPART, ALTERTABLE_UPDATEPARTSTATS,
148-
ALTERTABLE_UPDATETABLESTATS, ALTERTABLE_RENAME, ALTERTABLE_DROPPARTS, ALTERTABLE_ADDPARTS,
149+
ALTERTABLE_UPDATETABLESTATS, ALTERTABLE_DROP_COL_STATS,
150+
ALTERTABLE_RENAME, ALTERTABLE_DROPPARTS, ALTERTABLE_ADDPARTS,
149151
ALTERTABLE_TOUCH, ALTERTABLE_ARCHIVE, ALTERTABLE_UNARCHIVE, ALTERTABLE_PROPERTIES,
150152
ALTERTABLE_SERIALIZER, ALTERPARTITION_SERIALIZER, ALTERTABLE_SERDEPROPERTIES,
151153
ALTERPARTITION_SERDEPROPERTIES, ALTERTABLE_CLUSTER_SORT, ANALYZE_TABLE, CACHE_METADATA,

ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ public static WriteType determineAlterTableWriteType(AlterTableType op, Table ta
276276
case SET_SERDE_PROPS:
277277
case ADDPROPS:
278278
case UPDATESTATS:
279+
case DROP_COL_STATS:
279280
return WriteType.DDL_SHARED;
280281

281282
case COMPACT:

ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
import org.apache.hadoop.hive.metastore.api.DataConnector;
8787
import org.apache.hadoop.hive.metastore.api.Database;
8888
import org.apache.hadoop.hive.metastore.api.DefaultConstraintsRequest;
89+
import org.apache.hadoop.hive.metastore.api.DeleteColumnStatisticsRequest;
8990
import org.apache.hadoop.hive.metastore.api.DropDatabaseRequest;
9091
import org.apache.hadoop.hive.metastore.api.DropPartitionsExpr;
9192
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
@@ -6330,27 +6331,18 @@ public AggrStats getAggrColStatsFor(Table tbl,
63306331
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.HIVE_GET_AGGR_COL_STATS, "HS2-cache");
63316332
}
63326333
}
6333-
6334-
public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName)
6335-
throws HiveException {
6334+
6335+
/**
* Deletes column statistics for the given table through the metastore client.
* <p>
* Builds a {@link DeleteColumnStatisticsRequest} from the table's database and table name,
* sets its engine to {@code Constants.HIVE_ENGINE}, and sends it via {@code getMSC()}.
* NOTE(review): no column or partition names are set on the request -- presumably this drops
* the statistics of every column; confirm against the metastore API.
*
* @param tableName table whose column statistics should be deleted
* @throws HiveException wrapping any exception raised while obtaining the client or making
*         the call, tagged with {@code ErrorMsg.GENERIC_ERROR}
*/
public void deleteColumnStatistics(TableName tableName) throws HiveException {
6336+
DeleteColumnStatisticsRequest request =
6337+
new DeleteColumnStatisticsRequest(tableName.getDb(), tableName.getTable());
6338+
request.setEngine(Constants.HIVE_ENGINE);
63366339
try {
6337-
return getMSC().deleteTableColumnStatistics(dbName, tableName, colName, Constants.HIVE_ENGINE);
6338-
} catch(Exception e) {
6339-
LOG.debug("Failed deleteTableColumnStatistics", e);
6340-
throw new HiveException(e);
6340+
getMSC().deleteColumnStatistics(request);
6341+
} catch (Exception e) {
6342+
throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
63416343
}
63426344
}
63436345

6344-
public boolean deletePartitionColumnStatistics(String dbName, String tableName, String partName,
6345-
String colName) throws HiveException {
6346-
try {
6347-
return getMSC().deletePartitionColumnStatistics(dbName, tableName, partName, colName, Constants.HIVE_ENGINE);
6348-
} catch(Exception e) {
6349-
LOG.debug("Failed deletePartitionColumnStatistics", e);
6350-
throw new HiveException(e);
6351-
}
6352-
}
6353-
63546346
public void updateTransactionalStatistics(UpdateTransactionalStatsRequest req) throws HiveException {
63556347
try {
63566348
getMSC().updateTransactionalStatistics(req);

0 commit comments

Comments
 (0)