2222import java .util .HashSet ;
2323import java .util .List ;
2424import java .util .Map ;
25+ import java .util .Optional ;
2526import java .util .Set ;
2627import java .util .concurrent .atomic .AtomicInteger ;
27- import java .util .stream .Collectors ;
2828
2929import org .apache .calcite .linq4j .tree .Expression ;
3030import org .apache .calcite .plan .RelOptSchema ;
8282import com .google .common .collect .ImmutableList ;
8383import com .google .common .collect .ImmutableMap ;
8484import com .google .common .collect .Lists ;
85- import com .google .common .collect .Sets ;
8685
8786public class RelOptHiveTable implements RelOptTable {
8887
@@ -125,7 +124,7 @@ public RelOptHiveTable(RelOptSchema calciteSchema, RelDataTypeFactory typeFactor
125124 this .schema = calciteSchema ;
126125 this .typeFactory = typeFactory ;
127126 this .qualifiedTblName = ImmutableList .copyOf (qualifiedTblName );
128- this .name = this . qualifiedTblName . stream (). collect ( Collectors . joining ( "." ) );
127+ this .name = String . join ( "." , this . qualifiedTblName );
129128 this .rowType = rowType ;
130129 this .hiveTblMetadata = hiveTblMetadata ;
131130 this .hiveColStatsMap = new HashMap <>();
@@ -192,15 +191,15 @@ public List<ColumnStrategy> getColumnStrategies() {
192191 public RelOptHiveTable copy (RelDataType newRowType ) {
193192 // 1. Build map of column name to col index of original schema
194193 // Assumption: Hive Table can not contain duplicate column names
195- Map <String , Integer > nameToColIndxMap = new HashMap <String , Integer >();
194+ Map <String , Integer > nameToColIndxMap = new HashMap <>();
196195 for (RelDataTypeField f : this .rowType .getFieldList ()) {
197196 nameToColIndxMap .put (f .getName (), f .getIndex ());
198197 }
199198
200199 // 2. Build nonPart/Part/Virtual column info for new RowSchema
201- List <ColumnInfo > newHiveNonPartitionCols = new ArrayList <ColumnInfo >();
202- List <ColumnInfo > newHivePartitionCols = new ArrayList <ColumnInfo >();
203- List <VirtualColumn > newHiveVirtualCols = new ArrayList <VirtualColumn >();
200+ List <ColumnInfo > newHiveNonPartitionCols = new ArrayList <>();
201+ List <ColumnInfo > newHivePartitionCols = new ArrayList <>();
202+ List <VirtualColumn > newHiveVirtualCols = new ArrayList <>();
204203 Map <Integer , VirtualColumn > virtualColInfoMap = HiveCalciteUtil .getVColsMap (this .hiveVirtualCols ,
205204 this .noOfNonVirtualCols );
206205 Integer originalColIndx ;
@@ -329,8 +328,8 @@ private List<RelReferentialConstraint> generateReferentialConstraints() {
329328 ImmutableList .Builder <RelReferentialConstraint > builder = ImmutableList .builder ();
330329 if (foreignKeyInfo != null && !foreignKeyInfo .getForeignKeys ().isEmpty ()) {
331330 for (List <ForeignKeyCol > fkCols : foreignKeyInfo .getForeignKeys ().values ()) {
332- String parentDatabaseName = fkCols .get ( 0 ).parentDatabaseName ;
333- String parentTableName = fkCols .get ( 0 ).parentTableName ;
331+ String parentDatabaseName = fkCols .getFirst ( ).parentDatabaseName ;
332+ String parentTableName = fkCols .getFirst ( ).parentTableName ;
334333 String qualifiedName ;
335334 List <String > parentTableQualifiedName = new ArrayList <>();
336335 if (parentDatabaseName != null && !parentDatabaseName .isEmpty ()) {
@@ -390,7 +389,7 @@ public <T> T unwrap(Class<T> arg0) {
390389
391390 @ Override
392391 public List <RelCollation > getCollationList () {
393- ImmutableList .Builder <RelFieldCollation > collationList = new ImmutableList .Builder <RelFieldCollation >();
392+ ImmutableList .Builder <RelFieldCollation > collationList = new ImmutableList .Builder <>();
394393 for (Order sortColumn : this .hiveTblMetadata .getSortCols ()) {
395394 for (int i =0 ; i <this .hiveTblMetadata .getSd ().getCols ().size (); i ++) {
396395 FieldSchema field = this .hiveTblMetadata .getSd ().getCols ().get (i );
@@ -411,7 +410,7 @@ public List<RelCollation> getCollationList() {
411410
412411 @ Override
413412 public RelDistribution getDistribution () {
414- ImmutableList .Builder <Integer > columnPositions = new ImmutableList .Builder <Integer >();
413+ ImmutableList .Builder <Integer > columnPositions = new ImmutableList .Builder <>();
415414 for (String bucketColumn : this .hiveTblMetadata .getBucketCols ()) {
416415 for (int i =0 ; i <this .hiveTblMetadata .getSd ().getCols ().size (); i ++) {
417416 FieldSchema field = this .hiveTblMetadata .getSd ().getCols ().get (i );
@@ -435,7 +434,7 @@ public double getRowCount() {
435434 if (null == partitionList ) {
436435 // we are here either unpartitioned table or partitioned table with no
437436 // predicates
438- computePartitionList (hiveConf , null , new HashSet <Integer >());
437+ computePartitionList (hiveConf , null , new HashSet <>());
439438 }
440439 rowCount = StatsUtils .getNumRows (hiveConf , getNonPartColumns (), hiveTblMetadata ,
441440 partitionList , noColsMissingStats );
@@ -465,7 +464,7 @@ private String getColNamesForLogging(Set<String> colLst) {
465464 public void computePartitionList (HiveConf conf , RexNode pruneNode , Set <Integer > partOrVirtualCols ) {
466465 try {
467466 if (!hiveTblMetadata .isPartitioned () || pruneNode == null
468- || InputFinder .bits (pruneNode ).length () == 0 ) {
467+ || InputFinder .bits (pruneNode ).isEmpty () ) {
469468 // there is no predicate on partitioning column, we need all partitions
470469 // in this case.
471470 partitionList = PartitionPruner .prune (hiveTblMetadata , null , conf , getName (),
@@ -485,11 +484,11 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode, Set<Integer>
485484 }
486485
487486 private void updateColStats (Set <Integer > projIndxLst , boolean allowMissingStats ) {
488- List <String > nonPartColNamesThatRqrStats = new ArrayList <String >();
489- List <Integer > nonPartColIndxsThatRqrStats = new ArrayList <Integer >();
490- List <String > partColNamesThatRqrStats = new ArrayList <String >();
491- List <Integer > partColIndxsThatRqrStats = new ArrayList <Integer >();
492- Set <String > colNamesFailedStats = new HashSet <String >();
487+ List <String > nonPartColNamesThatRqrStats = new ArrayList <>();
488+ List <Integer > nonPartColIndxsThatRqrStats = new ArrayList <>();
489+ List <String > partColNamesThatRqrStats = new ArrayList <>();
490+ List <Integer > partColIndxsThatRqrStats = new ArrayList <>();
491+ Set <String > colNamesFailedStats = new HashSet <>();
493492
494493 // 1. Separate required columns to Non Partition and Partition Cols
495494 ColumnInfo tmp ;
@@ -514,19 +513,24 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
514513 if (null == partitionList ) {
515514 // We could be here either because its an unpartitioned table or because
516515 // there are no pruning predicates on a partitioned table.
517- computePartitionList (hiveConf , null , new HashSet <Integer >());
516+ computePartitionList (hiveConf , null , new HashSet <>());
518517 }
519518
520- String partitionListKey = partitionList .getKey ().orElse (null );
521- ColumnStatsList colStatsCached = colStatsCache .get (partitionListKey );
522- if (colStatsCached == null ) {
523- colStatsCached = new ColumnStatsList ();
524- colStatsCache .put (partitionListKey , colStatsCached );
525- }
519+ String key = partitionList .getKey ();
520+
521+ String partitionListKey = Optional .ofNullable (hiveTblMetadata .getStorageHandler ())
522+ .map (handler -> handler .getStatsCacheKeySuffix (hiveTblMetadata ))
523+ .map (suffix -> key + ";" + suffix )
524+ .orElse (key );
525+
526+ ColumnStatsList colStatsCached = colStatsCache .computeIfAbsent (
527+ partitionListKey ,
528+ k -> new ColumnStatsList ()
529+ );
526530
527531 // 2. Obtain Col Stats for Non Partition Cols
528- if (nonPartColNamesThatRqrStats .size () > 0 ) {
529- List <ColStatistics > hiveColStats = new ArrayList <ColStatistics >();
532+ if (! nonPartColNamesThatRqrStats .isEmpty () ) {
533+ List <ColStatistics > hiveColStats = new ArrayList <>();
530534
531535 if (!hiveTblMetadata .isPartitioned ()) {
532536 // 2.1 Handle the case for unpartitioned table.
@@ -547,9 +551,9 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
547551 if (hiveColStats .isEmpty ()) {
548552 colNamesFailedStats .addAll (nonPartColNamesThatRqrStats );
549553 } else if (hiveColStats .size () != nonPartColNamesThatRqrStats .size ()) {
550- Set <String > setOfFiledCols = new HashSet <String >(nonPartColNamesThatRqrStats );
554+ Set <String > setOfFiledCols = new HashSet <>(nonPartColNamesThatRqrStats );
551555
552- Set <String > setOfObtainedColStats = new HashSet <String >();
556+ Set <String > setOfObtainedColStats = new HashSet <>();
553557 for (ColStatistics cs : hiveColStats ) {
554558 setOfObtainedColStats .add (cs .getColumnName ());
555559 }
@@ -561,7 +565,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
561565 // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build hiveColStatsMap
562566 // using nonPartColIndxsThatRqrStats as below
563567 Map <String , ColStatistics > columnStatsMap =
564- new HashMap <String , ColStatistics >(hiveColStats .size ());
568+ new HashMap <>(hiveColStats .size ());
565569 for (ColStatistics cs : hiveColStats ) {
566570 columnStatsMap .put (cs .getColumnName (), cs );
567571 // even though the stats were estimated we need to warn user that
@@ -586,22 +590,21 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
586590 if (partitionList .getNotDeniedPartns ().isEmpty ()) {
587591 // no need to make a metastore call
588592 rowCount = 0 ;
589- hiveColStats = new ArrayList <ColStatistics >();
593+ hiveColStats = new ArrayList <>();
590594 for (int i = 0 ; i < nonPartColNamesThatRqrStats .size (); i ++) {
591595 // add empty stats object for each column
592596 hiveColStats .add (
593597 new ColStatistics (
594598 nonPartColNamesThatRqrStats .get (i ),
595599 hiveNonPartitionColsMap .get (nonPartColIndxsThatRqrStats .get (i )).getTypeName ()));
596600 }
597- colNamesFailedStats .clear ();
598601 colStatsCached .updateState (State .COMPLETE );
599602 } else {
600603 Statistics stats = StatsUtils .collectStatistics (hiveConf , partitionList ,
601604 hiveTblMetadata , hiveNonPartitionCols , nonPartColNamesThatRqrStats , colStatsCached ,
602605 nonPartColNamesThatRqrStats , true );
603606 rowCount = stats .getNumRows ();
604- hiveColStats = new ArrayList <ColStatistics >();
607+ hiveColStats = new ArrayList <>();
605608 for (String c : nonPartColNamesThatRqrStats ) {
606609 ColStatistics cs = stats .getColumnStatisticsFromColName (c );
607610 if (cs != null ) {
@@ -622,7 +625,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
622625 }
623626 }
624627
625- if (hiveColStats != null && hiveColStats .size () == nonPartColNamesThatRqrStats .size ()) {
628+ if (hiveColStats .size () == nonPartColNamesThatRqrStats .size ()) {
626629 for (int i = 0 ; i < hiveColStats .size (); i ++) {
627630 // the columns in nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
628631 // are in same order
@@ -754,7 +757,7 @@ public int hashCode() {
754757 }
755758
/**
 * Returns the key of the current partition list, or {@code null} when {@code partitionList}
 * has not been set.
 *
 * NOTE(review): this span is a diff hunk, so both versions of the return statement appear.
 * The line marked "-" is the removed form, which called {@code orElse(null)} on the result of
 * {@code getKey()} (presumably an Optional at that point; confirm against the partition list
 * class). The line marked "+" is the replacement, which returns {@code getKey()} directly.
 */
756759 public String getPartitionListKey () {
757- return partitionList != null ? partitionList .getKey (). orElse ( null ) : null ;
760+ return partitionList != null ? partitionList .getKey () : null ;
758761 }
759762
760763}
0 commit comments