@Override public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { // If the reduce sink has not been introduced due to bucketing/sorting, ignore it FileSinkOperator fsOp = (FileSinkOperator) nd; ReduceSinkOperator rsOp = (ReduceSinkOperator) fsOp.getParentOperators().get(0).getParentOperators().get(0); List<ReduceSinkOperator> rsOps = pGraphContext.getReduceSinkOperatorsAddedByEnforceBucketingSorting(); // nothing to do if ((rsOps != null) && (!rsOps.contains(rsOp))) { return null; } // Don't do this optimization with updates or deletes if (pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.UPDATE || pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.DELETE) { return null; } if (stack.get(0) instanceof TableScanOperator) { TableScanOperator tso = ((TableScanOperator) stack.get(0)); if (SemanticAnalyzer.isAcidTable(tso.getConf().getTableMetadata())) { /*ACID tables have complex directory layout and require merging of delta files * on read thus we should not try to read bucket files directly*/ return null; } } // Support for dynamic partitions can be added later if (fsOp.getConf().getDynPartCtx() != null) { return null; } // No conversion is possible for the reduce keys for (ExprNodeDesc keyCol : rsOp.getConf().getKeyCols()) { if (!(keyCol instanceof ExprNodeColumnDesc)) { return null; } } Table destTable = fsOp.getConf().getTable(); if (destTable == null) { return null; } int numBucketsDestination = destTable.getNumBuckets(); // Get the positions for sorted and bucketed columns // For sorted columns, also get the order (ascending/descending) - that should // also match for this to be converted to a map-only job. // Get the positions for sorted and bucketed columns // For sorted columns, also get the order (ascending/descending) - that should // also match for this to be converted to a map-only job. List<Integer> bucketPositions = getBucketPositions(destTable.getBucketCols(), destTable.getCols()); ObjectPair<List<Integer>, List<Integer>> sortOrderPositions = getSortPositionsOrder(destTable.getSortCols(), destTable.getCols()); List<Integer> sortPositions = sortOrderPositions.getFirst(); List<Integer> sortOrder = sortOrderPositions.getSecond(); boolean useBucketSortPositions = true; // Only selects and filters are allowed Operator<? extends OperatorDesc> op = rsOp; // TableScan will also be followed by a Select Operator. Find the expressions for the // bucketed/sorted columns for the destination table List<ExprNodeColumnDesc> sourceTableBucketCols = new ArrayList<ExprNodeColumnDesc>(); List<ExprNodeColumnDesc> sourceTableSortCols = new ArrayList<ExprNodeColumnDesc>(); op = op.getParentOperators().get(0); while (true) { if (!(op instanceof TableScanOperator) && !(op instanceof FilterOperator) && !(op instanceof SelectOperator) && !(op instanceof SMBMapJoinOperator)) { return null; } if (op instanceof SMBMapJoinOperator) { // Bucketing and sorting keys should exactly match if (!(bucketPositions.equals(sortPositions))) { return null; } SMBMapJoinOperator smbOp = (SMBMapJoinOperator) op; SMBJoinDesc smbJoinDesc = smbOp.getConf(); int posBigTable = smbJoinDesc.getPosBigTable(); // join keys dont match the bucketing keys List<ExprNodeDesc> keysBigTable = smbJoinDesc.getKeys().get((byte) posBigTable); if (keysBigTable.size() != bucketPositions.size()) { return null; } if (!validateSMBJoinKeys( smbJoinDesc, sourceTableBucketCols, sourceTableSortCols, sortOrder)) { return null; } sourceTableBucketCols.clear(); sourceTableSortCols.clear(); useBucketSortPositions = false; for (ExprNodeDesc keyBigTable : keysBigTable) { if (!(keyBigTable instanceof ExprNodeColumnDesc)) { return null; } sourceTableBucketCols.add((ExprNodeColumnDesc) keyBigTable); sourceTableSortCols.add((ExprNodeColumnDesc) keyBigTable); } // since it is a sort-merge join, only follow the big table op = op.getParentOperators().get(posBigTable); } else { // nothing to be done for filters - the output schema does not change. if (op instanceof TableScanOperator) { assert !useBucketSortPositions; TableScanOperator ts = (TableScanOperator) op; Table srcTable = ts.getConf().getTableMetadata(); // Find the positions of the bucketed columns in the table corresponding // to the select list. // Consider the following scenario: // T1(key, value1, value2) bucketed/sorted by key into 2 buckets // T2(dummy, key, value1, value2) bucketed/sorted by key into 2 buckets // A query like: insert overwrite table T2 select 1, key, value1, value2 from T1 // should be optimized. // Start with the destination: T2, bucketed/sorted position is [1] // At the source T1, the column corresponding to that position is [key], which // maps to column [0] of T1, which is also bucketed/sorted into the same // number of buckets List<Integer> newBucketPositions = new ArrayList<Integer>(); for (int pos = 0; pos < bucketPositions.size(); pos++) { ExprNodeColumnDesc col = sourceTableBucketCols.get(pos); String colName = col.getColumn(); int bucketPos = findColumnPosition(srcTable.getCols(), colName); if (bucketPos < 0) { return null; } newBucketPositions.add(bucketPos); } // Find the positions/order of the sorted columns in the table corresponding // to the select list. List<Integer> newSortPositions = new ArrayList<Integer>(); for (int pos = 0; pos < sortPositions.size(); pos++) { ExprNodeColumnDesc col = sourceTableSortCols.get(pos); String colName = col.getColumn(); int sortPos = findColumnPosition(srcTable.getCols(), colName); if (sortPos < 0) { return null; } newSortPositions.add(sortPos); } if (srcTable.isPartitioned()) { PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(srcTable.getTableName(), ts); List<Partition> partitions = prunedParts.getNotDeniedPartns(); // Support for dynamic partitions can be added later // The following is not optimized: // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1'; // where T1 and T2 are bucketed by the same keys and partitioned by ds. hr if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) { return null; } for (Partition partition : partitions) { if (!checkPartition( partition, newBucketPositions, newSortPositions, sortOrder, numBucketsDestination)) { return null; } } removeReduceSink( rsOp, (TableScanOperator) op, fsOp, partitions.get(0).getSortedPaths()); return null; } else { if (!checkTable( srcTable, newBucketPositions, newSortPositions, sortOrder, numBucketsDestination)) { return null; } removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable.getSortedPaths()); return null; } } // None of the operators is changing the positions else if (op instanceof SelectOperator) { SelectOperator selectOp = (SelectOperator) op; SelectDesc selectDesc = selectOp.getConf(); // Iterate backwards, from the destination table to the top of the tree // Based on the output column names, get the new columns. if (!useBucketSortPositions) { bucketPositions.clear(); sortPositions.clear(); List<String> outputColumnNames = selectDesc.getOutputColumnNames(); for (ExprNodeColumnDesc col : sourceTableBucketCols) { String colName = col.getColumn(); int colPos = outputColumnNames.indexOf(colName); if (colPos < 0) { return null; } bucketPositions.add(colPos); } for (ExprNodeColumnDesc col : sourceTableSortCols) { String colName = col.getColumn(); int colPos = outputColumnNames.indexOf(colName); if (colPos < 0) { return null; } sortPositions.add(colPos); } } // There may be multiple selects - chose the one closest to the table sourceTableBucketCols.clear(); sourceTableSortCols.clear(); // Only columns can be selected for both sorted and bucketed positions for (int pos : bucketPositions) { ExprNodeDesc selectColList = selectDesc.getColList().get(pos); if (!(selectColList instanceof ExprNodeColumnDesc)) { return null; } sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList); } for (int pos : sortPositions) { ExprNodeDesc selectColList = selectDesc.getColList().get(pos); if (!(selectColList instanceof ExprNodeColumnDesc)) { return null; } sourceTableSortCols.add((ExprNodeColumnDesc) selectColList); } useBucketSortPositions = false; } op = op.getParentOperators().get(0); } } }
// The output columns for the destination table should match with the join keys // This is to handle queries of the form: // insert overwrite table T3 // select T1.key, T1.key2, UDF(T1.value, T2.value) // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2 // where T1, T2 and T3 are bucketized/sorted on key and key2 // Assuming T1 is the table on which the mapper is run, the following is true: // . The number of buckets for T1 and T3 should be same // . The bucketing/sorting columns for T1, T2 and T3 should be same // . The sort order of T1 should match with the sort order for T3. // . If T1 is partitioned, only a single partition of T1 can be selected. // . The select list should contain with (T1.key, T1.key2) or (T2.key, T2.key2) // . After the join, only selects and filters are allowed. private boolean validateSMBJoinKeys( SMBJoinDesc smbJoinDesc, List<ExprNodeColumnDesc> sourceTableBucketCols, List<ExprNodeColumnDesc> sourceTableSortCols, List<Integer> sortOrder) { // The sort-merge join creates the output sorted and bucketized by the same columns. // This can be relaxed in the future if there is a requirement. if (!sourceTableBucketCols.equals(sourceTableSortCols)) { return false; } // Get the total number of columns selected, and for each output column, store the // base table it points to. For // insert overwrite table T3 // select T1.key, T1.key2, UDF(T1.value, T2.value) // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2 // the following arrays are created // [0, 0, 0, 1] --> [T1, T1, T1, T2] (table mapping) // [0, 1, 2, 0] --> [T1.0, T1.1, T1.2, T2.0] (table columns mapping) Byte[] tagOrder = smbJoinDesc.getTagOrder(); Map<Byte, List<Integer>> retainList = smbJoinDesc.getRetainList(); int totalNumberColumns = 0; for (Byte tag : tagOrder) { totalNumberColumns += retainList.get(tag).size(); } byte[] columnTableMappings = new byte[totalNumberColumns]; int[] columnNumberMappings = new int[totalNumberColumns]; int currentColumnPosition = 0; for (Byte tag : tagOrder) { for (int pos = 0; pos < retainList.get(tag).size(); pos++) { columnTableMappings[currentColumnPosition] = tag; columnNumberMappings[currentColumnPosition] = pos; currentColumnPosition++; } } // All output columns used for bucketing/sorting of the destination table should // belong to the same input table // insert overwrite table T3 // select T1.key, T2.key2, UDF(T1.value, T2.value) // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2 // is not optimized, whereas the insert is optimized if the select list is either changed to // (T1.key, T1.key2, UDF(T1.value, T2.value)) or (T2.key, T2.key2, UDF(T1.value, T2.value)) // Get the input table and make sure the keys match List<String> outputColumnNames = smbJoinDesc.getOutputColumnNames(); byte tableTag = -1; int[] columnNumbersExprList = new int[sourceTableBucketCols.size()]; int currentColPosition = 0; for (ExprNodeColumnDesc bucketCol : sourceTableBucketCols) { String colName = bucketCol.getColumn(); int colNumber = outputColumnNames.indexOf(colName); if (colNumber < 0) { return false; } if (tableTag < 0) { tableTag = columnTableMappings[colNumber]; } else if (tableTag != columnTableMappings[colNumber]) { return false; } columnNumbersExprList[currentColPosition++] = columnNumberMappings[colNumber]; } List<ExprNodeDesc> allExprs = smbJoinDesc.getExprs().get(tableTag); List<ExprNodeDesc> keysSelectedTable = smbJoinDesc.getKeys().get(tableTag); currentColPosition = 0; for (ExprNodeDesc keySelectedTable : keysSelectedTable) { if (!(keySelectedTable instanceof ExprNodeColumnDesc)) { return false; } if (!allExprs.get(columnNumbersExprList[currentColPosition++]).isSame(keySelectedTable)) { return false; } } return true; }