/**
 * Converts the skewedValue available as a string in the metadata to the appropriate object by
 * using the type of the column from the join key.
 *
 * @param skewedValue the skewed value as stored in the metadata, in string form
 * @param keyCol the join key column whose type the skewed value is converted to
 * @return an expression node descriptor of the appropriate constant
 */
private ExprNodeConstantDesc createConstDesc(String skewedValue, ExprNodeColumnDesc keyCol) {
  ObjectInspector inputOI =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo);
  ObjectInspector outputOI =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(keyCol.getTypeInfo());
  Converter converter = ObjectInspectorConverters.getConverter(inputOI, outputOI);
  Object skewedValueObject = converter.convert(skewedValue);
  return new ExprNodeConstantDesc(keyCol.getTypeInfo(), skewedValueObject);
}
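// Illustration (not part of the original source): a minimal, hedged sketch of the same
// ObjectInspectorConverters pattern used above, here converting the string "42" into a Java
// Integer. The method name is hypothetical; the inspectors and Converter API are the standard
// Hive serde2 classes (TypeInfoUtils, TypeInfoFactory, ObjectInspectorConverters).
private static Object convertStringToIntExample() {
  ObjectInspector stringOI =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo);
  ObjectInspector intOI =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.intTypeInfo);
  Converter converter = ObjectInspectorConverters.getConverter(stringOI, intOI);
  return converter.convert("42"); // expected to yield Integer.valueOf(42)
}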
private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) {
  if (alias != null) {
    return alias;
  } else if (exprNode instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode;
    return colDesc.getColumn();
  }
  return "matchpath_col_" + colIdx;
}
protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException {
  InputCtx ic = getInputCtx(col);
  int pos = ic.hiveNameToPosMap.get(col.getColumn());
  return cluster
      .getRexBuilder()
      .makeInputRef(
          ic.calciteInpDataType.getFieldList().get(pos).getType(),
          pos + ic.offsetInCalciteSchema);
}
private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
  InputCtx ctxLookingFor = null;

  if (inputCtxs.size() == 1) {
    ctxLookingFor = inputCtxs.get(0);
  } else {
    String tableAlias = col.getTabAlias();
    String colAlias = col.getColumn();
    int noInp = 0;
    for (InputCtx ic : inputCtxs) {
      if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) {
        if (ic.hiveRR.getPosition(colAlias) >= 0) {
          ctxLookingFor = ic;
          noInp++;
        }
      }
    }
    if (noInp > 1) {
      throw new RuntimeException("Ambiguous column mapping");
    }
  }

  return ctxLookingFor;
}
private ExprNodeDesc analyzeExpr(
    ExprNodeGenericFuncDesc expr,
    List<IndexSearchCondition> searchConditions,
    Object... nodeOutputs) {

  if (FunctionRegistry.isOpAnd(expr)) {
    assert (nodeOutputs.length == 2);
    ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
    ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
    if (residual1 == null) {
      return residual2;
    }
    if (residual2 == null) {
      return residual1;
    }
    List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
    residuals.add(residual1);
    residuals.add(residual2);
    return new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo,
        FunctionRegistry.getGenericUDFForAnd(),
        residuals);
  }

  GenericUDF genericUDF = expr.getGenericUDF();
  if (!(genericUDF instanceof GenericUDFBaseCompare)) {
    return expr;
  }
  ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
  ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
  // We may need to peel off the GenericUDFBridge that is added by CBO or the user
  if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
    expr1 = getColumnExpr(expr1);
    expr2 = getColumnExpr(expr2);
  }

  ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
  if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
    return expr;
  }

  ExprNodeColumnDesc columnDesc;
  ExprNodeConstantDesc constantDesc;
  if (extracted[0] instanceof ExprNodeConstantDesc) {
    genericUDF = genericUDF.flip();
    columnDesc = (ExprNodeColumnDesc) extracted[1];
    constantDesc = (ExprNodeConstantDesc) extracted[0];
  } else {
    columnDesc = (ExprNodeColumnDesc) extracted[0];
    constantDesc = (ExprNodeConstantDesc) extracted[1];
  }

  String udfName = genericUDF.getUdfName();
  if (!udfNames.contains(udfName)) {
    return expr;
  }

  if (!allowedColumnNames.contains(columnDesc.getColumn())) {
    return expr;
  }

  String[] fields = null;
  if (extracted.length > 2) {
    ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
    if (!isValidField(fieldDesc)) {
      return expr;
    }
    fields = ExprNodeDescUtils.extractFields(fieldDesc);
  }

  // We also need to update the expr so that the index query can be generated.
  // Note that Hive does not support UDFToDouble etc. in the query text.
  List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
  list.add(expr1);
  list.add(expr2);
  expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);

  searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, expr, fields));

  // We converted the expression to a search condition, so remove it from the residual
  // predicate; if it references struct fields, keep it in the residual as well.
  return fields == null ? null : expr;
}
/**
 * Returns the skewed values in all the tables which are going to be scanned. If the join is on
 * columns c1, c2 and c3 on tables T1 and T2, T1 is skewed on c1 and c4 with the skew values
 * ((1,2),(3,4)), whereas T2 is skewed on c1, c2 with skew values ((5,6),(7,8)), the resulting
 * map would be: <(c1) -> ((1), (3)), (c1,c2) -> ((5,6),(7,8))>
 *
 * @param op the join operator being optimized
 * @param tableScanOpsForJoin table scan operators which are parents of the join operator
 * @return map<join keys intersected with skewed keys, list of skewed values>
 */
private Map<List<ExprNodeDesc>, List<List<String>>> getSkewedValues(
    Operator<? extends OperatorDesc> op, List<TableScanOperator> tableScanOpsForJoin) {

  Map<List<ExprNodeDesc>, List<List<String>>> skewDataReturn =
      new HashMap<List<ExprNodeDesc>, List<List<String>>>();

  Map<List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData =
      new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>();

  // The join keys are available in the reduceSinkOperators before the join
  for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) {
    ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf();

    if (rsDesc.getKeyCols() != null) {
      Table table = null;
      // Find the skew information corresponding to the table
      List<String> skewedColumns = null;
      List<List<String>> skewedValueList = null;

      // The join columns which are also skewed
      List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols =
          new ArrayList<ExprNodeDescEqualityWrapper>();

      // Skewed keys which intersect with the join keys
      List<Integer> positionSkewedKeys = new ArrayList<Integer>();

      // Update the joinKeys appropriately.
      for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) {
        ExprNodeColumnDesc keyCol = null;

        // If the key column is not a column, then don't apply this optimization.
        // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445
        // for type conversion UDFs.
        if (keyColDesc instanceof ExprNodeColumnDesc) {
          keyCol = (ExprNodeColumnDesc) keyColDesc;
          if (table == null) {
            table = getTable(parseContext, reduceSinkOp, tableScanOpsForJoin);
            skewedColumns = table == null ? null : table.getSkewedColNames();
            // No skew on the table to take care of
            if ((skewedColumns == null) || (skewedColumns.isEmpty())) {
              continue;
            }
            skewedValueList = table == null ? null : table.getSkewedColValues();
          }

          int pos = skewedColumns.indexOf(keyCol.getColumn());
          if ((pos >= 0) && (!positionSkewedKeys.contains(pos))) {
            positionSkewedKeys.add(pos);
            ExprNodeColumnDesc keyColClone = (ExprNodeColumnDesc) keyCol.clone();
            keyColClone.setTabAlias(null);
            joinKeysSkewedCols.add(new ExprNodeDescEqualityWrapper(keyColClone));
          }
        }
      }

      // If the skew keys match the join keys, then add it to the list
      if ((skewedColumns != null) && (!skewedColumns.isEmpty())) {
        if (!joinKeysSkewedCols.isEmpty()) {
          // If the join keys match the skewed keys, use the table's skewed values as-is
          List<List<String>> skewedJoinValues;
          if (skewedColumns.size() == positionSkewedKeys.size()) {
            skewedJoinValues = skewedValueList;
          } else {
            skewedJoinValues = getSkewedJoinValues(skewedValueList, positionSkewedKeys);
          }

          List<List<String>> oldSkewedJoinValues = skewData.get(joinKeysSkewedCols);
          if (oldSkewedJoinValues == null) {
            oldSkewedJoinValues = new ArrayList<List<String>>();
          }
          for (List<String> skewValue : skewedJoinValues) {
            if (!oldSkewedJoinValues.contains(skewValue)) {
              oldSkewedJoinValues.add(skewValue);
            }
          }

          skewData.put(joinKeysSkewedCols, oldSkewedJoinValues);
        }
      }
    }
  }

  // Convert skewData so that the keys contain ExprNodeDesc instead of the equality wrappers
  for (Map.Entry<List<ExprNodeDescEqualityWrapper>, List<List<String>>> mapEntry :
      skewData.entrySet()) {
    List<ExprNodeDesc> skewedKeyJoinCols = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDescEqualityWrapper key : mapEntry.getKey()) {
      skewedKeyJoinCols.add(key.getExprNodeDesc());
    }
    skewDataReturn.put(skewedKeyJoinCols, mapEntry.getValue());
  }

  return skewDataReturn;
}
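// Illustration (not part of the original source): the shape of the map returned by
// getSkewedValues for the Javadoc example above (T1 skewed on (c1, c4) with values ((1,2),(3,4));
// T2 skewed on (c1, c2) with values ((5,6),(7,8)); join keys c1, c2, c3). Plain column names
// stand in for the ExprNodeDesc keys built by the real method, and the method name is
// hypothetical; it assumes java.util.Arrays and java.util.HashMap.
private static Map<List<String>, List<List<String>>> skewedValuesExampleShape() {
  Map<List<String>, List<List<String>>> shape = new HashMap<List<String>, List<List<String>>>();
  // T1: of its skewed columns (c1, c4), only c1 is a join key, so only the c1 components survive
  shape.put(Arrays.asList("c1"),
      Arrays.asList(Arrays.asList("1"), Arrays.asList("3")));
  // T2: both of its skewed columns (c1, c2) are join keys, so its skewed values are used unchanged
  shape.put(Arrays.asList("c1", "c2"),
      Arrays.asList(Arrays.asList("5", "6"), Arrays.asList("7", "8")));
  return shape;
}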
@Override
public Object process(
    Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
  FileSinkOperator fsOp = (FileSinkOperator) nd;
  ReduceSinkOperator rsOp =
      (ReduceSinkOperator) fsOp.getParentOperators().get(0).getParentOperators().get(0);
  List<ReduceSinkOperator> rsOps =
      pGraphContext.getReduceSinkOperatorsAddedByEnforceBucketingSorting();
  // nothing to do
  if ((rsOps != null) && (!rsOps.contains(rsOp))) {
    return null;
  }

  // Don't do this optimization with updates or deletes
  if (pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.UPDATE
      || pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.DELETE) {
    return null;
  }

  if (stack.get(0) instanceof TableScanOperator) {
    TableScanOperator tso = ((TableScanOperator) stack.get(0));
    if (SemanticAnalyzer.isAcidTable(tso.getConf().getTableMetadata())) {
      /* ACID tables have a complex directory layout and require merging of delta files
       * on read, thus we should not try to read bucket files directly */
      return null;
    }
  }

  // Support for dynamic partitions can be added later
  if (fsOp.getConf().getDynPartCtx() != null) {
    return null;
  }

  // No conversion is possible if any of the reduce keys is not a column
  for (ExprNodeDesc keyCol : rsOp.getConf().getKeyCols()) {
    if (!(keyCol instanceof ExprNodeColumnDesc)) {
      return null;
    }
  }

  Table destTable = fsOp.getConf().getTable();
  if (destTable == null) {
    return null;
  }
  int numBucketsDestination = destTable.getNumBuckets();

  // Get the positions for sorted and bucketed columns
  // For sorted columns, also get the order (ascending/descending) - that should
  // also match for this to be converted to a map-only job.
  List<Integer> bucketPositions =
      getBucketPositions(destTable.getBucketCols(), destTable.getCols());
  ObjectPair<List<Integer>, List<Integer>> sortOrderPositions =
      getSortPositionsOrder(destTable.getSortCols(), destTable.getCols());
  List<Integer> sortPositions = sortOrderPositions.getFirst();
  List<Integer> sortOrder = sortOrderPositions.getSecond();
  boolean useBucketSortPositions = true;

  // Only selects and filters are allowed
  Operator<? extends OperatorDesc> op = rsOp;
  // TableScan will also be followed by a Select Operator. Find the expressions for the
  // bucketed/sorted columns for the destination table
  List<ExprNodeColumnDesc> sourceTableBucketCols = new ArrayList<ExprNodeColumnDesc>();
  List<ExprNodeColumnDesc> sourceTableSortCols = new ArrayList<ExprNodeColumnDesc>();
  op = op.getParentOperators().get(0);

  while (true) {
    if (!(op instanceof TableScanOperator)
        && !(op instanceof FilterOperator)
        && !(op instanceof SelectOperator)
        && !(op instanceof SMBMapJoinOperator)) {
      return null;
    }

    if (op instanceof SMBMapJoinOperator) {
      // Bucketing and sorting keys should exactly match
      if (!(bucketPositions.equals(sortPositions))) {
        return null;
      }
      SMBMapJoinOperator smbOp = (SMBMapJoinOperator) op;
      SMBJoinDesc smbJoinDesc = smbOp.getConf();
      int posBigTable = smbJoinDesc.getPosBigTable();

      // If the join keys don't match the bucketing keys, bail out
      List<ExprNodeDesc> keysBigTable = smbJoinDesc.getKeys().get((byte) posBigTable);
      if (keysBigTable.size() != bucketPositions.size()) {
        return null;
      }

      if (!validateSMBJoinKeys(
          smbJoinDesc, sourceTableBucketCols, sourceTableSortCols, sortOrder)) {
        return null;
      }

      sourceTableBucketCols.clear();
      sourceTableSortCols.clear();
      useBucketSortPositions = false;

      for (ExprNodeDesc keyBigTable : keysBigTable) {
        if (!(keyBigTable instanceof ExprNodeColumnDesc)) {
          return null;
        }
        sourceTableBucketCols.add((ExprNodeColumnDesc) keyBigTable);
        sourceTableSortCols.add((ExprNodeColumnDesc) keyBigTable);
      }

      // since it is a sort-merge join, only follow the big table
      op = op.getParentOperators().get(posBigTable);
    } else {
      // nothing to be done for filters - the output schema does not change.
      if (op instanceof TableScanOperator) {
        assert !useBucketSortPositions;
        TableScanOperator ts = (TableScanOperator) op;
        Table srcTable = ts.getConf().getTableMetadata();

        // Find the positions of the bucketed columns in the table corresponding
        // to the select list.
        // Consider the following scenario:
        // T1(key, value1, value2) bucketed/sorted by key into 2 buckets
        // T2(dummy, key, value1, value2) bucketed/sorted by key into 2 buckets
        // A query like: insert overwrite table T2 select 1, key, value1, value2 from T1
        // should be optimized.
        // Start with the destination: T2, bucketed/sorted position is [1]
        // At the source T1, the column corresponding to that position is [key], which
        // maps to column [0] of T1, which is also bucketed/sorted into the same
        // number of buckets
        List<Integer> newBucketPositions = new ArrayList<Integer>();
        for (int pos = 0; pos < bucketPositions.size(); pos++) {
          ExprNodeColumnDesc col = sourceTableBucketCols.get(pos);
          String colName = col.getColumn();
          int bucketPos = findColumnPosition(srcTable.getCols(), colName);
          if (bucketPos < 0) {
            return null;
          }
          newBucketPositions.add(bucketPos);
        }

        // Find the positions/order of the sorted columns in the table corresponding
        // to the select list.
        List<Integer> newSortPositions = new ArrayList<Integer>();
        for (int pos = 0; pos < sortPositions.size(); pos++) {
          ExprNodeColumnDesc col = sourceTableSortCols.get(pos);
          String colName = col.getColumn();
          int sortPos = findColumnPosition(srcTable.getCols(), colName);
          if (sortPos < 0) {
            return null;
          }
          newSortPositions.add(sortPos);
        }

        if (srcTable.isPartitioned()) {
          PrunedPartitionList prunedParts =
              pGraphContext.getPrunedPartitions(srcTable.getTableName(), ts);
          List<Partition> partitions = prunedParts.getNotDeniedPartns();

          // Support for dynamic partitions can be added later
          // The following is not optimized:
          // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1';
          // where T1 and T2 are bucketed by the same keys and partitioned by ds, hr
          if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) {
            return null;
          }
          for (Partition partition : partitions) {
            if (!checkPartition(
                partition, newBucketPositions, newSortPositions, sortOrder,
                numBucketsDestination)) {
              return null;
            }
          }

          removeReduceSink(
              rsOp, (TableScanOperator) op, fsOp, partitions.get(0).getSortedPaths());
          return null;
        } else {
          if (!checkTable(
              srcTable, newBucketPositions, newSortPositions, sortOrder,
              numBucketsDestination)) {
            return null;
          }

          removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable.getSortedPaths());
          return null;
        }
      }
      // None of the other operators changes the column positions
      else if (op instanceof SelectOperator) {
        SelectOperator selectOp = (SelectOperator) op;
        SelectDesc selectDesc = selectOp.getConf();

        // Iterate backwards, from the destination table to the top of the tree.
        // Based on the output column names, get the new columns.
        if (!useBucketSortPositions) {
          bucketPositions.clear();
          sortPositions.clear();
          List<String> outputColumnNames = selectDesc.getOutputColumnNames();

          for (ExprNodeColumnDesc col : sourceTableBucketCols) {
            String colName = col.getColumn();
            int colPos = outputColumnNames.indexOf(colName);
            if (colPos < 0) {
              return null;
            }
            bucketPositions.add(colPos);
          }

          for (ExprNodeColumnDesc col : sourceTableSortCols) {
            String colName = col.getColumn();
            int colPos = outputColumnNames.indexOf(colName);
            if (colPos < 0) {
              return null;
            }
            sortPositions.add(colPos);
          }
        }

        // There may be multiple selects - choose the one closest to the table
        sourceTableBucketCols.clear();
        sourceTableSortCols.clear();

        // Only columns can be selected for both sorted and bucketed positions
        for (int pos : bucketPositions) {
          ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
          if (!(selectColList instanceof ExprNodeColumnDesc)) {
            return null;
          }
          sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList);
        }

        for (int pos : sortPositions) {
          ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
          if (!(selectColList instanceof ExprNodeColumnDesc)) {
            return null;
          }
          sourceTableSortCols.add((ExprNodeColumnDesc) selectColList);
        }

        useBucketSortPositions = false;
      }
      op = op.getParentOperators().get(0);
    }
  }
}
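// Illustration (not part of the original source): the bucket-position remapping described in the
// comments above, for the toy schemas T1(key, value1, value2) and T2(dummy, key, value1, value2),
// both bucketed/sorted by key into the same number of buckets, and the query
//   insert overwrite table T2 select 1, key, value1, value2 from T1
// Plain strings stand in for the column descriptors; the method name is hypothetical and it
// assumes java.util.Arrays.
private static int bucketPositionRemappingExample() {
  List<String> selectList = Arrays.asList("1", "key", "value1", "value2"); // select expressions
  List<String> srcCols = Arrays.asList("key", "value1", "value2");         // columns of T1

  int destBucketPos = 1;                             // T2 is bucketed on "key", position 1
  String selectExpr = selectList.get(destBucketPos); // the select expression feeding that column
  int srcBucketPos = srcCols.indexOf(selectExpr);    // resolves to position 0 of T1
  // srcBucketPos == 0 and T1 is bucketed/sorted on that column, so the ReduceSink introduced to
  // enforce bucketing/sorting can be removed and the insert becomes a map-only job.
  return srcBucketPos;
}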
// The output columns for the destination table should match the join keys.
// This is to handle queries of the form:
// insert overwrite table T3
// select T1.key, T1.key2, UDF(T1.value, T2.value)
// from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2
// where T1, T2 and T3 are bucketed/sorted on key and key2.
// Assuming T1 is the table on which the mapper is run, the following must be true:
// . The number of buckets for T1 and T3 should be the same.
// . The bucketing/sorting columns for T1, T2 and T3 should be the same.
// . The sort order of T1 should match the sort order of T3.
// . If T1 is partitioned, only a single partition of T1 can be selected.
// . The select list should contain (T1.key, T1.key2) or (T2.key, T2.key2).
// . After the join, only selects and filters are allowed.
private boolean validateSMBJoinKeys(
    SMBJoinDesc smbJoinDesc,
    List<ExprNodeColumnDesc> sourceTableBucketCols,
    List<ExprNodeColumnDesc> sourceTableSortCols,
    List<Integer> sortOrder) {
  // The sort-merge join creates the output sorted and bucketed by the same columns.
  // This can be relaxed in the future if there is a requirement.
  if (!sourceTableBucketCols.equals(sourceTableSortCols)) {
    return false;
  }

  // Get the total number of columns selected, and for each output column, store the
  // base table it points to. For
  // insert overwrite table T3
  // select T1.key, T1.key2, UDF(T1.value, T2.value)
  // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2
  // the following arrays are created
  // [0, 0, 0, 1] --> [T1, T1, T1, T2] (table mapping)
  // [0, 1, 2, 0] --> [T1.0, T1.1, T1.2, T2.0] (table columns mapping)
  Byte[] tagOrder = smbJoinDesc.getTagOrder();
  Map<Byte, List<Integer>> retainList = smbJoinDesc.getRetainList();

  int totalNumberColumns = 0;
  for (Byte tag : tagOrder) {
    totalNumberColumns += retainList.get(tag).size();
  }

  byte[] columnTableMappings = new byte[totalNumberColumns];
  int[] columnNumberMappings = new int[totalNumberColumns];
  int currentColumnPosition = 0;
  for (Byte tag : tagOrder) {
    for (int pos = 0; pos < retainList.get(tag).size(); pos++) {
      columnTableMappings[currentColumnPosition] = tag;
      columnNumberMappings[currentColumnPosition] = pos;
      currentColumnPosition++;
    }
  }

  // All output columns used for bucketing/sorting of the destination table should
  // belong to the same input table:
  // insert overwrite table T3
  // select T1.key, T2.key2, UDF(T1.value, T2.value)
  // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2
  // is not optimized, whereas the insert is optimized if the select list is either changed to
  // (T1.key, T1.key2, UDF(T1.value, T2.value)) or (T2.key, T2.key2, UDF(T1.value, T2.value)).
  // Get the input table and make sure the keys match.
  List<String> outputColumnNames = smbJoinDesc.getOutputColumnNames();
  byte tableTag = -1;
  int[] columnNumbersExprList = new int[sourceTableBucketCols.size()];
  int currentColPosition = 0;
  for (ExprNodeColumnDesc bucketCol : sourceTableBucketCols) {
    String colName = bucketCol.getColumn();
    int colNumber = outputColumnNames.indexOf(colName);
    if (colNumber < 0) {
      return false;
    }
    if (tableTag < 0) {
      tableTag = columnTableMappings[colNumber];
    } else if (tableTag != columnTableMappings[colNumber]) {
      return false;
    }
    columnNumbersExprList[currentColPosition++] = columnNumberMappings[colNumber];
  }

  List<ExprNodeDesc> allExprs = smbJoinDesc.getExprs().get(tableTag);
  List<ExprNodeDesc> keysSelectedTable = smbJoinDesc.getKeys().get(tableTag);
  currentColPosition = 0;
  for (ExprNodeDesc keySelectedTable : keysSelectedTable) {
    if (!(keySelectedTable instanceof ExprNodeColumnDesc)) {
      return false;
    }
    if (!allExprs.get(columnNumbersExprList[currentColPosition++]).isSame(keySelectedTable)) {
      return false;
    }
  }

  return true;
}
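// Illustration (not part of the original source): the table/column mapping arrays described in
// the comments above, built for a two-way join where tag 0 (T1) retains three columns and
// tag 1 (T2) retains one column. The tag order and retain list are toy inputs, not taken from a
// real SMBJoinDesc; the method name is hypothetical and it assumes java.util.Arrays/HashMap.
private static void smbColumnMappingExample() {
  Byte[] exampleTagOrder = new Byte[] {(byte) 0, (byte) 1};
  Map<Byte, List<Integer>> exampleRetainList = new HashMap<Byte, List<Integer>>();
  exampleRetainList.put((byte) 0, Arrays.asList(0, 1, 2)); // T1 retains key, key2, value
  exampleRetainList.put((byte) 1, Arrays.asList(0));       // T2 retains value

  int total = 0;
  for (Byte tag : exampleTagOrder) {
    total += exampleRetainList.get(tag).size();
  }
  byte[] tableMapping = new byte[total];
  int[] columnMapping = new int[total];
  int idx = 0;
  for (Byte tag : exampleTagOrder) {
    for (int pos = 0; pos < exampleRetainList.get(tag).size(); pos++) {
      tableMapping[idx] = tag;
      columnMapping[idx] = pos;
      idx++;
    }
  }
  // tableMapping  == [0, 0, 0, 1]  --> [T1, T1, T1, T2]
  // columnMapping == [0, 1, 2, 0]  --> [T1.0, T1.1, T1.2, T2.0]
}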