private RowResolver buildPrunedRR(List<String> prunedCols, RowResolver oldRR,
    ArrayList<ColumnInfo> sig) throws SemanticException {
  RowResolver newRR = new RowResolver();
  HashSet<String> prunedColsSet = new HashSet<String>(prunedCols);
  for (ColumnInfo cInfo : oldRR.getRowSchema().getSignature()) {
    if (prunedColsSet.contains(cInfo.getInternalName())) {
      String[] nm = oldRR.reverseLookup(cInfo.getInternalName());
      newRR.put(nm[0], nm[1], cInfo);
      sig.add(cInfo);
    }
  }
  return newRR;
}
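// Hedged usage sketch (not from the original source; "t", "c", and "ci" are
// hypothetical): buildPrunedRR relies on the RowResolver round trip below, where
// reverseLookup returns the {tableAlias, columnAlias} pair a ColumnInfo was
// registered under, so surviving columns can be re-registered under the same aliases.
RowResolver rr = new RowResolver();
rr.put("t", "c", ci);                                 // forward mapping: t.c -> ci
String[] nm = rr.reverseLookup(ci.getInternalName()); // nm == {"t", "c"}
// buildPrunedRR passes exactly this pair to newRR.put(nm[0], nm[1], cInfo)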
/*
 * add array<struct> to the list of columns
 */
protected static RowResolver createSelectListRR(MatchPath evaluator, PTFInputDef inpDef)
    throws SemanticException {
  RowResolver rr = new RowResolver();
  RowResolver inputRR = inpDef.getOutputShape().getRr();
  evaluator.inputColumnNamesMap = new HashMap<String, String>();
  ArrayList<String> inputColumnNames = new ArrayList<String>();
  ArrayList<ObjectInspector> inpColOIs = new ArrayList<ObjectInspector>();

  for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) {
    ColumnInfo cInfo = new ColumnInfo(inpCInfo);
    String colAlias = cInfo.getAlias();

    String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName());
    if (tabColAlias != null) {
      colAlias = tabColAlias[1];
    }

    ASTNode inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR);
    if (inExpr != null) {
      rr.putExpression(inExpr, cInfo);
      colAlias = inExpr.toStringTree().toLowerCase();
    } else {
      colAlias = colAlias == null ? cInfo.getInternalName() : colAlias;
      rr.put(cInfo.getTabAlias(), colAlias, cInfo);
    }

    evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias);
    inputColumnNames.add(colAlias);
    inpColOIs.add(cInfo.getObjectInspector());
  }

  StandardListObjectInspector pathAttrOI =
      ObjectInspectorFactory.getStandardListObjectInspector(
          ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, inpColOIs));

  ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME,
      TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI), null, false, false);
  rr.put(null, PATHATTR_NAME, pathColumn);

  return rr;
}
/**
 * Build ExprNodeColumnDesc for the projections in the input operator from startPos to endPos
 * (both included). The operator must have an associated colExprMap.
 *
 * @param inputOp Input Hive Operator
 * @param startPos starting position in the input operator schema; must be >= 0 and <= endPos
 * @param endPos end position in the input operator schema; must be >= 0
 * @return List of ExprNodeDesc
 */
public static ArrayList<ExprNodeDesc> genExprNodeDesc(Operator inputOp, int startPos, int endPos,
    boolean addEmptyTabAlias, boolean setColToNonVirtual) {
  ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
  List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();

  String tabAlias;
  boolean vc;
  ColumnInfo ci;
  for (int i = startPos; i <= endPos; i++) {
    ci = colInfoLst.get(i);
    tabAlias = ci.getTabAlias();
    if (addEmptyTabAlias) {
      tabAlias = "";
    }
    vc = ci.getIsVirtualCol();
    if (setColToNonVirtual) {
      vc = false;
    }
    exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
  }

  return exprColLst;
}
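// Hedged usage sketch (not from the original source; "parentOp" is hypothetical):
// project the first three columns of an operator's schema as column expressions,
// with table aliases blanked and virtual-column flags cleared.
ArrayList<ExprNodeDesc> firstThree =
    genExprNodeDesc(parentOp, 0, 2, true /* addEmptyTabAlias */, true /* setColToNonVirtual */);
// firstThree.get(i) wraps colInfoLst.get(i).getInternalName() with tabAlias "" and vc == false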
/**
 * The pruning needs to preserve the order of columns in the input schema.
 *
 * @param op operator whose input schema defines the column order
 * @param cols internal names of the columns to keep
 * @return the retained columns, reordered to match the input schema
 * @throws SemanticException
 */
private static List<String> preserveColumnOrder(Operator<? extends OperatorDesc> op,
    List<String> cols) throws SemanticException {
  RowSchema inputSchema = op.getSchema();
  if (inputSchema != null) {
    ArrayList<String> rs = new ArrayList<String>();
    ArrayList<ColumnInfo> inputCols = inputSchema.getSignature();
    for (ColumnInfo i : inputCols) {
      if (cols.contains(i.getInternalName())) {
        rs.add(i.getInternalName());
      }
    }
    return rs;
  } else {
    return cols;
  }
}
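// Illustrative sketch (not from the original source; all names hypothetical): the
// same order-preserving intersection on plain strings. The output order follows the
// schema, not the order of the requested columns.
static List<String> keepInSchemaOrder(List<String> schemaOrder, List<String> requested) {
  List<String> kept = new ArrayList<String>();
  for (String name : schemaOrder) {
    if (requested.contains(name)) {
      kept.add(name);
    }
  }
  return kept;
}
// keepInSchemaOrder([_col0, _col1, _col2], [_col2, _col0]) -> [_col0, _col2]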
private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce,
    ColumnPrunerProcCtx cppCtx) throws SemanticException {
  ReduceSinkDesc reduceConf = reduce.getConf();
  Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
  LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);

  RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
  ArrayList<ColumnInfo> old_signature = oldRR.getRowSchema().getSignature();
  ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);

  List<String> valueColNames = reduceConf.getOutputValueColumnNames();
  ArrayList<String> newValueColNames = new ArrayList<String>();

  List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
  List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
  ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();

  for (int i = 0; i < retainFlags.length; i++) {
    String outputCol = valueColNames.get(i);
    ExprNodeDesc outputColExpr = valueExprs.get(i);
    if (!retainFlags[i]) {
      String[] nm = oldRR.reverseLookup(outputCol);
      if (nm == null) {
        outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
        nm = oldRR.reverseLookup(outputCol);
      }

      // If multiple columns map to the same column name, do not resolve again:
      // the ColumnInfo has already been removed from the row resolver.
      if (nm == null) {
        continue;
      }

      // Only remove information of a column if it is not a key,
      // i.e. this column does not appear in the keyExprs of the RS.
      if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
        ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]);
        oldRR.getInvRslvMap().remove(colInfo.getInternalName());
        oldMap.remove(outputCol);
        signature.remove(colInfo);
      }
    } else {
      newValueColNames.add(outputCol);
      newValueExprs.add(outputColExpr);
    }
  }

  oldRR.getRowSchema().setSignature(signature);
  reduce.getSchema().setSignature(signature);
  reduceConf.setOutputValueColumnNames(newValueColNames);
  reduceConf.setValueCols(newValueExprs);

  TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(
      PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, ""));
  reduceConf.setValueSerializeInfo(newValueTable);
  LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
}
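// Illustrative sketch (not from the original source; values hypothetical): the
// retain-flag loop above partitions the RS value columns into kept and dropped.
boolean[] retain = {true, false, true};
List<String> valueNames = Arrays.asList("_col0", "_col1", "_col2");
List<String> kept = new ArrayList<String>();
for (int i = 0; i < retain.length; i++) {
  if (retain[i]) {
    kept.add(valueNames.get(i));
  }
}
// kept == [_col0, _col2]; _col1 is pruned unless it also appears among the key exprs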
public boolean addMappingOnly(String tab_alias, String col_alias, ColumnInfo colInfo) {
  if (tab_alias != null) {
    tab_alias = tab_alias.toLowerCase();
  }
  col_alias = col_alias.toLowerCase();

  /*
   * allow multiple mappings to the same ColumnInfo.
   * When a ColumnInfo is mapped multiple times, only the
   * first inverse mapping is captured.
   */
  boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName());

  LinkedHashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias);
  if (f_map == null) {
    f_map = new LinkedHashMap<String, ColumnInfo>();
    rslvMap.put(tab_alias, f_map);
  }
  ColumnInfo oldColInfo = f_map.put(col_alias, colInfo);
  if (oldColInfo != null) {
    LOG.warn("Duplicate column info for " + tab_alias + "." + col_alias
        + " was overwritten in RowResolver map: " + oldColInfo + " by " + colInfo);
  }

  String[] qualifiedAlias = new String[] {tab_alias, col_alias};
  if (!colPresent) {
    invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
  } else {
    altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
  }

  return colPresent;
}
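// Hedged usage sketch (not from the original source; aliases and "ci" hypothetical):
// mapping the same ColumnInfo twice keeps the first pair in invRslvMap and routes
// later pairs to altInvRslvMap, so reverseLookup keeps returning the first aliases.
rr.addMappingOnly("t1", "c1", ci); // returns false; invRslvMap: internal -> {t1, c1}
rr.addMappingOnly("t2", "c2", ci); // returns true;  altInvRslvMap: internal -> {t2, c2}
// rr.reverseLookup(ci.getInternalName()) still yields {"t1", "c1"}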
private static void pruneOperator(NodeProcessorCtx ctx, Operator<? extends OperatorDesc> op,
    List<String> cols) throws SemanticException {
  // the pruning needs to preserve the order of columns in the input schema
  RowSchema inputSchema = op.getSchema();
  if (inputSchema != null) {
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    RowResolver oldRR = ((ColumnPrunerProcCtx) ctx).getOpToParseCtxMap().get(op).getRowResolver();
    RowResolver newRR = new RowResolver();
    for (ColumnInfo i : oldRR.getRowSchema().getSignature()) {
      if (cols.contains(i.getInternalName())) {
        String[] nm = oldRR.reverseLookup(i.getInternalName());
        newRR.put(nm[0], nm[1], i);
        rs.add(i);
      }
    }
    ((ColumnPrunerProcCtx) ctx).getOpToParseCtxMap().get(op).setRowResolver(newRR);
    op.getSchema().setSignature(rs);
  }
}
public int getPosition(String internalName) {
  int pos = -1;
  for (ColumnInfo var : rowSchema.getSignature()) {
    ++pos;
    if (var.getInternalName().equals(internalName)) {
      return pos;
    }
  }
  return -1;
}
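// Illustrative sketch (not from the original source): the same linear scan on a
// plain list of names, returning the zero-based position of the first match or -1.
static int positionOf(List<String> names, String wanted) {
  for (int pos = 0; pos < names.size(); pos++) {
    if (names.get(pos).equals(wanted)) {
      return pos;
    }
  }
  return -1;
}
// positionOf(Arrays.asList("_col0", "_col1"), "_col1") == 1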
public static void setupNeededColumns(TableScanOperator scanOp, RowResolver inputRR,
    List<String> cols) throws SemanticException {
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<String> referencedColumnNames = new ArrayList<String>();
  TableScanDesc desc = scanOp.getConf();
  List<VirtualColumn> virtualCols = desc.getVirtualCols();
  List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

  // add virtual columns for ANALYZE TABLE
  if (scanOp.getConf().isGatherStats()) {
    cols.add(VirtualColumn.RAWDATASIZE.getName());
  }

  for (String column : cols) {
    String[] tabCol = inputRR.reverseLookup(column);
    if (tabCol == null) {
      continue;
    }
    referencedColumnNames.add(column);
    ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
    if (colInfo.getIsVirtualCol()) {
      // part is also a virtual column, but partition columns should not be in this list
      for (int j = 0; j < virtualCols.size(); j++) {
        VirtualColumn vc = virtualCols.get(j);
        if (vc.getName().equals(colInfo.getInternalName())) {
          newVirtualCols.add(vc);
        }
      }
      // no need to pass virtual columns to the reader
      continue;
    }
    int position = inputRR.getPosition(column);
    if (position >= 0) {
      // get the needed columns by id and name
      neededColumnIds.add(position);
      neededColumnNames.add(column);
    }
  }

  desc.setVirtualCols(newVirtualCols);
  scanOp.setNeededColumnIDs(neededColumnIds);
  scanOp.setNeededColumns(neededColumnNames);
  scanOp.setReferencedColumns(referencedColumnNames);
}
// TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or not?
private static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom,
    IntRef outputColPosRef, int numColumns) throws SemanticException {
  boolean hasDuplicates = false;
  String tabAlias;
  String colAlias;
  String[] qualifiedColName;
  int i = 0;

  int outputColPos = outputColPosRef == null ? 0 : outputColPosRef.val;
  for (ColumnInfo cInfoFrmInput : rrToAddFrom.getRowSchema().getSignature()) {
    if (numColumns >= 0 && i == numColumns) {
      break;
    }
    ColumnInfo newCI = null;
    String internalName = cInfoFrmInput.getInternalName();
    qualifiedColName = rrToAddFrom.reverseLookup(internalName);
    tabAlias = qualifiedColName[0];
    colAlias = qualifiedColName[1];

    newCI = new ColumnInfo(cInfoFrmInput);
    newCI.setInternalName(SemanticAnalyzer.getColumnInternalName(outputColPos));
    outputColPos++;

    boolean isUnique = rrToAddTo.putWithCheck(tabAlias, colAlias, internalName, newCI);
    hasDuplicates |= (!isUnique);

    qualifiedColName = rrToAddFrom.getAlternateMappings(internalName);
    if (qualifiedColName != null) {
      tabAlias = qualifiedColName[0];
      colAlias = qualifiedColName[1];
      rrToAddTo.put(tabAlias, colAlias, newCI);
    }
    i++;
  }

  if (outputColPosRef != null) {
    outputColPosRef.val = outputColPos;
  }
  return !hasDuplicates;
}
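// Hedged sketch (not from the original source; "fromSignature" and the starting
// position are hypothetical): the renumbering step above in isolation.
// SemanticAnalyzer.getColumnInternalName(pos) produces names of the form "_col<pos>".
int outputColPos = 3;
for (ColumnInfo src : fromSignature) {
  ColumnInfo copy = new ColumnInfo(src);
  copy.setInternalName(SemanticAnalyzer.getColumnInternalName(outputColPos++));
  // copy keeps src's type and aliases but is addressed as _col3, _col4, ... downstream
}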
@Override
protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
    Object... nodeOutputs) throws SemanticException {
  String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(
      expr.getChild(0).getChild(0).getText());
  // NOTE: tableAlias must be a valid non-ambiguous table alias,
  // because we've checked that in TOK_TABLE_OR_COL's process method.
  ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias,
      ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(), expr);

  if (colInfo == null) {
    ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
    return null;
  }
  return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), tableAlias,
      colInfo.getIsVirtualCol());
}
/**
 * Get a list of aliases for non-hidden columns.
 *
 * @param tableAlias the table alias to resolve against; may be null
 * @param max the maximum number of columns to return
 * @return a list of non-hidden column names no greater in size than max
 */
public List<String> getReferenceableColumnAliases(String tableAlias, int max) {
  int count = 0;
  Set<String> columnNames = new LinkedHashSet<String>();
  int tables = rslvMap.size();

  Map<String, ColumnInfo> mapping = rslvMap.get(tableAlias);
  if (mapping != null) {
    for (Map.Entry<String, ColumnInfo> entry : mapping.entrySet()) {
      if (max > 0 && count >= max) {
        break;
      }
      ColumnInfo columnInfo = entry.getValue();
      if (!columnInfo.isHiddenVirtualCol()) {
        columnNames.add(entry.getKey());
        count++;
      }
    }
  } else {
    for (ColumnInfo columnInfo : getColumnInfos()) {
      if (max > 0 && count >= max) {
        break;
      }
      if (!columnInfo.isHiddenVirtualCol()) {
        String[] inverse = !isExprResolver ? reverseLookup(columnInfo.getInternalName()) : null;
        if (inverse != null) {
          columnNames.add(inverse[0] == null || tables <= 1
              ? inverse[1] : inverse[0] + "." + inverse[1]);
        } else {
          columnNames.add(columnInfo.getAlias());
        }
        count++;
      }
    }
  }
  return new ArrayList<String>(columnNames);
}
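// Illustrative sketch (not from the original source; values hypothetical): the
// qualification rule in the fallback branch above. An alias is prefixed with its
// table alias only when more than one table is in scope.
String[] inverse = {"t1", "c1"}; // a hypothetical reverseLookup result
int tables = 2;
String alias = (inverse[0] == null || tables <= 1) ? inverse[1] : inverse[0] + "." + inverse[1];
// alias == "t1.c1"; with tables == 1 it would be just "c1"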
private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
    throws SemanticException {
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  RowSchema inputRS = fsOp.getSchema();

  // create a reduce Sink operator - key is the first column
  ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
  keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

  // value is all the columns in the FileSink operator input
  ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
  for (ColumnInfo ci : inputRS.getSignature()) {
    valueCols.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(),
        ci.getTabAlias(), ci.getIsVirtualCol()));
  }

  // create a dummy tableScan operator
  Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < valueCols.size(); i++) {
    outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
  }

  ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
      new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1);
  OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);

  ParseContext parseCtx = ctx.getParseCtx();
  FileSinkDesc fsConf = fsOp.getConf();

  // Add the extract operator to get the value fields
  RowResolver out_rwsch = new RowResolver();
  RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
  Integer pos = Integer.valueOf(0);
  for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
    String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
    out_rwsch.put(info[0], info[1],
        new ColumnInfo(pos.toString(), colInfo.getType(), info[0],
            colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
    pos = Integer.valueOf(pos.intValue() + 1);
  }

  Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(
      new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false)),
      new RowSchema(out_rwsch.getColumnInfos()));

  TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
  fsConf.getTableInfo().getProperties().remove(
      org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

  FileSinkDesc newFSD = new FileSinkDesc(finalName, ts,
      parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
  FileSinkOperator newOutput =
      (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract);

  HiveConf conf = parseCtx.getConf();
  MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
  cplan.setReducer(extract);

  // NOTE: we should gather stats in MR1 (rather than the merge MR job)
  // since it is unknown if the merge MR will be triggered at execution time.

  MoveWork dummyMv = new MoveWork(null, null, null,
      new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null), false);

  ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

  LinkMoveTask(ctx, newOutput, cndTsk);
}
// main work
@SuppressWarnings("unchecked")
public static void analyzeHelper(Operator sinkOp, int level) {

  println(level, sinkOp.getClass());

  if (sinkOp instanceof TableScanOperator) {
    // System.out.println("=========== "
    //     + opParseCtx.get(sinkOp).getRowResolver().tableOriginalName);
    // System.out.println("========= " + ((TableScanOperator) sinkOp).getNeededColumnIDs());
    // System.out.println("========= " + ((TableScanOperator) sinkOp).getNeededColumns());
    // System.out.println("======Table Desc " + ((TableScanOperator) sinkOp).getTableDesc());
    // System.out.println(qb.getTabNameForAlias("a"));
    // System.out.println(qb.getTabNameForAlias("b"));
  }

  println(level, "Column Expr Map: ");
  Map<String, ExprNodeDesc> map = sinkOp.getColumnExprMap();
  if (map != null && map.entrySet() != null) {
    for (Entry<String, ExprNodeDesc> entry : map.entrySet()) {
      if (entry.getValue() instanceof ExprNodeColumnDesc) {
        println(level, entry.getKey() + ": "
            + ((ExprNodeColumnDesc) entry.getValue()).getTabAlias()
            + ((ExprNodeColumnDesc) entry.getValue()).getCols());
      } else if (entry.getValue() instanceof ExprNodeConstantDesc) {
        println(level, entry.getKey() + ":: "
            + ((ExprNodeConstantDesc) entry.getValue()).getExprString());
        // + ((ExprNodeConstantDesc) entry.getValue()).getCols());
      } else {
        println(level, entry.getValue().getExprString());
        // throw new RuntimeException("ExprNode type is not supported!");
      }
    }
  }

  println(level, "Schema: ");
  RowSchema schema = sinkOp.getSchema();
  for (ColumnInfo info : schema.getSignature()) {
    println(level, info.getTabAlias() + "[" + info.getInternalName() + "]");
  }

  if (sinkOp instanceof JoinOperator) {
    // println(level, ((JoinOperator) sinkOp).getPosToAliasMap());
    // println(level, "Reversed Mapping: " + ((JoinOperator) sinkOp).getConf().getReversedExprs());
    // println(level, ((JoinOperator) sinkOp).getConf());
    // for (ExprNodeDesc nodeDesc : ((JoinOperator) sinkOp).getConf().getExprs()) {}
    // println(level, ((JoinOperator) sinkOp).getColumnExprMap());
    // for exprs
    /*
    for (List<ExprNodeDesc> lst : ((JoinOperator) sinkOp).getConf().getExprs().values()) {
      printLevel(level);
      for (ExprNodeDesc desc : lst) {
        print(((ExprNodeColumnDesc) desc).getTabAlias() + " "
            + ((ExprNodeColumnDesc) desc).getCols());
      }
      println();
    }
    // for filters
    for (List<ExprNodeDesc> lst : ((JoinOperator) sinkOp).getConf().getFilters().values()) {
      printLevel(level);
      // print(((JoinOperator) sinkOp).getConf().getFilters());
      for (ExprNodeDesc desc : lst) {
        print(desc.getClass() + " ");
        // print(((ExprNodeColumnDesc) desc).getTabAlias() + " "
        //     + ((ExprNodeColumnDesc) desc).getCols());
      }
      println();
    }
    println(level, "output");
    println(level, ((JoinOperator) sinkOp).getConf().getOutputColumnNames());
    */
    // println(level, ((JoinOperator) sinkOp).getConf().getExprsStringMap());
  }

  if (sinkOp instanceof ReduceSinkOperator) {
    // println(level, ((ReduceSinkOperator) sinkOp).getConf().getOutputKeyColumnNames());
    /*
    for (ExprNodeDesc desc : ((ReduceSinkOperator) sinkOp).getConf().getValueCols()) {
      println(level, ((ExprNodeColumnDesc) desc).getTabAlias() + " "
          + ((ExprNodeColumnDesc) desc).getCols());
    }
    */
  }

  if (sinkOp instanceof SelectOperator) {
    /*
    for (ExprNodeDesc desc : ((SelectOperator) sinkOp).getConf().getColList()) {
      println(level, ((ExprNodeColumnDesc) desc).getTabAlias() + " "
          + ((ExprNodeColumnDesc) desc).getCols());
    }
    */
    // println(level, ((SelectOperator) sinkOp).getConf().getColList());
    // println(level, ((SelectOperator) sinkOp).getConf().getOutputColumnNames());
  }

  if (sinkOp instanceof TableScanOperator) {
    // TableScanDesc desc = ((TableScanOperator) sinkOp).getConf();
    // println(level, desc.getAlias());
    // println(level, desc.getFilterExpr());
    // println(level, desc.getBucketFileNameMapping());
    // println(level, desc.getVirtualCols());
    // println(level, desc.getPartColumns());
  }

  if (sinkOp instanceof FilterOperator) {
    println(level, ((FilterOperator) sinkOp).getConf().getPredicate().getExprString());
    // ExprNodeDesc desc = ((FilterOperator) sinkOp).getConf().getPredicate();
    // println(level, ((ExprNodeGenericFuncDesc) desc).getExprString());
    // println(level, ((ExprNodeGenericFuncDesc) desc).getCols());
  }

  if (sinkOp instanceof LimitOperator) {
    println(level, ((LimitOperator) sinkOp).getConf().getClass());
  }

  List<Operator> lst = sinkOp.getParentOperators();
  if (lst != null) {
    for (Operator l : lst) {
      analyzeHelper(l, level + 1);
    }
  }
}
public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions,
    List<Integer> sortPositions, List<Integer> sortOrder, List<Integer> sortNullOrder,
    ArrayList<ExprNodeDesc> allCols, ArrayList<ExprNodeDesc> bucketColumns, int numBuckets,
    Operator<? extends OperatorDesc> parent, AcidUtils.Operation writeType)
    throws SemanticException {

  // Order of KEY columns:
  // 1) Partition columns
  // 2) Bucket number column
  // 3) Sort columns
  Set<Integer> keyColsPosInVal = Sets.newLinkedHashSet();
  ArrayList<ExprNodeDesc> keyCols = Lists.newArrayList();
  List<Integer> newSortOrder = Lists.newArrayList();
  List<Integer> newSortNullOrder = Lists.newArrayList();
  int numPartAndBuck = partitionPositions.size();

  keyColsPosInVal.addAll(partitionPositions);
  if (!bucketColumns.isEmpty() || writeType == Operation.DELETE || writeType == Operation.UPDATE) {
    keyColsPosInVal.add(-1);
    numPartAndBuck += 1;
  }
  keyColsPosInVal.addAll(sortPositions);

  // by default partition and bucket columns are sorted in ascending order
  Integer order = 1;
  if (sortOrder != null && !sortOrder.isEmpty()) {
    if (sortOrder.get(0).intValue() == 0) {
      order = 0;
    }
  }
  for (int i = 0; i < numPartAndBuck; i++) {
    newSortOrder.add(order);
  }
  newSortOrder.addAll(sortOrder);

  String orderStr = "";
  for (Integer i : newSortOrder) {
    if (i.intValue() == 1) {
      orderStr += "+";
    } else {
      orderStr += "-";
    }
  }

  // if partition and bucket columns are sorted in ascending order, by default
  // nulls come first; otherwise nulls come last
  Integer nullOrder = order == 1 ? 0 : 1;
  if (sortNullOrder != null && !sortNullOrder.isEmpty()) {
    if (sortNullOrder.get(0).intValue() == 0) {
      nullOrder = 0;
    } else {
      nullOrder = 1;
    }
  }
  for (int i = 0; i < numPartAndBuck; i++) {
    newSortNullOrder.add(nullOrder);
  }
  newSortNullOrder.addAll(sortNullOrder);

  String nullOrderStr = "";
  for (Integer i : newSortNullOrder) {
    if (i.intValue() == 0) {
      nullOrderStr += "a";
    } else {
      nullOrderStr += "z";
    }
  }

  Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap();
  ArrayList<ExprNodeDesc> partCols = Lists.newArrayList();

  // clone the key columns here: the RS will replace the bucket column key with the
  // corresponding bucket number, which changes its ObjectInspector
  for (Integer idx : keyColsPosInVal) {
    if (idx < 0) {
      ExprNodeConstantDesc bucketNumCol =
          new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, BUCKET_NUMBER_COL_NAME);
      keyCols.add(bucketNumCol);
      colExprMap.put(Utilities.ReduceField.KEY + ".'" + BUCKET_NUMBER_COL_NAME + "'", bucketNumCol);
    } else {
      keyCols.add(allCols.get(idx).clone());
    }
  }

  ArrayList<ExprNodeDesc> valCols = Lists.newArrayList();
  for (int i = 0; i < allCols.size(); i++) {
    if (!keyColsPosInVal.contains(i)) {
      valCols.add(allCols.get(i).clone());
    }
  }

  for (Integer idx : partitionPositions) {
    partCols.add(allCols.get(idx).clone());
  }

  // in the absence of a SORTED BY clause, the sorted dynamic partition insert
  // should honor the ordering of records provided by ORDER BY in the SELECT statement
  ReduceSinkOperator parentRSOp =
      OperatorUtils.findSingleOperatorUpstream(parent, ReduceSinkOperator.class);
  if (parentRSOp != null && parseCtx.getQueryProperties().hasOuterOrderBy()) {
    String parentRSOpOrder = parentRSOp.getConf().getOrder();
    String parentRSOpNullOrder = parentRSOp.getConf().getNullOrder();
    if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) {
      keyCols.addAll(parentRSOp.getConf().getKeyCols());
      orderStr += parentRSOpOrder;
      nullOrderStr += parentRSOpNullOrder;
    }
  }

  // map _col0 to KEY._col0, etc.
  Map<String, String> nameMapping = new HashMap<>();
  ArrayList<String> keyColNames = Lists.newArrayList();
  for (ExprNodeDesc keyCol : keyCols) {
    String keyColName = keyCol.getExprString();
    keyColNames.add(keyColName);
    colExprMap.put(Utilities.ReduceField.KEY + "." + keyColName, keyCol);
    nameMapping.put(keyColName, Utilities.ReduceField.KEY + "." + keyColName);
  }
  ArrayList<String> valColNames = Lists.newArrayList();
  for (ExprNodeDesc valCol : valCols) {
    String colName = valCol.getExprString();
    valColNames.add(colName);
    colExprMap.put(Utilities.ReduceField.VALUE + "." + colName, valCol);
    nameMapping.put(colName, Utilities.ReduceField.VALUE + "." + colName);
  }

  // Create Key/Value TableDescs. When the operator plan is split into MR tasks,
  // the reduce operator will initialize the Extract operator with information
  // from the Key and Value TableDescs.
  List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
  TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr, nullOrderStr);
  List<FieldSchema> valFields =
      PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
  TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
  List<List<Integer>> distinctColumnIndices = Lists.newArrayList();

  // Number of reducers is set to default (-1)
  ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames,
      distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, valueTable, writeType);
  rsConf.setBucketCols(bucketColumns);
  rsConf.setNumBuckets(numBuckets);

  ArrayList<ColumnInfo> signature = new ArrayList<>();
  for (int index = 0; index < parent.getSchema().getSignature().size(); index++) {
    ColumnInfo colInfo = new ColumnInfo(parent.getSchema().getSignature().get(index));
    colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
    signature.add(colInfo);
  }
  ReduceSinkOperator op = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      rsConf, new RowSchema(signature), parent);
  op.setColumnExprMap(colExprMap);
  return op;
}
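// Illustrative sketch (not from the original source; inputs hypothetical): how the
// order strings above are encoded. Ascending/descending map to '+'/'-' and
// nulls-first/last to 'a'/'z'. With one partition column, no bucket column, and
// sortOrder == [1, 0]: order == 1, so newSortOrder == [1, 1, 0] and orderStr == "++-";
// nullOrder == 0 (nulls first for ascending), so nullOrderStr starts with "a".
List<Integer> demoSortOrder = Arrays.asList(1, 1, 0);
StringBuilder demoOrderStr = new StringBuilder();
for (Integer o : demoSortOrder) {
  demoOrderStr.append(o.intValue() == 1 ? '+' : '-');
}
// demoOrderStr.toString() == "++-"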
@Override
@SuppressWarnings("unchecked")
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
  RowResolver inputRR = cppCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();
  List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
  Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
  RowResolver parentRR = cppCtx.getParseContext().getOpParseCtx().get(parent).getRowResolver();
  List<ColumnInfo> sig = parentRR.getRowSchema().getSignature();
  List<String> colList = new ArrayList<String>();
  for (ColumnInfo cI : sig) {
    colList.add(cI.getInternalName());
  }

  if (prunedCols.size() != inputRR.getRowSchema().getSignature().size()
      && !(op.getChildOperators().get(0) instanceof SelectOperator)) {
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    RowResolver outputRS = new RowResolver();
    for (String internalName : prunedCols) {
      String[] nm = inputRR.reverseLookup(internalName);
      ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(),
          valueInfo.getInternalName(), nm[0], valueInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputs.add(internalName);
      outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo.getType(),
          nm[0], valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
      colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);

    Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
    op.removeChild(child);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
        select, new RowSchema(outputRS.getColumnInfos()), op);
    OperatorFactory.makeChild(sel, child);

    OpParseContext parseCtx = new OpParseContext(outputRS);
    cppCtx.getParseContext().getOpParseCtx().put(sel, parseCtx);

    sel.setColumnExprMap(colExprMap);
  }

  cppCtx.getPrunedColLists().put(op, colList);
  return null;
}
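// Hedged recap (names taken from the block above): the three rewiring calls splice
// the new SELECT into the operator DAG, turning op -> child into op -> sel -> child.
//   op.removeChild(child);                                        // detach child
//   sel = OperatorFactory.getAndMakeChild(select, rowSchema, op); // op -> sel
//   OperatorFactory.makeChild(sel, child);                        // sel -> child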