/**
 * Collects the aliases of columns that are not hidden virtual columns.
 *
 * <p>If the resolver map has an entry for {@code tableAlias}, the column aliases registered under
 * that table alias are returned. Otherwise every column returned by {@code getColumnInfos()} is
 * considered: unless this is an expression resolver, the column is reverse-looked-up by internal
 * name and reported as {@code table.column} when it has a table alias and more than one table
 * alias is registered, or as the bare column alias otherwise; when no reverse lookup result is
 * available the alias stored on the {@code ColumnInfo} is used.
 *
 * <p>The limit is applied to the number of non-hidden columns visited, not to the number of
 * distinct names collected, so duplicate names can make the result shorter than {@code max}.
 *
 * @param tableAlias the table alias whose columns are wanted; when it is not registered, columns
 *     of all tables are considered
 * @param max the maximum number of columns to return; a value of zero or less means no limit
 * @return a list of non-hidden column names, in encounter order and without duplicates, no
 *     greater in size than max
 */
public List<String> getReferenceableColumnAliases(String tableAlias, int max) {
  final boolean limited = max > 0;
  final boolean multipleTables = rslvMap.size() > 1;
  Set<String> aliases = new LinkedHashSet<String>();
  int visited = 0;

  Map<String, ColumnInfo> columnsOfTable = rslvMap.get(tableAlias);
  if (columnsOfTable == null) {
    // Unknown table alias: fall back to every column known to this resolver.
    for (ColumnInfo info : getColumnInfos()) {
      if (limited && visited >= max) {
        break;
      }
      if (info.isHiddenVirtualCol()) {
        continue;
      }
      String[] qualified = isExprResolver ? null : reverseLookup(info.getInternalName());
      String name;
      if (qualified == null) {
        name = info.getAlias();
      } else if (qualified[0] != null && multipleTables) {
        // Prefix with the table alias only when it exists and several tables are registered.
        name = qualified[0] + "." + qualified[1];
      } else {
        name = qualified[1];
      }
      aliases.add(name);
      visited++;
    }
  } else {
    for (Map.Entry<String, ColumnInfo> column : columnsOfTable.entrySet()) {
      if (limited && visited >= max) {
        break;
      }
      if (column.getValue().isHiddenVirtualCol()) {
        continue;
      }
      aliases.add(column.getKey());
      visited++;
    }
  }
  return new ArrayList<String>(aliases);
}
private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName) throws SemanticException { Task<? extends Serializable> currTask = ctx.getCurrTask(); RowSchema inputRS = fsOp.getSchema(); // create a reduce Sink operator - key is the first column ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>(); keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand")); // value is all the columns in the FileSink operator input ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>(); for (ColumnInfo ci : inputRS.getSignature()) { valueCols.add( new ExprNodeColumnDesc( ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol())); } // create a dummy tableScan operator Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS); ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < valueCols.size(); i++) { outputColumns.add(SemanticAnalyzer.getColumnInternalName(i)); } ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc( new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1); OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge); ParseContext parseCtx = ctx.getParseCtx(); FileSinkDesc fsConf = fsOp.getConf(); // Add the extract operator to get the value fields RowResolver out_rwsch = new RowResolver(); RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver(); Integer pos = Integer.valueOf(0); for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) { String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); out_rwsch.put( info[0], info[1], new ColumnInfo( pos.toString(), colInfo.getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol())); pos = Integer.valueOf(pos.intValue() + 1); } Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild( new ExtractDesc( new ExprNodeColumnDesc( TypeInfoFactory.stringTypeInfo, 
Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(out_rwsch.getColumnInfos())); TableDesc ts = (TableDesc) fsConf.getTableInfo().clone(); fsConf .getTableInfo() .getProperties() .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); FileSinkDesc newFSD = new FileSinkDesc( finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)); FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract); HiveConf conf = parseCtx.getConf(); MapredWork cplan = createMergeTask(conf, tsMerge, fsConf); cplan.setReducer(extract); // NOTE: we should gather stats in MR1 (rather than the merge MR job) // since it is unknown if the merge MR will be triggered at execution time. MoveWork dummyMv = new MoveWork( null, null, null, new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null), false); ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName()); LinkMoveTask(ctx, newOutput, cndTsk); }
/**
 * Registers the pruned column list for the visited operator and, where needed, places a select
 * operator between it and its first child.
 *
 * <p>The list registered for the operator consists of the internal names of every column in the
 * row schema of its first parent. If the number of columns required by the first child differs
 * from the number of columns in this operator's own row schema, and that child is not already a
 * {@code SelectOperator}, a new {@code SelectOperator} projecting exactly the child's required
 * columns is inserted between the operator and the child, and a parse context for it is
 * registered.
 *
 * @param nd the operator node being visited
 * @param stack the traversal stack (not used here)
 * @param ctx the column pruner context; must be a {@code ColumnPrunerProcCtx}
 * @param nodeOutputs outputs of previously processed nodes (not used here)
 * @return always {@code null}
 * @throws SemanticException declared by the processor contract
 */
@Override
@SuppressWarnings("unchecked")
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  ColumnPrunerProcCtx prunerCtx = (ColumnPrunerProcCtx) ctx;
  Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
  RowResolver ownResolver =
      prunerCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();

  Operator<? extends OperatorDesc> firstChild = op.getChildOperators().get(0);
  List<String> neededByChild = prunerCtx.getPrunedColList(firstChild);

  // This operator itself is recorded as needing every column its first parent produces.
  Operator<? extends OperatorDesc> firstParent = op.getParentOperators().get(0);
  RowResolver parentResolver =
      prunerCtx.getParseContext().getOpParseCtx().get(firstParent).getRowResolver();
  List<String> parentColumns = new ArrayList<String>();
  for (ColumnInfo parentColumn : parentResolver.getRowSchema().getSignature()) {
    parentColumns.add(parentColumn.getInternalName());
  }

  boolean columnCountDiffers =
      neededByChild.size() != ownResolver.getRowSchema().getSignature().size();
  boolean childAlreadySelects = firstChild instanceof SelectOperator;

  if (columnCountDiffers && !childAlreadySelects) {
    ArrayList<ExprNodeDesc> projections = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> exprByOutput = new HashMap<String, ExprNodeDesc>();
    RowResolver selectResolver = new RowResolver();

    for (String internalName : neededByChild) {
      String[] aliasPair = ownResolver.reverseLookup(internalName);
      String tableAlias = aliasPair[0];
      String columnAlias = aliasPair[1];
      ColumnInfo source = ownResolver.get(tableAlias, columnAlias);

      ExprNodeDesc columnRef =
          new ExprNodeColumnDesc(
              source.getType(), source.getInternalName(), tableAlias, source.getIsVirtualCol());
      projections.add(columnRef);
      outputNames.add(internalName);

      ColumnInfo projected =
          new ColumnInfo(
              internalName,
              source.getType(),
              tableAlias,
              source.getIsVirtualCol(),
              source.isHiddenVirtualCol());
      selectResolver.put(tableAlias, columnAlias, projected);
      exprByOutput.put(internalName, columnRef);
    }

    SelectDesc selectDesc = new SelectDesc(projections, outputNames, false);

    // Splice the new select in: detach the child, hang the select under op, re-attach the child.
    Operator<? extends OperatorDesc> detached = op.getChildOperators().get(0);
    op.removeChild(detached);
    SelectOperator select =
        (SelectOperator)
            OperatorFactory.getAndMakeChild(
                selectDesc, new RowSchema(selectResolver.getColumnInfos()), op);
    OperatorFactory.makeChild(select, detached);

    OpParseContext selectParseCtx = new OpParseContext(selectResolver);
    prunerCtx.getParseContext().getOpParseCtx().put(select, selectParseCtx);
    select.setColumnExprMap(exprByOutput);
  }

  prunerCtx.getPrunedColLists().put(op, parentColumns);
  return null;
}