/**
 * Build ExprNodeColumnDesc for the projections in the input operator from startPos to endPos
 * (both inclusive). The operator must have an associated colExprMap.
 *
 * @param inputOp input Hive operator
 * @param startPos starting position in the input operator schema; must be >= 0 and <= endPos
 * @param endPos end position in the input operator schema; must be less than the schema size
 * @param addEmptyTabAlias if true, emit an empty table alias for every column
 * @param setColToNonVirtual if true, mark every column as non-virtual
 * @return list of ExprNodeDesc
 */
public static ArrayList<ExprNodeDesc> genExprNodeDesc(Operator inputOp, int startPos, int endPos,
    boolean addEmptyTabAlias, boolean setColToNonVirtual) {
  ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
  List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();

  String tabAlias;
  boolean vc;
  ColumnInfo ci;
  for (int i = startPos; i <= endPos; i++) {
    ci = colInfoLst.get(i);
    tabAlias = ci.getTabAlias();
    if (addEmptyTabAlias) {
      tabAlias = "";
    }
    vc = ci.getIsVirtualCol();
    if (setColToNonVirtual) {
      vc = false;
    }
    exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
  }

  return exprColLst;
}
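// Usage sketch (illustrative, not from the original source). "inputOp" is a
// placeholder for any planned operator with a populated schema and colExprMap;
// the call projects the operator's full schema with empty table aliases and
// virtual-column flags cleared:
//
//   int lastPos = inputOp.getSchema().getSignature().size() - 1;
//   ArrayList<ExprNodeDesc> allCols =
//       genExprNodeDesc(inputOp, 0, lastPos,
//           true /* addEmptyTabAlias */, true /* setColToNonVirtual */);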
@Override
protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
    Object... nodeOutputs) throws SemanticException {
  String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(
      expr.getChild(0).getChild(0).getText());
  // NOTE: tableAlias must be a valid non-ambiguous table alias,
  // because we've checked that in TOK_TABLE_OR_COL's process method.
  ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias,
      ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(), expr);

  if (colInfo == null) {
    ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
    return null;
  }
  return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), tableAlias,
      colInfo.getIsVirtualCol());
}
private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
    throws SemanticException {
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  RowSchema inputRS = fsOp.getSchema();

  // build a rand() key expression so rows could be spread randomly across reducers
  // (note: the ReduceSinkDesc below is actually created with an empty key list)
  ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
  keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

  // value is all the columns in the FileSink operator input
  ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
  for (ColumnInfo ci : inputRS.getSignature()) {
    valueCols.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), ci.getTabAlias(),
        ci.getIsVirtualCol()));
  }

  // create a dummy TableScan operator
  Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < valueCols.size(); i++) {
    outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
  }

  ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), valueCols,
      outputColumns, false, -1, -1, -1);
  OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
  ParseContext parseCtx = ctx.getParseCtx();
  FileSinkDesc fsConf = fsOp.getConf();

  // Add the extract operator to get the value fields
  RowResolver out_rwsch = new RowResolver();
  RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
  Integer pos = Integer.valueOf(0);
  for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
    String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
    out_rwsch.put(info[0], info[1],
        new ColumnInfo(pos.toString(), colInfo.getType(), info[0],
            colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
    pos = Integer.valueOf(pos.intValue() + 1);
  }

  Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(
      new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false)),
      new RowSchema(out_rwsch.getColumnInfos()));

  TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
  fsConf.getTableInfo().getProperties().remove(
      org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

  FileSinkDesc newFSD = new FileSinkDesc(finalName, ts,
      parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
  FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD,
      inputRS, extract);

  HiveConf conf = parseCtx.getConf();
  MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
  cplan.setReducer(extract);

  // NOTE: we should gather stats in MR1 (rather than the merge MR job)
  // since it is unknown if the merge MR will be triggered at execution time.

  MoveWork dummyMv = new MoveWork(null, null, null,
      new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null), false);

  ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

  LinkMoveTask(ctx, newOutput, cndTsk);
}
@Override
@SuppressWarnings("unchecked")
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
  RowResolver inputRR = cppCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();
  List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
  Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
  RowResolver parentRR = cppCtx.getParseContext().getOpParseCtx().get(parent).getRowResolver();
  List<ColumnInfo> sig = parentRR.getRowSchema().getSignature();
  List<String> colList = new ArrayList<String>();
  for (ColumnInfo cI : sig) {
    colList.add(cI.getInternalName());
  }

  // if the child needs fewer columns than this operator produces and does not
  // already prune them via a SELECT, insert one that projects only the pruned columns
  if (prunedCols.size() != inputRR.getRowSchema().getSignature().size()
      && !(op.getChildOperators().get(0) instanceof SelectOperator)) {
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    RowResolver outputRS = new RowResolver();
    for (String internalName : prunedCols) {
      String[] nm = inputRR.reverseLookup(internalName);
      ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(),
          valueInfo.getInternalName(), nm[0], valueInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputs.add(internalName);
      outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo.getType(), nm[0],
          valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
      colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);

    // splice the new SelectOperator between this operator and its child
    Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
    op.removeChild(child);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select,
        new RowSchema(outputRS.getColumnInfos()), op);
    OperatorFactory.makeChild(sel, child);

    OpParseContext parseCtx = new OpParseContext(outputRS);
    cppCtx.getParseContext().getOpParseCtx().put(sel, parseCtx);

    sel.setColumnExprMap(colExprMap);
  }

  cppCtx.getPrunedColLists().put(op, colList);
  return null;
}