/**
 * Build ExprNodeColumnDesc for the projections in the input operator from startPos to endPos
 * (both included). The operator must have an associated colExprMap.
 *
 * @param inputOp input Hive Operator
 * @param startPos starting position in the input operator schema; must be >= 0 and <= endPos
 * @param endPos end position in the input operator schema; must be >= 0
 * @param addEmptyTabAlias if true, use an empty table alias for every generated column
 * @param setColToNonVirtual if true, mark every generated column as non-virtual
 * @return list of ExprNodeColumnDesc, one per projected column
 */
public static ArrayList<ExprNodeDesc> genExprNodeDesc(
    Operator inputOp, int startPos, int endPos, boolean addEmptyTabAlias,
    boolean setColToNonVirtual) {
  ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
  List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();

  String tabAlias;
  boolean vc;
  ColumnInfo ci;
  for (int i = startPos; i <= endPos; i++) {
    ci = colInfoLst.get(i);
    tabAlias = ci.getTabAlias();
    if (addEmptyTabAlias) {
      tabAlias = "";
    }
    vc = ci.getIsVirtualCol();
    if (setColToNonVirtual) {
      vc = false;
    }
    exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
  }

  return exprColLst;
}
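// Usage sketch (illustrative only; "selOp" is a hypothetical operator, not part of the
// original source). Project columns 0 and 1 of an existing operator's schema, dropping
// table aliases and virtual-column flags:
//
//   ArrayList<ExprNodeDesc> cols =
//       genExprNodeDesc(selOp, 0, 1, /* addEmptyTabAlias */ true,
//           /* setColToNonVirtual */ true);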
public static void setupNeededColumns(
    TableScanOperator scanOp, RowResolver inputRR, List<String> cols) throws SemanticException {
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<String> referencedColumnNames = new ArrayList<String>();
  TableScanDesc desc = scanOp.getConf();
  List<VirtualColumn> virtualCols = desc.getVirtualCols();
  List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

  // add virtual columns for ANALYZE TABLE
  if (scanOp.getConf().isGatherStats()) {
    cols.add(VirtualColumn.RAWDATASIZE.getName());
  }

  for (String column : cols) {
    String[] tabCol = inputRR.reverseLookup(column);
    if (tabCol == null) {
      continue;
    }
    referencedColumnNames.add(column);
    ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
    if (colInfo.getIsVirtualCol()) {
      // Partition columns are also virtual columns, but they should not end up
      // in this list, so only keep the virtual columns registered in virtualCols.
      for (int j = 0; j < virtualCols.size(); j++) {
        VirtualColumn vc = virtualCols.get(j);
        if (vc.getName().equals(colInfo.getInternalName())) {
          newVirtualCols.add(vc);
        }
      }
      // no need to pass virtual columns to the reader.
      continue;
    }
    int position = inputRR.getPosition(column);
    if (position >= 0) {
      // record the needed columns by id and name
      neededColumnIds.add(position);
      neededColumnNames.add(column);
    }
  }

  desc.setVirtualCols(newVirtualCols);
  scanOp.setNeededColumnIDs(neededColumnIds);
  scanOp.setNeededColumns(neededColumnNames);
  scanOp.setReferencedColumns(referencedColumnNames);
}
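// Usage sketch (illustrative only; "tsOp" and "tsRR" are hypothetical names). Restrict a
// table scan to the columns actually referenced by the query:
//
//   setupNeededColumns(tsOp, tsRR, Arrays.asList("key", "value"));
//
// Afterwards tsOp carries the needed column ids/names and the pruned virtual-column
// list, so the reader can skip everything else.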
@Override
protected ExprNodeColumnDesc processQualifiedColRef(
    TypeCheckCtx ctx, ASTNode expr, Object... nodeOutputs) throws SemanticException {
  String tableAlias =
      BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText());
  // NOTE: tableAlias must be a valid non-ambiguous table alias,
  // because we've checked that in TOK_TABLE_OR_COL's process method.
  ColumnInfo colInfo =
      getColInfo(
          (JoinTypeCheckCtx) ctx,
          tableAlias,
          ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(),
          expr);

  if (colInfo == null) {
    ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
    return null;
  }
  return new ExprNodeColumnDesc(
      colInfo.getType(), colInfo.getInternalName(), tableAlias, colInfo.getIsVirtualCol());
}
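// For orientation (a sketch, assuming the usual Hive AST shape for a qualified column
// reference such as "t1.key", i.e. (. (TOK_TABLE_OR_COL t1) key)):
//
//   expr.getChild(0).getChild(0).getText()  ->  "t1"  (table alias)
//   nodeOutputs[1]                          ->  constant desc holding "key"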
private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
    throws SemanticException {
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  RowSchema inputRS = fsOp.getSchema();

  // create a ReduceSink operator - the key is a random number, so that rows are
  // distributed evenly across reducers
  ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
  keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

  // the value is all the columns in the FileSink operator's input
  ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
  for (ColumnInfo ci : inputRS.getSignature()) {
    valueCols.add(
        new ExprNodeColumnDesc(
            ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol()));
  }

  // create a dummy TableScan operator
  Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < valueCols.size(); i++) {
    outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
  }

  ReduceSinkDesc rsDesc =
      PlanUtils.getReduceSinkDesc(
          new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1);
  OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
  ParseContext parseCtx = ctx.getParseCtx();
  FileSinkDesc fsConf = fsOp.getConf();

  // add the Extract operator to get the value fields
  RowResolver out_rwsch = new RowResolver();
  RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
  Integer pos = Integer.valueOf(0);
  for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
    String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
    out_rwsch.put(
        info[0],
        info[1],
        new ColumnInfo(
            pos.toString(),
            colInfo.getType(),
            info[0],
            colInfo.getIsVirtualCol(),
            colInfo.isHiddenVirtualCol()));
    pos = Integer.valueOf(pos.intValue() + 1);
  }

  Operator<ExtractDesc> extract =
      OperatorFactory.getAndMakeChild(
          new ExtractDesc(
              new ExprNodeColumnDesc(
                  TypeInfoFactory.stringTypeInfo,
                  Utilities.ReduceField.VALUE.toString(),
                  "",
                  false)),
          new RowSchema(out_rwsch.getColumnInfos()));

  TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
  fsConf
      .getTableInfo()
      .getProperties()
      .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

  FileSinkDesc newFSD =
      new FileSinkDesc(
          finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
  FileSinkOperator newOutput =
      (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract);

  HiveConf conf = parseCtx.getConf();
  MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
  cplan.setReducer(extract);

  // NOTE: we should gather stats in MR1 (rather than in the merge MR job),
  // since it is unknown at compile time whether the merge MR job will be
  // triggered at execution time.
  MoveWork dummyMv =
      new MoveWork(
          null, null, null, new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null),
          false);

  ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

  LinkMoveTask(ctx, newOutput, cndTsk);
}
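// Resulting plan shape (a sketch based on the operators built above); the merge job is
// wrapped in a ConditionalTask, so it only runs when deemed necessary at execution time:
//
//   TS (tsMerge, dummy) -> RS (rand() key, all value cols) => EX (extract) -> FS (finalName)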
@Override
@SuppressWarnings("unchecked")
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
  RowResolver inputRR = cppCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();
  List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
  Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
  RowResolver parentRR = cppCtx.getParseContext().getOpParseCtx().get(parent).getRowResolver();
  List<ColumnInfo> sig = parentRR.getRowSchema().getSignature();
  List<String> colList = new ArrayList<String>();
  for (ColumnInfo cI : sig) {
    colList.add(cI.getInternalName());
  }

  // If the child needs fewer columns than this operator produces, and the child is
  // not already a SelectOperator, insert a SELECT that projects only the pruned
  // column list between this operator and its child.
  if (prunedCols.size() != inputRR.getRowSchema().getSignature().size()
      && !(op.getChildOperators().get(0) instanceof SelectOperator)) {
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    RowResolver outputRS = new RowResolver();
    for (String internalName : prunedCols) {
      String[] nm = inputRR.reverseLookup(internalName);
      ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
      ExprNodeDesc colDesc =
          new ExprNodeColumnDesc(
              valueInfo.getType(), valueInfo.getInternalName(), nm[0],
              valueInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputs.add(internalName);
      outputRS.put(
          nm[0],
          nm[1],
          new ColumnInfo(
              internalName,
              valueInfo.getType(),
              nm[0],
              valueInfo.getIsVirtualCol(),
              valueInfo.isHiddenVirtualCol()));
      colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);

    // splice the new SELECT into the tree: op -> sel -> child
    Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
    op.removeChild(child);
    SelectOperator sel =
        (SelectOperator)
            OperatorFactory.getAndMakeChild(
                select, new RowSchema(outputRS.getColumnInfos()), op);
    OperatorFactory.makeChild(sel, child);

    OpParseContext parseCtx = new OpParseContext(outputRS);
    cppCtx.getParseContext().getOpParseCtx().put(sel, parseCtx);
    sel.setColumnExprMap(colExprMap);
  }

  cppCtx.getPrunedColLists().put(op, colList);
  return null;
}
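// Effect (sketch): when the child needs fewer columns, the operator tree changes from
//
//   op -> child          to          op -> SEL(prunedCols) -> child
//
// and the new SELECT's colExprMap maps each retained internal name to its column desc.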