/** * Add the StatsTask as a dependent task of the MoveTask because StatsTask will change the * Table/Partition metadata. For atomicity, we should not change it before the data is actually * in place, which is done by the MoveTask. * * @param nd the FileSinkOperator whose results are taken care of by the MoveTask. * @param mvTask The MoveTask that moves the FileSinkOperator's results. * @param currTask The MapRedTask that the FileSinkOperator belongs to. * @param hconf HiveConf */ private void addStatsTask( FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) { MoveWork mvWork = ((MoveTask) mvTask).getWork(); StatsWork statsWork = null; if (mvWork.getLoadTableWork() != null) { statsWork = new StatsWork(mvWork.getLoadTableWork()); } else if (mvWork.getLoadFileWork() != null) { statsWork = new StatsWork(mvWork.getLoadFileWork()); } assert statsWork != null : "Error when generating StatsTask"; statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); MapredWork mrWork = (MapredWork) currTask.getWork(); // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix()); Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf); // mark the MapredWork and FileSinkOperator for gathering stats nd.getConf().setGatherStats(true); mrWork.setGatheringStats(true); nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); nd.getConf() .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH)); // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partitions to the StatsTask mvTask.addDependentTask(statsTask); statsTask.subscribeFeed(mvTask); }
/** * File Sink Operator encountered. * * @param nd the file sink operator encountered * @param opProcCtx context */ public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; Task<? extends Serializable> currTask = ctx.getCurrTask(); FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE fsOp.getConf().getTableInfo().getTableName() != null && parseCtx.getQB().getParseInfo().isInsertToTable(); HiveConf hconf = parseCtx.getConf(); // Has the user enabled merging of files for map-only jobs or for all jobs if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) { List<Task<? extends Serializable>> mvTasks = ctx.getMvTask(); // In case of unions or map-joins, it is possible that the file has // already been seen. // So, no need to attempt to merge the files again. if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) { // no need of merging if the move is to a local file system MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp); if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf()); } if ((mvTask != null) && !mvTask.isLocal()) { // There are separate configuration parameters to control whether to // merge for a map-only job // or for a map-reduce job MapredWork currWork = (MapredWork) currTask.getWork(); boolean mergeMapOnly = hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null; boolean mergeMapRed = hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReducer() != null; if (mergeMapOnly || mergeMapRed) { chDir = true; } } } } String finalName = processFS(nd, stack, opProcCtx, chDir); // need to merge the files in the destination table/partitions if (chDir && (finalName != null)) { createMergeJob((FileSinkOperator) nd, ctx, finalName); } return null; }
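For reference, the merge decision taken in process() above boils down to a single predicate over the job shape and two config flags. The following is an illustrative sketch only (shouldMergeOutput is a hypothetical name, not a method of this class), using just the HiveConf and MapredWork calls already shown above:

// Hedged sketch: summarizes the map-only vs. map-reduce merge decision made above.
private static boolean shouldMergeOutput(HiveConf hconf, MapredWork currWork) {
  boolean mapOnly = currWork.getReducer() == null;
  // hive.merge.mapfiles governs map-only jobs, hive.merge.mapredfiles map-reduce jobs
  return mapOnly
      ? hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
      : hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES);
}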
// Remove the reduce sink operator // Use BucketizedHiveInputFormat so that one mapper processes exactly one file private void removeReduceSink( ReduceSinkOperator rsOp, TableScanOperator tsOp, FileSinkOperator fsOp) { Operator<? extends OperatorDesc> parRSOp = rsOp.getParentOperators().get(0); parRSOp.getChildOperators().set(0, fsOp); fsOp.getParentOperators().set(0, parRSOp); fsOp.getConf().setMultiFileSpray(false); fsOp.getConf().setTotalFiles(1); fsOp.getConf().setNumFiles(1); fsOp.getConf().setRemovedReduceSinkBucketSort(true); tsOp.setUseBucketizedHiveInputFormat(true); }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); // process reduce sink added by hive.enforce.bucketing or hive.enforce.sorting opRules.put( new RuleRegExp( "R1", ReduceSinkOperator.getOperatorName() + "%" + SelectOperator.getOperatorName() + "%" + FileSinkOperator.getOperatorName() + "%"), getBucketSortReduceSinkProc(pctx)); // The dispatcher fires the processor corresponding to the closest matching rule Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); // Create a list of top nodes ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
private Task<? extends Serializable> findMoveTask( List<Task<? extends Serializable>> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task<? extends Serializable> mvTsk : mvTasks) { MoveWork mvWork = (MoveWork) mvTsk.getWork(); String srcDir = null; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourceDir(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourceDir(); } if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOp.getConf().getDirName()))) { return mvTsk; } } return null; }
private Task<MoveWork> findMoveTask(List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task<MoveWork> mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); String srcDir = null; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourceDir(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourceDir(); } String fsOpDirName = fsOp.getConf().getFinalDirName(); if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOpDirName))) { return mvTsk; } } return null; }
@Override public ParseContext transform(ParseContext pCtx) throws SemanticException { // create a walker which walks the tree in a DFS manner while maintaining the // operator stack. The dispatcher generates the plan from the operator tree Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); String FS = FileSinkOperator.getOperatorName() + "%"; opRules.put(new RuleRegExp("Sorted Dynamic Partition", FS), getSortDynPartProc(pCtx)); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pCtx.getTopOps().values()); ogw.startWalking(topNodes, null); return pCtx; }
/** * Add the StatsTask as a dependent task of the MoveTask because StatsTask will change the * Table/Partition metadata. For atomicity, we should not change it before the data is actually * in place, which is done by the MoveTask. * * @param nd the FileSinkOperator whose results are taken care of by the MoveTask. * @param mvTask The MoveTask that moves the FileSinkOperator's results. * @param currTask The MapRedTask that the FileSinkOperator belongs to. * @param hconf HiveConf */ private void addStatsTask( FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) { MoveWork mvWork = ((MoveTask) mvTask).getWork(); StatsWork statsWork = new StatsWork(mvWork.getLoadTableWork()); MapredWork mrWork = (MapredWork) currTask.getWork(); // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix()); Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf); // mark the MapredWork and FileSinkOperator for gathering stats nd.getConf().setGatherStats(true); mrWork.setGatheringStats(true); // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partitions to the StatsTask mvTask.addDependentTask(statsTask); statsTask.subscribeFeed(mvTask); }
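The net effect of addStatsTask() is an ordering guarantee between the data move and the metadata update. A sketch of the resulting task chain, using only the Task calls that appear above (the wiring of currTask to mvTask is done by the caller, e.g. in processFS()):

// Sketch of the resulting dependency chain (illustrative only, not additional code):
//   currTask (MapRedTask) --> mvTask (MoveTask) --> statsTask (StatsTask)
// mvTask.addDependentTask(statsTask);   // stats are published only after the move completes
// statsTask.subscribeFeed(mvTask);      // MoveTask forwards the dynamic partition list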
/** * Process the FileSink operator to generate a MoveTask if necessary. * * @param nd current FileSink operator * @param stack parent operators * @param opProcCtx * @param chDir whether the operator should be first output to a tmp dir and then merged to the * final dir later * @return the final file name to which the FileSinkOperator should write. * @throws SemanticException */ private String processFS(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { // Is it the dummy file sink after the mapjoin FileSinkOperator fsOp = (FileSinkOperator) nd; if ((fsOp.getParentOperators().size() == 1) && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) { return null; } GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { seenFSOps = new ArrayList<FileSinkOperator>(); } if (!seenFSOps.contains(fsOp)) { seenFSOps.add(fsOp); } ctx.setSeenFileSinkOps(seenFSOps); Task<? extends Serializable> currTask = ctx.getCurrTask(); // If the directory needs to be changed, send the new directory String dest = null; if (chDir) { dest = fsOp.getConf().getDirName(); // generate the temporary file // it must be on the same file system as the current destination ParseContext parseCtx = ctx.getParseCtx(); Context baseCtx = parseCtx.getContext(); String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri()); fsOp.getConf().setDirName(tmpDir); } Task<? extends Serializable> mvTask = null; if (!chDir) { mvTask = findMoveTask(ctx.getMvTask(), fsOp); } Operator<? extends Serializable> currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); List<Operator<? extends Serializable>> seenOps = ctx.getSeenOps(); List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) { currTask.addDependentTask(mvTask); } // In case of multi-table insert, the path to alias mapping is needed for // all the sources. Since there is no // reducer, treat it as a plan with null reducer // If it is a map-only job, the task needs to be processed if (currTopOp != null) { Task<? extends Serializable> mapTask = opTaskMap.get(null); if (mapTask == null) { assert (!seenOps.contains(currTopOp)); seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx); opTaskMap.put(null, currTask); rootTasks.add(currTask); } else { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) mapTask.getWork(), false, ctx); } // mapTask and currTask should be merged by a join/union operator // (e.g., GenMRUnion1) which has multiple topOps. assert mapTask == currTask : "mapTask.id = " + mapTask.getId() + "; currTask.id = " + currTask.getId(); } return dest; } UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); GenMapRedUtils.initUnionPlan(ctx, currTask, false); return dest; } AbstractMapJoinOperator<?
extends MapJoinDesc> currMapJoinOp = ctx.getCurrMapJoinOp(); if (currMapJoinOp != null) { opTaskMap.put(null, currTask); GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp); MapredWork plan = (MapredWork) currTask.getWork(); String taskTmpDir = mjCtx.getTaskTmpDir(); TableDesc tt_desc = mjCtx.getTTDesc(); assert plan.getPathToAliases().get(taskTmpDir) == null; plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>()); plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); plan.getPathToPartitionInfo().put(taskTmpDir, new PartitionDesc(tt_desc, null)); plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); return dest; } return dest; }
/** * create a Map-only merge job with the following operators: * * @param fsInput * @param ctx * @param finalName MR job J0: ... | v FileSinkOperator_1 (fsInput) | v Merge job J1: | v * TableScan (using CombineHiveInputFormat) (tsMerge) | v FileSinkOperator (fsMerge) * <p>Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths * do not contain the dynamic partitions (their parent). So after the dynamic partitions are * created (after the first job finished before the moveTask or ConditionalTask start), we * need to change the pathToPartitionInfo & pathToAlias to include the dynamic partition * directories. */ private void createMap4Merge(FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) { // // 1. create the operator tree // ParseContext parseCtx = ctx.getParseCtx(); FileSinkDesc fsInputDesc = fsInput.getConf(); // Create a TableScan operator RowSchema inputRS = fsInput.getSchema(); Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS); // Create a FileSink operator TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone(); FileSinkDesc fsOutputDesc = new FileSinkDesc( finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)); FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge); // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema // needs to include the partition column, and the fsOutput should have // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned. DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx(); if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // adding DP ColumnInfo to the RowSchema signature ArrayList<ColumnInfo> signature = inputRS.getSignature(); String tblAlias = fsInputDesc.getTableInfo().getTableName(); LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>(); StringBuilder partCols = new StringBuilder(); for (String dpCol : dpCtx.getDPColNames()) { ColumnInfo colInfo = new ColumnInfo( dpCol, TypeInfoFactory.stringTypeInfo, // all partition column type should be string tblAlias, true); // partition column is virtual column signature.add(colInfo); colMap.put(dpCol, dpCol); // input and output have the same column name partCols.append(dpCol).append('/'); } partCols.setLength(partCols.length() - 1); // remove the last '/' inputRS.setSignature(signature); // create another DynamicPartitionCtx, which has a different input-to-DP column mapping DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx); dpCtx2.setInputToDPCols(colMap); fsOutputDesc.setDynPartCtx(dpCtx2); // update the FileSinkOperator to include partition columns fsInputDesc .getTableInfo() .getProperties() .setProperty( org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS, partCols.toString()); // list of dynamic partition column names } else { // non-partitioned table fsInputDesc .getTableInfo() .getProperties() .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); } // // 2. 
Constructing a conditional task consisting of a move task and a map reduce task // MapRedTask currTask = (MapRedTask) ctx.getCurrTask(); MoveWork dummyMv = new MoveWork( null, null, null, new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false); MapredWork cplan = createMergeTask(ctx.getConf(), tsMerge, fsInputDesc); // use CombineHiveInputFormat for map-only merging cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't // know if merge MR2 will be triggered at execution time ConditionalTask cndTsk = createCondTask(ctx.getConf(), ctx.getCurrTask(), dummyMv, cplan, fsInputDesc.getDirName()); // keep the dynamic partition context in conditional task resolver context ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx(); mrCtx.setDPCtx(fsInputDesc.getDynPartCtx()); // // 3. add the moveTask as the children of the conditional task // LinkMoveTask(ctx, fsOutput, cndTsk); }
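To make the dynamic-partition bookkeeping in createMap4Merge() concrete, here is a hypothetical example of the values it builds for a sink dynamically partitioned by two made-up columns ds and hr (the table and column names are for illustration only):

// Hypothetical example for a sink dynamically partitioned by (ds, hr):
//   colMap   = { "ds" -> "ds", "hr" -> "hr" }   // input and output column names are the same
//   partCols = "ds/hr"                          // '/'-separated value stored in the
//                                               // META_TABLE_PARTITION_COLUMNS table property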
private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName) throws SemanticException { Task<? extends Serializable> currTask = ctx.getCurrTask(); RowSchema inputRS = fsOp.getSchema(); // create a reduce Sink operator - key is the first column ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>(); keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand")); // value is all the columns in the FileSink operator input ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>(); for (ColumnInfo ci : inputRS.getSignature()) { valueCols.add( new ExprNodeColumnDesc( ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol())); } // create a dummy tableScan operator Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS); ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < valueCols.size(); i++) { outputColumns.add(SemanticAnalyzer.getColumnInternalName(i)); } ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc( new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1); OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge); ParseContext parseCtx = ctx.getParseCtx(); FileSinkDesc fsConf = fsOp.getConf(); // Add the extract operator to get the value fields RowResolver out_rwsch = new RowResolver(); RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver(); Integer pos = Integer.valueOf(0); for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) { String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); out_rwsch.put( info[0], info[1], new ColumnInfo( pos.toString(), colInfo.getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol())); pos = Integer.valueOf(pos.intValue() + 1); } Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild( new ExtractDesc( new ExprNodeColumnDesc( TypeInfoFactory.stringTypeInfo, Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(out_rwsch.getColumnInfos())); TableDesc ts = (TableDesc) fsConf.getTableInfo().clone(); fsConf .getTableInfo() .getProperties() .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); FileSinkDesc newFSD = new FileSinkDesc( finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)); FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract); HiveConf conf = parseCtx.getConf(); MapredWork cplan = createMergeTask(conf, tsMerge, fsConf); cplan.setReducer(extract); // NOTE: we should gather stats in MR1 (rather than the merge MR job) // since it is unknown if the merge MR will be triggered at execution time. MoveWork dummyMv = new MoveWork( null, null, null, new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null), false); ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName()); LinkMoveTask(ctx, newOutput, cndTsk); }
/** * File Sink Operator encountered. * * @param nd the file sink operator encountered * @param opProcCtx context */ public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; Task<? extends Serializable> currTask = ctx.getCurrTask(); FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE fsOp.getConf().getTableInfo().getTableName() != null && parseCtx.getQB().getParseInfo().isInsertToTable(); HiveConf hconf = parseCtx.getConf(); // Mark this task as a final map reduce task (ignoring the optional merge task) ((MapredWork) currTask.getWork()).setFinalMapRed(true); // If this file sink desc has been processed due to a linked file sink desc, // use that task Map<FileSinkDesc, Task<? extends Serializable>> fileSinkDescs = ctx.getLinkedFileDescTasks(); if (fileSinkDescs != null) { Task<? extends Serializable> childTask = fileSinkDescs.get(fsOp.getConf()); processLinkedFileDesc(ctx, childTask); return null; } // Has the user enabled merging of files for map-only jobs or for all jobs? if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) { List<Task<MoveWork>> mvTasks = ctx.getMvTask(); // In case of unions or map-joins, it is possible that the file has // already been seen. // So, no need to attempt to merge the files again. if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) { // no need of merging if the move is to a local file system MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp); if (isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) { addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf()); } if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { if (fsOp.getConf().isLinkedFileSink()) { // If the user has set HIVEMERGEMAPREDFILES to false, the assumption is that the // number of reducers is small, so the number of files is small anyway. // However, with this optimization, we are increasing the number of files, // possibly by a big margin. So, merge aggressively. if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) { chDir = true; } } else { // There are separate configuration parameters to control whether to // merge for a map-only job // or for a map-reduce job MapredWork currWork = (MapredWork) currTask.getWork(); boolean mergeMapOnly = hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null; boolean mergeMapRed = hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReducer() != null; if (mergeMapOnly || mergeMapRed) { chDir = true; } } } } } String finalName = processFS(fsOp, stack, opProcCtx, chDir); if (chDir) { // Merge the files in the destination table/partitions by creating a map-only merge job. // If the underlying data is RCFile or ORC, a BlockMerge task is created instead. LOG.info("using CombineHiveInputFormat for the merge job"); createMRWorkForMergingFiles(fsOp, ctx, finalName); } FileSinkDesc fileSinkDesc = fsOp.getConf(); if (fileSinkDesc.isLinkedFileSink()) { Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks = ctx.getLinkedFileDescTasks(); if (linkedFileDescTasks == null) { linkedFileDescTasks = new HashMap<FileSinkDesc, Task<?
extends Serializable>>(); ctx.setLinkedFileDescTasks(linkedFileDescTasks); } // The child tasks may be null in case of a select if ((currTask.getChildTasks() != null) && (currTask.getChildTasks().size() == 1)) { for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) { linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0)); } } } return null; }
/** * Process the FileSink operator to generate a MoveTask if necessary. * * @param fsOp current FileSink operator * @param stack parent operators * @param opProcCtx * @param chDir whether the operator should be first output to a tmp dir and then merged to the * final dir later * @return the final file name to which the FileSinkOperator should write. * @throws SemanticException */ private String processFS( FileSinkOperator fsOp, Stack<Node> stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { seenFSOps = new ArrayList<FileSinkOperator>(); } if (!seenFSOps.contains(fsOp)) { seenFSOps.add(fsOp); } ctx.setSeenFileSinkOps(seenFSOps); Task<? extends Serializable> currTask = ctx.getCurrTask(); // If the directory needs to be changed, send the new directory String dest = null; if (chDir) { dest = fsOp.getConf().getFinalDirName(); // generate the temporary file // it must be on the same file system as the current destination ParseContext parseCtx = ctx.getParseCtx(); Context baseCtx = parseCtx.getContext(); String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri()); FileSinkDesc fileSinkDesc = fsOp.getConf(); // Change all the linked file sink descriptors if (fileSinkDesc.isLinkedFileSink()) { for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) { String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName()); fsConf.setParentDir(tmpDir); fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName); } } else { fileSinkDesc.setDirName(tmpDir); } } Task<MoveWork> mvTask = null; if (!chDir) { mvTask = findMoveTask(ctx.getMvTask(), fsOp); } Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); List<Operator<? extends OperatorDesc>> seenOps = ctx.getSeenOps(); List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) { addDependentMoveTasks(ctx, mvTask, currTask); } // In case of multi-table insert, the path to alias mapping is needed for // all the sources. Since there is no // reducer, treat it as a plan with null reducer // If it is a map-only job, the task needs to be processed if (currTopOp != null) { Task<? extends Serializable> mapTask = opTaskMap.get(null); if (mapTask == null) { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx); } opTaskMap.put(null, currTask); if (!rootTasks.contains(currTask) && (currTask.getParentTasks() == null || currTask.getParentTasks().isEmpty())) { rootTasks.add(currTask); } } else { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) mapTask.getWork(), false, ctx); } else { UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); ctx.setCurrTopOp(null); GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false); return dest; } } // mapTask and currTask should be merged by a join/union operator // (e.g., GenMRUnion1) which has multiple topOps.
// assert mapTask == currTask : "mapTask.id = " + mapTask.getId() // + "; currTask.id = " + currTask.getId(); } return dest; } UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false); return dest; } return dest; }
/** * @param fsInput The FileSink operator. * @param ctx The MR processing context. * @param finalName the final destination path the merge job should output. * @throws SemanticException * <p>create a Map-only merge job using CombineHiveInputFormat for all partitions with * following operators: MR job J0: ... | v FileSinkOperator_1 (fsInput) | v Merge job J1: | v * TableScan (using CombineHiveInputFormat) (tsMerge) | v FileSinkOperator (fsMerge) * <p>Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths * do not contain the dynamic partitions (their parent). So after the dynamic partitions are * created (after the first job finished before the moveTask or ConditionalTask start), we * need to change the pathToPartitionInfo & pathToAlias to include the dynamic partition * directories. */ private void createMRWorkForMergingFiles( FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException { // // 1. create the operator tree // HiveConf conf = ctx.getParseCtx().getConf(); FileSinkDesc fsInputDesc = fsInput.getConf(); // Create a TableScan operator RowSchema inputRS = fsInput.getSchema(); Operator<? extends OperatorDesc> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS); // Create a FileSink operator TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone(); FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT)); FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge); // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema // needs to include the partition column, and the fsOutput should have // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned. DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx(); if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // adding DP ColumnInfo to the RowSchema signature ArrayList<ColumnInfo> signature = inputRS.getSignature(); String tblAlias = fsInputDesc.getTableInfo().getTableName(); LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>(); StringBuilder partCols = new StringBuilder(); for (String dpCol : dpCtx.getDPColNames()) { ColumnInfo colInfo = new ColumnInfo( dpCol, TypeInfoFactory.stringTypeInfo, // all partition column type should be string tblAlias, true); // partition column is virtual column signature.add(colInfo); colMap.put(dpCol, dpCol); // input and output have the same column name partCols.append(dpCol).append('/'); } partCols.setLength(partCols.length() - 1); // remove the last '/' inputRS.setSignature(signature); // create another DynamicPartitionCtx, which has a different input-to-DP column mapping DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx); dpCtx2.setInputToDPCols(colMap); fsOutputDesc.setDynPartCtx(dpCtx2); // update the FileSinkOperator to include partition columns fsInputDesc .getTableInfo() .getProperties() .setProperty( org.apache .hadoop .hive .metastore .api .hive_metastoreConstants .META_TABLE_PARTITION_COLUMNS, partCols.toString()); // list of dynamic partition column names } else { // non-partitioned table fsInputDesc .getTableInfo() .getProperties() .remove( org.apache .hadoop .hive .metastore .api .hive_metastoreConstants .META_TABLE_PARTITION_COLUMNS); } // // 2. 
Constructing a conditional task consisting of a move task and a map reduce task // MoveWork dummyMv = new MoveWork( null, null, null, new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false); MapredWork cplan; if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) { // Check if InputFormatClass is valid String inputFormatClass = conf.getVar(ConfVars.HIVEMERGERCFILEINPUTFORMATBLOCKLEVEL); try { Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass); LOG.info("RCFile format- Using block level merge"); cplan = createBlockMergeTask( fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, RCFileMergeMapper.class, RCFileInputFormat.class, RCFileBlockMergeInputFormat.class); } catch (ClassNotFoundException e) { String msg = "Illegal input format class: " + inputFormatClass; throw new SemanticException(msg); } } else if (conf.getBoolVar(ConfVars.HIVEMERGEORCBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class)) { // Check if InputFormatClass is valid String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEORCINPUTFORMATBLOCKLEVEL); try { Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass); LOG.info("ORCFile format- Using block level merge"); cplan = createBlockMergeTask( fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, OrcMergeMapper.class, OrcInputFormat.class, OrcBlockMergeInputFormat.class); } catch (ClassNotFoundException e) { String msg = "Illegal input format class: " + inputFormatClass; throw new SemanticException(msg); } } else { cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); // use CombineHiveInputFormat for map-only merging } cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't // know if merge MR2 will be triggered at execution time ConditionalTask cndTsk = createCondTask(conf, ctx.getCurrTask(), dummyMv, cplan, fsInputDesc.getFinalDirName()); // keep the dynamic partition context in conditional task resolver context ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx(); mrCtx.setDPCtx(fsInputDesc.getDynPartCtx()); mrCtx.setLbCtx(fsInputDesc.getLbCtx()); // // 3. add the moveTask as the children of the conditional task // linkMoveTask(ctx, fsOutput, cndTsk); }
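The RCFile and ORC branches above share the same validate-then-merge shape; the following is a hedged sketch of a helper that factors out the class-name check (resolveInputFormat is a hypothetical name, not part of this class):

// Hypothetical helper: resolve the configured block-merge input format eagerly so that a
// bad class name fails during planning rather than when the merge job runs.
@SuppressWarnings("unchecked")
private static Class<? extends InputFormat> resolveInputFormat(String inputFormatClass)
    throws SemanticException {
  try {
    return (Class<? extends InputFormat>) Class.forName(inputFormatClass);
  } catch (ClassNotFoundException e) {
    throw new SemanticException("Illegal input format class: " + inputFormatClass);
  }
}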
@Override public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { // If the reduce sink has not been introduced due to bucketing/sorting, ignore it FileSinkOperator fsOp = (FileSinkOperator) nd; ReduceSinkOperator rsOp = (ReduceSinkOperator) fsOp.getParentOperators().get(0).getParentOperators().get(0); List<ReduceSinkOperator> rsOps = pGraphContext.getReduceSinkOperatorsAddedByEnforceBucketingSorting(); // nothing to do if ((rsOps != null) && (!rsOps.contains(rsOp))) { return null; } // Don't do this optimization with updates or deletes if (pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.UPDATE || pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.DELETE) { return null; } if (stack.get(0) instanceof TableScanOperator) { TableScanOperator tso = ((TableScanOperator) stack.get(0)); if (SemanticAnalyzer.isAcidTable(tso.getConf().getTableMetadata())) { /*ACID tables have complex directory layout and require merging of delta files * on read thus we should not try to read bucket files directly*/ return null; } } // Support for dynamic partitions can be added later if (fsOp.getConf().getDynPartCtx() != null) { return null; } // No conversion is possible for the reduce keys for (ExprNodeDesc keyCol : rsOp.getConf().getKeyCols()) { if (!(keyCol instanceof ExprNodeColumnDesc)) { return null; } } Table destTable = fsOp.getConf().getTable(); if (destTable == null) { return null; } int numBucketsDestination = destTable.getNumBuckets(); // Get the positions for sorted and bucketed columns // For sorted columns, also get the order (ascending/descending) - that should // also match for this to be converted to a map-only job. List<Integer> bucketPositions = getBucketPositions(destTable.getBucketCols(), destTable.getCols()); ObjectPair<List<Integer>, List<Integer>> sortOrderPositions = getSortPositionsOrder(destTable.getSortCols(), destTable.getCols()); List<Integer> sortPositions = sortOrderPositions.getFirst(); List<Integer> sortOrder = sortOrderPositions.getSecond(); boolean useBucketSortPositions = true; // Only selects and filters are allowed Operator<? extends OperatorDesc> op = rsOp; // TableScan will also be followed by a Select Operator.
Find the expressions for the // bucketed/sorted columns for the destination table List<ExprNodeColumnDesc> sourceTableBucketCols = new ArrayList<ExprNodeColumnDesc>(); List<ExprNodeColumnDesc> sourceTableSortCols = new ArrayList<ExprNodeColumnDesc>(); op = op.getParentOperators().get(0); while (true) { if (!(op instanceof TableScanOperator) && !(op instanceof FilterOperator) && !(op instanceof SelectOperator) && !(op instanceof SMBMapJoinOperator)) { return null; } if (op instanceof SMBMapJoinOperator) { // Bucketing and sorting keys should exactly match if (!(bucketPositions.equals(sortPositions))) { return null; } SMBMapJoinOperator smbOp = (SMBMapJoinOperator) op; SMBJoinDesc smbJoinDesc = smbOp.getConf(); int posBigTable = smbJoinDesc.getPosBigTable(); // join keys dont match the bucketing keys List<ExprNodeDesc> keysBigTable = smbJoinDesc.getKeys().get((byte) posBigTable); if (keysBigTable.size() != bucketPositions.size()) { return null; } if (!validateSMBJoinKeys( smbJoinDesc, sourceTableBucketCols, sourceTableSortCols, sortOrder)) { return null; } sourceTableBucketCols.clear(); sourceTableSortCols.clear(); useBucketSortPositions = false; for (ExprNodeDesc keyBigTable : keysBigTable) { if (!(keyBigTable instanceof ExprNodeColumnDesc)) { return null; } sourceTableBucketCols.add((ExprNodeColumnDesc) keyBigTable); sourceTableSortCols.add((ExprNodeColumnDesc) keyBigTable); } // since it is a sort-merge join, only follow the big table op = op.getParentOperators().get(posBigTable); } else { // nothing to be done for filters - the output schema does not change. if (op instanceof TableScanOperator) { assert !useBucketSortPositions; TableScanOperator ts = (TableScanOperator) op; Table srcTable = ts.getConf().getTableMetadata(); // Find the positions of the bucketed columns in the table corresponding // to the select list. // Consider the following scenario: // T1(key, value1, value2) bucketed/sorted by key into 2 buckets // T2(dummy, key, value1, value2) bucketed/sorted by key into 2 buckets // A query like: insert overwrite table T2 select 1, key, value1, value2 from T1 // should be optimized. // Start with the destination: T2, bucketed/sorted position is [1] // At the source T1, the column corresponding to that position is [key], which // maps to column [0] of T1, which is also bucketed/sorted into the same // number of buckets List<Integer> newBucketPositions = new ArrayList<Integer>(); for (int pos = 0; pos < bucketPositions.size(); pos++) { ExprNodeColumnDesc col = sourceTableBucketCols.get(pos); String colName = col.getColumn(); int bucketPos = findColumnPosition(srcTable.getCols(), colName); if (bucketPos < 0) { return null; } newBucketPositions.add(bucketPos); } // Find the positions/order of the sorted columns in the table corresponding // to the select list. 
List<Integer> newSortPositions = new ArrayList<Integer>(); for (int pos = 0; pos < sortPositions.size(); pos++) { ExprNodeColumnDesc col = sourceTableSortCols.get(pos); String colName = col.getColumn(); int sortPos = findColumnPosition(srcTable.getCols(), colName); if (sortPos < 0) { return null; } newSortPositions.add(sortPos); } if (srcTable.isPartitioned()) { PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(srcTable.getTableName(), ts); List<Partition> partitions = prunedParts.getNotDeniedPartns(); // Support for dynamic partitions can be added later // The following is not optimized: // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1'; // where T1 and T2 are bucketed by the same keys and partitioned by ds. hr if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) { return null; } for (Partition partition : partitions) { if (!checkPartition( partition, newBucketPositions, newSortPositions, sortOrder, numBucketsDestination)) { return null; } } removeReduceSink( rsOp, (TableScanOperator) op, fsOp, partitions.get(0).getSortedPaths()); return null; } else { if (!checkTable( srcTable, newBucketPositions, newSortPositions, sortOrder, numBucketsDestination)) { return null; } removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable.getSortedPaths()); return null; } } // None of the operators is changing the positions else if (op instanceof SelectOperator) { SelectOperator selectOp = (SelectOperator) op; SelectDesc selectDesc = selectOp.getConf(); // Iterate backwards, from the destination table to the top of the tree // Based on the output column names, get the new columns. if (!useBucketSortPositions) { bucketPositions.clear(); sortPositions.clear(); List<String> outputColumnNames = selectDesc.getOutputColumnNames(); for (ExprNodeColumnDesc col : sourceTableBucketCols) { String colName = col.getColumn(); int colPos = outputColumnNames.indexOf(colName); if (colPos < 0) { return null; } bucketPositions.add(colPos); } for (ExprNodeColumnDesc col : sourceTableSortCols) { String colName = col.getColumn(); int colPos = outputColumnNames.indexOf(colName); if (colPos < 0) { return null; } sortPositions.add(colPos); } } // There may be multiple selects - chose the one closest to the table sourceTableBucketCols.clear(); sourceTableSortCols.clear(); // Only columns can be selected for both sorted and bucketed positions for (int pos : bucketPositions) { ExprNodeDesc selectColList = selectDesc.getColList().get(pos); if (!(selectColList instanceof ExprNodeColumnDesc)) { return null; } sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList); } for (int pos : sortPositions) { ExprNodeDesc selectColList = selectDesc.getColList().get(pos); if (!(selectColList instanceof ExprNodeColumnDesc)) { return null; } sourceTableSortCols.add((ExprNodeColumnDesc) selectColList); } useBucketSortPositions = false; } op = op.getParentOperators().get(0); } } }
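findColumnPosition() is used above but its body is not shown; the following is a plausible sketch of what it does, assuming srcTable.getCols() returns the table's FieldSchema list as it does elsewhere in this optimizer (hypothetical implementation, not the actual one):

// Hypothetical sketch: return the index of colName among the table columns, or -1 so the
// caller can bail out when the bucketed/sorted expression is not a plain table column.
private int findColumnPosition(List<FieldSchema> cols, String colName) {
  for (int pos = 0; pos < cols.size(); pos++) {
    if (cols.get(pos).getName().equals(colName)) {
      return pos;
    }
  }
  return -1;
}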
@Override public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { // introduce RS and EX before FS. If the operator tree already contains // RS then ReduceSinkDeDuplication optimization should merge them FileSinkOperator fsOp = (FileSinkOperator) nd; LOG.info("Sorted dynamic partitioning optimization kicked in.."); // if not dynamic partitioning then bail out if (fsOp.getConf().getDynPartCtx() == null) { LOG.debug( "Bailing out of sort dynamic partition optimization as dynamic partitioning context is null"); return null; } // if list bucketing then bail out ListBucketingCtx lbCtx = fsOp.getConf().getLbCtx(); if (lbCtx != null && !lbCtx.getSkewedColNames().isEmpty() && !lbCtx.getSkewedColValues().isEmpty()) { LOG.debug( "Bailing out of sort dynamic partition optimization as list bucketing is enabled"); return null; } Table destTable = fsOp.getConf().getTable(); if (destTable == null) { LOG.debug( "Bailing out of sort dynamic partition optimization as destination table is null"); return null; } // unlink connection between FS and its parent Operator<? extends OperatorDesc> fsParent = fsOp.getParentOperators().get(0); // if all dp columns got constant folded then disable this optimization if (allStaticPartitions(fsParent, fsOp.getConf().getDynPartCtx())) { LOG.debug( "Bailing out of sorted dynamic partition optimizer as all dynamic partition" + " columns got constant folded (static partitioning)"); return null; } // if RS is inserted by enforce bucketing or sorting, we need to remove it // since ReduceSinkDeDuplication will not merge them to single RS. // RS inserted by enforce bucketing/sorting will have bucketing column in // reduce sink key whereas RS inserted by this optimization will have // partition columns followed by bucket number followed by sort columns in // the reduce sink key. Since both key columns are not prefix subset // ReduceSinkDeDuplication will not merge them together resulting in 2 MR jobs. // To avoid that we will remove the RS (and EX) inserted by enforce bucketing/sorting. if (!removeRSInsertedByEnforceBucketing(fsOp)) { LOG.debug( "Bailing out of sort dynamic partition optimization as some partition columns " + "got constant folded."); return null; } // unlink connection between FS and its parent fsParent = fsOp.getParentOperators().get(0); fsParent.getChildOperators().clear(); DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx(); int numBuckets = destTable.getNumBuckets(); // if enforce bucketing/sorting is disabled numBuckets will not be set. // set the number of buckets here to ensure creation of empty buckets dpCtx.setNumBuckets(numBuckets); // Get the positions for partition, bucket and sort columns List<Integer> bucketPositions = getBucketPositions(destTable.getBucketCols(), destTable.getCols()); List<Integer> sortPositions = null; List<Integer> sortOrder = null; ArrayList<ExprNodeDesc> bucketColumns; if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE || fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) { // When doing updates and deletes we always want to sort on the rowid because the ACID // reader will expect this sort order when doing reads. So // ignore whatever comes from the table and enforce this sort order instead. 
sortPositions = Arrays.asList(0); sortOrder = Arrays.asList(1); // 1 means asc, could really use enum here in the thrift if bucketColumns = new ArrayList<>(); // Bucketing column is already present in ROW__ID, which is specially // handled in ReduceSink } else { if (!destTable.getSortCols().isEmpty()) { // Sort columns specified by table sortPositions = getSortPositions(destTable.getSortCols(), destTable.getCols()); sortOrder = getSortOrders(destTable.getSortCols(), destTable.getCols()); } else { // Infer sort columns from operator tree sortPositions = Lists.newArrayList(); sortOrder = Lists.newArrayList(); inferSortPositions(fsParent, sortPositions, sortOrder); } List<ColumnInfo> colInfos = fsParent.getSchema().getSignature(); bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos); } List<Integer> sortNullOrder = new ArrayList<Integer>(); for (int order : sortOrder) { sortNullOrder.add(order == 1 ? 0 : 1); // for asc, nulls first; for desc, nulls last } LOG.debug("Got sort order"); for (int i : sortPositions) LOG.debug("sort position " + i); for (int i : sortOrder) LOG.debug("sort order " + i); for (int i : sortNullOrder) LOG.debug("sort null order " + i); List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema()); // update file sink descriptor fsOp.getConf().setMultiFileSpray(false); fsOp.getConf().setNumFiles(1); fsOp.getConf().setTotalFiles(1); ArrayList<ColumnInfo> parentCols = Lists.newArrayList(fsParent.getSchema().getSignature()); ArrayList<ExprNodeDesc> allRSCols = Lists.newArrayList(); for (ColumnInfo ci : parentCols) { allRSCols.add(new ExprNodeColumnDesc(ci)); } // Create ReduceSink operator ReduceSinkOperator rsOp = getReduceSinkOp( partitionPositions, sortPositions, sortOrder, sortNullOrder, allRSCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType()); List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(allRSCols.size()); List<String> colNames = new ArrayList<String>(); String colName; for (int i = 0; i < allRSCols.size(); i++) { ExprNodeDesc col = allRSCols.get(i); colName = col.getExprString(); colNames.add(colName); if (partitionPositions.contains(i) || sortPositions.contains(i)) { descs.add( new ExprNodeColumnDesc( col.getTypeInfo(), ReduceField.KEY.toString() + "." + colName, null, false)); } else { descs.add( new ExprNodeColumnDesc( col.getTypeInfo(), ReduceField.VALUE.toString() + "." 
+ colName, null, false)); } } RowSchema selRS = new RowSchema(fsParent.getSchema()); if (!bucketColumns.isEmpty() || fsOp.getConf().getWriteType() == Operation.DELETE || fsOp.getConf().getWriteType() == Operation.UPDATE) { descs.add( new ExprNodeColumnDesc( TypeInfoFactory.stringTypeInfo, ReduceField.KEY.toString() + ".'" + BUCKET_NUMBER_COL_NAME + "'", null, false)); colNames.add("'" + BUCKET_NUMBER_COL_NAME + "'"); ColumnInfo ci = new ColumnInfo( BUCKET_NUMBER_COL_NAME, TypeInfoFactory.stringTypeInfo, selRS.getSignature().get(0).getTabAlias(), true, true); selRS.getSignature().add(ci); fsParent.getSchema().getSignature().add(ci); } // Create SelectDesc SelectDesc selConf = new SelectDesc(descs, colNames); // Create Select Operator SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(selConf, selRS, rsOp); // link SEL to FS fsOp.getParentOperators().clear(); fsOp.getParentOperators().add(selOp); selOp.getChildOperators().add(fsOp); // Set if partition sorted or partition bucket sorted fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED); if (bucketColumns.size() > 0 || fsOp.getConf().getWriteType() == Operation.DELETE || fsOp.getConf().getWriteType() == Operation.UPDATE) { fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED); } // update partition column info in FS descriptor fsOp.getConf().setPartitionCols(rsOp.getConf().getPartitionCols()); LOG.info( "Inserted " + rsOp.getOperatorId() + " and " + selOp.getOperatorId() + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId()); parseCtx.setReduceSinkAddedBySortedDynPartition(true); return null; }
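getSortPositions() and getSortOrders() are referenced above without their bodies; these are hedged sketches of what they plausibly compute, assuming the metastore Order objects carry the sort column name and a 1/0 ascending flag (hypothetical implementations, shown only to clarify how sortPositions and sortOrder are consumed by the code above):

// Hypothetical sketches of the helpers used above; not the actual implementations.
private List<Integer> getSortPositions(List<Order> tabSortCols, List<FieldSchema> tabCols) {
  List<Integer> sortPositions = new ArrayList<Integer>();
  for (Order sortCol : tabSortCols) {
    for (int pos = 0; pos < tabCols.size(); pos++) {
      if (tabCols.get(pos).getName().equals(sortCol.getCol())) {
        sortPositions.add(pos);   // position of the sort column in the table schema
      }
    }
  }
  return sortPositions;
}

private List<Integer> getSortOrders(List<Order> tabSortCols, List<FieldSchema> tabCols) {
  List<Integer> sortOrders = new ArrayList<Integer>();
  for (Order sortCol : tabSortCols) {
    sortOrders.add(sortCol.getOrder());   // 1 means ascending, matching the sortNullOrder logic above
  }
  return sortOrders;
}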