/**
 * File Sink Operator encountered.
 *
 * @param nd the file sink operator encountered
 * @param opProcCtx context
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
    Object... nodeOutputs) throws SemanticException {
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ParseContext parseCtx = ctx.getParseCtx();
  boolean chDir = false;
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  FileSinkOperator fsOp = (FileSinkOperator) nd;
  boolean isInsertTable = // is INSERT OVERWRITE TABLE
      fsOp.getConf().getTableInfo().getTableName() != null
          && parseCtx.getQB().getParseInfo().isInsertToTable();
  HiveConf hconf = parseCtx.getConf();

  // Mark this task as a final map-reduce task (ignoring the optional merge task)
  ((MapredWork) currTask.getWork()).setFinalMapRed(true);

  // If this file sink desc has already been processed via a linked file sink desc,
  // reuse that task
  Map<FileSinkDesc, Task<? extends Serializable>> fileSinkDescs = ctx.getLinkedFileDescTasks();
  if (fileSinkDescs != null) {
    Task<? extends Serializable> childTask = fileSinkDescs.get(fsOp.getConf());
    processLinkedFileDesc(ctx, childTask);
    return null;
  }

  // Has the user enabled merging of files for map-only jobs or for all jobs?
  if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
    List<Task<MoveWork>> mvTasks = ctx.getMvTask();

    // In case of unions or map-joins, the file sink may already have been seen,
    // so there is no need to attempt to merge the files again.
    if ((ctx.getSeenFileSinkOps() == null)
        || (!ctx.getSeenFileSinkOps().contains(nd))) {

      // no need of merging if the move is to a local file system
      MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

      if (isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) {
        addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
      }

      if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) {
        if (fsOp.getConf().isLinkedFileSink()) {
          // If the user has HIVEMERGEMAPREDFILES set to false, the assumption was that
          // the number of reducers is small, so the number of files is small anyway.
          // However, with this optimization we may increase the number of files by a
          // big margin, so merge aggressively.
          if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES)
              || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) {
            chDir = true;
          }
        } else {
          // There are separate configuration parameters to control whether to merge
          // for a map-only job or for a map-reduce job
          MapredWork currWork = (MapredWork) currTask.getWork();
          boolean mergeMapOnly =
              hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null;
          boolean mergeMapRed =
              hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReducer() != null;
          if (mergeMapOnly || mergeMapRed) {
            chDir = true;
          }
        }
      }
    }
  }

  String finalName = processFS(fsOp, stack, opProcCtx, chDir);

  if (chDir) {
    // Merge the files in the destination table/partitions by creating a map-only merge job.
    // If the underlying data is RCFile or OrcFile, a BlockMerge task is created instead.
    LOG.info("using CombineHiveInputformat for the merge job");
    createMRWorkForMergingFiles(fsOp, ctx, finalName);
  }

  FileSinkDesc fileSinkDesc = fsOp.getConf();
  if (fileSinkDesc.isLinkedFileSink()) {
    Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks =
        ctx.getLinkedFileDescTasks();
    if (linkedFileDescTasks == null) {
      linkedFileDescTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>();
      ctx.setLinkedFileDescTasks(linkedFileDescTasks);
    }

    // The child tasks may be null in case of a select
    if ((currTask.getChildTasks() != null)
        && (currTask.getChildTasks().size() == 1)) {
      for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
        linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
      }
    }
  }

  return null;
}
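
/*
 * Illustrative sketch, not part of the original source: how a NodeProcessor such as
 * process() above is typically registered with Hive's rule-based operator walker in
 * the pre-Tez MapReduce compiler. The helper name and its parameters are hypothetical;
 * the classes used (RuleRegExp, DefaultRuleDispatcher, GenMapRedWalker, GenMROperator,
 * GenMRFileSink1) follow the Hive 1.x API and are assumptions about the surrounding
 * context, not code taken from this file.
 */
private void walkOperatorTreeSketch(ParseContext parseCtx, GenMRProcContext procCtx)
    throws SemanticException {
  // Route FileSinkOperator nodes ("FS%") to the processor defined above.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", FileSinkOperator.getOperatorName() + "%"),
      new GenMRFileSink1());

  // The dispatcher fires the matching rule; unmatched nodes fall through to the
  // default processor.
  Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
  GraphWalker ogw = new GenMapRedWalker(disp);

  // Start walking from the top (table scan) operators of the parsed query.
  List<Node> topNodes = new ArrayList<Node>(parseCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}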
private void outputPlan(Task<? extends Serializable> task) {
  if (task == null) {
    return;
  }
  out.printf("Stage: %s\n", task.getId());

  // real output
  Serializable work = task.getWork();
  if (work == null) {
    return;
  }

  if (work instanceof FetchWork) {
    out.println("Fetch");
    output(((FetchWork) work).getSource());
  } else if (work instanceof MapredLocalWork) {
    out.println("MapredLocalWork");
    // fetch side
    try {
      out.println("Fetch Part");
      Collection<FetchWork> fetchWorkCollect =
          ((MapredLocalWork) work).getAliasToFetchWork().values();
      for (FetchWork f : fetchWorkCollect) {
        output(f.getSource());
      }
    } catch (Exception e) {
      out.println("Exception 1");
    }
    // other aliases
    try {
      out.println("Other Parts");
      Collection<Operator<? extends OperatorDesc>> collect =
          ((MapredLocalWork) work).getAliasToWork().values();
      for (Operator<? extends OperatorDesc> c : collect) {
        output(c);
      }
    } catch (Exception e) {
      out.println("Exception 2");
    }
  } else if (work instanceof MapredWork) {
    out.println("MapredWork");
    try {
      Collection<Operator<? extends OperatorDesc>> collect =
          ((MapredWork) work).getAllOperators();
      for (Operator<? extends OperatorDesc> c : collect) {
        output(c);
        break; // the first operator prints the entire operator tree
      }
    } catch (Exception e) {
      out.println("Exception 3");
    }
  } else {
    output(work);
  }

  // -------- other cases --------
  if (task instanceof ConditionalTask
      && ((ConditionalTask) task).getListTasks() != null) {
    for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
      outputPlan(con);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> child : task.getChildTasks()) {
      outputPlan(child);
    }
  }
}
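
/*
 * Illustrative sketch, not part of the original source: outputPlan() recurses into
 * conditional and child tasks, so a caller only needs to hand it the root tasks of a
 * compiled plan. "plan" is assumed to be an org.apache.hadoop.hive.ql.QueryPlan
 * obtained elsewhere (e.g. from the Driver); the helper name is hypothetical.
 */
private void outputAllStages(QueryPlan plan) {
  for (Task<? extends Serializable> rootTask : plan.getRootTasks()) {
    outputPlan(rootTask);
  }
}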