/*
 * Several file sink descriptors are linked together, so the task that was created for the
 * first linked descriptor is reused instead of building a new one.
 *
 * If the context still has a current top operator, it is recorded as seen and handed to
 * GenMapRedUtils.setTaskPlan together with the current task's MapredWork; the current task
 * is then registered as a root task when it has no parent tasks. Finally, a non-null
 * childTask is made dependent on the current task.
 */
private void processLinkedFileDesc(
    GenMRProcContext ctx, Task<? extends Serializable> childTask) throws SemanticException {
  Operator<? extends OperatorDesc> topOp = ctx.getCurrTopOp();
  String aliasId = ctx.getCurrAliasId();
  List<Operator<? extends OperatorDesc>> visitedOps = ctx.getSeenOps();
  List<Task<? extends Serializable>> roots = ctx.getRootTasks();
  Task<? extends Serializable> task = ctx.getCurrTask();

  if (topOp != null) {
    // Only plan the top operator the first time it is encountered.
    if (!visitedOps.contains(topOp)) {
      visitedOps.add(topOp);
      GenMapRedUtils.setTaskPlan(aliasId, topOp, (MapredWork) task.getWork(), false, ctx);
    }

    // A task without parents is a root of the task graph; add it once.
    if (!roots.contains(task)) {
      List<Task<? extends Serializable>> parents = task.getParentTasks();
      if (parents == null || parents.isEmpty()) {
        roots.add(task);
      }
    }
  }

  if (childTask != null) {
    task.addDependentTask(childTask);
  }
}
/**
 * Detaches a dependent (child) task from this task.
 *
 * <p>Nothing happens unless {@code dependent} is currently one of this task's children. When
 * it is, it is removed from the child list and this task is removed from the dependent's
 * parent list (if that list exists).
 *
 * @param dependent the task to remove
 */
public void removeDependentTask(Task<? extends Serializable> dependent) {
  List<Task<? extends Serializable>> children = getChildTasks();
  if (children == null || !children.contains(dependent)) {
    return;
  }
  children.remove(dependent);

  // Drop the back-reference as well; remove() is a no-op when this task is not listed.
  List<Task<? extends Serializable>> parents = dependent.getParentTasks();
  if (parents != null) {
    parents.remove(this);
  }
}
/**
 * Registers {@code task} as a root task when it has no parent tasks and is not registered yet.
 *
 * @param task the candidate root task
 * @return the result of adding the task to the root list, or {@code false} when the task has
 *     parents or is already a root
 */
public boolean addRootIfPossible(Task<? extends Serializable> task) {
  List<Task<? extends Serializable>> parents = task.getParentTasks();
  boolean hasParents = parents != null && !parents.isEmpty();
  if (hasParents || rootTasks.contains(task)) {
    return false;
  }
  return rootTasks.add(task);
}
/**
 * Makes {@code dependent} a child of this task and this task a parent of {@code dependent}.
 *
 * <p>The child list of this task and the parent list of the dependent are created on demand.
 * If the dependent is already a child, nothing is changed.
 *
 * @param dependent the task that should run after this task
 * @return true if the task got added, false if it already existed
 */
public boolean addDependentTask(Task<? extends Serializable> dependent) {
  if (getChildTasks() == null) {
    setChildTasks(new ArrayList<Task<? extends Serializable>>());
  }

  List<Task<? extends Serializable>> children = getChildTasks();
  if (children.contains(dependent)) {
    return false;
  }
  children.add(dependent);

  // Keep the reverse edge in sync: this task becomes a parent of the dependent.
  if (dependent.getParentTasks() == null) {
    dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
  }
  List<Task<? extends Serializable>> parents = dependent.getParentTasks();
  if (!parents.contains(this)) {
    parents.add(this);
  }
  return true;
}
public void removeFromChildrenTasks() { List<Task<? extends Serializable>> childrenTasks = this.getChildTasks(); if (childrenTasks == null) { return; } for (Task<? extends Serializable> childTsk : childrenTasks) { // remove this task from its children tasks childTsk.getParentTasks().remove(this); // recursively remove non-parent task from its children List<Task<? extends Serializable>> siblingTasks = childTsk.getParentTasks(); if (siblingTasks == null || siblingTasks.size() == 0) { childTsk.removeFromChildrenTasks(); } } }
public Task<? extends Serializable> getAndInitBackupTask() { if (backupTask != null) { // first set back the backup task with its children task. if (backupChildrenTasks != null) { for (Task<? extends Serializable> backupChild : backupChildrenTasks) { backupChild.getParentTasks().add(backupTask); } } // recursively remove task from its children tasks if this task doesn't have any parent task this.removeFromChildrenTasks(); } return backupTask; }
public static void mergeMapJoinUnion(UnionOperator union, GenMRProcContext ctx, int pos) throws SemanticException { ParseContext parseCtx = ctx.getParseCtx(); UnionProcContext uCtx = parseCtx.getUCtx(); UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); assert uPrsCtx != null; Task<? extends Serializable> currTask = ctx.getCurrTask(); GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); Task<? extends Serializable> uTask = null; union.getParentOperators().get(pos); MapredWork uPlan = null; // union is encountered for the first time if (uCtxTask == null) { uCtxTask = new GenMRUnionCtx(); uPlan = GenMapRedUtils.getMapRedWork(parseCtx.getConf()); uTask = TaskFactory.get(uPlan, parseCtx.getConf()); uCtxTask.setUTask(uTask); ctx.setUnionTask(union, uCtxTask); } else { uTask = uCtxTask.getUTask(); uPlan = (MapredWork) uTask.getWork(); } // If there is a mapjoin at position 'pos' if (uPrsCtx.getMapJoinSubq(pos)) { GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(ctx.getCurrMapJoinOp()); String taskTmpDir = mjCtx.getTaskTmpDir(); if (uPlan.getPathToAliases().get(taskTmpDir) == null) { uPlan.getPathToAliases().put(taskTmpDir, new ArrayList<String>()); uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); uPlan.getPathToPartitionInfo().put(taskTmpDir, new PartitionDesc(mjCtx.getTTDesc(), null)); uPlan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); } for (Task t : currTask.getParentTasks()) { t.addDependentTask(uTask); } try { boolean notDone = true; while (notDone) { for (Task t : currTask.getParentTasks()) { t.removeDependentTask(currTask); } notDone = false; } } catch (ConcurrentModificationException e) { } } else { setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx); } ctx.setCurrTask(uTask); ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); ctx.setCurrMapJoinOp(null); ctx.getMapCurrCtx().put(union, new GenMapRedCtx(ctx.getCurrTask(), null, null)); }
/** * Merge the current task with the task for the current reducer. * * @param op operator being processed * @param oldTask the old task for the current reducer * @param task the current task for the current reducer * @param opProcCtx processing context * @param pos position of the parent in the stack */ public static void joinPlan( Operator<? extends Serializable> op, Task<? extends Serializable> oldTask, Task<? extends Serializable> task, GenMRProcContext opProcCtx, int pos, boolean split, boolean readMapJoinData, boolean readUnionData, boolean createLocalWork) throws SemanticException { Task<? extends Serializable> currTask = task; MapredWork plan = (MapredWork) currTask.getWork(); Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp(); List<Task<? extends Serializable>> parTasks = null; // terminate the old task and make current task dependent on it if (split) { assert oldTask != null; splitTasks(op, oldTask, currTask, opProcCtx, true, false, 0); } else { if ((oldTask != null) && (oldTask.getParentTasks() != null) && !oldTask.getParentTasks().isEmpty()) { parTasks = new ArrayList<Task<? extends Serializable>>(); parTasks.addAll(oldTask.getParentTasks()); Object[] parTaskArr = parTasks.toArray(); for (Object element : parTaskArr) { ((Task<? extends Serializable>) element).removeDependentTask(oldTask); } } } if (currTopOp != null) { List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps(); String currAliasId = opProcCtx.getCurrAliasId(); if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); boolean local = false; if (pos != -1) { local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ? false : true; } setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); if (op instanceof AbstractMapJoinOperator) { setupBucketMapJoinInfo( plan, (AbstractMapJoinOperator<? 
extends MapJoinDesc>) op, createLocalWork); } } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); } else if (opProcCtx.getCurrMapJoinOp() != null) { AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = opProcCtx.getCurrMapJoinOp(); if (readUnionData) { initUnionPlan(opProcCtx, currTask, false); } else { GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); // In case of map-join followed by map-join, the file needs to be // obtained from the old map join AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin = mjCtx.getOldMapJoin(); String taskTmpDir = null; TableDesc tt_desc = null; Operator<? extends Serializable> rootOp = null; boolean local = ((pos == -1) || (pos == (mjOp.getConf()).getPosBigTable())) ? false : true; if (oldMapJoin == null) { if (opProcCtx.getParseCtx().getListMapJoinOpsNoReducer().contains(mjOp) || local || (oldTask != null) && (parTasks != null)) { taskTmpDir = mjCtx.getTaskTmpDir(); tt_desc = mjCtx.getTTDesc(); rootOp = mjCtx.getRootMapJoinOp(); } } else { GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin); assert oldMjCtx != null; taskTmpDir = oldMjCtx.getTaskTmpDir(); tt_desc = oldMjCtx.getTTDesc(); rootOp = oldMjCtx.getRootMapJoinOp(); } setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork); } opProcCtx.setCurrMapJoinOp(null); if ((oldTask != null) && (parTasks != null)) { for (Task<? extends Serializable> parTask : parTasks) { parTask.addDependentTask(currTask); if (opProcCtx.getRootTasks().contains(currTask)) { opProcCtx.getRootTasks().remove(currTask); } } } } opProcCtx.setCurrTask(currTask); }
/** * Process the FileSink operator to generate a MoveTask if necessary. * * @param fsOp current FileSink operator * @param stack parent operators * @param opProcCtx * @param chDir whether the operator should be first output to a tmp dir and then merged to the * final dir later * @return the final file name to which the FileSinkOperator should store. * @throws SemanticException */ private String processFS( FileSinkOperator fsOp, Stack<Node> stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { seenFSOps = new ArrayList<FileSinkOperator>(); } if (!seenFSOps.contains(fsOp)) { seenFSOps.add(fsOp); } ctx.setSeenFileSinkOps(seenFSOps); Task<? extends Serializable> currTask = ctx.getCurrTask(); // If the directory needs to be changed, send the new directory String dest = null; if (chDir) { dest = fsOp.getConf().getFinalDirName(); // generate the temporary file // it must be on the same file system as the current destination ParseContext parseCtx = ctx.getParseCtx(); Context baseCtx = parseCtx.getContext(); String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri()); FileSinkDesc fileSinkDesc = fsOp.getConf(); // Change all the linked file sink descriptors if (fileSinkDesc.isLinkedFileSink()) { for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) { String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName()); fsConf.setParentDir(tmpDir); fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName); } } else { fileSinkDesc.setDirName(tmpDir); } } Task<MoveWork> mvTask = null; if (!chDir) { mvTask = findMoveTask(ctx.getMvTask(), fsOp); } Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); List<Operator<? 
extends OperatorDesc>> seenOps = ctx.getSeenOps(); List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) { addDependentMoveTasks(ctx, mvTask, currTask); } // In case of multi-table insert, the path to alias mapping is needed for // all the sources. Since there is no // reducer, treat it as a plan with null reducer // If it is a map-only job, the task needs to be processed if (currTopOp != null) { Task<? extends Serializable> mapTask = opTaskMap.get(null); if (mapTask == null) { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx); } opTaskMap.put(null, currTask); if (!rootTasks.contains(currTask) && (currTask.getParentTasks() == null || currTask.getParentTasks().isEmpty())) { rootTasks.add(currTask); } } else { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) mapTask.getWork(), false, ctx); } else { UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); ctx.setCurrTopOp(null); GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false); return dest; } } // mapTask and currTask should be merged by and join/union operator // (e.g., GenMRUnion1) which has multiple topOps. // assert mapTask == currTask : "mapTask.id = " + mapTask.getId() // + "; currTask.id = " + currTask.getId(); } return dest; } UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false); return dest; } return dest; }