private void resetFailedState(ExecutableFlowBase flow, List<ExecutableNode> nodesToRetry) { // bottom up LinkedList<ExecutableNode> queue = new LinkedList<ExecutableNode>(); for (String id : flow.getEndNodes()) { ExecutableNode node = flow.getExecutableNode(id); queue.add(node); } long maxStartTime = -1; while (!queue.isEmpty()) { ExecutableNode node = queue.poll(); Status oldStatus = node.getStatus(); maxStartTime = Math.max(node.getStartTime(), maxStartTime); long currentTime = System.currentTimeMillis(); if (node.getStatus() == Status.SUCCEEDED) { // This is a candidate parent for restart nodesToRetry.add(node); continue; } else if (node.getStatus() == Status.RUNNING) { continue; } else if (node.getStatus() == Status.SKIPPED) { node.setStatus(Status.DISABLED); node.setEndTime(-1); node.setStartTime(-1); node.setUpdateTime(currentTime); } else if (node instanceof ExecutableFlowBase) { ExecutableFlowBase base = (ExecutableFlowBase) node; switch (base.getStatus()) { case CANCELLED: node.setStatus(Status.READY); node.setEndTime(-1); node.setStartTime(-1); node.setUpdateTime(currentTime); // Break out of the switch. We'll reset the flow just like a normal node break; case KILLED: case FAILED: case FAILED_FINISHING: resetFailedState(base, nodesToRetry); continue; default: // Continue the while loop. If the job is in a finished state that's not // a failure, we don't want to reset the job. continue; } } else if (node.getStatus() == Status.CANCELLED) { // Not a flow, but killed node.setStatus(Status.READY); node.setStartTime(-1); node.setEndTime(-1); node.setUpdateTime(currentTime); } else if (node.getStatus() == Status.FAILED || node.getStatus() == Status.KILLED) { node.resetForRetry(); nodesToRetry.add(node); } if (!(node instanceof ExecutableFlowBase) && node.getStatus() != oldStatus) { logger.info( "Resetting job '" + node.getNestedId() + "' from " + oldStatus + " to " + node.getStatus()); } for (String inId : node.getInNodes()) { ExecutableNode nodeUp = flow.getExecutableNode(inId); queue.add(nodeUp); } } // At this point, the following code will reset the flow Status oldFlowState = flow.getStatus(); if (maxStartTime == -1) { // Nothing has run inside the flow, so we assume the flow hasn't even started running yet. flow.setStatus(Status.READY); } else { flow.setStatus(Status.RUNNING); // Add any READY start nodes. Usually it means the flow started, but the start node has not. for (String id : flow.getStartNodes()) { ExecutableNode node = flow.getExecutableNode(id); if (node.getStatus() == Status.READY || node.getStatus() == Status.DISABLED) { nodesToRetry.add(node); } } } flow.setUpdateTime(System.currentTimeMillis()); flow.setEndTime(-1); logger.info( "Resetting flow '" + flow.getNestedId() + "' from " + oldFlowState + " to " + flow.getStatus()); }
private void finalizeFlow(ExecutableFlowBase flow) { String id = flow == this.flow ? "" : flow.getNestedId(); // If it's not the starting flow, we'll create set of output props // for the finished flow. boolean succeeded = true; Props previousOutput = null; for (String end : flow.getEndNodes()) { ExecutableNode node = flow.getExecutableNode(end); if (node.getStatus() == Status.KILLED || node.getStatus() == Status.FAILED || node.getStatus() == Status.CANCELLED) { succeeded = false; } Props output = node.getOutputProps(); if (output != null) { output = Props.clone(output); output.setParent(previousOutput); previousOutput = output; } } flow.setOutputProps(previousOutput); if (!succeeded && (flow.getStatus() == Status.RUNNING)) { flow.setStatus(Status.KILLED); } flow.setEndTime(System.currentTimeMillis()); flow.setUpdateTime(System.currentTimeMillis()); long durationSec = (flow.getEndTime() - flow.getStartTime()) / 1000; switch (flow.getStatus()) { case FAILED_FINISHING: logger.info("Setting flow '" + id + "' status to FAILED in " + durationSec + " seconds"); flow.setStatus(Status.FAILED); break; case FAILED: case KILLED: case CANCELLED: case FAILED_SUCCEEDED: logger.info( "Flow '" + id + "' is set to " + flow.getStatus().toString() + " in " + durationSec + " seconds"); break; default: flow.setStatus(Status.SUCCEEDED); logger.info( "Flow '" + id + "' is set to " + flow.getStatus().toString() + " in " + durationSec + " seconds"); } // If the finalized flow is actually the top level flow, than we finish // the main loop. if (flow instanceof ExecutableFlow) { flowFinished = true; } }