Beispiel #1
0
  /**
   * Resets the failed portion of {@code flow} so that it can be retried, and collects the nodes
   * from which execution should resume.
   *
   * <p>The flow graph is walked bottom-up, starting at the flow's end nodes and following
   * in-edges. Each node id is enqueued at most once per call, so a node shared by several
   * downstream branches is examined (and, if applicable, added to {@code nodesToRetry}) only once
   * by the walk. Per node:
   *
   * <ul>
   *   <li>{@code SUCCEEDED}: added to {@code nodesToRetry} as a candidate parent for restart; the
   *       walk does not continue above it.
   *   <li>{@code RUNNING}: left untouched; the walk does not continue above it.
   *   <li>{@code SKIPPED}: set to {@code DISABLED} with start/end time cleared to -1.
   *   <li>Embedded flow: {@code CANCELLED} is set to {@code READY} with times cleared;
   *       {@code KILLED}, {@code FAILED} and {@code FAILED_FINISHING} are reset recursively; any
   *       other status is left alone. In the last two cases the walk does not continue above it.
   *   <li>{@code CANCELLED} job: set to {@code READY} with times cleared.
   *   <li>{@code FAILED} or {@code KILLED} job: {@code resetForRetry()} is called and the node is
   *       added to {@code nodesToRetry}.
   * </ul>
   *
   * <p>Afterwards the flow itself is set to {@code READY} if no examined node reported a start
   * time greater than -1, otherwise to {@code RUNNING}; in the latter case its start nodes that
   * are {@code READY} or {@code DISABLED} are also added to {@code nodesToRetry}. The flow's
   * update time is refreshed and its end time cleared.
   *
   * @param flow the flow (or embedded flow) whose failed state is reset; mutated in place
   * @param nodesToRetry output list that receives the nodes to resume from; entries are appended
   */
  private void resetFailedState(ExecutableFlowBase flow, List<ExecutableNode> nodesToRetry) {
    // Bottom-up breadth-first walk, seeded with the end nodes.
    LinkedList<ExecutableNode> queue = new LinkedList<ExecutableNode>();
    // Ids that have already been enqueued. Without this, a node reachable through several
    // downstream paths (diamond-shaped DAG) would be re-processed once per path, adding duplicate
    // entries to nodesToRetry and making the walk exponential in the worst case.
    // Fully qualified so this method does not depend on imports that may be absent from the file.
    java.util.Set<String> enqueuedIds = new java.util.HashSet<String>();
    for (String id : flow.getEndNodes()) {
      if (enqueuedIds.add(id)) {
        queue.add(flow.getExecutableNode(id));
      }
    }

    // Largest start time seen among examined nodes; stays -1 if nothing has started.
    long maxStartTime = -1;
    while (!queue.isEmpty()) {
      ExecutableNode node = queue.poll();
      Status oldStatus = node.getStatus();
      // Read the start time before any reset below clears it.
      maxStartTime = Math.max(node.getStartTime(), maxStartTime);

      long currentTime = System.currentTimeMillis();
      if (node.getStatus() == Status.SUCCEEDED) {
        // This is a candidate parent for restart
        nodesToRetry.add(node);
        continue;
      } else if (node.getStatus() == Status.RUNNING) {
        continue;
      } else if (node.getStatus() == Status.SKIPPED) {
        node.setStatus(Status.DISABLED);
        node.setEndTime(-1);
        node.setStartTime(-1);
        node.setUpdateTime(currentTime);
      } else if (node instanceof ExecutableFlowBase) {
        ExecutableFlowBase base = (ExecutableFlowBase) node;
        switch (base.getStatus()) {
          case CANCELLED:
            node.setStatus(Status.READY);
            node.setEndTime(-1);
            node.setStartTime(-1);
            node.setUpdateTime(currentTime);
            // Break out of the switch. We'll reset the flow just like a normal node
            break;
          case KILLED:
          case FAILED:
          case FAILED_FINISHING:
            // Recurse into the embedded flow; its own nodes are handled there.
            resetFailedState(base, nodesToRetry);
            continue;
          default:
            // Continue the while loop. If the job is in a finished state that's not
            // a failure, we don't want to reset the job.
            continue;
        }
      } else if (node.getStatus() == Status.CANCELLED) {
        // Not a flow, but killed
        node.setStatus(Status.READY);
        node.setStartTime(-1);
        node.setEndTime(-1);
        node.setUpdateTime(currentTime);
      } else if (node.getStatus() == Status.FAILED || node.getStatus() == Status.KILLED) {
        // NOTE(review): resetForRetry() is defined elsewhere; presumably it returns the node to
        // a runnable state - confirm against ExecutableNode.
        node.resetForRetry();
        nodesToRetry.add(node);
      }

      // Only plain jobs are logged here; flows log their own transition at the end of the call
      // that resets them.
      if (!(node instanceof ExecutableFlowBase) && node.getStatus() != oldStatus) {
        logger.info(
            "Resetting job '"
                + node.getNestedId()
                + "' from "
                + oldStatus
                + " to "
                + node.getStatus());
      }

      // Continue upward, skipping parents that were already scheduled via another child.
      for (String inId : node.getInNodes()) {
        if (enqueuedIds.add(inId)) {
          queue.add(flow.getExecutableNode(inId));
        }
      }
    }

    // At this point, the following code will reset the flow
    Status oldFlowState = flow.getStatus();
    if (maxStartTime == -1) {
      // Nothing has run inside the flow, so we assume the flow hasn't even started running yet.
      flow.setStatus(Status.READY);
    } else {
      flow.setStatus(Status.RUNNING);

      // Add any READY start nodes. Usually it means the flow started, but the start node has not.
      for (String id : flow.getStartNodes()) {
        ExecutableNode node = flow.getExecutableNode(id);
        if (node.getStatus() == Status.READY || node.getStatus() == Status.DISABLED) {
          nodesToRetry.add(node);
        }
      }
    }
    flow.setUpdateTime(System.currentTimeMillis());
    flow.setEndTime(-1);
    logger.info(
        "Resetting flow '"
            + flow.getNestedId()
            + "' from "
            + oldFlowState
            + " to "
            + flow.getStatus());
  }