Exemple #1
0
  private void retryAllFailures() throws IOException {
    logger.info("Restarting all failed jobs");

    this.retryFailedJobs = false;
    this.flowKilled = false;
    this.flowFailed = false;
    this.flow.setStatus(Status.RUNNING);

    ArrayList<ExecutableNode> retryJobs = new ArrayList<ExecutableNode>();
    resetFailedState(this.flow, retryJobs);

    for (ExecutableNode node : retryJobs) {
      if (node.getStatus() == Status.READY || node.getStatus() == Status.DISABLED) {
        runReadyJob(node);
      } else if (node.getStatus() == Status.SUCCEEDED) {
        for (String outNodeId : node.getOutNodes()) {
          ExecutableFlowBase base = node.getParentFlow();
          runReadyJob(base.getExecutableNode(outNodeId));
        }
      }

      runReadyJob(node);
    }

    updateFlow();
  }
  /**
   * Determines what the state of the next node should be. Returns null if the node should not be
   * run.
   *
   * @param node
   * @return
   */
  public Status getImpliedStatus(ExecutableNode node) {
    // If it's running or finished with 'SUCCEEDED', than don't even
    // bother starting this job.
    if (Status.isStatusRunning(node.getStatus()) || node.getStatus() == Status.SUCCEEDED) {
      return null;
    }

    // Go through the node's dependencies. If all of the previous job's
    // statuses is finished and not FAILED or KILLED, than we can safely
    // run this job.
    ExecutableFlowBase flow = node.getParentFlow();
    boolean shouldKill = false;
    for (String dependency : node.getInNodes()) {
      ExecutableNode dependencyNode = flow.getExecutableNode(dependency);
      Status depStatus = dependencyNode.getStatus();

      if (!Status.isStatusFinished(depStatus)) {
        return null;
      } else if (depStatus == Status.FAILED
          || depStatus == Status.CANCELLED
          || depStatus == Status.KILLED) {
        // We propagate failures as KILLED states.
        shouldKill = true;
      }
    }

    // If it's disabled but ready to run, we want to make sure it continues
    // being disabled.
    if (node.getStatus() == Status.DISABLED || node.getStatus() == Status.SKIPPED) {
      return Status.SKIPPED;
    }

    // If the flow has failed, and we want to finish only the currently running
    // jobs, we just
    // kill everything else. We also kill, if the flow has been cancelled.
    if (flowFailed && failureAction == ExecutionOptions.FailureAction.FINISH_CURRENTLY_RUNNING) {
      return Status.CANCELLED;
    } else if (shouldKill || isKilled()) {
      return Status.CANCELLED;
    }

    // All good to go, ready to run.
    return Status.READY;
  }
Exemple #3
0
  private boolean runReadyJob(ExecutableNode node) throws IOException {
    if (Status.isStatusFinished(node.getStatus()) || Status.isStatusRunning(node.getStatus())) {
      return false;
    }

    Status nextNodeStatus = getImpliedStatus(node);
    if (nextNodeStatus == null) {
      return false;
    }

    if (nextNodeStatus == Status.CANCELLED) {
      logger.info("Cancelling '" + node.getNestedId() + "' due to prior errors.");
      node.cancelNode(System.currentTimeMillis());
      finishExecutableNode(node);
    } else if (nextNodeStatus == Status.SKIPPED) {
      logger.info("Skipping disabled job '" + node.getId() + "'.");
      node.skipNode(System.currentTimeMillis());
      finishExecutableNode(node);
    } else if (nextNodeStatus == Status.READY) {
      if (node instanceof ExecutableFlowBase) {
        ExecutableFlowBase flow = ((ExecutableFlowBase) node);
        logger.info("Running flow '" + flow.getNestedId() + "'.");
        flow.setStatus(Status.RUNNING);
        flow.setStartTime(System.currentTimeMillis());
        prepareJobProperties(flow);

        for (String startNodeId : ((ExecutableFlowBase) node).getStartNodes()) {
          ExecutableNode startNode = flow.getExecutableNode(startNodeId);
          runReadyJob(startNode);
        }
      } else {
        runExecutableNode(node);
      }
    }
    return true;
  }
Exemple #4
0
  private void resetFailedState(ExecutableFlowBase flow, List<ExecutableNode> nodesToRetry) {
    // bottom up
    LinkedList<ExecutableNode> queue = new LinkedList<ExecutableNode>();
    for (String id : flow.getEndNodes()) {
      ExecutableNode node = flow.getExecutableNode(id);
      queue.add(node);
    }

    long maxStartTime = -1;
    while (!queue.isEmpty()) {
      ExecutableNode node = queue.poll();
      Status oldStatus = node.getStatus();
      maxStartTime = Math.max(node.getStartTime(), maxStartTime);

      long currentTime = System.currentTimeMillis();
      if (node.getStatus() == Status.SUCCEEDED) {
        // This is a candidate parent for restart
        nodesToRetry.add(node);
        continue;
      } else if (node.getStatus() == Status.RUNNING) {
        continue;
      } else if (node.getStatus() == Status.SKIPPED) {
        node.setStatus(Status.DISABLED);
        node.setEndTime(-1);
        node.setStartTime(-1);
        node.setUpdateTime(currentTime);
      } else if (node instanceof ExecutableFlowBase) {
        ExecutableFlowBase base = (ExecutableFlowBase) node;
        switch (base.getStatus()) {
          case CANCELLED:
            node.setStatus(Status.READY);
            node.setEndTime(-1);
            node.setStartTime(-1);
            node.setUpdateTime(currentTime);
            // Break out of the switch. We'll reset the flow just like a normal node
            break;
          case KILLED:
          case FAILED:
          case FAILED_FINISHING:
            resetFailedState(base, nodesToRetry);
            continue;
          default:
            // Continue the while loop. If the job is in a finished state that's not
            // a failure, we don't want to reset the job.
            continue;
        }
      } else if (node.getStatus() == Status.CANCELLED) {
        // Not a flow, but killed
        node.setStatus(Status.READY);
        node.setStartTime(-1);
        node.setEndTime(-1);
        node.setUpdateTime(currentTime);
      } else if (node.getStatus() == Status.FAILED || node.getStatus() == Status.KILLED) {
        node.resetForRetry();
        nodesToRetry.add(node);
      }

      if (!(node instanceof ExecutableFlowBase) && node.getStatus() != oldStatus) {
        logger.info(
            "Resetting job '"
                + node.getNestedId()
                + "' from "
                + oldStatus
                + " to "
                + node.getStatus());
      }

      for (String inId : node.getInNodes()) {
        ExecutableNode nodeUp = flow.getExecutableNode(inId);
        queue.add(nodeUp);
      }
    }

    // At this point, the following code will reset the flow
    Status oldFlowState = flow.getStatus();
    if (maxStartTime == -1) {
      // Nothing has run inside the flow, so we assume the flow hasn't even started running yet.
      flow.setStatus(Status.READY);
    } else {
      flow.setStatus(Status.RUNNING);

      // Add any READY start nodes. Usually it means the flow started, but the start node has not.
      for (String id : flow.getStartNodes()) {
        ExecutableNode node = flow.getExecutableNode(id);
        if (node.getStatus() == Status.READY || node.getStatus() == Status.DISABLED) {
          nodesToRetry.add(node);
        }
      }
    }
    flow.setUpdateTime(System.currentTimeMillis());
    flow.setEndTime(-1);
    logger.info(
        "Resetting flow '"
            + flow.getNestedId()
            + "' from "
            + oldFlowState
            + " to "
            + flow.getStatus());
  }
Exemple #5
0
  private void finalizeFlow(ExecutableFlowBase flow) {
    String id = flow == this.flow ? "" : flow.getNestedId();

    // If it's not the starting flow, we'll create set of output props
    // for the finished flow.
    boolean succeeded = true;
    Props previousOutput = null;

    for (String end : flow.getEndNodes()) {
      ExecutableNode node = flow.getExecutableNode(end);

      if (node.getStatus() == Status.KILLED
          || node.getStatus() == Status.FAILED
          || node.getStatus() == Status.CANCELLED) {
        succeeded = false;
      }

      Props output = node.getOutputProps();
      if (output != null) {
        output = Props.clone(output);
        output.setParent(previousOutput);
        previousOutput = output;
      }
    }

    flow.setOutputProps(previousOutput);
    if (!succeeded && (flow.getStatus() == Status.RUNNING)) {
      flow.setStatus(Status.KILLED);
    }

    flow.setEndTime(System.currentTimeMillis());
    flow.setUpdateTime(System.currentTimeMillis());
    long durationSec = (flow.getEndTime() - flow.getStartTime()) / 1000;
    switch (flow.getStatus()) {
      case FAILED_FINISHING:
        logger.info("Setting flow '" + id + "' status to FAILED in " + durationSec + " seconds");
        flow.setStatus(Status.FAILED);
        break;
      case FAILED:
      case KILLED:
      case CANCELLED:
      case FAILED_SUCCEEDED:
        logger.info(
            "Flow '"
                + id
                + "' is set to "
                + flow.getStatus().toString()
                + " in "
                + durationSec
                + " seconds");
        break;
      default:
        flow.setStatus(Status.SUCCEEDED);
        logger.info(
            "Flow '"
                + id
                + "' is set to "
                + flow.getStatus().toString()
                + " in "
                + durationSec
                + " seconds");
    }

    // If the finalized flow is actually the top level flow, than we finish
    // the main loop.
    if (flow instanceof ExecutableFlow) {
      flowFinished = true;
    }
  }
Exemple #6
0
  private boolean progressGraph() throws IOException {
    finishedNodes.swap();

    // The following nodes are finished, so we'll collect a list of outnodes
    // that are candidates for running next.
    HashSet<ExecutableNode> nodesToCheck = new HashSet<ExecutableNode>();
    for (ExecutableNode node : finishedNodes) {
      Set<String> outNodeIds = node.getOutNodes();
      ExecutableFlowBase parentFlow = node.getParentFlow();

      // If a job is seen as failed, then we set the parent flow to FAILED_FINISHING
      if (node.getStatus() == Status.FAILED) {
        // The job cannot be retried or has run out of retry attempts. We will
        // fail the job and its flow now.
        if (!retryJobIfPossible(node)) {
          propagateStatus(node.getParentFlow(), Status.FAILED_FINISHING);
          if (failureAction == FailureAction.CANCEL_ALL) {
            this.kill();
          }
          this.flowFailed = true;
        } else {
          nodesToCheck.add(node);
          continue;
        }
      }

      if (outNodeIds.isEmpty()) {
        // There's no outnodes means it's the end of a flow, so we finalize
        // and fire an event.
        finalizeFlow(parentFlow);
        finishExecutableNode(parentFlow);

        // If the parent has a parent, then we process
        if (!(parentFlow instanceof ExecutableFlow)) {
          outNodeIds = parentFlow.getOutNodes();
          parentFlow = parentFlow.getParentFlow();
        }
      }

      // Add all out nodes from the finished job. We'll check against this set to
      // see if any are candidates for running.
      for (String nodeId : outNodeIds) {
        ExecutableNode outNode = parentFlow.getExecutableNode(nodeId);
        nodesToCheck.add(outNode);
      }
    }

    // Runs candidate jobs. The code will check to see if they are ready to run before
    // Instant kill or skip if necessary.
    boolean jobsRun = false;
    for (ExecutableNode node : nodesToCheck) {
      if (Status.isStatusFinished(node.getStatus()) || Status.isStatusRunning(node.getStatus())) {
        // Really shouldn't get in here.
        continue;
      }

      jobsRun |= runReadyJob(node);
    }

    if (jobsRun || finishedNodes.getSize() > 0) {
      updateFlow();
      return true;
    }

    return false;
  }