Example #1
0
 @Override
 protected void verifyPrecondition() throws CommandException, PreconditionException {
   if ((wfAction == null) || (wfAction.isComplete() && wfAction.isPending())) {
     if (wfJob.getStatus() != WorkflowJob.Status.RUNNING
         && wfJob.getStatus() != WorkflowJob.Status.PREP) {
       throw new PreconditionException(ErrorCode.E0813, wfJob.getStatusStr());
     }
   } else {
     throw new PreconditionException(
         ErrorCode.E0814, actionId, wfAction.getStatusStr(), wfAction.isPending());
   }
 }
Example #2
0
  protected Void call(WorkflowStore store) throws StoreException, CommandException {
    WorkflowJobBean workflow = store.getWorkflow(jobId, false);
    setLogInfo(workflow);
    WorkflowActionBean action = store.getAction(id, false);
    setLogInfo(action);
    if (action.isPending()
        && (action.getStatus() == WorkflowActionBean.Status.DONE
            || action.getStatus() == WorkflowActionBean.Status.END_RETRY
            || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) {
      if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {

        ActionExecutor executor =
            Services.get().get(ActionService.class).getExecutor(action.getType());
        Configuration conf = workflow.getWorkflowInstance().getConf();
        int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
        long retryInterval =
            conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
        executor.setMaxRetries(maxRetries);
        executor.setRetryInterval(retryInterval);

        if (executor != null) {
          boolean isRetry = false;
          if (action.getStatus() == WorkflowActionBean.Status.END_RETRY
              || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) {
            isRetry = true;
          }
          ActionExecutorContext context =
              new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
          try {

            XLog.getLog(getClass())
                .debug(
                    "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]",
                    action.getName(),
                    action.getType(),
                    action.getStatus(),
                    action.getExternalStatus(),
                    action.getSignalValue());
            WorkflowInstance wfInstance = workflow.getWorkflowInstance();
            DagELFunctions.setActionInfo(wfInstance, action);
            workflow.setWorkflowInstance(wfInstance);
            incrActionCounter(action.getType(), 1);

            Instrumentation.Cron cron = new Instrumentation.Cron();
            cron.start();
            executor.end(context, action);
            cron.stop();
            addActionCron(action.getType(), cron);

            if (!context.isEnded()) {
              XLog.getLog(getClass())
                  .warn(
                      XLog.OPS,
                      "Action Ended, ActionExecutor [{0}] must call setEndData()",
                      executor.getType());
              action.setErrorInfo(
                  END_DATA_MISSING, "Execution Ended, but End Data Missing from Action");
              failJob(context);
              store.updateAction(action);
              store.updateWorkflow(workflow);
              return null;
            }
            action.setRetries(0);
            action.setEndTime(new Date());
            store.updateAction(action);
            store.updateWorkflow(workflow);
            Status slaStatus = null;
            switch (action.getStatus()) {
              case OK:
                slaStatus = Status.SUCCEEDED;
                break;
              case KILLED:
                slaStatus = Status.KILLED;
                break;
              case FAILED:
                slaStatus = Status.FAILED;
                break;
              case ERROR:
                XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA");
                slaStatus = Status.KILLED;
                break;
              default: // TODO: What will happen for other Action
                // status
                slaStatus = Status.FAILED;
                break;
            }
            SLADbOperations.writeStausEvent(
                action.getSlaXml(), action.getId(), store, slaStatus, SlaAppType.WORKFLOW_ACTION);
            queueCallable(new NotificationCommand(workflow, action));
            XLog.getLog(getClass())
                .debug(
                    "Queuing commands for action "
                        + id
                        + " status "
                        + action.getStatus()
                        + ", Set pending="
                        + action.getPending());
            queueCallable(new SignalCommand(workflow.getId(), id));
          } catch (ActionExecutorException ex) {
            XLog.getLog(getClass())
                .warn(
                    "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
                    action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage());
            action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
            action.setEndTime(null);
            switch (ex.getErrorType()) {
              case TRANSIENT:
                if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) {
                  handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL);
                  action.setPendingAge(new Date());
                  action.setRetries(0);
                }
                action.setEndTime(null);
                break;
              case NON_TRANSIENT:
                handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL);
                action.setEndTime(null);
                break;
              case ERROR:
                handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR);
                queueCallable(new SignalCommand(workflow.getId(), id));
                break;
              case FAILED:
                failJob(context);
                break;
            }
            store.updateAction(action);
            store.updateWorkflow(workflow);
          }
        } else {
          throw new CommandException(ErrorCode.E0802, action.getType());
        }
      } else {
        XLog.getLog(getClass())
            .warn(
                "Job state is not {0}. Skipping ActionEnd Execution",
                WorkflowJob.Status.RUNNING.toString());
      }
    } else {
      XLog.getLog(getClass())
          .debug(
              "Action pending={0}, status={1}. Skipping ActionEnd Execution",
              action.getPending(), action.getStatusStr());
    }
    return null;
  }
Example #3
0
  /**
   * Provides functionality to test non transient failures and coordinator action update
   *
   * @param errorType the error type. (start.non-transient, end.non-transient)
   * @param expStatus1 expected status. (START_MANUAL, END_MANUAL)
   * @param expErrorMsg expected error message.
   * @throws Exception
   */
  private void _testNonTransientWithCoordActionUpdate(
      String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg) throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());

    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("signal-value", "OK");
    conf.set("external-status", "ok");
    conf.set("error", errorType);

    final String jobId = engine.submitJob(conf, false);

    final JPAService jpaService = Services.get().get(JPAService.class);
    final CoordinatorJobBean coordJob =
        addRecordToCoordJobTable(CoordinatorJob.Status.RUNNING, false, false);
    CoordinatorActionBean coordAction =
        addRecordToCoordActionTable(
            coordJob.getId(),
            1,
            CoordinatorAction.Status.RUNNING,
            "coord-action-get.xml",
            jobId,
            "RUNNING",
            0);

    engine.start(jobId);

    waitFor(
        5000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED);
          }
        });

    assertNotNull(jpaService);
    WorkflowJobGetJPAExecutor wfGetCmd = new WorkflowJobGetJPAExecutor(jobId);
    WorkflowJobBean job = jpaService.execute(wfGetCmd);
    WorkflowActionsGetForJobJPAExecutor actionsGetExe =
        new WorkflowActionsGetForJobJPAExecutor(jobId);
    List<WorkflowActionBean> actionsList = jpaService.execute(actionsGetExe);

    int n = actionsList.size();
    WorkflowActionBean action = actionsList.get(n - 1);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals(expErrorMsg, action.getErrorMessage());
    assertEquals(expStatus1, action.getStatus());
    assertFalse(action.isPending());

    assertEquals(WorkflowJob.Status.SUSPENDED, job.getStatus());

    waitFor(
        5000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            CoordinatorActionBean coordAction2 =
                jpaService.execute(new CoordActionGetForExternalIdJPAExecutor(jobId));
            return coordAction2.getStatus().equals(CoordinatorAction.Status.SUSPENDED);
          }
        });

    coordAction = jpaService.execute(new CoordActionGetForExternalIdJPAExecutor(jobId));
    assertEquals(CoordinatorAction.Status.SUSPENDED, coordAction.getStatus());
  }
Example #4
0
  /**
   * Provides functionality to test transient failures.
   *
   * @param errorType the error type. (start.transient, end.transient)
   * @param expStatus1 expected status after the first step (START_RETRY, END_RETRY)
   * @param expStatus2 expected status after the second step (START_MANUAL, END_MANUAL)
   * @param expErrorMsg the expected error message.
   * @throws Exception
   */
  private void _testTransient(
      String errorType,
      WorkflowActionBean.Status expStatus1,
      final WorkflowActionBean.Status expStatus2,
      String expErrorMsg)
      throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final int maxRetries = 2;
    final int retryInterval = 10;

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());

    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("signal-value", "OK");
    conf.set("external-status", "ok");
    conf.set("error", errorType);
    conf.setInt(OozieClient.ACTION_MAX_RETRIES, maxRetries);
    conf.setInt(OozieClient.ACTION_RETRY_INTERVAL, retryInterval);

    final String jobId = engine.submitJob(conf, true);

    int retryCount = 1;
    WorkflowActionBean.Status expectedStatus = expStatus1;
    int expectedRetryCount = 2;

    Thread.sleep(20000);
    String aId = null;
    final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create();
    store.beginTrx();
    while (retryCount <= maxRetries) {
      List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, false);
      WorkflowActionBean action = null;
      for (WorkflowActionBean bean : actions) {
        if (bean.getType().equals("test")) {
          action = bean;
          break;
        }
      }
      assertNotNull(action);
      aId = action.getId();
      assertEquals(expectedStatus, action.getStatus());
      assertEquals(expectedRetryCount, action.getRetries());
      assertEquals("TEST_ERROR", action.getErrorCode());
      assertEquals(expErrorMsg, action.getErrorMessage());
      if (action.getRetries() == maxRetries) {
        expectedRetryCount = 0;
        expectedStatus = expStatus2;
        break;
      } else {
        expectedRetryCount++;
      }
      Thread.sleep(retryInterval * 1000);
      retryCount++;
    }
    store.commitTrx();
    store.closeTrx();
    Thread.sleep(5000);

    final String actionId = aId;

    waitFor(
        5000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return (engine.getWorkflowAction(actionId).getStatus() == expStatus2);
          }
        });

    final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create();
    store2.beginTrx();
    WorkflowActionBean action = engine.getWorkflowAction(actionId);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals(expErrorMsg, action.getErrorMessage());
    assertEquals(expStatus2, action.getStatus());
    assertTrue(action.isPending() == false);
    assertEquals(WorkflowJob.Status.SUSPENDED, engine.getJob(jobId).getStatus());
    store2.commitTrx();
    store2.closeTrx();
  }
Example #5
0
  /**
   * Provides functionality to test non transient failures.
   *
   * @param errorType the error type. (start.non-transient, end.non-transient)
   * @param expStatus1 expected status. (START_MANUAL, END_MANUAL)
   * @param expErrorMsg expected error message.
   * @throws Exception
   */
  private void _testNonTransient(
      String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg) throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());

    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("signal-value", "OK");
    conf.set("external-status", "ok");
    conf.set("error", errorType);

    final String jobId = engine.submitJob(conf, true);

    waitFor(
        5000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED);
          }
        });

    final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create();
    store.beginTrx();
    List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, true);
    int n = actions.size();
    WorkflowActionBean action = actions.get(n - 1);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals(expErrorMsg, action.getErrorMessage());
    assertEquals(expStatus1, action.getStatus());
    assertTrue(action.isPending() == false);

    assertTrue(engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED);

    String actionConf = action.getConf();
    String fixedActionConf = actionConf.replaceAll(errorType, "none");
    action.setConf(fixedActionConf);
    store.updateAction(action);
    store.commitTrx();
    store.closeTrx();

    engine.resume(jobId);

    waitFor(
        5000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED);
          }
        });

    assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus());

    final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create();
    store2.beginTrx();
    actions = store2.getActionsForWorkflow(jobId, false);
    action = actions.get(0);
    assertEquals(null, action.getErrorCode());
    assertEquals(null, action.getErrorMessage());
    assertEquals(WorkflowActionBean.Status.OK, action.getStatus());
    store2.commitTrx();
    store2.closeTrx();
  }