/**
 * Starts a map-reduce action and follows it through two {@code ActionCheckXCommand} runs.
 *
 * <p>The test waits for the launcher job to succeed and to report an id swap, runs a first
 * check and verifies the action's external id is still the launcher id, waits for the child
 * map-reduce job to succeed, then runs a second check and expects the external status
 * {@code SUCCEEDED}.
 *
 * @throws Exception if any command, JPA call or Hadoop client call fails
 */
public void testActionCheck() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job =
        this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action =
        this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    // Start the action, then re-read it so the bean carries the external (launcher) id.
    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);

    ActionExecutorContext context =
        new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf =
        actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
          }
        });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData =
        LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    // First check: the external id must still be the launcher id; the child id is read
    // separately from the external child ids.
    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    String mapperId = action.getExternalId();
    String childId = action.getExternalChildIDs();

    // assertEquals reports both ids on failure, unlike assertTrue(a.equals(b)).
    assertEquals(launcherId, mapperId);

    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return mrJob.isComplete();
          }
        });
    assertTrue(mrJob.isSuccessful());

    // Second check, after the child job finished: the action must now report success.
    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action.getExternalStatus());
  }
  /**
   * Exercises {@code ActionCheckXCommand} while the job tracker is shut down in the middle of a
   * map-reduce action, then resumes the workflow and expects the action to finish with the
   * external status {@code SUCCEEDED}.
   *
   * @throws Exception if any command, JPA call or Hadoop client call fails
   */
  public void testActionCheckTransientDuringMRAction() throws Exception {
    // The scenario relies on shutting down the job tracker, which YARN does not use, so the
    // test is a no-op when a resource manager address is configured.
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
      return;
    }

    // Restart the services with a lower retry limit (keeps the test short) and without the
    // ActionCheckerService (so no ActionCheckXCommand runs other than the ones issued here).
    services.destroy();
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", String.valueOf(maxRetries));
    services = new Services();
    setClassesToBeExcluded(
        services.getConf(), new String[] {"org.apache.oozie.service.ActionCheckerService"});
    services.init();

    final JPAService jpa = Services.get().get(JPAService.class);
    WorkflowJobBean wfJob =
        addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    final String jobId = wfJob.getId();
    WorkflowActionBean prepAction =
        addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = prepAction.getId();
    final WorkflowActionGetJPAExecutor actionGetter = new WorkflowActionGetJPAExecutor(actionId);

    // Start the action and remember which launcher job it was given.
    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean startedAction = jpa.execute(actionGetter);
    String firstLauncherId = startedAction.getExternalId();

    ActionExecutorContext context =
        new ActionXCommand.ActionExecutorContext(wfJob, startedAction, false, false);
    MapReduceActionExecutor mrExecutor = new MapReduceActionExecutor();
    JobConf conf =
        mrExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(startedAction.getConf()));
    String hadoopUser = conf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(hadoopUser, conf);

    final RunningJob firstLauncher = jobClient.getJob(JobID.forName(firstLauncherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return firstLauncher.isComplete();
          }
        });
    assertTrue(firstLauncher.isSuccessful());
    Map<String, String> firstActionData =
        LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(firstActionData));

    new ActionCheckXCommand(startedAction.getId()).call();
    WorkflowActionBean checkedAction = jpa.execute(actionGetter);
    String firstMapperId = checkedAction.getExternalChildIDs();

    assertFalse(firstLauncherId.equals(firstMapperId));

    // The launcher has finished and the map-reduce job has been started but is not done yet.
    // Take the job tracker down now to simulate an outage during the map-reduce job.
    executeWhileJobTrackerIsShutdown(
        new ShutdownJobTrackerExecutable() {
          @Override
          public void execute() throws Exception {
            assertEquals(0, startedAction.getRetries());
            new ActionCheckXCommand(actionId).call();

            // First wait until at least one retry has been recorded ...
            waitFor(
                30 * 1000,
                new Predicate() {
                  @Override
                  public boolean evaluate() throws Exception {
                    WorkflowActionBean polled = jpa.execute(actionGetter);
                    return polled.getRetries() > 0;
                  }
                });
            // ... then until the retry counter is back at zero.
            waitFor(
                180 * 1000,
                new Predicate() {
                  @Override
                  public boolean evaluate() throws Exception {
                    WorkflowActionBean polled = jpa.execute(actionGetter);
                    return polled.getRetries() == 0;
                  }
                });
            WorkflowActionBean exhaustedAction = jpa.execute(actionGetter);
            assertEquals(0, exhaustedAction.getRetries());
            assertEquals("START_MANUAL", exhaustedAction.getStatusStr());

            // The transient error persisted through maxRetries attempts, so the workflow is
            // expected to be SUSPENDED by now.
            WorkflowJobBean suspendedJob = jpa.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", suspendedJob.getStatusStr());
          }
        });

    // The job tracker is back. Resuming the workflow restarts the current action, which
    // should now run through to SUCCEEDED.
    new ResumeXCommand(jobId).call();
    WorkflowJobBean resumedJob = jpa.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", resumedJob.getStatusStr());

    sleep(500);

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean restartedAction = jpa.execute(actionGetter);
    String secondLauncherId = restartedAction.getExternalId();

    // The restart must have produced a new launcher job.
    assertFalse(firstLauncherId.equals(secondLauncherId));

    final RunningJob secondLauncher = jobClient.getJob(JobID.forName(secondLauncherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return secondLauncher.isComplete();
          }
        });

    assertTrue(secondLauncher.isSuccessful());
    Map<String, String> secondActionData =
        LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(secondActionData));

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean recheckedAction = jpa.execute(actionGetter);
    String secondMapperId = recheckedAction.getExternalChildIDs();
    // ... and a new map-reduce job as well.
    assertFalse(firstMapperId.equals(secondMapperId));

    final RunningJob secondMrJob = jobClient.getJob(JobID.forName(secondMapperId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return secondMrJob.isComplete();
          }
        });
    assertTrue(secondMrJob.isSuccessful());

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean finishedAction = jpa.execute(actionGetter);

    assertEquals("SUCCEEDED", finishedAction.getExternalStatus());
  }
  /**
   * Checks the single-bean queries of {@code WorkflowActionQueryExecutor}: for each
   * {@code WorkflowActionQuery} exercised below, the fields the test expects to be loaded must
   * match the inserted bean, and the fields it expects to be skipped must be {@code null}.
   *
   * @throws Exception if inserting the record or running a query fails
   */
  public void testGet() throws Exception {
    WorkflowActionBean bean =
        addRecordToWfActionTable("workflowId", "testAction", WorkflowAction.Status.PREP, "", true);
    WorkflowActionBean retBean;

    // GET_ACTION_ID_TYPE_LASTCHECK
    retBean =
        WorkflowActionQueryExecutor.getInstance()
            .get(WorkflowActionQuery.GET_ACTION_ID_TYPE_LASTCHECK, bean.getId());
    assertEquals(bean.getId(), retBean.getId());
    assertEquals(bean.getType(), retBean.getType());
    // Compare epoch millis like every other timestamp check in this test, so the result does
    // not depend on the equals() of whatever date type the getter hands back.
    assertEquals(bean.getLastCheckTime().getTime(), retBean.getLastCheckTime().getTime());

    // GET_ACTION_FAIL
    retBean =
        WorkflowActionQueryExecutor.getInstance()
            .get(WorkflowActionQuery.GET_ACTION_FAIL, bean.getId());
    assertCoreFieldsMatch(bean, retBean);
    assertNull(retBean.getConf());
    assertNull(retBean.getSlaXml());
    assertNull(retBean.getData());
    assertNull(retBean.getStats());
    assertNull(retBean.getExternalChildIDs());

    // GET_ACTION_SIGNAL
    retBean =
        WorkflowActionQueryExecutor.getInstance()
            .get(WorkflowActionQuery.GET_ACTION_SIGNAL, bean.getId());
    assertCoreFieldsMatch(bean, retBean);
    assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime());
    assertEquals(bean.getExecutionPath(), retBean.getExecutionPath());
    assertEquals(bean.getSignalValue(), retBean.getSignalValue());
    assertEquals(bean.getSlaXml(), retBean.getSlaXml());
    assertNull(retBean.getConf());
    assertNull(retBean.getData());
    assertNull(retBean.getStats());
    assertNull(retBean.getExternalChildIDs());

    // GET_ACTION_CHECK
    retBean =
        WorkflowActionQueryExecutor.getInstance()
            .get(WorkflowActionQuery.GET_ACTION_CHECK, bean.getId());
    assertCoreFieldsMatch(bean, retBean);
    assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime());
    assertRetryFieldsMatch(bean, retBean);
    assertEquals(bean.getTrackerUri(), retBean.getTrackerUri());
    assertEquals(bean.getStartTime().getTime(), retBean.getStartTime().getTime());
    assertEquals(bean.getEndTime().getTime(), retBean.getEndTime().getTime());
    assertEquals(bean.getLastCheckTime().getTime(), retBean.getLastCheckTime().getTime());
    assertEquals(bean.getExternalId(), retBean.getExternalId());
    assertEquals(bean.getExternalStatus(), retBean.getExternalStatus());
    assertEquals(bean.getExternalChildIDs(), retBean.getExternalChildIDs());
    assertEquals(bean.getConf(), retBean.getConf());
    assertNull(retBean.getData());
    assertNull(retBean.getStats());
    assertNull(retBean.getSlaXml());

    // GET_ACTION_END
    retBean =
        WorkflowActionQueryExecutor.getInstance()
            .get(WorkflowActionQuery.GET_ACTION_END, bean.getId());
    assertCoreFieldsMatch(bean, retBean);
    assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime());
    assertRetryFieldsMatch(bean, retBean);
    assertEquals(bean.getTrackerUri(), retBean.getTrackerUri());
    assertEquals(bean.getExternalId(), retBean.getExternalId());
    assertEquals(bean.getExternalStatus(), retBean.getExternalStatus());
    assertEquals(bean.getExternalChildIDs(), retBean.getExternalChildIDs());
    assertEquals(bean.getStartTime().getTime(), retBean.getStartTime().getTime());
    assertEquals(bean.getEndTime().getTime(), retBean.getEndTime().getTime());
    assertEquals(bean.getConf(), retBean.getConf());
    assertEquals(bean.getData(), retBean.getData());
    assertEquals(bean.getStats(), retBean.getStats());
    assertNull(retBean.getSlaXml());

    // GET_ACTION_COMPLETED (checks only a subset of the core fields, so it stays inline)
    retBean =
        WorkflowActionQueryExecutor.getInstance()
            .get(WorkflowActionQuery.GET_ACTION_COMPLETED, bean.getId());
    assertEquals(bean.getId(), retBean.getId());
    assertEquals(bean.getJobId(), retBean.getJobId());
    assertEquals(bean.getStatusStr(), retBean.getStatusStr());
    assertEquals(bean.getType(), retBean.getType());
    assertEquals(bean.getLogToken(), retBean.getLogToken());
    assertNull(retBean.getSlaXml());
    assertNull(retBean.getConf());
    assertNull(retBean.getData());
    assertNull(retBean.getStats());
    assertNull(retBean.getExternalChildIDs());

    // GET_ACTION (entire obj)
    retBean =
        WorkflowActionQueryExecutor.getInstance().get(WorkflowActionQuery.GET_ACTION, bean.getId());
    assertCoreFieldsMatch(bean, retBean);
    assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime());
    assertRetryFieldsMatch(bean, retBean);
    assertEquals(bean.getStartTime().getTime(), retBean.getStartTime().getTime());
    assertEquals(bean.getEndTime().getTime(), retBean.getEndTime().getTime());
    assertEquals(bean.getCreatedTime().getTime(), retBean.getCreatedTime().getTime());
    assertEquals(bean.getLastCheckTime().getTime(), retBean.getLastCheckTime().getTime());
    assertEquals(bean.getExecutionPath(), retBean.getExecutionPath());
    assertEquals(bean.getSignalValue(), retBean.getSignalValue());
    assertEquals(bean.getCred(), retBean.getCred());
    assertEquals(bean.getConf(), retBean.getConf());
    assertEquals(bean.getSlaXml(), retBean.getSlaXml());
    assertEquals(bean.getData(), retBean.getData());
    assertEquals(bean.getStats(), retBean.getStats());
    assertEquals(bean.getExternalChildIDs(), retBean.getExternalChildIDs());
  }

  /**
   * Asserts the ten fields that the FAIL, SIGNAL, CHECK, END and full GET_ACTION checks in
   * {@code testGet} all compare: id, job id, name, status, pending, type, log token,
   * transition, error code and error message.
   *
   * @param expected the bean that was inserted
   * @param actual the bean returned by the query under test
   */
  private static void assertCoreFieldsMatch(WorkflowActionBean expected, WorkflowActionBean actual) {
    assertEquals(expected.getId(), actual.getId());
    assertEquals(expected.getJobId(), actual.getJobId());
    assertEquals(expected.getName(), actual.getName());
    assertEquals(expected.getStatusStr(), actual.getStatusStr());
    assertEquals(expected.getPending(), actual.getPending());
    assertEquals(expected.getType(), actual.getType());
    assertEquals(expected.getLogToken(), actual.getLogToken());
    assertEquals(expected.getTransition(), actual.getTransition());
    assertEquals(expected.getErrorCode(), actual.getErrorCode());
    assertEquals(expected.getErrorMessage(), actual.getErrorMessage());
  }

  /**
   * Asserts the retry-related fields compared by the CHECK, END and full GET_ACTION checks in
   * {@code testGet}: retries, user retry count, user retry max and user retry interval.
   *
   * @param expected the bean that was inserted
   * @param actual the bean returned by the query under test
   */
  private static void assertRetryFieldsMatch(WorkflowActionBean expected, WorkflowActionBean actual) {
    assertEquals(expected.getRetries(), actual.getRetries());
    assertEquals(expected.getUserRetryCount(), actual.getUserRetryCount());
    assertEquals(expected.getUserRetryMax(), actual.getUserRetryMax());
    assertEquals(expected.getUserRetryInterval(), actual.getUserRetryInterval());
  }
Example #4
0
  /**
   * Provides functionality to test non transient failures.
   *
   * <p>Submits a workflow whose configuration carries {@code errorType} under the key
   * {@code "error"}, waits for the job to become SUSPENDED and checks the error state of its
   * last action. It then replaces {@code errorType} with {@code "none"} in that action's
   * configuration, resumes the job, and expects the job to reach SUCCEEDED with the first
   * action OK and free of error details.
   *
   * @param errorType the error type. (start.non-transient, end.non-transient)
   * @param expStatus1 expected status. (START_MANUAL, END_MANUAL)
   * @param expErrorMsg expected error message.
   * @throws Exception if an engine, store or I/O call fails
   */
  private void _testNonTransient(
      String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg) throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    // NOTE(review): neither stream is closed here; presumably IOUtils.copyCharStream closes
    // both after copying - confirm.
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());

    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("signal-value", "OK");
    conf.set("external-status", "ok");
    conf.set("error", errorType);

    final String jobId = engine.submitJob(conf, true);

    waitFor(
        5000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED);
          }
        });

    final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create();
    store.beginTrx();
    List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, true);
    // The last action in the list is the one inspected for the error.
    WorkflowActionBean action = actions.get(actions.size() - 1);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals(expErrorMsg, action.getErrorMessage());
    assertEquals(expStatus1, action.getStatus());
    assertFalse(action.isPending());

    assertEquals(WorkflowJob.Status.SUSPENDED, engine.getJob(jobId).getStatus());

    // Literal replacement: errorType contains '.', which replaceAll would treat as a regex
    // wildcard instead of a plain character.
    String actionConf = action.getConf();
    String fixedActionConf = actionConf.replace(errorType, "none");
    action.setConf(fixedActionConf);
    store.updateAction(action);
    store.commitTrx();
    store.closeTrx();

    engine.resume(jobId);

    waitFor(
        5000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED);
          }
        });

    assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus());

    // Re-read the actions through a fresh store: the first action must be OK with no error left.
    final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create();
    store2.beginTrx();
    actions = store2.getActionsForWorkflow(jobId, false);
    action = actions.get(0);
    assertNull(action.getErrorCode());
    assertNull(action.getErrorMessage());
    assertEquals(WorkflowActionBean.Status.OK, action.getStatus());
    store2.commitTrx();
    store2.closeTrx();
  }