public void testActionCheck() throws Exception { JPAService jpaService = Services.get().get(JPAService.class); WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING); WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP); WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId()); new ActionStartXCommand(action.getId(), "map-reduce").call(); action = jpaService.execute(wfActionGetCmd); ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false); MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor(); JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf())); String user = conf.get("user.name"); JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf); String launcherId = action.getExternalId(); final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId)); waitFor( 120 * 1000, new Predicate() { public boolean evaluate() throws Exception { return launcherJob.isComplete(); } }); assertTrue(launcherJob.isSuccessful()); Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf); assertTrue(LauncherMapperHelper.hasIdSwap(actionData)); new ActionCheckXCommand(action.getId()).call(); action = jpaService.execute(wfActionGetCmd); String mapperId = action.getExternalId(); String childId = action.getExternalChildIDs(); assertTrue(launcherId.equals(mapperId)); final RunningJob mrJob = jobClient.getJob(JobID.forName(childId)); waitFor( 120 * 1000, new Predicate() { public boolean evaluate() throws Exception { return mrJob.isComplete(); } }); assertTrue(mrJob.isSuccessful()); new ActionCheckXCommand(action.getId()).call(); action = jpaService.execute(wfActionGetCmd); assertEquals("SUCCEEDED", action.getExternalStatus()); }
public void testActionCheckTransientDuringMRAction() throws Exception { // When using YARN, skip this test because it relies on shutting down the job tracker, which // isn't used in YARN if (createJobConf().get("yarn.resourcemanager.address") != null) { return; } services.destroy(); // Make the max number of retries lower so the test won't take as long final int maxRetries = 2; setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries)); services = new Services(); // Disable ActionCheckerService so it doesn't interfere by triggering any extra // ActionCheckXCommands setClassesToBeExcluded( services.getConf(), new String[] {"org.apache.oozie.service.ActionCheckerService"}); services.init(); final JPAService jpaService = Services.get().get(JPAService.class); WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING); final String jobId = job0.getId(); WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP); final String actionId = action0.getId(); final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId); new ActionStartXCommand(actionId, "map-reduce").call(); final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd); String originalLauncherId = action1.getExternalId(); ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job0, action1, false, false); MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor(); JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf())); String user = conf.get("user.name"); JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf); final RunningJob launcherJob = jobClient.getJob(JobID.forName(originalLauncherId)); waitFor( 120 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { return launcherJob.isComplete(); } }); assertTrue(launcherJob.isSuccessful()); Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf); assertTrue(LauncherMapperHelper.hasIdSwap(actionData)); new ActionCheckXCommand(action1.getId()).call(); WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd); String originalMapperId = action2.getExternalChildIDs(); assertFalse(originalLauncherId.equals(originalMapperId)); // At this point, the launcher job has finished and the map-reduce action has started (but not // finished) // Now, shutdown the job tracker to pretend it has gone down during the map-reduce job executeWhileJobTrackerIsShutdown( new ShutdownJobTrackerExecutable() { @Override public void execute() throws Exception { assertEquals(0, action1.getRetries()); new ActionCheckXCommand(actionId).call(); waitFor( 30 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd); return (action1a.getRetries() > 0); } }); waitFor( 180 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd); return (action1a.getRetries() == 0); } }); WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd); assertEquals(0, action1b.getRetries()); assertEquals("START_MANUAL", action1b.getStatusStr()); WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId)); assertEquals("SUSPENDED", job1.getStatusStr()); // At this point, the action has gotten a transient error, even after maxRetries tries // so the workflow has been // SUSPENDED } }); // Now, lets bring the job tracker back up and resume the workflow (which will restart the // current action) // It should now continue and finish with SUCCEEDED new ResumeXCommand(jobId).call(); WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId)); assertEquals("RUNNING", job2.getStatusStr()); sleep(500); new ActionCheckXCommand(actionId).call(); WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd); String launcherId = action3.getExternalId(); assertFalse(originalLauncherId.equals(launcherId)); final RunningJob launcherJob2 = jobClient.getJob(JobID.forName(launcherId)); waitFor( 120 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { return launcherJob2.isComplete(); } }); assertTrue(launcherJob2.isSuccessful()); actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf); assertTrue(LauncherMapperHelper.hasIdSwap(actionData)); new ActionCheckXCommand(actionId).call(); WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd); String mapperId = action4.getExternalChildIDs(); assertFalse(originalMapperId.equals(mapperId)); final RunningJob mrJob = jobClient.getJob(JobID.forName(mapperId)); waitFor( 120 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { return mrJob.isComplete(); } }); assertTrue(mrJob.isSuccessful()); new ActionCheckXCommand(actionId).call(); WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd); assertEquals("SUCCEEDED", action5.getExternalStatus()); }
public void testGet() throws Exception { WorkflowActionBean bean = addRecordToWfActionTable("workflowId", "testAction", WorkflowAction.Status.PREP, "", true); WorkflowActionBean retBean; // GET_ACTION_ID_TYPE_LASTCHECK retBean = WorkflowActionQueryExecutor.getInstance() .get(WorkflowActionQuery.GET_ACTION_ID_TYPE_LASTCHECK, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLastCheckTime(), retBean.getLastCheckTime()); // GET_ACTION_FAIL retBean = WorkflowActionQueryExecutor.getInstance() .get(WorkflowActionQuery.GET_ACTION_FAIL, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getJobId(), retBean.getJobId()); assertEquals(bean.getName(), retBean.getName()); assertEquals(bean.getStatusStr(), retBean.getStatusStr()); assertEquals(bean.getPending(), retBean.getPending()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLogToken(), retBean.getLogToken()); assertEquals(bean.getTransition(), retBean.getTransition()); assertEquals(bean.getErrorCode(), retBean.getErrorCode()); assertEquals(bean.getErrorMessage(), retBean.getErrorMessage()); assertNull(retBean.getConf()); assertNull(retBean.getSlaXml()); assertNull(retBean.getData()); assertNull(retBean.getStats()); assertNull(retBean.getExternalChildIDs()); // GET_ACTION_SIGNAL retBean = WorkflowActionQueryExecutor.getInstance() .get(WorkflowActionQuery.GET_ACTION_SIGNAL, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getJobId(), retBean.getJobId()); assertEquals(bean.getName(), retBean.getName()); assertEquals(bean.getStatusStr(), retBean.getStatusStr()); assertEquals(bean.getPending(), retBean.getPending()); assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLogToken(), retBean.getLogToken()); assertEquals(bean.getTransition(), retBean.getTransition()); assertEquals(bean.getErrorCode(), retBean.getErrorCode()); assertEquals(bean.getErrorMessage(), retBean.getErrorMessage()); assertEquals(bean.getExecutionPath(), retBean.getExecutionPath()); assertEquals(bean.getSignalValue(), retBean.getSignalValue()); assertEquals(bean.getSlaXml(), retBean.getSlaXml()); assertNull(retBean.getConf()); assertNull(retBean.getData()); assertNull(retBean.getStats()); assertNull(retBean.getExternalChildIDs()); // GET_ACTION_CHECK retBean = WorkflowActionQueryExecutor.getInstance() .get(WorkflowActionQuery.GET_ACTION_CHECK, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getJobId(), retBean.getJobId()); assertEquals(bean.getName(), retBean.getName()); assertEquals(bean.getStatusStr(), retBean.getStatusStr()); assertEquals(bean.getPending(), retBean.getPending()); assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLogToken(), retBean.getLogToken()); assertEquals(bean.getTransition(), retBean.getTransition()); assertEquals(bean.getRetries(), retBean.getRetries()); assertEquals(bean.getUserRetryCount(), retBean.getUserRetryCount()); assertEquals(bean.getUserRetryMax(), retBean.getUserRetryMax()); assertEquals(bean.getUserRetryInterval(), retBean.getUserRetryInterval()); assertEquals(bean.getTrackerUri(), retBean.getTrackerUri()); assertEquals(bean.getStartTime().getTime(), retBean.getStartTime().getTime()); assertEquals(bean.getEndTime().getTime(), retBean.getEndTime().getTime()); assertEquals(bean.getLastCheckTime().getTime(), retBean.getLastCheckTime().getTime()); assertEquals(bean.getErrorCode(), retBean.getErrorCode()); assertEquals(bean.getErrorMessage(), retBean.getErrorMessage()); assertEquals(bean.getExternalId(), retBean.getExternalId()); assertEquals(bean.getExternalStatus(), retBean.getExternalStatus()); assertEquals(bean.getExternalChildIDs(), retBean.getExternalChildIDs()); assertEquals(bean.getConf(), retBean.getConf()); assertNull(retBean.getData()); assertNull(retBean.getStats()); assertNull(retBean.getSlaXml()); // GET_ACTION_END retBean = WorkflowActionQueryExecutor.getInstance() .get(WorkflowActionQuery.GET_ACTION_END, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getJobId(), retBean.getJobId()); assertEquals(bean.getName(), retBean.getName()); assertEquals(bean.getStatusStr(), retBean.getStatusStr()); assertEquals(bean.getPending(), retBean.getPending()); assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLogToken(), retBean.getLogToken()); assertEquals(bean.getTransition(), retBean.getTransition()); assertEquals(bean.getRetries(), retBean.getRetries()); assertEquals(bean.getTrackerUri(), retBean.getTrackerUri()); assertEquals(bean.getUserRetryCount(), retBean.getUserRetryCount()); assertEquals(bean.getUserRetryMax(), retBean.getUserRetryMax()); assertEquals(bean.getUserRetryInterval(), retBean.getUserRetryInterval()); assertEquals(bean.getExternalId(), retBean.getExternalId()); assertEquals(bean.getExternalStatus(), retBean.getExternalStatus()); assertEquals(bean.getExternalChildIDs(), retBean.getExternalChildIDs()); assertEquals(bean.getStartTime().getTime(), retBean.getStartTime().getTime()); assertEquals(bean.getEndTime().getTime(), retBean.getEndTime().getTime()); assertEquals(bean.getErrorCode(), retBean.getErrorCode()); assertEquals(bean.getErrorMessage(), retBean.getErrorMessage()); assertEquals(bean.getConf(), retBean.getConf()); assertEquals(bean.getData(), retBean.getData()); assertEquals(bean.getStats(), retBean.getStats()); assertNull(retBean.getSlaXml()); // GET_ACTION_COMPLETED retBean = WorkflowActionQueryExecutor.getInstance() .get(WorkflowActionQuery.GET_ACTION_COMPLETED, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getJobId(), retBean.getJobId()); assertEquals(bean.getStatusStr(), retBean.getStatusStr()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLogToken(), retBean.getLogToken()); assertNull(retBean.getSlaXml()); assertNull(retBean.getConf()); assertNull(retBean.getData()); assertNull(retBean.getStats()); assertNull(retBean.getExternalChildIDs()); // GET_ACTION (entire obj) retBean = WorkflowActionQueryExecutor.getInstance().get(WorkflowActionQuery.GET_ACTION, bean.getId()); assertEquals(bean.getId(), retBean.getId()); assertEquals(bean.getJobId(), retBean.getJobId()); assertEquals(bean.getName(), retBean.getName()); assertEquals(bean.getStatusStr(), retBean.getStatusStr()); assertEquals(bean.getPending(), retBean.getPending()); assertEquals(bean.getPendingAge().getTime(), retBean.getPendingAge().getTime()); assertEquals(bean.getType(), retBean.getType()); assertEquals(bean.getLogToken(), retBean.getLogToken()); assertEquals(bean.getTransition(), retBean.getTransition()); assertEquals(bean.getRetries(), retBean.getRetries()); assertEquals(bean.getUserRetryCount(), retBean.getUserRetryCount()); assertEquals(bean.getUserRetryMax(), retBean.getUserRetryMax()); assertEquals(bean.getUserRetryInterval(), retBean.getUserRetryInterval()); assertEquals(bean.getStartTime().getTime(), retBean.getStartTime().getTime()); assertEquals(bean.getEndTime().getTime(), retBean.getEndTime().getTime()); assertEquals(bean.getCreatedTime().getTime(), retBean.getCreatedTime().getTime()); assertEquals(bean.getLastCheckTime().getTime(), retBean.getLastCheckTime().getTime()); assertEquals(bean.getErrorCode(), retBean.getErrorCode()); assertEquals(bean.getErrorMessage(), retBean.getErrorMessage()); assertEquals(bean.getExecutionPath(), retBean.getExecutionPath()); assertEquals(bean.getSignalValue(), retBean.getSignalValue()); assertEquals(bean.getCred(), retBean.getCred()); assertEquals(bean.getConf(), retBean.getConf()); assertEquals(bean.getSlaXml(), retBean.getSlaXml()); assertEquals(bean.getData(), retBean.getData()); assertEquals(bean.getStats(), retBean.getStats()); assertEquals(bean.getExternalChildIDs(), retBean.getExternalChildIDs()); }
/** * Provides functionality to test non transient failures. * * @param errorType the error type. (start.non-transient, end.non-transient) * @param expStatus1 expected status. (START_MANUAL, END_MANUAL) * @param expErrorMsg expected error message. * @throws Exception */ private void _testNonTransient( String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg) throws Exception { String workflowPath = getTestCaseFileUri("workflow.xml"); Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1); Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml")); IOUtils.copyCharStream(reader, writer); final DagEngine engine = new DagEngine("u"); Configuration conf = new XConfiguration(); conf.set(OozieClient.APP_PATH, workflowPath); conf.set(OozieClient.USER_NAME, getTestUser()); conf.set(OozieClient.LOG_TOKEN, "t"); conf.set("signal-value", "OK"); conf.set("external-status", "ok"); conf.set("error", errorType); final String jobId = engine.submitJob(conf, true); waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED); } }); final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create(); store.beginTrx(); List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, true); int n = actions.size(); WorkflowActionBean action = actions.get(n - 1); assertEquals("TEST_ERROR", action.getErrorCode()); assertEquals(expErrorMsg, action.getErrorMessage()); assertEquals(expStatus1, action.getStatus()); assertTrue(action.isPending() == false); assertTrue(engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED); String actionConf = action.getConf(); String fixedActionConf = actionConf.replaceAll(errorType, "none"); action.setConf(fixedActionConf); store.updateAction(action); store.commitTrx(); store.closeTrx(); engine.resume(jobId); waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED); } }); assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus()); final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create(); store2.beginTrx(); actions = store2.getActionsForWorkflow(jobId, false); action = actions.get(0); assertEquals(null, action.getErrorCode()); assertEquals(null, action.getErrorMessage()); assertEquals(WorkflowActionBean.Status.OK, action.getStatus()); store2.commitTrx(); store2.closeTrx(); }