/** * Provides functionality to test errors * * @param errorType the error type. (start.non-transient, end.non-transient) * @param externalStatus the external status to set. * @param signalValue the signal value to set. * @throws Exception */ private void _testError(String errorType, String externalStatus, String signalValue) throws Exception { String workflowPath = getTestCaseFileUri("workflow.xml"); Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1); Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml")); IOUtils.copyCharStream(reader, writer); final DagEngine engine = new DagEngine("u"); Configuration conf = new XConfiguration(); conf.set(OozieClient.APP_PATH, workflowPath); conf.set(OozieClient.USER_NAME, getTestUser()); conf.set(OozieClient.LOG_TOKEN, "t"); conf.set("error", errorType); conf.set("external-status", externalStatus); conf.set("signal-value", signalValue); final String jobId = engine.submitJob(conf, true); final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create(); store.beginTrx(); waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { WorkflowJobBean bean = store.getWorkflow(jobId, false); return (bean.getWorkflowInstance().getStatus() == WorkflowInstance.Status.KILLED); } }); assertEquals(WorkflowJob.Status.KILLED, engine.getJob(jobId).getStatus()); store.commitTrx(); store.closeTrx(); }
/** * Provides functionality to test for set*Data calls not being made by the Action Handler. * * @param avoidParam set*Data function call to avoid. * @param expActionErrorCode the expected action error code. * @throws Exception */ private void _testDataNotSet(String avoidParam, String expActionErrorCode) throws Exception { String workflowPath = getTestCaseFileUri("workflow.xml"); Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1); Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml")); IOUtils.copyCharStream(reader, writer); final DagEngine engine = new DagEngine("u"); Configuration conf = new XConfiguration(); conf.set(OozieClient.APP_PATH, workflowPath); conf.set(OozieClient.USER_NAME, getTestUser()); conf.set(OozieClient.LOG_TOKEN, "t"); conf.set("external-status", "ok"); conf.set("signal-value", "based_on_action_status"); conf.set(avoidParam, "true"); final String jobId = engine.submitJob(conf, true); final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create(); store.beginTrx(); Thread.sleep(2000); waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { WorkflowJobBean bean = store.getWorkflow(jobId, false); return (bean.getWorkflowInstance().getStatus() == WorkflowInstance.Status.FAILED); } }); store.commitTrx(); store.closeTrx(); final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create(); store2.beginTrx(); assertEquals( WorkflowInstance.Status.FAILED, store2.getWorkflow(jobId, false).getWorkflowInstance().getStatus()); assertEquals(WorkflowJob.Status.FAILED, engine.getJob(jobId).getStatus()); List<WorkflowActionBean> actions = store2.getActionsForWorkflow(jobId, false); WorkflowActionBean action = null; for (WorkflowActionBean bean : actions) { if (bean.getType().equals("test")) { action = bean; break; } } assertNotNull(action); assertEquals(expActionErrorCode, action.getErrorCode()); store2.commitTrx(); store2.closeTrx(); }
protected Void call(WorkflowStore store) throws StoreException, CommandException { WorkflowJobBean workflow = store.getWorkflow(jobId, false); setLogInfo(workflow); WorkflowActionBean action = store.getAction(id, false); setLogInfo(action); if (action.isPending() && (action.getStatus() == WorkflowActionBean.Status.DONE || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) { if (workflow.getStatus() == WorkflowJob.Status.RUNNING) { ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType()); Configuration conf = workflow.getWorkflowInstance().getConf(); int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries()); long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval()); executor.setMaxRetries(maxRetries); executor.setRetryInterval(retryInterval); if (executor != null) { boolean isRetry = false; if (action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) { isRetry = true; } ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry); try { XLog.getLog(getClass()) .debug( "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]", action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(), action.getSignalValue()); WorkflowInstance wfInstance = workflow.getWorkflowInstance(); DagELFunctions.setActionInfo(wfInstance, action); workflow.setWorkflowInstance(wfInstance); incrActionCounter(action.getType(), 1); Instrumentation.Cron cron = new Instrumentation.Cron(); cron.start(); executor.end(context, action); cron.stop(); addActionCron(action.getType(), cron); if (!context.isEnded()) { XLog.getLog(getClass()) .warn( XLog.OPS, "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType()); action.setErrorInfo( END_DATA_MISSING, "Execution Ended, but End Data Missing from Action"); failJob(context); store.updateAction(action); store.updateWorkflow(workflow); return null; } action.setRetries(0); action.setEndTime(new Date()); store.updateAction(action); store.updateWorkflow(workflow); Status slaStatus = null; switch (action.getStatus()) { case OK: slaStatus = Status.SUCCEEDED; break; case KILLED: slaStatus = Status.KILLED; break; case FAILED: slaStatus = Status.FAILED; break; case ERROR: XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA"); slaStatus = Status.KILLED; break; default: // TODO: What will happen for other Action // status slaStatus = Status.FAILED; break; } SLADbOperations.writeStausEvent( action.getSlaXml(), action.getId(), store, slaStatus, SlaAppType.WORKFLOW_ACTION); queueCallable(new NotificationCommand(workflow, action)); XLog.getLog(getClass()) .debug( "Queuing commands for action " + id + " status " + action.getStatus() + ", Set pending=" + action.getPending()); queueCallable(new SignalCommand(workflow.getId(), id)); } catch (ActionExecutorException ex) { XLog.getLog(getClass()) .warn( "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]", action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage()); action.setErrorInfo(ex.getErrorCode(), ex.getMessage()); action.setEndTime(null); switch (ex.getErrorType()) { case TRANSIENT: if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) { handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL); action.setPendingAge(new Date()); action.setRetries(0); } action.setEndTime(null); break; case NON_TRANSIENT: handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL); action.setEndTime(null); break; case ERROR: handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR); queueCallable(new SignalCommand(workflow.getId(), id)); break; case FAILED: failJob(context); break; } store.updateAction(action); store.updateWorkflow(workflow); } } else { throw new CommandException(ErrorCode.E0802, action.getType()); } } else { XLog.getLog(getClass()) .warn( "Job state is not {0}. Skipping ActionEnd Execution", WorkflowJob.Status.RUNNING.toString()); } } else { XLog.getLog(getClass()) .debug( "Action pending={0}, status={1}. Skipping ActionEnd Execution", action.getPending(), action.getStatusStr()); } return null; }
public void testFsFailover() throws Exception { Reader reader = IOUtils.getResourceAsReader("failover-fs-wf.xml", -1); Writer writer = new FileWriter(getTestCaseDir() + "/workflow.xml"); IOUtils.copyCharStream(reader, writer); final OozieClient wfClient = LocalOozie.getClient(); Properties conf = wfClient.createConfiguration(); conf.setProperty(OozieClient.APP_PATH, getTestCaseDir()); conf.setProperty(OozieClient.USER_NAME, getTestUser()); conf.setProperty(OozieClient.GROUP_NAME, getTestGroup()); injectKerberosInfo(conf); final Path source = new Path(getFsTestCaseDir(), "fsfailover-source"); getFileSystem().mkdirs(source); final Path target = new Path(getFsTestCaseDir().toString(), "fsfailover-target"); conf.setProperty("source", source.toString()); conf.setProperty("target", target.toUri().getPath()); setSystemProperty(FaultInjection.FAULT_INJECTION, "true"); setSystemProperty(SkipCommitFaultInjection.ACTION_FAILOVER_FAULT_INJECTION, "true"); final String jobId1 = wfClient.submit(conf); wfClient.start(jobId1); waitFor( 10 * 1000, new Predicate() { public boolean evaluate() throws Exception { return getFileSystem().exists(target); } }); assertTrue(getFileSystem().exists(target)); waitFor( 10 * 1000, new Predicate() { public boolean evaluate() throws Exception { return FaultInjection.isActive("org.apache.oozie.command.SkipCommitFaultInjection"); } }); assertFalse(FaultInjection.isActive("org.apache.oozie.command.SkipCommitFaultInjection")); assertEquals(WorkflowJob.Status.RUNNING, wfClient.getJobInfo(jobId1).getStatus()); WorkflowStore store = Services.get().get(WorkflowStoreService.class).create(); List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId1, false); WorkflowActionBean action = actions.get(0); assertEquals(WorkflowAction.Status.PREP, action.getStatus()); setSystemProperty(FaultInjection.FAULT_INJECTION, "false"); setSystemProperty(SkipCommitFaultInjection.ACTION_FAILOVER_FAULT_INJECTION, "false"); ActionStartCommand actionStartCommand = new ActionStartCommand(action.getId(), action.getType()); actionStartCommand.call(); store = Services.get().get(WorkflowStoreService.class).create(); actions = store.getActionsForWorkflow(jobId1, false); action = actions.get(0); assertEquals(WorkflowAction.Status.DONE, action.getStatus()); waitFor( 5 * 1000, new Predicate() { public boolean evaluate() throws Exception { return wfClient.getJobInfo(jobId1).getStatus() == WorkflowJob.Status.SUCCEEDED; } }); assertEquals(WorkflowJob.Status.SUCCEEDED, wfClient.getJobInfo(jobId1).getStatus()); final String jobId2 = wfClient.submit(conf); wfClient.start(jobId2); waitFor( 10 * 1000, new Predicate() { public boolean evaluate() throws Exception { return wfClient.getJobInfo(jobId2).getStatus() == WorkflowJob.Status.KILLED; } }); assertEquals(WorkflowJob.Status.KILLED, wfClient.getJobInfo(jobId2).getStatus()); }
/** * Provides functionality to test transient failures. * * @param errorType the error type. (start.transient, end.transient) * @param expStatus1 expected status after the first step (START_RETRY, END_RETRY) * @param expStatus2 expected status after the second step (START_MANUAL, END_MANUAL) * @param expErrorMsg the expected error message. * @throws Exception */ private void _testTransient( String errorType, WorkflowActionBean.Status expStatus1, final WorkflowActionBean.Status expStatus2, String expErrorMsg) throws Exception { String workflowPath = getTestCaseFileUri("workflow.xml"); Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1); Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml")); IOUtils.copyCharStream(reader, writer); final int maxRetries = 2; final int retryInterval = 10; final DagEngine engine = new DagEngine("u"); Configuration conf = new XConfiguration(); conf.set(OozieClient.APP_PATH, workflowPath); conf.set(OozieClient.USER_NAME, getTestUser()); conf.set(OozieClient.LOG_TOKEN, "t"); conf.set("signal-value", "OK"); conf.set("external-status", "ok"); conf.set("error", errorType); conf.setInt(OozieClient.ACTION_MAX_RETRIES, maxRetries); conf.setInt(OozieClient.ACTION_RETRY_INTERVAL, retryInterval); final String jobId = engine.submitJob(conf, true); int retryCount = 1; WorkflowActionBean.Status expectedStatus = expStatus1; int expectedRetryCount = 2; Thread.sleep(20000); String aId = null; final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create(); store.beginTrx(); while (retryCount <= maxRetries) { List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, false); WorkflowActionBean action = null; for (WorkflowActionBean bean : actions) { if (bean.getType().equals("test")) { action = bean; break; } } assertNotNull(action); aId = action.getId(); assertEquals(expectedStatus, action.getStatus()); assertEquals(expectedRetryCount, action.getRetries()); assertEquals("TEST_ERROR", action.getErrorCode()); assertEquals(expErrorMsg, action.getErrorMessage()); if (action.getRetries() == maxRetries) { expectedRetryCount = 0; expectedStatus = expStatus2; break; } else { expectedRetryCount++; } Thread.sleep(retryInterval * 1000); retryCount++; } store.commitTrx(); store.closeTrx(); Thread.sleep(5000); final String actionId = aId; waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { return (engine.getWorkflowAction(actionId).getStatus() == expStatus2); } }); final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create(); store2.beginTrx(); WorkflowActionBean action = engine.getWorkflowAction(actionId); assertEquals("TEST_ERROR", action.getErrorCode()); assertEquals(expErrorMsg, action.getErrorMessage()); assertEquals(expStatus2, action.getStatus()); assertTrue(action.isPending() == false); assertEquals(WorkflowJob.Status.SUSPENDED, engine.getJob(jobId).getStatus()); store2.commitTrx(); store2.closeTrx(); }
/** * Provides functionality to test non transient failures. * * @param errorType the error type. (start.non-transient, end.non-transient) * @param expStatus1 expected status. (START_MANUAL, END_MANUAL) * @param expErrorMsg expected error message. * @throws Exception */ private void _testNonTransient( String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg) throws Exception { String workflowPath = getTestCaseFileUri("workflow.xml"); Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1); Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml")); IOUtils.copyCharStream(reader, writer); final DagEngine engine = new DagEngine("u"); Configuration conf = new XConfiguration(); conf.set(OozieClient.APP_PATH, workflowPath); conf.set(OozieClient.USER_NAME, getTestUser()); conf.set(OozieClient.LOG_TOKEN, "t"); conf.set("signal-value", "OK"); conf.set("external-status", "ok"); conf.set("error", errorType); final String jobId = engine.submitJob(conf, true); waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED); } }); final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create(); store.beginTrx(); List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, true); int n = actions.size(); WorkflowActionBean action = actions.get(n - 1); assertEquals("TEST_ERROR", action.getErrorCode()); assertEquals(expErrorMsg, action.getErrorMessage()); assertEquals(expStatus1, action.getStatus()); assertTrue(action.isPending() == false); assertTrue(engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED); String actionConf = action.getConf(); String fixedActionConf = actionConf.replaceAll(errorType, "none"); action.setConf(fixedActionConf); store.updateAction(action); store.commitTrx(); store.closeTrx(); engine.resume(jobId); waitFor( 5000, new Predicate() { public boolean evaluate() throws Exception { return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED); } }); assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus()); final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create(); store2.beginTrx(); actions = store2.getActionsForWorkflow(jobId, false); action = actions.get(0); assertEquals(null, action.getErrorCode()); assertEquals(null, action.getErrorMessage()); assertEquals(WorkflowActionBean.Status.OK, action.getStatus()); store2.commitTrx(); store2.closeTrx(); }