/**
 * Checks that the loaded action and workflow job are in a state that allows this command to run.
 *
 * <p>The action must either be absent ({@code null}) or be both complete and pending; otherwise
 * {@code E0814} is raised with the action id, the action status string and its pending flag.
 * Once the action check passes, the workflow job must be in {@code RUNNING} or {@code PREP}
 * status; otherwise {@code E0813} is raised with the job status string.
 *
 * @throws CommandException declared by the signature; not raised directly by this body
 * @throws PreconditionException if the action state or the job state does not allow execution
 */
@Override
protected void verifyPrecondition() throws CommandException, PreconditionException {
    boolean actionEligible =
            (wfAction == null) || (wfAction.isComplete() && wfAction.isPending());
    if (!actionEligible) {
        // wfAction is necessarily non-null on this path.
        throw new PreconditionException(
                ErrorCode.E0814, actionId, wfAction.getStatusStr(), wfAction.isPending());
    }

    boolean jobActive =
            wfJob.getStatus() == WorkflowJob.Status.RUNNING
                    || wfJob.getStatus() == WorkflowJob.Status.PREP;
    if (!jobActive) {
        throw new PreconditionException(ErrorCode.E0813, wfJob.getStatusStr());
    }
}
protected Void call(WorkflowStore store) throws StoreException, CommandException { WorkflowJobBean workflow = store.getWorkflow(jobId, false); setLogInfo(workflow); WorkflowActionBean action = store.getAction(id, false); setLogInfo(action); if (action.isPending() && (action.getStatus() == WorkflowActionBean.Status.DONE || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) { if (workflow.getStatus() == WorkflowJob.Status.RUNNING) { ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType()); Configuration conf = workflow.getWorkflowInstance().getConf(); int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries()); long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval()); executor.setMaxRetries(maxRetries); executor.setRetryInterval(retryInterval); if (executor != null) { boolean isRetry = false; if (action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) { isRetry = true; } ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry); try { XLog.getLog(getClass()) .debug( "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]", action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(), action.getSignalValue()); WorkflowInstance wfInstance = workflow.getWorkflowInstance(); DagELFunctions.setActionInfo(wfInstance, action); workflow.setWorkflowInstance(wfInstance); incrActionCounter(action.getType(), 1); Instrumentation.Cron cron = new Instrumentation.Cron(); cron.start(); executor.end(context, action); cron.stop(); addActionCron(action.getType(), cron); if (!context.isEnded()) { XLog.getLog(getClass()) .warn( XLog.OPS, "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType()); action.setErrorInfo( END_DATA_MISSING, 
"Execution Ended, but End Data Missing from Action"); failJob(context); store.updateAction(action); store.updateWorkflow(workflow); return null; } action.setRetries(0); action.setEndTime(new Date()); store.updateAction(action); store.updateWorkflow(workflow); Status slaStatus = null; switch (action.getStatus()) { case OK: slaStatus = Status.SUCCEEDED; break; case KILLED: slaStatus = Status.KILLED; break; case FAILED: slaStatus = Status.FAILED; break; case ERROR: XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA"); slaStatus = Status.KILLED; break; default: // TODO: What will happen for other Action // status slaStatus = Status.FAILED; break; } SLADbOperations.writeStausEvent( action.getSlaXml(), action.getId(), store, slaStatus, SlaAppType.WORKFLOW_ACTION); queueCallable(new NotificationCommand(workflow, action)); XLog.getLog(getClass()) .debug( "Queuing commands for action " + id + " status " + action.getStatus() + ", Set pending=" + action.getPending()); queueCallable(new SignalCommand(workflow.getId(), id)); } catch (ActionExecutorException ex) { XLog.getLog(getClass()) .warn( "Error ending action [{0}]. 
ErrorType [{1}], ErrorCode [{2}], Message [{3}]", action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage()); action.setErrorInfo(ex.getErrorCode(), ex.getMessage()); action.setEndTime(null); switch (ex.getErrorType()) { case TRANSIENT: if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) { handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL); action.setPendingAge(new Date()); action.setRetries(0); } action.setEndTime(null); break; case NON_TRANSIENT: handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL); action.setEndTime(null); break; case ERROR: handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR); queueCallable(new SignalCommand(workflow.getId(), id)); break; case FAILED: failJob(context); break; } store.updateAction(action); store.updateWorkflow(workflow); } } else { throw new CommandException(ErrorCode.E0802, action.getType()); } } else { XLog.getLog(getClass()) .warn( "Job state is not {0}. Skipping ActionEnd Execution", WorkflowJob.Status.RUNNING.toString()); } } else { XLog.getLog(getClass()) .debug( "Action pending={0}, status={1}. Skipping ActionEnd Execution", action.getPending(), action.getStatusStr()); } return null; }
/**
 * Shared driver for tests that check a non-transient action failure suspends the workflow and
 * that the coordinator action pointing at the workflow is updated to {@code SUSPENDED} as well.
 *
 * <p>A workflow is submitted without being started, a coordinator job and a coordinator action
 * referencing the workflow id are inserted, and only then is the workflow started.
 *
 * @param errorType the error type. (start.non-transient, end.non-transient)
 * @param expStatus1 expected action status. (START_MANUAL, END_MANUAL)
 * @param expErrorMsg expected error message on the action.
 * @throws Exception if any setup step, engine call or JPA query fails
 */
private void _testNonTransientWithCoordActionUpdate(
        String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg)
        throws Exception {
    // Stage the workflow definition in the test case directory.
    String appUri = getTestCaseFileUri("workflow.xml");
    IOUtils.copyCharStream(
            IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1),
            new FileWriter(new File(getTestCaseDir(), "workflow.xml")));

    final DagEngine engine = new DagEngine("u");
    Configuration jobConf = new XConfiguration();
    jobConf.set(OozieClient.APP_PATH, appUri);
    jobConf.set(OozieClient.USER_NAME, getTestUser());
    jobConf.set(OozieClient.LOG_TOKEN, "t");
    jobConf.set("signal-value", "OK");
    jobConf.set("external-status", "ok");
    jobConf.set("error", errorType);

    // Submit without starting so the coordinator records exist before the workflow runs.
    final String jobId = engine.submitJob(jobConf, false);

    final JPAService jpaService = Services.get().get(JPAService.class);
    final CoordinatorJobBean coordJob =
            addRecordToCoordJobTable(CoordinatorJob.Status.RUNNING, false, false);
    addRecordToCoordActionTable(
            coordJob.getId(),
            1,
            CoordinatorAction.Status.RUNNING,
            "coord-action-get.xml",
            jobId,
            "RUNNING",
            0);

    engine.start(jobId);

    Predicate workflowSuspended =
            new Predicate() {
                public boolean evaluate() throws Exception {
                    WorkflowJob.Status current = engine.getJob(jobId).getStatus();
                    return current == WorkflowJob.Status.SUSPENDED;
                }
            };
    waitFor(5000, workflowSuspended);

    assertNotNull(jpaService);
    WorkflowJobBean wfJobBean = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    List<WorkflowActionBean> wfActions =
            jpaService.execute(new WorkflowActionsGetForJobJPAExecutor(jobId));

    // The last action in the returned list is the one inspected.
    WorkflowActionBean lastAction = wfActions.get(wfActions.size() - 1);
    assertEquals("TEST_ERROR", lastAction.getErrorCode());
    assertEquals(expErrorMsg, lastAction.getErrorMessage());
    assertEquals(expStatus1, lastAction.getStatus());
    assertFalse(lastAction.isPending());
    assertEquals(WorkflowJob.Status.SUSPENDED, wfJobBean.getStatus());

    Predicate coordActionSuspended =
            new Predicate() {
                public boolean evaluate() throws Exception {
                    CoordinatorActionBean polled =
                            jpaService.execute(new CoordActionGetForExternalIdJPAExecutor(jobId));
                    return polled.getStatus().equals(CoordinatorAction.Status.SUSPENDED);
                }
            };
    waitFor(5000, coordActionSuspended);

    CoordinatorActionBean finalCoordAction =
            jpaService.execute(new CoordActionGetForExternalIdJPAExecutor(jobId));
    assertEquals(CoordinatorAction.Status.SUSPENDED, finalCoordAction.getStatus());
}
/**
 * Shared driver for tests that check transient action failures.
 *
 * <p>The workflow is submitted and started with a maximum of 2 retries and a retry interval of
 * 10. After an initial wait, the action of type {@code "test"} is expected to be in
 * {@code expStatus1} with the injected error; later the action is expected to reach
 * {@code expStatus2}, to be no longer pending, and the job to be {@code SUSPENDED}.
 *
 * @param errorType the error type. (start.transient, end.transient)
 * @param expStatus1 expected status after the first step (START_RETRY, END_RETRY)
 * @param expStatus2 expected status after the second step (START_MANUAL, END_MANUAL)
 * @param expErrorMsg the expected error message.
 * @throws Exception if any setup step, engine call or store operation fails
 */
private void _testTransient(
        String errorType,
        WorkflowActionBean.Status expStatus1,
        final WorkflowActionBean.Status expStatus2,
        String expErrorMsg)
        throws Exception {
    // Stage the workflow definition in the test case directory.
    String appUri = getTestCaseFileUri("workflow.xml");
    Reader definitionIn = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer definitionOut = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(definitionIn, definitionOut);

    final int maxRetries = 2;
    final int retryInterval = 10;

    final DagEngine engine = new DagEngine("u");
    Configuration jobConf = new XConfiguration();
    jobConf.set(OozieClient.APP_PATH, appUri);
    jobConf.set(OozieClient.USER_NAME, getTestUser());
    jobConf.set(OozieClient.LOG_TOKEN, "t");
    jobConf.set("signal-value", "OK");
    jobConf.set("external-status", "ok");
    jobConf.set("error", errorType);
    jobConf.setInt(OozieClient.ACTION_MAX_RETRIES, maxRetries);
    jobConf.setInt(OozieClient.ACTION_RETRY_INTERVAL, retryInterval);

    final String jobId = engine.submitJob(jobConf, true);

    WorkflowActionBean.Status statusToExpect = expStatus1;
    int retriesToExpect = 2;

    // Give the engine time to work through the retries before inspecting the action.
    Thread.sleep(20000);

    String foundActionId = null;
    final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create();
    store.beginTrx();
    for (int attempt = 1; attempt <= maxRetries; attempt++) {
        List<WorkflowActionBean> jobActions = store.getActionsForWorkflow(jobId, false);

        // Pick the first action whose type is "test".
        WorkflowActionBean testAction = null;
        for (WorkflowActionBean candidate : jobActions) {
            if (candidate.getType().equals("test")) {
                testAction = candidate;
                break;
            }
        }
        assertNotNull(testAction);
        foundActionId = testAction.getId();

        assertEquals(statusToExpect, testAction.getStatus());
        assertEquals(retriesToExpect, testAction.getRetries());
        assertEquals("TEST_ERROR", testAction.getErrorCode());
        assertEquals(expErrorMsg, testAction.getErrorMessage());

        if (testAction.getRetries() == maxRetries) {
            retriesToExpect = 0;
            statusToExpect = expStatus2;
            break;
        }
        retriesToExpect++;
        Thread.sleep(retryInterval * 1000);
    }
    store.commitTrx();
    store.closeTrx();

    Thread.sleep(5000);

    final String actionId = foundActionId;
    Predicate reachedSecondStatus =
            new Predicate() {
                public boolean evaluate() throws Exception {
                    return (engine.getWorkflowAction(actionId).getStatus() == expStatus2);
                }
            };
    waitFor(5000, reachedSecondStatus);

    final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create();
    store2.beginTrx();
    WorkflowActionBean finalAction = engine.getWorkflowAction(actionId);
    assertEquals("TEST_ERROR", finalAction.getErrorCode());
    assertEquals(expErrorMsg, finalAction.getErrorMessage());
    assertEquals(expStatus2, finalAction.getStatus());
    assertFalse(finalAction.isPending());
    assertEquals(WorkflowJob.Status.SUSPENDED, engine.getJob(jobId).getStatus());
    store2.commitTrx();
    store2.closeTrx();
}
/**
 * Shared driver for tests that check non-transient action failures and recovery by resume.
 *
 * <p>The workflow is submitted and started, and is expected to become {@code SUSPENDED} with
 * the last action in {@code expStatus1} carrying the injected error. The action configuration
 * is then rewritten so that every occurrence of {@code errorType} becomes {@code "none"}
 * (presumably disabling the injected failure), the job is resumed, and it is expected to reach
 * {@code SUCCEEDED} with the first action {@code OK} and free of error info.
 *
 * @param errorType the error type. (start.non-transient, end.non-transient)
 * @param expStatus1 expected status. (START_MANUAL, END_MANUAL)
 * @param expErrorMsg expected error message.
 * @throws Exception if any setup step, engine call or store operation fails
 */
private void _testNonTransient(
        String errorType, WorkflowActionBean.Status expStatus1, String expErrorMsg)
        throws Exception {
    // Stage the workflow definition in the test case directory.
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());
    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("signal-value", "OK");
    conf.set("external-status", "ok");
    conf.set("error", errorType);

    final String jobId = engine.submitJob(conf, true);

    waitFor(
            5000,
            new Predicate() {
                public boolean evaluate() throws Exception {
                    return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED);
                }
            });

    final WorkflowStore store = Services.get().get(WorkflowStoreService.class).create();
    store.beginTrx();
    List<WorkflowActionBean> actions = store.getActionsForWorkflow(jobId, true);
    int n = actions.size();
    WorkflowActionBean action = actions.get(n - 1);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals(expErrorMsg, action.getErrorMessage());
    assertEquals(expStatus1, action.getStatus());
    assertFalse(action.isPending());
    assertEquals(WorkflowJob.Status.SUSPENDED, engine.getJob(jobId).getStatus());

    // Literal replacement: errorType contains '.', which replaceAll would treat as a regex
    // wildcard. String.replace substitutes every literal occurrence instead.
    String actionConf = action.getConf();
    String fixedActionConf = actionConf.replace(errorType, "none");
    action.setConf(fixedActionConf);
    store.updateAction(action);
    store.commitTrx();
    store.closeTrx();

    engine.resume(jobId);

    waitFor(
            5000,
            new Predicate() {
                public boolean evaluate() throws Exception {
                    return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED);
                }
            });

    assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus());

    final WorkflowStore store2 = Services.get().get(WorkflowStoreService.class).create();
    store2.beginTrx();
    actions = store2.getActionsForWorkflow(jobId, false);
    action = actions.get(0);
    assertEquals(null, action.getErrorCode());
    assertEquals(null, action.getErrorMessage());
    assertEquals(WorkflowActionBean.Status.OK, action.getStatus());
    store2.commitTrx();
    store2.closeTrx();
}