private void checkCoordActions(String jobId, int number, CoordinatorJob.Status status) {
    try {
        JPAService jpaService = Services.get().get(JPAService.class);
        List<CoordinatorActionBean> actions = jpaService.execute(new CoordJobGetActionsJPAExecutor(jobId));
        if (actions.size() != number) {
            fail("Should have " + number + " actions created for job " + jobId + ", but have "
                    + actions.size() + " actions.");
        }
        if (status != null) {
            CoordinatorJob job = jpaService.execute(new CoordJobGetJPAExecutor(jobId));
            if (job.getStatus() != status) {
                fail("Job status " + job.getStatus() + " should be " + status);
            }
        }
    } catch (JPAExecutorException se) {
        se.printStackTrace();
        fail("Job ID " + jobId + " was not stored properly in db");
    }
}
public void testActionCheck() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    String mapperId = action.getExternalId();
    String childId = action.getExternalChildIDs();

    assertTrue(launcherId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action.getExternalStatus());
}
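// The waitFor(120 * 1000, new Predicate() { ... isComplete(); }) pattern above recurs in several
// of these tests. A small helper like the following could reduce that noise; this is only a
// sketch, assuming the waitFor/Predicate members of the test base class used here, and it is not
// part of the original test class.
private void waitForHadoopJobCompletion(final RunningJob runningJob) {
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            // poll the Hadoop job until it reports completion (or waitFor times out)
            return runningJob.isComplete();
        }
    });
}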
/**
 * Provides functionality to test kill node message
 *
 * @throws Exception
 */
public void testKillNodeErrorMessage() throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-test-kill-node-message.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());
    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("error", "end.error");
    conf.set("external-status", "FAILED/KILLED");
    conf.set("signal-value", "fail");

    final String jobId = engine.submitJob(conf, true);

    final JPAService jpaService = Services.get().get(JPAService.class);
    final WorkflowJobGetJPAExecutor wfJobGetCmd = new WorkflowJobGetJPAExecutor(jobId);

    waitFor(50000, new Predicate() {
        public boolean evaluate() throws Exception {
            WorkflowJobBean job = jpaService.execute(wfJobGetCmd);
            return (job.getWorkflowInstance().getStatus() == WorkflowInstance.Status.KILLED);
        }
    });

    WorkflowJobBean job = jpaService.execute(wfJobGetCmd);
    assertEquals(WorkflowJob.Status.KILLED, job.getStatus());

    WorkflowActionsGetForJobJPAExecutor wfActionsGetCmd = new WorkflowActionsGetForJobJPAExecutor(jobId);
    List<WorkflowActionBean> actions = jpaService.execute(wfActionsGetCmd);

    WorkflowActionBean action = null;
    for (WorkflowActionBean bean : actions) {
        if (bean.getType().equals("test")) {
            action = bean;
            break;
        }
    }
    assertNotNull(action);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals("end", action.getErrorMessage());
    assertEquals(WorkflowAction.Status.ERROR, action.getStatus());
}
private void _testUpdateAction(String actionId) throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);

    WorkflowActionGetJPAExecutor actionGetCmd = new WorkflowActionGetJPAExecutor(actionId);
    WorkflowActionBean action1 = jpaService.execute(actionGetCmd);
    action1.setStatus(WorkflowAction.Status.RUNNING);

    WorkflowActionUpdateJPAExecutor wfActionUpdateCmd = new WorkflowActionUpdateJPAExecutor(action1);
    jpaService.execute(wfActionUpdateCmd);

    WorkflowActionBean action2 = jpaService.execute(actionGetCmd);
    assertEquals(action2.getStatus(), WorkflowAction.Status.RUNNING);
}
private CoordinatorActionBean checkCoordAction(String actionId) throws JPAExecutorException {
    long[] lastSeqId = new long[1];
    JPAService jpaService = Services.get().get(JPAService.class);
    List<SLAEventBean> slaEventList = jpaService.execute(new SLAEventsGetForSeqIdJPAExecutor(0, 10, lastSeqId));
    if (slaEventList.size() == 0) {
        fail("Unable to GET any record of sequence id greater than 0");
    }
    CoordinatorActionBean actionBean = jpaService.execute(new CoordActionGetJPAExecutor(actionId));
    return actionBean;
}
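// Sketch only: SLAEventsGetForSeqIdJPAExecutor appears to take (lastSequenceId, batchSize,
// lastSeqIdOut) and to report the highest sequence id it returned through the one-element array.
// Under that assumption, all SLA events could be paged through as below; the loop shape, the
// batch size of 10, and the helper name are illustrative and not part of the original test.
private List<SLAEventBean> fetchAllSlaEvents(JPAService jpaService) throws JPAExecutorException {
    List<SLAEventBean> allEvents = new ArrayList<SLAEventBean>();
    long[] lastSeqId = new long[]{0};
    List<SLAEventBean> batch;
    do {
        // fetch the next batch of at most 10 events with sequence id greater than the last one seen
        batch = jpaService.execute(new SLAEventsGetForSeqIdJPAExecutor(lastSeqId[0], 10, lastSeqId));
        allEvents.addAll(batch);
    } while (batch.size() == 10);
    return allEvents;
}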
protected CoordinatorJobBean addRecordToCoordJobTable(CoordinatorJob.Status status, Date startTime, Date endTime,
        Date pauseTime, int timeout) throws Exception {
    CoordinatorJobBean coordJob = createCoordJob(status);
    coordJob.setStartTime(startTime);
    coordJob.setEndTime(endTime);
    coordJob.setPauseTime(pauseTime);
    coordJob.setFrequency(5);
    coordJob.setTimeUnit(Timeunit.MINUTE);
    coordJob.setTimeout(timeout);
    coordJob.setConcurrency(3);

    try {
        JPAService jpaService = Services.get().get(JPAService.class);
        assertNotNull(jpaService);
        CoordJobInsertJPAExecutor coordInsertCmd = new CoordJobInsertJPAExecutor(coordJob);
        jpaService.execute(coordInsertCmd);
    } catch (JPAExecutorException ex) {
        ex.printStackTrace();
        fail("Unable to insert the test coord job record to table");
        throw ex;
    }

    return coordJob;
}
/* (non-Javadoc)
 * @see org.apache.oozie.command.SuspendTransitionXCommand#performWrites()
 */
@Override
public void performWrites() throws CommandException {
    try {
        jpaService.execute(new BulkUpdateInsertJPAExecutor(updateList, null));
    } catch (JPAExecutorException e) {
        throw new CommandException(e);
    }
}
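// Hedged sketch: the second constructor argument of BulkUpdateInsertJPAExecutor is presumably the
// list of beans to insert in the same transaction (it is passed as null above because this command
// only updates). A command that also creates new beans might persist both lists in one call, as
// below; the insertList field is an assumption for illustration, not taken from the original code.
@Override
public void performWrites() throws CommandException {
    try {
        // updateList: beans modified by this command; insertList: beans newly created by it
        jpaService.execute(new BulkUpdateInsertJPAExecutor(updateList, insertList));
    } catch (JPAExecutorException e) {
        throw new CommandException(e);
    }
}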
/**
 * Test : Pause a RUNNINGWITHERROR bundle
 *
 * @throws Exception
 */
public void testBundlePauseUnpause3() throws Exception {
    BundleJobBean job = this.addRecordToBundleJobTable(Job.Status.RUNNINGWITHERROR, false);

    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);
    BundleJobGetJPAExecutor bundleJobGetCmd = new BundleJobGetJPAExecutor(job.getId());
    job = jpaService.execute(bundleJobGetCmd);
    assertEquals(job.getStatus(), Job.Status.RUNNINGWITHERROR);

    new BundlePauseXCommand(job).call();
    job = jpaService.execute(bundleJobGetCmd);
    assertEquals(job.getStatus(), Job.Status.PAUSEDWITHERROR);

    new BundleUnpauseXCommand(job).call();
    job = jpaService.execute(bundleJobGetCmd);
    assertEquals(job.getStatus(), Job.Status.RUNNINGWITHERROR);
}
/* (non-Javadoc)
 * @see org.apache.oozie.command.XCommand#loadState()
 */
@Override
protected void loadState() throws CommandException {
    try {
        eagerLoadState();
        this.bundleActions = jpaService.execute(new BundleActionsGetJPAExecutor(jobId));
    } catch (Exception ex) {
        throw new CommandException(ErrorCode.E1311, this.jobId);
    }
}
@Override
protected void loadState() throws CommandException {
    try {
        jpaService = Services.get().get(JPAService.class);
        if (jpaService != null) {
            this.wfJob = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            LogUtils.setLogInfo(wfJob, logInfo);
            if (actionId != null) {
                this.wfAction = jpaService.execute(new WorkflowActionGetJPAExecutor(actionId));
                LogUtils.setLogInfo(wfAction, logInfo);
            }
        } else {
            throw new CommandException(ErrorCode.E0610);
        }
    } catch (XException ex) {
        throw new CommandException(ex);
    }
}
private void checkCoordActionsTimeout(String actionId, int expected) {
    try {
        JPAService jpaService = Services.get().get(JPAService.class);
        CoordinatorActionBean action = jpaService.execute(new CoordActionGetJPAExecutor(actionId));
        assertEquals(action.getTimeOut(), expected);
    } catch (JPAExecutorException se) {
        se.printStackTrace();
        fail("Action ID " + actionId + " was not stored properly in db");
    }
}
/* (non-Javadoc)
 * @see org.apache.oozie.command.XCommand#loadState()
 */
@Override
protected void loadState() throws CommandException {
    jpaService = Services.get().get(JPAService.class);
    if (jpaService == null) {
        throw new CommandException(ErrorCode.E0610);
    }

    try {
        bundleJob = jpaService.execute(new BundleJobGetJPAExecutor(jobId));
    } catch (Exception ex) {
        throw new CommandException(ErrorCode.E0604, jobId);
    }

    try {
        bundleActions = jpaService.execute(new BundleActionsGetJPAExecutor(jobId));
    } catch (Exception ex) {
        throw new CommandException(ErrorCode.E1311, jobId);
    }

    LogUtils.setLogInfo(bundleJob, logInfo);
}
private void _testGetJobInfoForId(String jobId) throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);

    Map<String, List<String>> filter = new HashMap<String, List<String>>();
    List<String> jobIdList = new ArrayList<String>();
    jobIdList.add(jobId);
    filter.put(OozieClient.FILTER_ID, jobIdList);

    BundleJobInfoGetJPAExecutor bundleInfoGetCmd = new BundleJobInfoGetJPAExecutor(filter, 1, 20);
    BundleJobInfo ret = jpaService.execute(bundleInfoGetCmd);
    assertNotNull(ret);
    assertEquals(ret.getBundleJobs().size(), 1);
}
private void _testGetJobInfoForAppName() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);

    Map<String, List<String>> filter = new HashMap<String, List<String>>();
    List<String> list = new ArrayList<String>();
    list.add("BUNDLE-TEST");
    filter.put(OozieClient.FILTER_NAME, list);

    BundleJobInfoGetJPAExecutor bundleInfoGetCmd = new BundleJobInfoGetJPAExecutor(filter, 1, 20);
    BundleJobInfo ret = jpaService.execute(bundleInfoGetCmd);
    assertNotNull(ret);
    assertEquals(4, ret.getBundleJobs().size());
}
/**
 * Provides functionality to test user retry
 *
 * @param errorType the error type. (start.non-transient, end.non-transient)
 * @param externalStatus the external status to set.
 * @param signalValue the signal value to set.
 * @throws Exception
 */
private void _testErrorWithUserRetry(String errorType, String externalStatus, String signalValue) throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid-user-retry.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());
    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("error", errorType);
    conf.set("external-status", externalStatus);
    conf.set("signal-value", signalValue);

    final String jobId = engine.submitJob(conf, true);

    final JPAService jpaService = Services.get().get(JPAService.class);
    final WorkflowJobGetJPAExecutor wfJobGetCmd = new WorkflowJobGetJPAExecutor(jobId);
    final WorkflowActionsGetForJobJPAExecutor actionsGetExecutor = new WorkflowActionsGetForJobJPAExecutor(jobId);

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            List<WorkflowActionBean> actions = jpaService.execute(actionsGetExecutor);
            WorkflowActionBean action = null;
            for (WorkflowActionBean bean : actions) {
                if (bean.getType().equals("test")) {
                    action = bean;
                    break;
                }
            }
            return (action != null && action.getUserRetryCount() == 2);
        }
    });

    List<WorkflowActionBean> actions = jpaService.execute(actionsGetExecutor);
    WorkflowActionBean action = null;
    for (WorkflowActionBean bean : actions) {
        if (bean.getType().equals("test")) {
            action = bean;
            break;
        }
    }
    assertNotNull(action);
    assertEquals(2, action.getUserRetryCount());
}
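// The "find the action of type 'test'" loop above appears twice in this method (and again in
// testKillNodeErrorMessage). A small helper could replace both copies; this is a sketch, not part
// of the original test class.
private WorkflowActionBean findActionByType(List<WorkflowActionBean> actions, String type) {
    for (WorkflowActionBean bean : actions) {
        if (bean.getType().equals(type)) {
            return bean;
        }
    }
    // no action of the requested type found
    return null;
}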
public void testTimeout() throws Exception {
    Date startTime = DateUtils.parseDateUTC("2009-03-06T10:00Z");
    Date endTime = DateUtils.parseDateUTC("2009-03-06T10:14Z");
    Date pauseTime = null;
    CoordinatorJobBean job = addRecordToCoordJobTable(CoordinatorJob.Status.PREMATER, startTime, endTime,
            pauseTime, 300);

    new CoordActionMaterializeXCommand(job.getId(), startTime, endTime).call();

    JPAService jpaService = Services.get().get(JPAService.class);
    List<CoordinatorActionBean> actions = jpaService.execute(new CoordJobGetActionsJPAExecutor(job.getId()));

    checkCoordActionsTimeout(job.getId() + "@1", 300);
}
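// A hedged extension of testTimeout(): with frequency 5 (MINUTE) and the window 10:00-10:14,
// materialization should create actions for 10:00, 10:05 and 10:10, so the action count and the
// per-action timeout could be asserted with the helpers defined in this class. Sketch only,
// assuming CoordActionMaterializeXCommand creates every action in the window in one call.
private void checkMaterializedActions(String jobId) {
    checkCoordActions(jobId, 3, null);           // three 5-minute instances fit in the 14-minute window
    checkCoordActionsTimeout(jobId + "@1", 300); // each action carries the job-level timeout of 300
}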
/* (non-Javadoc)
 * @see org.apache.oozie.command.XCommand#eagerLoadState()
 */
@Override
protected void eagerLoadState() throws CommandException {
    try {
        jpaService = Services.get().get(JPAService.class);
        if (jpaService != null) {
            this.bundleJob = jpaService.execute(new BundleJobGetJPAExecutor(jobId));
            LogUtils.setLogInfo(bundleJob, logInfo);
        } else {
            throw new CommandException(ErrorCode.E0610);
        }
    } catch (XException ex) {
        throw new CommandException(ex);
    }
}
/**
 * Test : Negative case - pause a suspended bundle
 *
 * @throws Exception
 */
public void testBundlePauseUnpauseNeg1() throws Exception {
    BundleJobBean job = this.addRecordToBundleJobTable(Job.Status.SUSPENDED, false);

    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);
    BundleJobGetJPAExecutor bundleJobGetCmd = new BundleJobGetJPAExecutor(job.getId());
    job = jpaService.execute(bundleJobGetCmd);
    assertEquals(job.getStatus(), Job.Status.SUSPENDED);

    try {
        new BundlePauseXCommand(job).call();
        fail("should not reach here.");
    } catch (Exception ex) {
        // expected: pausing a SUSPENDED bundle is not allowed
    }
}
private void _testGetJobInfoForUserAndStatus() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);

    Map<String, List<String>> filter = new HashMap<String, List<String>>();
    List<String> list1 = new ArrayList<String>();
    list1.add(getTestUser());
    filter.put(OozieClient.FILTER_USER, list1);
    List<String> list2 = new ArrayList<String>();
    list2.add("KILLED");
    filter.put(OozieClient.FILTER_STATUS, list2);

    BundleJobInfoGetJPAExecutor bundleInfoGetCmd = new BundleJobInfoGetJPAExecutor(filter, 1, 20);
    BundleJobInfo ret = jpaService.execute(bundleInfoGetCmd);
    assertNotNull(ret);
    assertEquals(1, ret.getBundleJobs().size());
}
@Override
protected WorkflowActionBean addRecordToWfActionTable(String wfId, String actionName, WorkflowAction.Status status)
        throws Exception {
    WorkflowActionBean action = createWorkflowActionSetPending(wfId, status);
    try {
        JPAService jpaService = Services.get().get(JPAService.class);
        assertNotNull(jpaService);
        WorkflowActionInsertJPAExecutor actionInsertCmd = new WorkflowActionInsertJPAExecutor(action);
        jpaService.execute(actionInsertCmd);
    } catch (JPAExecutorException ce) {
        ce.printStackTrace();
        fail("Unable to insert the test wf action record to table");
        throw ce;
    }
    return action;
}
/* (non-Javadoc)
 * @see org.apache.oozie.command.XCommand#execute()
 */
@Override
protected Void execute() throws CommandException {
    try {
        if (isChangePauseTime || isChangeEndTime) {
            if (isChangePauseTime) {
                bundleJob.setPauseTime(newPauseTime);
            } else if (isChangeEndTime) {
                bundleJob.setEndTime(newEndTime);
                if (bundleJob.getStatus() == Job.Status.SUCCEEDED) {
                    bundleJob.setStatus(Job.Status.RUNNING);
                }
                if (bundleJob.getStatus() == Job.Status.DONEWITHERROR
                        || bundleJob.getStatus() == Job.Status.FAILED) {
                    bundleJob.setStatus(StatusUtils.getStatusIfBackwardSupportTrue(Job.Status.RUNNINGWITHERROR));
                }
            }
            for (BundleActionBean action : this.bundleActions) {
                // queue coord change commands
                if (action.getStatus() != Job.Status.KILLED && action.getCoordId() != null) {
                    queue(new CoordChangeXCommand(action.getCoordId(), changeValue));
                    LOG.info("Queuing CoordChangeXCommand coord job = " + action.getCoordId() + " to change "
                            + changeValue);
                    action.setPending(action.getPending() + 1);
                    updateList.add(action);
                }
            }
            updateList.add(bundleJob);
            jpaService.execute(new BulkUpdateInsertJPAExecutor(updateList, null));
        }
        return null;
    } catch (XException ex) {
        throw new CommandException(ex);
    }
}
/**
 * Write a coordinator job into database
 *
 * @param eJob : XML element of job
 * @param coordJob : Coordinator job bean
 * @return Job id
 * @throws CommandException thrown if unable to save coordinator job to db
 */
private String storeToDB(Element eJob, CoordinatorJobBean coordJob) throws CommandException {
    String jobId = Services.get().get(UUIDService.class).generateId(ApplicationType.COORDINATOR);
    coordJob.setId(jobId);
    coordJob.setAuthToken(this.authToken);
    coordJob.setAppPath(conf.get(OozieClient.COORDINATOR_APP_PATH));
    coordJob.setCreatedTime(new Date());
    coordJob.setUser(conf.get(OozieClient.USER_NAME));
    coordJob.setGroup(conf.get(OozieClient.GROUP_NAME));
    coordJob.setConf(XmlUtils.prettyPrint(conf).toString());
    coordJob.setJobXml(XmlUtils.prettyPrint(eJob).toString());
    coordJob.setLastActionNumber(0);
    coordJob.setLastModifiedTime(new Date());

    if (!dryrun) {
        coordJob.setLastModifiedTime(new Date());
        try {
            jpaService.execute(new CoordJobInsertJPAExecutor(coordJob));
        } catch (JPAExecutorException je) {
            throw new CommandException(je);
        }
    }
    return jobId;
}
/**
 * Provides functionality to test non transient failures and coordinator action update
 *
 * @param errorType the error type. (start.non-transient, end.non-transient)
 * @param expStatus1 expected status. (START_MANUAL, END_MANUAL)
 * @param expErrorMsg expected error message.
 * @throws Exception
 */
private void _testNonTransientWithCoordActionUpdate(String errorType, WorkflowActionBean.Status expStatus1,
        String expErrorMsg) throws Exception {
    String workflowPath = getTestCaseFileUri("workflow.xml");
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, workflowPath);
    conf.set(OozieClient.USER_NAME, getTestUser());
    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("signal-value", "OK");
    conf.set("external-status", "ok");
    conf.set("error", errorType);

    final String jobId = engine.submitJob(conf, false);

    final JPAService jpaService = Services.get().get(JPAService.class);
    final CoordinatorJobBean coordJob = addRecordToCoordJobTable(CoordinatorJob.Status.RUNNING, false, false);

    CoordinatorActionBean coordAction = addRecordToCoordActionTable(coordJob.getId(), 1,
            CoordinatorAction.Status.RUNNING, "coord-action-get.xml", jobId, "RUNNING", 0);

    engine.start(jobId);

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUSPENDED);
        }
    });

    assertNotNull(jpaService);
    WorkflowJobGetJPAExecutor wfGetCmd = new WorkflowJobGetJPAExecutor(jobId);
    WorkflowJobBean job = jpaService.execute(wfGetCmd);
    WorkflowActionsGetForJobJPAExecutor actionsGetExe = new WorkflowActionsGetForJobJPAExecutor(jobId);
    List<WorkflowActionBean> actionsList = jpaService.execute(actionsGetExe);

    int n = actionsList.size();
    WorkflowActionBean action = actionsList.get(n - 1);
    assertEquals("TEST_ERROR", action.getErrorCode());
    assertEquals(expErrorMsg, action.getErrorMessage());
    assertEquals(expStatus1, action.getStatus());
    assertFalse(action.isPending());

    assertEquals(WorkflowJob.Status.SUSPENDED, job.getStatus());

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            CoordinatorActionBean coordAction2 = jpaService.execute(new CoordActionGetForExternalIdJPAExecutor(jobId));
            return coordAction2.getStatus().equals(CoordinatorAction.Status.SUSPENDED);
        }
    });

    coordAction = jpaService.execute(new CoordActionGetForExternalIdJPAExecutor(jobId));
    assertEquals(CoordinatorAction.Status.SUSPENDED, coordAction.getStatus());
}
public void testActionCheckTransientDuringMRAction() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
        return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra ActionCheckXCommands
    setClassesToBeExcluded(services.getConf(), new String[]{"org.apache.oozie.service.ActionCheckerService"});
    services.init();

    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);

    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job0, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(originalLauncherId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action1.getId()).call();
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    String originalMapperId = action2.getExternalChildIDs();

    assertFalse(originalLauncherId.equals(originalMapperId));

    // At this point, the launcher job has finished and the map-reduce action has started (but not finished)
    // Now, shutdown the job tracker to pretend it has gone down during the map-reduce job
    executeWhileJobTrackerIsShutdown(new ShutdownJobTrackerExecutable() {
        @Override
        public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();

            waitFor(30 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                }
            });
            waitFor(180 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                }
            });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());

            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());

            // At this point, the action has gotten a transient error, even after maxRetries tries,
            // so the workflow has been SUSPENDED
        }
    });

    // Now, lets bring the job tracker back up and resume the workflow (which will restart the current action)
    // It should now continue and finish with SUCCEEDED
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());

    sleep(500);

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();

    assertFalse(originalLauncherId.equals(launcherId));

    final RunningJob launcherJob2 = jobClient.getJob(JobID.forName(launcherId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob2.isComplete();
        }
    });

    assertTrue(launcherJob2.isSuccessful());
    actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalChildIDs();

    assertFalse(originalMapperId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(mapperId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action5.getExternalStatus());
}
@Override
protected Void execute() throws CommandException {
    LOG.debug("STARTED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
    WorkflowInstance workflowInstance = wfJob.getWorkflowInstance();
    workflowInstance.setTransientVar(WorkflowStoreService.WORKFLOW_BEAN, wfJob);
    boolean completed = false;
    boolean skipAction = false;
    if (wfAction == null) {
        if (wfJob.getStatus() == WorkflowJob.Status.PREP) {
            try {
                completed = workflowInstance.start();
            } catch (WorkflowException e) {
                throw new CommandException(e);
            }
            wfJob.setStatus(WorkflowJob.Status.RUNNING);
            wfJob.setStartTime(new Date());
            wfJob.setWorkflowInstance(workflowInstance);
            // 1. Add SLA status event for WF-JOB with status STARTED
            // 2. Add SLA registration events for all WF_ACTIONS
            SLADbXOperations.writeStausEvent(wfJob.getSlaXml(), jobId, Status.STARTED, SlaAppType.WORKFLOW_JOB);
            writeSLARegistrationForAllActions(workflowInstance.getApp().getDefinition(), wfJob.getUser(),
                    wfJob.getGroup(), wfJob.getConf());
            queue(new NotificationXCommand(wfJob));
        } else {
            throw new CommandException(ErrorCode.E0801, wfJob.getId());
        }
    } else {
        String skipVar = workflowInstance.getVar(wfAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR
                + ReRunCommand.TO_SKIP);
        if (skipVar != null) {
            skipAction = skipVar.equals("true");
        }
        try {
            completed = workflowInstance.signal(wfAction.getExecutionPath(), wfAction.getSignalValue());
        } catch (WorkflowException e) {
            throw new CommandException(e);
        }
        wfJob.setWorkflowInstance(workflowInstance);
        wfAction.resetPending();
        if (!skipAction) {
            wfAction.setTransition(workflowInstance.getTransition(wfAction.getName()));
        }
        try {
            jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction));
        } catch (JPAExecutorException je) {
            throw new CommandException(je);
        }
    }

    if (completed) {
        try {
            for (String actionToKillId : WorkflowStoreService.getActionsToKill(workflowInstance)) {
                WorkflowActionBean actionToKill = jpaService.execute(new WorkflowActionGetJPAExecutor(actionToKillId));
                actionToKill.setPending();
                actionToKill.setStatus(WorkflowActionBean.Status.KILLED);
                jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToKill));
                queue(new ActionKillXCommand(actionToKill.getId(), actionToKill.getType()));
            }

            for (String actionToFailId : WorkflowStoreService.getActionsToFail(workflowInstance)) {
                WorkflowActionBean actionToFail = jpaService.execute(new WorkflowActionGetJPAExecutor(actionToFailId));
                actionToFail.resetPending();
                actionToFail.setStatus(WorkflowActionBean.Status.FAILED);
                SLADbXOperations.writeStausEvent(wfAction.getSlaXml(), wfAction.getId(), Status.FAILED,
                        SlaAppType.WORKFLOW_ACTION);
                jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToFail));
            }
        } catch (JPAExecutorException je) {
            throw new CommandException(je);
        }

        wfJob.setStatus(WorkflowJob.Status.valueOf(workflowInstance.getStatus().toString()));
        wfJob.setEndTime(new Date());
        wfJob.setWorkflowInstance(workflowInstance);
        Status slaStatus = Status.SUCCEEDED;
        switch (wfJob.getStatus()) {
            case SUCCEEDED:
                slaStatus = Status.SUCCEEDED;
                break;
            case KILLED:
                slaStatus = Status.KILLED;
                break;
            case FAILED:
                slaStatus = Status.FAILED;
                break;
            default: // TODO SUSPENDED
                break;
        }
        SLADbXOperations.writeStausEvent(wfJob.getSlaXml(), jobId, slaStatus, SlaAppType.WORKFLOW_JOB);
        queue(new NotificationXCommand(wfJob));
        if (wfJob.getStatus() == WorkflowJob.Status.SUCCEEDED) {
            InstrumentUtils.incrJobCounter(INSTR_SUCCEEDED_JOBS_COUNTER_NAME, 1, getInstrumentation());
        }
    } else {
        for (WorkflowActionBean newAction : WorkflowStoreService.getStartedActions(workflowInstance)) {
            String skipVar = workflowInstance.getVar(newAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR
                    + ReRunCommand.TO_SKIP);
            boolean skipNewAction = false;
            if (skipVar != null) {
                skipNewAction = skipVar.equals("true");
            }
            try {
                if (skipNewAction) {
                    WorkflowActionBean oldAction = jpaService.execute(new WorkflowActionGetJPAExecutor(newAction.getId()));
                    oldAction.setPending();
                    jpaService.execute(new WorkflowActionUpdateJPAExecutor(oldAction));
                    queue(new SignalXCommand(jobId, oldAction.getId()));
                } else {
                    newAction.setPending();
                    String actionSlaXml = getActionSLAXml(newAction.getName(),
                            workflowInstance.getApp().getDefinition(), wfJob.getConf());
                    newAction.setSlaXml(actionSlaXml);
                    jpaService.execute(new WorkflowActionInsertJPAExecutor(newAction));
                    LOG.debug("SignalXCommand: Name: " + newAction.getName() + ", Id: " + newAction.getId()
                            + ", Authcode:" + newAction.getCred());
                    queue(new ActionStartXCommand(newAction.getId(), newAction.getType()));
                }
            } catch (JPAExecutorException je) {
                throw new CommandException(je);
            }
        }
    }

    try {
        jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob));
    } catch (JPAExecutorException je) {
        throw new CommandException(je);
    }
    XLog.getLog(getClass()).debug(
            "Updated the workflow status to " + wfJob.getId() + " status =" + wfJob.getStatusStr());
    if (wfJob.getStatus() != WorkflowJob.Status.RUNNING && wfJob.getStatus() != WorkflowJob.Status.SUSPENDED) {
        // update coordinator action
        new CoordActionUpdateXCommand(wfJob).call();
        new WfEndXCommand(wfJob).call(); // To delete the WF temp dir
    }
    LOG.debug("ENDED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
    return null;
}