/**
 * Re-runs the coordinator actions selected by {@code rerunType}, {@code jobId} and {@code scope}.
 *
 * <p>When every selected action passes {@code checkAllActionsRunnable}, each action is processed in
 * turn: its output events are cleaned up (unless {@code noCleanup} or {@code failed} is set), it is
 * optionally refreshed, updated, its SLA registration is updated when the SLA service is enabled,
 * and a notification command plus an input-check command are queued for it. The resulting
 * {@code CoordinatorActionInfo} is stored in {@code ret}.
 *
 * <p>On any failure {@code transitToPrevious()} is invoked before the exception propagates.
 *
 * @throws CommandException if the actions are not all eligible for rerun (E1018), if the action XML
 *     cannot be parsed (E0700), or if any other error occurs while re-running
 */
@Override
public void rerunChildren() throws CommandException {
    boolean rerunFailed = false;
    try {
        InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
        List<CoordinatorActionBean> actionsToRerun =
                CoordUtils.getCoordActions(rerunType, jobId, scope, false);

        // Guard: refuse the whole rerun when at least one action is not runnable.
        if (!checkAllActionsRunnable(actionsToRerun)) {
            rerunFailed = true;
            throw new CommandException(
                    ErrorCode.E1018, "part or all actions are not eligible to rerun!");
        }

        for (CoordinatorActionBean action : actionsToRerun) {
            String actionXml = action.getActionXml();
            boolean skipCleanup = noCleanup || failed;
            if (!skipCleanup) {
                cleanupOutputEvents(XmlUtils.parseXml(actionXml));
            }
            if (refresh) {
                refreshAction(coordJob, action);
            }
            updateAction(coordJob, action);
            if (SLAService.isEnabled()) {
                SLAOperations.updateRegistrationEvent(action.getId());
            }
            queue(new CoordActionNotificationXCommand(action), 100);
            queue(new CoordActionInputCheckXCommand(action.getId(), action.getJobId()), 100);
        }

        ret = new CoordinatorActionInfo(actionsToRerun);
    }
    catch (XException xex) {
        rerunFailed = true;
        throw new CommandException(xex);
    }
    catch (JDOMException jex) {
        rerunFailed = true;
        throw new CommandException(ErrorCode.E0700, jex.getMessage(), jex);
    }
    catch (Exception ex) {
        rerunFailed = true;
        throw new CommandException(ErrorCode.E1018, ex.getMessage(), ex);
    }
    finally {
        // Roll the job state back whenever the rerun did not complete.
        if (rerunFailed) {
            transitToPrevious();
        }
    }
}
/**
 * Stamps the bundle job with its suspended and last-modified times, logs its current state and
 * adds it to {@code updateList}.
 *
 * @see org.apache.oozie.command.TransitionXCommand#updateJob()
 */
@Override
public void updateJob() {
    // NOTE(review): the instrumentation argument is null here - confirm the counter is recorded.
    InstrumentUtils.incrJobCounter("bundle_suspend", 1, null);

    // Two independent timestamps are taken, exactly one per setter.
    Date suspendedAt = new Date();
    bundleJob.setSuspendedTime(suspendedAt);
    Date modifiedAt = new Date();
    bundleJob.setLastModifiedTime(modifiedAt);

    StringBuilder message = new StringBuilder("Suspend bundle job id = ");
    message.append(jobId);
    message.append(", status = ").append(bundleJob.getStatus());
    message.append(", pending = ").append(bundleJob.isPending());
    LOG.debug(message.toString());

    updateList.add(bundleJob);
}
/* (non-Javadoc) * @see org.apache.oozie.command.XCommand#execute() */ @Override protected String submit() throws CommandException { String jobId = null; LOG.info("STARTED Coordinator Submit"); InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation()); boolean exceptionOccured = false; try { mergeDefaultConfig(); String appXml = readAndValidateXml(); coordJob.setOrigJobXml(appXml); LOG.debug("jobXml after initial validation " + XmlUtils.prettyPrint(appXml).toString()); String appNamespace = readAppNamespace(appXml); coordJob.setAppNamespace(appNamespace); appXml = XmlUtils.removeComments(appXml); initEvaluators(); Element eJob = basicResolveAndIncludeDS(appXml, conf, coordJob); LOG.debug("jobXml after all validation " + XmlUtils.prettyPrint(eJob).toString()); jobId = storeToDB(eJob, coordJob); // log job info for coordinator job LogUtils.setLogInfo(coordJob, logInfo); LOG = XLog.resetPrefix(LOG); if (!dryrun) { // submit a command to materialize jobs for the next 1 hour (3600 secs) // so we don't wait 10 mins for the Service to run. queue(new CoordMaterializeTransitionXCommand(jobId, 3600), 100); } else { Date startTime = coordJob.getStartTime(); long startTimeMilli = startTime.getTime(); long endTimeMilli = startTimeMilli + (3600 * 1000); Date jobEndTime = coordJob.getEndTime(); Date endTime = new Date(endTimeMilli); if (endTime.compareTo(jobEndTime) > 0) { endTime = jobEndTime; } jobId = coordJob.getId(); LOG.info("[" + jobId + "]: Update status to RUNNING"); coordJob.setStatus(Job.Status.RUNNING); coordJob.setPending(); CoordActionMaterializeCommand coordActionMatCom = new CoordActionMaterializeCommand(jobId, startTime, endTime); Configuration jobConf = null; try { jobConf = new XConfiguration(new StringReader(coordJob.getConf())); } catch (IOException e1) { LOG.warn("Configuration parse error. 
read from DB :" + coordJob.getConf(), e1); } String action = coordActionMatCom.materializeJobs(true, coordJob, jobConf, null); String output = coordJob.getJobXml() + System.getProperty("line.separator") + "***actions for instance***" + action; return output; } } catch (CoordinatorJobException cex) { exceptionOccured = true; LOG.warn("ERROR: ", cex); throw new CommandException(cex); } catch (IllegalArgumentException iex) { exceptionOccured = true; LOG.warn("ERROR: ", iex); throw new CommandException(ErrorCode.E1003, iex); } catch (Exception ex) { exceptionOccured = true; LOG.warn("ERROR: ", ex); throw new CommandException(ErrorCode.E0803, ex); } finally { if (exceptionOccured) { if (coordJob.getId() == null || coordJob.getId().equalsIgnoreCase("")) { coordJob.setStatus(CoordinatorJob.Status.FAILED); coordJob.resetPending(); } } } LOG.info("ENDED Coordinator Submit jobId=" + jobId); return jobId; }
@Override protected Void execute() throws CommandException { LOG.debug("STARTED SignalCommand for jobid=" + jobId + ", actionId=" + actionId); WorkflowInstance workflowInstance = wfJob.getWorkflowInstance(); workflowInstance.setTransientVar(WorkflowStoreService.WORKFLOW_BEAN, wfJob); boolean completed = false; boolean skipAction = false; if (wfAction == null) { if (wfJob.getStatus() == WorkflowJob.Status.PREP) { try { completed = workflowInstance.start(); } catch (WorkflowException e) { throw new CommandException(e); } wfJob.setStatus(WorkflowJob.Status.RUNNING); wfJob.setStartTime(new Date()); wfJob.setWorkflowInstance(workflowInstance); // 1. Add SLA status event for WF-JOB with status STARTED // 2. Add SLA registration events for all WF_ACTIONS SLADbXOperations.writeStausEvent( wfJob.getSlaXml(), jobId, Status.STARTED, SlaAppType.WORKFLOW_JOB); writeSLARegistrationForAllActions( workflowInstance.getApp().getDefinition(), wfJob.getUser(), wfJob.getGroup(), wfJob.getConf()); queue(new NotificationXCommand(wfJob)); } else { throw new CommandException(ErrorCode.E0801, wfJob.getId()); } } else { String skipVar = workflowInstance.getVar( wfAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP); if (skipVar != null) { skipAction = skipVar.equals("true"); } try { completed = workflowInstance.signal(wfAction.getExecutionPath(), wfAction.getSignalValue()); } catch (WorkflowException e) { throw new CommandException(e); } wfJob.setWorkflowInstance(workflowInstance); wfAction.resetPending(); if (!skipAction) { wfAction.setTransition(workflowInstance.getTransition(wfAction.getName())); } try { jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction)); } catch (JPAExecutorException je) { throw new CommandException(je); } } if (completed) { try { for (String actionToKillId : WorkflowStoreService.getActionsToKill(workflowInstance)) { WorkflowActionBean actionToKill; actionToKill = jpaService.execute(new 
WorkflowActionGetJPAExecutor(actionToKillId)); actionToKill.setPending(); actionToKill.setStatus(WorkflowActionBean.Status.KILLED); jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToKill)); queue(new ActionKillXCommand(actionToKill.getId(), actionToKill.getType())); } for (String actionToFailId : WorkflowStoreService.getActionsToFail(workflowInstance)) { WorkflowActionBean actionToFail = jpaService.execute(new WorkflowActionGetJPAExecutor(actionToFailId)); actionToFail.resetPending(); actionToFail.setStatus(WorkflowActionBean.Status.FAILED); SLADbXOperations.writeStausEvent( wfAction.getSlaXml(), wfAction.getId(), Status.FAILED, SlaAppType.WORKFLOW_ACTION); jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToFail)); } } catch (JPAExecutorException je) { throw new CommandException(je); } wfJob.setStatus(WorkflowJob.Status.valueOf(workflowInstance.getStatus().toString())); wfJob.setEndTime(new Date()); wfJob.setWorkflowInstance(workflowInstance); Status slaStatus = Status.SUCCEEDED; switch (wfJob.getStatus()) { case SUCCEEDED: slaStatus = Status.SUCCEEDED; break; case KILLED: slaStatus = Status.KILLED; break; case FAILED: slaStatus = Status.FAILED; break; default: // TODO SUSPENDED break; } SLADbXOperations.writeStausEvent( wfJob.getSlaXml(), jobId, slaStatus, SlaAppType.WORKFLOW_JOB); queue(new NotificationXCommand(wfJob)); if (wfJob.getStatus() == WorkflowJob.Status.SUCCEEDED) { InstrumentUtils.incrJobCounter(INSTR_SUCCEEDED_JOBS_COUNTER_NAME, 1, getInstrumentation()); } } else { for (WorkflowActionBean newAction : WorkflowStoreService.getStartedActions(workflowInstance)) { String skipVar = workflowInstance.getVar( newAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP); boolean skipNewAction = false; if (skipVar != null) { skipNewAction = skipVar.equals("true"); } try { if (skipNewAction) { WorkflowActionBean oldAction; oldAction = jpaService.execute(new WorkflowActionGetJPAExecutor(newAction.getId())); 
oldAction.setPending(); jpaService.execute(new WorkflowActionUpdateJPAExecutor(oldAction)); queue(new SignalXCommand(jobId, oldAction.getId())); } else { newAction.setPending(); String actionSlaXml = getActionSLAXml( newAction.getName(), workflowInstance.getApp().getDefinition(), wfJob.getConf()); newAction.setSlaXml(actionSlaXml); jpaService.execute(new WorkflowActionInsertJPAExecutor(newAction)); LOG.debug( "SignalXCommand: Name: " + newAction.getName() + ", Id: " + newAction.getId() + ", Authcode:" + newAction.getCred()); queue(new ActionStartXCommand(newAction.getId(), newAction.getType())); } } catch (JPAExecutorException je) { throw new CommandException(je); } } } try { jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob)); } catch (JPAExecutorException je) { throw new CommandException(je); } XLog.getLog(getClass()) .debug( "Updated the workflow status to " + wfJob.getId() + " status =" + wfJob.getStatusStr()); if (wfJob.getStatus() != WorkflowJob.Status.RUNNING && wfJob.getStatus() != WorkflowJob.Status.SUSPENDED) { // update coordinator action new CoordActionUpdateXCommand(wfJob).call(); new WfEndXCommand(wfJob).call(); // To delete the WF temp dir } LOG.debug("ENDED SignalCommand for jobid=" + jobId + ", actionId=" + actionId); return null; }