protected Void call(WorkflowStore store) throws StoreException, CommandException { WorkflowJobBean workflow = store.getWorkflow(jobId, false); setLogInfo(workflow); WorkflowActionBean action = store.getAction(id, false); setLogInfo(action); if (action.isPending() && (action.getStatus() == WorkflowActionBean.Status.DONE || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) { if (workflow.getStatus() == WorkflowJob.Status.RUNNING) { ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType()); Configuration conf = workflow.getWorkflowInstance().getConf(); int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries()); long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval()); executor.setMaxRetries(maxRetries); executor.setRetryInterval(retryInterval); if (executor != null) { boolean isRetry = false; if (action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) { isRetry = true; } ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry); try { XLog.getLog(getClass()) .debug( "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]", action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(), action.getSignalValue()); WorkflowInstance wfInstance = workflow.getWorkflowInstance(); DagELFunctions.setActionInfo(wfInstance, action); workflow.setWorkflowInstance(wfInstance); incrActionCounter(action.getType(), 1); Instrumentation.Cron cron = new Instrumentation.Cron(); cron.start(); executor.end(context, action); cron.stop(); addActionCron(action.getType(), cron); if (!context.isEnded()) { XLog.getLog(getClass()) .warn( XLog.OPS, "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType()); action.setErrorInfo( END_DATA_MISSING, "Execution Ended, but End Data Missing from Action"); failJob(context); store.updateAction(action); store.updateWorkflow(workflow); return null; } action.setRetries(0); action.setEndTime(new Date()); store.updateAction(action); store.updateWorkflow(workflow); Status slaStatus = null; switch (action.getStatus()) { case OK: slaStatus = Status.SUCCEEDED; break; case KILLED: slaStatus = Status.KILLED; break; case FAILED: slaStatus = Status.FAILED; break; case ERROR: XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA"); slaStatus = Status.KILLED; break; default: // TODO: What will happen for other Action // status slaStatus = Status.FAILED; break; } SLADbOperations.writeStausEvent( action.getSlaXml(), action.getId(), store, slaStatus, SlaAppType.WORKFLOW_ACTION); queueCallable(new NotificationCommand(workflow, action)); XLog.getLog(getClass()) .debug( "Queuing commands for action " + id + " status " + action.getStatus() + ", Set pending=" + action.getPending()); queueCallable(new SignalCommand(workflow.getId(), id)); } catch (ActionExecutorException ex) { XLog.getLog(getClass()) .warn( "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]", action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage()); action.setErrorInfo(ex.getErrorCode(), ex.getMessage()); action.setEndTime(null); switch (ex.getErrorType()) { case TRANSIENT: if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) { handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL); action.setPendingAge(new Date()); action.setRetries(0); } action.setEndTime(null); break; case NON_TRANSIENT: handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL); action.setEndTime(null); break; case ERROR: handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR); queueCallable(new SignalCommand(workflow.getId(), id)); break; case FAILED: failJob(context); break; } store.updateAction(action); store.updateWorkflow(workflow); } } else { throw new CommandException(ErrorCode.E0802, action.getType()); } } else { XLog.getLog(getClass()) .warn( "Job state is not {0}. Skipping ActionEnd Execution", WorkflowJob.Status.RUNNING.toString()); } } else { XLog.getLog(getClass()) .debug( "Action pending={0}, status={1}. Skipping ActionEnd Execution", action.getPending(), action.getStatusStr()); } return null; }
@Override protected Void execute() throws CommandException { LOG.debug("STARTED SignalCommand for jobid=" + jobId + ", actionId=" + actionId); WorkflowInstance workflowInstance = wfJob.getWorkflowInstance(); workflowInstance.setTransientVar(WorkflowStoreService.WORKFLOW_BEAN, wfJob); boolean completed = false; boolean skipAction = false; if (wfAction == null) { if (wfJob.getStatus() == WorkflowJob.Status.PREP) { try { completed = workflowInstance.start(); } catch (WorkflowException e) { throw new CommandException(e); } wfJob.setStatus(WorkflowJob.Status.RUNNING); wfJob.setStartTime(new Date()); wfJob.setWorkflowInstance(workflowInstance); // 1. Add SLA status event for WF-JOB with status STARTED // 2. Add SLA registration events for all WF_ACTIONS SLADbXOperations.writeStausEvent( wfJob.getSlaXml(), jobId, Status.STARTED, SlaAppType.WORKFLOW_JOB); writeSLARegistrationForAllActions( workflowInstance.getApp().getDefinition(), wfJob.getUser(), wfJob.getGroup(), wfJob.getConf()); queue(new NotificationXCommand(wfJob)); } else { throw new CommandException(ErrorCode.E0801, wfJob.getId()); } } else { String skipVar = workflowInstance.getVar( wfAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP); if (skipVar != null) { skipAction = skipVar.equals("true"); } try { completed = workflowInstance.signal(wfAction.getExecutionPath(), wfAction.getSignalValue()); } catch (WorkflowException e) { throw new CommandException(e); } wfJob.setWorkflowInstance(workflowInstance); wfAction.resetPending(); if (!skipAction) { wfAction.setTransition(workflowInstance.getTransition(wfAction.getName())); } try { jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction)); } catch (JPAExecutorException je) { throw new CommandException(je); } } if (completed) { try { for (String actionToKillId : WorkflowStoreService.getActionsToKill(workflowInstance)) { WorkflowActionBean actionToKill; actionToKill = jpaService.execute(new WorkflowActionGetJPAExecutor(actionToKillId)); actionToKill.setPending(); actionToKill.setStatus(WorkflowActionBean.Status.KILLED); jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToKill)); queue(new ActionKillXCommand(actionToKill.getId(), actionToKill.getType())); } for (String actionToFailId : WorkflowStoreService.getActionsToFail(workflowInstance)) { WorkflowActionBean actionToFail = jpaService.execute(new WorkflowActionGetJPAExecutor(actionToFailId)); actionToFail.resetPending(); actionToFail.setStatus(WorkflowActionBean.Status.FAILED); SLADbXOperations.writeStausEvent( wfAction.getSlaXml(), wfAction.getId(), Status.FAILED, SlaAppType.WORKFLOW_ACTION); jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToFail)); } } catch (JPAExecutorException je) { throw new CommandException(je); } wfJob.setStatus(WorkflowJob.Status.valueOf(workflowInstance.getStatus().toString())); wfJob.setEndTime(new Date()); wfJob.setWorkflowInstance(workflowInstance); Status slaStatus = Status.SUCCEEDED; switch (wfJob.getStatus()) { case SUCCEEDED: slaStatus = Status.SUCCEEDED; break; case KILLED: slaStatus = Status.KILLED; break; case FAILED: slaStatus = Status.FAILED; break; default: // TODO SUSPENDED break; } SLADbXOperations.writeStausEvent( wfJob.getSlaXml(), jobId, slaStatus, SlaAppType.WORKFLOW_JOB); queue(new NotificationXCommand(wfJob)); if (wfJob.getStatus() == WorkflowJob.Status.SUCCEEDED) { InstrumentUtils.incrJobCounter(INSTR_SUCCEEDED_JOBS_COUNTER_NAME, 1, getInstrumentation()); } } else { for (WorkflowActionBean newAction : WorkflowStoreService.getStartedActions(workflowInstance)) { String skipVar = workflowInstance.getVar( newAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP); boolean skipNewAction = false; if (skipVar != null) { skipNewAction = skipVar.equals("true"); } try { if (skipNewAction) { WorkflowActionBean oldAction; oldAction = jpaService.execute(new WorkflowActionGetJPAExecutor(newAction.getId())); oldAction.setPending(); jpaService.execute(new WorkflowActionUpdateJPAExecutor(oldAction)); queue(new SignalXCommand(jobId, oldAction.getId())); } else { newAction.setPending(); String actionSlaXml = getActionSLAXml( newAction.getName(), workflowInstance.getApp().getDefinition(), wfJob.getConf()); newAction.setSlaXml(actionSlaXml); jpaService.execute(new WorkflowActionInsertJPAExecutor(newAction)); LOG.debug( "SignalXCommand: Name: " + newAction.getName() + ", Id: " + newAction.getId() + ", Authcode:" + newAction.getCred()); queue(new ActionStartXCommand(newAction.getId(), newAction.getType())); } } catch (JPAExecutorException je) { throw new CommandException(je); } } } try { jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob)); } catch (JPAExecutorException je) { throw new CommandException(je); } XLog.getLog(getClass()) .debug( "Updated the workflow status to " + wfJob.getId() + " status =" + wfJob.getStatusStr()); if (wfJob.getStatus() != WorkflowJob.Status.RUNNING && wfJob.getStatus() != WorkflowJob.Status.SUSPENDED) { // update coordinator action new CoordActionUpdateXCommand(wfJob).call(); new WfEndXCommand(wfJob).call(); // To delete the WF temp dir } LOG.debug("ENDED SignalCommand for jobid=" + jobId + ", actionId=" + actionId); return null; }