예제 #1
0
  /**
   * Re-runs the coordinator actions selected by {@code rerunType} and {@code scope} for
   * {@code jobId}.
   *
   * <p>All selected actions must pass {@code checkAllActionsRunnable}; otherwise no action is
   * touched and the command fails with {@code E1018}. For each action, in order: its output
   * events are cleaned up (skipped when {@code noCleanup} or {@code failed} is set), it is
   * refreshed when {@code refresh} is set, it is updated, its SLA registration event is updated
   * when the SLA service is enabled, and a notification command followed by an input-check
   * command are queued. On success {@code ret} is set to a {@code CoordinatorActionInfo} built
   * from the selected actions.
   *
   * @throws CommandException if the actions are not all eligible for rerun or if any step raises
   *     an exception; {@code transitToPrevious()} is invoked before the exception propagates
   */
  @Override
  public void rerunChildren() throws CommandException {
    boolean rerunFailed = false;
    try {
      InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
      List<CoordinatorActionBean> actions =
          CoordUtils.getCoordActions(rerunType, jobId, scope, false);

      // Reject the whole request up front. The exception is thrown inside the try block on
      // purpose so that it goes through the XException handler below, exactly like any other
      // failure of this command.
      if (!checkAllActionsRunnable(actions)) {
        rerunFailed = true;
        throw new CommandException(
            ErrorCode.E1018, "part or all actions are not eligible to rerun!");
      }

      for (CoordinatorActionBean action : actions) {
        String xml = action.getActionXml();
        boolean skipCleanup = noCleanup || failed;
        if (!skipCleanup) {
          Element actionElement = XmlUtils.parseXml(xml);
          cleanupOutputEvents(actionElement);
        }
        if (refresh) {
          refreshAction(coordJob, action);
        }
        updateAction(coordJob, action);
        if (SLAService.isEnabled()) {
          SLAOperations.updateRegistrationEvent(action.getId());
        }
        queue(new CoordActionNotificationXCommand(action), 100);
        queue(new CoordActionInputCheckXCommand(action.getId(), action.getJobId()), 100);
      }

      ret = new CoordinatorActionInfo(actions);
    } catch (XException e) {
      rerunFailed = true;
      throw new CommandException(e);
    } catch (JDOMException e) {
      rerunFailed = true;
      throw new CommandException(ErrorCode.E0700, e.getMessage(), e);
    } catch (Exception e) {
      rerunFailed = true;
      throw new CommandException(ErrorCode.E1018, e.getMessage(), e);
    } finally {
      // Roll the job back to its previous state whenever the rerun did not complete.
      if (rerunFailed) {
        transitToPrevious();
      }
    }
  }
  /**
   * Stamps the bundle job with the current time as both its suspended time and its last-modified
   * time, logs the job's status and pending flag at debug level, and appends the bean to
   * {@code updateList}.
   *
   * @see org.apache.oozie.command.TransitionXCommand#updateJob()
   */
  @Override
  public void updateJob() {
    // NOTE(review): the instrumentation argument is null here — confirm that the
    // "bundle_suspend" counter is actually recorded anywhere with this call.
    InstrumentUtils.incrJobCounter("bundle_suspend", 1, null);

    bundleJob.setSuspendedTime(new Date());
    bundleJob.setLastModifiedTime(new Date());

    String jobState =
        ", status = " + bundleJob.getStatus() + ", pending = " + bundleJob.isPending();
    LOG.debug("Suspend bundle job id = " + jobId + jobState);

    updateList.add(bundleJob);
  }
예제 #3
0
  /**
   * Validates, resolves and stores the coordinator application, then either queues its first
   * materialization or, for a dry run, materializes the first hour of actions inline.
   *
   * <p>Steps: merge the default configuration, read and validate the application XML, record the
   * original XML and namespace on {@code coordJob}, strip comments, initialise the evaluators,
   * resolve the job element and store it. For a normal submit a
   * {@code CoordMaterializeTransitionXCommand} covering 3600 seconds is queued. For a dry run the
   * job is marked RUNNING and pending, and actions are materialized between the job start time and
   * the earlier of (start + 1 hour) and the job end time.
   *
   * @return the id returned by {@code storeToDB} for a normal submit; for a dry run, the job XML
   *     followed by a line separator, the marker {@code ***actions for instance***} and the
   *     materialized actions
   * @throws CommandException wrapping a {@code CoordinatorJobException} directly, an
   *     {@code IllegalArgumentException} under {@code E1003}, and any other exception under
   *     {@code E0803}
   */
  @Override
  protected String submit() throws CommandException {
    LOG.info("STARTED Coordinator Submit");
    InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());

    String jobId = null;
    boolean submitFailed = false;
    try {
      mergeDefaultConfig();

      String validatedXml = readAndValidateXml();
      coordJob.setOrigJobXml(validatedXml);
      LOG.debug(
          "jobXml after initial validation " + XmlUtils.prettyPrint(validatedXml).toString());

      String namespace = readAppNamespace(validatedXml);
      coordJob.setAppNamespace(namespace);

      String strippedXml = XmlUtils.removeComments(validatedXml);
      initEvaluators();
      Element resolvedJob = basicResolveAndIncludeDS(strippedXml, conf, coordJob);
      LOG.debug("jobXml after all validation " + XmlUtils.prettyPrint(resolvedJob).toString());

      jobId = storeToDB(resolvedJob, coordJob);
      // Attach the coordinator job's details to the log prefix from here on.
      LogUtils.setLogInfo(coordJob, logInfo);
      LOG = XLog.resetPrefix(LOG);

      if (dryrun) {
        // Dry run: materialize at most the first hour of the job, capped at the job end time.
        Date windowStart = coordJob.getStartTime();
        long windowEndMillis = windowStart.getTime() + (3600 * 1000);
        Date jobEnd = coordJob.getEndTime();
        Date windowEnd = new Date(windowEndMillis);
        if (windowEnd.compareTo(jobEnd) > 0) {
          windowEnd = jobEnd;
        }

        jobId = coordJob.getId();
        LOG.info("[" + jobId + "]: Update status to RUNNING");
        coordJob.setStatus(Job.Status.RUNNING);
        coordJob.setPending();

        CoordActionMaterializeCommand materializer =
            new CoordActionMaterializeCommand(jobId, windowStart, windowEnd);
        Configuration parsedConf = null;
        try {
          parsedConf = new XConfiguration(new StringReader(coordJob.getConf()));
        } catch (IOException parseError) {
          // Best effort: materialization proceeds with a null configuration.
          LOG.warn("Configuration parse error. read from DB :" + coordJob.getConf(), parseError);
        }
        String actions = materializer.materializeJobs(true, coordJob, parsedConf, null);
        return coordJob.getJobXml()
            + System.getProperty("line.separator")
            + "***actions for instance***"
            + actions;
      }

      // Submit a command to materialize jobs for the next 1 hour (3600 secs)
      // so we don't wait 10 mins for the Service to run.
      queue(new CoordMaterializeTransitionXCommand(jobId, 3600), 100);
    } catch (CoordinatorJobException e) {
      submitFailed = true;
      LOG.warn("ERROR:  ", e);
      throw new CommandException(e);
    } catch (IllegalArgumentException e) {
      submitFailed = true;
      LOG.warn("ERROR:  ", e);
      throw new CommandException(ErrorCode.E1003, e);
    } catch (Exception e) {
      submitFailed = true;
      LOG.warn("ERROR:  ", e);
      throw new CommandException(ErrorCode.E0803, e);
    } finally {
      // Mark the job FAILED only when the failure happened while it still has no id.
      if (submitFailed && (coordJob.getId() == null || coordJob.getId().equalsIgnoreCase(""))) {
        coordJob.setStatus(CoordinatorJob.Status.FAILED);
        coordJob.resetPending();
      }
    }

    LOG.info("ENDED Coordinator Submit jobId=" + jobId);
    return jobId;
  }
예제 #4
0
  /**
   * Starts or signals the workflow instance and persists the resulting job and action state.
   *
   * <p>When {@code wfAction} is null the job must be in {@code PREP}: the instance is started, the
   * job is marked {@code RUNNING}, the STARTED SLA event and the SLA registrations for all actions
   * are written, and a notification is queued. Otherwise the instance is signalled with the
   * action's execution path and signal value, and the action is updated (its transition is only
   * recorded when the action is not marked to be skipped).
   *
   * <p>If the instance reports completion, the actions to kill are marked {@code KILLED} and a
   * kill command is queued for each, the actions to fail are marked {@code FAILED} with a FAILED
   * SLA event each, and the job's final status, end time and SLA event are recorded. Otherwise
   * every newly started action is either inserted and started or, when marked to be skipped,
   * re-signalled.
   *
   * <p>Finally the job is persisted, and when it is neither {@code RUNNING} nor
   * {@code SUSPENDED} the coordinator-action update and workflow-end commands are invoked.
   *
   * @return always {@code null}
   * @throws CommandException with {@code E0801} when no action is given and the job is not in
   *     {@code PREP}, or wrapping any {@code WorkflowException} or {@code JPAExecutorException}
   */
  @Override
  protected Void execute() throws CommandException {
    LOG.debug("STARTED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
    WorkflowInstance workflowInstance = wfJob.getWorkflowInstance();
    workflowInstance.setTransientVar(WorkflowStoreService.WORKFLOW_BEAN, wfJob);
    boolean completed = false;
    boolean skipAction = false;
    if (wfAction == null) {
      // No action supplied: this signal is the initial start of the job.
      if (wfJob.getStatus() == WorkflowJob.Status.PREP) {
        try {
          completed = workflowInstance.start();
        } catch (WorkflowException e) {
          throw new CommandException(e);
        }
        wfJob.setStatus(WorkflowJob.Status.RUNNING);
        wfJob.setStartTime(new Date());
        wfJob.setWorkflowInstance(workflowInstance);
        // 1. Add SLA status event for WF-JOB with status STARTED
        // 2. Add SLA registration events for all WF_ACTIONS
        SLADbXOperations.writeStausEvent(
            wfJob.getSlaXml(), jobId, Status.STARTED, SlaAppType.WORKFLOW_JOB);
        writeSLARegistrationForAllActions(
            workflowInstance.getApp().getDefinition(),
            wfJob.getUser(),
            wfJob.getGroup(),
            wfJob.getConf());
        queue(new NotificationXCommand(wfJob));
      } else {
        throw new CommandException(ErrorCode.E0801, wfJob.getId());
      }
    } else {
      // An action finished: signal the instance along that action's execution path.
      String skipVar =
          workflowInstance.getVar(
              wfAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP);
      if (skipVar != null) {
        skipAction = skipVar.equals("true");
      }
      try {
        completed = workflowInstance.signal(wfAction.getExecutionPath(), wfAction.getSignalValue());
      } catch (WorkflowException e) {
        throw new CommandException(e);
      }
      wfJob.setWorkflowInstance(workflowInstance);
      wfAction.resetPending();
      if (!skipAction) {
        wfAction.setTransition(workflowInstance.getTransition(wfAction.getName()));
      }
      try {
        jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction));
      } catch (JPAExecutorException je) {
        throw new CommandException(je);
      }
    }

    if (completed) {
      try {
        for (String actionToKillId : WorkflowStoreService.getActionsToKill(workflowInstance)) {
          WorkflowActionBean actionToKill;

          actionToKill = jpaService.execute(new WorkflowActionGetJPAExecutor(actionToKillId));

          actionToKill.setPending();
          actionToKill.setStatus(WorkflowActionBean.Status.KILLED);
          jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToKill));
          queue(new ActionKillXCommand(actionToKill.getId(), actionToKill.getType()));
        }

        for (String actionToFailId : WorkflowStoreService.getActionsToFail(workflowInstance)) {
          WorkflowActionBean actionToFail =
              jpaService.execute(new WorkflowActionGetJPAExecutor(actionToFailId));
          actionToFail.resetPending();
          actionToFail.setStatus(WorkflowActionBean.Status.FAILED);
          // Record the FAILED SLA event against the action that is being failed. The signalling
          // wfAction may be a different action, and it is null when the job was just started.
          SLADbXOperations.writeStausEvent(
              actionToFail.getSlaXml(),
              actionToFail.getId(),
              Status.FAILED,
              SlaAppType.WORKFLOW_ACTION);
          jpaService.execute(new WorkflowActionUpdateJPAExecutor(actionToFail));
        }
      } catch (JPAExecutorException je) {
        throw new CommandException(je);
      }

      wfJob.setStatus(WorkflowJob.Status.valueOf(workflowInstance.getStatus().toString()));
      wfJob.setEndTime(new Date());
      wfJob.setWorkflowInstance(workflowInstance);
      // Map the final job status onto the SLA status; any status not listed below keeps the
      // SUCCEEDED default.
      Status slaStatus = Status.SUCCEEDED;
      switch (wfJob.getStatus()) {
        case SUCCEEDED:
          slaStatus = Status.SUCCEEDED;
          break;
        case KILLED:
          slaStatus = Status.KILLED;
          break;
        case FAILED:
          slaStatus = Status.FAILED;
          break;
        default: // TODO SUSPENDED
          break;
      }
      SLADbXOperations.writeStausEvent(
          wfJob.getSlaXml(), jobId, slaStatus, SlaAppType.WORKFLOW_JOB);
      queue(new NotificationXCommand(wfJob));
      if (wfJob.getStatus() == WorkflowJob.Status.SUCCEEDED) {
        InstrumentUtils.incrJobCounter(INSTR_SUCCEEDED_JOBS_COUNTER_NAME, 1, getInstrumentation());
      }
    } else {
      // Still running: handle every action the instance started as a result of this signal.
      for (WorkflowActionBean newAction :
          WorkflowStoreService.getStartedActions(workflowInstance)) {
        String skipVar =
            workflowInstance.getVar(
                newAction.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP);
        boolean skipNewAction = false;
        if (skipVar != null) {
          skipNewAction = skipVar.equals("true");
        }
        try {
          if (skipNewAction) {
            // Skipped action: load the stored action with the same id, mark it pending and
            // signal it again instead of inserting and starting a new one.
            WorkflowActionBean oldAction;

            oldAction = jpaService.execute(new WorkflowActionGetJPAExecutor(newAction.getId()));

            oldAction.setPending();
            jpaService.execute(new WorkflowActionUpdateJPAExecutor(oldAction));

            queue(new SignalXCommand(jobId, oldAction.getId()));
          } else {
            newAction.setPending();
            String actionSlaXml =
                getActionSLAXml(
                    newAction.getName(),
                    workflowInstance.getApp().getDefinition(),
                    wfJob.getConf());
            newAction.setSlaXml(actionSlaXml);
            jpaService.execute(new WorkflowActionInsertJPAExecutor(newAction));
            LOG.debug(
                "SignalXCommand: Name: "
                    + newAction.getName()
                    + ", Id: "
                    + newAction.getId()
                    + ", Authcode:"
                    + newAction.getCred());
            queue(new ActionStartXCommand(newAction.getId(), newAction.getType()));
          }
        } catch (JPAExecutorException je) {
          throw new CommandException(je);
        }
      }
    }

    try {
      jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob));
    } catch (JPAExecutorException je) {
      throw new CommandException(je);
    }
    XLog.getLog(getClass())
        .debug(
            "Updated the workflow status to "
                + wfJob.getId()
                + "  status ="
                + wfJob.getStatusStr());
    if (wfJob.getStatus() != WorkflowJob.Status.RUNNING
        && wfJob.getStatus() != WorkflowJob.Status.SUSPENDED) {
      // update coordinator action
      new CoordActionUpdateXCommand(wfJob).call();
      new WfEndXCommand(wfJob).call(); // To delete the WF temp dir
    }
    LOG.debug("ENDED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
    return null;
  }