/**
 * Registers the outcome of a grid process on its job and releases the worker it was using.
 *
 * <p>The result is handed to {@code newReplicaResult} on the owning job; an
 * {@code IllegalResultException} is not propagated but converted into an error-level
 * {@code LoggerResponseTO}. When {@code state} equals {@code FINISHED}, the sister processes
 * of the same task are aborted. The worker entry is then deallocated and, when
 * {@code isWorkerNeeded} says it is no longer needed, disposed.
 *
 * @param execution the grid process whose execution ended
 * @param state the state to register for the process
 * @param responses list collecting the responses produced along the way
 */
private void updateExecution(
      GridProcess execution, GridProcessState state, List<IResponseTO> responses) {
    try {
      Job owningJob = execution.getJob();
      owningJob.newReplicaResult(
          execution.getResult(),
          state,
          verifyFailure(execution.getTask(), state),
          canReplicate(execution.getTask()));
    } catch (IllegalResultException illegalResult) {
      String message = "Illegal result on replicaEnded: " + illegalResult.getMessage();
      responses.add(new LoggerResponseTO(message, LoggerResponseTO.ERROR));
    }

    boolean finished = state.equals(GridProcessState.FINISHED);
    if (finished) {
      // One replica finished, so the remaining replicas of the task are aborted.
      abortReplicaSisters(execution, responses);
    }

    WorkerEntry releasedWorker = execution.getWorkerEntry();
    releasedWorker.deallocate();

    boolean stillNeeded = isWorkerNeeded(releasedWorker, execution);
    if (!stillNeeded) {
      disposeWorker(releasedWorker, responses);
    }
  }
  /**
   * Aborts every other grid process of the same task that is still in a runnable state.
   *
   * @param execution the grid process whose sisters are aborted; it is itself skipped
   * @param responses list collecting the responses produced by the aborts
   */
  private void abortReplicaSisters(GridProcess execution, List<IResponseTO> responses) {
    for (GridProcess sibling : execution.getTask().getGridProcesses()) {
      if (sibling.equals(execution)) {
        continue; // the triggering process is not its own sister
      }
      if (sibling.getState().isRunnable()) {
        abort(sibling, responses);
      }
    }
  }
 /**
  * Tells whether another grid process of the same task is in the {@code FINISHED} state.
  *
  * @param execution the grid process whose sisters are inspected; it is itself ignored
  * @return {@code true} as soon as one finished sister is found, {@code false} otherwise
  */
 private boolean hasAnySisterGridProcessFinished(GridProcess execution) {
   for (GridProcess sibling : execution.getTask().getGridProcesses()) {
     if (sibling.equals(execution)) {
       continue;
     }
     if (sibling.getState().equals(GridProcessState.FINISHED)) {
       return true;
     }
   }
   return false;
 }
Пример #4
0
  /**
   * Compares grid processes by handle: two instances are equal when their handles are equal.
   *
   * <p>NOTE(review): no {@code hashCode} override is visible in this excerpt; confirm that one
   * exists and is derived from the handle as well.
   *
   * @param o the object to compare with
   * @return {@code true} if {@code o} is a {@code GridProcess} with an equal handle
   */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof GridProcess)) {
      return false;
    }
    return ((GridProcess) o).getHandle().equals(this.getHandle());
  }
  /**
   * Finalises an ended grid process: records its outcome, finishes the job if it has ended,
   * and lets the scheduler run again.
   *
   * @param execution the grid process that ended
   * @param state the state to register for the process
   * @param responses list collecting the responses produced along the way
   */
  private void executionEnded(
      GridProcess execution, GridProcessState state, List<IResponseTO> responses) {
    updateExecution(execution, state, responses);

    Job job = execution.getJob();
    if (hasJobEnded(job)) {
      finishJob(job, responses);
    }

    updateScheduler(responses);
  }
  /**
   * Aborts a grid process that is still runnable; processes in any other state are left alone.
   *
   * <p>The process and its replica accounting are marked {@code ABORTED}, the accounting is
   * reported, pending operations are cancelled, and the regular end-of-execution handling is
   * run with the {@code ABORTED} state.
   *
   * @param gridProcess the grid process to abort
   * @param responses list collecting the responses produced along the way
   */
  private void abort(GridProcess gridProcess, List<IResponseTO> responses) {
    if (!gridProcess.getState().isRunnable()) {
      return;
    }

    gridProcess.setGridProcessState(GridProcessState.ABORTED);
    gridProcess.getReplicaAccounting().setState(GridProcessState.ABORTED);

    // reportReplicaAccounting fills the accounting fields and the transfer progress itself,
    // so they are not computed here a second time.
    reportReplicaAccounting(gridProcess, responses);

    gridProcess.getOperations().cancelOperations(responses);

    executionEnded(gridProcess, GridProcessState.ABORTED, responses);
  }
  /**
   * Blacklists the task of a failed execution on the given worker when the error type asks
   * for it.
   *
   * @param workerEntry the worker the execution failed on; may be {@code null}
   * @param type the error type of the failure; may be {@code null}
   * @param execution the failed grid process, used for its task id
   * @param responses list receiving a debug log entry when the task is blacklisted
   * @return {@code true} if the task was added to the worker's blacklist, {@code false} when
   *     the worker or type is {@code null} or the type is not a blacklist error
   */
  public boolean executionFailedOnWorker(
      WorkerEntry workerEntry,
      GridProcessErrorTypes type,
      GridProcess execution,
      List<IResponseTO> responses) {

    if (workerEntry == null || type == null || !type.blackListError()) {
      return false;
    }

    int failedTaskId = execution.getTaskId();
    String logMessage =
        "Adding to blacklist. Task: " + failedTaskId + ", Worker: " + workerEntry.getWorkerID();
    responses.add(new LoggerResponseTO(logMessage, LoggerResponseTO.DEBUG));

    workerEntry.addBlacklistedTask(failedTaskId);

    // A sabotage error additionally records the worker as a saboteur right away.
    if (type.equals(GridProcessErrorTypes.SABOTAGE_ERROR)) {
      saboteurs.add(workerEntry);
    }
    return true;
  }
  /**
   * Checks whether the task has fewer than {@code maxReplicas} active replicas and is itself
   * still {@code RUNNING} or {@code UNSTARTED}.
   *
   * <p>A replica counts as active when its state is {@code RUNNING} or {@code UNSTARTED}.
   *
   * @param task the task whose grid processes are counted
   * @return {@code true} when both conditions above hold
   */
  private boolean verifyRunningProccess(Task task) {
    int activeReplicas = 0;

    for (GridProcess replica : task.getGridProcesses()) {
      GridProcessState current = replica.getState();
      boolean active =
          GridProcessState.RUNNING.equals(current) || GridProcessState.UNSTARTED.equals(current);
      if (active) {
        activeReplicas++;
      }
    }

    boolean belowReplicaLimit = activeReplicas < maxReplicas;
    if (!belowReplicaLimit) {
      return false;
    }

    return GridProcessState.RUNNING.equals(task.getState())
        || GridProcessState.UNSTARTED.equals(task.getState());
  }
  /**
   * Creates a new replica of the task on the chosen worker, provided the task can still be
   * replicated.
   *
   * <p>The replica is created through the job, given the state machine's initial state, and
   * allocated both on {@code chosenWorker} and on the entry that {@code WorkerInfo} returns
   * for the worker's container id.
   *
   * @param job the job owning the task
   * @param task the task to replicate
   * @param chosenWorker the worker selected to run the replica
   * @return {@code true} if a replica was created and allocated, {@code false} if the task
   *     cannot be replicated
   */
  private boolean createAndAllocateExecution(Job job, Task task, WorkerEntry chosenWorker) {
    if (!canReplicate(task)) {
      return false;
    }

    GridProcess replica = job.createAndAllocateExecution(task.getTaskid(), chosenWorker);
    replica.setRunningState(stateMachine.getInitialState());

    chosenWorker.allocate(replica);

    WorkerEntry registeredWorker =
        WorkerInfo.getInstance()
            .getWorker(chosenWorker.getServiceID().getContainerID().toString());
    registeredWorker.allocate(replica);

    return true;
  }
  /**
   * Copies the data of a grid process and its execution result into the process's replica
   * accounting object.
   *
   * <p>Filled in: phase timings and operations, result info (error cause and type, exit
   * value, stdout, stderr), creation time, latest phase, sabotage check, sequence numbers and
   * state.
   *
   * @param process the grid process to read from
   * @return the replica accounting of {@code process}, updated in place
   */
  private GridProcessAccounting setAccountingFields(GridProcess process) {

    GridProcessAccounting ledger = process.getReplicaAccounting();
    GridProcessExecutionResult outcome = process.getResult();

    // Timings and operations of the init, remote and final phases.
    GridProcessPhasesData phases = new GridProcessPhasesData();
    phases.setInitBeginning(outcome.getInitData().getStartTime());
    phases.setInitEnd(outcome.getInitData().getEndTime());
    phases.setRemoteBeginning(outcome.getRemoteData().getStartTime());
    phases.setRemoteEnd(outcome.getRemoteData().getEndTime());
    phases.setFinalBeginning(outcome.getFinalData().getStartTime());
    phases.setFinalEnd(outcome.getFinalData().getEndTime());
    phases.setInitOperations(outcome.getInitOperations());
    phases.setGetOperations(outcome.getGetOperations());
    ledger.setPhasesData(phases);

    // Error details are recorded only when the error carries a cause.
    GridProcessResultInfo summary = new GridProcessResultInfo();
    GridProcessError failure = outcome.getExecutionError();
    boolean hasCause = failure != null && failure.getErrorCause() != null;
    if (hasCause) {
      summary.setErrorCause(failure.getErrorCause().getMessage());
      summary.setExecutionErrorType(failure.getType().getName());
    }

    ExecutorResult executorOutput = outcome.getExecutorResult();
    if (executorOutput != null) {
      summary.setExitValue(executorOutput.getExitValue());
      summary.setStderr(executorOutput.getStderr());
      summary.setStdout(executorOutput.getStdout());
    }
    ledger.setResultInfo(summary);

    ledger.setCreationTime(process.getCreationTime());
    ledger.setLatestPhase(process.getState().toString());

    SabotageCheckResult sabotage = outcome.getSabotageCheckResult();
    String sabotageText = null;
    if (sabotage != null) {
      sabotageText = sabotage.toString();
    }
    ledger.setSabotageCheck(sabotageText);

    ledger.setTaskSequenceNumber(process.getSpec().getTaskSequenceNumber());
    ledger.setGridProcessSequenceNumber(process.getId());

    ledger.setState(process.getState());

    return ledger;
  }
  /**
   * Handles a failed grid process.
   *
   * <p>In order: reports the replica accounting, registers a {@code FAILED} result on the job,
   * deallocates the worker, blacklists the task on the worker when the error type asks for it
   * (otherwise disposes the worker), logs the failure, finishes the job if it has ended, and,
   * when the job is neither satisfied nor ended, un-pauses the worker's request and queues a
   * {@code ResumeRequestResponseTO}. The scheduler is updated at the end.
   *
   * @param execution the grid process that failed
   * @param responses list collecting the responses produced along the way
   */
  public void executionFailed(GridProcess execution, List<IResponseTO> responses) {

    reportReplicaAccounting(execution, responses);

    Job job = execution.getJob();
    GridProcessExecutionResult executionResult = execution.getResult();

    try {
      job.newReplicaResult(
          executionResult,
          GridProcessState.FAILED,
          verifyFailure(execution.getTask(), GridProcessState.FAILED),
          canReplicate(execution.getTask()));
    } catch (IllegalResultException e) {

      // An illegal result is logged as an error instead of being propagated.
      responses.add(
          new LoggerResponseTO(
              "Illegal result on replica " + execution.getState() + " : " + e.getMessage(),
              LoggerResponseTO.ERROR));
    }

    // NOTE(review): executionResult is dereferenced here without a null check, while the code
    // further down guards against a null executionResult — confirm whether null is possible.
    GridProcessHandle handle = executionResult.getReplicaHandle();

    WorkerEntry workerEntry = execution.getWorkerEntry();
    workerEntry.deallocate();

    // The error type stays null when there is no result or no execution error.
    GridProcessErrorTypes type = null;

    if (executionResult != null && executionResult.getExecutionError() != null) {
      type = executionResult.getExecutionError().getType();
    }

    boolean enteredTaskBlacklist = executionFailedOnWorker(workerEntry, type, execution, responses);

    if (enteredTaskBlacklist) {
      // The task was blacklisted on this worker: give the worker up only if it is not needed.
      if (!isWorkerNeeded(workerEntry, execution)) {
        unwantWorker(job, workerEntry, responses);
      }
    } else {
      // No blacklist entry was made: the worker is disposed unconditionally.
      disposeWorker(workerEntry, responses);
    }

    boolean hasJobEnded = hasJobEnded(job);

    // Text of the error cause for the log entry; empty when no cause is available.
    String executorMsg = "";

    if (executionResult != null
        && executionResult.getExecutionError() != null
        && executionResult.getExecutionError().getErrorCause() != null) {
      executorMsg = executionResult.getExecutionError().getErrorCause().toString();
    }

    responses.add(
        new LoggerResponseTO(
            "Grid process "
                + execution.getState()
                + " "
                + handle
                + ". Job ended: "
                + hasJobEnded
                + " "
                + executorMsg
                + ".",
            LoggerResponseTO.DEBUG));

    if (hasJobEnded) {
      finishJob(execution.getJob(), responses);
    }

    // The job still lacks workers and is not over: resume the request tied to this worker.
    if (!isJobSatisfied(job) && !hasJobEnded) {

      Request request = execution.getJob().getRequest(workerEntry.getRequestID());
      if (request != null) {
        request.setPaused(false);
      }

      // The resume response is queued even when no matching request was found above.
      ResumeRequestResponseTO to = new ResumeRequestResponseTO();

      to.setPeerAddress(StringUtil.deploymentIDToAddress(workerEntry.getPeerID()));
      to.setRequestID(workerEntry.getRequestID());

      responses.add(to);
    }

    updateScheduler(responses);
  }
  /**
   * Tells whether the worker is still needed by the job of the given execution.
   *
   * @param workerEntry the worker to check
   * @param execution the grid process whose job is considered
   * @return {@code true} only when the worker is not blacklisted for the entire job and the
   *     job is not yet satisfied
   */
  private boolean isWorkerNeeded(WorkerEntry workerEntry, GridProcess execution) {
    Job job = execution.getJob();

    // Same short-circuit as before: isJobSatisfied is skipped when the worker is blacklisted.
    return !isWorkerBlacklistedForEntireJob(workerEntry, job) && !isJobSatisfied(job);
  }
  /**
   * Builds the status snapshot of a grid process.
   *
   * <p>The snapshot carries the worker status, the process identifiers, state and current
   * phase, creation and finalization times and, when the process has a result, the error type
   * and cause, the elapsed time of each phase, the executor result and the sabotage check.
   *
   * @param process the grid process to describe
   * @return the populated status info; its result part is {@code null} when the process has
   *     no result
   */
  private GridProcessStatusInfo fillProcess(GridProcess process) {

    WorkerEntry assignedWorker = process.getWorkerEntry();
    WorkerStatusInfo workerStatus =
        new WorkerStatusInfo(
            assignedWorker.getWorkerSpecification(),
            process.getHandle(),
            assignedWorker.getWorkerID(),
            process.getState().toString());

    GridProcessStatusInfoResult resultStatus = null;
    GridProcessExecutionResult outcome = process.getResult();

    if (outcome != null) {
      // The error type defaults to an empty string, the cause to null.
      String errorType = "";
      String errorCause = null;

      GridProcessError failure = outcome.getExecutionError();
      if (failure != null) {
        errorType = failure.getType().getName();
        if (failure.getErrorCause() != null) {
          errorCause = failure.getErrorCause().getMessage();
        }
      }

      resultStatus =
          new GridProcessStatusInfoResult(
              errorType,
              errorCause,
              outcome.getInitData().getElapsedTimeInMillis(),
              outcome.getRemoteData().getElapsedTimeInMillis(),
              outcome.getFinalData().getElapsedTimeInMillis(),
              outcome.getExecutorResult());

      SabotageCheckResult sabotage = outcome.getSabotageCheckResult();
      if (sabotage != null) {
        resultStatus.setSabotageCheck(sabotage.toString());
      }
    }

    GridProcessStatusInfo snapshot =
        new GridProcessStatusInfo(
            process.getId(),
            process.getTaskId(),
            process.getJobId(),
            process.getState().toString(),
            process.getCurrentPhase().toString(),
            workerStatus,
            resultStatus,
            process.getHandle());

    snapshot.setCreationTime(process.getCreationTime());
    snapshot.setFinalizationTime(process.getFinalizationTime());

    return snapshot;
  }
  /**
   * Collects the status of every known job together with the workers of the running jobs.
   *
   * <p>For each job a {@code JobStatusInfo} is built from its tasks. For jobs reported as
   * running, the workers of the grid processes in the {@code RUNNING} state are listed as
   * {@code WorkerStatusInfo} entries, in the order they were encountered.
   *
   * @return the job statuses and the per-job worker statuses, both keyed by job id
   */
  public JobWorkerStatus getCompleteStatus() {

    Map<Integer, Job> jobsMap = JobInfo.getInstance().getJobs();
    Map<Integer, Set<WorkerEntry>> workersByJob = CommonUtils.createMap();
    Map<Integer, JobStatusInfo> jobs = CommonUtils.createSerializableMap();

    // First pass: one status entry per job, remembering the workers of running processes.
    for (Job job : jobsMap.values()) {
      Set<WorkerEntry> runningWorkers = new LinkedHashSet<WorkerEntry>();
      List<TaskStatusInfo> taskInfos = new ArrayList<TaskStatusInfo>();

      for (Task task : job.getTasks()) {
        taskInfos.add(fillTask(task));
        for (GridProcess gridProcess : task.getGridProcesses()) {
          if (gridProcess.getState() == GridProcessState.RUNNING) {
            runningWorkers.add(gridProcess.getWorkerEntry());
          }
        }
      }

      JobStatusInfo jobInfo =
          new JobStatusInfo(
              job.getJobId(),
              job.getSpec(),
              UtilConverter.getJobState(job.getState()),
              taskInfos,
              job.getCreationTime(),
              job.getFinalizationTime());
      jobs.put(jobInfo.getJobId(), jobInfo);

      if (job.isRunning()) {
        workersByJob.put(job.getJobId(), runningWorkers);
      }
    }

    // Second pass: turn the collected worker entries into status arrays.
    Map<Integer, WorkerStatusInfo[]> workers = CommonUtils.createSerializableMap();

    for (Entry<Integer, Set<WorkerEntry>> entry : workersByJob.entrySet()) {
      Integer jobId = entry.getKey();
      Set<WorkerEntry> jobWorkers = entry.getValue();

      WorkerStatusInfo[] workerList = workers.get(jobId);
      if (workerList == null) {
        workerList = new WorkerStatusInfo[jobWorkers.size()];
        workers.put(jobId, workerList);
      }

      int next = 0;
      for (WorkerEntry workerEntry : jobWorkers) {
        // Handle and state stay null for a worker without a grid process.
        boolean busy = workerEntry.getGridProcess() != null;
        GridProcessHandle handle = busy ? workerEntry.getGridProcess().getHandle() : null;
        String state = busy ? workerEntry.getGridProcess().getState().toString() : null;

        workerList[next] =
            new WorkerStatusInfo(
                workerEntry.getWorkerSpecification(), handle, workerEntry.getWorkerID(), state);
        next++;
      }
    }

    return new JobWorkerStatus(jobs, workers);
  }
  /**
   * Builds a {@code ReportReplicaAccountingResponseTO} for the grid process and appends it to
   * {@code responses}.
   *
   * <p>The process's accounting is refreshed first (accounting fields and transfer progress);
   * the response is then populated from that accounting, from the process itself, and from
   * the worker specification that the worker DAO returns for the worker's address.
   *
   * @param process the grid process whose accounting is reported
   * @param responses list the accounting response is appended to
   */
  private void reportReplicaAccounting(GridProcess process, List<IResponseTO> responses) {

    GridProcessAccounting accounting = setAccountingFields(process);
    accounting.setTransfersProgress(convertTransfer(process.getTransfersProgress()));

    String peerAddress = StringUtil.deploymentIDToAddress(process.getWorkerProviderID());

    ReportReplicaAccountingResponseTO report = new ReportReplicaAccountingResponseTO();

    // Who ran what: job, request, sequence numbers, peer and worker identity.
    report.setJobID(process.getJobId());
    report.setRequestID(accounting.getRequestId());
    report.setTaskSequenceNumber(accounting.getTaskSequenceNumber());
    report.setGridProcessSequenceNumber(accounting.getGridProcessSequenceNumber());
    report.setPeerAddress(peerAddress);
    report.setWorkerID(accounting.getWorkerID());
    report.setWorkerPK(accounting.getWorkerPublicKey());

    // Timeline of the execution phases.
    report.setCreationTime(accounting.getCreationTime());
    report.setInitBeginning(accounting.getInitBeginning());
    report.setInitEnd(accounting.getInitEnd());
    report.setRemoteBeginning(accounting.getRemoteBeginning());
    report.setRemoteEnd(accounting.getRemoteEnd());
    report.setFinalBeginning(accounting.getFinalBeginning());
    report.setFinalEnd(accounting.getFinalEnd());
    report.setLatestPhase(accounting.getLatestPhase());

    // Outcome of the execution.
    report.setState(accounting.getState().name());
    report.setExitValue(accounting.getExitValue());
    report.setStdout(accounting.getStdout());
    report.setStderr(accounting.getStderr());
    report.setErrorCause(accounting.getErrorCause());
    report.setExecutionErrorType(accounting.getExecutionErrorType());
    report.setSabotageCheck(accounting.getSabotageCheck());

    // Limits and requirements carried by the accounting.
    report.setMaxFails(accounting.getMaxFails());
    report.setMaxReplicas(accounting.getMaxReplicas());
    report.setRequiredWorkers(accounting.getRequiredWorkers());

    String workerAddress = StringUtil.deploymentIDToAddress(accounting.getWorkerID());
    WorkerSpecification workerSpec =
        BrokerDAOFactory.getInstance().getWorkerDAO().getWorkerSpec(workerAddress);
    report.setWorkerSpec(workerSpec);

    report.setGetOperationsList(
        fillFinalGetOperations(
            accounting.getFinalCommands(),
            process.getTask(),
            process.getId(),
            process.getWorkerEntry().getWorkerID(),
            accounting.getRequestId()));
    report.setInitOperationsList(
        fillInitGetOperations(
            accounting.getInitCommands(),
            process.getTask(),
            process.getId(),
            process.getWorkerEntry().getWorkerID(),
            accounting.getRequestId()));
    report.setPeerBalancesList(fillPeerBalances(accounting.getAccountings().getBalances()));
    report.setTransferProgressList(
        fillTransferProgress(accounting.getTransfersProgress(), "" + process.getId()));

    responses.add(report);
  }