/**
 * Registers the outcome of a grid process on its job and releases the worker that ran it.
 *
 * <p>The result is passed to the job through {@code newReplicaResult}. An {@code
 * IllegalResultException} raised while doing so is not propagated; it is reported as an ERROR
 * logger response instead. When {@code state} equals {@code GridProcessState.FINISHED}, the
 * other runnable grid processes of the same task are aborted. The worker entry is always
 * deallocated and is disposed when {@code isWorkerNeeded} reports it is no longer needed.
 *
 * @param execution the grid process whose execution ended
 * @param state the state the grid process ended in
 * @param responses collects the responses generated while handling the event
 */
private void updateExecution(
    GridProcess execution, GridProcessState state, List<IResponseTO> responses) {
  try {
    Job owningJob = execution.getJob();
    owningJob.newReplicaResult(
        execution.getResult(),
        state,
        verifyFailure(execution.getTask(), state),
        canReplicate(execution.getTask()));
  } catch (IllegalResultException e) {
    String message = "Illegal result on replicaEnded: " + e.getMessage();
    responses.add(new LoggerResponseTO(message, LoggerResponseTO.ERROR));
  }

  // A finished replica makes its still-runnable siblings redundant.
  if (state.equals(GridProcessState.FINISHED)) {
    abortReplicaSisters(execution, responses);
  }

  WorkerEntry releasedWorker = execution.getWorkerEntry();
  releasedWorker.deallocate();

  if (isWorkerNeeded(releasedWorker, execution)) {
    return;
  }
  disposeWorker(releasedWorker, responses);
}
/**
 * Aborts every other grid process of the same task that is still in a runnable state.
 *
 * @param execution the grid process whose siblings should be aborted; it is itself skipped
 * @param responses collects the responses generated by the aborts
 */
private void abortReplicaSisters(GridProcess execution, List<IResponseTO> responses) {
  for (GridProcess sibling : execution.getTask().getGridProcesses()) {
    if (sibling.equals(execution)) {
      continue;
    }
    if (sibling.getState().isRunnable()) {
      abort(sibling, responses);
    }
  }
}
/**
 * Tells whether any other grid process of the same task is in the {@code FINISHED} state.
 *
 * @param execution the grid process whose siblings are inspected; it is itself ignored
 * @return {@code true} if at least one sibling has state {@code GridProcessState.FINISHED}
 */
private boolean hasAnySisterGridProcessFinished(GridProcess execution) {
  for (GridProcess sibling : execution.getTask().getGridProcesses()) {
    if (sibling.equals(execution)) {
      continue;
    }
    if (sibling.getState().equals(GridProcessState.FINISHED)) {
      return true;
    }
  }
  return false;
}
/**
 * Compares by handle: another {@code GridProcess} is equal to this one when its handle equals
 * this object's handle. Any other object (including {@code null}) is not equal.
 *
 * <p>NOTE(review): confirm that {@code hashCode()} is overridden consistently with this method.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is a {@code GridProcess} with an equal handle
 */
@Override
public boolean equals(Object o) {
  if (!(o instanceof GridProcess)) {
    return false;
  }
  GridProcess that = (GridProcess) o;
  return that.getHandle().equals(this.getHandle());
}
/**
 * Handles the end of a grid process: updates the execution bookkeeping, finishes the job if it
 * has ended, and triggers the scheduler.
 *
 * @param execution the grid process that ended
 * @param state the state the grid process ended in
 * @param responses collects the responses generated while handling the event
 */
private void executionEnded(
    GridProcess execution, GridProcessState state, List<IResponseTO> responses) {
  updateExecution(execution, state, responses);

  Job owningJob = execution.getJob();
  if (hasJobEnded(owningJob)) {
    finishJob(owningJob, responses);
  }

  updateScheduler(responses);
}
/**
 * Aborts a grid process that is still runnable; does nothing otherwise.
 *
 * <p>The process and its replica accounting are marked {@code ABORTED}, the accounting fields
 * and transfer progress are refreshed, the replica accounting is reported, pending operations
 * are cancelled and finally the regular end-of-execution handling runs with state
 * {@code ABORTED}.
 *
 * @param gridProcess the grid process to abort
 * @param responses collects the responses generated while aborting
 */
private void abort(GridProcess gridProcess, List<IResponseTO> responses) {
  if (!gridProcess.getState().isRunnable()) {
    return;
  }

  gridProcess.setGridProcessState(GridProcessState.ABORTED);
  gridProcess.getReplicaAccounting().setState(GridProcessState.ABORTED);

  // Refresh the accounting snapshot before it is reported.
  setAccountingFields(gridProcess)
      .setTransfersProgress(convertTransfer(gridProcess.getTransfersProgress()));
  reportReplicaAccounting(gridProcess, responses);

  gridProcess.getOperations().cancelOperations(responses);
  executionEnded(gridProcess, GridProcessState.ABORTED, responses);
}
public boolean executionFailedOnWorker( WorkerEntry workerEntry, GridProcessErrorTypes type, GridProcess execution, List<IResponseTO> responses) { if (workerEntry != null && type != null) { if (type.blackListError()) { int taskid = execution.getTaskId(); responses.add( new LoggerResponseTO( "Adding to blacklist. Task: " + taskid + ", Worker: " + workerEntry.getWorkerID(), LoggerResponseTO.DEBUG)); workerEntry.addBlacklistedTask(taskid); // a sabotage error causes a immediately job blacklist entry if (type.equals(GridProcessErrorTypes.SABOTAGE_ERROR)) { saboteurs.add(workerEntry); } return true; } } return false; }
/**
 * Checks whether the task still has room for another replica.
 *
 * <p>Counts the task's grid processes whose state is {@code RUNNING} or {@code UNSTARTED}. The
 * result is {@code true} only when that count is below {@code maxReplicas} and the task's own
 * state is {@code RUNNING} or {@code UNSTARTED}.
 *
 * @param task the task to inspect
 * @return {@code true} if both conditions above hold
 */
private boolean verifyRunningProccess(Task task) {
  int activeReplicas = 0;
  for (GridProcess replica : task.getGridProcesses()) {
    GridProcessState current = replica.getState();
    boolean active =
        GridProcessState.RUNNING.equals(current) || GridProcessState.UNSTARTED.equals(current);
    if (active) {
      activeReplicas++;
    }
  }

  boolean belowReplicaLimit = activeReplicas < maxReplicas;
  if (!belowReplicaLimit) {
    return false;
  }
  return GridProcessState.RUNNING.equals(task.getState())
      || GridProcessState.UNSTARTED.equals(task.getState());
}
/**
 * Creates a new replica of the task on the chosen worker, if the task may be replicated.
 *
 * <p>When {@code canReplicate(task)} is {@code false} nothing is created. Otherwise the job
 * creates the replica, its running state is set to the state machine's initial state, and the
 * replica is allocated both on {@code chosenWorker} and on the worker entry that
 * {@code WorkerInfo} holds for the chosen worker's container id.
 *
 * @param job the job that owns the task
 * @param task the task to replicate
 * @param chosenWorker the worker that will run the new replica
 * @return {@code true} if a replica was created and allocated, {@code false} if the task cannot
 *     be replicated
 */
private boolean createAndAllocateExecution(Job job, Task task, WorkerEntry chosenWorker) {
  if (!canReplicate(task)) {
    return false;
  }

  GridProcess newReplica = job.createAndAllocateExecution(task.getTaskid(), chosenWorker);
  newReplica.setRunningState(stateMachine.getInitialState());

  chosenWorker.allocate(newReplica);

  String containerId = chosenWorker.getServiceID().getContainerID().toString();
  WorkerEntry registeredWorker = WorkerInfo.getInstance().getWorker(containerId);
  registeredWorker.allocate(newReplica);
  return true;
}
/**
 * Copies the current data of a grid process into its replica accounting object.
 *
 * <p>The accounting receives: the start/end times of the init, remote and final phases plus the
 * init and get operations; the error cause and error type (only when the execution error and
 * its cause are both present); the executor's exit value, stderr and stdout (when an executor
 * result exists); the creation time; the latest phase and state taken from the process state;
 * the sabotage check as text (or {@code null}); and the task and grid process sequence numbers.
 *
 * @param process the grid process to read from
 * @return the process's replica accounting, after being updated
 */
private GridProcessAccounting setAccountingFields(GridProcess process) {
  GridProcessAccounting replicaAccounting = process.getReplicaAccounting();
  GridProcessExecutionResult executionResult = process.getResult();

  // Phase timings and operations.
  GridProcessPhasesData phases = new GridProcessPhasesData();
  phases.setInitBeginning(executionResult.getInitData().getStartTime());
  phases.setInitEnd(executionResult.getInitData().getEndTime());
  phases.setRemoteBeginning(executionResult.getRemoteData().getStartTime());
  phases.setRemoteEnd(executionResult.getRemoteData().getEndTime());
  phases.setFinalBeginning(executionResult.getFinalData().getStartTime());
  phases.setFinalEnd(executionResult.getFinalData().getEndTime());
  phases.setInitOperations(executionResult.getInitOperations());
  phases.setGetOperations(executionResult.getGetOperations());
  replicaAccounting.setPhasesData(phases);

  // Error and executor output.
  GridProcessResultInfo outcome = new GridProcessResultInfo();
  GridProcessError failure = executionResult.getExecutionError();
  boolean hasErrorCause = failure != null && failure.getErrorCause() != null;
  if (hasErrorCause) {
    outcome.setErrorCause(failure.getErrorCause().getMessage());
    outcome.setExecutionErrorType(failure.getType().getName());
  }
  ExecutorResult executorOutput = executionResult.getExecutorResult();
  if (executorOutput != null) {
    outcome.setExitValue(executorOutput.getExitValue());
    outcome.setStderr(executorOutput.getStderr());
    outcome.setStdout(executorOutput.getStdout());
  }
  replicaAccounting.setResultInfo(outcome);

  replicaAccounting.setCreationTime(process.getCreationTime());
  replicaAccounting.setLatestPhase(process.getState().toString());

  // The sabotage check is stored as text; it stays null when no check result exists.
  SabotageCheckResult sabotage = executionResult.getSabotageCheckResult();
  String sabotageText = null;
  if (sabotage != null) {
    sabotageText = sabotage.toString();
  }
  replicaAccounting.setSabotageCheck(sabotageText);

  replicaAccounting.setTaskSequenceNumber(process.getSpec().getTaskSequenceNumber());
  replicaAccounting.setGridProcessSequenceNumber(process.getId());
  replicaAccounting.setState(process.getState());
  return replicaAccounting;
}
public void executionFailed(GridProcess execution, List<IResponseTO> responses) { reportReplicaAccounting(execution, responses); Job job = execution.getJob(); GridProcessExecutionResult executionResult = execution.getResult(); try { job.newReplicaResult( executionResult, GridProcessState.FAILED, verifyFailure(execution.getTask(), GridProcessState.FAILED), canReplicate(execution.getTask())); } catch (IllegalResultException e) { responses.add( new LoggerResponseTO( "Illegal result on replica " + execution.getState() + " : " + e.getMessage(), LoggerResponseTO.ERROR)); } GridProcessHandle handle = executionResult.getReplicaHandle(); WorkerEntry workerEntry = execution.getWorkerEntry(); workerEntry.deallocate(); GridProcessErrorTypes type = null; if (executionResult != null && executionResult.getExecutionError() != null) { type = executionResult.getExecutionError().getType(); } boolean enteredTaskBlacklist = executionFailedOnWorker(workerEntry, type, execution, responses); if (enteredTaskBlacklist) { if (!isWorkerNeeded(workerEntry, execution)) { unwantWorker(job, workerEntry, responses); } } else { disposeWorker(workerEntry, responses); } boolean hasJobEnded = hasJobEnded(job); String executorMsg = ""; if (executionResult != null && executionResult.getExecutionError() != null && executionResult.getExecutionError().getErrorCause() != null) { executorMsg = executionResult.getExecutionError().getErrorCause().toString(); } responses.add( new LoggerResponseTO( "Grid process " + execution.getState() + " " + handle + ". 
Job ended: " + hasJobEnded + " " + executorMsg + ".", LoggerResponseTO.DEBUG)); if (hasJobEnded) { finishJob(execution.getJob(), responses); } if (!isJobSatisfied(job) && !hasJobEnded) { Request request = execution.getJob().getRequest(workerEntry.getRequestID()); if (request != null) { request.setPaused(false); } ResumeRequestResponseTO to = new ResumeRequestResponseTO(); to.setPeerAddress(StringUtil.deploymentIDToAddress(workerEntry.getPeerID())); to.setRequestID(workerEntry.getRequestID()); responses.add(to); } updateScheduler(responses); }
/**
 * Tells whether the worker is still needed by the job of the given execution.
 *
 * <p>A worker is needed only when it is not blacklisted for the entire job and the job is not
 * satisfied.
 *
 * @param workerEntry the worker to check
 * @param execution the grid process whose job is consulted
 * @return {@code true} if the worker is still needed, {@code false} otherwise
 */
private boolean isWorkerNeeded(WorkerEntry workerEntry, GridProcess execution) {
  Job owningJob = execution.getJob();
  return !isWorkerBlacklistedForEntireJob(workerEntry, owningJob) && !isJobSatisfied(owningJob);
}
/**
 * Builds the status snapshot of a grid process.
 *
 * <p>The snapshot contains the worker's status info, the process identifiers, state and current
 * phase, the creation and finalization times, and a result section. The result section is
 * {@code null} when the process has no result; otherwise it holds the error type name (empty
 * string when there is no execution error), the error cause message (or {@code null}), the
 * elapsed times of the init, remote and final phases, the executor result and, when present,
 * the sabotage check as text.
 *
 * @param process the grid process to describe
 * @return the status info built from {@code process}
 */
private GridProcessStatusInfo fillProcess(GridProcess process) {
  WorkerStatusInfo workerStatus =
      new WorkerStatusInfo(
          process.getWorkerEntry().getWorkerSpecification(),
          process.getHandle(),
          process.getWorkerEntry().getWorkerID(),
          process.getState().toString());

  GridProcessStatusInfoResult resultStatus = null;
  GridProcessExecutionResult executionResult = process.getResult();
  if (executionResult != null) {
    String errorType = "";
    String causeMessage = null;

    GridProcessError failure = executionResult.getExecutionError();
    if (failure != null) {
      errorType = failure.getType().getName();
      if (failure.getErrorCause() != null) {
        causeMessage = failure.getErrorCause().getMessage();
      }
    }

    resultStatus =
        new GridProcessStatusInfoResult(
            errorType,
            causeMessage,
            executionResult.getInitData().getElapsedTimeInMillis(),
            executionResult.getRemoteData().getElapsedTimeInMillis(),
            executionResult.getFinalData().getElapsedTimeInMillis(),
            executionResult.getExecutorResult());

    SabotageCheckResult sabotage = executionResult.getSabotageCheckResult();
    if (sabotage != null) {
      resultStatus.setSabotageCheck(sabotage.toString());
    }
  }

  GridProcessStatusInfo statusInfo =
      new GridProcessStatusInfo(
          process.getId(),
          process.getTaskId(),
          process.getJobId(),
          process.getState().toString(),
          process.getCurrentPhase().toString(),
          workerStatus,
          resultStatus,
          process.getHandle());
  statusInfo.setCreationTime(process.getCreationTime());
  statusInfo.setFinalizationTime(process.getFinalizationTime());
  return statusInfo;
}
public JobWorkerStatus getCompleteStatus() { Map<Integer, Job> jobsMap = JobInfo.getInstance().getJobs(); Map<Integer, Set<WorkerEntry>> workersByJob = CommonUtils.createMap(); JobStatusInfo jobInfo = null; List<TaskStatusInfo> tasksList = null; Map<Integer, JobStatusInfo> jobs = CommonUtils.createSerializableMap(); // Jobs for (Job job : jobsMap.values()) { Set<WorkerEntry> workers = new LinkedHashSet<WorkerEntry>(); tasksList = new ArrayList<TaskStatusInfo>(); for (Task task : job.getTasks()) { tasksList.add(fillTask(task)); for (GridProcess gridProcess : task.getGridProcesses()) { if (gridProcess.getState() == GridProcessState.RUNNING) workers.add(gridProcess.getWorkerEntry()); } } jobInfo = new JobStatusInfo( job.getJobId(), job.getSpec(), UtilConverter.getJobState(job.getState()), tasksList, job.getCreationTime(), job.getFinalizationTime()); jobs.put(jobInfo.getJobId(), jobInfo); if (job.isRunning()) { workersByJob.put(job.getJobId(), workers); } } Map<Integer, WorkerStatusInfo[]> workers = CommonUtils.createSerializableMap(); WorkerStatusInfo[] workerList = null; for (Entry<Integer, Set<WorkerEntry>> entry : workersByJob.entrySet()) { workerList = workers.get(entry.getKey()); if (workerList == null) { workerList = new WorkerStatusInfo[entry.getValue().size()]; workers.put(entry.getKey(), workerList); } int i = 0; for (WorkerEntry workerEntry : entry.getValue()) { GridProcessHandle handle = null; String state = null; if (workerEntry.getGridProcess() != null) { handle = workerEntry.getGridProcess().getHandle(); state = workerEntry.getGridProcess().getState().toString(); } workerList[i] = new WorkerStatusInfo( workerEntry.getWorkerSpecification(), handle, workerEntry.getWorkerID(), state); i++; } } JobWorkerStatus status = new JobWorkerStatus(jobs, workers); return status; }
/**
 * Builds a {@code ReportReplicaAccountingResponseTO} from the accounting data of a grid process
 * and appends it to {@code responses}.
 *
 * <p>The accounting is first refreshed through {@code setAccountingFields} and given the
 * converted transfer progress of the process. The report is addressed to the peer derived from
 * the process's worker provider id, and the worker specification is looked up in the worker DAO
 * by the address derived from the accounting's worker id.
 *
 * @param process the grid process to report on
 * @param responses the list the report is appended to
 */
private void reportReplicaAccounting(GridProcess process, List<IResponseTO> responses) {
  GridProcessAccounting accounting = setAccountingFields(process);
  accounting.setTransfersProgress(convertTransfer(process.getTransfersProgress()));

  String peerAddress = StringUtil.deploymentIDToAddress(process.getWorkerProviderID());

  ReportReplicaAccountingResponseTO report = new ReportReplicaAccountingResponseTO();

  // Addressing and identification.
  report.setPeerAddress(peerAddress);
  report.setJobID(process.getJobId());
  report.setRequestID(accounting.getRequestId());
  report.setTaskSequenceNumber(accounting.getTaskSequenceNumber());
  report.setGridProcessSequenceNumber(accounting.getGridProcessSequenceNumber());
  report.setWorkerID(accounting.getWorkerID());
  report.setWorkerPK(accounting.getWorkerPublicKey());

  // Job limits.
  report.setMaxFails(accounting.getMaxFails());
  report.setMaxReplicas(accounting.getMaxReplicas());
  report.setRequiredWorkers(accounting.getRequiredWorkers());

  // Timing of the phases.
  report.setCreationTime(accounting.getCreationTime());
  report.setInitBeginning(accounting.getInitBeginning());
  report.setInitEnd(accounting.getInitEnd());
  report.setRemoteBeginning(accounting.getRemoteBeginning());
  report.setRemoteEnd(accounting.getRemoteEnd());
  report.setFinalBeginning(accounting.getFinalBeginning());
  report.setFinalEnd(accounting.getFinalEnd());

  // State and outcome.
  report.setLatestPhase(accounting.getLatestPhase());
  report.setState(accounting.getState().name());
  report.setErrorCause(accounting.getErrorCause());
  report.setExecutionErrorType(accounting.getExecutionErrorType());
  report.setExitValue(accounting.getExitValue());
  report.setStderr(accounting.getStderr());
  report.setStdout(accounting.getStdout());
  report.setSabotageCheck(accounting.getSabotageCheck());

  // Worker specification, resolved through the worker DAO.
  String workerAddress = StringUtil.deploymentIDToAddress(accounting.getWorkerID());
  WorkerSpecification workerSpec =
      BrokerDAOFactory.getInstance().getWorkerDAO().getWorkerSpec(workerAddress);
  report.setWorkerSpec(workerSpec);

  // Operations, balances and transfer progress.
  report.setGetOperationsList(
      fillFinalGetOperations(
          accounting.getFinalCommands(),
          process.getTask(),
          process.getId(),
          process.getWorkerEntry().getWorkerID(),
          accounting.getRequestId()));
  report.setInitOperationsList(
      fillInitGetOperations(
          accounting.getInitCommands(),
          process.getTask(),
          process.getId(),
          process.getWorkerEntry().getWorkerID(),
          accounting.getRequestId()));
  report.setPeerBalancesList(fillPeerBalances(accounting.getAccountings().getBalances()));
  report.setTransferProgressList(
      fillTransferProgress(accounting.getTransfersProgress(), "" + process.getId()));

  responses.add(report);
}