/**
 * Registers the outcome of a grid process on its job and releases the worker it ran on.
 *
 * <p>The result is passed to the job through {@code newReplicaResult}. An
 * {@link IllegalResultException} raised by that call is not propagated; it is appended to
 * {@code responses} as an ERROR log entry and processing continues. When {@code state}
 * equals {@link GridProcessState#FINISHED}, the other runnable grid processes of the same
 * task are aborted. The worker entry is always deallocated, and it is disposed when
 * {@code isWorkerNeeded} reports that it is no longer needed.
 *
 * @param execution the grid process whose outcome is being recorded
 * @param state the state to record for the process
 * @param responses output list that collects the responses produced along the way
 */
private void updateExecution(
        GridProcess execution, GridProcessState state, List<IResponseTO> responses) {
    try {
        execution
                .getJob()
                .newReplicaResult(
                        execution.getResult(),
                        state,
                        verifyFailure(execution.getTask(), state),
                        canReplicate(execution.getTask()));
    } catch (IllegalResultException illegalResult) {
        String message = "Illegal result on replicaEnded: " + illegalResult.getMessage();
        responses.add(new LoggerResponseTO(message, LoggerResponseTO.ERROR));
    }

    if (state.equals(GridProcessState.FINISHED)) {
        abortReplicaSisters(execution, responses);
    }

    WorkerEntry releasedWorker = execution.getWorkerEntry();
    releasedWorker.deallocate();
    if (isWorkerNeeded(releasedWorker, execution)) {
        // Still needed: keep the worker, nothing more to do.
        return;
    }
    disposeWorker(releasedWorker, responses);
}
/**
 * Aborts every other grid process of the same task whose state reports itself as runnable.
 *
 * @param execution the grid process to spare; only its siblings are considered
 * @param responses output list handed to {@code abort} for each aborted sibling
 */
private void abortReplicaSisters(GridProcess execution, List<IResponseTO> responses) {
    for (GridProcess candidate : execution.getTask().getGridProcesses()) {
        if (candidate.equals(execution)) {
            continue;
        }
        if (!candidate.getState().isRunnable()) {
            continue;
        }
        abort(candidate, responses);
    }
}
/**
 * Tells whether any other grid process of the same task is in the
 * {@link GridProcessState#FINISHED} state.
 *
 * @param execution the grid process whose siblings are inspected; it is itself ignored
 * @return {@code true} as soon as one finished sibling is found, {@code false} otherwise
 */
private boolean hasAnySisterGridProcessFinished(GridProcess execution) {
    for (GridProcess candidate : execution.getTask().getGridProcesses()) {
        if (candidate.equals(execution)) {
            continue;
        }
        if (candidate.getState().equals(GridProcessState.FINISHED)) {
            return true;
        }
    }
    return false;
}
public void executionFailed(GridProcess execution, List<IResponseTO> responses) { reportReplicaAccounting(execution, responses); Job job = execution.getJob(); GridProcessExecutionResult executionResult = execution.getResult(); try { job.newReplicaResult( executionResult, GridProcessState.FAILED, verifyFailure(execution.getTask(), GridProcessState.FAILED), canReplicate(execution.getTask())); } catch (IllegalResultException e) { responses.add( new LoggerResponseTO( "Illegal result on replica " + execution.getState() + " : " + e.getMessage(), LoggerResponseTO.ERROR)); } GridProcessHandle handle = executionResult.getReplicaHandle(); WorkerEntry workerEntry = execution.getWorkerEntry(); workerEntry.deallocate(); GridProcessErrorTypes type = null; if (executionResult != null && executionResult.getExecutionError() != null) { type = executionResult.getExecutionError().getType(); } boolean enteredTaskBlacklist = executionFailedOnWorker(workerEntry, type, execution, responses); if (enteredTaskBlacklist) { if (!isWorkerNeeded(workerEntry, execution)) { unwantWorker(job, workerEntry, responses); } } else { disposeWorker(workerEntry, responses); } boolean hasJobEnded = hasJobEnded(job); String executorMsg = ""; if (executionResult != null && executionResult.getExecutionError() != null && executionResult.getExecutionError().getErrorCause() != null) { executorMsg = executionResult.getExecutionError().getErrorCause().toString(); } responses.add( new LoggerResponseTO( "Grid process " + execution.getState() + " " + handle + ". 
Job ended: " + hasJobEnded + " " + executorMsg + ".", LoggerResponseTO.DEBUG)); if (hasJobEnded) { finishJob(execution.getJob(), responses); } if (!isJobSatisfied(job) && !hasJobEnded) { Request request = execution.getJob().getRequest(workerEntry.getRequestID()); if (request != null) { request.setPaused(false); } ResumeRequestResponseTO to = new ResumeRequestResponseTO(); to.setPeerAddress(StringUtil.deploymentIDToAddress(workerEntry.getPeerID())); to.setRequestID(workerEntry.getRequestID()); responses.add(to); } updateScheduler(responses); }
/**
 * Builds a {@code ReportReplicaAccountingResponseTO} from the accounting data of a grid
 * process and appends it to {@code responses}.
 *
 * <p>The accounting record is obtained from {@code setAccountingFields} and its transfer
 * progress is refreshed from the process before the values are copied into the response.
 *
 * @param process the grid process whose accounting is reported
 * @param responses output list that receives the accounting response
 */
private void reportReplicaAccounting(GridProcess process, List<IResponseTO> responses) {
    GridProcessAccounting accounting = setAccountingFields(process);
    accounting.setTransfersProgress(convertTransfer(process.getTransfersProgress()));

    ReportReplicaAccountingResponseTO report = new ReportReplicaAccountingResponseTO();

    // Job, request and process identification.
    report.setJobID(process.getJobId());
    report.setRequestID(accounting.getRequestId());
    report.setTaskSequenceNumber(accounting.getTaskSequenceNumber());
    report.setGridProcessSequenceNumber(accounting.getGridProcessSequenceNumber());
    report.setState(accounting.getState().name());

    // Peer and worker; the worker spec is looked up by the worker's address.
    report.setPeerAddress(StringUtil.deploymentIDToAddress(process.getWorkerProviderID()));
    report.setWorkerID(accounting.getWorkerID());
    report.setWorkerPK(accounting.getWorkerPublicKey());
    report.setWorkerSpec(
            BrokerDAOFactory.getInstance()
                    .getWorkerDAO()
                    .getWorkerSpec(StringUtil.deploymentIDToAddress(accounting.getWorkerID())));

    // Values copied one-to-one from the accounting record.
    report.setCreationTime(accounting.getCreationTime());
    report.setInitBeginning(accounting.getInitBeginning());
    report.setInitEnd(accounting.getInitEnd());
    report.setRemoteBeginning(accounting.getRemoteBeginning());
    report.setRemoteEnd(accounting.getRemoteEnd());
    report.setFinalBeginning(accounting.getFinalBeginning());
    report.setFinalEnd(accounting.getFinalEnd());
    report.setLatestPhase(accounting.getLatestPhase());
    report.setExitValue(accounting.getExitValue());
    report.setStdout(accounting.getStdout());
    report.setStderr(accounting.getStderr());
    report.setErrorCause(accounting.getErrorCause());
    report.setExecutionErrorType(accounting.getExecutionErrorType());
    report.setSabotageCheck(accounting.getSabotageCheck());
    report.setMaxFails(accounting.getMaxFails());
    report.setMaxReplicas(accounting.getMaxReplicas());
    report.setRequiredWorkers(accounting.getRequiredWorkers());

    // Lists derived through the fill* helpers.
    report.setGetOperationsList(
            fillFinalGetOperations(
                    accounting.getFinalCommands(),
                    process.getTask(),
                    process.getId(),
                    process.getWorkerEntry().getWorkerID(),
                    accounting.getRequestId()));
    report.setInitOperationsList(
            fillInitGetOperations(
                    accounting.getInitCommands(),
                    process.getTask(),
                    process.getId(),
                    process.getWorkerEntry().getWorkerID(),
                    accounting.getRequestId()));
    report.setPeerBalancesList(fillPeerBalances(accounting.getAccountings().getBalances()));
    report.setTransferProgressList(
            fillTransferProgress(accounting.getTransfersProgress(), "" + process.getId()));

    responses.add(report);
}