@Override
public void releaseSlot() {
    // try to transition to the CANCELED state. That state marks
    // that the releasing is in progress
    if (markCancelled()) {

        // kill all tasks currently running in this slot
        Execution exec = this.executedTask;
        if (exec != null && !exec.isFinished()) {
            exec.fail(new Exception(
                    "The slot in which the task was executed has been released. Probably loss of TaskManager "
                            + getInstance()));
        }

        // release directly (if we are directly allocated),
        // otherwise release through the parent shared slot
        if (getParent() == null) {
            // we have to give back the slot to the owning instance
            getInstance().returnAllocatedSlot(this);
        } else {
            // we have to ask our parent to dispose us
            getParent().releaseChild(this);
        }
    }
}
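// markCancelled() itself is not shown above. A minimal, hypothetical sketch of the
// compare-and-swap idiom the first comment refers to — an atomic state transition that
// guarantees the release logic below runs at most once; this is not Flink's actual
// slot implementation, just an illustration of the technique:
import java.util.concurrent.atomic.AtomicInteger;

class SlotStateSketch {
    private static final int ALLOCATED = 0;
    private static final int CANCELLED = 1;

    private final AtomicInteger status = new AtomicInteger(ALLOCATED);

    // returns true only for the single caller that wins the transition,
    // so concurrent releaseSlot() calls cannot double-release
    boolean markCancelled() {
        return status.compareAndSet(ALLOCATED, CANCELLED);
    }
}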
@Test
public void testRegistrationOfExecutionsFailingFinalize() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();

        JobVertex v1 = new FailingFinalizeJobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);

        Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 6, v2, 4);

        List<Execution> execList = new ArrayList<Execution>();
        execList.addAll(executions.values());

        // sort executions by job vertex. Failing job vertex first
        Collections.sort(execList, new Comparator<Execution>() {
            @Override
            public int compare(Execution o1, Execution o2) {
                return o1.getVertex().getSimpleName().compareTo(o2.getVertex().getSimpleName());
            }
        });

        int cnt = 0;
        for (Execution e : execList) {
            cnt++;
            e.markFinished();
            if (cnt <= 6) {
                // the last execution of the first job vertex triggers the failing finalize hook
                assertEquals(ExecutionState.FINISHED, e.getState());
            } else {
                // all following executions should be canceled
                assertEquals(ExecutionState.CANCELED, e.getState());
            }
        }

        assertEquals(0, executions.size());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testRegistrationOfExecutionsFinishing() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);

        Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 7650, v2, 2350);

        for (Execution e : executions.values()) {
            e.markFinished();
        }

        assertEquals(0, executions.size());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testRegistrationOfExecutionsFailedExternally() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);

        Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 7, v2, 6);

        for (Execution e : executions.values()) {
            e.fail(null);
        }

        assertEquals(0, executions.size());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testRegistrationOfExecutionsCanceled() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);

        Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 19, v2, 37);

        for (Execution e : executions.values()) {
            e.cancel();
            e.cancelingComplete();
        }

        assertEquals(0, executions.size());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
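// The setupExecution(...) helper used by the four tests above is not shown. All four
// assert the same invariant: an execution deregisters itself from the shared registry
// once it reaches a terminal state (FINISHED, CANCELED, or FAILED), which is why each
// test ends with executions.size() == 0. A minimal, hypothetical sketch of that
// bookkeeping — plain Java, not Flink's actual classes:
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class ExecutionRegistrySketch {
    enum State { RUNNING, FINISHED, CANCELED, FAILED }

    private final Map<Long, State> registered = new ConcurrentHashMap<>();

    void register(long attemptId) {
        registered.put(attemptId, State.RUNNING);
    }

    // any transition into a terminal state removes the execution from the registry
    void transition(long attemptId, State target) {
        if (target == State.FINISHED || target == State.CANCELED || target == State.FAILED) {
            registered.remove(attemptId);
        } else {
            registered.put(attemptId, target);
        }
    }

    int size() {
        return registered.size();
    }
}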
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
    final int numConsumers = allConsumers.size();

    if (numConsumers > 1) {
        fail(new IllegalStateException("Currently, only a single consumer group per partition is supported."));
    } else if (numConsumers == 0) {
        return;
    }

    for (ExecutionEdge edge : allConsumers.get(0)) {
        final ExecutionVertex consumerVertex = edge.getTarget();

        final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
        final ExecutionState consumerState = consumer.getState();

        final IntermediateResultPartition partition = edge.getSource();

        // ----------------------------------------------------------------
        // Consumer is created => try to deploy and cache input channel
        // descriptors if there is a deployment race
        // ----------------------------------------------------------------
        if (consumerState == CREATED) {
            final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

            consumerVertex.cachePartitionInfo(
                    PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

            // When deploying a consuming task, its task deployment descriptor will contain all
            // deployment information available at the respective time. It is possible that some
            // of the partitions to be consumed have not been created yet. These are updated at
            // runtime via the update messages.
            //
            // TODO The current approach may send many update messages even though the consuming
            // task has already been deployed with all necessary information. We have to check
            // whether this is a problem and fix it, if it is.
            future(new Callable<Boolean>() {
                @Override
                public Boolean call() throws Exception {
                    try {
                        consumerVertex.scheduleForExecution(
                                consumerVertex.getExecutionGraph().getScheduler(),
                                consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
                    } catch (Throwable t) {
                        fail(new IllegalStateException(
                                "Could not schedule consumer vertex " + consumerVertex, t));
                    }
                    return true;
                }
            }, executionContext);

            // double check to resolve race conditions
            if (consumerVertex.getExecutionState() == RUNNING) {
                consumerVertex.sendPartitionInfos();
            }
        }
        // ----------------------------------------------------------------
        // Consumer is running => send update message now
        // ----------------------------------------------------------------
        else {
            if (consumerState == RUNNING) {
                final SimpleSlot consumerSlot = consumer.getAssignedResource();

                if (consumerSlot == null) {
                    // The consumer has been reset concurrently
                    continue;
                }

                final Instance consumerInstance = consumerSlot.getInstance();

                final ResultPartitionID partitionId =
                        new ResultPartitionID(partition.getPartitionId(), attemptId);

                final Instance partitionInstance =
                        partition.getProducer().getCurrentAssignedResource().getInstance();

                final ResultPartitionLocation partitionLocation;

                if (consumerInstance.equals(partitionInstance)) {
                    // Consuming task is deployed to the same instance as the partition => local
                    partitionLocation = ResultPartitionLocation.createLocal();
                } else {
                    // Different instances => remote
                    final ConnectionID connectionId = new ConnectionID(
                            partitionInstance.getInstanceConnectionInfo(),
                            partition.getIntermediateResult().getConnectionIndex());

                    partitionLocation = ResultPartitionLocation.createRemote(connectionId);
                }

                final InputChannelDeploymentDescriptor descriptor =
                        new InputChannelDeploymentDescriptor(partitionId, partitionLocation);

                final UpdatePartitionInfo updateTaskMessage = new UpdateTaskSinglePartitionInfo(
                        consumer.getAttemptId(),
                        partition.getIntermediateResult().getId(),
                        descriptor);

                sendUpdatePartitionInfoRpcCall(consumerSlot, updateTaskMessage);
            }
            // ----------------------------------------------------------------
            // Consumer is scheduled or deploying => cache input channel
            // deployment descriptors and send update message later
            // ----------------------------------------------------------------
            else if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
                final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

                consumerVertex.cachePartitionInfo(
                        PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

                // double check to resolve race conditions
                if (consumerVertex.getExecutionState() == RUNNING) {
                    consumerVertex.sendPartitionInfos();
                }
            }
        }
    }
}
public boolean restoreLatestCheckpointedState(
        Map<JobVertexID, ExecutionJobVertex> tasks,
        boolean errorIfNoCheckpoint,
        boolean allOrNothingState) throws Exception {

    synchronized (lock) {
        if (shutdown) {
            throw new IllegalStateException("CheckpointCoordinator is shut down");
        }

        // Recover the checkpoints
        completedCheckpointStore.recover();

        // restore from the latest checkpoint
        CompletedCheckpoint latest = completedCheckpointStore.getLatestCheckpoint();

        if (latest == null) {
            if (errorIfNoCheckpoint) {
                throw new IllegalStateException("No completed checkpoint available");
            } else {
                return false;
            }
        }

        long recoveryTimestamp = System.currentTimeMillis();

        for (Map.Entry<JobVertexID, TaskState> taskGroupStateEntry : latest.getTaskStates().entrySet()) {
            TaskState taskState = taskGroupStateEntry.getValue();
            ExecutionJobVertex executionJobVertex = tasks.get(taskGroupStateEntry.getKey());

            if (executionJobVertex != null) {
                // check that we only restore the state if the parallelism has not been changed
                if (taskState.getParallelism() != executionJobVertex.getParallelism()) {
                    throw new RuntimeException("Cannot restore the latest checkpoint because " +
                            "the parallelism changed. The operator " + executionJobVertex.getJobVertexId() +
                            " has parallelism " + executionJobVertex.getParallelism() + " whereas the " +
                            "corresponding state object has a parallelism of " + taskState.getParallelism());
                }

                int counter = 0;

                List<Set<Integer>> keyGroupPartitions =
                        createKeyGroupPartitions(numberKeyGroups, executionJobVertex.getParallelism());

                for (int i = 0; i < executionJobVertex.getParallelism(); i++) {
                    SubtaskState subtaskState = taskState.getState(i);
                    SerializedValue<StateHandle<?>> state = null;

                    if (subtaskState != null) {
                        // count the number of executions for which we set a state
                        counter++;
                        state = subtaskState.getState();
                    }

                    Map<Integer, SerializedValue<StateHandle<?>>> kvStateForTaskMap =
                            taskState.getUnwrappedKvStates(keyGroupPartitions.get(i));

                    Execution currentExecutionAttempt =
                            executionJobVertex.getTaskVertices()[i].getCurrentExecutionAttempt();
                    currentExecutionAttempt.setInitialState(state, kvStateForTaskMap, recoveryTimestamp);
                }

                if (allOrNothingState && counter > 0 && counter < executionJobVertex.getParallelism()) {
                    throw new IllegalStateException("The checkpoint contained state only for " +
                            "a subset of tasks for vertex " + executionJobVertex);
                }
            } else {
                throw new IllegalStateException("There is no execution job vertex for the job" +
                        " vertex ID " + taskGroupStateEntry.getKey());
            }
        }

        return true;
    }
}
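// createKeyGroupPartitions(...) is called above but not shown. A plausible minimal
// sketch of such a helper, under the assumption that key groups are dealt out
// round-robin across subtasks — the exact assignment strategy is an assumption,
// not taken from this code:
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class KeyGroupPartitionsSketch {
    static List<Set<Integer>> createKeyGroupPartitions(int numberKeyGroups, int parallelism) {
        List<Set<Integer>> partitions = new ArrayList<>(parallelism);
        for (int i = 0; i < parallelism; i++) {
            partitions.add(new HashSet<Integer>());
        }
        // assign key group k to subtask k % parallelism
        for (int k = 0; k < numberKeyGroups; k++) {
            partitions.get(k % parallelism).add(k);
        }
        return partitions;
    }
}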
/**
 * Receives an AcknowledgeCheckpoint message and returns whether the message was associated with a
 * pending checkpoint.
 *
 * @param message Checkpoint ack from the task manager
 * @return Flag indicating whether the ack'd checkpoint was associated with a pending checkpoint.
 * @throws Exception If the checkpoint cannot be added to the completed checkpoint store.
 */
public boolean receiveAcknowledgeMessage(AcknowledgeCheckpoint message) throws Exception {
    if (shutdown || message == null) {
        return false;
    }
    if (!job.equals(message.getJob())) {
        LOG.error("Received AcknowledgeCheckpoint message for wrong job: {}", message);
        return false;
    }

    final long checkpointId = message.getCheckpointId();

    CompletedCheckpoint completed = null;
    PendingCheckpoint checkpoint;

    // Flag indicating whether the ack message was for a known pending checkpoint.
    boolean isPendingCheckpoint;

    synchronized (lock) {
        // we need to check inside the lock for being shutdown as well, otherwise we
        // get races and invalid error log messages
        if (shutdown) {
            return false;
        }

        checkpoint = pendingCheckpoints.get(checkpointId);

        if (checkpoint != null && !checkpoint.isDiscarded()) {
            isPendingCheckpoint = true;

            // TODO: Give KV-state to the acknowledgeTask method
            if (checkpoint.acknowledgeTask(
                    message.getTaskExecutionId(), message.getState(), message.getStateSize(), null)) {

                if (checkpoint.isFullyAcknowledged()) {
                    completed = checkpoint.toCompletedCheckpoint();

                    completedCheckpointStore.addCheckpoint(completed);

                    LOG.info("Completed checkpoint " + checkpointId + " (in " +
                            completed.getDuration() + " ms)");

                    if (LOG.isDebugEnabled()) {
                        StringBuilder builder = new StringBuilder();
                        for (Map.Entry<JobVertexID, TaskState> entry : completed.getTaskStates().entrySet()) {
                            builder.append("JobVertexID: ").append(entry.getKey())
                                    .append(" {").append(entry.getValue()).append("}");
                        }
                        LOG.debug(builder.toString());
                    }

                    pendingCheckpoints.remove(checkpointId);
                    rememberRecentCheckpointId(checkpointId);

                    dropSubsumedCheckpoints(completed.getTimestamp());

                    onFullyAcknowledgedCheckpoint(completed);

                    triggerQueuedRequests();
                }
            } else {
                // checkpoint did not accept message
                LOG.error("Received duplicate or invalid acknowledge message for checkpoint " +
                        checkpointId + ", task " + message.getTaskExecutionId());
            }
        } else if (checkpoint != null) {
            // this should not happen
            throw new IllegalStateException(
                    "Received message for discarded but non-removed checkpoint " + checkpointId);
        } else {
            // message is for an unknown checkpoint, or comes too late (checkpoint disposed)
            if (recentPendingCheckpoints.contains(checkpointId)) {
                isPendingCheckpoint = true;
                LOG.warn("Received late message for now expired checkpoint attempt " + checkpointId);
            } else {
                isPendingCheckpoint = false;
            }
        }
    }

    // send the confirmation messages to the necessary targets. we do this here
    // to be outside the lock scope
    if (completed != null) {
        final long timestamp = completed.getTimestamp();

        for (ExecutionVertex ev : tasksToCommitTo) {
            Execution ee = ev.getCurrentExecutionAttempt();
            if (ee != null) {
                ExecutionAttemptID attemptId = ee.getAttemptId();
                NotifyCheckpointComplete notifyMessage =
                        new NotifyCheckpointComplete(job, attemptId, checkpointId, timestamp);
                ev.sendMessageToCurrentExecution(notifyMessage, attemptId);
            }
        }

        statsTracker.onCompletedCheckpoint(completed);
    }

    return isPendingCheckpoint;
}
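// rememberRecentCheckpointId(...) and recentPendingCheckpoints are used above to tell
// "late" acks for expired checkpoints apart from acks for entirely unknown checkpoints.
// A minimal sketch, assuming only a fixed number of recent IDs is retained — the bound
// and the data structure are assumptions, not taken from this code:
import java.util.ArrayDeque;

class RecentCheckpointIdsSketch {
    private static final int MAX_RECENT_IDS = 16;

    private final ArrayDeque<Long> recentPendingCheckpoints = new ArrayDeque<>(MAX_RECENT_IDS);

    // evict the oldest remembered ID once the bound is reached
    void rememberRecentCheckpointId(long id) {
        if (recentPendingCheckpoints.size() >= MAX_RECENT_IDS) {
            recentPendingCheckpoints.removeFirst();
        }
        recentPendingCheckpoints.addLast(id);
    }

    boolean contains(long id) {
        return recentPendingCheckpoints.contains(id);
    }
}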
/**
 * Triggers a new checkpoint and uses the given timestamp as the checkpoint timestamp.
 *
 * @param timestamp The timestamp for the checkpoint.
 * @param nextCheckpointId The checkpoint ID to use for this checkpoint, or <code>-1</code> if the
 *     checkpoint ID counter should be queried.
 * @return <code>true</code> if the checkpoint was successfully triggered.
 */
public boolean triggerCheckpoint(long timestamp, long nextCheckpointId) throws Exception {
    // make some eager pre-checks
    synchronized (lock) {
        // abort if the coordinator has been shutdown in the meantime
        if (shutdown) {
            return false;
        }

        // sanity check: there should never be more than one trigger request queued
        if (triggerRequestQueued) {
            LOG.warn("Trying to trigger another checkpoint while one was queued already");
            return false;
        }

        // if too many checkpoints are currently in progress, we need to mark that a request is queued
        if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
            triggerRequestQueued = true;
            if (currentPeriodicTrigger != null) {
                currentPeriodicTrigger.cancel();
                currentPeriodicTrigger = null;
            }
            return false;
        }

        // make sure the minimum interval between checkpoints has passed
        if (lastTriggeredCheckpoint + minPauseBetweenCheckpoints > timestamp) {
            if (currentPeriodicTrigger != null) {
                currentPeriodicTrigger.cancel();
                currentPeriodicTrigger = null;
            }
            ScheduledTrigger trigger = new ScheduledTrigger();
            timer.scheduleAtFixedRate(trigger, minPauseBetweenCheckpoints, baseInterval);
            return false;
        }
    }

    // first check if all tasks that we need to trigger are running.
    // if not, abort the checkpoint
    ExecutionAttemptID[] triggerIDs = new ExecutionAttemptID[tasksToTrigger.length];
    for (int i = 0; i < tasksToTrigger.length; i++) {
        Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
        if (ee != null && ee.getState() == ExecutionState.RUNNING) {
            triggerIDs[i] = ee.getAttemptId();
        } else {
            LOG.info("Checkpoint triggering task {} is not being executed at the moment. Aborting checkpoint.",
                    tasksToTrigger[i].getSimpleName());
            return false;
        }
    }

    // next, check if all tasks that need to acknowledge the checkpoint are running.
    // if not, abort the checkpoint
    Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);

    for (ExecutionVertex ev : tasksToWaitFor) {
        Execution ee = ev.getCurrentExecutionAttempt();
        if (ee != null) {
            ackTasks.put(ee.getAttemptId(), ev);
        } else {
            LOG.info("Checkpoint acknowledging task {} is not being executed at the moment. Aborting checkpoint.",
                    ev.getSimpleName());
            return false;
        }
    }

    // we will actually trigger this checkpoint!
    lastTriggeredCheckpoint = timestamp;

    final long checkpointID;
    if (nextCheckpointId < 0) {
        try {
            // this must happen outside the locked scope, because it communicates
            // with external services (in HA mode) and may block for a while.
            checkpointID = checkpointIdCounter.getAndIncrement();
        } catch (Throwable t) {
            int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers;
            LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful +
                    " consecutive failed attempts so far)", t);
            return false;
        }
    } else {
        checkpointID = nextCheckpointId;
    }

    LOG.info("Triggering checkpoint " + checkpointID + " @ " + timestamp);

    final PendingCheckpoint checkpoint = new PendingCheckpoint(job, checkpointID, timestamp, ackTasks);

    // schedule the timer that will clean up the expired checkpoints
    TimerTask canceller = new TimerTask() {
        @Override
        public void run() {
            try {
                synchronized (lock) {
                    // only do the work if the checkpoint is not discarded anyways
                    // note that checkpoint completion discards the pending checkpoint object
                    if (!checkpoint.isDiscarded()) {
                        LOG.info("Checkpoint " + checkpointID + " expired before completing.");

                        checkpoint.discard(userClassLoader);

                        pendingCheckpoints.remove(checkpointID);
                        rememberRecentCheckpointId(checkpointID);

                        onCancelCheckpoint(checkpointID);

                        triggerQueuedRequests();
                    }
                }
            } catch (Throwable t) {
                LOG.error("Exception while handling checkpoint timeout", t);
            }
        }
    };

    try {
        // re-acquire the lock
        synchronized (lock) {
            // since we released the lock in the meantime, we need to re-check
            // that the conditions still hold. this is clumsy, but it allows us to
            // release the lock in the meantime while calls to external services are
            // blocking progress, and still gives us early checks that skip work
            // if no checkpoint can happen anyways
            if (shutdown) {
                return false;
            } else if (triggerRequestQueued) {
                LOG.warn("Trying to trigger another checkpoint while one was queued already");
                return false;
            } else if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
                triggerRequestQueued = true;
                if (currentPeriodicTrigger != null) {
                    currentPeriodicTrigger.cancel();
                    currentPeriodicTrigger = null;
                }
                return false;
            }

            pendingCheckpoints.put(checkpointID, checkpoint);
            timer.schedule(canceller, checkpointTimeout);
        } // end of lock scope

        // send the messages to the tasks that trigger their checkpoint
        for (int i = 0; i < tasksToTrigger.length; i++) {
            ExecutionAttemptID id = triggerIDs[i];
            TriggerCheckpoint message = new TriggerCheckpoint(job, id, checkpointID, timestamp);
            tasksToTrigger[i].sendMessageToCurrentExecution(message, id);
        }

        numUnsuccessfulCheckpointsTriggers = 0;
        return true;
    } catch (Throwable t) {
        // guard the map against concurrent modifications
        synchronized (lock) {
            pendingCheckpoints.remove(checkpointID);
        }

        int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers;
        LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful +
                " consecutive failed attempts so far)", t);

        if (!checkpoint.isDiscarded()) {
            checkpoint.discard(userClassLoader);
        }
        return false;
    }
}
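// The periodic trigger referenced above (ScheduledTrigger / currentPeriodicTrigger) is
// not shown. A minimal sketch of how such a TimerTask could drive triggerCheckpoint(...),
// passing -1 so a fresh ID is drawn from the counter; the wiring through a coordinator
// reference is hypothetical:
import java.util.TimerTask;

class ScheduledTriggerSketch extends TimerTask {
    private final CheckpointCoordinator coordinator;

    ScheduledTriggerSketch(CheckpointCoordinator coordinator) {
        this.coordinator = coordinator;
    }

    @Override
    public void run() {
        try {
            // use the current wall-clock time as the checkpoint timestamp
            coordinator.triggerCheckpoint(System.currentTimeMillis(), -1);
        } catch (Exception e) {
            // a failed trigger must not kill the timer thread
            e.printStackTrace();
        }
    }
}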
public void restoreLatestCheckpointedState(
        Map<JobVertexID, ExecutionJobVertex> tasks,
        boolean errorIfNoCheckpoint,
        boolean allOrNothingState) throws Exception {

    synchronized (lock) {
        if (shutdown) {
            throw new IllegalStateException("CheckpointCoordinator is shut down");
        }

        // Recover the checkpoints
        completedCheckpointStore.recover();

        // restore from the latest checkpoint
        CompletedCheckpoint latest = completedCheckpointStore.getLatestCheckpoint();

        if (latest == null) {
            if (errorIfNoCheckpoint) {
                throw new IllegalStateException("No completed checkpoint available");
            } else {
                return;
            }
        }

        long recoveryTimestamp = System.currentTimeMillis();

        if (allOrNothingState) {
            Map<ExecutionJobVertex, Integer> stateCounts = new HashMap<ExecutionJobVertex, Integer>();

            for (StateForTask state : latest.getStates()) {
                ExecutionJobVertex vertex = tasks.get(state.getOperatorId());
                Execution exec = vertex.getTaskVertices()[state.getSubtask()].getCurrentExecutionAttempt();
                exec.setInitialState(state.getState(), recoveryTimestamp);

                Integer count = stateCounts.get(vertex);
                if (count != null) {
                    stateCounts.put(vertex, count + 1);
                } else {
                    stateCounts.put(vertex, 1);
                }
            }

            // validate that either all task vertices have state, or none
            for (Map.Entry<ExecutionJobVertex, Integer> entry : stateCounts.entrySet()) {
                ExecutionJobVertex vertex = entry.getKey();
                if (entry.getValue() != vertex.getParallelism()) {
                    throw new IllegalStateException(
                            "The checkpoint contained state only for a subset of tasks for vertex " + vertex);
                }
            }
        } else {
            for (StateForTask state : latest.getStates()) {
                ExecutionJobVertex vertex = tasks.get(state.getOperatorId());
                Execution exec = vertex.getTaskVertices()[state.getSubtask()].getCurrentExecutionAttempt();
                exec.setInitialState(state.getState(), recoveryTimestamp);
            }
        }
    }
}