@Override public void releaseSlot() { // try to transition to the CANCELED state. That state marks // that the releasing is in progress if (markCancelled()) { // kill all tasks currently running in this slot Execution exec = this.executedTask; if (exec != null && !exec.isFinished()) { exec.fail( new Exception( "The slot in which the task was executed has been released. Probably loss of TaskManager " + getInstance())); } // release directly (if we are directly allocated), // otherwise release through the parent shared slot if (getParent() == null) { // we have to give back the slot to the owning instance getInstance().returnAllocatedSlot(this); } else { // we have to ask our parent to dispose us getParent().releaseChild(this); } } }
@Test public void testRegistrationOfExecutionsFailedExternally() { try { final JobVertexID jid1 = new JobVertexID(); final JobVertexID jid2 = new JobVertexID(); JobVertex v1 = new JobVertex("v1", jid1); JobVertex v2 = new JobVertex("v2", jid2); Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 7, v2, 6); for (Execution e : executions.values()) { e.fail(null); } assertEquals(0, executions.size()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) { final int numConsumers = allConsumers.size(); if (numConsumers > 1) { fail( new IllegalStateException( "Currently, only a single consumer group per partition is supported.")); } else if (numConsumers == 0) { return; } for (ExecutionEdge edge : allConsumers.get(0)) { final ExecutionVertex consumerVertex = edge.getTarget(); final Execution consumer = consumerVertex.getCurrentExecutionAttempt(); final ExecutionState consumerState = consumer.getState(); final IntermediateResultPartition partition = edge.getSource(); // ---------------------------------------------------------------- // Consumer is created => try to deploy and cache input channel // descriptors if there is a deployment race // ---------------------------------------------------------------- if (consumerState == CREATED) { final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt(); consumerVertex.cachePartitionInfo( PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution)); // When deploying a consuming task, its task deployment descriptor will contain all // deployment information available at the respective time. It is possible that some // of the partitions to be consumed have not been created yet. These are updated // runtime via the update messages. // // TODO The current approach may send many update messages even though the consuming // task has already been deployed with all necessary information. We have to check // whether this is a problem and fix it, if it is. future( new Callable<Boolean>() { @Override public Boolean call() throws Exception { try { consumerVertex.scheduleForExecution( consumerVertex.getExecutionGraph().getScheduler(), consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed()); } catch (Throwable t) { fail( new IllegalStateException( "Could not schedule consumer " + "vertex " + consumerVertex, t)); } return true; } }, executionContext); // double check to resolve race conditions if (consumerVertex.getExecutionState() == RUNNING) { consumerVertex.sendPartitionInfos(); } } // ---------------------------------------------------------------- // Consumer is running => send update message now // ---------------------------------------------------------------- else { if (consumerState == RUNNING) { final SimpleSlot consumerSlot = consumer.getAssignedResource(); if (consumerSlot == null) { // The consumer has been reset concurrently continue; } final Instance consumerInstance = consumerSlot.getInstance(); final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), attemptId); final Instance partitionInstance = partition.getProducer().getCurrentAssignedResource().getInstance(); final ResultPartitionLocation partitionLocation; if (consumerInstance.equals(partitionInstance)) { // Consuming task is deployed to the same instance as the partition => local partitionLocation = ResultPartitionLocation.createLocal(); } else { // Different instances => remote final ConnectionID connectionId = new ConnectionID( partitionInstance.getInstanceConnectionInfo(), partition.getIntermediateResult().getConnectionIndex()); partitionLocation = ResultPartitionLocation.createRemote(connectionId); } final InputChannelDeploymentDescriptor descriptor = new InputChannelDeploymentDescriptor(partitionId, partitionLocation); final UpdatePartitionInfo updateTaskMessage = new UpdateTaskSinglePartitionInfo( consumer.getAttemptId(), partition.getIntermediateResult().getId(), descriptor); sendUpdatePartitionInfoRpcCall(consumerSlot, updateTaskMessage); } // ---------------------------------------------------------------- // Consumer is scheduled or deploying => cache input channel // deployment descriptors and send update message later // ---------------------------------------------------------------- else if (consumerState == SCHEDULED || consumerState == DEPLOYING) { final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt(); consumerVertex.cachePartitionInfo( PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution)); // double check to resolve race conditions if (consumerVertex.getExecutionState() == RUNNING) { consumerVertex.sendPartitionInfos(); } } } } }