private void sendFailIntermediateResultPartitionsRpcCall() {
    final SimpleSlot slot = this.assignedResource;

    if (slot != null) {
        final Instance instance = slot.getInstance();

        if (instance.isAlive()) {
            final ActorGateway gateway = instance.getActorGateway();

            // TODO For some tests this could be a problem when querying too early if all
            // resources were released
            gateway.tell(new FailIntermediateResultPartitions(attemptId));
        }
    }
}
/**
 * Sends an UpdatePartitionInfo message to the instance of the consumerSlot.
 *
 * @param consumerSlot Slot to whose instance the message will be sent
 * @param updatePartitionInfo UpdatePartitionInfo message
 */
private void sendUpdatePartitionInfoRpcCall(
        final SimpleSlot consumerSlot, final UpdatePartitionInfo updatePartitionInfo) {

    if (consumerSlot != null) {
        final Instance instance = consumerSlot.getInstance();
        final ActorGateway gateway = instance.getActorGateway();

        Future<Object> futureUpdate = gateway.ask(updatePartitionInfo, timeout);

        futureUpdate.onFailure(
                new OnFailure() {
                    @Override
                    public void onFailure(Throwable failure) throws Throwable {
                        fail(
                                new IllegalStateException(
                                        "Update task on instance " + instance + " failed due to:",
                                        failure));
                    }
                },
                executionContext);
    }
}
@Test
public void testAddAndRemoveInstance() {
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

        Instance i1 = getRandomInstance(2);
        Instance i2 = getRandomInstance(2);
        Instance i3 = getRandomInstance(2);

        assertEquals(0, scheduler.getNumberOfAvailableInstances());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
        scheduler.newInstanceAvailable(i1);
        assertEquals(1, scheduler.getNumberOfAvailableInstances());
        assertEquals(2, scheduler.getNumberOfAvailableSlots());
        scheduler.newInstanceAvailable(i2);
        assertEquals(2, scheduler.getNumberOfAvailableInstances());
        assertEquals(4, scheduler.getNumberOfAvailableSlots());
        scheduler.newInstanceAvailable(i3);
        assertEquals(3, scheduler.getNumberOfAvailableInstances());
        assertEquals(6, scheduler.getNumberOfAvailableSlots());

        // cannot add an already available instance again
        try {
            scheduler.newInstanceAvailable(i2);
            fail("Scheduler accepted instance twice");
        } catch (IllegalArgumentException e) {
            // expected
        }

        // some instances die
        assertEquals(3, scheduler.getNumberOfAvailableInstances());
        assertEquals(6, scheduler.getNumberOfAvailableSlots());
        scheduler.instanceDied(i2);
        assertEquals(2, scheduler.getNumberOfAvailableInstances());
        assertEquals(4, scheduler.getNumberOfAvailableSlots());

        // try to add a dead instance
        try {
            scheduler.newInstanceAvailable(i2);
            fail("Scheduler accepted dead instance");
        } catch (IllegalArgumentException e) {
            // expected
        }

        scheduler.instanceDied(i1);
        assertEquals(1, scheduler.getNumberOfAvailableInstances());
        assertEquals(2, scheduler.getNumberOfAvailableSlots());
        scheduler.instanceDied(i3);
        assertEquals(0, scheduler.getNumberOfAvailableInstances());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());

        assertFalse(i1.isAlive());
        assertFalse(i2.isAlive());
        assertFalse(i3.isAlive());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testScheduleWithDyingInstances() {
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

        Instance i1 = getRandomInstance(2);
        Instance i2 = getRandomInstance(2);
        Instance i3 = getRandomInstance(1);

        scheduler.newInstanceAvailable(i1);
        scheduler.newInstanceAvailable(i2);
        scheduler.newInstanceAvailable(i3);

        List<SimpleSlot> slots = new ArrayList<SimpleSlot>();
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());

        i2.markDead();

        for (SimpleSlot slot : slots) {
            if (slot.getOwner() == i2) {
                assertTrue(slot.isCanceled());
            } else {
                assertFalse(slot.isCanceled());
            }

            slot.releaseSlot();
        }

        assertEquals(3, scheduler.getNumberOfAvailableSlots());

        i1.markDead();
        i3.markDead();

        // cannot get another slot, since all instances are dead
        try {
            scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
            fail("Scheduler served a slot from a dead instance");
        } catch (NoResourceAvailableException e) {
            // expected
        } catch (Exception e) {
            fail("Wrong exception type.");
        }

        // by now, at the latest, the scheduler should have noticed (through the lazy
        // mechanisms) that all instances have vanished
        assertEquals(0, scheduler.getNumberOfInstancesWithAvailableSlots());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Returns a list of file input splits specifically ordered for the given {@link
 * org.apache.flink.runtime.instance.Instance}. When the list is initially created, it contains
 * all the unconsumed file input splits at that point in time, sorted in ascending order of the
 * minimum distance between the input splits' storage locations and the given {@link
 * org.apache.flink.runtime.instance.Instance}.
 *
 * @param instance the instance for which the file input split list has been computed
 * @return the list of file input splits ordered specifically for the given instance
 */
private Queue<QueueElem> getInstanceSplitList(final Instance instance) {

    Queue<QueueElem> instanceSplitList = this.instanceMap.get(instance);

    if (instanceSplitList == null) {

        // Create and populate the instance-specific split list
        instanceSplitList = new PriorityQueue<FileInputSplitList.QueueElem>();

        final Iterator<FileInputSplit> it = this.masterSet.iterator();
        while (it.hasNext()) {

            final FileInputSplit split = it.next();
            final String[] hostNames = split.getHostNames();

            if (hostNames == null) {
                // unknown storage location => assume the maximum distance
                instanceSplitList.add(new QueueElem(split, Integer.MAX_VALUE));
            } else {
                int minDistance = Integer.MAX_VALUE;
                for (int i = 0; i < hostNames.length; ++i) {
                    final int distance = instance.getDistance(hostNames[i]);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(
                                "Distance between "
                                        + instance
                                        + " and "
                                        + hostNames[i]
                                        + " is "
                                        + distance);
                    }
                    if (distance < minDistance) {
                        minDistance = distance;
                    }
                }

                instanceSplitList.add(new QueueElem(split, minDistance));
            }
        }

        this.instanceMap.put(instance, instanceSplitList);
    }

    return instanceSplitList;
}
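// For illustration only: a minimal sketch of what the QueueElem helper relied on by the
// PriorityQueue above could look like, assuming it simply pairs a split with its minimum
// distance and orders elements ascending by that distance. The field and constructor shown
// here are hypothetical, not the actual implementation.
private static final class QueueElem implements Comparable<QueueElem> {

    /** The file input split wrapped by this element. */
    final FileInputSplit inputSplit;

    /** Minimum network distance between the split's storage locations and the instance. */
    final int distance;

    QueueElem(final FileInputSplit inputSplit, final int distance) {
        this.inputSplit = inputSplit;
        this.distance = distance;
    }

    @Override
    public int compareTo(final QueueElem other) {
        // smaller distance => served earlier by the PriorityQueue
        return Integer.compare(this.distance, other.distance);
    }
}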
@Override
public String handleJsonRequest(
        Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager)
        throws Exception {
    try {
        if (jobManager != null) {
            // Whether one task manager's metrics are requested or all task managers', we
            // return them in an array. This avoids unnecessary code complexity.
            // If only one task manager is requested, we fetch only that task manager's metrics.
            final List<Instance> instances = new ArrayList<>();
            if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) {
                try {
                    InstanceID instanceID =
                            new InstanceID(
                                    StringUtils.hexStringToByte(pathParams.get(TASK_MANAGER_ID_KEY)));
                    Future<Object> future =
                            jobManager.ask(
                                    new JobManagerMessages.RequestTaskManagerInstance(instanceID),
                                    timeout);
                    TaskManagerInstance instance =
                            (TaskManagerInstance) Await.result(future, timeout);
                    if (instance.instance().nonEmpty()) {
                        instances.add(instance.instance().get());
                    }
                } catch (IllegalArgumentException e) {
                    // the id string was invalid => keep the list empty
                }
            } else {
                Future<Object> future =
                        jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout);
                RegisteredTaskManagers taskManagers =
                        (RegisteredTaskManagers) Await.result(future, timeout);
                instances.addAll(taskManagers.asJavaCollection());
            }

            StringWriter writer = new StringWriter();
            JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);

            gen.writeStartObject();
            gen.writeArrayFieldStart("taskmanagers");

            for (Instance instance : instances) {
                gen.writeStartObject();
                gen.writeStringField("id", instance.getId().toString());
                gen.writeStringField("path", instance.getActorGateway().path());
                gen.writeNumberField("dataPort", instance.getTaskManagerLocation().dataPort());
                gen.writeNumberField("timeSinceLastHeartbeat", instance.getLastHeartBeat());
                gen.writeNumberField("slotsNumber", instance.getTotalNumberOfSlots());
                gen.writeNumberField("freeSlots", instance.getNumberOfAvailableSlots());
                gen.writeNumberField("cpuCores", instance.getResources().getNumberOfCPUCores());
                gen.writeNumberField(
                        "physicalMemory", instance.getResources().getSizeOfPhysicalMemory());
                gen.writeNumberField("freeMemory", instance.getResources().getSizeOfJvmHeap());
                gen.writeNumberField(
                        "managedMemory", instance.getResources().getSizeOfManagedMemory());

                // only send metrics when a single task manager requests them
                if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) {
                    byte[] report = instance.getLastMetricsReport();
                    if (report != null) {
                        gen.writeFieldName("metrics");
                        gen.writeRawValue(new String(report, "utf-8"));
                    }
                }

                gen.writeEndObject();
            }

            gen.writeEndArray();
            gen.writeEndObject();
            gen.close();
            return writer.toString();
        } else {
            throw new Exception("No connection to the leading JobManager.");
        }
    } catch (Exception e) {
        throw new RuntimeException(
                "Failed to fetch list of all task managers: " + e.getMessage(), e);
    }
}
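// Illustrative shape of the JSON produced by the handler above. The field names follow the
// writeStringField/writeNumberField calls; all values below are hypothetical placeholders.
// The "metrics" field is only present when a single task manager is requested by id.
//
// {
//   "taskmanagers" : [ {
//     "id" : "4b9e3f0a...",
//     "path" : "akka.tcp://flink@host:6123/user/taskmanager",
//     "dataPort" : 42421,
//     "timeSinceLastHeartbeat" : 1459876543210,
//     "slotsNumber" : 4,
//     "freeSlots" : 2,
//     "cpuCores" : 8,
//     "physicalMemory" : 16854347776,
//     "freeMemory" : 1029177344,
//     "managedMemory" : 722363771
//   } ]
// }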
@Test
public void testBuildDeploymentDescriptor() {
    try {
        final JobID jobId = new JobID();

        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);
        JobVertex v3 = new JobVertex("v3", jid3);
        JobVertex v4 = new JobVertex("v4", jid4);

        v1.setParallelism(10);
        v2.setParallelism(10);
        v3.setParallelism(10);
        v4.setParallelism(10);

        v1.setInvokableClass(BatchTask.class);
        v2.setInvokableClass(BatchTask.class);
        v3.setInvokableClass(BatchTask.class);
        v4.setInvokableClass(BatchTask.class);

        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL);
        v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);

        ExecutionGraph eg =
                new ExecutionGraph(
                        TestingUtils.defaultExecutionContext(),
                        jobId,
                        "some job",
                        new Configuration(),
                        new SerializedValue<>(new ExecutionConfig()),
                        AkkaUtils.getDefaultTimeout(),
                        new NoRestartStrategy());

        List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);
        eg.attachJobGraph(ordered);

        ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
        ExecutionVertex vertex = ejv.getTaskVertices()[3];

        ExecutionGraphTestUtils.SimpleActorGateway instanceGateway =
                new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());

        final Instance instance = getInstance(instanceGateway);

        final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());

        vertex.deployToSlot(slot);

        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

        TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
        assertNotNull(descr);

        assertEquals(jobId, descr.getJobID());
        assertEquals(jid2, descr.getVertexID());
        assertEquals(3, descr.getIndexInSubtaskGroup());
        assertEquals(10, descr.getNumberOfSubtasks());
        assertEquals(BatchTask.class.getName(), descr.getInvokableClassName());
        assertEquals("v2", descr.getTaskName());

        List<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
        List<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();

        assertEquals(2, producedPartitions.size());
        assertEquals(1, consumedPartitions.size());

        assertEquals(10, producedPartitions.get(0).getNumberOfSubpartitions());
        assertEquals(10, producedPartitions.get(1).getNumberOfSubpartitions());
        assertEquals(10, consumedPartitions.get(0).getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
    final int numConsumers = allConsumers.size();

    if (numConsumers > 1) {
        fail(
                new IllegalStateException(
                        "Currently, only a single consumer group per partition is supported."));
    } else if (numConsumers == 0) {
        return;
    }

    for (ExecutionEdge edge : allConsumers.get(0)) {
        final ExecutionVertex consumerVertex = edge.getTarget();

        final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
        final ExecutionState consumerState = consumer.getState();

        final IntermediateResultPartition partition = edge.getSource();

        // ----------------------------------------------------------------
        // Consumer is created => try to deploy and cache input channel
        // descriptors if there is a deployment race
        // ----------------------------------------------------------------
        if (consumerState == CREATED) {
            final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

            consumerVertex.cachePartitionInfo(
                    PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

            // When deploying a consuming task, its task deployment descriptor will contain all
            // deployment information available at the respective time. It is possible that some
            // of the partitions to be consumed have not been created yet. These are updated at
            // runtime via the update messages.
            //
            // TODO The current approach may send many update messages even though the consuming
            // task has already been deployed with all necessary information. We have to check
            // whether this is a problem and fix it, if it is.
            future(
                    new Callable<Boolean>() {
                        @Override
                        public Boolean call() throws Exception {
                            try {
                                consumerVertex.scheduleForExecution(
                                        consumerVertex.getExecutionGraph().getScheduler(),
                                        consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
                            } catch (Throwable t) {
                                fail(
                                        new IllegalStateException(
                                                "Could not schedule consumer vertex " + consumerVertex,
                                                t));
                            }

                            return true;
                        }
                    },
                    executionContext);

            // double check to resolve race conditions
            if (consumerVertex.getExecutionState() == RUNNING) {
                consumerVertex.sendPartitionInfos();
            }
        }
        // ----------------------------------------------------------------
        // Consumer is running => send update message now
        // ----------------------------------------------------------------
        else {
            if (consumerState == RUNNING) {
                final SimpleSlot consumerSlot = consumer.getAssignedResource();

                if (consumerSlot == null) {
                    // The consumer has been reset concurrently
                    continue;
                }

                final Instance consumerInstance = consumerSlot.getInstance();

                final ResultPartitionID partitionId =
                        new ResultPartitionID(partition.getPartitionId(), attemptId);

                final Instance partitionInstance =
                        partition.getProducer().getCurrentAssignedResource().getInstance();

                final ResultPartitionLocation partitionLocation;

                if (consumerInstance.equals(partitionInstance)) {
                    // Consuming task is deployed to the same instance as the partition => local
                    partitionLocation = ResultPartitionLocation.createLocal();
                } else {
                    // Different instances => remote
                    final ConnectionID connectionId =
                            new ConnectionID(
                                    partitionInstance.getInstanceConnectionInfo(),
                                    partition.getIntermediateResult().getConnectionIndex());

                    partitionLocation = ResultPartitionLocation.createRemote(connectionId);
                }

                final InputChannelDeploymentDescriptor descriptor =
                        new InputChannelDeploymentDescriptor(partitionId, partitionLocation);

                final UpdatePartitionInfo updateTaskMessage =
                        new UpdateTaskSinglePartitionInfo(
                                consumer.getAttemptId(),
                                partition.getIntermediateResult().getId(),
                                descriptor);

                sendUpdatePartitionInfoRpcCall(consumerSlot, updateTaskMessage);
            }
            // ----------------------------------------------------------------
            // Consumer is scheduled or deploying => cache input channel
            // deployment descriptors and send update message later
            // ----------------------------------------------------------------
            else if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
                final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

                consumerVertex.cachePartitionInfo(
                        PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

                // double check to resolve race conditions
                if (consumerVertex.getExecutionState() == RUNNING) {
                    consumerVertex.sendPartitionInfos();
                }
            }
        }
    }
}
public void deployToSlot(final SimpleSlot slot) throws JobException {
    // sanity checks
    if (slot == null) {
        throw new NullPointerException();
    }
    if (!slot.isAlive()) {
        throw new JobException("Target slot for deployment is not alive.");
    }

    // make sure exactly one deployment call happens from the correct state
    // note: the transition from CREATED to DEPLOYING is for testing purposes only
    ExecutionState previous = this.state;
    if (previous == SCHEDULED || previous == CREATED) {
        if (!transitionState(previous, DEPLOYING)) {
            // race condition, someone else beat us to the deploying call.
            // this should actually not happen and indicates a race somewhere else
            throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
        }
    } else {
        // vertex may have been cancelled, or it was already scheduled
        throw new IllegalStateException(
                "The vertex must be in CREATED or SCHEDULED state to be deployed. Found state "
                        + previous);
    }

    try {
        // good, we are allowed to deploy
        if (!slot.setExecutedVertex(this)) {
            throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
        }
        this.assignedResource = slot;
        this.assignedResourceLocation = slot.getInstance().getInstanceConnectionInfo();

        // race double check, did we fail/cancel and do we need to release the slot?
        if (this.state != DEPLOYING) {
            slot.releaseSlot();
            return;
        }

        if (LOG.isInfoEnabled()) {
            LOG.info(
                    String.format(
                            "Deploying %s (attempt #%d) to %s",
                            vertex.getSimpleName(),
                            attemptNumber,
                            slot.getInstance().getInstanceConnectionInfo().getHostname()));
        }

        final TaskDeploymentDescriptor deployment =
                vertex.createDeploymentDescriptor(
                        attemptId, slot, operatorState, recoveryTimestamp, attemptNumber);

        // register this execution at the execution graph, to receive call backs
        vertex.getExecutionGraph().registerExecution(this);

        final Instance instance = slot.getInstance();
        final ActorGateway gateway = instance.getActorGateway();

        final Future<Object> deployAction = gateway.ask(new SubmitTask(deployment), timeout);

        deployAction.onComplete(
                new OnComplete<Object>() {

                    @Override
                    public void onComplete(Throwable failure, Object success) throws Throwable {
                        if (failure != null) {
                            if (failure instanceof TimeoutException) {
                                String taskname =
                                        deployment.getTaskInfo().getTaskNameWithSubtasks()
                                                + " ("
                                                + attemptId
                                                + ')';

                                markFailed(
                                        new Exception(
                                                "Cannot deploy task "
                                                        + taskname
                                                        + " - TaskManager ("
                                                        + instance
                                                        + ") not responding after a timeout of "
                                                        + timeout,
                                                failure));
                            } else {
                                markFailed(failure);
                            }
                        } else {
                            if (!(success.equals(Messages.getAcknowledge()))) {
                                markFailed(
                                        new Exception(
                                                "Failed to deploy the task to slot "
                                                        + slot
                                                        + ": Response was not of type Acknowledge"));
                            }
                        }
                    }
                },
                executionContext);
    } catch (Throwable t) {
        markFailed(t);
        ExceptionUtils.rethrow(t);
    }
}