@Test public void testAddAndRemoveInstance() { try { Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); Instance i1 = getRandomInstance(2); Instance i2 = getRandomInstance(2); Instance i3 = getRandomInstance(2); assertEquals(0, scheduler.getNumberOfAvailableInstances()); assertEquals(0, scheduler.getNumberOfAvailableSlots()); scheduler.newInstanceAvailable(i1); assertEquals(1, scheduler.getNumberOfAvailableInstances()); assertEquals(2, scheduler.getNumberOfAvailableSlots()); scheduler.newInstanceAvailable(i2); assertEquals(2, scheduler.getNumberOfAvailableInstances()); assertEquals(4, scheduler.getNumberOfAvailableSlots()); scheduler.newInstanceAvailable(i3); assertEquals(3, scheduler.getNumberOfAvailableInstances()); assertEquals(6, scheduler.getNumberOfAvailableSlots()); // cannot add available instance again try { scheduler.newInstanceAvailable(i2); fail("Scheduler accepted instance twice"); } catch (IllegalArgumentException e) { // bueno! } // some instances die assertEquals(3, scheduler.getNumberOfAvailableInstances()); assertEquals(6, scheduler.getNumberOfAvailableSlots()); scheduler.instanceDied(i2); assertEquals(2, scheduler.getNumberOfAvailableInstances()); assertEquals(4, scheduler.getNumberOfAvailableSlots()); // try to add a dead instance try { scheduler.newInstanceAvailable(i2); fail("Scheduler accepted dead instance"); } catch (IllegalArgumentException e) { // stimmt } scheduler.instanceDied(i1); assertEquals(1, scheduler.getNumberOfAvailableInstances()); assertEquals(2, scheduler.getNumberOfAvailableSlots()); scheduler.instanceDied(i3); assertEquals(0, scheduler.getNumberOfAvailableInstances()); assertEquals(0, scheduler.getNumberOfAvailableSlots()); assertFalse(i1.isAlive()); assertFalse(i2.isAlive()); assertFalse(i3.isAlive()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
// Creates one shared actor system plus a TestActorRef-backed task manager
// (synchronous message processing) used by all tests in this class.
@BeforeClass
public static void setup() {
    system = ActorSystem.create("TestingActorSystem", TestingUtils.testConfig());
    taskManager =
            TestActorRef.create(
                    system,
                    Props.create(ExecutionGraphTestUtils.SimpleAcknowledgingTaskManager.class));
}
@Test
public void testScheduleWithDyingInstances() {
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

        // 2 + 2 + 1 = 5 slots in total
        Instance i1 = getRandomInstance(2);
        Instance i2 = getRandomInstance(2);
        Instance i3 = getRandomInstance(1);

        scheduler.newInstanceAvailable(i1);
        scheduler.newInstanceAvailable(i2);
        scheduler.newInstanceAvailable(i3);

        // grab all five slots
        List<SimpleSlot> slots = new ArrayList<SimpleSlot>();
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());

        i2.markDead();

        // slots owned by the dead instance must be canceled; the others stay live
        for (SimpleSlot slot : slots) {
            if (slot.getOwner() == i2) {
                assertTrue(slot.isCanceled());
            } else {
                assertFalse(slot.isCanceled());
            }

            slot.releaseSlot();
        }

        // only the slots of i1 and i3 (2 + 1) return to the pool
        assertEquals(3, scheduler.getNumberOfAvailableSlots());

        i1.markDead();
        i3.markDead();

        // cannot get another slot, since all instances are dead
        try {
            scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
            fail("Scheduler served a slot from a dead instance");
        } catch (NoResourceAvailableException e) {
            // fine
        } catch (Exception e) {
            fail("Wrong exception type.");
        }

        // now the latest, the scheduler should have noticed (through the lazy mechanisms)
        // that all instances have vanished
        assertEquals(0, scheduler.getNumberOfInstancesWithAvailableSlots());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Builds a two-vertex (v1 -> implicit, unconnected here) batch job, gives the scheduler exactly
 * one slot per subtask, schedules the graph (which triggers mock deployment through the
 * synchronous execution context), and returns the registered executions.
 *
 * @param v1 first job vertex, will be assigned parallelism {@code dop1}
 * @param dop1 parallelism for {@code v1}
 * @param v2 second job vertex, will be assigned parallelism {@code dop2}
 * @param dop2 parallelism for {@code v2}
 * @return the map of all registered execution attempts, one per subtask
 */
private Map<ExecutionAttemptID, Execution> setupExecution(
        JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    final JobID jobId = new JobID();

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "some job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());
    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    // provide exactly one single-slot instance per subtask
    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int i = 0; i < dop1 + dop2; i++) {
        scheduler.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1 + dop2, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    Map<ExecutionAttemptID, Execution> executions = eg.getRegisteredExecutions();
    assertEquals(dop1 + dop2, executions.size());

    return executions;
}
public class LeaderChangeStateCleanupTest extends TestLogger {

    // shared ask/await timeout for all cluster interactions
    private static FiniteDuration timeout = TestingUtils.TESTING_DURATION();

    private int numJMs = 2;
    private int numTMs = 2;
    private int numSlotsPerTM = 2;
    // the blocking job occupies every slot in the cluster
    private int parallelism = numTMs * numSlotsPerTM;

    private Configuration configuration;
    private LeaderElectionRetrievalTestingCluster cluster = null;
    private JobGraph job = createBlockingJob(parallelism);

    @Before
    public void before() throws Exception {
        // (re)arm the blocking receiver so submitted jobs stay running until canceled
        Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

        configuration = new Configuration();

        configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs);
        configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
        configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTM);

        cluster =
                new LeaderElectionRetrievalTestingCluster(
                        configuration, true, false, StreamingMode.BATCH_ONLY);
        cluster.start(false); // TaskManagers don't have to register at the JobManager
        cluster.waitForActorsToBeAlive(); // we only wait until all actors are alive
    }

    @After
    public void after() {
        if (cluster != null) {
            cluster.stop();
        }
    }

    /**
     * Tests that a job is properly canceled in the case of a leader change. In such an event all
     * TaskManagers have to disconnect from the previous leader and connect to the newly elected
     * leader.
     */
    @Test
    public void testStateCleanupAfterNewLeaderElectionAndListenerNotification() throws Exception {
        UUID leaderSessionID1 = UUID.randomUUID();
        UUID leaderSessionID2 = UUID.randomUUID();

        // first make JM(0) the leader
        cluster.grantLeadership(0, leaderSessionID1);
        // notify all listeners
        cluster.notifyRetrievalListeners(0, leaderSessionID1);

        cluster.waitForTaskManagersToBeRegistered();

        // submit blocking job so that it is not finished when we cancel it
        cluster.submitJobDetached(job);

        ActorGateway jm = cluster.getLeaderGateway(timeout);

        Future<Object> wait =
                jm.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);

        Await.ready(wait, timeout);

        // register for the removal notification BEFORE triggering the leader change
        Future<Object> jobRemoval = jm.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

        // make the JM(1) the new leader
        cluster.grantLeadership(1, leaderSessionID2);
        // notify all listeners about the event
        cluster.notifyRetrievalListeners(1, leaderSessionID2);

        Await.ready(jobRemoval, timeout);

        cluster.waitForTaskManagersToBeRegistered();

        ActorGateway jm2 = cluster.getLeaderGateway(timeout);

        Future<Object> futureNumberSlots =
                jm2.ask(JobManagerMessages.getRequestTotalNumberOfSlots(), timeout);

        // check that all TMs have registered at the new leader
        int numberSlots = (Integer) Await.result(futureNumberSlots, timeout);

        assertEquals(parallelism, numberSlots);

        // try to resubmit now the non-blocking job, it should complete successfully
        Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
        cluster.submitJobAndWait(job, false, timeout);
    }

    /**
     * Tests that a job is properly canceled in the case of a leader change. However, this time only
     * the JMs are notified about the leader change and the TMs still believe the old leader to have
     * leadership.
     */
    @Test
    public void testStateCleanupAfterNewLeaderElection() throws Exception {
        UUID leaderSessionID = UUID.randomUUID();
        UUID newLeaderSessionID = UUID.randomUUID();

        cluster.grantLeadership(0, leaderSessionID);
        cluster.notifyRetrievalListeners(0, leaderSessionID);

        cluster.waitForTaskManagersToBeRegistered();

        // submit blocking job so that we can test job clean up
        cluster.submitJobDetached(job);

        ActorGateway jm = cluster.getLeaderGateway(timeout);

        Future<Object> wait =
                jm.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);

        Await.ready(wait, timeout);

        Future<Object> jobRemoval = jm.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

        // only notify the JMs about the new leader JM(1)
        cluster.grantLeadership(1, newLeaderSessionID);

        // job should be removed anyway
        Await.ready(jobRemoval, timeout);
    }

    /**
     * Tests that a job is properly canceled in the event of a leader change. However, this time only
     * the TMs are notified about the changing leader. This should be enough to cancel the currently
     * running job, though.
     */
    @Test
    public void testStateCleanupAfterListenerNotification() throws Exception {
        UUID leaderSessionID = UUID.randomUUID();
        UUID newLeaderSessionID = UUID.randomUUID();

        cluster.grantLeadership(0, leaderSessionID);
        cluster.notifyRetrievalListeners(0, leaderSessionID);

        cluster.waitForTaskManagersToBeRegistered();

        // submit blocking job
        cluster.submitJobDetached(job);

        ActorGateway jm = cluster.getLeaderGateway(timeout);

        Future<Object> wait =
                jm.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);

        Await.ready(wait, timeout);

        Future<Object> jobRemoval = jm.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

        // notify listeners (TMs) about the leader change
        cluster.notifyRetrievalListeners(1, newLeaderSessionID);

        Await.ready(jobRemoval, timeout);
    }

    /**
     * Tests that the same JobManager can be reelected as the leader. Even though, the same JM is
     * elected as the next leader, all currently running jobs should be canceled properly and all TMs
     * should disconnect from the leader and then reconnect to it.
     */
    @Test
    public void testReelectionOfSameJobManager() throws Exception {
        UUID leaderSessionID = UUID.randomUUID();
        UUID newLeaderSessionID = UUID.randomUUID();

        // deliberately short so the negative registration check below fails fast
        FiniteDuration shortTimeout = new FiniteDuration(20, TimeUnit.SECONDS);

        cluster.grantLeadership(0, leaderSessionID);
        cluster.notifyRetrievalListeners(0, leaderSessionID);

        cluster.waitForTaskManagersToBeRegistered();

        // submit blocking job
        cluster.submitJobDetached(job);

        ActorGateway jm = cluster.getLeaderGateway(timeout);

        Future<Object> wait =
                jm.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);

        Await.ready(wait, timeout);

        Future<Object> jobRemoval = jm.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

        // make JM(0) again the leader --> this implies first a leadership revokal
        cluster.grantLeadership(0, newLeaderSessionID);

        Await.ready(jobRemoval, timeout);

        // The TMs should not be able to reconnect since they don't know the current leader
        // session ID
        try {
            cluster.waitForTaskManagersToBeRegistered(shortTimeout);
            fail("TaskManager should not be able to register at JobManager.");
        } catch (TimeoutException e) {
            // expected exception since the TMs have still the old leader session ID
        }

        // notify the TMs about the new (old) leader
        cluster.notifyRetrievalListeners(0, newLeaderSessionID);

        cluster.waitForTaskManagersToBeRegistered();

        // try to resubmit now the non-blocking job, it should complete successfully
        Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
        cluster.submitJobAndWait(job, false, timeout);
    }

    /**
     * Creates a sender -> blocking-receiver job (both vertices in one slot sharing group) that
     * keeps running until it is canceled, as long as the blocking flag is set.
     */
    public JobGraph createBlockingJob(int parallelism) {
        Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

        JobVertex sender = new JobVertex("sender");
        JobVertex receiver = new JobVertex("receiver");

        sender.setInvokableClass(Tasks.Sender.class);
        receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);

        sender.setParallelism(parallelism);
        receiver.setParallelism(parallelism);

        receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE);

        SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
        sender.setSlotSharingGroup(slotSharingGroup);
        receiver.setSlotSharingGroup(slotSharingGroup);

        return new JobGraph("Blocking test job", sender, receiver);
    }
}
@Test
public void testBuildDeploymentDescriptor() {
    try {
        final JobID jobId = new JobID();

        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);
        JobVertex v3 = new JobVertex("v3", jid3);
        JobVertex v4 = new JobVertex("v4", jid4);

        v1.setParallelism(10);
        v2.setParallelism(10);
        v3.setParallelism(10);
        v4.setParallelism(10);

        v1.setInvokableClass(BatchTask.class);
        v2.setInvokableClass(BatchTask.class);
        v3.setInvokableClass(BatchTask.class);
        v4.setInvokableClass(BatchTask.class);

        // topology: v1 -> v2 -> {v3, v4}, so v2 consumes one data set and produces two
        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL);
        v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);

        ExecutionGraph eg =
                new ExecutionGraph(
                        TestingUtils.defaultExecutionContext(),
                        jobId,
                        "some job",
                        new Configuration(),
                        new SerializedValue<>(new ExecutionConfig()),
                        AkkaUtils.getDefaultTimeout(),
                        new NoRestartStrategy());

        List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);

        eg.attachJobGraph(ordered);

        // pick subtask 3 of v2 and deploy it to a mock instance
        ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
        ExecutionVertex vertex = ejv.getTaskVertices()[3];

        ExecutionGraphTestUtils.SimpleActorGateway instanceGateway =
                new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());

        final Instance instance = getInstance(instanceGateway);

        final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());

        vertex.deployToSlot(slot);

        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

        // the gateway records the deployment descriptor it received
        TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
        assertNotNull(descr);

        assertEquals(jobId, descr.getJobID());
        assertEquals(jid2, descr.getVertexID());
        assertEquals(3, descr.getIndexInSubtaskGroup());
        assertEquals(10, descr.getNumberOfSubtasks());
        assertEquals(BatchTask.class.getName(), descr.getInvokableClassName());
        assertEquals("v2", descr.getTaskName());

        List<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
        List<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();

        // two consumers (v3, v4) => two produced partitions; one input (from v1)
        assertEquals(2, producedPartitions.size());
        assertEquals(1, consumedPartitions.size());

        // ALL_TO_ALL with parallelism 10 on both sides => 10 subpartitions / channels each
        assertEquals(10, producedPartitions.get(0).getNumberOfSubpartitions());
        assertEquals(10, producedPartitions.get(1).getNumberOfSubpartitions());
        assertEquals(10, consumedPartitions.get(0).getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Tests that a blocking batch job fails if there are not enough resources left to schedule the
 * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
 * swallow the fail exception when scheduling a consumer task.
 */
@Test
public void testNoResourceAvailableFailure() throws Exception {
    // FIX: the Javadoc comment was placed after the @Test annotation; the javadoc tool only
    // associates a doc comment that precedes the whole declaration, annotations included.
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // BLOCKING result: the sink is only scheduled after the source has finished
    v2.connectNewDataSetAsInput(
            v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "failing test job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());
    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    // provide only enough slots for the source, so the sink cannot be scheduled later
    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
        scheduler.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    ExecutionAttemptID attemptID =
            eg.getJobVertex(v1.getID())
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    // finishing the source triggers scheduling of the sink, which must fail for lack of slots
    eg.updateState(
            new TaskExecutionState(
                    jobId,
                    attemptID,
                    ExecutionState.FINISHED,
                    null,
                    new AccumulatorSnapshot(
                            jobId,
                            attemptID,
                            new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                            new HashMap<String, Accumulator<?, ?>>())));

    // the scheduling failure must fail the whole job instead of being swallowed
    assertEquals(JobStatus.FAILED, eg.getState());
}
/**
 * Submits {@code jobGraph} to the testing cluster and blocks until execution completes, using
 * the shared testing timeout.
 *
 * @throws JobExecutionException if the job execution fails
 */
private void submitJobGraphAndWait(final JobGraph jobGraph) throws JobExecutionException {
    flink.submitJobAndWait(jobGraph, false, TestingUtils.TESTING_DURATION());
}
// Starts the shared testing cluster once for all tests in this class.
@BeforeClass
public static void setUp() throws Exception {
    flink =
            TestingUtils.startTestingCluster(
                    NUMBER_OF_SLOTS_PER_TM, NUMBER_OF_TMS, TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT());
}
// Constructs the gateway on the default testing execution context.
public NullInputSplitGateway() {
    super(TestingUtils.defaultExecutionContext());
}
@Test
public void testSchedulingLocation() {
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

        // three instances, two slots each
        Instance i1 = getRandomInstance(2);
        Instance i2 = getRandomInstance(2);
        Instance i3 = getRandomInstance(2);

        scheduler.newInstanceAvailable(i1);
        scheduler.newInstanceAvailable(i2);
        scheduler.newInstanceAvailable(i3);

        // schedule something on an arbitrary instance
        SimpleSlot s1 =
                scheduler.allocateSlot(new ScheduledUnit(getTestVertex(new Instance[0])), false).get();

        // figure out how we use the location hints
        Instance first = (Instance) s1.getOwner();
        Instance second = first != i1 ? i1 : i2;
        Instance third = first == i3 ? i2 : i3;

        // something that needs to go to the first instance again
        SimpleSlot s2 =
                scheduler
                        .allocateSlot(new ScheduledUnit(getTestVertex(s1.getTaskManagerLocation())), false)
                        .get();
        assertEquals(first, s2.getOwner());

        // first or second --> second, because first is full
        SimpleSlot s3 =
                scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, second)), false).get();
        assertEquals(second, s3.getOwner());

        // first or third --> third (because first is full)
        SimpleSlot s4 =
                scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
        SimpleSlot s5 =
                scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
        assertEquals(third, s4.getOwner());
        assertEquals(third, s5.getOwner());

        // first or third --> second, because all others are full
        SimpleSlot s6 =
                scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
        assertEquals(second, s6.getOwner());

        // release something on the first and second instance
        s2.releaseSlot();
        s6.releaseSlot();

        // first or third --> first, which has a free slot again now
        SimpleSlot s7 =
                scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
        assertEquals(first, s7.getOwner());

        // tally: s1 had no constraints; s6 missed its hints; the other five hit a hinted location
        assertEquals(1, scheduler.getNumberOfUnconstrainedAssignments());
        assertEquals(1, scheduler.getNumberOfNonLocalizedAssignments());
        assertEquals(5, scheduler.getNumberOfLocalizedAssignments());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testScheduleQueueing() {
    final int NUM_INSTANCES = 50;
    final int NUM_SLOTS_PER_INSTANCE = 3;
    final int NUM_TASKS_TO_SCHEDULE = 2000;

    try {
        // note: since this test asynchronously releases slots, the executor needs release workers.
        // doing the release call synchronous can lead to a deadlock
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

        // instances with a random number of slots (1..NUM_SLOTS_PER_INSTANCE)
        for (int i = 0; i < NUM_INSTANCES; i++) {
            scheduler.newInstanceAvailable(
                    getRandomInstance((int) (Math.random() * NUM_SLOTS_PER_INSTANCE) + 1));
        }

        assertEquals(NUM_INSTANCES, scheduler.getNumberOfAvailableInstances());
        final int totalSlots = scheduler.getNumberOfAvailableSlots();

        // all slots we ever got.
        List<Future<SimpleSlot>> allAllocatedSlots = new ArrayList<>();

        // slots that need to be released
        final Set<SimpleSlot> toRelease = new HashSet<SimpleSlot>();

        // flag to track errors in the concurrent thread
        final AtomicBoolean errored = new AtomicBoolean(false);

        // thread to asynchronously release slots; waits on `toRelease` until slots show up,
        // then releases them one by one until all scheduled tasks have been recycled
        Runnable disposer =
                new Runnable() {
                    @Override
                    public void run() {
                        try {
                            int recycled = 0;
                            while (recycled < NUM_TASKS_TO_SCHEDULE) {
                                synchronized (toRelease) {
                                    while (toRelease.isEmpty()) {
                                        toRelease.wait();
                                    }

                                    Iterator<SimpleSlot> iter = toRelease.iterator();
                                    SimpleSlot next = iter.next();
                                    iter.remove();

                                    next.releaseSlot();
                                    recycled++;
                                }
                            }
                        } catch (Throwable t) {
                            errored.set(true);
                        }
                    }
                };

        Thread disposeThread = new Thread(disposer);
        disposeThread.start();

        // queue far more tasks than there are slots; each fulfilled future hands its slot
        // over to the disposer thread for release
        for (int i = 0; i < NUM_TASKS_TO_SCHEDULE; i++) {
            Future<SimpleSlot> future =
                    scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), true);
            future.thenAcceptAsync(
                    new AcceptFunction<SimpleSlot>() {
                        @Override
                        public void accept(SimpleSlot slot) {
                            synchronized (toRelease) {
                                toRelease.add(slot);
                                toRelease.notifyAll();
                            }
                        }
                    },
                    TestingUtils.defaultExecutionContext());
            allAllocatedSlots.add(future);
        }

        disposeThread.join();

        assertFalse("The slot releasing thread caused an error.", errored.get());

        // every queued request must have been fulfilled eventually
        List<SimpleSlot> slotsAfter = new ArrayList<SimpleSlot>();
        for (Future<SimpleSlot> future : allAllocatedSlots) {
            slotsAfter.add(future.get());
        }

        assertEquals(
                "All instances should have available slots.",
                NUM_INSTANCES,
                scheduler.getNumberOfInstancesWithAvailableSlots());

        // the slots should all be different
        assertTrue(areAllDistinct(slotsAfter.toArray()));

        assertEquals(
                "All slots should be available.", totalSlots, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test public void testScheduleImmediately() { try { Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); assertEquals(0, scheduler.getNumberOfAvailableSlots()); scheduler.newInstanceAvailable(getRandomInstance(2)); scheduler.newInstanceAvailable(getRandomInstance(1)); scheduler.newInstanceAvailable(getRandomInstance(2)); assertEquals(5, scheduler.getNumberOfAvailableSlots()); // schedule something into all slots SimpleSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); SimpleSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); SimpleSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); SimpleSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); SimpleSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); // the slots should all be different assertTrue(areAllDistinct(s1, s2, s3, s4, s5)); try { scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false); fail("Scheduler accepted scheduling request without available resource."); } catch (NoResourceAvailableException e) { // pass! 
} // release some slots again s3.releaseSlot(); s4.releaseSlot(); assertEquals(2, scheduler.getNumberOfAvailableSlots()); // now we can schedule some more slots SimpleSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); SimpleSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get(); assertTrue(areAllDistinct(s1, s2, s3, s4, s5, s6, s7)); // release all s1.releaseSlot(); s2.releaseSlot(); s5.releaseSlot(); s6.releaseSlot(); s7.releaseSlot(); assertEquals(5, scheduler.getNumberOfAvailableSlots()); // check that slots that are released twice (accidentally) do not mess things up s1.releaseSlot(); s2.releaseSlot(); s5.releaseSlot(); s6.releaseSlot(); s7.releaseSlot(); assertEquals(5, scheduler.getNumberOfAvailableSlots()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/*
 * Test setup:
 * - v1 is isolated, no slot sharing
 * - v2 and v3 (not connected) share slots
 * - v4 and v5 (connected) share slots
 */
@Test
public void testAssignSlotSharingGroup() {
    try {
        JobVertex v1 = new JobVertex("v1");
        JobVertex v2 = new JobVertex("v2");
        JobVertex v3 = new JobVertex("v3");
        JobVertex v4 = new JobVertex("v4");
        JobVertex v5 = new JobVertex("v5");

        v1.setParallelism(4);
        v2.setParallelism(5);
        v3.setParallelism(7);
        v4.setParallelism(1);
        v5.setParallelism(11);

        v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE);
        v5.connectNewDataSetAsInput(v4, DistributionPattern.POINTWISE);

        SlotSharingGroup jg1 = new SlotSharingGroup();
        v2.setSlotSharingGroup(jg1);
        v3.setSlotSharingGroup(jg1);

        SlotSharingGroup jg2 = new SlotSharingGroup();
        v4.setSlotSharingGroup(jg2);
        v5.setSlotSharingGroup(jg2);

        List<JobVertex> vertices = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

        ExecutionGraph eg =
                new ExecutionGraph(
                        TestingUtils.defaultExecutionContext(),
                        new JobID(),
                        "test job",
                        new Configuration(),
                        ExecutionConfigTest.getSerializedConfig(),
                        AkkaUtils.getDefaultTimeout(),
                        new NoRestartStrategy());
        eg.attachJobGraph(vertices);

        // verify that the vertices are all in the same slot sharing group
        SlotSharingGroup group1 = null;
        SlotSharingGroup group2 = null;

        // verify that v1 tasks have no slot sharing group
        assertNull(eg.getJobVertex(v1.getID()).getSlotSharingGroup());

        // v2 and v3 are shared
        group1 = eg.getJobVertex(v2.getID()).getSlotSharingGroup();
        assertNotNull(group1);
        assertEquals(group1, eg.getJobVertex(v3.getID()).getSlotSharingGroup());

        assertEquals(2, group1.getJobVertexIds().size());
        assertTrue(group1.getJobVertexIds().contains(v2.getID()));
        assertTrue(group1.getJobVertexIds().contains(v3.getID()));

        // v4 and v5 are shared
        group2 = eg.getJobVertex(v4.getID()).getSlotSharingGroup();
        assertNotNull(group2);
        assertEquals(group2, eg.getJobVertex(v5.getID()).getSlotSharingGroup());

        // BUG FIX: this previously re-asserted group1's size (copy/paste error), leaving
        // group2's membership count completely unchecked
        assertEquals(2, group2.getJobVertexIds().size());
        assertTrue(group2.getJobVertexIds().contains(v4.getID()));
        assertTrue(group2.getJobVertexIds().contains(v5.getID()));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}