@Test
  public void testAddAndRemoveInstance() {
    // Walks the scheduler through registering three two-slot instances and then losing them
    // again, checking the instance and slot counters after every step. Registering an instance
    // that is already known, or one that has already died, is expected to be rejected.
    try {
      final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

      final Instance first = getRandomInstance(2);
      final Instance second = getRandomInstance(2);
      final Instance third = getRandomInstance(2);
      final Instance[] registrationOrder = {first, second, third};

      // a fresh scheduler knows neither instances nor slots
      assertEquals(0, scheduler.getNumberOfAvailableInstances());
      assertEquals(0, scheduler.getNumberOfAvailableSlots());

      // every registration contributes one instance and two slots
      for (int count = 1; count <= registrationOrder.length; count++) {
        scheduler.newInstanceAvailable(registrationOrder[count - 1]);
        assertEquals(count, scheduler.getNumberOfAvailableInstances());
        assertEquals(2 * count, scheduler.getNumberOfAvailableSlots());
      }

      // an instance that is already registered cannot be added a second time
      try {
        scheduler.newInstanceAvailable(second);
        fail("Scheduler accepted instance twice");
      } catch (IllegalArgumentException expected) {
        // rejected, as required
      }

      // the counters are unchanged before the first instance dies
      assertEquals(3, scheduler.getNumberOfAvailableInstances());
      assertEquals(6, scheduler.getNumberOfAvailableSlots());

      scheduler.instanceDied(second);
      assertEquals(2, scheduler.getNumberOfAvailableInstances());
      assertEquals(4, scheduler.getNumberOfAvailableSlots());

      // a dead instance cannot be registered again
      try {
        scheduler.newInstanceAvailable(second);
        fail("Scheduler accepted dead instance");
      } catch (IllegalArgumentException expected) {
        // rejected, as required
      }

      // the remaining two instances die one after the other
      scheduler.instanceDied(first);
      assertEquals(1, scheduler.getNumberOfAvailableInstances());
      assertEquals(2, scheduler.getNumberOfAvailableSlots());

      scheduler.instanceDied(third);
      assertEquals(0, scheduler.getNumberOfAvailableInstances());
      assertEquals(0, scheduler.getNumberOfAvailableSlots());

      // reporting an instance as died must also have marked it as not alive
      for (Instance instance : registrationOrder) {
        assertFalse(instance.isAlive());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
 /**
  * Creates the actor system shared by the tests of this class and, inside it, a test actor built
  * from {@code ExecutionGraphTestUtils.SimpleAcknowledgingTaskManager}.
  */
 @BeforeClass
 public static void setup() {
   system = ActorSystem.create("TestingActorSystem", TestingUtils.testConfig());

   final Props taskManagerProps =
       Props.create(ExecutionGraphTestUtils.SimpleAcknowledgingTaskManager.class);
   taskManager = TestActorRef.create(system, taskManagerProps);
 }
  /**
   * Allocates every slot of three instances, marks the instances dead and checks that the slots
   * of a dead instance are canceled and that no further slot can be allocated once all instances
   * are dead.
   */
  @Test
  public void testScheduleWithDyingInstances() {
    try {
      final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

      final Instance survivorA = getRandomInstance(2);
      final Instance firstToDie = getRandomInstance(2);
      final Instance survivorB = getRandomInstance(1);

      scheduler.newInstanceAvailable(survivorA);
      scheduler.newInstanceAvailable(firstToDie);
      scheduler.newInstanceAvailable(survivorB);

      // 2 + 2 + 1 slots: take all of them
      final int totalSlots = 5;
      List<SimpleSlot> allocated = new ArrayList<SimpleSlot>();
      for (int n = 0; n < totalSlots; n++) {
        allocated.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
      }

      firstToDie.markDead();

      // only the slots owned by the dead instance may be canceled
      for (SimpleSlot slot : allocated) {
        if (slot.getOwner() != firstToDie) {
          assertFalse(slot.isCanceled());
        } else {
          assertTrue(slot.isCanceled());
        }

        slot.releaseSlot();
      }

      // the slots of the two instances that are still alive are available again
      assertEquals(3, scheduler.getNumberOfAvailableSlots());

      survivorA.markDead();
      survivorB.markDead();

      // with every instance dead, no further slot can be handed out
      try {
        scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        fail("Scheduler served a slot from a dead instance");
      } catch (NoResourceAvailableException expected) {
        // this is the expected outcome
      } catch (Exception unexpected) {
        fail("Wrong exception type.");
      }

      // by now at the latest the scheduler should have noticed (through its lazy mechanisms)
      // that all instances are gone
      assertEquals(0, scheduler.getNumberOfInstancesWithAvailableSlots());
      assertEquals(0, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /**
   * Builds an execution graph from the two given vertices, schedules it on a scheduler that
   * offers {@code dop1 + dop2} instances, and returns the executions the graph registered.
   *
   * @param v1 first job vertex; its parallelism and invokable class are overwritten here
   * @param dop1 parallelism to set on {@code v1}
   * @param v2 second job vertex; its parallelism and invokable class are overwritten here
   * @param dop2 parallelism to set on {@code v2}
   * @return the executions registered at the execution graph after scheduling
   * @throws Exception if building or scheduling the execution graph fails
   */
  private Map<ExecutionAttemptID, Execution> setupExecution(
      JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    final JobID jobId = new JobID();
    final int totalParallelism = dop1 + dop2;

    v1.setParallelism(dop1);
    v1.setInvokableClass(BatchTask.class);

    v2.setParallelism(dop2);
    v2.setInvokableClass(BatchTask.class);

    // the direct execution context makes the graph run its actions synchronously
    ExecutionGraph graph =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "some job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    graph.setQueuedSchedulingAllowed(false);
    graph.attachJobGraph(Arrays.asList(v1, v2));

    // register one instance per subtask
    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int remaining = totalParallelism; remaining > 0; remaining--) {
      ExecutionGraphTestUtils.SimpleActorGateway gateway =
          new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());
      scheduler.newInstanceAvailable(ExecutionGraphTestUtils.getInstance(gateway));
    }
    assertEquals(totalParallelism, scheduler.getNumberOfAvailableSlots());

    // scheduling triggers the mock deployment
    graph.scheduleForExecution(scheduler);

    Map<ExecutionAttemptID, Execution> registered = graph.getRegisteredExecutions();
    assertEquals(totalParallelism, registered.size());

    return registered;
  }
/**
 * Tests that running jobs are removed and TaskManagers re-register when the leading JobManager
 * changes. Leadership grants and listener notifications are triggered manually on a
 * {@link LeaderElectionRetrievalTestingCluster}.
 */
public class LeaderChangeStateCleanupTest extends TestLogger {

  private static final FiniteDuration timeout = TestingUtils.TESTING_DURATION();

  private final int numJMs = 2;
  private final int numTMs = 2;
  private final int numSlotsPerTM = 2;
  private final int parallelism = numTMs * numSlotsPerTM;

  private Configuration configuration;
  private LeaderElectionRetrievalTestingCluster cluster = null;
  private final JobGraph job = createBlockingJob(parallelism);

  /** Re-arms the blocking receiver and starts a fresh cluster before every test. */
  @Before
  public void before() throws Exception {
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

    configuration = new Configuration();

    configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs);
    configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
    configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTM);

    cluster =
        new LeaderElectionRetrievalTestingCluster(
            configuration, true, false, StreamingMode.BATCH_ONLY);
    cluster.start(false); // TaskManagers don't have to register at the JobManager

    cluster.waitForActorsToBeAlive(); // we only wait until all actors are alive
  }

  /** Stops the cluster if {@link #before()} managed to create one. */
  @After
  public void after() {
    if (cluster != null) {
      cluster.stop();
    }
  }

  /**
   * Tests that a job is properly canceled in the case of a leader change. In such an event all
   * TaskManagers have to disconnect from the previous leader and connect to the newly elected
   * leader.
   */
  @Test
  public void testStateCleanupAfterNewLeaderElectionAndListenerNotification() throws Exception {
    UUID leaderSessionID1 = UUID.randomUUID();
    UUID leaderSessionID2 = UUID.randomUUID();

    // JM(0) leads first and runs the blocking job, so the job is not finished when we cancel it
    Future<Object> jobRemoval = runBlockingJobOnFirstLeader(leaderSessionID1);

    // make the JM(1) the new leader
    cluster.grantLeadership(1, leaderSessionID2);
    // notify all listeners about the event
    cluster.notifyRetrievalListeners(1, leaderSessionID2);

    Await.ready(jobRemoval, timeout);

    cluster.waitForTaskManagersToBeRegistered();

    ActorGateway jm2 = cluster.getLeaderGateway(timeout);

    Future<Object> futureNumberSlots =
        jm2.ask(JobManagerMessages.getRequestTotalNumberOfSlots(), timeout);

    // check that all TMs have registered at the new leader
    int numberSlots = (Integer) Await.result(futureNumberSlots, timeout);

    assertEquals(parallelism, numberSlots);

    // try to resubmit now the non-blocking job, it should complete successfully
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
    cluster.submitJobAndWait(job, false, timeout);
  }

  /**
   * Tests that a job is properly canceled in the case of a leader change. However, this time only
   * the JMs are notified about the leader change and the TMs still believe the old leader to have
   * leadership.
   */
  @Test
  public void testStateCleanupAfterNewLeaderElection() throws Exception {
    UUID leaderSessionID = UUID.randomUUID();
    UUID newLeaderSessionID = UUID.randomUUID();

    // the blocking job lets us test the job clean up
    Future<Object> jobRemoval = runBlockingJobOnFirstLeader(leaderSessionID);

    // only notify the JMs about the new leader JM(1)
    cluster.grantLeadership(1, newLeaderSessionID);

    // job should be removed anyway
    Await.ready(jobRemoval, timeout);
  }

  /**
   * Tests that a job is properly canceled in the event of a leader change. However, this time only
   * the TMs are notified about the changing leader. This should be enough to cancel the currently
   * running job, though.
   */
  @Test
  public void testStateCleanupAfterListenerNotification() throws Exception {
    UUID leaderSessionID = UUID.randomUUID();
    UUID newLeaderSessionID = UUID.randomUUID();

    Future<Object> jobRemoval = runBlockingJobOnFirstLeader(leaderSessionID);

    // notify listeners (TMs) about the leader change
    cluster.notifyRetrievalListeners(1, newLeaderSessionID);

    Await.ready(jobRemoval, timeout);
  }

  /**
   * Tests that the same JobManager can be reelected as the leader. Even though the same JM is
   * elected as the next leader, all currently running jobs should be canceled properly and all TMs
   * should disconnect from the leader and then reconnect to it.
   */
  @Test
  public void testReelectionOfSameJobManager() throws Exception {
    UUID leaderSessionID = UUID.randomUUID();
    UUID newLeaderSessionID = UUID.randomUUID();

    FiniteDuration shortTimeout = new FiniteDuration(20, TimeUnit.SECONDS);

    Future<Object> jobRemoval = runBlockingJobOnFirstLeader(leaderSessionID);

    // make JM(0) again the leader --> this implies first a leadership revocation
    cluster.grantLeadership(0, newLeaderSessionID);

    Await.ready(jobRemoval, timeout);

    // The TMs should not be able to reconnect since they don't know the current leader
    // session ID
    try {
      cluster.waitForTaskManagersToBeRegistered(shortTimeout);
      fail("TaskManager should not be able to register at JobManager.");
    } catch (TimeoutException e) {
      // expected exception since the TMs still have the old leader session ID
    }

    // notify the TMs about the new (old) leader
    cluster.notifyRetrievalListeners(0, newLeaderSessionID);

    cluster.waitForTaskManagersToBeRegistered();

    // try to resubmit now the non-blocking job, it should complete successfully
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
    cluster.submitJobAndWait(job, false, timeout);
  }

  /**
   * Creates a two-vertex job (sender and receiver, connected pointwise, sharing one slot sharing
   * group) whose receiver is a {@code Tasks.BlockingOnceReceiver}. As a side effect the
   * receiver's global blocking flag is switched on.
   *
   * @param parallelism parallelism set on both vertices
   * @return the job graph named "Blocking test job"
   */
  public JobGraph createBlockingJob(int parallelism) {
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");

    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);

    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);

    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE);

    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);

    return new JobGraph("Blocking test job", sender, receiver);
  }

  /**
   * Common first phase of every test: makes JM(0) the leader under the given session ID,
   * notifies the retrieval listeners about it, waits for the TaskManagers to register, submits
   * the blocking job in detached mode and waits until all of its vertices are running or
   * finished.
   *
   * @param leaderSessionID session ID under which JM(0) is granted leadership
   * @return future of the leader's answer to the {@code NotifyWhenJobRemoved} request for the job
   * @throws Exception if any of the cluster interactions fails or times out
   */
  private Future<Object> runBlockingJobOnFirstLeader(UUID leaderSessionID) throws Exception {
    // first make JM(0) the leader
    cluster.grantLeadership(0, leaderSessionID);
    // notify all listeners
    cluster.notifyRetrievalListeners(0, leaderSessionID);

    cluster.waitForTaskManagersToBeRegistered();

    // submit blocking job
    cluster.submitJobDetached(job);

    ActorGateway jm = cluster.getLeaderGateway(timeout);

    Future<Object> wait =
        jm.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);

    Await.ready(wait, timeout);

    // registered before the leader change so that the removal cannot be missed
    return jm.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);
  }
}
  /**
   * Deploys subtask 3 of vertex v2 (which consumes v1 and is consumed by both v3 and v4) to a
   * slot and checks the task deployment descriptor that reaches the instance gateway.
   */
  @Test
  public void testBuildDeploymentDescriptor() {
    final int parallelism = 10;
    final int subtaskIndex = 3;

    try {
      final JobID jobId = new JobID();

      final JobVertexID jid1 = new JobVertexID();
      final JobVertexID jid2 = new JobVertexID();
      final JobVertexID jid3 = new JobVertexID();
      final JobVertexID jid4 = new JobVertexID();

      JobVertex v1 = new JobVertex("v1", jid1);
      JobVertex v2 = new JobVertex("v2", jid2);
      JobVertex v3 = new JobVertex("v3", jid3);
      JobVertex v4 = new JobVertex("v4", jid4);

      for (JobVertex vertex : new JobVertex[] {v1, v2, v3, v4}) {
        vertex.setParallelism(parallelism);
        vertex.setInvokableClass(BatchTask.class);
      }

      // v1 -> v2 -> {v3, v4}
      v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL);
      v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);
      v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);

      ExecutionGraph graph =
          new ExecutionGraph(
              TestingUtils.defaultExecutionContext(),
              jobId,
              "some job",
              new Configuration(),
              new SerializedValue<>(new ExecutionConfig()),
              AkkaUtils.getDefaultTimeout(),
              new NoRestartStrategy());

      graph.attachJobGraph(Arrays.asList(v1, v2, v3, v4));

      ExecutionVertex deployed =
          graph.getAllVertices().get(jid2).getTaskVertices()[subtaskIndex];

      ExecutionGraphTestUtils.SimpleActorGateway gateway =
          new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());

      final Instance instance = getInstance(gateway);
      final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

      assertEquals(ExecutionState.CREATED, deployed.getExecutionState());
      deployed.deployToSlot(slot);
      assertEquals(ExecutionState.DEPLOYING, deployed.getExecutionState());

      // the gateway records the last descriptor it was handed
      TaskDeploymentDescriptor descriptor = gateway.lastTDD;
      assertNotNull(descriptor);

      assertEquals(jobId, descriptor.getJobID());
      assertEquals(jid2, descriptor.getVertexID());
      assertEquals(subtaskIndex, descriptor.getIndexInSubtaskGroup());
      assertEquals(parallelism, descriptor.getNumberOfSubtasks());
      assertEquals(BatchTask.class.getName(), descriptor.getInvokableClassName());
      assertEquals("v2", descriptor.getTaskName());

      List<ResultPartitionDeploymentDescriptor> produced = descriptor.getProducedPartitions();
      List<InputGateDeploymentDescriptor> consumed = descriptor.getInputGates();

      // two consumers (v3, v4) and one producer (v1)
      assertEquals(2, produced.size());
      assertEquals(1, consumed.size());

      assertEquals(parallelism, produced.get(0).getNumberOfSubpartitions());
      assertEquals(parallelism, produced.get(1).getNumberOfSubpartitions());
      assertEquals(parallelism, consumed.get(0).getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /**
   * Tests that a blocking batch job fails if there are not enough resources left to schedule the
   * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
   * swallow the fail exception when scheduling a consumer task.
   *
   * @throws Exception if building or scheduling the execution graph fails unexpectedly
   */
  @Test
  public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // blocking result between source and sink
    v2.connectNewDataSetAsInput(
        v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "failing test job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    // only dop1 instances are registered, i.e. none is set aside for the sink
    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
      scheduler.newInstanceAvailable(
          ExecutionGraphTestUtils.getInstance(
              new ExecutionGraphTestUtils.SimpleActorGateway(
                  TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    // drive the single source subtask through RUNNING to FINISHED
    ExecutionAttemptID attemptID =
        eg.getJobVertex(v1.getID())
            .getTaskVertices()[0]
            .getCurrentExecutionAttempt()
            .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    eg.updateState(
        new TaskExecutionState(
            jobId,
            attemptID,
            ExecutionState.FINISHED,
            null,
            new AccumulatorSnapshot(
                jobId,
                attemptID,
                new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                new HashMap<String, Accumulator<?, ?>>())));

    // the job as a whole must end up FAILED
    assertEquals(JobStatus.FAILED, eg.getState());
  }
 /**
  * Submits the given job graph to the {@code flink} test cluster through
  * {@code submitJobAndWait}, passing {@code false} and {@code TestingUtils.TESTING_DURATION()}
  * (presumably the wait timeout; confirm against the cluster API).
  *
  * @param jobGraph the job graph to submit
  * @throws JobExecutionException propagated from {@code submitJobAndWait}
  */
 private void submitJobGraphAndWait(final JobGraph jobGraph) throws JobExecutionException {
   flink.submitJobAndWait(jobGraph, false, TestingUtils.TESTING_DURATION());
 }
 /**
  * Starts the testing cluster once before the tests of this class run and stores it in
  * {@code flink}. The cluster is created from {@code NUMBER_OF_SLOTS_PER_TM},
  * {@code NUMBER_OF_TMS} and {@code TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT()}.
  *
  * @throws Exception declared for failures while starting the cluster
  */
 @BeforeClass
 public static void setUp() throws Exception {
   flink =
       TestingUtils.startTestingCluster(
           NUMBER_OF_SLOTS_PER_TM, NUMBER_OF_TMS, TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT());
 }
 /**
  * Creates the gateway, handing {@code TestingUtils.defaultExecutionContext()} to the
  * superclass constructor.
  */
 public NullInputSplitGateway() {
   super(TestingUtils.defaultExecutionContext());
 }
  /**
   * Checks how the scheduler honors location preferences on three two-slot instances: a
   * preferred instance is used while it has free slots, otherwise another instance is chosen, and
   * the scheduler's assignment counters reflect each kind of decision.
   */
  @Test
  public void testSchedulingLocation() {
    try {
      final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

      final Instance instanceA = getRandomInstance(2);
      final Instance instanceB = getRandomInstance(2);
      final Instance instanceC = getRandomInstance(2);

      scheduler.newInstanceAvailable(instanceA);
      scheduler.newInstanceAvailable(instanceB);
      scheduler.newInstanceAvailable(instanceC);

      // no preference at all: the scheduler picks an arbitrary instance
      SimpleSlot unconstrained =
          scheduler.allocateSlot(new ScheduledUnit(getTestVertex(new Instance[0])), false).get();

      // name the instances relative to wherever that first slot landed
      final Instance first = (Instance) unconstrained.getOwner();

      final Instance second;
      if (first == instanceA) {
        second = instanceB;
      } else {
        second = instanceA;
      }

      final Instance third;
      if (first == instanceC) {
        third = instanceB;
      } else {
        third = instanceC;
      }

      // prefers the location of the first slot --> lands on the first instance again
      SimpleSlot onFirstAgain =
          scheduler
              .allocateSlot(
                  new ScheduledUnit(getTestVertex(unconstrained.getTaskManagerLocation())), false)
              .get();
      assertEquals(first, onFirstAgain.getOwner());

      // first or second --> second, because first is full
      SimpleSlot onSecond =
          scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, second)), false).get();
      assertEquals(second, onSecond.getOwner());

      // first or third --> third (because first is full), twice
      SimpleSlot onThirdA =
          scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
      SimpleSlot onThirdB =
          scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
      assertEquals(third, onThirdA.getOwner());
      assertEquals(third, onThirdB.getOwner());

      // first or third --> second, because both preferred instances are full
      SimpleSlot nonLocal =
          scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
      assertEquals(second, nonLocal.getOwner());

      // free one slot on the first and one on the second instance
      onFirstAgain.releaseSlot();
      nonLocal.releaseSlot();

      // first or third --> first, which has a free slot again
      SimpleSlot backOnFirst =
          scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false).get();
      assertEquals(first, backOnFirst.getOwner());

      // one request without preference, one served off-preference, five served as preferred
      assertEquals(1, scheduler.getNumberOfUnconstrainedAssignments());
      assertEquals(1, scheduler.getNumberOfNonLocalizedAssignments());
      assertEquals(5, scheduler.getNumberOfLocalizedAssignments());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /**
   * Requests 2000 slots, with queueing allowed, from 50 instances that offer between one and
   * three slots each, while a separate thread releases every slot as soon as it has been granted.
   * Afterwards every request must have been served with a distinct slot and all slots must be
   * available again.
   */
  @Test
  public void testScheduleQueueing() {
    final int NUM_INSTANCES = 50;
    final int NUM_SLOTS_PER_INSTANCE = 3;
    final int NUM_TASKS_TO_SCHEDULE = 2000;

    try {
      // note: since this test asynchronously releases slots, the executor needs release workers.
      // doing the release call synchronous can lead to a deadlock
      Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

      for (int i = 0; i < NUM_INSTANCES; i++) {
        // random slot count in [1, NUM_SLOTS_PER_INSTANCE]
        scheduler.newInstanceAvailable(
            getRandomInstance((int) (Math.random() * NUM_SLOTS_PER_INSTANCE) + 1));
      }

      assertEquals(NUM_INSTANCES, scheduler.getNumberOfAvailableInstances());
      final int totalSlots = scheduler.getNumberOfAvailableSlots();

      // all slots we ever got.
      List<Future<SimpleSlot>> allAllocatedSlots = new ArrayList<>();

      // slots that need to be released; also serves as the monitor both sides synchronize on
      final Set<SimpleSlot> toRelease = new HashSet<>();

      // flag to track errors in the concurrent thread
      final AtomicBoolean errored = new AtomicBoolean(false);

      // thread to asynchronously release slots
      Runnable disposer =
          new Runnable() {

            @Override
            public void run() {
              try {
                int recycled = 0;
                while (recycled < NUM_TASKS_TO_SCHEDULE) {
                  synchronized (toRelease) {
                    // loop guards against wake-ups that find the set empty
                    while (toRelease.isEmpty()) {
                      toRelease.wait();
                    }

                    Iterator<SimpleSlot> iter = toRelease.iterator();
                    SimpleSlot next = iter.next();
                    iter.remove();

                    next.releaseSlot();
                    recycled++;
                  }
                }
              } catch (Throwable t) {
                // print the cause; the flag alone would not tell why the releaser stopped
                t.printStackTrace();
                errored.set(true);
              }
            }
          };

      Thread disposeThread = new Thread(disposer);
      disposeThread.start();

      for (int i = 0; i < NUM_TASKS_TO_SCHEDULE; i++) {
        Future<SimpleSlot> future = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), true);
        // hand every granted slot to the releaser thread
        future.thenAcceptAsync(
            new AcceptFunction<SimpleSlot>() {
              @Override
              public void accept(SimpleSlot slot) {
                synchronized (toRelease) {
                  toRelease.add(slot);
                  toRelease.notifyAll();
                }
              }
            },
            TestingUtils.defaultExecutionContext());
        allAllocatedSlots.add(future);
      }

      // returns once the releaser has recycled NUM_TASKS_TO_SCHEDULE slots or has failed
      disposeThread.join();

      assertFalse("The slot releasing thread caused an error.", errored.get());

      List<SimpleSlot> slotsAfter = new ArrayList<>();
      for (Future<SimpleSlot> future : allAllocatedSlots) {
        slotsAfter.add(future.get());
      }

      assertEquals(
          "All instances should have available slots.",
          NUM_INSTANCES,
          scheduler.getNumberOfInstancesWithAvailableSlots());

      // the slots should all be different
      assertTrue(areAllDistinct(slotsAfter.toArray()));

      assertEquals(
          "All slots should be available.", totalSlots, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /**
   * Allocates slots without queueing until the scheduler is exhausted, checks that a further
   * request is refused, and verifies that released slots (including slots released twice) are
   * counted as available exactly once.
   */
  @Test
  public void testScheduleImmediately() {
    try {
      final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
      assertEquals(0, scheduler.getNumberOfAvailableSlots());

      // 2 + 1 + 2 slots
      scheduler.newInstanceAvailable(getRandomInstance(2));
      scheduler.newInstanceAvailable(getRandomInstance(1));
      scheduler.newInstanceAvailable(getRandomInstance(2));
      assertEquals(5, scheduler.getNumberOfAvailableSlots());

      // occupy every slot
      SimpleSlot slotA = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
      SimpleSlot slotB = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
      SimpleSlot slotC = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
      SimpleSlot slotD = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
      SimpleSlot slotE = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();

      // no slot may have been handed out twice
      assertTrue(areAllDistinct(slotA, slotB, slotC, slotD, slotE));

      // a sixth request cannot be served
      try {
        scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false);
        fail("Scheduler accepted scheduling request without available resource.");
      } catch (NoResourceAvailableException expected) {
        // the refusal is what we want
      }

      // give two slots back
      slotC.releaseSlot();
      slotD.releaseSlot();
      assertEquals(2, scheduler.getNumberOfAvailableSlots());

      // which makes room for two more allocations
      SimpleSlot slotF = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
      SimpleSlot slotG = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();

      assertTrue(areAllDistinct(slotA, slotB, slotC, slotD, slotE, slotF, slotG));

      final SimpleSlot[] stillHeld = {slotA, slotB, slotE, slotF, slotG};

      // release everything that is still held
      for (SimpleSlot held : stillHeld) {
        held.releaseSlot();
      }
      assertEquals(5, scheduler.getNumberOfAvailableSlots());

      // releasing the same slots a second time (accidentally) must not change the count
      for (SimpleSlot held : stillHeld) {
        held.releaseSlot();
      }
      assertEquals(5, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
Example #14
0
  /**
   * Verifies that the slot sharing groups set on job vertices are carried over to the execution
   * graph.
   *
   * <p>Test setup:
   *
   * <ul>
   *   <li>v1 is isolated, no slot sharing
   *   <li>v2 and v3 (not connected) share slots
   *   <li>v4 and v5 (connected) share slots
   * </ul>
   */
  @Test
  public void testAssignSlotSharingGroup() {
    try {
      JobVertex v1 = new JobVertex("v1");
      JobVertex v2 = new JobVertex("v2");
      JobVertex v3 = new JobVertex("v3");
      JobVertex v4 = new JobVertex("v4");
      JobVertex v5 = new JobVertex("v5");

      v1.setParallelism(4);
      v2.setParallelism(5);
      v3.setParallelism(7);
      v4.setParallelism(1);
      v5.setParallelism(11);

      v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE);
      v5.connectNewDataSetAsInput(v4, DistributionPattern.POINTWISE);

      SlotSharingGroup jg1 = new SlotSharingGroup();
      v2.setSlotSharingGroup(jg1);
      v3.setSlotSharingGroup(jg1);

      SlotSharingGroup jg2 = new SlotSharingGroup();
      v4.setSlotSharingGroup(jg2);
      v5.setSlotSharingGroup(jg2);

      List<JobVertex> vertices = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

      ExecutionGraph eg =
          new ExecutionGraph(
              TestingUtils.defaultExecutionContext(),
              new JobID(),
              "test job",
              new Configuration(),
              ExecutionConfigTest.getSerializedConfig(),
              AkkaUtils.getDefaultTimeout(),
              new NoRestartStrategy());
      eg.attachJobGraph(vertices);

      // verify that v1 tasks have no slot sharing group
      assertNull(eg.getJobVertex(v1.getID()).getSlotSharingGroup());

      // v2 and v3 are shared
      SlotSharingGroup group1 = eg.getJobVertex(v2.getID()).getSlotSharingGroup();
      assertNotNull(group1);
      assertEquals(group1, eg.getJobVertex(v3.getID()).getSlotSharingGroup());

      assertEquals(2, group1.getJobVertexIds().size());
      assertTrue(group1.getJobVertexIds().contains(v2.getID()));
      assertTrue(group1.getJobVertexIds().contains(v3.getID()));

      // v4 and v5 are shared
      SlotSharingGroup group2 = eg.getJobVertex(v4.getID()).getSlotSharingGroup();
      assertNotNull(group2);
      assertEquals(group2, eg.getJobVertex(v5.getID()).getSlotSharingGroup());

      // check the size of group2 here (the original re-checked group1 by mistake)
      assertEquals(2, group2.getJobVertexIds().size());
      assertTrue(group2.getJobVertexIds().contains(v4.getID()));
      assertTrue(group2.getJobVertexIds().contains(v5.getID()));
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }