private static Scheduler getScheduler(int numInstances, int numSlotsPerInstance)
      throws Exception {
    Scheduler scheduler = new Scheduler();

    for (int i = 0; i < numInstances; i++) {
      byte[] ipAddress = new byte[] {10, 0, 1, (byte) (1 + i)};
      int dataPort = 12001 + i;
      String host = "host" + (i + 1);

      Instance instance = getInstance(ipAddress, dataPort, host, numSlotsPerInstance);
      scheduler.newInstanceAvailable(instance);
    }
    return scheduler;
  }
  private Map<ExecutionAttemptID, Execution> setupExecution(
      JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    final JobID jobId = new JobID();

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "some job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int i = 0; i < dop1 + dop2; i++) {
      scheduler.newInstanceAvailable(
          ExecutionGraphTestUtils.getInstance(
              new ExecutionGraphTestUtils.SimpleActorGateway(
                  TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1 + dop2, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    Map<ExecutionAttemptID, Execution> executions = eg.getRegisteredExecutions();
    assertEquals(dop1 + dop2, executions.size());

    return executions;
  }
  @Test
  public void testMultipleInstancesPerHost() {

    TestLocatableInputSplit[] splits =
        new TestLocatableInputSplit[] {
          new TestLocatableInputSplit(1, "host1"),
          new TestLocatableInputSplit(2, "host1"),
          new TestLocatableInputSplit(3, "host2"),
          new TestLocatableInputSplit(4, "host2"),
          new TestLocatableInputSplit(5, "host3"),
          new TestLocatableInputSplit(6, "host3")
        };

    try {
      AbstractJobVertex vertex = new AbstractJobVertex("test vertex");
      vertex.setParallelism(6);
      vertex.setInvokableClass(DummyInvokable.class);
      vertex.setInputSplitSource(new TestInputSplitSource(splits));

      JobGraph jobGraph = new JobGraph("test job", vertex);

      ExecutionGraph eg =
          new ExecutionGraph(
              jobGraph.getJobID(), jobGraph.getName(), jobGraph.getJobConfiguration(), TIMEOUT);

      eg.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
      eg.setQueuedSchedulingAllowed(false);

      // create a scheduler with 6 instances where always two are on the same host
      Scheduler scheduler = new Scheduler();
      Instance i1 = getInstance(new byte[] {10, 0, 1, 1}, 12345, "host1", 1);
      Instance i2 = getInstance(new byte[] {10, 0, 1, 1}, 12346, "host1", 1);
      Instance i3 = getInstance(new byte[] {10, 0, 1, 2}, 12345, "host2", 1);
      Instance i4 = getInstance(new byte[] {10, 0, 1, 2}, 12346, "host2", 1);
      Instance i5 = getInstance(new byte[] {10, 0, 1, 3}, 12345, "host3", 1);
      Instance i6 = getInstance(new byte[] {10, 0, 1, 3}, 12346, "host4", 1);
      scheduler.newInstanceAvailable(i1);
      scheduler.newInstanceAvailable(i2);
      scheduler.newInstanceAvailable(i3);
      scheduler.newInstanceAvailable(i4);
      scheduler.newInstanceAvailable(i5);
      scheduler.newInstanceAvailable(i6);

      eg.scheduleForExecution(scheduler);

      ExecutionVertex[] tasks = eg.getVerticesTopologically().iterator().next().getTaskVertices();
      assertEquals(6, tasks.length);

      Instance taskInstance1 = tasks[0].getCurrentAssignedResource().getInstance();
      Instance taskInstance2 = tasks[1].getCurrentAssignedResource().getInstance();
      Instance taskInstance3 = tasks[2].getCurrentAssignedResource().getInstance();
      Instance taskInstance4 = tasks[3].getCurrentAssignedResource().getInstance();
      Instance taskInstance5 = tasks[4].getCurrentAssignedResource().getInstance();
      Instance taskInstance6 = tasks[5].getCurrentAssignedResource().getInstance();

      assertTrue(taskInstance1 == i1 || taskInstance1 == i2);
      assertTrue(taskInstance2 == i1 || taskInstance2 == i2);
      assertTrue(taskInstance3 == i3 || taskInstance3 == i4);
      assertTrue(taskInstance4 == i3 || taskInstance4 == i4);
      assertTrue(taskInstance5 == i5 || taskInstance5 == i6);
      assertTrue(taskInstance6 == i5 || taskInstance6 == i6);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  /**
   * Tests that a blocking batch job fails if there are not enough resources left to schedule the
   * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
   * swallow the fail exception when scheduling a consumer task.
   */
  public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    v2.connectNewDataSetAsInput(
        v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "failing test job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
      scheduler.newInstanceAvailable(
          ExecutionGraphTestUtils.getInstance(
              new ExecutionGraphTestUtils.SimpleActorGateway(
                  TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    ExecutionAttemptID attemptID =
        eg.getJobVertex(v1.getID())
            .getTaskVertices()[0]
            .getCurrentExecutionAttempt()
            .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    eg.updateState(
        new TaskExecutionState(
            jobId,
            attemptID,
            ExecutionState.FINISHED,
            null,
            new AccumulatorSnapshot(
                jobId,
                attemptID,
                new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                new HashMap<String, Accumulator<?, ?>>())));

    assertEquals(JobStatus.FAILED, eg.getState());
  }
Ejemplo n.º 5
0
  /**
   * NOTE: This method only throws exceptions if it is in an illegal state to be scheduled, or if
   * the tasks needs to be scheduled immediately and no resource is available. If the task is
   * accepted by the schedule, any error sets the vertex state to failed and triggers the recovery
   * logic.
   *
   * @param scheduler The scheduler to use to schedule this execution attempt.
   * @param queued Flag to indicate whether the scheduler may queue this task if it cannot
   *     immediately deploy it.
   * @throws IllegalStateException Thrown, if the vertex is not in CREATED state, which is the only
   *     state that permits scheduling.
   * @throws NoResourceAvailableException Thrown is no queued scheduling is allowed and no resources
   *     are currently available.
   */
  public boolean scheduleForExecution(Scheduler scheduler, boolean queued)
      throws NoResourceAvailableException {
    if (scheduler == null) {
      throw new IllegalArgumentException("Cannot send null Scheduler when scheduling execution.");
    }

    final SlotSharingGroup sharingGroup = vertex.getJobVertex().getSlotSharingGroup();
    final CoLocationConstraint locationConstraint = vertex.getLocationConstraint();

    // sanity check
    if (locationConstraint != null && sharingGroup == null) {
      throw new RuntimeException(
          "Trying to schedule with co-location constraint but without slot sharing allowed.");
    }

    if (transitionState(CREATED, SCHEDULED)) {

      ScheduledUnit toSchedule =
          locationConstraint == null
              ? new ScheduledUnit(this, sharingGroup)
              : new ScheduledUnit(this, sharingGroup, locationConstraint);

      // IMPORTANT: To prevent leaks of cluster resources, we need to make sure that slots are
      // returned
      //     in all cases where the deployment failed. we use many try {} finally {} clauses to
      // assure that
      if (queued) {
        SlotAllocationFuture future = scheduler.scheduleQueued(toSchedule);

        future.setFutureAction(
            new SlotAllocationFutureAction() {
              @Override
              public void slotAllocated(SimpleSlot slot) {
                try {
                  deployToSlot(slot);
                } catch (Throwable t) {
                  try {
                    slot.releaseSlot();
                  } finally {
                    markFailed(t);
                  }
                }
              }
            });
      } else {
        SimpleSlot slot = scheduler.scheduleImmediately(toSchedule);
        try {
          deployToSlot(slot);
        } catch (Throwable t) {
          try {
            slot.releaseSlot();
          } finally {
            markFailed(t);
          }
        }
      }

      return true;
    } else {
      // call race, already deployed, or already done
      return false;
    }
  }