Ejemplo n.º 1
0
  /**
   * Connects this execution job vertex to the intermediate results it consumes.
   *
   * <p>For every input edge of the underlying job vertex, the referenced intermediate result is
   * looked up in {@code intermediateDataSets}, recorded as an input of this vertex, registered
   * with a new consumer, and connected as a source to each of the first {@code parallelism} task
   * vertices.
   *
   * @param intermediateDataSets the known intermediate results, keyed by their data set ID
   * @throws JobException if an edge references an ID that has no entry in {@code
   *     intermediateDataSets}
   */
  public void connectToPredecessors(
      Map<IntermediateDataSetID, IntermediateResult> intermediateDataSets) throws JobException {

    final List<JobEdge> jobEdges = jobVertex.getInputs();

    if (LOG.isDebugEnabled()) {
      final String summary =
          String.format(
              "Connecting ExecutionJobVertex %s (%s) to %d predecessors.",
              jobVertex.getID(), jobVertex.getName(), jobEdges.size());
      LOG.debug(summary);
    }

    for (int inputIndex = 0; inputIndex < jobEdges.size(); inputIndex++) {
      final JobEdge jobEdge = jobEdges.get(inputIndex);

      if (LOG.isDebugEnabled()) {
        final String message;
        if (jobEdge.getSource() != null) {
          // the edge already knows its source, so the producing vertex can be named in the log
          message =
              String.format(
                  "Connecting input %d of vertex %s (%s) to intermediate result referenced via predecessor %s (%s).",
                  inputIndex,
                  jobVertex.getID(),
                  jobVertex.getName(),
                  jobEdge.getSource().getProducer().getID(),
                  jobEdge.getSource().getProducer().getName());
        } else {
          // only the ID of the source data set is available
          message =
              String.format(
                  "Connecting input %d of vertex %s (%s) to intermediate result referenced via ID %s.",
                  inputIndex, jobVertex.getID(), jobVertex.getName(), jobEdge.getSourceId());
        }
        LOG.debug(message);
      }

      // Look the intermediate result up by ID. A missing entry means that the result has not
      // been created yet, or that this method is not being called for the job vertices in
      // topological order.
      final IntermediateResult sourceResult = intermediateDataSets.get(jobEdge.getSourceId());
      if (sourceResult == null) {
        throw new JobException(
            "Cannot connect this job graph to the previous graph. No previous intermediate result found for ID "
                + jobEdge.getSourceId());
      }

      this.inputs.add(sourceResult);

      final int consumerIndex = sourceResult.registerConsumer();

      for (int subtask = 0; subtask < parallelism; subtask++) {
        taskVertices[subtask].connectSource(inputIndex, sourceResult, jobEdge, consumerIndex);
      }
    }
  }
  /**
   * Tests that a blocking batch job fails if there are not enough resources left to schedule the
   * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
   * swallow the fail exception when scheduling a consumer task.
   *
   * <p>The job consists of a "source" and a "sink" vertex connected through a blocking result.
   * The scheduler is given {@code dop1} instances and queued scheduling is disabled. After the
   * source attempt is reported as RUNNING and then FINISHED, the job is expected to be in state
   * {@link JobStatus#FAILED}.
   *
   * @throws Exception if building, attaching or scheduling the execution graph fails
   */
  @Test
  public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // blocking result between source and sink
    v2.connectNewDataSetAsInput(
        v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "failing test job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    // disallow queued scheduling for this graph
    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    // register dop1 instances; the assertion below pins the resulting number of free slots
    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
      scheduler.newInstanceAvailable(
          ExecutionGraphTestUtils.getInstance(
              new ExecutionGraphTestUtils.SimpleActorGateway(
                  TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    // report the single source subtask as RUNNING and then as FINISHED
    ExecutionAttemptID attemptID =
        eg.getJobVertex(v1.getID())
            .getTaskVertices()[0]
            .getCurrentExecutionAttempt()
            .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    eg.updateState(
        new TaskExecutionState(
            jobId,
            attemptID,
            ExecutionState.FINISHED,
            null,
            new AccumulatorSnapshot(
                jobId,
                attemptID,
                new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                new HashMap<String, Accumulator<?, ?>>())));

    // the job as a whole must have failed
    assertEquals(JobStatus.FAILED, eg.getState());
  }
Ejemplo n.º 3
0
  /**
   * Creates the execution-side representation of a job vertex, including one {@link
   * ExecutionVertex} per parallel subtask, one {@link IntermediateResult} per produced data set,
   * and the input splits if the job vertex has an input split source.
   *
   * @param graph the execution graph this vertex belongs to; must not be {@code null}
   * @param jobVertex the job vertex to represent; must not be {@code null}
   * @param defaultParallelism the parallelism to use when the job vertex's own parallelism is not
   *     greater than zero
   * @param timeout passed on to every created {@link ExecutionVertex}
   * @param createTimestamp passed on to every created {@link ExecutionVertex}
   * @throws JobException if the vertex has a co-location group but no slot sharing group, or if
   *     creating the input splits fails
   * @throws IOException declared by this constructor; presumably raised when the task
   *     information cannot be serialized (TODO confirm against {@code SerializedValue})
   * @throws NullPointerException if {@code graph} or {@code jobVertex} is {@code null}
   */
  public ExecutionJobVertex(
      ExecutionGraph graph,
      JobVertex jobVertex,
      int defaultParallelism,
      Time timeout,
      long createTimestamp)
      throws JobException, IOException {

    if (graph == null) {
      throw new NullPointerException();
    }
    if (jobVertex == null) {
      throw new NullPointerException();
    }

    this.graph = graph;
    this.jobVertex = jobVertex;

    // fall back to the default parallelism when the vertex does not define a positive one
    final int configuredParallelism = jobVertex.getParallelism();
    final int subtaskCount;
    if (configuredParallelism > 0) {
      subtaskCount = configuredParallelism;
    } else {
      subtaskCount = defaultParallelism;
    }

    this.parallelism = subtaskCount;

    final int upperBound = jobVertex.getMaxParallelism();
    final String parallelismError =
        "The maximum parallelism ("
            + upperBound
            + ") must be greater or equal than the parallelism ("
            + parallelism
            + ").";

    Preconditions.checkArgument(upperBound >= parallelism, parallelismError);
    this.maxParallelism = upperBound;

    final TaskInformation taskInformation =
        new TaskInformation(
            jobVertex.getID(),
            jobVertex.getName(),
            parallelism,
            maxParallelism,
            jobVertex.getInvokableClassName(),
            jobVertex.getConfiguration());
    this.serializedTaskInformation = new SerializedValue<>(taskInformation);

    this.taskVertices = new ExecutionVertex[subtaskCount];

    this.inputs = new ArrayList<IntermediateResult>(jobVertex.getInputs().size());

    // adopt the sharing and co-location groups of the job vertex
    this.slotSharingGroup = jobVertex.getSlotSharingGroup();
    this.coLocationGroup = jobVertex.getCoLocationGroup();

    // a co-location constraint is only accepted together with a slot sharing group
    if (coLocationGroup != null && slotSharingGroup == null) {
      throw new JobException("Vertex uses a co-location constraint without using slot sharing");
    }

    // one intermediate result per data set produced by the job vertex
    this.producedDataSets =
        new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];

    for (int dataSetIndex = 0;
        dataSetIndex < jobVertex.getProducedDataSets().size();
        dataSetIndex++) {
      final IntermediateDataSet dataSet = jobVertex.getProducedDataSets().get(dataSetIndex);

      this.producedDataSets[dataSetIndex] =
          new IntermediateResult(dataSet.getId(), this, subtaskCount, dataSet.getResultType());
    }

    // history size comes from the job configuration, or from the option's default without one
    final Configuration jobConfiguration = graph.getJobConfiguration();
    final int maxPriorAttemptsHistoryLength;
    if (jobConfiguration != null) {
      maxPriorAttemptsHistoryLength =
          jobConfiguration.getInteger(JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE);
    } else {
      maxPriorAttemptsHistoryLength = JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE.defaultValue();
    }

    // one execution vertex per parallel subtask
    for (int subtask = 0; subtask < subtaskCount; subtask++) {
      this.taskVertices[subtask] =
          new ExecutionVertex(
              this,
              subtask,
              this.producedDataSets,
              timeout,
              createTimestamp,
              maxPriorAttemptsHistoryLength);
    }

    // sanity check: every intermediate result must have had exactly one partition assigned per
    // subtask
    for (IntermediateResult produced : this.producedDataSets) {
      if (produced.getNumberOfAssignedPartitions() != parallelism) {
        throw new RuntimeException(
            "The intermediate result's partitions were not correctly assigned.");
      }
    }

    // create the input splits, if the vertex has a split source
    try {
      @SuppressWarnings("unchecked")
      InputSplitSource<InputSplit> source =
          (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();

      if (source == null) {
        inputSplits = null;
      } else {
        // run the split source with the user class loader as the thread's context class loader,
        // and always restore the previous loader afterwards
        final Thread thread = Thread.currentThread();
        final ClassLoader previousLoader = thread.getContextClassLoader();
        thread.setContextClassLoader(graph.getUserClassLoader());
        try {
          inputSplits = source.createInputSplits(subtaskCount);

          if (inputSplits != null) {
            splitAssigner = source.getInputSplitAssigner(inputSplits);
          }
        } finally {
          thread.setContextClassLoader(previousLoader);
        }
      }
    } catch (Throwable t) {
      throw new JobException("Creating the input splits caused an error: " + t.getMessage(), t);
    }

    finishedSubtasks = new boolean[parallelism];
  }
Ejemplo n.º 4
0
 /**
  * Returns the ID of the job vertex that this object wraps.
  *
  * @return the result of {@code jobVertex.getID()}
  */
 @Override
 public JobVertexID getJobVertexId() {
   final JobVertexID vertexId = jobVertex.getID();
   return vertexId;
 }
Ejemplo n.º 5
0
  /**
   * Verifies that slot sharing groups set on job vertices are visible on the corresponding
   * execution job vertices after the job graph has been attached.
   *
   * <p>Test setup:
   *
   * <ul>
   *   <li>v1 is isolated, no slot sharing
   *   <li>v2 and v3 (not connected) share slots
   *   <li>v4 and v5 (connected) share slots
   * </ul>
   */
  @Test
  public void testAssignSlotSharingGroup() {
    try {
      JobVertex v1 = new JobVertex("v1");
      JobVertex v2 = new JobVertex("v2");
      JobVertex v3 = new JobVertex("v3");
      JobVertex v4 = new JobVertex("v4");
      JobVertex v5 = new JobVertex("v5");

      v1.setParallelism(4);
      v2.setParallelism(5);
      v3.setParallelism(7);
      v4.setParallelism(1);
      v5.setParallelism(11);

      v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE);
      v5.connectNewDataSetAsInput(v4, DistributionPattern.POINTWISE);

      SlotSharingGroup jg1 = new SlotSharingGroup();
      v2.setSlotSharingGroup(jg1);
      v3.setSlotSharingGroup(jg1);

      SlotSharingGroup jg2 = new SlotSharingGroup();
      v4.setSlotSharingGroup(jg2);
      v5.setSlotSharingGroup(jg2);

      List<JobVertex> vertices = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

      ExecutionGraph eg =
          new ExecutionGraph(
              TestingUtils.defaultExecutionContext(),
              new JobID(),
              "test job",
              new Configuration(),
              ExecutionConfigTest.getSerializedConfig(),
              AkkaUtils.getDefaultTimeout(),
              new NoRestartStrategy());
      eg.attachJobGraph(vertices);

      // verify that v1 tasks have no slot sharing group
      assertNull(eg.getJobVertex(v1.getID()).getSlotSharingGroup());

      // v2 and v3 are shared
      SlotSharingGroup group1 = eg.getJobVertex(v2.getID()).getSlotSharingGroup();
      assertNotNull(group1);
      assertEquals(group1, eg.getJobVertex(v3.getID()).getSlotSharingGroup());

      assertEquals(2, group1.getJobVertexIds().size());
      assertTrue(group1.getJobVertexIds().contains(v2.getID()));
      assertTrue(group1.getJobVertexIds().contains(v3.getID()));

      // v4 and v5 are shared
      SlotSharingGroup group2 = eg.getJobVertex(v4.getID()).getSlotSharingGroup();
      assertNotNull(group2);
      assertEquals(group2, eg.getJobVertex(v5.getID()).getSlotSharingGroup());

      // check the size of the second group (group2), not group1 again
      assertEquals(2, group2.getJobVertexIds().size());
      assertTrue(group2.getJobVertexIds().contains(v4.getID()));
      assertTrue(group2.getJobVertexIds().contains(v5.getID()));
    } catch (Exception e) {
      // any exception while building or attaching the graph is a test failure
      e.printStackTrace();
      fail(e.getMessage());
    }
  }