Exemple #1
0
  public static Map<JobVertexID, ExecutionJobVertex> includeLegacyJobVertexIDs(
      Map<JobVertexID, ExecutionJobVertex> tasks) {

    Map<JobVertexID, ExecutionJobVertex> expanded = new HashMap<>(2 * tasks.size());
    // first include all new ids
    expanded.putAll(tasks);

    // now expand and add legacy ids
    for (ExecutionJobVertex executionJobVertex : tasks.values()) {
      if (null != executionJobVertex) {
        JobVertex jobVertex = executionJobVertex.getJobVertex();
        if (null != jobVertex) {
          List<JobVertexID> alternativeIds = jobVertex.getIdAlternatives();
          for (JobVertexID jobVertexID : alternativeIds) {
            ExecutionJobVertex old = expanded.put(jobVertexID, executionJobVertex);
            Preconditions.checkState(
                null == old || old.equals(executionJobVertex),
                "Ambiguous jobvertex id detected during expansion to legacy ids.");
          }
        }
      }
    }

    return expanded;
  }
  /**
   * Verifies a correct error message when vertices with master initialization (input formats /
   * output formats) fail.
   */
  @Test
  public void testFailureWhenInitializeOnMasterFails() {
    try {
      // create a simple job graph

      JobVertex jobVertex =
          new JobVertex("Vertex that fails in initializeOnMaster") {

            @Override
            public void initializeOnMaster(ClassLoader loader) throws Exception {
              throw new RuntimeException("test exception");
            }
          };

      jobVertex.setInvokableClass(Tasks.NoOpInvokable.class);
      JobGraph jg = new JobGraph("test job", jobVertex);

      // submit the job
      Future<Object> submitFuture =
          jmGateway.ask(
              new JobManagerMessages.SubmitJob(jg, ListeningBehaviour.EXECUTION_RESULT), timeout);
      try {
        Await.result(submitFuture, timeout);
      } catch (JobExecutionException e) {
        // that is what we expect
        // test that the exception nesting is not too deep
        assertTrue(e.getCause() instanceof RuntimeException);
      } catch (Exception e) {
        fail("Wrong exception type");
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  private JobGraph createTestJobGraph(
      String jobName, int senderParallelism, int receiverParallelism) {

    // The sender and receiver invokable logic ensure that each subtask gets the expected data
    final JobVertex sender = new JobVertex("Sender");
    sender.setInvokableClass(RoundRobinSubtaskIndexSender.class);
    sender
        .getConfiguration()
        .setInteger(RoundRobinSubtaskIndexSender.CONFIG_KEY, receiverParallelism);
    sender.setParallelism(senderParallelism);

    final JobVertex receiver = new JobVertex("Receiver");
    receiver.setInvokableClass(SubtaskIndexReceiver.class);
    receiver.getConfiguration().setInteger(SubtaskIndexReceiver.CONFIG_KEY, senderParallelism);
    receiver.setParallelism(receiverParallelism);

    receiver.connectNewDataSetAsInput(
        sender, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

    final JobGraph jobGraph = new JobGraph(jobName, sender, receiver);

    // We need to allow queued scheduling, because there are not enough slots available
    // to run all tasks at once. We queue tasks and then let them finish/consume the blocking
    // result one after the other.
    jobGraph.setAllowQueuedScheduling(true);

    return jobGraph;
  }
  @Test
  public void testFailureWhenJarBlobsMissing() {
    try {
      // create a simple job graph
      JobVertex jobVertex = new JobVertex("Test Vertex");
      jobVertex.setInvokableClass(Tasks.NoOpInvokable.class);
      JobGraph jg = new JobGraph("test job", jobVertex);

      // request the blob port from the job manager
      Future<Object> future =
          jmGateway.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
      int blobPort = (Integer) Await.result(future, timeout);

      // upload two dummy bytes and add their keys to the job graph as dependencies
      BlobKey key1, key2;
      BlobClient bc = new BlobClient(new InetSocketAddress("localhost", blobPort));
      try {
        key1 = bc.put(new byte[10]);
        key2 = bc.put(new byte[10]);

        // delete one of the blobs to make sure that the startup failed
        bc.delete(key2);
      } finally {
        bc.close();
      }

      jg.addBlob(key1);
      jg.addBlob(key2);

      // submit the job
      Future<Object> submitFuture =
          jmGateway.ask(
              new JobManagerMessages.SubmitJob(jg, ListeningBehaviour.EXECUTION_RESULT), timeout);
      try {
        Await.result(submitFuture, timeout);
      } catch (JobExecutionException e) {
        // that is what we expect
        assertTrue(e.getCause() instanceof IOException);
      } catch (Exception e) {
        fail("Wrong exception type");
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  private Map<ExecutionAttemptID, Execution> setupExecution(
      JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    final JobID jobId = new JobID();

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "some job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int i = 0; i < dop1 + dop2; i++) {
      scheduler.newInstanceAvailable(
          ExecutionGraphTestUtils.getInstance(
              new ExecutionGraphTestUtils.SimpleActorGateway(
                  TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1 + dop2, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    Map<ExecutionAttemptID, Execution> executions = eg.getRegisteredExecutions();
    assertEquals(dop1 + dop2, executions.size());

    return executions;
  }
Exemple #6
0
  public void connectToPredecessors(
      Map<IntermediateDataSetID, IntermediateResult> intermediateDataSets) throws JobException {

    List<JobEdge> inputs = jobVertex.getInputs();

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          String.format(
              "Connecting ExecutionJobVertex %s (%s) to %d predecessors.",
              jobVertex.getID(), jobVertex.getName(), inputs.size()));
    }

    for (int num = 0; num < inputs.size(); num++) {
      JobEdge edge = inputs.get(num);

      if (LOG.isDebugEnabled()) {
        if (edge.getSource() == null) {
          LOG.debug(
              String.format(
                  "Connecting input %d of vertex %s (%s) to intermediate result referenced via ID %s.",
                  num, jobVertex.getID(), jobVertex.getName(), edge.getSourceId()));
        } else {
          LOG.debug(
              String.format(
                  "Connecting input %d of vertex %s (%s) to intermediate result referenced via predecessor %s (%s).",
                  num,
                  jobVertex.getID(),
                  jobVertex.getName(),
                  edge.getSource().getProducer().getID(),
                  edge.getSource().getProducer().getName()));
        }
      }

      // fetch the intermediate result via ID. if it does not exist, then it either has not been
      // created, or the order
      // in which this method is called for the job vertices is not a topological order
      IntermediateResult ires = intermediateDataSets.get(edge.getSourceId());
      if (ires == null) {
        throw new JobException(
            "Cannot connect this job graph to the previous graph. No previous intermediate result found for ID "
                + edge.getSourceId());
      }

      this.inputs.add(ires);

      int consumerIndex = ires.registerConsumer();

      for (int i = 0; i < parallelism; i++) {
        ExecutionVertex ev = taskVertices[i];
        ev.connectSource(num, ires, edge, consumerIndex);
      }
    }
  }
  public JobGraph createBlockingJob(int parallelism) {
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");

    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);

    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);

    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE);

    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);

    return new JobGraph("Blocking test job", sender, receiver);
  }
Exemple #8
0
  public void resetForNewExecution() {
    if (!(numSubtasksInFinalState == 0 || numSubtasksInFinalState == parallelism)) {
      throw new IllegalStateException("Cannot reset vertex that is not in final state");
    }

    synchronized (stateMonitor) {
      // check and reset the sharing groups with scheduler hints
      if (slotSharingGroup != null) {
        slotSharingGroup.clearTaskAssignment();
      }

      // reset vertices one by one. if one reset fails, the "vertices in final state"
      // fields will be consistent to handle triggered cancel calls
      for (int i = 0; i < parallelism; i++) {
        taskVertices[i].resetForNewExecution();
        if (finishedSubtasks[i]) {
          finishedSubtasks[i] = false;
          numSubtasksInFinalState--;
        }
      }

      if (numSubtasksInFinalState != 0) {
        throw new RuntimeException("Bug: resetting the execution job vertex failed.");
      }

      // set up the input splits again
      try {
        if (this.inputSplits != null) {
          // lazy assignment
          @SuppressWarnings("unchecked")
          InputSplitSource<InputSplit> splitSource =
              (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();
          this.splitAssigner = splitSource.getInputSplitAssigner(this.inputSplits);
        }
      } catch (Throwable t) {
        throw new RuntimeException(
            "Re-creating the input split assigner failed: " + t.getMessage(), t);
      }

      // Reset intermediate results
      for (IntermediateResult result : producedDataSets) {
        result.resetForNewExecution();
      }
    }
  }
  @Test
  public void testBuildDeploymentDescriptor() {
    try {
      final JobID jobId = new JobID();

      final JobVertexID jid1 = new JobVertexID();
      final JobVertexID jid2 = new JobVertexID();
      final JobVertexID jid3 = new JobVertexID();
      final JobVertexID jid4 = new JobVertexID();

      JobVertex v1 = new JobVertex("v1", jid1);
      JobVertex v2 = new JobVertex("v2", jid2);
      JobVertex v3 = new JobVertex("v3", jid3);
      JobVertex v4 = new JobVertex("v4", jid4);

      v1.setParallelism(10);
      v2.setParallelism(10);
      v3.setParallelism(10);
      v4.setParallelism(10);

      v1.setInvokableClass(BatchTask.class);
      v2.setInvokableClass(BatchTask.class);
      v3.setInvokableClass(BatchTask.class);
      v4.setInvokableClass(BatchTask.class);

      v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL);
      v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);
      v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);

      ExecutionGraph eg =
          new ExecutionGraph(
              TestingUtils.defaultExecutionContext(),
              jobId,
              "some job",
              new Configuration(),
              new SerializedValue<>(new ExecutionConfig()),
              AkkaUtils.getDefaultTimeout(),
              new NoRestartStrategy());

      List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);

      eg.attachJobGraph(ordered);

      ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
      ExecutionVertex vertex = ejv.getTaskVertices()[3];

      ExecutionGraphTestUtils.SimpleActorGateway instanceGateway =
          new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());

      final Instance instance = getInstance(instanceGateway);

      final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

      assertEquals(ExecutionState.CREATED, vertex.getExecutionState());

      vertex.deployToSlot(slot);

      assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

      TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
      assertNotNull(descr);

      assertEquals(jobId, descr.getJobID());
      assertEquals(jid2, descr.getVertexID());
      assertEquals(3, descr.getIndexInSubtaskGroup());
      assertEquals(10, descr.getNumberOfSubtasks());
      assertEquals(BatchTask.class.getName(), descr.getInvokableClassName());
      assertEquals("v2", descr.getTaskName());

      List<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
      List<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();

      assertEquals(2, producedPartitions.size());
      assertEquals(1, consumedPartitions.size());

      assertEquals(10, producedPartitions.get(0).getNumberOfSubpartitions());
      assertEquals(10, producedPartitions.get(1).getNumberOfSubpartitions());
      assertEquals(10, consumedPartitions.get(0).getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  /**
   * Tests that a blocking batch job fails if there are not enough resources left to schedule the
   * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
   * swallow the fail exception when scheduling a consumer task.
   */
  public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    v2.connectNewDataSetAsInput(
        v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
        new ExecutionGraph(
            TestingUtils.directExecutionContext(),
            jobId,
            "failing test job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
      scheduler.newInstanceAvailable(
          ExecutionGraphTestUtils.getInstance(
              new ExecutionGraphTestUtils.SimpleActorGateway(
                  TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    ExecutionAttemptID attemptID =
        eg.getJobVertex(v1.getID())
            .getTaskVertices()[0]
            .getCurrentExecutionAttempt()
            .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    eg.updateState(
        new TaskExecutionState(
            jobId,
            attemptID,
            ExecutionState.FINISHED,
            null,
            new AccumulatorSnapshot(
                jobId,
                attemptID,
                new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                new HashMap<String, Accumulator<?, ?>>())));

    assertEquals(JobStatus.FAILED, eg.getState());
  }
Exemple #11
0
 @Override
 public JobVertexID getJobVertexId() {
   return jobVertex.getID();
 }
Exemple #12
0
  public ExecutionJobVertex(
      ExecutionGraph graph,
      JobVertex jobVertex,
      int defaultParallelism,
      Time timeout,
      long createTimestamp)
      throws JobException, IOException {

    if (graph == null || jobVertex == null) {
      throw new NullPointerException();
    }

    this.graph = graph;
    this.jobVertex = jobVertex;

    int vertexParallelism = jobVertex.getParallelism();
    int numTaskVertices = vertexParallelism > 0 ? vertexParallelism : defaultParallelism;

    this.parallelism = numTaskVertices;

    int maxP = jobVertex.getMaxParallelism();

    Preconditions.checkArgument(
        maxP >= parallelism,
        "The maximum parallelism ("
            + maxP
            + ") must be greater or equal than the parallelism ("
            + parallelism
            + ").");
    this.maxParallelism = maxP;

    this.serializedTaskInformation =
        new SerializedValue<>(
            new TaskInformation(
                jobVertex.getID(),
                jobVertex.getName(),
                parallelism,
                maxParallelism,
                jobVertex.getInvokableClassName(),
                jobVertex.getConfiguration()));

    this.taskVertices = new ExecutionVertex[numTaskVertices];

    this.inputs = new ArrayList<IntermediateResult>(jobVertex.getInputs().size());

    // take the sharing group
    this.slotSharingGroup = jobVertex.getSlotSharingGroup();
    this.coLocationGroup = jobVertex.getCoLocationGroup();

    // setup the coLocation group
    if (coLocationGroup != null && slotSharingGroup == null) {
      throw new JobException("Vertex uses a co-location constraint without using slot sharing");
    }

    // create the intermediate results
    this.producedDataSets =
        new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];

    for (int i = 0; i < jobVertex.getProducedDataSets().size(); i++) {
      final IntermediateDataSet result = jobVertex.getProducedDataSets().get(i);

      this.producedDataSets[i] =
          new IntermediateResult(result.getId(), this, numTaskVertices, result.getResultType());
    }

    Configuration jobConfiguration = graph.getJobConfiguration();
    int maxPriorAttemptsHistoryLength =
        jobConfiguration != null
            ? jobConfiguration.getInteger(JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE)
            : JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE.defaultValue();

    // create all task vertices
    for (int i = 0; i < numTaskVertices; i++) {
      ExecutionVertex vertex =
          new ExecutionVertex(
              this,
              i,
              this.producedDataSets,
              timeout,
              createTimestamp,
              maxPriorAttemptsHistoryLength);

      this.taskVertices[i] = vertex;
    }

    // sanity check for the double referencing between intermediate result partitions and execution
    // vertices
    for (IntermediateResult ir : this.producedDataSets) {
      if (ir.getNumberOfAssignedPartitions() != parallelism) {
        throw new RuntimeException(
            "The intermediate result's partitions were not correctly assigned.");
      }
    }

    // set up the input splits, if the vertex has any
    try {
      @SuppressWarnings("unchecked")
      InputSplitSource<InputSplit> splitSource =
          (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();

      if (splitSource != null) {
        Thread currentThread = Thread.currentThread();
        ClassLoader oldContextClassLoader = currentThread.getContextClassLoader();
        currentThread.setContextClassLoader(graph.getUserClassLoader());
        try {
          inputSplits = splitSource.createInputSplits(numTaskVertices);

          if (inputSplits != null) {
            splitAssigner = splitSource.getInputSplitAssigner(inputSplits);
          }
        } finally {
          currentThread.setContextClassLoader(oldContextClassLoader);
        }
      } else {
        inputSplits = null;
      }
    } catch (Throwable t) {
      throw new JobException("Creating the input splits caused an error: " + t.getMessage(), t);
    }

    finishedSubtasks = new boolean[parallelism];
  }
  public static JobGraph getJobGraph(String[] args) throws Exception {

    int parallelism = 2;
    String pageWithRankInputPath =
        ""; // "file://" + PlayConstants.PLAY_DIR + "test-inputs/danglingpagerank/pageWithRank";
    String adjacencyListInputPath = ""; // "file://" + PlayConstants.PLAY_DIR +
    //			"test-inputs/danglingpagerank/adjacencylists";
    String outputPath =
        OperatingSystem.isWindows()
            ? "file:/c:/tmp/flink/iterations"
            : "file:///tmp/flink/iterations";
    //		String confPath = PlayConstants.PLAY_DIR + "local-conf";
    int minorConsumer = 25;
    int matchMemory = 50;
    int coGroupSortMemory = 50;
    int numIterations = 25;
    long numVertices = 5;
    long numDanglingVertices = 1;

    String failingWorkers = "1";
    int failingIteration = 2;
    double messageLoss = 0.75;

    if (args.length >= 15) {
      parallelism = Integer.parseInt(args[0]);
      pageWithRankInputPath = args[1];
      adjacencyListInputPath = args[2];
      outputPath = args[3];
      //			confPath = args[4];
      minorConsumer = Integer.parseInt(args[5]);
      matchMemory = Integer.parseInt(args[6]);
      coGroupSortMemory = Integer.parseInt(args[7]);
      numIterations = Integer.parseInt(args[8]);
      numVertices = Long.parseLong(args[9]);
      numDanglingVertices = Long.parseLong(args[10]);
      failingWorkers = args[11];
      failingIteration = Integer.parseInt(args[12]);
      messageLoss = Double.parseDouble(args[13]);
    }

    int totalMemoryConsumption = 3 * minorConsumer + matchMemory + coGroupSortMemory;

    JobGraph jobGraph = new JobGraph("CompensatableDanglingPageRank");

    // --------------- the inputs ---------------------

    // page rank input
    InputFormatVertex pageWithRankInput =
        JobGraphUtils.createInput(
            new ImprovedDanglingPageRankInputFormat(),
            pageWithRankInputPath,
            "DanglingPageWithRankInput",
            jobGraph,
            parallelism);
    TaskConfig pageWithRankInputConfig = new TaskConfig(pageWithRankInput.getConfiguration());
    pageWithRankInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    pageWithRankInputConfig.setOutputComparator(fieldZeroComparator, 0);
    pageWithRankInputConfig.setOutputSerializer(recSerializer);
    pageWithRankInputConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));

    // edges as adjacency list
    InputFormatVertex adjacencyListInput =
        JobGraphUtils.createInput(
            new ImprovedAdjacencyListInputFormat(),
            adjacencyListInputPath,
            "AdjancencyListInput",
            jobGraph,
            parallelism);
    TaskConfig adjacencyListInputConfig = new TaskConfig(adjacencyListInput.getConfiguration());
    adjacencyListInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    adjacencyListInputConfig.setOutputSerializer(recSerializer);
    adjacencyListInputConfig.setOutputComparator(fieldZeroComparator, 0);

    // --------------- the head ---------------------
    JobVertex head =
        JobGraphUtils.createTask(
            IterationHeadPactTask.class, "IterationHead", jobGraph, parallelism);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);

    // initial input / partial solution
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(recSerializer, 0);
    headConfig.setInputComparator(fieldZeroComparator, 0);
    headConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    headConfig.setRelativeMemoryInput(0, (double) minorConsumer / totalMemoryConsumption);
    headConfig.setFilehandlesInput(0, NUM_FILE_HANDLES_PER_SORT);
    headConfig.setSpillingThresholdInput(0, SORT_SPILL_THRESHOLD);

    // back channel / iterations
    headConfig.setRelativeBackChannelMemory((double) minorConsumer / totalMemoryConsumption);

    // output into iteration
    headConfig.setOutputSerializer(recSerializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);

    // final output
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(recSerializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);

    // the sync
    headConfig.setIterationHeadIndexOfSyncOutput(3);
    headConfig.setNumberOfIterations(numIterations);

    // the driver
    headConfig.setDriver(CollectorMapDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    headConfig.setStubWrapper(new UserCodeClassWrapper<CompensatingMap>(CompensatingMap.class));
    headConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    headConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    headConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    headConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
    headConfig.addIterationAggregator(
        CompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());

    // --------------- the join ---------------------

    JobVertex intermediate =
        JobGraphUtils.createTask(
            IterationIntermediatePactTask.class, "IterationIntermediate", jobGraph, parallelism);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
    //		intermediateConfig.setDriver(RepeatableHashjoinMatchDriverWithCachedBuildside.class);
    intermediateConfig.setDriver(BuildSecondCachedMatchDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
    intermediateConfig.setRelativeMemoryDriver((double) matchMemory / totalMemoryConsumption);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.addInputToGroup(1);
    intermediateConfig.setInputSerializer(recSerializer, 0);
    intermediateConfig.setInputSerializer(recSerializer, 1);
    intermediateConfig.setDriverComparator(fieldZeroComparator, 0);
    intermediateConfig.setDriverComparator(fieldZeroComparator, 1);
    intermediateConfig.setDriverPairComparator(pairComparatorFactory);

    intermediateConfig.setOutputSerializer(recSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    intermediateConfig.setOutputComparator(fieldZeroComparator, 0);

    intermediateConfig.setStubWrapper(
        new UserCodeClassWrapper<CompensatableDotProductMatch>(CompensatableDotProductMatch.class));
    intermediateConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    intermediateConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    intermediateConfig.setStubParameter(
        "compensation.failingIteration", String.valueOf(failingIteration));
    intermediateConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // ---------------- the tail (co group) --------------------

    JobVertex tail =
        JobGraphUtils.createTask(
            IterationTailPactTask.class, "IterationTail", jobGraph, parallelism);
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
    tailConfig.setIsWorksetUpdate();
    // TODO we need to combine!

    // inputs and driver
    tailConfig.setDriver(CoGroupDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.CO_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.addInputToGroup(1);
    tailConfig.setInputSerializer(recSerializer, 0);
    tailConfig.setInputSerializer(recSerializer, 1);
    tailConfig.setDriverComparator(fieldZeroComparator, 0);
    tailConfig.setDriverComparator(fieldZeroComparator, 1);
    tailConfig.setDriverPairComparator(pairComparatorFactory);
    tailConfig.setInputAsynchronouslyMaterialized(0, true);
    tailConfig.setRelativeInputMaterializationMemory(
        0, (double) minorConsumer / totalMemoryConsumption);
    tailConfig.setInputLocalStrategy(1, LocalStrategy.SORT);
    tailConfig.setInputComparator(fieldZeroComparator, 1);
    tailConfig.setRelativeMemoryInput(1, (double) coGroupSortMemory / totalMemoryConsumption);
    tailConfig.setFilehandlesInput(1, NUM_FILE_HANDLES_PER_SORT);
    tailConfig.setSpillingThresholdInput(1, SORT_SPILL_THRESHOLD);

    // output
    tailConfig.setOutputSerializer(recSerializer);

    // the stub
    tailConfig.setStubWrapper(
        new UserCodeClassWrapper<CompensatableDotProductCoGroup>(
            CompensatableDotProductCoGroup.class));
    tailConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    tailConfig.setStubParameter(
        "pageRank.numDanglingVertices", String.valueOf(numDanglingVertices));
    tailConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    tailConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    tailConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // --------------- the output ---------------------

    OutputFormatVertex output =
        JobGraphUtils.createFileOutput(jobGraph, "FinalOutput", parallelism);
    TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
    outputConfig.addInputToGroup(0);
    outputConfig.setInputSerializer(recSerializer, 0);
    outputConfig.setStubWrapper(
        new UserCodeClassWrapper<PageWithRankOutFormat>(PageWithRankOutFormat.class));
    outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outputPath);

    // --------------- the auxiliaries ---------------------

    JobVertex sync = JobGraphUtils.createSync(jobGraph, parallelism);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.addIterationAggregator(
        CompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());
    syncConfig.setConvergenceCriterion(
        CompensatableDotProductCoGroup.AGGREGATOR_NAME, new DiffL1NormConvergenceCriterion());
    syncConfig.setIterationId(ITERATION_ID);

    // --------------- the wiring ---------------------

    JobGraphUtils.connect(pageWithRankInput, head, DistributionPattern.ALL_TO_ALL);

    JobGraphUtils.connect(head, intermediate, DistributionPattern.POINTWISE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);

    JobGraphUtils.connect(adjacencyListInput, intermediate, DistributionPattern.ALL_TO_ALL);

    JobGraphUtils.connect(head, tail, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(intermediate, tail, DistributionPattern.ALL_TO_ALL);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(1, parallelism);

    JobGraphUtils.connect(head, output, DistributionPattern.POINTWISE);

    JobGraphUtils.connect(head, sync, DistributionPattern.POINTWISE);

    SlotSharingGroup sharingGroup = new SlotSharingGroup();
    pageWithRankInput.setSlotSharingGroup(sharingGroup);
    adjacencyListInput.setSlotSharingGroup(sharingGroup);
    head.setSlotSharingGroup(sharingGroup);
    intermediate.setSlotSharingGroup(sharingGroup);
    tail.setSlotSharingGroup(sharingGroup);
    output.setSlotSharingGroup(sharingGroup);
    sync.setSlotSharingGroup(sharingGroup);

    tail.setStrictlyCoLocatedWith(head);
    intermediate.setStrictlyCoLocatedWith(head);

    return jobGraph;
  }
  /*
   * Test setup:
   * - v1 is isolated, no slot sharing
   * - v2 and v3 (not connected) share slots
   * - v4 and v5 (connected) share slots
   */
  @Test
  public void testAssignSlotSharingGroup() {
    try {
      JobVertex v1 = new JobVertex("v1");
      JobVertex v2 = new JobVertex("v2");
      JobVertex v3 = new JobVertex("v3");
      JobVertex v4 = new JobVertex("v4");
      JobVertex v5 = new JobVertex("v5");

      v1.setParallelism(4);
      v2.setParallelism(5);
      v3.setParallelism(7);
      v4.setParallelism(1);
      v5.setParallelism(11);

      v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE);
      v5.connectNewDataSetAsInput(v4, DistributionPattern.POINTWISE);

      SlotSharingGroup jg1 = new SlotSharingGroup();
      v2.setSlotSharingGroup(jg1);
      v3.setSlotSharingGroup(jg1);

      SlotSharingGroup jg2 = new SlotSharingGroup();
      v4.setSlotSharingGroup(jg2);
      v5.setSlotSharingGroup(jg2);

      List<JobVertex> vertices = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

      ExecutionGraph eg =
          new ExecutionGraph(
              TestingUtils.defaultExecutionContext(),
              new JobID(),
              "test job",
              new Configuration(),
              ExecutionConfigTest.getSerializedConfig(),
              AkkaUtils.getDefaultTimeout(),
              new NoRestartStrategy());
      eg.attachJobGraph(vertices);

      // verify that the vertices are all in the same slot sharing group
      SlotSharingGroup group1 = null;
      SlotSharingGroup group2 = null;

      // verify that v1 tasks have no slot sharing group
      assertNull(eg.getJobVertex(v1.getID()).getSlotSharingGroup());

      // v2 and v3 are shared
      group1 = eg.getJobVertex(v2.getID()).getSlotSharingGroup();
      assertNotNull(group1);
      assertEquals(group1, eg.getJobVertex(v3.getID()).getSlotSharingGroup());

      assertEquals(2, group1.getJobVertexIds().size());
      assertTrue(group1.getJobVertexIds().contains(v2.getID()));
      assertTrue(group1.getJobVertexIds().contains(v3.getID()));

      // v4 and v5 are shared
      group2 = eg.getJobVertex(v4.getID()).getSlotSharingGroup();
      assertNotNull(group2);
      assertEquals(group2, eg.getJobVertex(v5.getID()).getSlotSharingGroup());

      assertEquals(2, group1.getJobVertexIds().size());
      assertTrue(group2.getJobVertexIds().contains(v4.getID()));
      assertTrue(group2.getJobVertexIds().contains(v5.getID()));
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }