/**
 * Creates the "[Points]" input vertex and chains a {@code PointBuilder} collector-map task
 * ("Build points") behind it.
 *
 * <p>The input format is a {@code CsvInputFormat} constructed with {@code '|'} and the value
 * classes {@code IntValue}, {@code DoubleValue}, {@code DoubleValue}, {@code DoubleValue}.
 *
 * @param jobGraph the job graph passed to {@code JobGraphUtils.createInput}
 * @param pointsPath the path passed to {@code JobGraphUtils.createInput}
 * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
 * @param serializer output serializer set on both the input vertex and the chained task
 * @return the configured input vertex
 */
private static JobInputVertex createPointsInput(
      JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    @SuppressWarnings("unchecked")
    final CsvInputFormat csvFormat =
        new CsvInputFormat(
            '|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);
    final JobInputVertex vertex =
        JobGraphUtils.createInput(
            csvFormat, pointsPath, "[Points]", jobGraph, numSubTasks, numSubTasks);

    // Stand-alone configuration for the chained point-building mapper.
    final TaskConfig builderConfig = new TaskConfig(new Configuration());
    builderConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    builderConfig.setStubWrapper(new UserCodeObjectWrapper<PointBuilder>(new PointBuilder()));
    builderConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    builderConfig.setOutputSerializer(serializer);

    // Configuration of the input vertex itself; the mapper is attached as a chained task.
    final TaskConfig vertexConfig = new TaskConfig(vertex.getConfiguration());
    vertexConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    vertexConfig.setOutputSerializer(serializer);
    vertexConfig.addChainedTask(ChainedCollectorMapDriver.class, builderConfig, "Build points");

    return vertex;
  }
  /**
   * Creates the "Map[DotProducts]" task vertex: a {@code RegularPactTask} driven by
   * {@code CollectorMapDriver} with the {@code DotProducts} stub, one regular input and one
   * broadcast input named "models".
   *
   * @param jobGraph the job graph passed to {@code JobGraphUtils.createTask}
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param serializer serializer used for the input, the broadcast input and the output
   * @return the configured task vertex
   */
  private static JobTaskVertex createMapper(
      JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final JobTaskVertex mapperVertex =
        JobGraphUtils.createTask(
            RegularPactTask.class, "Map[DotProducts]", jobGraph, numSubTasks, numSubTasks);
    final TaskConfig mapperConfig = new TaskConfig(mapperVertex.getConfiguration());

    // user function and output
    mapperConfig.setStubWrapper(new UserCodeClassWrapper<DotProducts>(DotProducts.class));
    mapperConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    mapperConfig.setOutputSerializer(serializer);

    // driver
    mapperConfig.setDriver(CollectorMapDriver.class);
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);

    // regular input 0, no local strategy
    mapperConfig.addInputToGroup(0);
    mapperConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
    mapperConfig.setInputSerializer(serializer, 0);

    // broadcast input 0, registered under the name "models"
    mapperConfig.setBroadcastInputName("models", 0);
    mapperConfig.addBroadcastInputToGroup(0);
    mapperConfig.setBroadcastInputSerializer(serializer, 0);

    return mapperVertex;
  }
Пример #3
0
  /**
   * Runs a map task ({@code MapDriver} / {@code MockMapStub}) with a chained combiner whose stub is
   * {@code MockFailingCombineStub} and asserts that {@code invoke()} throws an exception.
   */
  @Test
  public void testFailingMapTask() {
    final int numKeys = 100;
    final int valuesPerKey = 20;

    try {
      // set up the environment with a generated input and the output list
      initEnvironment(3 * 1024 * 1024);
      addInput(new UniformPactRecordGenerator(numKeys, valuesPerKey, false), 0);
      addOutput(this.outList);

      // configuration of the chained combiner: input, output, driver, then the failing stub
      final TaskConfig chainedConfig = new TaskConfig(new Configuration());
      chainedConfig.addInputToGroup(0);
      chainedConfig.setInputSerializer(serFact, 0);
      chainedConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      chainedConfig.setOutputSerializer(serFact);
      chainedConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP);
      chainedConfig.setDriverComparator(compFact, 0);
      chainedConfig.setMemoryDriver(3 * 1024 * 1024);
      chainedConfig.setStubClass(MockFailingCombineStub.class);
      getTaskConfig().addChainedTask(ChainedCombineDriver.class, chainedConfig, "combine");

      // the map task that the combiner is chained to
      final RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> mapTask =
          new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>();
      super.registerTask(mapTask, MapDriver.class, MockMapStub.class);

      // any exception thrown by invoke() counts as the stub failure being forwarded
      boolean exceptionForwarded;
      try {
        mapTask.invoke();
        exceptionForwarded = false;
      } catch (Exception forwarded) {
        exceptionForwarded = true;
      }

      Assert.assertTrue("Stub exception was not forwarded.", exceptionForwarded);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Пример #4
0
  /**
   * Runs a map task ({@code MapDriver} / {@code MockMapStub}) with a chained combiner using
   * {@code MockReduceStub} and asserts that the output list ends up with one element per key.
   */
  @Test
  public void testMapTask() {
    final int numKeys = 100;
    final int valuesPerKey = 20;

    try {
      // set up the environment with a generated input and the output list
      initEnvironment(3 * 1024 * 1024);
      addInput(new UniformPactRecordGenerator(numKeys, valuesPerKey, false), 0);
      addOutput(this.outList);

      // configuration of the chained combiner: input, output, driver, then the stub
      final TaskConfig chainedConfig = new TaskConfig(new Configuration());
      chainedConfig.addInputToGroup(0);
      chainedConfig.setInputSerializer(serFact, 0);
      chainedConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      chainedConfig.setOutputSerializer(serFact);
      chainedConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP);
      chainedConfig.setDriverComparator(compFact, 0);
      chainedConfig.setMemoryDriver(3 * 1024 * 1024);
      chainedConfig.setStubClass(MockReduceStub.class);
      getTaskConfig().addChainedTask(ChainedCombineDriver.class, chainedConfig, "combine");

      // the map task that the combiner is chained to
      final RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> mapTask =
          new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>();
      registerTask(mapTask, MapDriver.class, MockMapStub.class);

      try {
        mapTask.invoke();
      } catch (Exception invokeFailure) {
        invokeFailure.printStackTrace();
        Assert.fail("Invoke method caused exception.");
      }

      Assert.assertEquals(numKeys, this.outList.size());
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Creates the "Iteration Head" task vertex ({@code IterationHeadPactTask}) with a no-op driver.
   *
   * <p>The head takes input 0 as the partial solution / workset input, broadcasts its output into
   * the iteration, forwards its final output, and uses output index 2 for the sync.
   *
   * @param jobGraph the job graph passed to {@code JobGraphUtils.createTask}
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param serializer serializer used for the input, the iteration output and the final output
   * @return the configured head vertex
   */
  private static JobTaskVertex createIterationHead(
      JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final JobTaskVertex headVertex =
        JobGraphUtils.createTask(
            IterationHeadPactTask.class, "Iteration Head", jobGraph, numSubTasks, numSubTasks);
    final TaskConfig config = new TaskConfig(headVertex.getConfiguration());

    config.setIterationId(ITERATION_ID);

    // input 0 carries the initial partial solution
    config.addInputToGroup(0);
    config.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    config.setInputSerializer(serializer, 0);

    // memory for the back channel of the iteration
    config.setBackChannelMemory(MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE);

    // output that feeds the iteration is broadcast
    config.setOutputSerializer(serializer);
    config.addOutputShipStrategy(ShipStrategyType.BROADCAST);

    // separate configuration describing the final output, forwarded
    final TaskConfig finalOutputConfig = new TaskConfig(new Configuration());
    finalOutputConfig.setOutputSerializer(serializer);
    finalOutputConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    config.setIterationHeadFinalOutputConfig(finalOutputConfig);

    // index of the output used for the sync
    config.setIterationHeadIndexOfSyncOutput(2);

    // no-op driver
    config.setDriver(NoOpDriver.class);
    config.setDriverStrategy(DriverStrategy.UNARY_NO_OP);

    return headVertex;
  }
  /**
   * Creates the "Input[Models]" input vertex, whose output is broadcast.
   *
   * <p>The input format is a {@code CsvInputFormat} constructed with {@code ' '} and four
   * {@code LongValue} classes.
   *
   * @param jobGraph the job graph passed to {@code JobGraphUtils.createInput}
   * @param pointsPath the path passed to {@code JobGraphUtils.createInput}
   * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
   * @param serializer output serializer set on the input vertex
   * @return the configured input vertex
   */
  @SuppressWarnings("unchecked")
  private static JobInputVertex createModelsInput(
      JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final CsvInputFormat csvFormat =
        new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class, LongValue.class);
    final JobInputVertex vertex =
        JobGraphUtils.createInput(
            csvFormat, pointsPath, "Input[Models]", jobGraph, numSubTasks, numSubTasks);

    // the models are broadcast from this vertex
    final TaskConfig vertexConfig = new TaskConfig(vertex.getConfiguration());
    vertexConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST);
    vertexConfig.setOutputSerializer(serializer);

    return vertex;
  }
  /**
   * Creates the "Reduce / Iteration Tail" task vertex ({@code IterationTailPactTask}) running a
   * {@code ReduceDriver} with the {@code RecomputeClusterCenter} stub over a sorted input.
   *
   * @param jobGraph the job graph passed to {@code JobGraphUtils.createTask}
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param inputSerializer serializer for input 0
   * @param inputComparator comparator used both as driver comparator and as input comparator
   * @param outputSerializer serializer for the output
   * @return the configured tail vertex
   */
  private static JobTaskVertex createReducer(
      JobGraph jobGraph,
      int numSubTasks,
      TypeSerializerFactory<?> inputSerializer,
      TypeComparatorFactory<?> inputComparator,
      TypeSerializerFactory<?> outputSerializer) {
    final JobTaskVertex tailVertex =
        JobGraphUtils.createTask(
            IterationTailPactTask.class,
            "Reduce / Iteration Tail",
            jobGraph,
            numSubTasks,
            numSubTasks);
    final TaskConfig config = new TaskConfig(tailVertex.getConfiguration());

    // iteration membership
    config.setIterationId(ITERATION_ID);
    config.setIsWorksetUpdate();

    // driver and its single input
    config.setDriver(ReduceDriver.class);
    config.setDriverStrategy(DriverStrategy.SORTED_GROUP);
    config.addInputToGroup(0);
    config.setInputSerializer(inputSerializer, 0);
    config.setDriverComparator(inputComparator, 0);

    // local sort on input 0 and the resources assigned to it
    config.setInputLocalStrategy(0, LocalStrategy.SORT);
    config.setInputComparator(inputComparator, 0);
    config.setMemoryInput(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE);
    config.setFilehandlesInput(0, 128);
    config.setSpillingThresholdInput(0, 0.9f);

    // forwarded output
    config.addOutputShipStrategy(ShipStrategyType.FORWARD);
    config.setOutputSerializer(outputSerializer);

    // user function
    config.setStubWrapper(
        new UserCodeObjectWrapper<RecomputeClusterCenter>(new RecomputeClusterCenter()));

    return tailVertex;
  }
  /**
   * Creates the "Map (Select nearest center)" task vertex ({@code IterationIntermediatePactTask})
   * running a {@code CollectorMapDriver} with the {@code SelectNearestCenter} stub.
   *
   * <p>The task has one regular input, one broadcast input named "centers", and a hash-partitioned
   * output.
   *
   * @param jobGraph the job graph passed to {@code JobGraphUtils.createTask}
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param inputSerializer serializer for input 0
   * @param broadcastVarSerializer serializer for broadcast input 0
   * @param outputSerializer serializer for the output
   * @param outputComparator comparator set for output 0
   * @return the configured mapper vertex
   */
  private static JobTaskVertex createMapper(
      JobGraph jobGraph,
      int numSubTasks,
      TypeSerializerFactory<?> inputSerializer,
      TypeSerializerFactory<?> broadcastVarSerializer,
      TypeSerializerFactory<?> outputSerializer,
      TypeComparatorFactory<?> outputComparator) {
    final JobTaskVertex mapperVertex =
        JobGraphUtils.createTask(
            IterationIntermediatePactTask.class,
            "Map (Select nearest center)",
            jobGraph,
            numSubTasks,
            numSubTasks);
    final TaskConfig config = new TaskConfig(mapperVertex.getConfiguration());

    config.setIterationId(ITERATION_ID);

    // driver and regular input 0
    config.setDriver(CollectorMapDriver.class);
    config.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    config.addInputToGroup(0);
    config.setInputSerializer(inputSerializer, 0);

    // hash-partitioned output
    config.setOutputSerializer(outputSerializer);
    config.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    config.setOutputComparator(outputComparator, 0);

    // broadcast input 0, registered under the name "centers"
    config.setBroadcastInputName("centers", 0);
    config.addBroadcastInputToGroup(0);
    config.setBroadcastInputSerializer(broadcastVarSerializer, 0);

    // user function
    config.setStubWrapper(
        new UserCodeObjectWrapper<SelectNearestCenter>(new SelectNearestCenter()));

    return mapperVertex;
  }
Пример #9
0
 /**
  * Adds {@code output} to the mock environment, then sets a FORWARD ship strategy and the
  * {@code PactRecordSerializerFactory} output serializer on the environment's task configuration.
  *
  * @param output the list passed to the mock environment's {@code addOutput}
  */
 public void addOutput(List<PactRecord> output) {
   this.mockEnv.addOutput(output);

   final TaskConfig outputConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
   outputConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
   outputConfig.setOutputSerializer(PactRecordSerializerFactory.get());
 }