Пример #1
0
  /**
   * Prepares a file-based input task for execution against the mock environment.
   *
   * <p>The input format is created reflectively from {@code stubClass}, pointed at {@code inPath},
   * given the record {@code delimiter}, and stored as the stub wrapper in a {@code TaskConfig}
   * built over the mock environment's task configuration. Input splits for the path are then added
   * to the split provider, the environment is handed to the task, and {@code
   * registerInputOutput()} is invoked on it.
   *
   * @param inTask the input task to set up
   * @param stubClass the input format class; it is instantiated via {@code newInstance()}
   * @param inPath the file path handed to the format and to the input split provider
   * @param delimiter the delimiter handed to the format
   * @throws RuntimeException if the input format cannot be instantiated for any reason
   */
  public void registerFileInputTask(
      AbstractInputTask<?> inTask,
      Class<? extends DelimitedInputFormat> stubClass,
      String inPath,
      String delimiter) {
    final DelimitedInputFormat inputFormat;
    try {
      inputFormat = stubClass.newInstance();
    } catch (Throwable cause) {
      // Any failure (including errors raised by the constructor) is reported uniformly.
      throw new RuntimeException("Could not instantiate test input format.", cause);
    }
    inputFormat.setFilePath(inPath);
    inputFormat.setDelimiter(delimiter);

    // The configuration object is a view over the mock environment's task configuration.
    final TaskConfig sourceConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    final UserCodeObjectWrapper<DelimitedInputFormat> wrappedFormat =
        new UserCodeObjectWrapper<DelimitedInputFormat>(inputFormat);
    sourceConfig.setStubWrapper(wrappedFormat);

    // NOTE(review): the literal 5 is presumably the number of splits to generate - confirm.
    this.inputSplitProvider.addInputSplits(inPath, 5);

    inTask.setEnvironment(this.mockEnv);

    // Only data source tasks accept an explicit user-code class loader.
    if (inTask instanceof DataSourceTask<?>) {
      final DataSourceTask<?> sourceTask = (DataSourceTask<?>) inTask;
      sourceTask.setUserCodeClassLoader(getClass().getClassLoader());
    }

    inTask.registerInputOutput();
  }
  /**
   * Creates the "Map[DotProducts]" task vertex and fills in its task configuration.
   *
   * <p>The vertex runs {@code RegularPactTask} with the {@code CollectorMapDriver}, uses {@code
   * DotProducts} as user code, forwards its output, and has one regular input plus one broadcast
   * input registered under the name "models". The same serializer is used for the regular input,
   * the broadcast input and the output.
   *
   * @param jobGraph the job graph the vertex is created in
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param serializer serializer factory applied to input, broadcast input and output
   * @return the configured task vertex
   */
  private static JobTaskVertex createMapper(
      JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final JobTaskVertex mapperVertex =
        JobGraphUtils.createTask(
            RegularPactTask.class, "Map[DotProducts]", jobGraph, numSubTasks, numSubTasks);

    final TaskConfig mapperConfig = new TaskConfig(mapperVertex.getConfiguration());

    // user code, output and driver
    mapperConfig.setStubWrapper(new UserCodeClassWrapper<DotProducts>(DotProducts.class));
    mapperConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    mapperConfig.setOutputSerializer(serializer);
    mapperConfig.setDriver(CollectorMapDriver.class);
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);

    // regular input 0
    mapperConfig.addInputToGroup(0);
    mapperConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
    mapperConfig.setInputSerializer(serializer, 0);

    // broadcast input 0, exposed to the user code as "models"
    mapperConfig.setBroadcastInputName("models", 0);
    mapperConfig.addBroadcastInputToGroup(0);
    mapperConfig.setBroadcastInputSerializer(serializer, 0);

    return mapperVertex;
  }
  /**
   * Creates the "[Points]" input vertex, including a chained mapper that builds points.
   *
   * <p>The vertex reads {@code pointsPath} through a {@code CsvInputFormat} constructed with
   * {@code '|'} and the field types IntValue, DoubleValue, DoubleValue, DoubleValue. A {@code
   * PointBuilder} is attached as a chained collector-map task named "Build points". Both the
   * vertex and the chained task forward their output and use the given serializer.
   *
   * @param jobGraph the job graph the vertex is created in
   * @param pointsPath path of the points file
   * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
   * @param serializer serializer factory for the output of the vertex and of the chained mapper
   * @return the configured input vertex
   */
  private static JobInputVertex createPointsInput(
      JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    @SuppressWarnings("unchecked")
    final CsvInputFormat csvFormat =
        new CsvInputFormat(
            '|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);

    final JobInputVertex inputVertex =
        JobGraphUtils.createInput(
            csvFormat, pointsPath, "[Points]", jobGraph, numSubTasks, numSubTasks);

    // configuration of the input vertex itself
    final TaskConfig inputConfig = new TaskConfig(inputVertex.getConfiguration());
    inputConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    inputConfig.setOutputSerializer(serializer);

    // configuration of the chained mapper, kept in its own fresh Configuration
    final TaskConfig pointBuilderConfig = new TaskConfig(new Configuration());
    pointBuilderConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    pointBuilderConfig.setStubWrapper(
        new UserCodeObjectWrapper<PointBuilder>(new PointBuilder()));
    pointBuilderConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    pointBuilderConfig.setOutputSerializer(serializer);

    inputConfig.addChainedTask(
        ChainedCollectorMapDriver.class, pointBuilderConfig, "Build points");

    return inputVertex;
  }
Пример #4
0
  /**
   * Prepares a file-based output task for execution against the mock environment.
   *
   * <p>Stores {@code stubClass} as the stub wrapper and {@code outPath} as the stub parameter
   * {@code FileOutputFormat.FILE_PARAMETER_KEY} in a {@code TaskConfig} built over the mock
   * environment's task configuration, then hands the environment to the task and invokes {@code
   * registerInputOutput()} on it.
   *
   * @param outTask the output task to set up
   * @param stubClass the output format class to register as user code
   * @param outPath the output file path stored as a stub parameter
   */
  public void registerFileOutputTask(
      AbstractOutputTask outTask, Class<? extends FileOutputFormat> stubClass, String outPath) {
    final TaskConfig sinkConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    final UserCodeClassWrapper<FileOutputFormat> wrappedFormat =
        new UserCodeClassWrapper<FileOutputFormat>(stubClass);
    sinkConfig.setStubWrapper(wrappedFormat);
    sinkConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outPath);

    outTask.setEnvironment(this.mockEnv);

    // Only data sink tasks accept an explicit user-code class loader.
    if (outTask instanceof DataSinkTask<?>) {
      final DataSinkTask<?> sinkTask = (DataSinkTask<?>) outTask;
      sinkTask.setUserCodeClassLoader(getClass().getClassLoader());
    }

    outTask.registerInputOutput();
  }
Пример #5
0
  /**
   * Prepares a task with the given driver and user code for execution against the mock
   * environment.
   *
   * <p>Writes the driver class and a class wrapper around {@code stubClass} into a {@code
   * TaskConfig} built over the mock environment's task configuration, then hands the environment
   * to the task and invokes {@code registerInputOutput()} on it.
   *
   * @param task the task to set up
   * @param driver the driver class to record in the task configuration
   * @param stubClass the user code class to record in the task configuration
   */
  public void registerTask(
      AbstractTask task,
      @SuppressWarnings("rawtypes") Class<? extends PactDriver> driver,
      Class<? extends Stub> stubClass) {
    final TaskConfig taskConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    taskConfig.setDriver(driver);
    final UserCodeClassWrapper<Stub> wrappedStub = new UserCodeClassWrapper<Stub>(stubClass);
    taskConfig.setStubWrapper(wrappedStub);

    task.setEnvironment(this.mockEnv);

    // Only regular pact tasks accept an explicit user-code class loader.
    if (task instanceof RegularPactTask<?, ?>) {
      final RegularPactTask<?, ?> pactTask = (RegularPactTask<?, ?>) task;
      pactTask.setUserCodeClassLoader(getClass().getClassLoader());
    }

    task.registerInputOutput();
  }
  /**
   * Creates the "Reduce / Iteration Tail" vertex and fills in its task configuration.
   *
   * <p>The vertex runs {@code IterationTailPactTask} with the {@code ReduceDriver} in {@code
   * SORTED_GROUP} strategy, is marked as a workset update for {@code ITERATION_ID}, sorts its
   * single input locally, forwards its output, and uses {@code RecomputeClusterCenter} as user
   * code.
   *
   * @param jobGraph the job graph the vertex is created in
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param inputSerializer serializer factory for input 0
   * @param inputComparator comparator factory used both as driver comparator and as input
   *     comparator for input 0
   * @param outputSerializer serializer factory for the output
   * @return the configured task vertex
   */
  private static JobTaskVertex createReducer(
      JobGraph jobGraph,
      int numSubTasks,
      TypeSerializerFactory<?> inputSerializer,
      TypeComparatorFactory<?> inputComparator,
      TypeSerializerFactory<?> outputSerializer) {
    final JobTaskVertex reducerVertex =
        JobGraphUtils.createTask(
            IterationTailPactTask.class,
            "Reduce / Iteration Tail",
            jobGraph,
            numSubTasks,
            numSubTasks);

    // iteration membership
    final TaskConfig reducerConfig = new TaskConfig(reducerVertex.getConfiguration());
    reducerConfig.setIterationId(ITERATION_ID);
    reducerConfig.setIsWorksetUpdate();

    // driver and its single input
    reducerConfig.setDriver(ReduceDriver.class);
    reducerConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP);
    reducerConfig.addInputToGroup(0);
    reducerConfig.setInputSerializer(inputSerializer, 0);
    reducerConfig.setDriverComparator(inputComparator, 0);

    // local sort on input 0 and its resource settings
    reducerConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    reducerConfig.setInputComparator(inputComparator, 0);
    reducerConfig.setMemoryInput(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE);
    reducerConfig.setFilehandlesInput(0, 128);
    reducerConfig.setSpillingThresholdInput(0, 0.9f);

    // output side
    reducerConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    reducerConfig.setOutputSerializer(outputSerializer);

    // user code
    final UserCodeObjectWrapper<RecomputeClusterCenter> wrappedUdf =
        new UserCodeObjectWrapper<RecomputeClusterCenter>(new RecomputeClusterCenter());
    reducerConfig.setStubWrapper(wrappedUdf);

    return reducerVertex;
  }
  /**
   * Creates the "Output" file output vertex that writes results with a {@code PointOutFormat}.
   *
   * <p>The vertex has a single input using the given serializer; the output format is pointed at
   * {@code resultPath} and registered as the vertex's user code.
   *
   * @param jobGraph the job graph the vertex is created in
   * @param resultPath path the output format writes to
   * @param numSubTasks passed twice to {@code JobGraphUtils.createFileOutput}
   * @param serializer serializer factory for input 0
   * @return the configured output vertex
   */
  private static JobOutputVertex createOutput(
      JobGraph jobGraph, String resultPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final JobOutputVertex sinkVertex =
        JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    // single input
    final TaskConfig sinkConfig = new TaskConfig(sinkVertex.getConfiguration());
    sinkConfig.addInputToGroup(0);
    sinkConfig.setInputSerializer(serializer, 0);

    // output format as user code
    final PointOutFormat pointFormat = new PointOutFormat();
    pointFormat.setOutputFilePath(new Path(resultPath));
    final UserCodeObjectWrapper<PointOutFormat> wrappedFormat =
        new UserCodeObjectWrapper<PointOutFormat>(pointFormat);
    sinkConfig.setStubWrapper(wrappedFormat);

    return sinkVertex;
  }
  /**
   * Creates the "Output" file output vertex that writes results with a {@code CsvOutputFormat}.
   *
   * <p>The vertex has a single input using the given serializer. The CSV format is constructed
   * with {@code "\n"}, {@code " "} and three {@code LongValue} field types, pointed at {@code
   * resultPath}, and registered as the vertex's user code.
   *
   * @param jobGraph the job graph the vertex is created in
   * @param resultPath path the output format writes to
   * @param numSubTasks passed twice to {@code JobGraphUtils.createFileOutput}
   * @param serializer serializer factory for input 0
   * @return the configured output vertex
   */
  private static JobOutputVertex createOutput(
      JobGraph jobGraph, String resultPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final JobOutputVertex sinkVertex =
        JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    // single input
    final TaskConfig sinkConfig = new TaskConfig(sinkVertex.getConfiguration());
    sinkConfig.addInputToGroup(0);
    sinkConfig.setInputSerializer(serializer, 0);

    // output format as user code
    @SuppressWarnings("unchecked")
    final CsvOutputFormat csvFormat =
        new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class, LongValue.class);
    csvFormat.setOutputFilePath(new Path(resultPath));
    final UserCodeObjectWrapper<CsvOutputFormat> wrappedFormat =
        new UserCodeObjectWrapper<CsvOutputFormat>(csvFormat);
    sinkConfig.setStubWrapper(wrappedFormat);

    return sinkVertex;
  }
  /**
   * Creates the "Map (Select nearest center)" vertex and fills in its task configuration.
   *
   * <p>The vertex runs {@code IterationIntermediatePactTask} with the {@code CollectorMapDriver}
   * as part of {@code ITERATION_ID}. It has one regular input, one broadcast input registered
   * under the name "centers", hash-partitions its output using the given comparator, and uses
   * {@code SelectNearestCenter} as user code.
   *
   * @param jobGraph the job graph the vertex is created in
   * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
   * @param inputSerializer serializer factory for input 0
   * @param broadcastVarSerializer serializer factory for broadcast input 0
   * @param outputSerializer serializer factory for the output
   * @param outputComparator comparator factory for output 0
   * @return the configured task vertex
   */
  private static JobTaskVertex createMapper(
      JobGraph jobGraph,
      int numSubTasks,
      TypeSerializerFactory<?> inputSerializer,
      TypeSerializerFactory<?> broadcastVarSerializer,
      TypeSerializerFactory<?> outputSerializer,
      TypeComparatorFactory<?> outputComparator) {
    final JobTaskVertex mapperVertex =
        JobGraphUtils.createTask(
            IterationIntermediatePactTask.class,
            "Map (Select nearest center)",
            jobGraph,
            numSubTasks,
            numSubTasks);

    // iteration membership
    final TaskConfig mapperConfig = new TaskConfig(mapperVertex.getConfiguration());
    mapperConfig.setIterationId(ITERATION_ID);

    // driver and regular input 0
    mapperConfig.setDriver(CollectorMapDriver.class);
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    mapperConfig.addInputToGroup(0);
    mapperConfig.setInputSerializer(inputSerializer, 0);

    // hash-partitioned output
    mapperConfig.setOutputSerializer(outputSerializer);
    mapperConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    mapperConfig.setOutputComparator(outputComparator, 0);

    // broadcast input 0, exposed to the user code as "centers"
    mapperConfig.setBroadcastInputName("centers", 0);
    mapperConfig.addBroadcastInputToGroup(0);
    mapperConfig.setBroadcastInputSerializer(broadcastVarSerializer, 0);

    // user code
    final UserCodeObjectWrapper<SelectNearestCenter> wrappedUdf =
        new UserCodeObjectWrapper<SelectNearestCenter>(new SelectNearestCenter());
    mapperConfig.setStubWrapper(wrappedUdf);

    return mapperVertex;
  }