  private static JobOutputVertex createSync(JobGraph jobGraph, int numIterations, int dop) {
    // The sync vertex coordinates the superstep barrier and stops the iteration
    // after the configured number of iterations.
    JobOutputVertex sync = JobGraphUtils.createSync(jobGraph, dop);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.setIterationId(ITERATION_ID);
    return sync;
  }
Example #2
  public void registerFileInputTask(
      AbstractInputTask<?> inTask,
      Class<? extends DelimitedInputFormat> stubClass,
      String inPath,
      String delimiter) {
    DelimitedInputFormat format;
    try {
      format = stubClass.newInstance();
    } catch (Throwable t) {
      throw new RuntimeException("Could not instantiate test input format.", t);
    }

    format.setFilePath(inPath);
    format.setDelimiter(delimiter);

    TaskConfig dsConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    dsConfig.setStubWrapper(new UserCodeObjectWrapper<DelimitedInputFormat>(format));

    this.inputSplitProvider.addInputSplits(inPath, 5);

    inTask.setEnvironment(this.mockEnv);

    if (inTask instanceof DataSourceTask<?>) {
      ((DataSourceTask<?>) inTask).setUserCodeClassLoader(getClass().getClassLoader());
    }
    inTask.registerInputOutput();
  }
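
A minimal usage sketch for this helper; the concrete task type, the input-format subclass, the path, and the delimiter below are assumptions made for illustration, not part of the snippet above.

  // Hypothetical test call (MockDelimitedInputFormat and the path are made up for this sketch)
  DataSourceTask<PactRecord> sourceTask = new DataSourceTask<PactRecord>();
  registerFileInputTask(sourceTask, MockDelimitedInputFormat.class, "file:///tmp/points.txt", "\n");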
  /*
   * (non-Javadoc)
   *
   * @see eu.stratosphere.pact.runtime.task.AbstractPactTask#prepare()
   */
  @Override
  public void prepare() throws Exception {
    final TaskConfig config = this.taskContext.getTaskConfig();

    // set up memory and I/O parameters
    final long availableMemory = config.getMemorySize();

    // test minimum memory requirements
    LocalStrategy ls = config.getLocalStrategy();

    long strategyMinMem = 0;

    switch (ls) {
      case COMBININGSORT:
        strategyMinMem = MIN_REQUIRED_MEMORY;
        break;
    }

    if (availableMemory < strategyMinMem) {
      throw new RuntimeException(
          "The Combine task was initialized with too little memory for local strategy "
              + config.getLocalStrategy()
              + " : "
              + availableMemory
              + " bytes."
              + "Required is at least "
              + strategyMinMem
              + " bytes.");
    }

    // obtain the TaskManager's MemoryManager
    final MemoryManager memoryManager = this.taskContext.getMemoryManager();

    final MutableObjectIterator<T> in = this.taskContext.getInput(0);
    this.serializer = this.taskContext.getInputSerializer(0);
    this.comparator = this.taskContext.getInputComparator(0);

    switch (ls) {
      case COMBININGSORT:
        // The input is combined using a sort-merge strategy. Before spilling to disk, the
        // data volume is reduced using the combine() method of the ReduceStub. The sorter
        // provides an iterator over the sorted, grouped, and combined pairs.
        input =
            new AsynchronousPartialSorter<T>(
                memoryManager,
                in,
                this.taskContext.getOwningNepheleTask(),
                this.serializer,
                this.comparator.duplicate(),
                availableMemory);
        break;
      default:
        throw new RuntimeException("Invalid local strategy provided for CombineTask.");
    }
  }
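
The matching teardown is not part of this snippet. A sketch of what it could look like, assuming the task exposes a cleanup() hook and that the partial sorter, like the related sort-merger classes, offers a close() method:

  // Sketch only: release the sorter's memory when the task shuts down
  // (the cleanup() hook and close() on the sorter are assumptions, not shown in the snippet above)
  public void cleanup() throws Exception {
    if (this.input != null) {
      this.input.close();
      this.input = null;
    }
  }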
  private static JobInputVertex createPointsInput(
      JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    @SuppressWarnings("unchecked")
    CsvInputFormat pointsInFormat =
        new CsvInputFormat(
            '|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);
    JobInputVertex pointsInput =
        JobGraphUtils.createInput(
            pointsInFormat, pointsPath, "[Points]", jobGraph, numSubTasks, numSubTasks);
    {
      TaskConfig taskConfig = new TaskConfig(pointsInput.getConfiguration());
      taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      taskConfig.setOutputSerializer(serializer);

      TaskConfig chainedMapper = new TaskConfig(new Configuration());
      chainedMapper.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
      chainedMapper.setStubWrapper(new UserCodeObjectWrapper<PointBuilder>(new PointBuilder()));
      chainedMapper.addOutputShipStrategy(ShipStrategyType.FORWARD);
      chainedMapper.setOutputSerializer(serializer);

      taskConfig.addChainedTask(ChainedCollectorMapDriver.class, chainedMapper, "Build points");
    }

    return pointsInput;
  }
Example #5
  @Test
  public void testFailingMapTask() {
    int keyCnt = 100;
    int valCnt = 20;

    try {
      // environment
      initEnvironment(3 * 1024 * 1024);
      addInput(new UniformPactRecordGenerator(keyCnt, valCnt, false), 0);
      addOutput(this.outList);

      // chained combine config
      {
        final TaskConfig combineConfig = new TaskConfig(new Configuration());

        // input
        combineConfig.addInputToGroup(0);
        combineConfig.setInputSerializer(serFact, 0);

        // output
        combineConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
        combineConfig.setOutputSerializer(serFact);

        // driver
        combineConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP);
        combineConfig.setDriverComparator(compFact, 0);
        combineConfig.setMemoryDriver(3 * 1024 * 1024);

        // udf
        combineConfig.setStubClass(MockFailingCombineStub.class);

        getTaskConfig().addChainedTask(ChainedCombineDriver.class, combineConfig, "combine");
      }

      // chained map+combine
      {
        final RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> testTask =
            new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>();

        super.registerTask(testTask, MapDriver.class, MockMapStub.class);

        boolean stubFailed = false;

        try {
          testTask.invoke();
        } catch (Exception e) {
          stubFailed = true;
        }

        Assert.assertTrue("Stub exception was not forwarded.", stubFailed);
      }
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Example #6
  @Test
  public void testMapTask() {
    final int keyCnt = 100;
    final int valCnt = 20;

    try {

      // environment
      initEnvironment(3 * 1024 * 1024);
      addInput(new UniformPactRecordGenerator(keyCnt, valCnt, false), 0);
      addOutput(this.outList);

      // chained combine config
      {
        final TaskConfig combineConfig = new TaskConfig(new Configuration());

        // input
        combineConfig.addInputToGroup(0);
        combineConfig.setInputSerializer(serFact, 0);

        // output
        combineConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
        combineConfig.setOutputSerializer(serFact);

        // driver
        combineConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP);
        combineConfig.setDriverComparator(compFact, 0);
        combineConfig.setMemoryDriver(3 * 1024 * 1024);

        // udf
        combineConfig.setStubClass(MockReduceStub.class);

        getTaskConfig().addChainedTask(ChainedCombineDriver.class, combineConfig, "combine");
      }

      // chained map+combine
      {
        RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> testTask =
            new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>();
        registerTask(testTask, MapDriver.class, MockMapStub.class);

        try {
          testTask.invoke();
        } catch (Exception e) {
          e.printStackTrace();
          Assert.fail("Invoke method caused exception.");
        }
      }

      Assert.assertEquals(keyCnt, this.outList.size());
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Example #7
  public void registerFileOutputTask(
      AbstractOutputTask outTask, Class<? extends FileOutputFormat> stubClass, String outPath) {
    TaskConfig dsConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());

    dsConfig.setStubWrapper(new UserCodeClassWrapper<FileOutputFormat>(stubClass));
    dsConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outPath);

    outTask.setEnvironment(this.mockEnv);

    if (outTask instanceof DataSinkTask<?>) {
      ((DataSinkTask<?>) outTask).setUserCodeClassLoader(getClass().getClassLoader());
    }

    outTask.registerInputOutput();
  }
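
A minimal usage sketch, mirroring the input-task helper above; the sink task type, output-format class, and path are assumptions made for illustration.

  // Hypothetical test call (MockFileOutputFormat and the path are made up for this sketch)
  DataSinkTask<PactRecord> sinkTask = new DataSinkTask<PactRecord>();
  registerFileOutputTask(sinkTask, MockFileOutputFormat.class, "file:///tmp/result");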
Example #8
  public void registerTask(
      AbstractTask task,
      @SuppressWarnings("rawtypes") Class<? extends PactDriver> driver,
      Class<? extends Stub> stubClass) {
    final TaskConfig config = new TaskConfig(this.mockEnv.getTaskConfiguration());
    config.setDriver(driver);
    config.setStubWrapper(new UserCodeClassWrapper<Stub>(stubClass));

    task.setEnvironment(this.mockEnv);

    if (task instanceof RegularPactTask<?, ?>) {
      ((RegularPactTask<?, ?>) task).setUserCodeClassLoader(getClass().getClassLoader());
    }

    task.registerInputOutput();
  }
  @SuppressWarnings("unchecked")
  private static JobInputVertex createModelsInput(
      JobGraph jobGraph, String modelsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    CsvInputFormat modelsInFormat =
        new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class, LongValue.class);
    JobInputVertex modelsInput =
        JobGraphUtils.createInput(
            modelsInFormat, modelsPath, "Input[Models]", jobGraph, numSubTasks, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(modelsInput.getConfiguration());
      taskConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST);
      taskConfig.setOutputSerializer(serializer);
    }

    return modelsInput;
  }
  private static JobOutputVertex createOutput(
      JobGraph jobGraph, String resultPath, int numSubTasks, TypeSerializerFactory<?> serializer) {

    JobOutputVertex output =
        JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(output.getConfiguration());
      taskConfig.addInputToGroup(0);
      taskConfig.setInputSerializer(serializer, 0);

      PointOutFormat outFormat = new PointOutFormat();
      outFormat.setOutputFilePath(new Path(resultPath));

      taskConfig.setStubWrapper(new UserCodeObjectWrapper<PointOutFormat>(outFormat));
    }

    return output;
  }
  private static JobOutputVertex createOutput(
      JobGraph jobGraph, String resultPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    JobOutputVertex output =
        JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(output.getConfiguration());
      taskConfig.addInputToGroup(0);
      taskConfig.setInputSerializer(serializer, 0);

      @SuppressWarnings("unchecked")
      CsvOutputFormat outFormat =
          new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class, LongValue.class);
      outFormat.setOutputFilePath(new Path(resultPath));

      taskConfig.setStubWrapper(new UserCodeObjectWrapper<CsvOutputFormat>(outFormat));
    }

    return output;
  }
Example #12
  public void addOutput(List<PactRecord> output) {
    this.mockEnv.addOutput(output);
    TaskConfig conf = new TaskConfig(this.mockEnv.getTaskConfiguration());
    conf.addOutputShipStrategy(ShipStrategyType.FORWARD);
    conf.setOutputSerializer(PactRecordSerializerFactory.get());
  }
Example #13
  public void addInput(MutableObjectIterator<PactRecord> input, int groupId) {
    this.mockEnv.addInput(input);
    TaskConfig conf = new TaskConfig(this.mockEnv.getTaskConfiguration());
    conf.addInputToGroup(groupId);
    conf.setInputSerializer(PactRecordSerializerFactory.get(), groupId);
  }
  private static JobTaskVertex createMapper(
      JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {
    JobTaskVertex pointsInput =
        JobGraphUtils.createTask(
            RegularPactTask.class, "Map[DotProducts]", jobGraph, numSubTasks, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(pointsInput.getConfiguration());

      taskConfig.setStubWrapper(new UserCodeClassWrapper<DotProducts>(DotProducts.class));
      taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      taskConfig.setOutputSerializer(serializer);
      taskConfig.setDriver(CollectorMapDriver.class);
      taskConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);

      taskConfig.addInputToGroup(0);
      taskConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
      taskConfig.setInputSerializer(serializer, 0);

      taskConfig.setBroadcastInputName("models", 0);
      taskConfig.addBroadcastInputToGroup(0);
      taskConfig.setBroadcastInputSerializer(serializer, 0);
    }

    return pointsInput;
  }
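
Read together with createModelsInput above, the broadcast side of this mapper might be assembled roughly as follows; the actual channel connections are not shown in these snippets, so they are omitted here, and the variables are assumed to be defined by the caller.

  // Sketch only: the models vertex feeds broadcast input group 0, which the mapper
  // registers under the name "models"; the points vertex feeds regular input group 0.
  JobInputVertex models = createModelsInput(jobGraph, modelsPath, numSubTasks, serializer);
  JobTaskVertex dotProductMapper = createMapper(jobGraph, numSubTasks, serializer);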
  private static JobTaskVertex createIterationHead(
      JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {
    JobTaskVertex head =
        JobGraphUtils.createTask(
            IterationHeadPactTask.class, "Iteration Head", jobGraph, numSubTasks, numSubTasks);

    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);

    // initial input / partial solution
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(serializer, 0);

    // back channel / iterations
    headConfig.setBackChannelMemory(MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE);

    // output into iteration. broadcasting the centers
    headConfig.setOutputSerializer(serializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST);

    // final output
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(serializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);

    // the sync
    headConfig.setIterationHeadIndexOfSyncOutput(2);

    // the driver
    headConfig.setDriver(NoOpDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.UNARY_NO_OP);

    return head;
  }
  private static JobTaskVertex createMapper(
      JobGraph jobGraph,
      int numSubTasks,
      TypeSerializerFactory<?> inputSerializer,
      TypeSerializerFactory<?> broadcastVarSerializer,
      TypeSerializerFactory<?> outputSerializer,
      TypeComparatorFactory<?> outputComparator) {
    JobTaskVertex mapper =
        JobGraphUtils.createTask(
            IterationIntermediatePactTask.class,
            "Map (Select nearest center)",
            jobGraph,
            numSubTasks,
            numSubTasks);

    TaskConfig intermediateConfig = new TaskConfig(mapper.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);

    intermediateConfig.setDriver(CollectorMapDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.setInputSerializer(inputSerializer, 0);

    intermediateConfig.setOutputSerializer(outputSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    intermediateConfig.setOutputComparator(outputComparator, 0);

    intermediateConfig.setBroadcastInputName("centers", 0);
    intermediateConfig.addBroadcastInputToGroup(0);
    intermediateConfig.setBroadcastInputSerializer(broadcastVarSerializer, 0);

    // the udf
    intermediateConfig.setStubWrapper(
        new UserCodeObjectWrapper<SelectNearestCenter>(new SelectNearestCenter()));

    return mapper;
  }
  private static JobTaskVertex createReducer(
      JobGraph jobGraph,
      int numSubTasks,
      TypeSerializerFactory<?> inputSerializer,
      TypeComparatorFactory<?> inputComparator,
      TypeSerializerFactory<?> outputSerializer) {
    // ---------------- the tail (co group) --------------------

    JobTaskVertex tail =
        JobGraphUtils.createTask(
            IterationTailPactTask.class,
            "Reduce / Iteration Tail",
            jobGraph,
            numSubTasks,
            numSubTasks);

    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
    tailConfig.setIsWorksetUpdate();

    // inputs and driver
    tailConfig.setDriver(ReduceDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.setInputSerializer(inputSerializer, 0);
    tailConfig.setDriverComparator(inputComparator, 0);

    tailConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    tailConfig.setInputComparator(inputComparator, 0);
    tailConfig.setMemoryInput(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE);
    tailConfig.setFilehandlesInput(0, 128);
    tailConfig.setSpillingThresholdInput(0, 0.9f);

    // output
    tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    tailConfig.setOutputSerializer(outputSerializer);

    // the udf
    tailConfig.setStubWrapper(
        new UserCodeObjectWrapper<RecomputeClusterCenter>(new RecomputeClusterCenter()));

    return tail;
  }
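
Putting the iteration helpers from these examples together, a driver could build the skeleton of the job roughly as follows; the channel wiring between the vertices is not shown in these snippets and is therefore omitted, and jobGraph, numSubTasks, maxIterations, serializer, and comparator are assumed to be defined by the caller.

  // Sketch only: assemble the iteration skeleton from the helpers shown above
  JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer);
  JobTaskVertex mapper = createMapper(jobGraph, numSubTasks, serializer, serializer, serializer, comparator);
  JobTaskVertex reducer = createReducer(jobGraph, numSubTasks, serializer, comparator, serializer);
  JobOutputVertex sync = createSync(jobGraph, maxIterations, numSubTasks);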