@Test
  public void testSort() throws Exception {
    // Fills a FixedLengthRecordSorter with generated IntPairs, sorts it with QuickSort and
    // verifies that the keys are read back in non-decreasing order.
    final int NUM_RECORDS = 559273;
    final int numSegments = MEMORY_SIZE / MEMORY_PAGE_SIZE;
    final List<MemorySegment> memory =
        this.memoryManager.allocatePages(new DummyInvokable(), numSegments);

    FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory);
    try {
      RandomIntPairGenerator generator = new RandomIntPairGenerator(SEED);

      // write the records: stop as soon as the sorter rejects a record or exactly
      // NUM_RECORDS records have been accepted
      IntPair record = new IntPair();
      int num = 0;
      do {
        generator.next(record);
      } while (sorter.write(record) && ++num < NUM_RECORDS);

      QuickSort qs = new QuickSort();
      qs.sort(sorter);

      MutableObjectIterator<IntPair> iter = sorter.getIterator();

      // the first record seeds the comparison; an empty result is a failure of its own
      IntPair readTarget = iter.next(new IntPair());
      Assert.assertNotNull("The sorter returned no records.", readTarget);
      int last = readTarget.getKey();

      while ((readTarget = iter.next(readTarget)) != null) {
        final int current = readTarget.getKey();

        // compare directly: "last - current" overflows for keys of opposite sign
        if (last > current) {
          Assert.fail("Next key is not larger or equal to previous key.");
        }

        last = current;
      }
    } finally {
      // release the memory occupied by the buffers, even when an assertion failed
      this.memoryManager.release(sorter.dispose());
    }
  }
Ejemplo n.º 2
0
    /**
     * Drains the input iterator into the spilling buffer, serializing one record at a time while
     * the {@code running} flag is set. Signals the enclosing {@code TempBarrier} through
     * {@code writingDone()} once the loop ends; any {@link Throwable} raised on the way is handed
     * to {@code TempBarrier.setException} instead of being propagated.
     */
    @Override
    public void run() {
      final MutableObjectIterator<T> source = this.input;
      final TypeSerializer<T> recordSerializer = this.serializer;
      final SpillingBuffer target = this.buffer;

      try {
        T current = recordSerializer.createInstance();

        while (true) {
          // check the flag before pulling the next record, exactly once per iteration
          if (!this.running) {
            break;
          }
          current = source.next(current);
          if (current == null) {
            break;
          }
          recordSerializer.serialize(current, target);
        }

        TempBarrier.this.writingDone();
      } catch (Throwable failure) {
        TempBarrier.this.setException(failure);
      }
    }
  /**
   * Writes generated {@code IntPair}s into the sort buffer until a write is rejected, then resets
   * the generator and checks that the buffer's iterator returns the same keys and values in the
   * same order, and as many records as were accepted.
   */
  @Test
  public void testWriteAndIterator() throws Exception {
    final int pageCount = MEMORY_SIZE / MEMORY_PAGE_SIZE;
    final List<MemorySegment> pages =
        this.memoryManager.allocatePages(new DummyInvokable(), pageCount);

    FixedLengthRecordSorter<IntPair> buffer = newSortBuffer(pages);
    RandomIntPairGenerator source = new RandomIntPairGenerator(SEED);

    // fill the buffer; "written" counts only the records the buffer accepted
    IntPair expected = new IntPair();
    int written = 0;
    source.next(expected);
    while (buffer.write(expected)) {
      written++;
      source.next(expected);
    }

    // replay the generator from the start to compare against what is read back
    source.reset();

    MutableObjectIterator<IntPair> reader = buffer.getIterator();
    int readBack = 0;

    for (IntPair actual = reader.next(new IntPair());
        actual != null;
        actual = reader.next(actual)) {
      readBack++;

      source.next(expected);

      final int actualKey = actual.getKey();
      final int expectedKey = expected.getKey();

      final int actualValue = actual.getValue();
      final int expectedValue = expected.getValue();

      Assert.assertEquals("The re-read key is wrong", expectedKey, actualKey);
      Assert.assertEquals("The re-read value is wrong", expectedValue, actualValue);
    }

    Assert.assertEquals("Incorrect number of records", written, readBack);

    // hand the buffer's memory back to the memory manager
    this.memoryManager.release(buffer.dispose());
  }
Ejemplo n.º 4
0
  /**
   * Forwards every record of input 0 to the output collector while the {@code running} flag is
   * set. With object reuse enabled a single instance is handed back to the iterator on every
   * call; otherwise a fresh instance is created by the serializer for each call.
   *
   * @throws Exception whatever the input iterator or the collector throws
   */
  @Override
  public void run() throws Exception {
    // keep the frequently used references in locals
    final MutableObjectIterator<T> source = this.taskContext.getInput(0);
    final Collector<T> sink = this.taskContext.getOutputCollector();

    if (!objectReuseEnabled) {
      final TypeSerializer<T> serializer =
          this.taskContext.<T>getInputSerializer(0).getSerializer();

      while (this.running) {
        final T element = source.next(serializer.createInstance());
        if (element == null) {
          break;
        }
        sink.collect(element);
      }
      return;
    }

    T reused = this.taskContext.<T>getInputSerializer(0).getSerializer().createInstance();

    while (this.running) {
      reused = source.next(reused);
      if (reused == null) {
        break;
      }
      sink.collect(reused);
    }
  }
    /**
     * Reads the next tuple from {@code tupleInput}, takes the {@code Long} field at
     * {@code pointerPos} as a position, seeks {@code recordsInputs} to it and deserializes the
     * record found there.
     *
     * @return the deserialized record, or {@code null} when the tuple input is exhausted
     * @throws IOException if reading the tuple, seeking or deserializing fails
     */
    @Override
    public T next() throws IOException {
      final Tuple pointerTuple = tupleInput.next(this.value);
      if (pointerTuple == null) {
        return null;
      }

      // remember the returned tuple so it is passed as the reuse target on the next call
      this.value = pointerTuple;

      final long position = pointerTuple.<Long>getField(pointerPos);
      recordsInputs.seek(position);
      return serializer.deserialize(recordsInputs);
    }
Ejemplo n.º 6
0
  /**
   * Folds all input records into a single value with the reduce stub and emits that value to the
   * output collector. Returns without emitting anything when the input has no records.
   *
   * @throws Exception whatever the input iterator, the stub or the collector throws
   */
  @Override
  public void run() throws Exception {
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          this.taskContext.formatLogString("AllReduce preprocessing done. Running Reducer code."));
    }

    final GenericReduce<T> reducer = this.taskContext.getStub();
    final MutableObjectIterator<T> records = this.input;
    final TypeSerializer<T> recordSerializer = this.serializer;

    // the first record seeds the aggregate; empty input produces no output at all
    T aggregate = records.next(recordSerializer.createInstance());
    if (aggregate == null) {
      return;
    }

    while (running) {
      final T following = records.next(recordSerializer.createInstance());
      if (following == null) {
        break;
      }
      aggregate = reducer.reduce(aggregate, following);
    }

    this.taskContext.getOutputCollector().collect(aggregate);
  }
Ejemplo n.º 7
0
    /**
     * Writes every record of the iterator to a text file, one {@code key_value} line per record,
     * where key and value are the {@code IntValue} fields 0 and 1 of the record.
     *
     * @param inIt source of the records to write
     * @param inputFilePath path of the file to create
     * @param insertInvalidData if {@code true}, a marker line that does not follow the
     *     {@code key_value} layout is written before the first and after the last record
     * @throws IOException if the file cannot be opened or written, or the iterator fails
     */
    public static void prepareInputFile(
        MutableObjectIterator<Record> inIt, String inputFilePath, boolean insertInvalidData)
        throws IOException {
      final String invalidLine = "####_I_AM_INVALID_########\n";

      final BufferedWriter bw = new BufferedWriter(new FileWriter(inputFilePath));
      try {
        if (insertInvalidData) {
          bw.write(invalidLine);
        }

        Record rec = new Record();
        while ((rec = inIt.next(rec)) != null) {
          IntValue key = rec.getField(0, IntValue.class);
          IntValue value = rec.getField(1, IntValue.class);

          bw.write(key.getValue() + "_" + value.getValue() + "\n");
        }

        if (insertInvalidData) {
          bw.write(invalidLine);
        }

        bw.flush();
      } finally {
        // close the file handle even when the iterator or a write failed
        bw.close();
      }
    }
  /**
   * Feeds generated {@code IntPair}s through a {@code UnilateralSortMerger} and checks that the
   * result iterator yields all of them with non-decreasing keys.
   */
  @Test
  public void testSpillingSortWithIntermediateMergeIntPair() {
    try {
      // amount of pairs
      final int PAIRS = 50000000;

      // input data
      final RandomIntPairGenerator generator = new RandomIntPairGenerator(12345678, PAIRS);

      // serializer and comparator
      final TypeSerializerFactory<IntPair> serializerFactory =
          new IntPairSerializer.IntPairSerializerFactory();
      final TypeComparator<IntPair> comparator = new IntPairComparator();

      // merge iterator
      LOG.debug("Initializing sortmerger...");

      Sorter<IntPair> merger =
          new UnilateralSortMerger<IntPair>(
              this.memoryManager,
              this.ioManager,
              generator,
              this.parentTask,
              serializerFactory,
              comparator,
              (double) 64 / 78,
              4,
              0.7f);

      // emit data
      LOG.debug("Emitting data...");

      // check order
      MutableObjectIterator<IntPair> iterator = merger.getIterator();

      LOG.debug("Checking results...");
      int pairsRead = 1;

      IntPair rec1 = new IntPair();
      IntPair rec2 = new IntPair();

      rec1 = iterator.next(rec1);
      Assert.assertTrue("The sorter returned no records.", rec1 != null);

      while ((rec2 = iterator.next(rec2)) != null) {
        final int k1 = rec1.getKey();
        final int k2 = rec2.getKey();
        pairsRead++;

        // compare directly: "k1 - k2" overflows for keys of opposite sign
        Assert.assertTrue("Keys are not in non-decreasing order.", k1 <= k2);

        // the record just read becomes the previous one; the other instance is reused
        IntPair tmp = rec1;
        rec1 = rec2;
        rec2 = tmp;
      }
      Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
      merger.close();
      testSuccess = true;
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Feeds generated records through a {@code UnilateralSortMerger} and checks that the result
   * iterator yields all of them with keys in the order defined by
   * {@code TestData.KeyComparator}.
   */
  @Test
  public void testSpillingSortWithIntermediateMerge() {
    try {
      // number of records pushed through the sorter
      final int PAIRS = 10000000;

      // comparator used for verifying the output order
      final Comparator<TestData.Key> orderCheck = new TestData.KeyComparator();

      final TestData.Generator dataGenerator =
          new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
      final MutableObjectIterator<Record> input =
          new TestData.GeneratorIterator(dataGenerator, PAIRS);

      // set up the sorter
      LOG.debug("Initializing sortmerger...");

      Sorter<Record> sortMerger =
          new UnilateralSortMerger<Record>(
              this.memoryManager,
              this.ioManager,
              input,
              this.parentTask,
              this.pactRecordSerializer,
              this.pactRecordComparator,
              (double) 64 / 78,
              16,
              0.7f);

      LOG.debug("Emitting data...");

      // obtain the sorted result
      MutableObjectIterator<Record> sorted = sortMerger.getIterator();

      LOG.debug("Checking results...");
      int pairsRead = 1;
      int nextStep = PAIRS / 20;

      Record previous = new Record();
      Record current = new Record();

      previous = sorted.next(previous);
      Assert.assertTrue(previous != null);

      for (current = sorted.next(current); current != null; current = sorted.next(current)) {
        final Key previousKey = previous.getField(0, TestData.Key.class);
        final Key currentKey = current.getField(0, TestData.Key.class);
        pairsRead++;

        Assert.assertTrue(orderCheck.compare(previousKey, currentKey) <= 0);

        // swap the two record instances; the key copy is kept exactly as in the original test
        // NOTE(review): the setKey call looks like a leftover - confirm against Record semantics
        final Record spare = previous;
        previous = current;
        previousKey.setKey(currentKey.getKey());
        current = spare;

        // progress bookkeeping (nothing is logged here)
        if (pairsRead == nextStep) {
          nextStep += PAIRS / 20;
        }
      }
      Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
      sortMerger.close();
      testSuccess = true;
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Feeds {@code NUM_PAIRS} generated records through a {@code UnilateralSortMerger} and checks
   * that the result iterator yields all of them with keys in the order defined by
   * {@code TestData.KeyComparator}.
   */
  @Test
  public void testSpillingSort() {
    try {
      // comparator
      final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

      final TestData.Generator generator =
          new TestData.Generator(
              SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.CONSTANT, VAL);
      final MutableObjectIterator<Record> source =
          new TestData.GeneratorIterator(generator, NUM_PAIRS);

      // merge iterator
      LOG.debug("Initializing sortmerger...");

      Sorter<Record> merger =
          new UnilateralSortMerger<Record>(
              this.memoryManager,
              this.ioManager,
              source,
              this.parentTask,
              this.pactRecordSerializer,
              this.pactRecordComparator,
              (double) 16 / 78,
              64,
              0.7f);

      // emit data
      LOG.debug("Reading and sorting data...");

      // check order
      MutableObjectIterator<Record> iterator = merger.getIterator();

      LOG.debug("Checking results...");
      int pairsEmitted = 1;

      Record rec1 = new Record();
      Record rec2 = new Record();

      Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
      while ((rec2 = iterator.next(rec2)) != null) {
        final Key k1 = rec1.getField(0, TestData.Key.class);
        final Key k2 = rec2.getField(0, TestData.Key.class);
        pairsEmitted++;

        Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

        // swap the two record instances so the one just read becomes the previous one
        // NOTE(review): the setKey call looks like a leftover - confirm against Record semantics
        Record tmp = rec1;
        rec1 = rec2;
        k1.setKey(k2.getKey());

        rec2 = tmp;
      }
      // assertEquals reports expected and actual counts when the check fails
      Assert.assertEquals("Not all pairs were read back in.", NUM_PAIRS, pairsEmitted);

      merger.close();
      testSuccess = true;
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Ejemplo n.º 11
0
  /**
   * Runs the data sink: loads the {@code ExecutionConfig} from the job configuration, sets up
   * the local strategy configured for input 0 (none, or a sort through a
   * {@code UnilateralSortMerger}), opens the output format and writes every input record to it
   * until the input is exhausted or the task is canceled.
   *
   * <p>On an exception a best-effort cleanup is attempted on formats implementing
   * {@code CleanupWhenUnsuccessful}. The exception is rethrown unless the task was canceled;
   * a {@code CancelTaskException} is always rethrown. The format, the local strategy and the
   * readers are closed or cleared in the {@code finally} block.
   *
   * @throws Exception if loading the configuration, initializing the input or writing fails
   */
  @Override
  public void invoke() throws Exception {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getLogString("Starting data sink operator"));
    }

    ExecutionConfig executionConfig;
    try {
      ExecutionConfig c =
          (ExecutionConfig)
              InstantiationUtil.readObjectFromConfig(
                  getJobConfiguration(), ExecutionConfig.CONFIG_KEY, getUserCodeClassLoader());
      if (c != null) {
        executionConfig = c;
      } else {
        LOG.warn("The execution config returned by the configuration was null");
        executionConfig = new ExecutionConfig();
      }
    } catch (IOException e) {
      // keep the original exception as cause so its stack trace is not lost
      throw new RuntimeException("Could not load ExecutionConfig from Job Configuration: " + e, e);
    } catch (ClassNotFoundException e) {
      // keep the original exception as cause so its stack trace is not lost
      throw new RuntimeException("Could not load ExecutionConfig from Job Configuration: " + e, e);
    }
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();

    try {

      // initialize local strategies
      MutableObjectIterator<IT> input1;
      switch (this.config.getInputLocalStrategy(0)) {
        case NONE:
          // nothing to do
          localStrategy = null;
          input1 = reader;
          break;
        case SORT:
          // initialize sort local strategy
          try {
            // get type comparator
            TypeComparatorFactory<IT> compFact =
                this.config.getInputComparator(0, getUserCodeClassLoader());
            if (compFact == null) {
              throw new Exception("Missing comparator factory for local strategy on input " + 0);
            }

            // initialize sorter
            UnilateralSortMerger<IT> sorter =
                new UnilateralSortMerger<IT>(
                    getEnvironment().getMemoryManager(),
                    getEnvironment().getIOManager(),
                    this.reader,
                    this,
                    this.inputTypeSerializerFactory,
                    compFact.createComparator(),
                    this.config.getRelativeMemoryInput(0),
                    this.config.getFilehandlesInput(0),
                    this.config.getSpillingThresholdInput(0));

            this.localStrategy = sorter;
            input1 = sorter.getIterator();
          } catch (Exception e) {
            throw new RuntimeException(
                "Initializing the input processing failed"
                    + (e.getMessage() == null ? "." : ": " + e.getMessage()),
                e);
          }
          break;
        default:
          throw new RuntimeException("Invalid local strategy for DataSinkTask");
      }

      // read the reader and write it to the output

      final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer();
      final MutableObjectIterator<IT> input = input1;
      final OutputFormat<IT> format = this.format;

      // check if task has been canceled
      if (this.taskCanceled) {
        return;
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Starting to produce output"));
      }

      // open
      format.open(
          this.getEnvironment().getIndexInSubtaskGroup(),
          this.getEnvironment().getNumberOfSubtasks());

      if (objectReuseEnabled) {
        // one instance is handed back to the iterator on every call
        IT record = serializer.createInstance();

        // work!
        while (!this.taskCanceled && ((record = input.next(record)) != null)) {
          format.writeRecord(record);
        }
      } else {
        IT record;

        // work!
        while (!this.taskCanceled && ((record = input.next()) != null)) {
          format.writeRecord(record);
        }
      }

      // close. We close here such that a regular close throwing an exception marks a task as
      // failed.
      if (!this.taskCanceled) {
        this.format.close();
        this.format = null;
      }
    } catch (Exception ex) {

      // make a best effort to clean up
      try {
        if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
          cleanupCalled = true;
          ((CleanupWhenUnsuccessful) format).tryCleanupOnError();
        }
      } catch (Throwable t) {
        LOG.error("Cleanup on error failed.", t);
      }

      ex = ExceptionInChainedStubException.exceptionUnwrap(ex);

      if (ex instanceof CancelTaskException) {
        // forward canceling exception
        throw ex;
      }
      // drop, if the task was canceled
      else if (!this.taskCanceled) {
        if (LOG.isErrorEnabled()) {
          LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
        }
        throw ex;
      }
    } finally {
      if (this.format != null) {
        // close format, if it has not been closed, yet.
        // This should only be the case if we had a previous error, or were canceled.
        try {
          this.format.close();
        } catch (Throwable t) {
          if (LOG.isWarnEnabled()) {
            LOG.warn(getLogString("Error closing the output format"), t);
          }
        }
      }
      // close local strategy if necessary
      if (localStrategy != null) {
        try {
          this.localStrategy.close();
        } catch (Throwable t) {
          LOG.error("Error closing local strategy", t);
        }
      }

      RegularPactTask.clearReaders(new MutableReader[] {inputReader});
    }

    if (!this.taskCanceled) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Finished data sink operator"));
      }
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Data sink operator cancelled"));
      }
    }
  }