@Test public void testSort() throws Exception { final int NUM_RECORDS = 559273; final int numSegments = MEMORY_SIZE / MEMORY_PAGE_SIZE; final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), numSegments); FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory); RandomIntPairGenerator generator = new RandomIntPairGenerator(SEED); // write the records IntPair record = new IntPair(); int num = -1; do { generator.next(record); num++; } while (sorter.write(record) && num < NUM_RECORDS); QuickSort qs = new QuickSort(); qs.sort(sorter); MutableObjectIterator<IntPair> iter = sorter.getIterator(); IntPair readTarget = new IntPair(); int current = 0; int last = 0; iter.next(readTarget); // readTarget.getFieldInto(0, last); last = readTarget.getKey(); while ((readTarget = iter.next(readTarget)) != null) { current = readTarget.getKey(); final int cmp = last - current; if (cmp > 0) { Assert.fail("Next key is not larger or equal to previous key."); } int tmp = current; current = last; last = tmp; } // release the memory occupied by the buffers this.memoryManager.release(sorter.dispose()); }
@Override public void run() { final MutableObjectIterator<T> input = this.input; final TypeSerializer<T> serializer = this.serializer; final SpillingBuffer buffer = this.buffer; try { T record = serializer.createInstance(); while (this.running && ((record = input.next(record)) != null)) { serializer.serialize(record, buffer); } TempBarrier.this.writingDone(); } catch (Throwable t) { TempBarrier.this.setException(t); } }
@Test public void testWriteAndIterator() throws Exception { final int numSegments = MEMORY_SIZE / MEMORY_PAGE_SIZE; final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), numSegments); FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory); RandomIntPairGenerator generator = new RandomIntPairGenerator(SEED); // write the records IntPair record = new IntPair(); int num = -1; do { generator.next(record); num++; } while (sorter.write(record)); // re-read the records generator.reset(); MutableObjectIterator<IntPair> iter = sorter.getIterator(); IntPair readTarget = new IntPair(); int count = 0; while ((readTarget = iter.next(readTarget)) != null) { count++; generator.next(record); int rk = readTarget.getKey(); int gk = record.getKey(); int rv = readTarget.getValue(); int gv = record.getValue(); Assert.assertEquals("The re-read key is wrong", gk, rk); Assert.assertEquals("The re-read value is wrong", gv, rv); } Assert.assertEquals("Incorrect number of records", num, count); // release the memory occupied by the buffers this.memoryManager.release(sorter.dispose()); }
@Override public void run() throws Exception { // cache references on the stack final MutableObjectIterator<T> input = this.taskContext.getInput(0); final Collector<T> output = this.taskContext.getOutputCollector(); if (objectReuseEnabled) { T record = this.taskContext.<T>getInputSerializer(0).getSerializer().createInstance(); while (this.running && ((record = input.next(record)) != null)) { output.collect(record); } } else { T record; TypeSerializer<T> serializer = this.taskContext.<T>getInputSerializer(0).getSerializer(); while (this.running && ((record = input.next(serializer.createInstance())) != null)) { output.collect(record); } } }
@Override public T next() throws IOException { Tuple value = tupleInput.next(this.value); if (value != null) { this.value = value; long pointer = value.<Long>getField(pointerPos); recordsInputs.seek(pointer); return serializer.deserialize(recordsInputs); } else { return null; } }
@Override public void run() throws Exception { if (LOG.isDebugEnabled()) { LOG.debug( this.taskContext.formatLogString("AllReduce preprocessing done. Running Reducer code.")); } final GenericReduce<T> stub = this.taskContext.getStub(); final MutableObjectIterator<T> input = this.input; final TypeSerializer<T> serializer = this.serializer; T val1 = serializer.createInstance(); if ((val1 = input.next(val1)) == null) { return; } T val2; while (running && (val2 = input.next(serializer.createInstance())) != null) { val1 = stub.reduce(val1, val2); } this.taskContext.getOutputCollector().collect(val1); }
public static void prepareInputFile( MutableObjectIterator<Record> inIt, String inputFilePath, boolean insertInvalidData) throws IOException { FileWriter fw = new FileWriter(inputFilePath); BufferedWriter bw = new BufferedWriter(fw); if (insertInvalidData) { bw.write("####_I_AM_INVALID_########\n"); } Record rec = new Record(); while ((rec = inIt.next(rec)) != null) { IntValue key = rec.getField(0, IntValue.class); IntValue value = rec.getField(1, IntValue.class); bw.write(key.getValue() + "_" + value.getValue() + "\n"); } if (insertInvalidData) { bw.write("####_I_AM_INVALID_########\n"); } bw.flush(); bw.close(); }
@Test public void testSpillingSortWithIntermediateMergeIntPair() { try { // amount of pairs final int PAIRS = 50000000; // comparator final RandomIntPairGenerator generator = new RandomIntPairGenerator(12345678, PAIRS); final TypeSerializerFactory<IntPair> serializerFactory = new IntPairSerializer.IntPairSerializerFactory(); final TypeComparator<IntPair> comparator = new IntPairComparator(); // merge iterator LOG.debug("Initializing sortmerger..."); Sorter<IntPair> merger = new UnilateralSortMerger<IntPair>( this.memoryManager, this.ioManager, generator, this.parentTask, serializerFactory, comparator, (double) 64 / 78, 4, 0.7f); // emit data LOG.debug("Emitting data..."); // check order MutableObjectIterator<IntPair> iterator = merger.getIterator(); LOG.debug("Checking results..."); int pairsRead = 1; int nextStep = PAIRS / 20; IntPair rec1 = new IntPair(); IntPair rec2 = new IntPair(); Assert.assertTrue((rec1 = iterator.next(rec1)) != null); while ((rec2 = iterator.next(rec2)) != null) { final int k1 = rec1.getKey(); final int k2 = rec2.getKey(); pairsRead++; Assert.assertTrue(k1 - k2 <= 0); IntPair tmp = rec1; rec1 = rec2; rec2 = tmp; // log if (pairsRead == nextStep) { nextStep += PAIRS / 20; } } Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead); merger.close(); testSuccess = true; } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
@Test public void testSpillingSortWithIntermediateMerge() { try { // amount of pairs final int PAIRS = 10000000; // comparator final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator(); final TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH); final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, PAIRS); // merge iterator LOG.debug("Initializing sortmerger..."); Sorter<Record> merger = new UnilateralSortMerger<Record>( this.memoryManager, this.ioManager, source, this.parentTask, this.pactRecordSerializer, this.pactRecordComparator, (double) 64 / 78, 16, 0.7f); // emit data LOG.debug("Emitting data..."); // check order MutableObjectIterator<Record> iterator = merger.getIterator(); LOG.debug("Checking results..."); int pairsRead = 1; int nextStep = PAIRS / 20; Record rec1 = new Record(); Record rec2 = new Record(); Assert.assertTrue((rec1 = iterator.next(rec1)) != null); while ((rec2 = iterator.next(rec2)) != null) { final Key k1 = rec1.getField(0, TestData.Key.class); final Key k2 = rec2.getField(0, TestData.Key.class); pairsRead++; Assert.assertTrue(keyComparator.compare(k1, k2) <= 0); Record tmp = rec1; rec1 = rec2; k1.setKey(k2.getKey()); rec2 = tmp; // log if (pairsRead == nextStep) { nextStep += PAIRS / 20; } } Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead); merger.close(); testSuccess = true; } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
@Test public void testSpillingSort() { try { // comparator final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator(); final TestData.Generator generator = new TestData.Generator( SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.CONSTANT, VAL); final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, NUM_PAIRS); // merge iterator LOG.debug("Initializing sortmerger..."); Sorter<Record> merger = new UnilateralSortMerger<Record>( this.memoryManager, this.ioManager, source, this.parentTask, this.pactRecordSerializer, this.pactRecordComparator, (double) 16 / 78, 64, 0.7f); // emit data LOG.debug("Reading and sorting data..."); // check order MutableObjectIterator<Record> iterator = merger.getIterator(); LOG.debug("Checking results..."); int pairsEmitted = 1; Record rec1 = new Record(); Record rec2 = new Record(); Assert.assertTrue((rec1 = iterator.next(rec1)) != null); while ((rec2 = iterator.next(rec2)) != null) { final Key k1 = rec1.getField(0, TestData.Key.class); final Key k2 = rec2.getField(0, TestData.Key.class); pairsEmitted++; Assert.assertTrue(keyComparator.compare(k1, k2) <= 0); Record tmp = rec1; rec1 = rec2; k1.setKey(k2.getKey()); rec2 = tmp; } Assert.assertTrue(NUM_PAIRS == pairsEmitted); merger.close(); testSuccess = true; } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
@Override public void invoke() throws Exception { if (LOG.isDebugEnabled()) { LOG.debug(getLogString("Starting data sink operator")); } ExecutionConfig executionConfig; try { ExecutionConfig c = (ExecutionConfig) InstantiationUtil.readObjectFromConfig( getJobConfiguration(), ExecutionConfig.CONFIG_KEY, getUserCodeClassLoader()); if (c != null) { executionConfig = c; } else { LOG.warn("The execution config returned by the configuration was null"); executionConfig = new ExecutionConfig(); } } catch (IOException e) { throw new RuntimeException("Could not load ExecutionConfig from Job Configuration: " + e); } catch (ClassNotFoundException e) { throw new RuntimeException("Could not load ExecutionConfig from Job Configuration: " + e); } boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled(); try { // initialize local strategies MutableObjectIterator<IT> input1; switch (this.config.getInputLocalStrategy(0)) { case NONE: // nothing to do localStrategy = null; input1 = reader; break; case SORT: // initialize sort local strategy try { // get type comparator TypeComparatorFactory<IT> compFact = this.config.getInputComparator(0, getUserCodeClassLoader()); if (compFact == null) { throw new Exception("Missing comparator factory for local strategy on input " + 0); } // initialize sorter UnilateralSortMerger<IT> sorter = new UnilateralSortMerger<IT>( getEnvironment().getMemoryManager(), getEnvironment().getIOManager(), this.reader, this, this.inputTypeSerializerFactory, compFact.createComparator(), this.config.getRelativeMemoryInput(0), this.config.getFilehandlesInput(0), this.config.getSpillingThresholdInput(0)); this.localStrategy = sorter; input1 = sorter.getIterator(); } catch (Exception e) { throw new RuntimeException( "Initializing the input processing failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e); } break; default: throw new RuntimeException("Invalid local strategy for DataSinkTask"); } // read the reader and write it to the output final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer(); final MutableObjectIterator<IT> input = input1; final OutputFormat<IT> format = this.format; // check if task has been canceled if (this.taskCanceled) { return; } if (LOG.isDebugEnabled()) { LOG.debug(getLogString("Starting to produce output")); } // open format.open( this.getEnvironment().getIndexInSubtaskGroup(), this.getEnvironment().getNumberOfSubtasks()); if (objectReuseEnabled) { IT record = serializer.createInstance(); // work! while (!this.taskCanceled && ((record = input.next(record)) != null)) { format.writeRecord(record); } } else { IT record; // work! while (!this.taskCanceled && ((record = input.next()) != null)) { format.writeRecord(record); } } // close. We close here such that a regular close throwing an exception marks a task as // failed. if (!this.taskCanceled) { this.format.close(); this.format = null; } } catch (Exception ex) { // make a best effort to clean up try { if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) { cleanupCalled = true; ((CleanupWhenUnsuccessful) format).tryCleanupOnError(); } } catch (Throwable t) { LOG.error("Cleanup on error failed.", t); } ex = ExceptionInChainedStubException.exceptionUnwrap(ex); if (ex instanceof CancelTaskException) { // forward canceling exception throw ex; } // drop, if the task was canceled else if (!this.taskCanceled) { if (LOG.isErrorEnabled()) { LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex); } throw ex; } } finally { if (this.format != null) { // close format, if it has not been closed, yet. // This should only be the case if we had a previous error, or were canceled. try { this.format.close(); } catch (Throwable t) { if (LOG.isWarnEnabled()) { LOG.warn(getLogString("Error closing the output format"), t); } } } // close local strategy if necessary if (localStrategy != null) { try { this.localStrategy.close(); } catch (Throwable t) { LOG.error("Error closing local strategy", t); } } RegularPactTask.clearReaders(new MutableReader[] {inputReader}); } if (!this.taskCanceled) { if (LOG.isDebugEnabled()) { LOG.debug(getLogString("Finished data sink operator")); } } else { if (LOG.isDebugEnabled()) { LOG.debug(getLogString("Data sink operator cancelled")); } } }