private VectorContainer getBatch() throws IOException { assert fs != null; assert path != null; if (inputStream == null) { inputStream = fs.open(path); } VectorAccessibleSerializable vas = new VectorAccessibleSerializable(allocator); Stopwatch watch = Stopwatch.createStarted(); vas.readFromStream(inputStream); VectorContainer c = vas.get(); if (schema != null) { c = SchemaUtil.coerceContainer(c, schema, context); } // logger.debug("Took {} us to read {} records", watch.elapsed(TimeUnit.MICROSECONDS), // c.getRecordCount()); spilledBatches--; currentContainer.zeroVectors(); Iterator<VectorWrapper<?>> wrapperIterator = c.iterator(); for (VectorWrapper w : currentContainer) { TransferPair pair = wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector()); pair.transfer(); } currentContainer.setRecordCount(c.getRecordCount()); c.zeroVectors(); return c; }
/**
 * Writes the given container to the spill file at {@code path} and counts it as spilled.
 *
 * <p>The output stream is created on first use and kept in {@code outputStream} for later
 * calls. After the write completes, the container's vectors are zeroed and
 * {@code spilledBatches} is incremented.
 *
 * @param newContainer the batch to spill; its vectors are zeroed once it has been written
 * @throws IOException if creating the file or writing to the stream fails
 */
public void addBatch(VectorContainer newContainer) throws IOException {
  assert fs != null;
  assert path != null;
  if (outputStream == null) {
    outputStream = fs.create(path);
  }

  // Capture the count up front so the log line reports what was actually written.
  final int rowCount = newContainer.getRecordCount();
  final VectorAccessibleSerializable serializer =
      new VectorAccessibleSerializable(
          WritableBatch.getBatchNoHVWrap(rowCount, newContainer, false), allocator);

  // The timer covers the stream write and the vector clean-up that follows it.
  final Stopwatch timer = Stopwatch.createStarted();
  serializer.writeToStream(outputStream);
  newContainer.zeroVectors();
  final long elapsedMicros = timer.elapsed(TimeUnit.MICROSECONDS);

  logger.debug("Took {} us to spill {} records", elapsedMicros, rowCount);
  spilledBatches++;
}