public BufferedTupleWriter( TupleReader reader, int bufferSize, VirtualFile outputFolder, CompressionType[] compressionTypes, String[] fileNameSeeds, boolean calculateChecksum, BatchCallback callback) throws IOException { this.columnTypes = reader.getColumnTypes(); this.compressionTypes = compressionTypes; this.columnCount = columnTypes.length; this.buffer = new TupleBuffer(columnTypes, bufferSize); this.reader = reader; this.callback = callback; this.columnWriters = new ColumnFileWriterBundle[columnCount]; // check parameters if (columnTypes.length != compressionTypes.length) { throw new IllegalArgumentException("length of columnTypes/compressionTypes doesn't match"); } if (columnTypes.length != fileNameSeeds.length) { throw new IllegalArgumentException("length of columnTypes/fileNameSeeds doesn't match"); } if (reader.getColumnCount() != columnCount) { throw new IllegalArgumentException( "column count doesn't match. writer=" + columnCount + ", reader=" + reader.getColumnCount()); } for (int i = 0; i < columnCount; ++i) { if (reader.getColumnType(i) != columnTypes[i]) { throw new IllegalArgumentException( "column type[" + i + "] doesn't match. writer=" + columnTypes[i] + ", reader=" + reader.getColumnType(i)); } // although column types must match between reader/writer, compression types are often // different. } for (int i = 0; i < columnCount; ++i) { columnWriters[i] = new ColumnFileWriterBundle( outputFolder, fileNameSeeds[i], columnTypes[i], compressionTypes[i], calculateChecksum); } }
/**
 * Drains the reader batch by batch and writes every batch to the column writers.
 *
 * <p>Each iteration resets the buffer count, asks the reader for the next batch, and hands
 * each column buffer to that column's data writer. The loop ends when the reader reports a
 * negative batch size, or when the callback (if any) returns {@code false} after a batch.
 *
 * @return the value of {@code tuplesWritten} after the loop; the field is incremented by
 *     each batch size and is not reset by this method
 * @throws IOException declared by this method; presumably raised by the reader or the
 *     column writers (confirm against their contracts)
 */
@SuppressWarnings({"unchecked", "rawtypes"})
@Override
public int appendAllTuples() throws IOException {
    LOG.info("receiving/writing all tuples...");

    boolean keepGoing = true;
    while (keepGoing) {
        buffer.resetCount();
        final int batchSize = reader.nextBatch(buffer);
        if (batchSize < 0) {
            // A negative batch size ends the loop.
            break;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("read " + batchSize + " tuples.");
        }

        // Hand each column's slice of the batch to its own data writer.
        int col = 0;
        while (col < columnCount) {
            // The raw cast is required because the writer's element type is not known here.
            final TypedWriter dataWriter = (TypedWriter) columnWriters[col].getDataWriter();
            final Object values = buffer.getColumnBuffer(col);
            dataWriter.writeValues(values, 0, batchSize);
            ++col;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("wrote " + batchSize + " tuples to " + columnCount + " column files.");
        }

        tuplesWritten += batchSize;

        // The callback is optional; a false return asks the loop to stop.
        if (callback != null && !callback.onBatchWritten(tuplesWritten)) {
            LOG.warn("callback function requested to terminate. exitting..");
            keepGoing = false;
        }
    }

    LOG.info("done.");
    return tuplesWritten;
}