@Test public void testBufferSizeFor1000Col() throws IOException { ObjectInspector inspector; synchronized (TestOrcFile.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector( Long.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } int bufferSize = 128 * 1024; String columns = getRandomColumnNames(1000); // just for testing. manually write the column names conf.set(IOConstants.COLUMNS, columns); Writer writer = OrcFile.createWriter( testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(100000) .compress(CompressionKind.NONE) .bufferSize(bufferSize)); final int newBufferSize; if (writer instanceof WriterImpl) { WriterImpl orcWriter = (WriterImpl) writer; newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize); assertEquals(bufferSize, newBufferSize); } }
/**
 * Builds a record writer that hands already-assembled ACID event rows directly
 * to an ORC writer, registering each row's key with an
 * {@code OrcRecordUpdater.KeyIndexBuilder} that is installed as the writer callback.
 *
 * <p>When the options do not describe a base write, the ORC writer is configured
 * with the delta buffer and stripe sizes, no block padding, no compression and a
 * row index stride of 0.
 *
 * @param path location passed to {@code AcidUtils.createFilename} to derive the target file
 * @param options ACID output options supplying the configuration, inspector and base/delta mode
 * @return a writer whose {@code write} expects {@code OrcStruct} event rows
 * @throws IOException propagated from filename or ORC writer creation
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getRawRecordWriter(
    Path path, Options options) throws IOException {
  final Path target = AcidUtils.createFilename(path, options);
  final OrcFile.WriterOptions writerOptions =
      OrcFile.writerOptions(options.getConfiguration());

  final boolean writingDelta = !options.isWritingBase();
  if (writingDelta) {
    writerOptions.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE);
    writerOptions.stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE);
    writerOptions.blockPadding(false);
    writerOptions.compress(CompressionKind.NONE);
    writerOptions.rowIndexStride(0);
  }

  final OrcRecordUpdater.KeyIndexBuilder keyIndex = new OrcRecordUpdater.KeyIndexBuilder();
  writerOptions.inspector(options.getInspector());
  writerOptions.callback(keyIndex);

  final Writer orcWriter = OrcFile.createWriter(target, writerOptions);

  return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {
    /** Records the event's key fields in the key index, then appends the row. */
    @Override
    public void write(Writable w) throws IOException {
      final OrcStruct event = (OrcStruct) w;
      final int opCode =
          ((IntWritable) event.getFieldValue(OrcRecordUpdater.OPERATION)).get();
      final long origTxn =
          ((LongWritable) event.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)).get();
      final int bucketId =
          ((IntWritable) event.getFieldValue(OrcRecordUpdater.BUCKET)).get();
      final long rowIdValue =
          ((LongWritable) event.getFieldValue(OrcRecordUpdater.ROW_ID)).get();
      keyIndex.addKey(opCode, origTxn, bucketId, rowIdValue);
      orcWriter.addRow(w);
    }

    /** Closes the ORC writer; the {@code abort} flag is not consulted. */
    @Override
    public void close(boolean abort) throws IOException {
      orcWriter.close();
    }
  };
}
/**
 * Appends one row to the ORC file, creating the writer on the first call.
 *
 * @param nullWritable unused key
 * @param row serde row carrying both the row object and its inspector
 * @throws IOException propagated from writer creation or from adding the row
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  final boolean writerMissing = (writer == null);
  if (writerMissing) {
    // The writer is built lazily, using the inspector carried by the first row.
    writer = OrcFile.createWriter(path, options.inspector(row.getInspector()));
  }
  writer.addRow(row.getRow());
}
/**
 * Appends one row to the ORC file, creating the writer on the first call.
 *
 * @param row must be an {@code OrcSerdeRow}; it is cast before anything else happens
 * @throws IOException propagated from writer creation or from adding the row
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow orcRow = (OrcSerdeRow) row;
  final boolean writerMissing = (writer == null);
  if (writerMissing) {
    // The writer is built lazily, using the inspector carried by the first row.
    writer = OrcFile.createWriter(path, options.inspector(orcRow.getInspector()));
  }
  writer.addRow(orcRow.getRow());
}
@Override public void close(boolean b) throws IOException { // if we haven't written any rows, we need to create a file with a // generic schema. if (writer == null) { // a row with no columns ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector( new ArrayList<String>(), new ArrayList<ObjectInspector>()); options.inspector(inspector); writer = OrcFile.createWriter(path, options); } writer.close(); }
/**
 * Sets up an updater that writes ACID event rows to the ORC file derived from
 * {@code path} and {@code options} via {@code AcidUtils.createFilename}.
 *
 * <p>Construction performs, in order:
 * <ol>
 *   <li>resolves the file system (from the options, falling back to the path's own);</li>
 *   <li>makes a best-effort attempt to write the ACID format marker file under
 *       {@code path}; an {@code IOException} here is only logged at debug level;</li>
 *   <li>creates the side file for flush lengths when the minimum and maximum
 *       transaction ids differ and this is not a base write;</li>
 *   <li>creates the ORC writer over the event schema and pre-populates the
 *       reusable event struct with the mutable field holders.</li>
 * </ol>
 *
 * @param path location under which the marker file is written and from which the
 *     target file name is derived
 * @param options ACID output options (bucket, transaction range, inspector, ...)
 * @throws IOException propagated from file system lookup, side file creation or
 *     ORC writer creation
 */
OrcRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
  this.options = options;
  this.bucket.set(options.getBucket());
  this.path = AcidUtils.createFilename(path, options);

  FileSystem fs = options.getFilesystem();
  if (fs == null) {
    fs = path.getFileSystem(options.getConfiguration());
  }
  this.fs = fs;

  // Best-effort write of the format marker. Failures are deliberately tolerated
  // (presumably the marker may already exist - TODO confirm) and only logged.
  try {
    FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
    try {
      strm.writeInt(ORC_ACID_VERSION);
    } finally {
      // Close even when writeInt fails so the stream is never leaked.
      strm.close();
    }
  } catch (IOException ioe) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " + ioe);
    }
  }

  // The side file is only needed for non-base writes spanning more than one transaction id.
  if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
      && !options.isWritingBase()) {
    flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
  } else {
    flushLengths = null;
  }

  // Prefer ORC-specific writer options carried by the caller; otherwise build
  // them from the configuration.
  OrcFile.WriterOptions writerOptions = null;
  if (options instanceof OrcOptions) {
    writerOptions = ((OrcOptions) options).getOrcOptions();
  }
  if (writerOptions == null) {
    writerOptions = OrcFile.writerOptions(options.getConfiguration());
  }
  writerOptions.fileSystem(fs).callback(indexBuilder);
  if (!options.isWritingBase()) {
    // Non-base files use the delta-specific sizes and no block padding.
    writerOptions.blockPadding(false);
    writerOptions.bufferSize(DELTA_BUFFER_SIZE);
    writerOptions.stripeSize(DELTA_STRIPE_SIZE);
  }
  writerOptions.inspector(createEventSchema(options.getInspector()));
  this.writer = OrcFile.createWriter(this.path, writerOptions);

  // Wire the reusable event struct to the field holders of this updater.
  item = new OrcStruct(FIELDS);
  item.setFieldValue(OPERATION, operation);
  item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
  item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
  item.setFieldValue(BUCKET, bucket);
  item.setFieldValue(ROW_ID, rowId);
}
/**
 * Creates an uncompressed ORC writer with a 128 KiB buffer for a single
 * {@code Long} column and asserts that the writer's estimated buffer size
 * equals the requested size.
 *
 * <p>The assertion only runs when the created writer is a {@code WriterImpl}.
 *
 * @throws IOException propagated from writer creation
 */
@Test
public void testBufferSizeFor1Col() throws IOException {
  // The reflection inspector is obtained while holding the TestOrcFile class lock.
  ObjectInspector longInspector;
  synchronized (TestOrcFile.class) {
    longInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        Long.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  final int requestedSize = 128 * 1024;

  Writer created = OrcFile.createWriter(
      testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(longInspector)
          .stripeSize(100000)
          .compress(CompressionKind.NONE)
          .bufferSize(requestedSize));

  if (!(created instanceof WriterImpl)) {
    // Nothing to verify for other Writer implementations.
    return;
  }
  int estimatedSize = ((WriterImpl) created).getEstimatedBufferSize(requestedSize);
  assertEquals(requestedSize, estimatedSize);
}