@Test public void testBufferSizeFor1000Col() throws IOException { ObjectInspector inspector; synchronized (TestOrcFile.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector( Long.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } int bufferSize = 128 * 1024; String columns = getRandomColumnNames(1000); // just for testing. manually write the column names conf.set(IOConstants.COLUMNS, columns); Writer writer = OrcFile.createWriter( testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(100000) .compress(CompressionKind.NONE) .bufferSize(bufferSize)); final int newBufferSize; if (writer instanceof WriterImpl) { WriterImpl orcWriter = (WriterImpl) writer; newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize); assertEquals(bufferSize, newBufferSize); } }
/**
 * Builds a record writer that appends already-formed ACID event rows to the ORC
 * file named by {@code AcidUtils.createFilename(path, options)}.
 *
 * <p>When the options do not describe a base file, the writer uses the delta
 * buffer and stripe sizes, no block padding, no compression and a row index
 * stride of 0. Every written row also has its operation, original transaction,
 * bucket and row id reported to a {@code KeyIndexBuilder} that is registered as
 * the writer callback.
 *
 * @param path base location passed to {@code AcidUtils.createFilename}
 * @param options ACID output options (configuration, inspector, base/delta flag)
 * @return a writer whose {@code close} always closes the underlying ORC writer,
 *     regardless of the {@code abort} flag
 * @throws IOException if the ORC writer cannot be created
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getRawRecordWriter(
    Path path, Options options) throws IOException {
  final Path targetFile = AcidUtils.createFilename(path, options);
  final OrcFile.WriterOptions writerOptions =
      OrcFile.writerOptions(options.getConfiguration());

  final boolean writingDelta = !options.isWritingBase();
  if (writingDelta) {
    writerOptions.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE);
    writerOptions.stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE);
    writerOptions.blockPadding(false);
    writerOptions.compress(CompressionKind.NONE);
    writerOptions.rowIndexStride(0);
  }

  final OrcRecordUpdater.KeyIndexBuilder keyIndex = new OrcRecordUpdater.KeyIndexBuilder();
  writerOptions.inspector(options.getInspector());
  writerOptions.callback(keyIndex);

  final Writer orcWriter = OrcFile.createWriter(targetFile, writerOptions);

  return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      OrcStruct event = (OrcStruct) w;
      // Extract the key columns in the same order they are handed to addKey.
      int operation =
          ((IntWritable) event.getFieldValue(OrcRecordUpdater.OPERATION)).get();
      long originalTransaction =
          ((LongWritable) event.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)).get();
      int bucket =
          ((IntWritable) event.getFieldValue(OrcRecordUpdater.BUCKET)).get();
      long rowId =
          ((LongWritable) event.getFieldValue(OrcRecordUpdater.ROW_ID)).get();
      keyIndex.addKey(operation, originalTransaction, bucket, rowId);
      orcWriter.addRow(w);
    }

    @Override
    public void close(boolean abort) throws IOException {
      // The abort flag is not consulted; the writer is closed either way.
      orcWriter.close();
    }
  };
}
/**
 * Sets up an updater that writes ACID events to the ORC file named by
 * {@code AcidUtils.createFilename(path, options)}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Resolve the file system from the options, falling back to the one that
 *       owns {@code path}.</li>
 *   <li>Best-effort creation (no overwrite) of the {@code ACID_FORMAT} marker
 *       file under {@code path} containing {@code ORC_ACID_VERSION}; a failure
 *       is only logged at debug level.</li>
 *   <li>Open the side file returned by {@code getSideFile} when the minimum and
 *       maximum transaction ids differ and a base is not being written;
 *       otherwise {@code flushLengths} is {@code null}.</li>
 *   <li>Build the ORC writer options (reusing those carried by
 *       {@code OrcOptions} when present), applying the delta buffer/stripe
 *       sizes and disabling block padding for non-base files.</li>
 *   <li>Create the writer and pre-populate the reusable event struct.</li>
 * </ol>
 *
 * @param path directory-level path the output file name is derived from
 * @param options ACID output options
 * @throws IOException if the file system, side file or ORC writer cannot be set up
 */
OrcRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
  this.options = options;
  this.bucket.set(options.getBucket());
  this.path = AcidUtils.createFilename(path, options);

  FileSystem fs = options.getFilesystem();
  if (fs == null) {
    fs = path.getFileSystem(options.getConfiguration());
  }
  this.fs = fs;

  try {
    FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
    try {
      strm.writeInt(ORC_ACID_VERSION);
    } finally {
      // Close even when the write fails so the stream is never leaked.
      strm.close();
    }
  } catch (IOException ioe) {
    // Deliberately best-effort: creation is attempted without overwrite, so an
    // IOException here is tolerated and only reported at debug level.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " + ioe);
    }
  }

  if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
      && !options.isWritingBase()) {
    flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
  } else {
    flushLengths = null;
  }

  OrcFile.WriterOptions writerOptions = null;
  if (options instanceof OrcOptions) {
    writerOptions = ((OrcOptions) options).getOrcOptions();
  }
  if (writerOptions == null) {
    writerOptions = OrcFile.writerOptions(options.getConfiguration());
  }
  writerOptions.fileSystem(fs).callback(indexBuilder);
  if (!options.isWritingBase()) {
    writerOptions.blockPadding(false);
    writerOptions.bufferSize(DELTA_BUFFER_SIZE);
    writerOptions.stripeSize(DELTA_STRIPE_SIZE);
  }
  writerOptions.inspector(createEventSchema(options.getInspector()));
  this.writer = OrcFile.createWriter(this.path, writerOptions);

  // The same struct instance is wired to the updater's mutable field holders.
  item = new OrcStruct(FIELDS);
  item.setFieldValue(OPERATION, operation);
  item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
  item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
  item.setFieldValue(BUCKET, bucket);
  item.setFieldValue(ROW_ID, rowId);
}
/**
 * Creates an {@code OrcRecordWriter} for {@code path} whose writer options start
 * from {@code conf} and are then overridden by any ORC settings present in the
 * table properties: stripe size, compression kind, compression block size, row
 * index stride, index enablement and block padding.
 *
 * <p>{@code valueClass}, {@code isCompressed} and {@code reporter} are not used
 * by this method.
 *
 * @param conf job configuration supplying the default writer options
 * @param path file to write
 * @param tableProperties table-level overrides; must not be {@code null}
 * @return the configured record writer
 * @throws IOException declared by the overridden method
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf conf,
    Path path,
    Class<? extends Writable> valueClass,
    boolean isCompressed,
    Properties tableProperties,
    Progressable reporter) throws IOException {
  final OrcFile.WriterOptions options = OrcFile.writerOptions(conf);

  if (tableProperties.containsKey(OrcFile.STRIPE_SIZE)) {
    final String stripeSize = tableProperties.getProperty(OrcFile.STRIPE_SIZE);
    options.stripeSize(Long.parseLong(stripeSize));
  }

  if (tableProperties.containsKey(OrcFile.COMPRESSION)) {
    final String compression = tableProperties.getProperty(OrcFile.COMPRESSION);
    options.compress(CompressionKind.valueOf(compression));
  }

  if (tableProperties.containsKey(OrcFile.COMPRESSION_BLOCK_SIZE)) {
    final String blockSize = tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE);
    options.bufferSize(Integer.parseInt(blockSize));
  }

  if (tableProperties.containsKey(OrcFile.ROW_INDEX_STRIDE)) {
    final String stride = tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE);
    options.rowIndexStride(Integer.parseInt(stride));
  }

  // Must come after the stride setting: disabling indexes overrides any stride.
  if (tableProperties.containsKey(OrcFile.ENABLE_INDEXES)
      && "false".equals(tableProperties.getProperty(OrcFile.ENABLE_INDEXES))) {
    options.rowIndexStride(0);
  }

  if (tableProperties.containsKey(OrcFile.BLOCK_PADDING)) {
    final String padding = tableProperties.getProperty(OrcFile.BLOCK_PADDING);
    options.blockPadding(Boolean.parseBoolean(padding));
  }

  return new OrcRecordWriter(path, options, conf);
}
/**
 * Builds ORC writer options from the table properties and job configuration,
 * attaching a struct schema when both the column-name and column-type
 * properties are present and non-empty.
 *
 * @param conf job configuration passed to {@code OrcFile.writerOptions}
 * @param props table properties; may be {@code null}, in which case no schema is set
 * @return the writer options, with a schema set only when column names and
 *     types were both supplied
 */
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
  OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
  if (props == null) {
    return result;
  }

  final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
  if (columnNameProperty == null || columnNameProperty.isEmpty()
      || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
    // Without both properties there is nothing to derive a schema from.
    return result;
  }

  // Both strings are known to be non-empty here, so no empty-list fallback is needed.
  List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

  TypeDescription schema = TypeDescription.createStruct();
  // NOTE(review): relies on columnTypes having at least as many entries as
  // columnNames; a shorter type list makes columnTypes.get(i) throw.
  for (int i = 0; i < columnNames.size(); ++i) {
    schema.addField(columnNames.get(i), convertTypeInfo(columnTypes.get(i)));
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("ORC schema = " + schema);
  }
  result.setSchema(schema);
  return result;
}
/**
 * Creates an uncompressed writer with a 128K buffer and checks that
 * {@code getEstimatedBufferSize} hands back that requested size unchanged.
 *
 * <p>The assertion only runs when the created writer is a {@link WriterImpl}.
 */
@Test
public void testBufferSizeFor1Col() throws IOException {
  ObjectInspector inspector;
  // NOTE(review): presumably the reflection inspector factory must not be used
  // concurrently by several tests - confirm why the class lock is needed.
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        Long.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  final int requestedSize = 128 * 1024;

  OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf);
  writerOptions.inspector(inspector);
  writerOptions.stripeSize(100000);
  writerOptions.compress(CompressionKind.NONE);
  writerOptions.bufferSize(requestedSize);
  Writer writer = OrcFile.createWriter(testFilePath, writerOptions);

  if (!(writer instanceof WriterImpl)) {
    // Nothing to verify for other writer implementations.
    return;
  }
  int estimatedSize = ((WriterImpl) writer).getEstimatedBufferSize(requestedSize);
  assertEquals(requestedSize, estimatedSize);
}
/**
 * Creates an {@code OrcRecordWriter} for the file {@code name} inside the job's
 * work output path, using writer options derived from {@code conf}.
 *
 * <p>{@code fileSystem} and {@code reporter} are not used by this method.
 *
 * @param conf job configuration supplying the output path and writer options
 * @param name file name relative to the work output path
 * @return the record writer for the resolved file
 * @throws IOException declared by the overridden method
 */
@Override
public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(
    FileSystem fileSystem, JobConf conf, String name, Progressable reporter)
    throws IOException {
  final Path outputFile = new Path(getWorkOutputPath(conf), name);
  final OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf);
  return new OrcRecordWriter(outputFile, writerOptions, conf);
}