@Override
public void map(WritableComparable key, CompactorInputSplit split,
                OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
                Reporter reporter) throws IOException {
  // This will only get called once, since CompactRecordReader only returns one record,
  // the input split.
  // Based on the split we are passed, we instantiate the real reader and then iterate on it
  // until it finishes.
  @SuppressWarnings("unchecked") // since there is no way to parameterize an instance of Class
  AcidInputFormat<WritableComparable, V> aif =
      instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
  ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

  boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
  AcidInputFormat.RawReader<V> reader = aif.getRawReader(jobConf, isMajor, split.getBucket(),
      txnList, split.getBaseDir(), split.getDeltaDirs());
  RecordIdentifier identifier = reader.createKey();
  V value = reader.createValue();
  getWriter(reporter, reader.getObjectInspector(), split.getBucket());
  while (reader.next(identifier, value)) {
    // A major compaction rewrites the full data set, so deleted rows are simply dropped;
    // a minor compaction must keep delete events so later readers can still apply them.
    if (isMajor && reader.isDelete(value)) continue;
    writer.write(value);
    reporter.progress();
  }
}
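// The instantiate(...) helper used in map() is not shown in this section. Below is a
// minimal sketch of what such a helper typically looks like, using Hadoop's
// ReflectionUtils; the generic bound and error handling here are assumptions, not the
// actual Hive implementation.
//
// import org.apache.hadoop.conf.Configuration;
// import org.apache.hadoop.util.ReflectionUtils;
@SuppressWarnings("unchecked")
static <T> T instantiate(Class<T> expected, String className) throws IOException {
  try {
    // Look the class up by name, check it is a subtype of the expected interface,
    // and let ReflectionUtils construct (and configure) an instance.
    Class<?> clazz = Class.forName(className);
    if (!expected.isAssignableFrom(clazz)) {
      throw new IOException(className + " is not an instance of " + expected.getName());
    }
    return (T) ReflectionUtils.newInstance(clazz, new Configuration());
  } catch (ClassNotFoundException e) {
    throw new IOException("Unable to load class " + className, e);
  }
}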
@Override
public void write(Writable w) throws IOException {
  // Get input data
  byte[] input;
  int inputLength;
  if (w instanceof Text) {
    input = ((Text) w).getBytes();
    inputLength = ((Text) w).getLength();
  } else {
    assert (w instanceof BytesWritable);
    input = ((BytesWritable) w).get();
    inputLength = ((BytesWritable) w).getSize();
  }

  // Add signature: prefix the raw record bytes with the configured signature
  byte[] wrapped = new byte[signature.length + inputLength];
  System.arraycopy(signature, 0, wrapped, 0, signature.length);
  System.arraycopy(input, 0, wrapped, signature.length, inputLength);

  // Encode
  byte[] output = base64.encode(wrapped);
  bytesWritable.set(output, 0, output.length);
  writer.write(bytesWritable);
}
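// A sketch of the symmetric read-side step, for illustration only: this unwrap(...)
// helper is hypothetical and not part of the original class. It assumes the same
// base64 codec and signature fields used by write() above, plus java.util.Arrays.
private byte[] unwrap(byte[] encoded) throws IOException {
  // Decode the record, verify the signature prefix written by write(),
  // and return the raw payload with the signature stripped.
  byte[] decoded = base64.decode(encoded);
  if (decoded.length < signature.length) {
    throw new IOException("Record shorter than signature");
  }
  for (int i = 0; i < signature.length; i++) {
    if (decoded[i] != signature[i]) {
      throw new IOException("Signature mismatch in record");
    }
  }
  return Arrays.copyOfRange(decoded, signature.length, decoded.length);
}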
@Override
public void close() throws IOException {
  if (writer != null) {
    writer.close(false);
  }
}
@Override
public void close(boolean abort) throws IOException {
  writer.close(abort);
}
public static FileSplit createTestFile(
    String filePath,
    HiveOutputFormat<?, ?> outputFormat,
    @SuppressWarnings("deprecation") SerDe serDe,
    String compressionCodec,
    List<TestColumn> testColumns,
    int numRows)
    throws Exception {
  // filter out partition keys, which are not written to the file
  testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

  JobConf jobConf = new JobConf();
  ReaderWriterProfiler.setProfilerOptions(jobConf);

  Properties tableProperties = new Properties();
  tableProperties.setProperty("columns",
      Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
  tableProperties.setProperty("columns.types",
      Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
  serDe.initialize(new Configuration(), tableProperties);

  if (compressionCodec != null) {
    CompressionCodec codec =
        new CompressionCodecFactory(new Configuration()).getCodecByName(compressionCodec);
    jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
    jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    jobConf.set("parquet.compression", compressionCodec);
    jobConf.set("parquet.enable.dictionary", "true");
  }

  RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
      jobConf,
      new Path(filePath),
      Text.class,
      compressionCodec != null,
      tableProperties,
      new Progressable() {
        @Override
        public void progress() {}
      });

  try {
    SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
        ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
        ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
      for (int i = 0; i < testColumns.size(); i++) {
        Object writeValue = testColumns.get(i).getWriteValue();
        if (writeValue instanceof Slice) {
          writeValue = ((Slice) writeValue).getBytes();
        }
        objectInspector.setStructFieldData(row, fields.get(i), writeValue);
      }

      Writable record = serDe.serialize(row, objectInspector);
      recordWriter.write(record);
    }
  } finally {
    recordWriter.close(false);
  }

  Path path = new Path(filePath);
  path.getFileSystem(new Configuration()).setVerifyChecksum(true);
  File file = new File(filePath);
  return new FileSplit(path, 0, file.length(), new String[0]);
}
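// A sketch of how a test might consume the FileSplit returned by createTestFile().
// This countRows(...) helper is hypothetical; the concrete mapred InputFormat passed
// in must match the HiveOutputFormat/SerDe pairing used to write the file, which is
// the caller's responsibility here.
static <K, V> int countRows(FileSplit split, InputFormat<K, V> inputFormat, JobConf jobConf)
    throws IOException {
  // Open a record reader over just this split and iterate until exhausted.
  RecordReader<K, V> reader = inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
  K key = reader.createKey();
  V value = reader.createValue();
  int rows = 0;
  while (reader.next(key, value)) {
    rows++;
  }
  reader.close();
  return rows;
}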