Ejemplo n.º 1
0
    /**
     * Reads every record of the given split through the configured {@link AcidInputFormat} raw
     * reader and writes it to the compaction writer.
     *
     * <p>When {@code IS_MAJOR} is set in the job configuration, records for which the reader's
     * {@code isDelete} returns true are skipped. Progress is reported after each written record.
     * The raw reader is closed before this method returns, whether or not an error occurred.
     *
     * @param key unused
     * @param split supplies the bucket, base directory and delta directories to read
     * @param nullWritableVOutputCollector unused; output goes through {@code writer}
     * @param reporter passed to {@code getWriter} and used to report progress
     * @throws IOException if reading, writing or closing the reader fails
     */
    @Override
    public void map(
        WritableComparable key,
        CompactorInputSplit split,
        OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
        Reporter reporter)
        throws IOException {
      // This will only get called once, since CompactRecordReader only returns one record,
      // the input split.
      // Based on the split we're passed we go instantiate the real reader and then iterate on it
      // until it finishes.
      @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
      AcidInputFormat<WritableComparable, V> aif =
          instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
      ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

      boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
      AcidInputFormat.RawReader<V> reader =
          aif.getRawReader(
              jobConf,
              isMajor,
              split.getBucket(),
              txnList,
              split.getBaseDir(),
              split.getDeltaDirs());
      try {
        RecordIdentifier identifier = reader.createKey();
        V value = reader.createValue();
        getWriter(reporter, reader.getObjectInspector(), split.getBucket());
        while (reader.next(identifier, value)) {
          if (isMajor && reader.isDelete(value)) {
            continue;
          }
          writer.write(value);
          reporter.progress();
        }
      } finally {
        // Release the reader even if getWriter, next or write throws.
        reader.close();
      }
    }
    /**
     * Builds a buffer of {@code signature} followed by the record's bytes, encodes it with
     * {@code base64}, and hands the encoded bytes to the wrapped writer via the reusable
     * {@code bytesWritable}.
     *
     * @param w the record to write; handled as {@link Text} or, otherwise, cast to
     *     {@link BytesWritable}
     * @throws IOException if the wrapped writer fails
     */
    @Override
    public void write(Writable w) throws IOException {

      // Fetch the record's byte array and the number of bytes to take from it.
      final byte[] payload;
      final int payloadLength;
      if (!(w instanceof Text)) {
        assert (w instanceof BytesWritable);
        BytesWritable bytes = (BytesWritable) w;
        payload = bytes.get();
        payloadLength = bytes.getSize();
      } else {
        Text text = (Text) w;
        payload = text.getBytes();
        payloadLength = text.getLength();
      }

      // Lay out signature first, then the first payloadLength bytes of the record.
      final int prefixLength = signature.length;
      byte[] framed = new byte[prefixLength + payloadLength];
      System.arraycopy(signature, 0, framed, 0, prefixLength);
      System.arraycopy(payload, 0, framed, prefixLength, payloadLength);

      // Encode and forward.
      byte[] encoded = base64.encode(framed);
      bytesWritable.set(encoded, 0, encoded.length);
      writer.write(bytesWritable);
    }
Ejemplo n.º 3
0
 /**
  * Calls {@code writer.close(false)} when a writer exists; does nothing when {@code writer} is
  * null.
  *
  * @throws IOException propagated from the writer's close
  */
 @Override
 public void close() throws IOException {
   if (writer == null) {
     return;
   }
   writer.close(false);
 }
 /**
  * Delegates to {@code writer.close}, forwarding {@code abort} unchanged.
  *
  * @param abort flag handed straight through to the underlying writer
  * @throws IOException propagated from the underlying writer's close
  */
 @Override
 public void close(boolean abort) throws IOException {
   writer.close(abort);
 }
  /**
   * Writes {@code numRows} rows to {@code filePath} through the given output format and SerDe and
   * returns a split spanning the whole resulting file.
   *
   * <p>Columns reporting {@code isPartitionKey} are dropped before anything is written. Each row
   * is filled from every remaining column's {@code getWriteValue()}; {@code Slice} values are
   * converted to their byte array first.
   *
   * @param filePath location of the file to create
   * @param outputFormat output format that supplies the record writer
   * @param serDe SerDe used to serialize each row; initialized here with the column names/types
   * @param compressionCodec codec name to look up, or {@code null} to write uncompressed
   * @param testColumns column definitions; partition-key columns are ignored
   * @param numRows number of rows to write
   * @return a split starting at offset 0 with the length of the written file and no hosts
   * @throws IllegalArgumentException if {@code compressionCodec} is non-null but no codec with
   *     that name is found
   * @throws Exception if SerDe initialization, serialization or writing fails
   */
  public static FileSplit createTestFile(
      String filePath,
      HiveOutputFormat<?, ?> outputFormat,
      @SuppressWarnings("deprecation") SerDe serDe,
      String compressionCodec,
      List<TestColumn> testColumns,
      int numRows)
      throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
        "columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty(
        "columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(new Configuration(), tableProperties);

    if (compressionCodec != null) {
      CompressionCodec codec =
          new CompressionCodecFactory(new Configuration()).getCodecByName(compressionCodec);
      if (codec == null) {
        // Fail with a descriptive message instead of a NullPointerException below.
        throw new IllegalArgumentException("Unknown compression codec: " + compressionCodec);
      }
      jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
      jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
      jobConf.set("parquet.compression", compressionCodec);
      jobConf.set("parquet.enable.dictionary", "true");
    }

    Path path = new Path(filePath);

    RecordWriter recordWriter =
        outputFormat.getHiveRecordWriter(
            jobConf,
            path,
            Text.class,
            compressionCodec != null,
            tableProperties,
            () -> {}); // progress callback intentionally does nothing

    try {
      // NOTE(review): the SerDe was already initialized above with the same properties; kept
      // as-is in case creating the record writer affects SerDe state - confirm whether redundant.
      serDe.initialize(new Configuration(), tableProperties);

      SettableStructObjectInspector objectInspector =
          getStandardStructObjectInspector(
              ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
              ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

      // A single row object is reused; its fields are overwritten for every row written.
      Object row = objectInspector.create();

      List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

      for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
        for (int i = 0; i < testColumns.size(); i++) {
          Object writeValue = testColumns.get(i).getWriteValue();
          if (writeValue instanceof Slice) {
            writeValue = ((Slice) writeValue).getBytes();
          }
          objectInspector.setStructFieldData(row, fields.get(i), writeValue);
        }

        Writable record = serDe.serialize(row, objectInspector);
        recordWriter.write(record);
      }
    } finally {
      recordWriter.close(false);
    }

    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
  }