/**
 * Builds the command from its configuration.
 *
 * <p>Reads the {@code format} setting (falling back to {@code Format.container}), the optional
 * {@code codec} setting, and the optional {@code metadata} section, then calls
 * {@code validateArguments()}.
 */
public WriteAvroToByteArray(
        CommandBuilder builder,
        Config config,
        Command parent,
        Command child,
        MorphlineContext context) {
      super(builder, config, parent, child, context);

      // Resolve the output format; the container format is used when the setting is absent.
      Validator<Format> formatValidator = new Validator<Format>();
      String formatName = getConfigs().getString(config, "format", Format.container.toString());
      this.format = formatValidator.validateEnum(config, formatName, Format.class);

      // The codec is optional: the factory stays null when no "codec" setting is present.
      String codecName = getConfigs().getString(config, "codec", null);
      this.codecFactory = (codecName == null) ? null : CodecFactory.fromString(codecName);

      // Copy every entry of the optional "metadata" section, storing each value as a string.
      Config metadataSection = getConfigs().getConfig(config, "metadata", ConfigFactory.empty());
      for (Map.Entry<String, Object> metadataEntry : new Configs().getEntrySet(metadataSection)) {
        String key = metadataEntry.getKey();
        String value = metadataEntry.getValue().toString();
        this.metadata.put(key, value);
      }

      validateArguments();
    }
Пример #2
0
 /**
  * Checks that the factory obtained for the Hadoop GZip codec class name has the same runtime
  * class as the factory Avro returns for the "deflate" codec name.
  */
 @Test
 public void testHadoopCodecFactoryGZip() {
   CodecFactory hadoopGzipCodec =
       HadoopCodecFactory.fromHadoopString("org.apache.hadoop.io.compress.GZipCodec");
   CodecFactory avroDeflateCodec = CodecFactory.fromString("deflate");

   Class<?> hadoopDerivedType = hadoopGzipCodec.getClass();
   Class<?> avroDerivedType = avroDeflateCodec.getClass();
   assertTrue(hadoopDerivedType.equals(avroDerivedType));
 }
Пример #3
0
  /**
   * Generates an input Avro file containing the given records in the temporary directory and
   * returns the full path of the file.
   *
   * @param filename name of the file to create through {@code tmpFolder}
   * @param elems records to append, in iteration order
   * @param syncBehavior selects when explicit {@code sync()} calls are issued while writing
   * @param syncInterval for {@code SYNC_REGULAR}, the number of records between syncs; for
   *     {@code SYNC_RANDOM}, the exclusive upper bound passed to {@code Random.nextInt}
   * @param coder supplies the datum writer and the schema used for the file
   * @param codec codec name handed to {@code CodecFactory.fromString}
   * @return the string form of the created file
   * @throws IOException if creating or writing the file fails
   */
  private <T> String generateTestFile(
      String filename,
      List<T> elems,
      SyncBehavior syncBehavior,
      int syncInterval,
      AvroCoder<T> coder,
      String codec)
      throws IOException {
    // Fixed seed so the randomly chosen sync positions are the same on every run.
    Random random = new Random(0);
    File tmpFile = tmpFolder.newFile(filename);
    String path = tmpFile.toString();

    DatumWriter<T> datumWriter = coder.createDatumWriter();
    // The stream is managed here as well as the writer: if setCodec/create fails before the
    // writer has taken over the stream, the file handle is still released. Resources are closed
    // in reverse order, so the writer is closed before the stream.
    try (FileOutputStream os = new FileOutputStream(tmpFile);
        DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
      writer.setCodec(CodecFactory.fromString(codec));
      writer.create(coder.getSchema(), os);

      // Number of records appended since the last explicit sync.
      int recordIndex = 0;
      // NOTE(review): nextInt may return 0; recordIndex is at least 1 when compared below, so in
      // that case no further explicit sync is issued. Confirm this is intended.
      int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;

      for (T elem : elems) {
        writer.append(elem);
        recordIndex++;

        switch (syncBehavior) {
          case SYNC_REGULAR:
            // Sync after every syncInterval records.
            if (recordIndex == syncInterval) {
              recordIndex = 0;
              writer.sync();
            }
            break;
          case SYNC_RANDOM:
            // Sync once the drawn record count is reached, then draw the next one.
            if (recordIndex == syncIndex) {
              recordIndex = 0;
              writer.sync();
              syncIndex = random.nextInt(syncInterval);
            }
            break;
          case SYNC_DEFAULT:
          default:
            // No explicit syncs; block boundaries are left to the writer.
        }
      }
    }
    return path;
  }