/**
 * Serializes a single {@code Employee} record into an in-memory Avro container file.
 *
 * <p>The record's {@code boss} field is always written as {@code null}.
 *
 * @param name value stored in the {@code name} field
 * @param age value stored in the {@code age} field
 * @param emails value stored in the {@code emails} array field
 * @return the bytes of the resulting Avro container (header plus one record)
 * @throws IOException if writing the container fails
 */
private byte[] createAvroData(String name, int age, List<String> emails) throws IOException {
    String schemaJson =
        "{\n"
            + "\"type\": \"record\",\n"
            + "\"name\": \"Employee\",\n"
            + "\"fields\": [\n"
            + " {\"name\": \"name\", \"type\": \"string\"},\n"
            + " {\"name\": \"age\", \"type\": \"int\"},\n"
            + " {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n"
            + " {\"name\": \"boss\", \"type\": [\"Employee\",\"null\"]}\n"
            + "]}";
    Schema schema = new Schema.Parser().parse(schemaJson);

    GenericRecord employee = new GenericData.Record(schema);
    employee.put("name", name);
    employee.put("age", age);
    employee.put("emails", emails);
    employee.put("boss", null);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    // try-with-resources closes the writer even when create/append throws; closing also
    // flushes the buffered block into 'out' before the bytes are read back below.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
      dataFileWriter.create(schema, out);
      dataFileWriter.append(employee);
    }
    return out.toByteArray();
  }
 /**
  * Writes every attachment of {@code src} into {@code dst} as a single Avro container.
  *
  * <p>The container writer is created lazily when the first attachment is seen, using the
  * schema that {@code getSchema} returns for it. The configured codec (when non-null) and all
  * metadata entries are applied before the container is created on {@code dst}. If the record
  * has no attachments, no writer is created and nothing is written.
  *
  * @param src record whose {@code Fields.ATTACHMENT_BODY} values are written
  * @param dst stream that receives the container
  * @throws MorphlineRuntimeException wrapping any {@code IOException} raised while writing
  */
 private void writeContainer(Record src, OutputStream dst) {
   DataFileWriter writer = null;
   try {
     Schema currentSchema = null;
     for (Object body : src.get(Fields.ATTACHMENT_BODY)) {
       Preconditions.checkNotNull(body);
       GenericContainer container = (GenericContainer) body;
       currentSchema = getSchema(container, currentSchema);
       assert currentSchema != null;
       if (writer == null) {
         // First attachment: configure the writer, then create the container on dst.
         writer = new DataFileWriter(new GenericDatumWriter(currentSchema));
         if (codecFactory != null) {
           writer.setCodec(codecFactory);
         }
         for (Map.Entry<String, String> meta : metadata.entrySet()) {
           writer.setMeta(meta.getKey(), meta.getValue());
         }
         writer.create(currentSchema, dst);
       }
       writer.append(container);
     }
     if (writer != null) {
       writer.flush();
     }
   } catch (IOException ioe) {
     throw new MorphlineRuntimeException(ioe);
   } finally {
     // Runs whether or not writing succeeded; writer may still be null here.
     Closeables.closeQuietly(writer);
   }
 }
  /**
   * Round-trips a test {@code AvroDocument} through an Avro data file: writes it to
   * {@code target/AvroDocument.avro}, then reads one record back and dumps it to stdout.
   *
   * @throws IOException if the file cannot be written or read
   */
  @Test
  public void test() throws IOException {
    File file = new File("target/AvroDocument.avro");

    Schema schema = AvroDocument._SCHEMA;

    {
      System.out.println("Writing to: " + file.getAbsolutePath());
      DatumWriter<Object> datumWriter = new SpecificDatumWriter(AvroDocument.class);
      FileOutputStream outputStream = new FileOutputStream(file);
      try {
        DataFileWriter<Object> dfw =
            new DataFileWriter<Object>(schema, outputStream, datumWriter);
        try {
          AvroDocument d = createTestDocument();
          dfw.append(d);
          dfw.flush();
        } finally {
          // Close the writer even if building or appending the document fails.
          dfw.close();
        }
      } finally {
        // Covers the case where the writer could not be constructed; closing the stream a
        // second time after the writer closed it is harmless.
        outputStream.close();
      }
    }

    {
      System.out.println("Reading from: " + file.getAbsolutePath());
      DatumReader<Object> datumReader = new SpecificDatumReader(AvroDocument.class);
      SeekableInput seekableInput = new SeekableFileInput(file);
      DataFileReader<Object> dfr = new DataFileReader<Object>(seekableInput, datumReader);
      try {
        AvroDocument d = new AvroDocument();
        dfr.next(d);
        AvroDocumentReader.dumpAvroDocument(d, System.out);
      } finally {
        // The reader was previously never closed, leaking the file handle.
        dfr.close();
      }
    }
  }
Пример #4
0
 /**
  * Writes the given records to {@code file} as an Avro container file.
  *
  * @param file destination file
  * @param schema schema used both for the datum writer and for the container header
  * @param records records to append, in order
  * @param <T> record type
  * @return {@code file}, for call chaining
  * @throws IOException if the file cannot be created or a record cannot be appended
  */
 @SafeVarargs // the varargs array is only iterated, never stored or exposed
 private static <T> File createFile(File file, Schema schema, T... records) throws IOException {
   DatumWriter<T> datumWriter = new GenericDatumWriter<T>(schema);
   // try-with-resources closes the writer even when create/append throws
   try (DataFileWriter<T> fileWriter = new DataFileWriter<T>(datumWriter)) {
     fileWriter.create(schema, file);
     for (T record : records) {
       fileWriter.append(record);
     }
   }
   return file;
 }
Пример #5
0
  /**
   * Builds three sample {@code User} records, writes them to {@code users.avro}, then reads
   * the file back and prints each record.
   *
   * @param args unused
   * @throws IOException if the file cannot be written or read
   */
  public static void main(String[] args) throws IOException {
    User user1 = new User();
    user1.setName("Alyssa");
    user1.setFavoriteNumber(256);
    // Leave favorite color null

    // Alternate constructor
    User user2 = new User("Ben", 7, "red");

    // Construct via builder
    User user3 =
        User.newBuilder()
            .setName("Charlie")
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .build();

    // Serialize user1, user2 and user3 to disk. try-with-resources closes the writer even
    // when create/append throws.
    File file = new File("users.avro");
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    try (DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter)) {
      dataFileWriter.create(user1.getSchema(), file);
      dataFileWriter.append(user1);
      dataFileWriter.append(user2);
      dataFileWriter.append(user3);
    }

    // Deserialize Users from disk
    DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
    try (DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader)) {
      User user = null;
      while (dataFileReader.hasNext()) {
        // Reuse user object by passing it to next(). This saves us from
        // allocating and garbage collecting many objects for files with
        // many items.
        user = dataFileReader.next(user);
        System.out.println(user);
      }
    }
  }
  /**
   * Creates {@code generic-kv.avro} in the local temp dir and fills it with three generic
   * records, each having a 'key', 'blah', and 'value' field. The last two records share the
   * same key.
   *
   * @return the path of the written file
   * @throws IOException if the file cannot be created or written
   */
  private Path writeGenericRecordAvroFile() throws IOException {
    final File file = new File(getLocalTempDir(), "generic-kv.avro");

    // Record schema: int key, string blah, string value.
    final Schema writerSchema = Schema.createRecord("record", null, null, false);
    final Schema.Field keyField =
        new Schema.Field("key", Schema.create(Schema.Type.INT), null, null);
    final Schema.Field blahField =
        new Schema.Field("blah", Schema.create(Schema.Type.STRING), null, null);
    final Schema.Field valueField =
        new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null);
    writerSchema.setFields(Lists.newArrayList(keyField, blahField, valueField));

    // Rows to write, in order; index 2 deliberately repeats the key of index 1.
    final int[] keys = {1, 2, 2};
    final String[] values = {"one", "two", "deux"};

    final GenericDatumWriter<GenericRecord> datumWriter =
        new GenericDatumWriter<GenericRecord>(writerSchema);
    final DataFileWriter<GenericRecord> fileWriter =
        new DataFileWriter<GenericRecord>(datumWriter).create(writerSchema, file);
    try {
      for (int i = 0; i < keys.length; i++) {
        GenericData.Record row = new GenericData.Record(writerSchema);
        row.put("key", keys[i]);
        row.put("blah", "blah");
        row.put("value", values[i]);
        fileWriter.append(row);
      }
    } finally {
      fileWriter.close();
    }
    return new Path(file.getPath());
  }
 /**
  * Builds a record from the tuple's {@code documentId} and {@code document} fields plus the
  * current time, appends it to {@code dataFileWriter} and flushes immediately.
  *
  * @param tuple source of the {@code documentId} and {@code document} string fields
  * @param collector not used by this method
  * @throws RuntimeException wrapping the {@code IOException} if the append or flush fails
  */
 public void execute(TridentTuple tuple, TridentCollector collector) {
   final GenericRecord entry = new GenericData.Record(schema);
   entry.put("docid", tuple.getStringByField("documentId"));
   entry.put("time", Time.currentTimeMillis());
   entry.put("line", tuple.getStringByField("document"));

   try {
     dataFileWriter.append(entry);
     dataFileWriter.flush();
   } catch (IOException ioe) {
     // Log first, then propagate as unchecked since the signature declares no IOException.
     final String message = "Error writing to document record: " + ioe;
     LOG.error(message);
     throw new RuntimeException(ioe);
   }
 }
Пример #8
0
  /**
   * Appends the tuple's content to the Avro writer selected by datasource and month.
   *
   * <p>Processes tuples of the shape (DATASOURCE_ID, TIMESTAMP_FIELD, CONTENT_FIELD). The
   * tuple is acked once its record has been appended; it is failed when the writer cannot be
   * obtained from the cache or when the append throws.
   *
   * @param inputTuple the tuple to persist
   */
  @Override
  public void execute(Tuple inputTuple) {
    // get datasource
    String datasource = inputTuple.getStringByField(RestIngestionSpout.DATASOURCE_ID);
    // compute month
    long timestamp = inputTuple.getLongByField(RestIngestionSpout.TIMESTAMP_FIELD);
    // this computation is completely stateless
    String month = timestampToMonth(timestamp);

    // now get the DataFileWriter
    DataFileWriter<GenericRecord> writer;
    try {
      writer = this.writersCache.get(DatasourceMonth.create(datasource, month));
    } catch (ExecutionException ee) {
      // Pass the exception itself so the cause and stack trace are not lost.
      LOGGER.error(
          "Error getting DataFileWriter for tuple for datasource "
              + datasource
              + " and timestamp "
              + timestamp
              + " : "
              + ee.getMessage(),
          ee);
      this.collector.fail(inputTuple);
      return;
    }

    // create and write a new record
    GenericRecord newDataRecord = new GenericData.Record(AVRO_SCHEMA);
    // Long.valueOf replaces the deprecated new Long(...) boxing constructor
    newDataRecord.put(AVRO_TIMESTAMP_FIELD, Long.valueOf(timestamp));
    newDataRecord.put(
        AVRO_CONTENT_FIELD, inputTuple.getStringByField(RestIngestionSpout.CONTENT_FIELD));
    try {
      writer.append(newDataRecord);
    } catch (IOException ioe) {
      LOGGER.error(
          "Error writing Avro record for datasource "
              + datasource
              + " and timestamp "
              + timestamp
              + " : "
              + ioe.getMessage(),
          ioe);
      this.collector.fail(inputTuple);
      return;
    }

    // ACK processing for this tuple as ok
    this.collector.ack(inputTuple);
  }
  /**
   * Writes the given records to {@code this.avroFile} as an Avro container, using the
   * {@code schema} field for both the datum writer and the container header.
   *
   * @param genericRecords records to append, in iteration order
   * @throws IOException if the file cannot be opened or written
   */
  private void populateGenericFile(List<GenericRecord> genericRecords) throws IOException {
    GenericDatumWriter<GenericRecord> genericDatumWriter =
        new GenericDatumWriter<GenericRecord>(schema);

    // Resources close in reverse order: the writer first (flushing its buffered data into the
    // stream), then the stream. Both are released even when create/append throws.
    try (FileOutputStream outputStream = new FileOutputStream(this.avroFile);
        DataFileWriter<GenericRecord> dataFileWriter =
            new DataFileWriter<GenericRecord>(genericDatumWriter)) {
      dataFileWriter.create(schema, outputStream);

      for (GenericRecord record : genericRecords) {
        dataFileWriter.append(record);
      }
    }
  }
Пример #10
0
  /**
   * Generates an input Avro file containing the given records in the temporary directory and
   * returns the full path of the file.
   *
   * @param filename name of the file to create inside {@code tmpFolder}
   * @param elems records to write, in order
   * @param syncBehavior when to force a sync marker: every {@code syncInterval} records
   *     ({@code SYNC_REGULAR}), after a pseudo-random count drawn from a fixed-seed generator
   *     ({@code SYNC_RANDOM}), or never explicitly ({@code SYNC_DEFAULT})
   * @param syncInterval record count (or exclusive upper bound of the random count) between
   *     forced syncs
   * @param coder supplies the datum writer and the schema
   * @param codec codec name passed to {@code CodecFactory.fromString}
   * @return the path of the written file
   * @throws IOException if the file cannot be created or written
   */
  private <T> String generateTestFile(
      String filename,
      List<T> elems,
      SyncBehavior syncBehavior,
      int syncInterval,
      AvroCoder<T> coder,
      String codec)
      throws IOException {
    Random random = new Random(0);
    File tmpFile = tmpFolder.newFile(filename);
    String path = tmpFile.toString();

    DatumWriter<T> datumWriter = coder.createDatumWriter();
    // The stream is managed here as well, so it is released even if setCodec/create throws
    // before the writer has taken it over. Resources close in reverse order: writer, then
    // stream.
    try (FileOutputStream os = new FileOutputStream(tmpFile);
        DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
      writer.setCodec(CodecFactory.fromString(codec));
      writer.create(coder.getSchema(), os);

      int recordIndex = 0;
      int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;

      for (T elem : elems) {
        writer.append(elem);
        recordIndex++;

        switch (syncBehavior) {
          case SYNC_REGULAR:
            if (recordIndex == syncInterval) {
              recordIndex = 0;
              writer.sync();
            }
            break;
          case SYNC_RANDOM:
            if (recordIndex == syncIndex) {
              recordIndex = 0;
              writer.sync();
              syncIndex = random.nextInt(syncInterval);
            }
            break;
          case SYNC_DEFAULT:
          default:
            // No explicit sync: leave block boundaries to the writer.
            break;
        }
      }
    }
    return path;
  }
  /**
   * Reads {@code companies.avro}, and for every company named "aol" writes a record with the
   * {@code Company2.avsc} schema (id, name and a deep copy of address) to
   * {@code target/companies2.avro}. Fails unless at least one such company is found.
   *
   * @throws IOException if a schema or data file cannot be read, or the output cannot be
   *     written
   */
  public void testWrite() throws IOException {

    URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
    assertNotNull(url);
    Schema schema = new Schema.Parser().parse(new File(url.getFile()));
    assertNotNull(schema);

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    // Another way of loading a file
    File file = new File("src/test/resources/input/companies.avro");

    File fileOut = new File("target/companies2.avro");
    Schema schemaOut =
        new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut);

    // Reader and writer are both closed on exit, including when an assertion fails.
    try (DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<GenericRecord>(file, datumReader);
        DataFileWriter<GenericRecord> dataFileWriter =
            new DataFileWriter<GenericRecord>(datumWriter)) {
      // Open the output exactly once, before the loop: calling create() per matching record
      // would fail on the second match because the writer is already open.
      dataFileWriter.create(schemaOut, fileOut);

      GenericRecord company = null;
      int count = 0;
      while (dataFileReader.hasNext()) {
        company = dataFileReader.next(company);
        if (company.get("name").toString().equals("aol")) {
          GenericRecord recordOut = new GenericData.Record(schemaOut);
          recordOut.put("id", company.get("id"));
          recordOut.put("name", company.get("name"));
          assertTrue(recordOut.getSchema().getField("address") != null);
          assertTrue(recordOut.getSchema().getField("employeeCount") == null);

          // address is of complex type
          GenericRecord address =
              new GenericData.Record((GenericData.Record) company.get("address"), true);
          recordOut.put("address", address);

          dataFileWriter.append(recordOut);

          count++;
        }
      }
      assertTrue(count > 0);
    }
  }
  /**
   * Loads {@code ArchivePlace} records from {@code archive-places/input.json} and writes them
   * to {@code archive-places/input.avro}, flushing after every record.
   *
   * @param args unused
   * @throws IOException if the input cannot be loaded or the output cannot be written
   */
  public static void main(String[] args) throws IOException {
    DatumWriter<ArchivePlace> datumWriter =
        new SpecificDatumWriter<ArchivePlace>(ArchivePlace.class);

    // NOTE(review): the stream is opened in append mode, yet create() writes a fresh container
    // header. If input.avro already exists, the new container is concatenated after the old
    // one. Confirm this is intended; otherwise open without append or use appendTo(File).
    //
    // Resources close in reverse order: the writer first (flushing into the stream), then the
    // stream. Both are released even when create/append throws.
    try (FileOutputStream falloutOutputStream =
            new FileOutputStream("src/test/resources/archive-places/input.avro", true);
        DataFileWriter<ArchivePlace> falloutDatafileWriter =
            new DataFileWriter<ArchivePlace>(datumWriter)) {
      falloutDatafileWriter.create(ArchivePlace.SCHEMA$, falloutOutputStream);

      List<ArchivePlace> places =
          SerializationUtil.loadFromJsons(
              ArchivePlace.SCHEMA$, "src/test/resources/archive-places/input.json");
      for (ArchivePlace place : places) {
        falloutDatafileWriter.append(place);
        falloutDatafileWriter.flush();
      }
    }
  }
Пример #13
0
 /**
  * Reads up to {@code batchSize} records from the Avro file described by {@code fileStatus}
  * and re-encodes each one as its own single-record Avro container.
  *
  * @param fileStatus status of the Avro file to preview
  * @param batchSize maximum number of records to return
  * @return one entry per record; the key is {@code <file path>::<record index>} and the value
  *     is the bytes of a container holding just that record
  * @throws IOException if the file cannot be read or a record cannot be re-encoded
  * @throws InterruptedException declared by the signature; nothing in this body throws it
  *     directly
  */
 private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize)
     throws IOException, InterruptedException {
   SeekableInput input = new FsInput(fileStatus.getPath(), hadoopConf);
   DatumReader<GenericRecord> reader = new GenericDatumReader<>();
   List<Map.Entry> batch = new ArrayList<>();
   // The reader was previously never closed; try-with-resources releases it on every path.
   try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
     int count = 0;
     while (fileReader.hasNext() && batch.size() < batchSize) {
       GenericRecord datum = fileReader.next();
       ByteArrayOutputStream out = new ByteArrayOutputStream();
       // Closing the writer flushes the record into 'out' before the bytes are copied below.
       try (DataFileWriter<GenericRecord> dataFileWriter =
           new DataFileWriter<GenericRecord>(
               new GenericDatumWriter<GenericRecord>(datum.getSchema()))) {
         dataFileWriter.create(datum.getSchema(), out);
         dataFileWriter.append(datum);
       }
       batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + count, out.toByteArray()));
       count++;
     }
   }
   return batch;
 }
Пример #14
0
  /**
   * Writes one {@code MyPair} ("dog", "cat") to a temporary Avro file, then reads the file
   * back and prints every pair it contains.
   *
   * @throws IOException if the temp file cannot be created, written or read
   */
  public void serializeSpecific() throws IOException {
    // Create a datum to serialize.
    MyPair datum = new MyPair();
    datum.left = new Utf8("dog");
    datum.right = new Utf8("cat");
    File tmpFile = File.createTempFile("myPairAvroExample", ".avro");

    // Serialize it; try-with-resources closes the writer even when create/append throws.
    try (DataFileWriter<MyPair> writer =
        new DataFileWriter<MyPair>(new SpecificDatumWriter<MyPair>(MyPair.class))) {
      writer.create(MyPair.SCHEMA$, tmpFile);
      writer.append(datum);
    }

    System.out.println("Serialization: " + tmpFile);

    // Deserialize it; the reader is likewise closed on every path.
    try (FileReader<MyPair> reader =
        DataFileReader.openReader(tmpFile, new SpecificDatumReader<MyPair>(MyPair.class))) {
      while (reader.hasNext()) {
        MyPair result = reader.next();
        System.out.printf("Left: %s, Right: %s\n", result.left, result.right);
      }
    }
  }
Пример #15
0
 /**
  * Appends {@code record} to {@code dataFileWriter}.
  *
  * <p>NOTE(review): {@code dataFileWriter} is declared outside this method; presumably it
  * has been created and opened before the first call. Confirm against the enclosing class.
  *
  * @param record the datum to append
  * @throws IOException propagated from {@code dataFileWriter.append}
  */
 @Override
 public void writeRecord(E record) throws IOException {
   dataFileWriter.append(record);
 }
Пример #16
0
 /**
  * Appends {@code value} to {@code dataFileWriter}.
  *
  * <p>NOTE(review): {@code dataFileWriter} is declared outside this method; presumably it
  * has been created and opened before the first call. Confirm against the enclosing class.
  *
  * @param value the datum to append
  * @throws Exception declared by the overridden signature; failures from
  *     {@code dataFileWriter.append} propagate unchanged
  */
 @Override
 public void write(T value) throws Exception {
   dataFileWriter.append(value);
 }