@Test
  public void test() throws IOException {
    // Round-trip check: writes one AvroDocument to target/AvroDocument.avro, then reads it
    // back from the same file and dumps it to stdout.
    File file = new File("target/AvroDocument.avro");

    Schema schema = AvroDocument._SCHEMA;

    {
      System.out.println("Writing to: " + file.getAbsolutePath());
      DatumWriter<Object> datumWriter = new SpecificDatumWriter(AvroDocument.class);
      FileOutputStream outputStream = new FileOutputStream(file);
      try {
        DataFileWriter<Object> dfw = new DataFileWriter<Object>(schema, outputStream, datumWriter);

        AvroDocument d = createTestDocument();
        dfw.append(d);
        dfw.flush();
        dfw.close();
      } finally {
        // Release the file handle even when writing fails. Closing a FileOutputStream
        // a second time after a successful dfw.close() is harmless.
        outputStream.close();
      }
    }

    {
      System.out.println("Reading from: " + file.getAbsolutePath());
      DatumReader<Object> datumReader = new SpecificDatumReader(AvroDocument.class);
      SeekableInput seekableInput = new SeekableFileInput(file);
      DataFileReader<Object> dfr = new DataFileReader<Object>(seekableInput, datumReader);
      try {
        // The record passed to next() is reused as the target of the read.
        AvroDocument d = new AvroDocument();
        dfr.next(d);
        AvroDocumentReader.dumpAvroDocument(d, System.out);
      } finally {
        // The original never closed the reader, leaking the open input file.
        dfr.close();
      }
    }
  }
  /**
   * Serializes a single {@code Employee} record into an in-memory Avro container file.
   *
   * <p>The record's {@code boss} field is always written as {@code null}.
   *
   * @param name value for the record's {@code name} field
   * @param age value for the record's {@code age} field
   * @param emails value for the record's {@code emails} array field
   * @return the bytes of the Avro container file holding the one record
   * @throws IOException if the Avro writer fails
   */
  private byte[] createAvroData(String name, int age, List<String> emails) throws IOException {
    final String schemaJson =
        "{\n"
            + "\"type\": \"record\",\n"
            + "\"name\": \"Employee\",\n"
            + "\"fields\": [\n"
            + " {\"name\": \"name\", \"type\": \"string\"},\n"
            + " {\"name\": \"age\", \"type\": \"int\"},\n"
            + " {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n"
            + " {\"name\": \"boss\", \"type\": [\"Employee\",\"null\"]}\n"
            + "]}";
    final Schema employeeSchema = new Schema.Parser().parse(schemaJson);

    // Populate the one record that will be written.
    final GenericRecord employee = new GenericData.Record(employeeSchema);
    employee.put("name", name);
    employee.put("age", age);
    employee.put("emails", emails);
    employee.put("boss", null);

    // Write header plus record into a byte buffer and hand back its contents.
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    final DataFileWriter<GenericRecord> writer =
        new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(employeeSchema));
    writer.create(employeeSchema, buffer);
    writer.append(employee);
    writer.close();
    return buffer.toByteArray();
  }
 /**
  * Closes the data file writer. A failure to close is logged and not rethrown, so cleanup
  * itself never fails with an {@link IOException}.
  */
 @Override
 public void cleanup() {
   try {
     dataFileWriter.close();
   } catch (IOException e) {
     // Pass the exception as the throwable argument as well, so the stack trace is logged
     // instead of only the exception's toString().
     LOG.error("Error Closing file: " + e, e);
   }
 }
Exemple #4
0
 /**
  * Writes the given records to {@code file} as an Avro container file.
  *
  * @param file destination file
  * @param schema schema used both for the file header and for encoding the records
  * @param records records to append, in order
  * @return the same {@code file} that was passed in
  * @throws IOException if creating, writing or closing the file fails
  */
 private static <T> File createFile(File file, Schema schema, T... records) throws IOException {
   DatumWriter<T> datumWriter = new GenericDatumWriter<T>(schema);
   DataFileWriter<T> fileWriter = new DataFileWriter<T>(datumWriter);
   fileWriter.create(schema, file);
   try {
     for (T record : records) {
       fileWriter.append(record);
     }
   } finally {
     // Close in finally so a failing append does not leak the open file handle.
     fileWriter.close();
   }
   return file;
 }
    /**
     * Assure that currentWriter is populated and refers to the correct data file. This may
     * roll-over the existing data file. Also assures that writing one more span will not violate
     * limits on Span storage.
     *
     * <p>In order, the method: (1) deletes files, lowest key first, while {@code spansSoFar} is
     * at or above {@code maxSpans}; (2) resets the current writer state if no files remain;
     * (3) computes the cut-off for the current time; and (4) opens a new deflate-compressed
     * data file when there is no current writer or the current file has aged past
     * {@code secondsPerFile}.
     *
     * @throws IOException if closing the previous writer or creating the new data file fails
     */
    private void assureCurrentWriter() throws IOException {
      boolean createNewFile = false;

      // Will we overshoot policy?
      // Evict whole files until the running span count is back under maxSpans.
      while (this.spansSoFar >= maxSpans) {
        File oldest = null;
        // If spansSoFar is positive, there must be at least one file
        // NOTE(review): assuming files is a sorted map, firstKey() throws when it is empty
        // (e.g. if maxSpans were <= 0 with no files) — confirm that cannot happen.
        synchronized (this.files) {
          oldest = this.files.remove(this.files.firstKey());
        }
        // Keep the per-file bookkeeping in step with the removal, then delete from disk.
        this.spansSoFar -= spansPerFile.get(oldest);
        spansPerFile.remove(oldest);
        oldest.delete();
      }
      // NOTE(review): files is read here without the synchronized block used elsewhere in
      // this method — confirm whether that is intentional.
      if (files.size() == 0) {
        // In corner case we have removed the current file,
        // if that happened we need to clear current variables.
        // NOTE(review): currentWriter is dropped here without being closed — confirm this
        // does not leak the writer's open file handle.
        currentTimestamp = (long) 0;
        currentWriter = null;
      }
      // Current wall-clock time in whole seconds.
      long rightNow = System.currentTimeMillis() / 1000L;

      // What file should we be in
      // (floorSecond is defined elsewhere; its result is used as the key and name of the file.)
      long cutOff = floorSecond(rightNow);

      if (currentWriter == null) {
        createNewFile = true;
      }
      // Test for roll-over.
      else if (cutOff >= (currentTimestamp + secondsPerFile)) {
        currentWriter.close();
        createNewFile = true;
      }

      if (createNewFile) {
        // File name is built from the trace directory, the current thread id and the cut-off.
        File newFile =
            new File(
                traceFileDir + "/" + Thread.currentThread().getId() + "_" + cutOff + FILE_SUFFIX);
        synchronized (this.files) {
          this.files.put(cutOff, newFile);
        }
        // A new file starts with zero recorded spans.
        this.spansPerFile.put(newFile, (long) 0);
        this.currentWriter = new DataFileWriter<Span>(SPAN_WRITER);
        // The codec is set before create(), which opens the file.
        this.currentWriter.setCodec(CodecFactory.deflateCodec(compressionLevel));
        this.currentWriter.create(Span.SCHEMA$, newFile);
        this.currentTimestamp = cutOff;
      }
    }
  /**
   * Writes the given records to {@code this.avroFile} as an Avro container file using the
   * instance's {@code schema}.
   *
   * @param genericRecords records to append, in iteration order
   * @throws IOException if opening, writing or closing the file fails
   */
  private void populateGenericFile(List<GenericRecord> genericRecords) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
    try {
      GenericDatumWriter<GenericRecord> genericDatumWriter =
          new GenericDatumWriter<GenericRecord>(schema);

      DataFileWriter<GenericRecord> dataFileWriter =
          new DataFileWriter<GenericRecord>(genericDatumWriter);
      dataFileWriter.create(schema, outputStream);

      for (GenericRecord record : genericRecords) {
        dataFileWriter.append(record);
      }

      dataFileWriter.close();
    } finally {
      // Always release the file handle, including when create() or append() throws.
      // Closing the stream again after a successful dataFileWriter.close() is harmless.
      outputStream.close();
    }
  }
  /**
   * Reads company records from {@code companies.avro} and, for every record named "aol",
   * writes a projected copy (id, name, address) to {@code target/companies2.avro} using the
   * {@code Company2.avsc} schema. Asserts that at least one record was written.
   *
   * @throws IOException if a schema or data file cannot be read or written
   */
  public void testWrite() throws IOException {

    URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
    assertNotNull(url);
    Schema schema = new Schema.Parser().parse(new File(url.getFile()));
    assertNotNull(schema);

    File fileOut = new File("target/companies2.avro");
    Schema schemaOut =
        new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    // Another way of loading a file
    File file = new File("src/test/resources/input/companies.avro");
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<GenericRecord>(file, datumReader);

    int count = 0;
    try {
      DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut);
      DataFileWriter<GenericRecord> dataFileWriter =
          new DataFileWriter<GenericRecord>(datumWriter);
      // Open the output exactly once, before the loop. Calling create() per matching record
      // would hit an already-open writer as soon as a second record matched.
      dataFileWriter.create(schemaOut, fileOut);
      try {
        GenericRecord company = null;
        while (dataFileReader.hasNext()) {
          // The previous record instance is handed back to the reader for reuse.
          company = dataFileReader.next(company);
          if (company.get("name").toString().equals("aol")) {
            GenericRecord recordOut = new GenericData.Record(schemaOut);
            recordOut.put("id", company.get("id"));
            recordOut.put("name", company.get("name"));
            assertTrue(recordOut.getSchema().getField("address") != null);
            assertTrue(recordOut.getSchema().getField("employeeCount") == null);

            // address is of complex type; deep-copy it because the source record is reused
            GenericRecord address =
                new GenericData.Record((GenericData.Record) company.get("address"), true);
            recordOut.put("address", address);

            dataFileWriter.append(recordOut);

            count++;
          }
        }
      } finally {
        dataFileWriter.close();
      }
    } finally {
      // The reader was never closed in the original.
      dataFileReader.close();
    }
    assertTrue(count > 0);
  }
  /**
   * Builds three {@code User} records in three different ways, writes them to
   * {@code users.avro}, then reads the file back and prints each record.
   *
   * @param args unused
   * @throws IOException if writing or reading {@code users.avro} fails
   */
  public static void main(String[] args) throws IOException {
    User user1 = new User();
    user1.setName("Alyssa");
    user1.setFavoriteNumber(256);
    // Leave favorite color null

    // Alternate constructor
    User user2 = new User("Ben", 7, "red");

    // Construct via builder
    User user3 =
        User.newBuilder()
            .setName("Charlie")
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .build();

    // Serialize user1, user2 and user3 to disk
    File file = new File("users.avro");
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
    dataFileWriter.create(user1.getSchema(), file);
    try {
      dataFileWriter.append(user1);
      dataFileWriter.append(user2);
      dataFileWriter.append(user3);
    } finally {
      // Close in finally so a failing append does not leak the open file handle.
      dataFileWriter.close();
    }

    // Deserialize Users from disk
    DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
    DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader);
    try {
      User user = null;
      while (dataFileReader.hasNext()) {
        // Reuse user object by passing it to next(). This saves us from
        // allocating and garbage collecting many objects for files with
        // many items.
        user = dataFileReader.next(user);
        System.out.println(user);
      }
    } finally {
      dataFileReader.close();
    }
  }
  /**
   * Regenerates {@code src/test/resources/archive-places/input.avro} from the records in
   * {@code src/test/resources/archive-places/input.json}.
   *
   * @param args unused
   * @throws IOException if the output file cannot be written
   */
  public static void main(String[] args) throws IOException {
    // Load the input first, so a failure here cannot leave a truncated output file behind.
    List<ArchivePlace> places =
        SerializationUtil.loadFromJsons(
            ArchivePlace.SCHEMA$, "src/test/resources/archive-places/input.json");

    DatumWriter<ArchivePlace> datumWriter =
        new SpecificDatumWriter<ArchivePlace>(ArchivePlace.class);
    DataFileWriter<ArchivePlace> falloutDatafileWriter =
        new DataFileWriter<ArchivePlace>(datumWriter);
    // create() always writes a fresh container header, so the stream must truncate the
    // file. Opening it in append mode would put a second header after any existing content
    // and produce a file that is not a valid Avro container.
    FileOutputStream falloutOutputStream =
        new FileOutputStream("src/test/resources/archive-places/input.avro");
    try {
      falloutDatafileWriter.create(ArchivePlace.SCHEMA$, falloutOutputStream);

      for (ArchivePlace place : places) {
        falloutDatafileWriter.append(place);
        // Flushing after every append is kept from the original.
        falloutDatafileWriter.flush();
      }
      falloutDatafileWriter.close();
    } finally {
      // Release the file handle on failure too; a second close after success is harmless.
      falloutOutputStream.close();
    }
  }
  /**
   * Writes an avro file of generic records with a 'key', 'blah', and 'value' field.
   *
   * <p>Three records are written: keys 1, 2 and 2 with values "one", "two" and "deux". The last
   * one deliberately repeats the key of the one before it.
   *
   * @return the path of the written file
   * @throws IOException if the file cannot be created or written
   */
  private Path writeGenericRecordAvroFile() throws IOException {
    final File file = new File(getLocalTempDir(), "generic-kv.avro");

    // Record schema: int key plus two string fields.
    final Schema writerSchema = Schema.createRecord("record", null, null, false);
    writerSchema.setFields(
        Lists.newArrayList(
            new Schema.Field("key", Schema.create(Schema.Type.INT), null, null),
            new Schema.Field("blah", Schema.create(Schema.Type.STRING), null, null),
            new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null)));

    // Row data, one entry per record, in write order.
    final int[] keys = {1, 2, 2};
    final String[] values = {"one", "two", "deux"};

    final GenericDatumWriter<GenericRecord> datumWriter =
        new GenericDatumWriter<GenericRecord>(writerSchema);
    final DataFileWriter<GenericRecord> fileWriter =
        new DataFileWriter<GenericRecord>(datumWriter).create(writerSchema, file);
    try {
      for (int i = 0; i < keys.length; i++) {
        GenericData.Record row = new GenericData.Record(writerSchema);
        row.put("key", keys[i]);
        row.put("blah", "blah");
        row.put("value", values[i]);
        fileWriter.append(row);
      }
    } finally {
      fileWriter.close();
    }
    return new Path(file.getPath());
  }
 /**
  * Reads up to {@code batchSize} records from the Avro file described by {@code fileStatus}
  * and re-serializes each one as its own single-record Avro container.
  *
  * @param fileStatus status of the Avro file to preview
  * @param batchSize maximum number of records to return
  * @return one entry per record: the key is the file's path followed by {@code "::"} and the
  *     record's zero-based index, the value is the record's serialized bytes
  * @throws IOException if the file cannot be read or a record cannot be re-serialized
  * @throws InterruptedException declared for callers; not thrown by the code in this method
  */
 private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize)
     throws IOException, InterruptedException {
   SeekableInput input = new FsInput(fileStatus.getPath(), hadoopConf);
   DatumReader<GenericRecord> reader = new GenericDatumReader<>();
   List<Map.Entry> batch = new ArrayList<>();
   // try-with-resources: the original never closed the reader, leaking the open input.
   try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
     int count = 0;
     while (fileReader.hasNext() && batch.size() < batchSize) {
       GenericRecord datum = fileReader.next();
       ByteArrayOutputStream out = new ByteArrayOutputStream();
       // Each record gets its own writer so the bytes carry the record's own schema header.
       DataFileWriter<GenericRecord> dataFileWriter =
           new DataFileWriter<GenericRecord>(
               new GenericDatumWriter<GenericRecord>(datum.getSchema()));
       dataFileWriter.create(datum.getSchema(), out);
       dataFileWriter.append(datum);
       dataFileWriter.close();
       out.close();
       batch.add(
           new Pair(fileStatus.getPath().toUri().getPath() + "::" + count, out.toByteArray()));
       count++;
     }
   }
   return batch;
 }
Exemple #12
0
  /**
   * Writes one {@code MyPair} ("dog", "cat") to a temporary Avro file, then reads the file
   * back and prints the left and right value of every record found.
   *
   * @throws IOException if the temporary file cannot be created, written or read
   */
  public void serializeSpecific() throws IOException {
    // Create a datum to serialize.
    MyPair datum = new MyPair();
    datum.left = new Utf8("dog");
    datum.right = new Utf8("cat");
    File tmpFile = File.createTempFile("myPairAvroExample", ".avro");
    // Serialize it.
    DataFileWriter<MyPair> writer =
        new DataFileWriter<MyPair>(new SpecificDatumWriter<MyPair>(MyPair.class));
    writer.create(MyPair.SCHEMA$, tmpFile);
    try {
      writer.append(datum);
    } finally {
      // Close in finally so a failing append does not leak the open file handle.
      writer.close();
    }

    System.out.println("Serialization: " + tmpFile);

    // Deserialize it.
    FileReader<MyPair> reader =
        DataFileReader.openReader(tmpFile, new SpecificDatumReader<MyPair>(MyPair.class));
    try {
      while (reader.hasNext()) {
        MyPair result = reader.next();
        System.out.printf("Left: %s, Right: %s\n", result.left, result.right);
      }
    } finally {
      // Close the reader even when reading or printing throws.
      reader.close();
    }
  }
 /**
  * Flushes and closes the data file writer, then closes the superclass.
  *
  * @throws IOException if flushing or closing fails
  */
 @Override
 public void close() throws IOException {
   try {
     dataFileWriter.flush();
     dataFileWriter.close();
   } finally {
     // Run the superclass close even when the writer fails, so its resources are not
     // left open behind the exception.
     super.close();
   }
 }