@Test
public void test() throws IOException {
    File file = new File("target/AvroDocument.avro");
    Schema schema = AvroDocument._SCHEMA;
    {
        System.out.println("Writing to: " + file.getAbsolutePath());
        DatumWriter<AvroDocument> datumWriter = new SpecificDatumWriter<>(AvroDocument.class);
        DataFileWriter<AvroDocument> dfw = new DataFileWriter<>(datumWriter);
        dfw.create(schema, file);
        AvroDocument d = createTestDocument();
        dfw.append(d);
        dfw.close();
    }
    {
        System.out.println("Reading from: " + file.getAbsolutePath());
        DatumReader<AvroDocument> datumReader = new SpecificDatumReader<>(AvroDocument.class);
        SeekableInput seekableInput = new SeekableFileInput(file);
        DataFileReader<AvroDocument> dfr = new DataFileReader<>(seekableInput, datumReader);
        AvroDocument d = new AvroDocument();
        dfr.next(d);
        dfr.close();
        AvroDocumentReader.dumpAvroDocument(d, System.out);
    }
}
private byte[] createAvroData(String name, int age, List<String> emails) throws IOException {
    String AVRO_SCHEMA = "{\n"
        + "\"type\": \"record\",\n"
        + "\"name\": \"Employee\",\n"
        + "\"fields\": [\n"
        + "  {\"name\": \"name\", \"type\": \"string\"},\n"
        + "  {\"name\": \"age\", \"type\": \"int\"},\n"
        + "  {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n"
        + "  {\"name\": \"boss\", \"type\": [\"Employee\", \"null\"]}\n"
        + "]}";
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);

    // Build a single generic record; the recursive "boss" union is left null.
    GenericRecord e1 = new GenericData.Record(schema);
    e1.put("name", name);
    e1.put("age", age);
    e1.put("emails", emails);
    e1.put("boss", null);

    // Serialize the record into an in-memory Avro container file.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, out);
    dataFileWriter.append(e1);
    dataFileWriter.close();
    return out.toByteArray();
}
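A minimal sketch (not from the original) of reading back the container bytes produced by createAvroData(...) above; dumpEmployeeBytes is a hypothetical helper name, and SeekableByteArrayInput comes from org.apache.avro.file, so no temp file is needed.

// Hypothetical helper: decodes the in-memory Avro container produced above.
// Assumes imports of org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
// and org.apache.avro.generic.{GenericDatumReader, GenericRecord}.
private void dumpEmployeeBytes(byte[] avroBytes) throws IOException {
    // No schema argument: the reader picks up the writer schema from the container header.
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> reader =
            new DataFileReader<>(new SeekableByteArrayInput(avroBytes), datumReader)) {
        while (reader.hasNext()) {
            System.out.println(reader.next());
        }
    }
}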
@Override
public void cleanup() {
    try {
        dataFileWriter.close();
    } catch (IOException e) {
        LOG.error("Error closing file", e);
    }
}
@SafeVarargs
private static <T> File createFile(File file, Schema schema, T... records) throws IOException {
    DatumWriter<T> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
    fileWriter.create(schema, file);
    for (T record : records) {
        fileWriter.append(record);
    }
    fileWriter.close();
    return file;
}
/**
 * Ensures that currentWriter is populated and refers to the correct data file. This may
 * roll over the existing data file. Also ensures that writing one more span will not
 * violate the limits on span storage.
 *
 * @throws IOException if the current writer cannot be closed or a new file cannot be created
 */
private void assureCurrentWriter() throws IOException {
    boolean createNewFile = false;

    // Would one more span overshoot the policy? Drop the oldest files until it fits.
    while (this.spansSoFar >= maxSpans) {
        File oldest = null;
        // If spansSoFar is positive, there must be at least one file.
        synchronized (this.files) {
            oldest = this.files.remove(this.files.firstKey());
        }
        this.spansSoFar -= spansPerFile.get(oldest);
        spansPerFile.remove(oldest);
        oldest.delete();
    }

    if (files.size() == 0) {
        // Corner case: the loop above may have removed the current file;
        // if so, reset the current-file state.
        currentTimestamp = (long) 0;
        currentWriter = null;
    }

    long rightNow = System.currentTimeMillis() / 1000L;
    // Which file window should we be in?
    long cutOff = floorSecond(rightNow);
    if (currentWriter == null) {
        createNewFile = true;
    } else if (cutOff >= (currentTimestamp + secondsPerFile)) {
        // Roll over to a new file.
        currentWriter.close();
        createNewFile = true;
    }

    if (createNewFile) {
        File newFile = new File(
            traceFileDir + "/" + Thread.currentThread().getId() + "_" + cutOff + FILE_SUFFIX);
        synchronized (this.files) {
            this.files.put(cutOff, newFile);
        }
        this.spansPerFile.put(newFile, (long) 0);
        this.currentWriter = new DataFileWriter<Span>(SPAN_WRITER);
        this.currentWriter.setCodec(CodecFactory.deflateCodec(compressionLevel));
        this.currentWriter.create(Span.SCHEMA$, newFile);
        this.currentTimestamp = cutOff;
    }
}
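The method above calls floorSecond, which is not shown. A plausible minimal implementation, assuming it aligns a second-resolution timestamp down to the start of its secondsPerFile-sized window (consistent with the roll-over test above), would be:

// Hypothetical helper assumed by assureCurrentWriter(): aligns a timestamp
// (in seconds) down to the start of the current secondsPerFile window.
private long floorSecond(long timestampSeconds) {
    return timestampSeconds - (timestampSeconds % this.secondsPerFile);
}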
private void populateGenericFile(List<GenericRecord> genericRecords) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter);
    dataFileWriter.create(schema, outputStream);
    for (GenericRecord record : genericRecords) {
        dataFileWriter.append(record);
    }
    dataFileWriter.close();
    outputStream.close();
}
public void testWrite() throws IOException {
    URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
    assertNotNull(url);
    Schema schema = new Schema.Parser().parse(new File(url.getFile()));
    assertNotNull(schema);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);

    // Another way of loading a file.
    File file = new File("src/test/resources/input/companies.avro");
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader);

    File fileOut = new File("target/companies2.avro");
    Schema schemaOut = new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schemaOut);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);

    GenericRecord company = null;
    int count = 0;
    while (dataFileReader.hasNext()) {
        company = dataFileReader.next(company);
        if (company.get("name").toString().equals("aol")) {
            // Create the output file lazily, on the first match only;
            // calling create() a second time on the same writer would fail.
            if (count == 0) {
                dataFileWriter.create(schemaOut, fileOut);
            }
            GenericRecord recordOut = new GenericData.Record(schemaOut);
            recordOut.put("id", company.get("id"));
            recordOut.put("name", company.get("name"));
            assertTrue(recordOut.getSchema().getField("address") != null);
            assertTrue(recordOut.getSchema().getField("employeeCount") == null);
            // address is of complex type
            GenericRecord address =
                new GenericData.Record((GenericData.Record) company.get("address"), true);
            recordOut.put("address", address);
            dataFileWriter.append(recordOut);
            count++;
        }
    }
    assertTrue(count > 0);
    dataFileWriter.close();
    dataFileReader.close();
}
public static void main(String[] args) throws IOException {
    User user1 = new User();
    user1.setName("Alyssa");
    user1.setFavoriteNumber(256);
    // Leave favorite color null

    // Alternate constructor
    User user2 = new User("Ben", 7, "red");

    // Construct via builder
    User user3 = User.newBuilder()
        .setName("Charlie")
        .setFavoriteColor("blue")
        .setFavoriteNumber(null)
        .build();

    // Serialize user1, user2 and user3 to disk
    File file = new File("users.avro");
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter);
    dataFileWriter.create(user1.getSchema(), file);
    dataFileWriter.append(user1);
    dataFileWriter.append(user2);
    dataFileWriter.append(user3);
    dataFileWriter.close();

    // Deserialize Users from disk
    DatumReader<User> userDatumReader = new SpecificDatumReader<>(User.class);
    DataFileReader<User> dataFileReader = new DataFileReader<>(file, userDatumReader);
    try {
        User user = null;
        while (dataFileReader.hasNext()) {
            // Reuse user object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            user = dataFileReader.next(user);
            System.out.println(user);
        }
    } finally {
        dataFileReader.close();
    }
}
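Since DataFileReader and DataFileWriter both implement Closeable, the explicit try/finally above can be replaced with try-with-resources on Java 7+. A sketch of the equivalent read loop:

// Equivalent read loop using try-with-resources; the reader is closed
// automatically even if an exception is thrown mid-iteration.
try (DataFileReader<User> reader =
        new DataFileReader<>(file, new SpecificDatumReader<>(User.class))) {
    User user = null;
    while (reader.hasNext()) {
        user = reader.next(user); // reuse the same object across iterations
        System.out.println(user);
    }
}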
public static void main(String[] args) throws IOException {
    DatumWriter<ArchivePlace> datumWriter = new SpecificDatumWriter<>(ArchivePlace.class);
    DataFileWriter<ArchivePlace> falloutDatafileWriter = new DataFileWriter<>(datumWriter);
    // Note: create() always writes a fresh container header, so opening the
    // stream in append mode corrupts the file if it already has content;
    // DataFileWriter.appendTo(...) is the way to extend an existing container.
    FileOutputStream falloutOutputStream =
        new FileOutputStream("src/test/resources/archive-places/input.avro", true);
    falloutDatafileWriter.create(ArchivePlace.SCHEMA$, falloutOutputStream);
    List<ArchivePlace> places = SerializationUtil.loadFromJsons(
        ArchivePlace.SCHEMA$, "src/test/resources/archive-places/input.json");
    for (ArchivePlace place : places) {
        falloutDatafileWriter.append(place);
        falloutDatafileWriter.flush();
    }
    falloutDatafileWriter.close();
    falloutOutputStream.close();
}
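If the intent of the append-mode stream above was to add records to an existing Avro file, a minimal sketch of the supported route (appendPlaces is a hypothetical name; assumes the file already exists as a valid container written with the same schema):

// Hypothetical helper: extends an existing Avro container file in place.
// appendTo(File) reuses the existing header rather than writing a new one.
private static void appendPlaces(File existing, List<ArchivePlace> places) throws IOException {
    DatumWriter<ArchivePlace> writer = new SpecificDatumWriter<>(ArchivePlace.class);
    try (DataFileWriter<ArchivePlace> appender = new DataFileWriter<>(writer).appendTo(existing)) {
        for (ArchivePlace place : places) {
            appender.append(place);
        }
    }
}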
/** Writes an avro file of generic records with a 'key', 'blah', and 'value' field. */
private Path writeGenericRecordAvroFile() throws IOException {
    // Open a writer.
    final File file = new File(getLocalTempDir(), "generic-kv.avro");
    final Schema writerSchema = Schema.createRecord("record", null, null, false);
    writerSchema.setFields(Lists.newArrayList(
        new Schema.Field("key", Schema.create(Schema.Type.INT), null, null),
        new Schema.Field("blah", Schema.create(Schema.Type.STRING), null, null),
        new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null)));
    final DataFileWriter<GenericRecord> fileWriter =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(writerSchema))
            .create(writerSchema, file);
    try {
        // Write a record.
        GenericData.Record record = new GenericData.Record(writerSchema);
        record.put("key", 1);
        record.put("blah", "blah");
        record.put("value", "one");
        fileWriter.append(record);

        // Write another record.
        record = new GenericData.Record(writerSchema);
        record.put("key", 2);
        record.put("blah", "blah");
        record.put("value", "two");
        fileWriter.append(record);

        // Write a duplicate record with the same key field value.
        record = new GenericData.Record(writerSchema);
        record.put("key", 2);
        record.put("blah", "blah");
        record.put("value", "deux");
        fileWriter.append(record);
    } finally {
        // Close it and return the path.
        fileWriter.close();
    }
    return new Path(file.getPath());
}
private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize)
        throws IOException, InterruptedException {
    SeekableInput input = new FsInput(fileStatus.getPath(), hadoopConf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
    List<Map.Entry> batch = new ArrayList<>();
    int count = 0;
    while (fileReader.hasNext() && batch.size() < batchSize) {
        GenericRecord datum = fileReader.next();
        // Re-serialize each record into its own single-record container
        // so each preview entry is self-describing.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataFileWriter<GenericRecord> dataFileWriter =
            new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(datum.getSchema()));
        dataFileWriter.create(datum.getSchema(), out);
        dataFileWriter.append(datum);
        dataFileWriter.close();
        out.close();
        batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + count, out.toByteArray()));
        count++;
    }
    fileReader.close();
    return batch;
}
public void serializeSpecific() throws IOException {
    // Create a datum to serialize.
    MyPair datum = new MyPair();
    datum.left = new Utf8("dog");
    datum.right = new Utf8("cat");

    File tmpFile = File.createTempFile("myPairAvroExample", ".avro");

    // Serialize it.
    DataFileWriter<MyPair> writer =
        new DataFileWriter<MyPair>(new SpecificDatumWriter<MyPair>(MyPair.class));
    writer.create(MyPair.SCHEMA$, tmpFile);
    writer.append(datum);
    writer.close();
    System.out.println("Serialization: " + tmpFile);

    // Deserialize it.
    FileReader<MyPair> reader =
        DataFileReader.openReader(tmpFile, new SpecificDatumReader<MyPair>(MyPair.class));
    while (reader.hasNext()) {
        MyPair result = reader.next();
        System.out.printf("Left: %s, Right: %s%n", result.left, result.right);
    }
    reader.close();
}
@Override
public void close() throws IOException {
    dataFileWriter.flush();
    dataFileWriter.close();
    super.close();
}
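The cleanup() and close() overrides above both reference a dataFileWriter field from an enclosing class that is not shown. A minimal hypothetical wrapper illustrating where such a field would live (class, field, and method names are assumptions, not from the original source):

// Hypothetical enclosing class for the cleanup()/close() fragments above.
// Assumes imports of java.io.{Closeable, File, IOException},
// org.apache.avro.Schema, org.apache.avro.file.DataFileWriter, and
// org.apache.avro.generic.{GenericDatumWriter, GenericRecord}.
public class AvroRecordSink implements Closeable {
    private final DataFileWriter<GenericRecord> dataFileWriter;

    public AvroRecordSink(Schema schema, File file) throws IOException {
        this.dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
        this.dataFileWriter.create(schema, file);
    }

    public void write(GenericRecord record) throws IOException {
        dataFileWriter.append(record);
    }

    @Override
    public void close() throws IOException {
        dataFileWriter.flush();
        dataFileWriter.close();
    }
}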