@Test
public void testCreationWithSchema() throws Exception {
  List<Bird> expected = createRandomRecords(100);
  String filename =
      generateTestFile(
          "tmp.avro",
          expected,
          SyncBehavior.SYNC_DEFAULT,
          0,
          AvroCoder.of(Bird.class),
          DataFileConstants.NULL_CODEC);

  // Create a source with a schema object
  Schema schema = ReflectData.get().getSchema(Bird.class);
  AvroSource<GenericRecord> source = AvroSource.from(filename).withSchema(schema);
  List<GenericRecord> records = SourceTestUtils.readFromSource(source, null);
  assertEqualsWithGeneric(expected, records);

  // Create a source with a JSON schema
  String schemaString = ReflectData.get().getSchema(Bird.class).toString();
  source = AvroSource.from(filename).withSchema(schemaString);
  records = SourceTestUtils.readFromSource(source, null);
  assertEqualsWithGeneric(expected, records);

  // Create a source with no schema
  source = AvroSource.from(filename);
  records = SourceTestUtils.readFromSource(source, null);
  assertEqualsWithGeneric(expected, records);
}
@Override
public void open(int taskNumber, int numTasks) throws IOException {
  super.open(taskNumber, numTasks);

  DatumWriter<E> datumWriter;
  Schema schema;
  if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
    datumWriter = new SpecificDatumWriter<E>(avroValueType);
    try {
      // Specific records carry their own schema; instantiate one to obtain it.
      schema =
          ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
    } catch (InstantiationException | IllegalAccessException e) {
      // Wrap the original exception as the cause rather than discarding it:
      // throwing only e.getMessage() would lose the stack trace.
      throw new RuntimeException(e);
    }
  } else {
    // Plain POJOs: derive both the writer and the schema via reflection.
    datumWriter = new ReflectDatumWriter<E>(avroValueType);
    schema = ReflectData.get().getSchema(avroValueType);
  }

  dataFileWriter = new DataFileWriter<E>(datumWriter);
  if (userDefinedSchema == null) {
    dataFileWriter.create(schema, stream);
  } else {
    dataFileWriter.create(userDefinedSchema, stream);
  }
}
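For context, a minimal sketch of the companion write and close hooks that would pair with this open(), assuming Flink's FileOutputFormat contract and the dataFileWriter field initialized above:

@Override
public void writeRecord(E record) throws IOException {
  // Append one datum to the Avro container file created in open().
  dataFileWriter.append(record);
}

@Override
public void close() throws IOException {
  dataFileWriter.flush();
  dataFileWriter.close();
  super.close();
}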
public int run(String[] args) throws Exception {
  Path inputPath = new Path("weblog_entries.txt");
  Path outputPath = new Path("output");
  Schema schema = ReflectData.get().getSchema(WeblogRecord.class);
  Configuration conf = getConf();

  Job weblogJob = Job.getInstance(conf);
  weblogJob.setJobName("Avro Writer");
  weblogJob.setJarByClass(getClass());
  weblogJob.setNumReduceTasks(0);
  weblogJob.setMapperClass(WeblogMapper_Ex_5.class);
  weblogJob.setMapOutputKeyClass(AvroWrapper.class);
  weblogJob.setMapOutputValueClass(NullWritable.class);
  weblogJob.setInputFormatClass(TextInputFormat.class);
  AvroJob.setOutputKeySchema(weblogJob, schema);

  FileInputFormat.setInputPaths(weblogJob, inputPath);
  FileOutputFormat.setOutputPath(weblogJob, outputPath);

  if (weblogJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
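The job references a mapper that is not shown. A hedged sketch of what it plausibly looks like (the input key type follows TextInputFormat; how weblog_entries.txt is parsed into a WeblogRecord is an assumption):

public static class WeblogMapper_Ex_5
    extends Mapper<LongWritable, Text, AvroWrapper<WeblogRecord>, NullWritable> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    WeblogRecord record = new WeblogRecord();
    // ... populate record from the fields of value (file layout assumed) ...
    context.write(new AvroWrapper<WeblogRecord>(record), NullWritable.get());
  }
}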
private Schema namespacelessSchemaFor(Class<?> type) {
  return schemaCache.computeIfAbsent(
      type,
      clazz -> {
        Schema schema = ReflectData.get().getSchema(clazz);
        // Kind of a hack to set an empty namespace: re-parse the schema JSON with
        // every occurrence of the namespace string removed.
        return new Schema.Parser().parse(schema.toString().replace(schema.getNamespace(), ""));
      });
}
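A small, hedged demo of the effect (the Pojo class and package are hypothetical; only the ReflectData and Schema.Parser calls are the real API):

package com.example.demo;

import org.apache.avro.Schema;
import org.apache.avro.reflect.ReflectData;

// Hypothetical POJO used only to illustrate the namespace-stripping hack.
class Pojo {
  int id;
  String name;
}

public class NamespaceDemo {
  public static void main(String[] args) {
    Schema reflected = ReflectData.get().getSchema(Pojo.class);
    System.out.println(reflected.getFullName()); // com.example.demo.Pojo

    Schema stripped =
        new Schema.Parser().parse(reflected.toString().replace(reflected.getNamespace(), ""));
    System.out.println(stripped.getFullName()); // Pojo
  }
}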
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter)
    throws IOException {
  // Set the input schema right before retrieving the record reader
  Schema schema = ReflectData.get().getSchema(WikiPage.class);
  AvroJob.setInputSchema(job, schema);
  return super.getRecordReader(split, job, reporter);
}
@Test
public void testAvroNativeJson() throws IOException {
  // Write a single event with the Avro-native output format.
  AvroNativeFileOutputFormat format = new AvroNativeFileOutputFormat();
  ByteArrayOutputStream sos = new ByteArrayOutputStream();
  format.format(sos, e);
  format.close();
  byte[] bytes = sos.toByteArray();

  // Read it back with a reflect-based reader and the schema derived from EventImpl.
  ReflectData reflectData = ReflectData.get();
  Schema schema = reflectData.getSchema(EventImpl.class);
  ReflectDatumReader<EventImpl> dr = new ReflectDatumReader<EventImpl>(schema);
  ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
  DataFileStream<EventImpl> dec = new DataFileStream<EventImpl>(bais, dr);

  // The round-tripped event must match the original field for field.
  Event er = dec.next();
  assertEquals(e.getHost(), er.getHost());
  assertEquals(e.getNanos(), er.getNanos());
  assertEquals(e.getPriority(), er.getPriority());
  assertTrue(Arrays.equals(e.getBody(), er.getBody()));
}
/**
 * Returns a new {@link PTransform} that's like this one but that writes to Avro file(s)
 * containing records whose type is the specified Avro-generated class.
 *
 * <p>Does not modify this object.
 *
 * @param <X> the type of the elements of the input PCollection
 */
public <X> Bound<X> withSchema(Class<X> type) {
  return new Bound<>(
      name,
      filenamePrefix,
      filenameSuffix,
      numShards,
      shardTemplate,
      type,
      ReflectData.get().getSchema(type),
      validate);
}
/**
 * Returns a new {@link PTransform} that's like this one but that reads Avro file(s)
 * containing records whose type is the specified Avro-generated class.
 *
 * <p>Does not modify this object.
 *
 * @param <X> the type of the decoded elements and the elements of the resulting PCollection
 */
public <X> Bound<X> withSchema(Class<X> type) {
  return new Bound<>(name, filepattern, type, ReflectData.get().getSchema(type), validate);
}
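Taken together, the two withSchema(Class) overloads above are the write and read halves of the same pattern. A hedged usage sketch in the old Dataflow SDK style AvroIO (the pipeline wiring, GCS paths, and AccountRecord class are placeholders):

Pipeline p = Pipeline.create(options);

PCollection<AccountRecord> accounts =
    p.apply(AvroIO.Read.named("ReadAccounts")
        .from("gs://my-bucket/accounts-*.avro")
        .withSchema(AccountRecord.class)); // schema inferred via ReflectData, as above

accounts.apply(AvroIO.Write.named("WriteAccounts")
    .to("gs://my-bucket/output/accounts")
    .withSuffix(".avro")
    .withSchema(AccountRecord.class));

p.run();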
public class AvroMR extends Configured implements Tool {
  public static final Schema PAIR_SCHEMA =
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG));
  public static final Schema OUTPUT_SCHEMA = ReflectData.get().getSchema(UFORecord.class);

  @Override
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);
    // Recursively remove any stale output directory (the one-argument delete is deprecated).
    outputPath.getFileSystem(conf).delete(outputPath, true);

    // Load the input schema shipped as a resource next to this class.
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(conf, PAIR_SCHEMA);
    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);
    return 0;
  }

  public static class AvroRecordMapper extends AvroMapper<GenericRecord, Pair<Utf8, Long>> {
    @Override
    public void map(GenericRecord in, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
        throws IOException {
      // Emit (shape, 1) for every record that has a shape.
      Pair<Utf8, Long> p = new Pair<Utf8, Long>(PAIR_SCHEMA);
      Utf8 shape = (Utf8) in.get("shape");
      if (shape != null) {
        p.set(shape, 1L);
        collector.collect(p);
      }
    }
  }

  public static class AvroRecordReducer extends AvroReducer<Utf8, Long, GenericRecord> {
    @Override
    public void reduce(
        Utf8 key, Iterable<Long> values, AvroCollector<GenericRecord> collector, Reporter reporter)
        throws IOException {
      // Sum the counts per shape and emit one output record per shape.
      long sum = 0;
      for (Long val : values) {
        sum += val;
      }
      GenericRecord value = new GenericData.Record(OUTPUT_SCHEMA);
      value.put("shape", key);
      value.put("count", sum);
      collector.collect(value);
    }
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new AvroMR(), args);
    System.exit(res);
  }
}
public ReflectResponder(Protocol protocol, Object impl) {
  super(protocol, impl, ReflectData.get());
}

public ReflectResponder(Class iface, Object impl) {
  super(ReflectData.get().getProtocol(iface), impl, ReflectData.get());
}
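A hedged end-to-end sketch of putting these constructors to work over Avro IPC, assuming a modern (1.7+) package layout; the Calculator interface, its implementation, and the port are hypothetical, while HttpServer, HttpTransceiver, and ReflectRequestor are the real IPC classes:

import java.net.URL;
import org.apache.avro.ipc.HttpServer;
import org.apache.avro.ipc.HttpTransceiver;
import org.apache.avro.ipc.Responder;
import org.apache.avro.ipc.Server;
import org.apache.avro.ipc.Transceiver;
import org.apache.avro.ipc.reflect.ReflectRequestor;
import org.apache.avro.ipc.reflect.ReflectResponder;

public class ReflectRpcDemo {
  // Hypothetical service interface; Avro derives its protocol via reflection.
  public interface Calculator {
    int add(int a, int b);
  }

  public static class CalculatorImpl implements Calculator {
    public int add(int a, int b) {
      return a + b;
    }
  }

  public static void main(String[] args) throws Exception {
    // Server side: the Class-based constructor above builds the Protocol from the interface.
    Responder responder = new ReflectResponder(Calculator.class, new CalculatorImpl());
    Server server = new HttpServer(responder, 12345);
    server.start();

    // Client side: a dynamic proxy that speaks the same reflect-derived protocol.
    Transceiver transceiver = new HttpTransceiver(new URL("http://localhost:12345"));
    Calculator client = ReflectRequestor.getClient(Calculator.class, transceiver);
    System.out.println(client.add(2, 3)); // prints 5

    transceiver.close();
    server.close();
  }
}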