Exemplo n.º 1
0
  @Test
  public void testCreationWithSchema() throws Exception {
    List<Bird> expected = createRandomRecords(100);
    String filename =
        generateTestFile(
            "tmp.avro",
            expected,
            SyncBehavior.SYNC_DEFAULT,
            0,
            AvroCoder.of(Bird.class),
            DataFileConstants.NULL_CODEC);

    // Create a source with a schema object
    Schema schema = ReflectData.get().getSchema(Bird.class);
    AvroSource<GenericRecord> source = AvroSource.from(filename).withSchema(schema);
    List<GenericRecord> records = SourceTestUtils.readFromSource(source, null);
    assertEqualsWithGeneric(expected, records);

    // Create a source with a JSON schema
    String schemaString = ReflectData.get().getSchema(Bird.class).toString();
    source = AvroSource.from(filename).withSchema(schemaString);
    records = SourceTestUtils.readFromSource(source, null);
    assertEqualsWithGeneric(expected, records);

    // Create a source with no schema
    source = AvroSource.from(filename);
    records = SourceTestUtils.readFromSource(source, null);
    assertEqualsWithGeneric(expected, records);
  }
Exemplo n.º 2
0
  @Override
  public void open(int taskNumber, int numTasks) throws IOException {
    super.open(taskNumber, numTasks);

    DatumWriter<E> datumWriter;
    Schema schema;
    if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
      datumWriter = new SpecificDatumWriter<E>(avroValueType);
      try {
        schema =
            ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
      } catch (InstantiationException e) {
        throw new RuntimeException(e.getMessage());
      } catch (IllegalAccessException e) {
        throw new RuntimeException(e.getMessage());
      }
    } else {
      datumWriter = new ReflectDatumWriter<E>(avroValueType);
      schema = ReflectData.get().getSchema(avroValueType);
    }
    dataFileWriter = new DataFileWriter<E>(datumWriter);
    if (userDefinedSchema == null) {
      dataFileWriter.create(schema, stream);
    } else {
      dataFileWriter.create(userDefinedSchema, stream);
    }
  }
Exemplo n.º 3
0
  public int run(String[] args) throws Exception {
    Path inputPath = new Path("weblog_entries.txt");
    Path outputPath = new Path("output");
    Schema schema = ReflectData.get().getSchema(WeblogRecord.class);
    Configuration conf = getConf();

    Job weblogJob = Job.getInstance(conf);
    weblogJob.setJobName("Avro Writer");
    weblogJob.setJarByClass(getClass());

    weblogJob.setNumReduceTasks(0);
    weblogJob.setMapperClass(WeblogMapper_Ex_5.class);
    weblogJob.setMapOutputKeyClass(AvroWrapper.class);
    weblogJob.setMapOutputValueClass(NullWritable.class);

    weblogJob.setInputFormatClass(TextInputFormat.class);

    AvroJob.setOutputKeySchema(weblogJob, schema);

    FileInputFormat.setInputPaths(weblogJob, inputPath);
    FileOutputFormat.setOutputPath(weblogJob, outputPath);

    if (weblogJob.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }
Exemplo n.º 4
0
 private Schema namespacelessSchemaFor(Class<?> type) {
   return schemaCache.computeIfAbsent(
       type,
       clazz -> {
         Schema schema = ReflectData.get().getSchema(clazz);
         // kind of a hack to set an empty namespace :)
         return new Schema.Parser().parse(schema.toString().replace(schema.getNamespace(), ""));
       });
 }
Exemplo n.º 5
0
  @Override
  public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter)
      throws IOException {
    // Set the input schema right before the retrieving the record reader
    Schema schema = ReflectData.get().getSchema(WikiPage.class);
    AvroJob.setInputSchema(job, schema);

    return super.getRecordReader(split, job, reporter);
  }
Exemplo n.º 6
0
  @Test
  public void testAvroNativeJson() throws IOException {
    AvroNativeFileOutputFormat format = new AvroNativeFileOutputFormat();
    ByteArrayOutputStream sos = new ByteArrayOutputStream();
    format.format(sos, e);
    format.close();
    byte[] bytes = sos.toByteArray();

    ReflectData reflectData = ReflectData.get();
    Schema schema = reflectData.getSchema(EventImpl.class);
    ReflectDatumReader<EventImpl> dr = new ReflectDatumReader<EventImpl>(schema);
    ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
    DataFileStream<EventImpl> dec = new DataFileStream<EventImpl>(bais, dr);

    Event er = dec.next();
    assertEquals(e.getHost(), er.getHost());
    assertEquals(e.getNanos(), er.getNanos());
    assertEquals(e.getPriority(), er.getPriority());
    assertTrue(Arrays.equals(e.getBody(), er.getBody()));
  }
Exemplo n.º 7
0
 /**
  * Returns a new {@link PTransform} that's like this one but that writes to Avro file(s)
  * containing records whose type is the specified Avro-generated class.
  *
  * <p>Does not modify this object.
  *
  * @param <X> the type of the elements of the input PCollection
  */
 public <X> Bound<X> withSchema(Class<X> type) {
   return new Bound<>(
       name,
       filenamePrefix,
       filenameSuffix,
       numShards,
       shardTemplate,
       type,
       ReflectData.get().getSchema(type),
       validate);
 }
Exemplo n.º 8
0
 /**
  * Returns a new {@link PTransform} that's like this one but that reads Avro file(s)
  * containing records whose type is the specified Avro-generated class.
  *
  * <p>Does not modify this object.
  *
  * @param <X> the type of the decoded elements and the elements of the resulting PCollection
  */
 public <X> Bound<X> withSchema(Class<X> type) {
   return new Bound<>(name, filepattern, type, ReflectData.get().getSchema(type), validate);
 }
public class AvroMR extends Configured implements Tool {
  public static final Schema PAIR_SCHEMA =
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG));
  public static final Schema OUTPUT_SCHEMA = ReflectData.get().getSchema(UFORecord.class);

  @Override
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(
        conf,
        Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));

    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);

    return 0;
  }

  public static class AvroRecordMapper extends AvroMapper<GenericRecord, Pair<Utf8, Long>> {
    @Override
    public void map(GenericRecord in, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
        throws IOException {
      Pair<Utf8, Long> p = new Pair<Utf8, Long>(PAIR_SCHEMA);
      Utf8 shape = (Utf8) in.get("shape");
      if (shape != null) {
        p.set(shape, 1L);
        collector.collect(p);
      }
    }
  }

  public static class AvroRecordReducer extends AvroReducer<Utf8, Long, GenericRecord> {
    public void reduce(
        Utf8 key, Iterable<Long> values, AvroCollector<GenericRecord> collector, Reporter reporter)
        throws IOException {
      long sum = 0;
      for (Long val : values) {
        sum += val;
      }

      GenericRecord value = new GenericData.Record(OUTPUT_SCHEMA);
      value.put("shape", key);
      value.put("count", sum);

      collector.collect(value);
    }
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new AvroMR(), args);
    System.exit(res);
  }
}
Exemplo n.º 10
0
 public ReflectResponder(Protocol protocol, Object impl) {
   super(protocol, impl, ReflectData.get());
 }
Exemplo n.º 11
0
 public ReflectResponder(Class iface, Object impl) {
   super(ReflectData.get().getProtocol(iface), impl, ReflectData.get());
 }