  /** Tests the case where converting a string to a long fails because the value is not numeric. */
  @Test(
      expected = org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException.class)
  public void testIllegalConversion() throws Exception {
    // We will convert s1 from string to long (or leave it null), ignore s2,
    // convert l1 from long to string, and leave l2 the same.
    Schema input =
        SchemaBuilder.record("Input")
            .namespace("com.cloudera.edh")
            .fields()
            .nullableString("s1", "")
            .requiredString("s2")
            .optionalLong("l1")
            .requiredLong("l2")
            .endRecord();
    Schema output =
        SchemaBuilder.record("Output")
            .namespace("com.cloudera.edh")
            .fields()
            .optionalLong("s1")
            .optionalString("l1")
            .requiredLong("l2")
            .endRecord();

    AvroRecordConverter converter = new AvroRecordConverter(input, output, EMPTY_MAPPING);

    Record inputRecord = new Record(input);
    inputRecord.put("s1", "blah");
    inputRecord.put("s2", "blah");
    inputRecord.put("l1", null);
    inputRecord.put("l2", 5L);
    converter.convert(inputRecord);
  }
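
  // Note: EMPTY_MAPPING is referenced above but not defined in this excerpt.
  // A minimal sketch of a plausible definition (an empty field-name mapping,
  // assuming java.util.Map and Guava's ImmutableMap are imported, as they are
  // used elsewhere in these tests; Collections.emptyMap() would work too):
  private static final Map<String, String> EMPTY_MAPPING = ImmutableMap.of();
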
  @Test
  public void testUseReaderSchema() throws IOException {
    // Create a schema with only a username, so we can test reading it
    // with an enhanced record structure.
    Schema oldRecordSchema =
        SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
            .fields()
            .requiredString("username")
            .endRecord();

    // create the dataset
    Dataset<Record> in =
        repo.create("ns", "in", new DatasetDescriptor.Builder().schema(oldRecordSchema).build());
    Dataset<Record> out =
        repo.create("ns", "out", new DatasetDescriptor.Builder().schema(oldRecordSchema).build());
    Record oldUser = new Record(oldRecordSchema);
    oldUser.put("username", "user");

    DatasetWriter<Record> writer = in.newWriter();
    try {
      writer.write(oldUser);
    } finally {
      writer.close();
    }

    Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

    // read data from updated dataset that has the new schema.
    // At this point, User class has the old schema
    PCollection<NewUserRecord> data =
        pipeline.read(CrunchDatasets.asSource(in.getUri(), NewUserRecord.class));

    PCollection<NewUserRecord> processed =
        data.parallelDo(new UserRecordIdentityFn(), Avros.records(NewUserRecord.class));

    pipeline.write(processed, CrunchDatasets.asTarget(out));

    Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

    // open the reader only after the pipeline has run, so the output dataset
    // is fully written before we read it back.
    DatasetReader<Record> reader = out.newReader();

    try {
      // there should be exactly one record, equal to our old user generic record.
      Assert.assertTrue(reader.hasNext());
      Assert.assertEquals(oldUser, reader.next());
      Assert.assertFalse(reader.hasNext());
    } finally {
      reader.close();
    }
  }
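
  // UserRecordIdentityFn is used above but not shown in this excerpt. A
  // minimal sketch of a plausible definition: a Crunch DoFn that re-emits
  // each record unchanged (assuming org.apache.crunch.DoFn and
  // org.apache.crunch.Emitter are imported):
  private static class UserRecordIdentityFn extends DoFn<NewUserRecord, NewUserRecord> {
    @Override
    public void process(NewUserRecord input, Emitter<NewUserRecord> emitter) {
      emitter.emit(input);
    }
  }
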
  @Test
  public void testDeepCopyGeneric() {
    Record record = new Record(Person.SCHEMA$);
    record.put("name", "John Doe");
    record.put("age", 42);
    record.put("siblingnames", Lists.newArrayList());

    Record deepCopyRecord =
        new AvroDeepCopier.AvroGenericDeepCopier(Person.SCHEMA$).deepCopy(record);

    assertEquals(record, deepCopyRecord);
    assertNotSame(record, deepCopyRecord);
  }
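
  /**
   * A companion sketch for specific records, assuming AvroDeepCopier also
   * exposes an AvroSpecificDeepCopier with a (Class, Schema) constructor and
   * that Person is an Avro-generated specific record exposing a builder for
   * the same fields used in the generic test above.
   */
  @Test
  public void testDeepCopySpecificSketch() {
    Person person = Person.newBuilder()
        .setName("John Doe")
        .setAge(42)
        .setSiblingnames(Lists.<CharSequence>newArrayList())
        .build();

    Person deepCopyPerson =
        new AvroDeepCopier.AvroSpecificDeepCopier<Person>(Person.class, Person.SCHEMA$)
            .deepCopy(person);

    assertEquals(person, deepCopyPerson);
    assertNotSame(person, deepCopyPerson);
  }
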
  /** Tests the case where we don't use a mapping file and just map records by name. */
  @Test
  public void testDefaultConversion() throws Exception {
    // We will convert s1 from string to long (or leave it null), ignore s2,
    // convert s3 from string to double, convert l1 from long to string,
    // and leave l2 the same.
    Schema input =
        SchemaBuilder.record("Input")
            .namespace("com.cloudera.edh")
            .fields()
            .nullableString("s1", "")
            .requiredString("s2")
            .requiredString("s3")
            .optionalLong("l1")
            .requiredLong("l2")
            .endRecord();
    Schema output =
        SchemaBuilder.record("Output")
            .namespace("com.cloudera.edh")
            .fields()
            .optionalLong("s1")
            .optionalString("l1")
            .requiredLong("l2")
            .requiredDouble("s3")
            .endRecord();

    AvroRecordConverter converter =
        new AvroRecordConverter(input, output, EMPTY_MAPPING, LocaleUtils.toLocale("en_US"));

    Record inputRecord = new Record(input);
    inputRecord.put("s1", null);
    inputRecord.put("s2", "blah");
    inputRecord.put("s3", "5.5");
    inputRecord.put("l1", null);
    inputRecord.put("l2", 5L);
    Record outputRecord = converter.convert(inputRecord);
    assertNull(outputRecord.get("s1"));
    assertNull(outputRecord.get("l1"));
    assertEquals(5L, outputRecord.get("l2"));
    assertEquals(5.5, outputRecord.get("s3"));

    inputRecord.put("s1", "500");
    inputRecord.put("s2", "blah");
    inputRecord.put("s3", "5.5e-5");
    inputRecord.put("l1", 100L);
    inputRecord.put("l2", 2L);
    outputRecord = converter.convert(inputRecord);
    assertEquals(500L, outputRecord.get("s1"));
    assertEquals("100", outputRecord.get("l1"));
    assertEquals(2L, outputRecord.get("l2"));
    assertEquals(5.5e-5, outputRecord.get("s3"));
  }
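
  /**
   * Illustrative only: string-to-number parsing is locale-sensitive, which is
   * why the converter above takes a Locale. A minimal sketch using plain
   * java.text.NumberFormat (assuming java.text.NumberFormat and
   * java.util.Locale are imported); this does not exercise
   * AvroRecordConverter's own parsing, which this excerpt does not show.
   */
  @Test
  public void testLocaleSensitiveParsingSketch() throws Exception {
    // en_US: '.' is the decimal separator, so "5.5" parses to 5.5.
    assertEquals(5.5, NumberFormat.getInstance(Locale.US).parse("5.5").doubleValue(), 0.0);
    // de_DE: ',' is the decimal separator, so the same value is written "5,5".
    assertEquals(5.5, NumberFormat.getInstance(Locale.GERMANY).parse("5,5").doubleValue(), 0.0);
  }
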
  /** Tests the case where we want to default map one field and explicitly map another. */
  @Test
  public void testExplicitMapping() throws Exception {
    // We will convert s1 from string to long, leave l1 the same, and map the
    // nested field parent.id to the top-level parentId via an explicit mapping.
    Schema input = NESTED_RECORD_SCHEMA;
    Schema parent = NESTED_PARENT_SCHEMA;
    Schema output = UNNESTED_OUTPUT_SCHEMA;
    Map<String, String> mapping = ImmutableMap.of("parent.id", "parentId");

    AvroRecordConverter converter = new AvroRecordConverter(input, output, mapping);

    Record inputRecord = new Record(input);
    inputRecord.put("l1", 5L);
    inputRecord.put("s1", "1000");
    Record parentRecord = new Record(parent);
    parentRecord.put("id", 200L);
    parentRecord.put("name", "parent");
    inputRecord.put("parent", parentRecord);
    Record outputRecord = converter.convert(inputRecord);
    assertEquals(5L, outputRecord.get("l1"));
    assertEquals(1000L, outputRecord.get("s1"));
    assertEquals(200L, outputRecord.get("parentId"));
  }
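
  // The three schema constants used above are not shown in this excerpt. A
  // plausible reconstruction, inferred from how the fields are populated and
  // asserted in testExplicitMapping (the real definitions live elsewhere in
  // the test class and may differ):
  private static final Schema NESTED_PARENT_SCHEMA =
      SchemaBuilder.record("Parent")
          .namespace("com.cloudera.edh")
          .fields()
          .requiredLong("id")
          .requiredString("name")
          .endRecord();

  private static final Schema NESTED_RECORD_SCHEMA =
      SchemaBuilder.record("Input")
          .namespace("com.cloudera.edh")
          .fields()
          .requiredLong("l1")
          .requiredString("s1")
          .name("parent").type(NESTED_PARENT_SCHEMA).noDefault()
          .endRecord();

  private static final Schema UNNESTED_OUTPUT_SCHEMA =
      SchemaBuilder.record("Output")
          .namespace("com.cloudera.edh")
          .fields()
          .requiredLong("l1")
          .optionalLong("s1")
          .optionalLong("parentId")
          .endRecord();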