/** Tests the case where we try to convert a string to a long incorrectly. */
@Test(expected = org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException.class)
public void testIllegalConversion() throws Exception {
    // We will convert s1 from string to long (or leave it null), ignore s2,
    // convert l1 from long to string, and leave l2 the same.
    Schema input = SchemaBuilder.record("Input")
            .namespace("com.cloudera.edh")
            .fields()
            .nullableString("s1", "")
            .requiredString("s2")
            .optionalLong("l1")
            .requiredLong("l2")
            .endRecord();
    Schema output = SchemaBuilder.record("Output")
            .namespace("com.cloudera.edh")
            .fields()
            .optionalLong("s1")
            .optionalString("l1")
            .requiredLong("l2")
            .endRecord();

    AvroRecordConverter converter = new AvroRecordConverter(input, output, EMPTY_MAPPING);

    Record inputRecord = new Record(input);
    inputRecord.put("s1", "blah");
    inputRecord.put("s2", "blah");
    inputRecord.put("l1", null);
    inputRecord.put("l2", 5L);
    converter.convert(inputRecord);
}
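// EMPTY_MAPPING is defined elsewhere in this test class and is also used by
// testDefaultConversion below. A minimal sketch, assuming it is simply an
// empty explicit field mapping (so every field is matched by name); the exact
// definition is an assumption:
private static final Map<String, String> EMPTY_MAPPING =
        ImmutableMap.of(); // requires com.google.common.collect.ImmutableMap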
@Test
public void testUseReaderSchema() throws IOException {
    // Create a schema with only a username, so we can test reading it
    // with an enhanced record structure.
    Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
            .fields()
            .requiredString("username")
            .endRecord();

    // Create the input and output datasets with the old schema.
    Dataset<Record> in = repo.create("ns", "in",
            new DatasetDescriptor.Builder().schema(oldRecordSchema).build());
    Dataset<Record> out = repo.create("ns", "out",
            new DatasetDescriptor.Builder().schema(oldRecordSchema).build());

    Record oldUser = new Record(oldRecordSchema);
    oldUser.put("username", "user");

    DatasetWriter<Record> writer = in.newWriter();
    try {
        writer.write(oldUser);
    } finally {
        writer.close();
    }

    Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

    // Read data written with the old schema using the enhanced
    // NewUserRecord structure as the reader schema.
    PCollection<NewUserRecord> data = pipeline.read(
            CrunchDatasets.asSource(in.getUri(), NewUserRecord.class));
    PCollection<NewUserRecord> processed = data.parallelDo(
            new UserRecordIdentityFn(), Avros.records(NewUserRecord.class));
    pipeline.write(processed, CrunchDatasets.asTarget(out));

    DatasetReader<Record> reader = out.newReader();
    Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());
    try {
        // There should be one record that is equal to our old user generic record.
        Assert.assertEquals(oldUser, reader.next());
        Assert.assertFalse(reader.hasNext());
    } finally {
        reader.close();
    }
}
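// UserRecordIdentityFn is referenced above but not shown. A minimal sketch,
// assuming it is a plain identity function over NewUserRecord (the Crunch
// MapFn signature is real; the class body here is an assumption):
private static class UserRecordIdentityFn extends MapFn<NewUserRecord, NewUserRecord> {
    @Override
    public NewUserRecord map(NewUserRecord input) {
        // Pass the record through unchanged; the test only exercises the
        // reader-schema resolution, not any transformation.
        return input;
    }
}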
@Test
public void testDeepCopyGeneric() {
    Record record = new Record(Person.SCHEMA$);
    record.put("name", "John Doe");
    record.put("age", 42);
    record.put("siblingnames", Lists.newArrayList());

    Record deepCopyRecord =
            new AvroDeepCopier.AvroGenericDeepCopier(Person.SCHEMA$).deepCopy(record);

    assertEquals(record, deepCopyRecord);
    assertNotSame(record, deepCopyRecord);
}
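// Person.SCHEMA$ comes from an Avro-generated specific record class that is
// not shown here. A sketch of an equivalent schema, inferred only from the
// fields the test populates (the exact types are assumptions):
private static final Schema PERSON_SCHEMA_SKETCH = SchemaBuilder.record("Person")
        .fields()
        .requiredString("name")
        .requiredInt("age")
        .name("siblingnames").type().array().items().stringType().noDefault()
        .endRecord();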
/** Tests the case where we don't use a mapping file and just map records by name. */
@Test
public void testDefaultConversion() throws Exception {
    // We will convert s1 from string to long (or leave it null), ignore s2,
    // convert s3 from string to double, convert l1 from long to string,
    // and leave l2 the same.
    Schema input = SchemaBuilder.record("Input")
            .namespace("com.cloudera.edh")
            .fields()
            .nullableString("s1", "")
            .requiredString("s2")
            .requiredString("s3")
            .optionalLong("l1")
            .requiredLong("l2")
            .endRecord();
    Schema output = SchemaBuilder.record("Output")
            .namespace("com.cloudera.edh")
            .fields()
            .optionalLong("s1")
            .optionalString("l1")
            .requiredLong("l2")
            .requiredDouble("s3")
            .endRecord();

    AvroRecordConverter converter = new AvroRecordConverter(input, output,
            EMPTY_MAPPING, LocaleUtils.toLocale("en_US"));

    Record inputRecord = new Record(input);
    inputRecord.put("s1", null);
    inputRecord.put("s2", "blah");
    inputRecord.put("s3", "5.5");
    inputRecord.put("l1", null);
    inputRecord.put("l2", 5L);
    Record outputRecord = converter.convert(inputRecord);
    assertNull(outputRecord.get("s1"));
    assertNull(outputRecord.get("l1"));
    assertEquals(5L, outputRecord.get("l2"));
    assertEquals(5.5, outputRecord.get("s3"));

    inputRecord.put("s1", "500");
    inputRecord.put("s2", "blah");
    inputRecord.put("s3", "5.5e-5");
    inputRecord.put("l1", 100L);
    inputRecord.put("l2", 2L);
    outputRecord = converter.convert(inputRecord);
    assertEquals(500L, outputRecord.get("s1"));
    assertEquals("100", outputRecord.get("l1"));
    assertEquals(2L, outputRecord.get("l2"));
    assertEquals(5.5e-5, outputRecord.get("s3"));
}
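// Why testDefaultConversion passes a Locale: the same numeric string parses
// differently (or fails) depending on the locale's decimal separator. This
// standalone check illustrates the point with java.text.NumberFormat and
// java.util.Locale; it is not the converter's internal code, which is not
// shown here:
@Test
public void localeParsingIllustration() throws Exception {
    // en_US uses '.' as the decimal separator ...
    assertEquals(5.5, NumberFormat.getInstance(Locale.US).parse("5.5").doubleValue(), 0.0);
    // ... while de_DE uses ',' for the same value.
    assertEquals(5.5, NumberFormat.getInstance(Locale.GERMANY).parse("5,5").doubleValue(), 0.0);
}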
/** Tests the case where we want to default map one field and explicitly map another. */
@Test
public void testExplicitMapping() throws Exception {
    // l1 and s1 are mapped by name (s1 is also converted from string to long),
    // while the nested field parent.id is explicitly mapped to parentId.
    Schema input = NESTED_RECORD_SCHEMA;
    Schema parent = NESTED_PARENT_SCHEMA;
    Schema output = UNNESTED_OUTPUT_SCHEMA;
    Map<String, String> mapping = ImmutableMap.of("parent.id", "parentId");

    AvroRecordConverter converter = new AvroRecordConverter(input, output, mapping);

    Record inputRecord = new Record(input);
    inputRecord.put("l1", 5L);
    inputRecord.put("s1", "1000");
    Record parentRecord = new Record(parent);
    parentRecord.put("id", 200L);
    parentRecord.put("name", "parent");
    inputRecord.put("parent", parentRecord);

    Record outputRecord = converter.convert(inputRecord);
    assertEquals(5L, outputRecord.get("l1"));
    assertEquals(1000L, outputRecord.get("s1"));
    assertEquals(200L, outputRecord.get("parentId"));
}
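// The schema constants used by testExplicitMapping are defined elsewhere in
// the test class. Minimal sketches inferred from the fields the test reads
// and writes (field order, optionality, and record names are assumptions):
private static final Schema NESTED_PARENT_SCHEMA = SchemaBuilder.record("Parent")
        .fields()
        .requiredLong("id")
        .requiredString("name")
        .endRecord();
private static final Schema NESTED_RECORD_SCHEMA = SchemaBuilder.record("Input")
        .fields()
        .requiredLong("l1")
        .requiredString("s1")
        .name("parent").type(NESTED_PARENT_SCHEMA).noDefault()
        .endRecord();
private static final Schema UNNESTED_OUTPUT_SCHEMA = SchemaBuilder.record("Output")
        .fields()
        .requiredLong("l1")
        .requiredLong("s1")
        .requiredLong("parentId")
        .endRecord();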