@Test public void test_getHiveTypeFromAvroType_complex() throws Exception { // Expected ORC types String[] expectedTypes = { "INT", "MAP<STRING, DOUBLE>", "STRING", "UNIONTYPE<BIGINT, FLOAT>", "ARRAY<INT>" }; Schema testSchema = buildComplexAvroSchema(); List<Schema.Field> fields = testSchema.getFields(); for (int i = 0; i < fields.size(); i++) { assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema())); } assertEquals( "STRUCT<myInt:INT, myMap:MAP<STRING, DOUBLE>, myEnum:STRING, myLongOrFloat:UNIONTYPE<BIGINT, FLOAT>, myIntList:ARRAY<INT>>", NiFiOrcUtils.getHiveTypeFromAvroType(testSchema)); }
/**
 * Verifies the Hive DDL statement generated from the primitive Avro test schema for the table
 * name "myHiveTable".
 */
@Test
public void test_generateHiveDDL_primitive() throws Exception {
    final Schema primitiveSchema = buildPrimitiveAvroSchema();

    final String generatedDdl = NiFiOrcUtils.generateHiveDDL(primitiveSchema, "myHiveTable");

    // One column per schema field, followed by the ORC storage clause
    final String expectedDdl =
        "CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
            + " STORED AS ORC";
    assertEquals(expectedDdl, generatedDdl);
}
/**
 * Verifies that the ORC type computed for an Avro enumeration field equals the string type.
 */
@Test
public void test_getOrcField_enum() throws Exception {
    // Record with a single enum field whose symbols are a, b, c (default a)
    final SchemaBuilder.FieldAssembler<Schema> fieldAssembler =
        SchemaBuilder.record("testRecord").namespace("any.data").fields();
    fieldAssembler.name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a");
    final Schema recordSchema = fieldAssembler.endRecord();

    final Schema enumFieldSchema = recordSchema.getField("enumField").schema();
    final TypeInfo actualType = NiFiOrcUtils.getOrcField(enumFieldSchema);

    assertEquals(TypeInfoCreator.createString(), actualType);
}
/**
 * Verifies that the ORC type computed for an Avro array of longs equals a list type whose
 * element type is long.
 */
@Test
public void test_getOrcField_array() throws Exception {
    // Record with a single field: an array whose items are longs
    final SchemaBuilder.FieldAssembler<Schema> fieldAssembler =
        SchemaBuilder.record("testRecord").namespace("any.data").fields();
    fieldAssembler.name("array").type().array().items().longType().noDefault();
    final Schema recordSchema = fieldAssembler.endRecord();

    final Schema arrayFieldSchema = recordSchema.getField("array").schema();
    final TypeInfo actualType = NiFiOrcUtils.getOrcField(arrayFieldSchema);

    final TypeInfo expectedType = TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong());
    assertEquals(expectedType, actualType);
}
/**
 * Verifies the Hive DDL statement generated from the complex Avro test schema for the table
 * name "myHiveTable".
 */
@Test
public void test_generateHiveDDL_complex() throws Exception {
    final Schema complexSchema = buildComplexAvroSchema();

    final String generatedDdl = NiFiOrcUtils.generateHiveDDL(complexSchema, "myHiveTable");

    // Table header, then the column list, then the ORC storage clause
    final String expectedDdl =
        "CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable "
            + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
            + " STORED AS ORC";
    assertEquals(expectedDdl, generatedDdl);
}
/**
 * Verifies that the ORC type computed for an Avro map with double values equals a map type
 * with string keys and double values.
 */
@Test
public void test_getOrcField_map() throws Exception {
    // Record with a single field: a map whose values are doubles
    final SchemaBuilder.FieldAssembler<Schema> fieldAssembler =
        SchemaBuilder.record("testRecord").namespace("any.data").fields();
    fieldAssembler.name("map").type().map().values().doubleType().noDefault();
    final Schema recordSchema = fieldAssembler.endRecord();

    final Schema mapFieldSchema = recordSchema.getField("map").schema();
    final TypeInfo actualType = NiFiOrcUtils.getOrcField(mapFieldSchema);

    final TypeInfo expectedType =
        TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble());
    assertEquals(expectedType, actualType);
}
/**
 * Verifies that the ORC type computed for an Avro union of int and boolean equals a union type
 * with the int and boolean members in that order.
 */
@Test
public void test_getOrcField_union() throws Exception {
    // Record with a single field: a union of int and boolean
    final SchemaBuilder.FieldAssembler<Schema> fieldAssembler =
        SchemaBuilder.record("testRecord").namespace("any.data").fields();
    fieldAssembler.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
    final Schema recordSchema = fieldAssembler.endRecord();

    final Schema unionFieldSchema = recordSchema.getField("union").schema();
    final TypeInfo actualType = NiFiOrcUtils.getOrcField(unionFieldSchema);

    final TypeInfo expectedType =
        TypeInfoFactory.getUnionTypeInfo(
            Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean()));
    assertEquals(expectedType, actualType);
}
@Test public void test_getHiveTypeFromAvroType_primitive() throws Exception { // Expected ORC types String[] expectedTypes = { "INT", "BIGINT", "BOOLEAN", "FLOAT", "DOUBLE", "BINARY", "STRING", }; Schema testSchema = buildPrimitiveAvroSchema(); List<Schema.Field> fields = testSchema.getFields(); for (int i = 0; i < fields.size(); i++) { assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema())); } }
/**
 * Verifies that the ORC type computed for an Avro record equals a struct type whose field names
 * and field types mirror the record's fields.
 */
@Test
public void test_getOrcField_record() throws Exception {
    // Record with an int, a long (default 1) and an array of strings
    final SchemaBuilder.FieldAssembler<Schema> fieldAssembler =
        SchemaBuilder.record("testRecord").namespace("any.data").fields();
    fieldAssembler.name("int").type().intType().noDefault();
    fieldAssembler.name("long").type().longType().longDefault(1L);
    fieldAssembler.name("array").type().array().items().stringType().noDefault();
    final Schema recordSchema = fieldAssembler.endRecord();

    // The record schema itself (not one of its fields) is converted here
    final TypeInfo actualType = NiFiOrcUtils.getOrcField(recordSchema);

    final TypeInfo expectedType =
        TypeInfoFactory.getStructTypeInfo(
            Arrays.asList("int", "long", "array"),
            Arrays.asList(
                TypeInfoCreator.createInt(),
                TypeInfoCreator.createLong(),
                TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString())));
    assertEquals(expectedType, actualType);
}
@Test public void test_getOrcField_primitive() throws Exception { // Expected ORC types TypeInfo[] expectedTypes = { TypeInfoFactory.getPrimitiveTypeInfo("int"), TypeInfoFactory.getPrimitiveTypeInfo("bigint"), TypeInfoFactory.getPrimitiveTypeInfo("boolean"), TypeInfoFactory.getPrimitiveTypeInfo("float"), TypeInfoFactory.getPrimitiveTypeInfo("double"), TypeInfoFactory.getPrimitiveTypeInfo("binary"), TypeInfoFactory.getPrimitiveTypeInfo("string") }; // Build a fake Avro record with all types Schema testSchema = buildPrimitiveAvroSchema(); List<Schema.Field> fields = testSchema.getFields(); for (int i = 0; i < fields.size(); i++) { assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema())); } }
@Test public void test_getPrimitiveOrcTypeFromPrimitiveAvroType() throws Exception { // Expected ORC types TypeInfo[] expectedTypes = { TypeInfoCreator.createInt(), TypeInfoCreator.createLong(), TypeInfoCreator.createBoolean(), TypeInfoCreator.createFloat(), TypeInfoCreator.createDouble(), TypeInfoCreator.createBinary(), TypeInfoCreator.createString(), }; Schema testSchema = buildPrimitiveAvroSchema(); List<Schema.Field> fields = testSchema.getFields(); for (int i = 0; i < fields.size(); i++) { assertEquals( expectedTypes[i], NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(fields.get(i).schema().getType())); } }
/**
 * Checks the runtime type of the object returned by convertToORCObject for several kinds of
 * input: boxed numbers, an int array, a list, and a string-to-float map.
 */
@Test
public void test_getWritable() throws Exception {
    // Numeric inputs, converted with null passed as the first argument
    final Object fromInt = NiFiOrcUtils.convertToORCObject(null, 1);
    assertTrue(fromInt instanceof IntWritable);
    final Object fromLong = NiFiOrcUtils.convertToORCObject(null, 1L);
    assertTrue(fromLong instanceof LongWritable);
    final Object fromFloat = NiFiOrcUtils.convertToORCObject(null, 1.0f);
    assertTrue(fromFloat instanceof FloatWritable);
    final Object fromDouble = NiFiOrcUtils.convertToORCObject(null, 1.0);
    assertTrue(fromDouble instanceof DoubleWritable);

    // Both a primitive array and a java.util.List are expected to come back as a List
    final Object fromIntArray = NiFiOrcUtils.convertToORCObject(null, new int[] {1, 2, 3});
    assertTrue(fromIntArray instanceof List);
    final Object fromList = NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3));
    assertTrue(fromList instanceof List);

    // Map input, converted with an explicit map<string,float> type
    final Map<String, Float> source = new HashMap<>();
    source.put("Hello", 1.0f);
    source.put("World", 2.0f);
    final Object converted =
        NiFiOrcUtils.convertToORCObject(
            TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), source);
    assertTrue(converted instanceof MapWritable);

    // Every entry of the resulting map must hold a Text key and a FloatWritable value
    final MapWritable convertedMap = (MapWritable) converted;
    convertedMap.forEach(
        (entryKey, entryValue) -> {
            assertTrue(entryKey instanceof Text);
            assertTrue(entryValue instanceof FloatWritable);
        });
}
/**
 * Expects an IllegalArgumentException when the Avro ARRAY type is passed to the primitive type
 * conversion.
 */
@Test(expected = IllegalArgumentException.class)
public void test_getPrimitiveOrcTypeFromPrimitiveAvroType_badType() throws Exception {
    // ARRAY is not a primitive Avro type; the call itself is the assertion
    NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.ARRAY);
}