/**
 * Checks that an Avro array whose items are maps with float values is translated by
 * {@code NiFiOrcUtils.getOrcField} into a list of map(string, float).
 */
@Test
public void test_getOrcField_complex_array() throws Exception {
  final SchemaBuilder.FieldAssembler<Schema> fields =
      SchemaBuilder.record("testRecord").namespace("any.data").fields();
  fields.name("array").type().array().items().map().values().floatType().noDefault();
  final Schema avroSchema = fields.endRecord();

  final TypeInfo actual = NiFiOrcUtils.getOrcField(avroSchema.getField("array").schema());

  final TypeInfo expectedMap =
      TypeInfoFactory.getMapTypeInfo(
          TypeInfoCreator.createString(), TypeInfoCreator.createFloat());
  final TypeInfo expected = TypeInfoFactory.getListTypeInfo(expectedMap);
  assertEquals(expected, actual);
}
/**
 * Checks the result type reported by {@code GenericUDFOPPlus.initialize} when both operands are
 * decimal(5,2): the expected result type is decimal(6,2).
 */
@Test
public void testDecimalPlusDecimalSameParams() throws HiveException {
  final GenericUDFOPPlus plus = new GenericUDFOPPlus();

  final ObjectInspector leftOI =
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(5, 2));
  final ObjectInspector rightOI =
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(5, 2));
  final ObjectInspector[] inputOIs = new ObjectInspector[] {leftOI, rightOI};

  final PrimitiveObjectInspector resultOI = (PrimitiveObjectInspector) plus.initialize(inputOIs);

  Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(6, 2), resultOI.getTypeInfo());
}
@Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // Get column names and sort order String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } assert (columnNames.size() == columnTypes.size()); // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); rowObjectInspector = (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo); row = new ArrayList<Object>(columnNames.size()); for (int i = 0; i < columnNames.size(); i++) { row.add(null); } // Get the sort order String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER); columnSortOrderIsDesc = new boolean[columnNames.size()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-'); } }
/**
 * Checks that an Avro record with an int, a long and a string-array field is translated by
 * {@code NiFiOrcUtils.getOrcField} into a struct with the same field names and matching types.
 */
@Test
public void test_getOrcField_record() throws Exception {
  final SchemaBuilder.FieldAssembler<Schema> fields =
      SchemaBuilder.record("testRecord").namespace("any.data").fields();
  fields.name("int").type().intType().noDefault();
  fields.name("long").type().longType().longDefault(1L);
  fields.name("array").type().array().items().stringType().noDefault();
  final Schema avroSchema = fields.endRecord();

  final TypeInfo actual = NiFiOrcUtils.getOrcField(avroSchema);

  final String[] expectedNames = {"int", "long", "array"};
  final TypeInfo[] expectedTypes = {
    TypeInfoCreator.createInt(),
    TypeInfoCreator.createLong(),
    TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString())
  };
  final TypeInfo expected =
      TypeInfoFactory.getStructTypeInfo(
          Arrays.asList(expectedNames), Arrays.asList(expectedTypes));
  assertEquals(expected, actual);
}
/**
 * Builds an {@code ArrayWritableObjectInspector} for a row described by the given column names
 * and column types.
 *
 * @param columnNames column names, in the format accepted by {@code createHiveColumnsFrom}
 * @param columnTypes column types, in the format accepted by {@code createHiveTypeInfoFrom}
 * @return an inspector over the struct type made of those columns
 */
private ArrayWritableObjectInspector getObjectInspector(
    final String columnNames, final String columnTypes) {
  final List<TypeInfo> types = createHiveTypeInfoFrom(columnTypes);
  final List<String> names = createHiveColumnsFrom(columnNames);
  return new ArrayWritableObjectInspector(
      (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types));
}
@Test public void testLongPlusDecimal() throws HiveException { GenericUDFOPPlus udf = new GenericUDFOPPlus(); // Long LongWritable left = new LongWritable(104); HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97")); ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableLongObjectInspector, PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( TypeInfoFactory.getDecimalTypeInfo(9, 4)) }; DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right), }; PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs); Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(24, 4), oi.getTypeInfo()); HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args); Assert.assertEquals(HiveDecimal.create("338.97"), res.getHiveDecimal()); }
/**
 * Builds a struct type info with one field per primitive type covered by {@code
 * TypeInfoCreator}: int, long, boolean, float, double, bytes and string.
 *
 * @return the struct type info, with fields in the order listed above
 */
public static TypeInfo buildPrimitiveOrcSchema() {
  final String[] fieldNames = {"int", "long", "boolean", "float", "double", "bytes", "string"};
  final TypeInfo[] fieldTypes = {
    TypeInfoCreator.createInt(),
    TypeInfoCreator.createLong(),
    TypeInfoCreator.createBoolean(),
    TypeInfoCreator.createFloat(),
    TypeInfoCreator.createDouble(),
    TypeInfoCreator.createBinary(),
    TypeInfoCreator.createString()
  };
  return TypeInfoFactory.getStructTypeInfo(Arrays.asList(fieldNames), Arrays.asList(fieldTypes));
}
/**
 * Checks that an Avro union of int and boolean is translated by {@code
 * NiFiOrcUtils.getOrcField} into a union type info with the same two members.
 */
@Test
public void test_getOrcField_union() throws Exception {
  final SchemaBuilder.FieldAssembler<Schema> fields =
      SchemaBuilder.record("testRecord").namespace("any.data").fields();
  fields.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
  final Schema avroSchema = fields.endRecord();

  final TypeInfo actual = NiFiOrcUtils.getOrcField(avroSchema.getField("union").schema());

  final TypeInfo[] members = {TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean()};
  final TypeInfo expected = TypeInfoFactory.getUnionTypeInfo(Arrays.asList(members));
  assertEquals(expected, actual);
}
@Test public void testDecimalPlusDecimal() throws HiveException { GenericUDFOPPlus udf = new GenericUDFOPPlus(); // Decimal HiveDecimalWritable left = new HiveDecimalWritable(HiveDecimal.create("14.5")); HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97")); ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( TypeInfoFactory.getDecimalTypeInfo(3, 1)), PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( TypeInfoFactory.getDecimalTypeInfo(5, 2)) }; DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right), }; PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs); Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(6, 2), oi.getTypeInfo()); HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args); Assert.assertEquals(HiveDecimal.create("249.47"), res.getHiveDecimal()); }
/** * Initializes the SerDe. Gets the list of columns and their types from the table properties. Will * use them to look into/create JSON data. * * @param conf Hadoop configuration object * @param tbl Table Properties * @throws SerDeException */ @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { LOG.debug("Initializing SerDe"); // Get column names and sort order String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty); // all table column names if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } // all column types if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } assert (columnNames.size() == columnTypes.size()); stats = new SerDeStats(); // Create row related objects rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); // build options options = new JsonStructOIOptions(getMappings(tbl)); rowObjectInspector = (StructObjectInspector) JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options); // Get the sort order String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER); columnSortOrderIsDesc = new boolean[columnNames.size()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-'); } // other configuration ignoreMalformedJson = Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false")); }
/**
 * Maps a row ID column plus a {@code cf:*} map column and checks that the input format yields
 * exactly one (family, qualifier) pair: family {@code "cf"} with a null qualifier.
 */
@Test
public void testMapColumnPairs() throws TooManyAccumuloColumnsException {
  final String[] hiveColumnNames = {"row", "col"};
  final TypeInfo[] hiveColumnTypes = {
    TypeInfoFactory.stringTypeInfo,
    TypeInfoFactory.getMapTypeInfo(
        TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo)
  };

  final ColumnMapper mapper =
      new ColumnMapper(
          ":rowID,cf:*",
          conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
          Arrays.asList(hiveColumnNames),
          Arrays.asList(hiveColumnTypes));

  final Set<Pair<Text, Text>> pairs = inputformat.getPairCollection(mapper.getColumnMappings());
  Assert.assertEquals(1, pairs.size());

  final Pair<Text, Text> onlyPair = pairs.iterator().next();
  Assert.assertEquals("cf", onlyPair.getFirst().toString());
  Assert.assertNull(onlyPair.getSecond());
}
@Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { final TypeInfo rowTypeInfo; final List<String> columnNames; final List<TypeInfo> columnTypes; // Get column names and sort order final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException( "ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); // Stats part stats = new SerDeStats(); serializedSize = 0; deserializedSize = 0; status = LAST_OPERATION.UNKNOWN; }
@Test public void testDoulePlusDecimal() throws HiveException { GenericUDFOPPlus udf = new GenericUDFOPPlus(); // Double DoubleWritable left = new DoubleWritable(74.52); HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97")); ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableDoubleObjectInspector, PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( TypeInfoFactory.getDecimalTypeInfo(5, 2)) }; DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right), }; PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs); Assert.assertEquals(TypeInfoFactory.doubleTypeInfo, oi.getTypeInfo()); DoubleWritable res = (DoubleWritable) udf.evaluate(args); Assert.assertEquals(new Double(309.49), new Double(res.get())); }
@Test public void test_getOrcField_primitive() throws Exception { // Expected ORC types TypeInfo[] expectedTypes = { TypeInfoFactory.getPrimitiveTypeInfo("int"), TypeInfoFactory.getPrimitiveTypeInfo("bigint"), TypeInfoFactory.getPrimitiveTypeInfo("boolean"), TypeInfoFactory.getPrimitiveTypeInfo("float"), TypeInfoFactory.getPrimitiveTypeInfo("double"), TypeInfoFactory.getPrimitiveTypeInfo("binary"), TypeInfoFactory.getPrimitiveTypeInfo("string") }; // Build a fake Avro record with all types Schema testSchema = buildPrimitiveAvroSchema(); List<Schema.Field> fields = testSchema.getFields(); for (int i = 0; i < fields.size(); i++) { assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema())); } }
@Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { assert (parameters.length == 1); super.init(m, parameters); result = new HiveDecimalWritable(HiveDecimal.ZERO); inputOI = (PrimitiveObjectInspector) parameters[0]; // The output precision is 10 greater than the input which should cover at least // 10b rows. The scale is the same as the input. DecimalTypeInfo outputTypeInfo = null; if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { int precision = Math.min(HiveDecimal.MAX_PRECISION, inputOI.precision() + 10); outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, inputOI.scale()); } else { outputTypeInfo = (DecimalTypeInfo) inputOI.getTypeInfo(); } ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(outputTypeInfo); outputOI = (PrimitiveObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(oi, ObjectInspectorCopyOption.JAVA); return oi; }
/**
 * Looks up the primitive type info registered under the type name {@code "double"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("double")}
 */
static TypeInfo createDouble() {
  final TypeInfo doubleType = TypeInfoFactory.getPrimitiveTypeInfo("double");
  return doubleType;
}
/*
 * Pre-populates cachedPrimitiveLazyObjectInspectors with one entry per primitive type name
 * listed below. Each key is the type info returned by TypeInfoFactory.getPrimitiveTypeInfo for a
 * serdeConstants type name; each value is the matching LAZY_*_OBJECT_INSPECTOR constant.
 *
 * Type names registered here: boolean, tinyint, smallint, int, float, double, bigint, void,
 * date, timestamp, interval_year_month, interval_day_time and binary (as named by the
 * serdeConstants.*_TYPE_NAME constants).
 *
 * NOTE(review): no other type name is registered in this block; presumably the remaining types
 * are handled elsewhere -- confirm against the rest of the class.
 */
static { cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME), LAZY_BOOLEAN_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME), LAZY_BYTE_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), LAZY_SHORT_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME), LAZY_INT_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME), LAZY_FLOAT_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME), LAZY_DOUBLE_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME), LAZY_LONG_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME), LAZY_VOID_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME), LAZY_DATE_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME), LAZY_TIMESTAMP_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME), LAZY_INTERVAL_YEAR_MONTH_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME), LAZY_INTERVAL_DAY_TIME_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put( TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME), LAZY_BINARY_OBJECT_INSPECTOR); }
/**
 * Looks up the primitive type info registered under the type name {@code "int"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("int")}
 */
static TypeInfo createInt() {
  final TypeInfo intType = TypeInfoFactory.getPrimitiveTypeInfo("int");
  return intType;
}
/**
 * Looks up the primitive type info used for long values, registered under the type name {@code
 * "bigint"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("bigint")}
 */
static TypeInfo createLong() {
  final TypeInfo longType = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
  return longType;
}
/**
 * Looks up the primitive type info registered under the type name {@code "boolean"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("boolean")}
 */
static TypeInfo createBoolean() {
  final TypeInfo booleanType = TypeInfoFactory.getPrimitiveTypeInfo("boolean");
  return booleanType;
}
/**
 * Looks up the primitive type info registered under the type name {@code "float"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("float")}
 */
static TypeInfo createFloat() {
  final TypeInfo floatType = TypeInfoFactory.getPrimitiveTypeInfo("float");
  return floatType;
}
/**
 * Looks up the primitive type info registered under the type name {@code "string"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("string")}
 */
static TypeInfo createString() {
  final TypeInfo stringType = TypeInfoFactory.getPrimitiveTypeInfo("string");
  return stringType;
}
/**
 * Looks up the primitive type info registered under the type name {@code "binary"}.
 *
 * @return the type info returned by {@code TypeInfoFactory.getPrimitiveTypeInfo("binary")}
 */
static TypeInfo createBinary() {
  final TypeInfo binaryType = TypeInfoFactory.getPrimitiveTypeInfo("binary");
  return binaryType;
}
/**
 * Fills in the object-inspector related state of {@code opCtx} for one partition descriptor and
 * returns the same {@code opCtx}.
 *
 * <p>Steps, in the order the code performs them:
 *
 * <ol>
 *   <li>Overlays the partition properties on the table properties and records the table name
 *       (the "name" property), the partition spec rendered as a string, and the deserializer.
 *   <li>Picks the raw row inspector: {@code tableRowOI} when
 *       {@code Utilities.isInputFileFormatSelfDescribing(pd)} is true, otherwise the
 *       deserializer's own inspector; then stores a converter from it to {@code tableRowOI}.
 *   <li>If the partition-columns property is non-empty, splits the column names on "/" and the
 *       column types on ":", builds one writable primitive inspector per partition key, converts
 *       each partition value from its string form (or leaves it null when {@code partSpec} is
 *       null), and stores {@code rowWithPart} and {@code partObjectInspector}.
 *   <li>If the operator is a {@code TableScanOperator} whose descriptor has virtual columns,
 *       stores the virtual columns, their value array and inspector, and allocates
 *       {@code rowWithPartAndVC}. For the partitioned case this is
 *       {@code Arrays.copyOfRange(rowWithPart, 0, 3)}: a copy of the two-element
 *       {@code rowWithPart} built above, padded with a trailing null slot.
 *   <li>Sets {@code rowObjectInspector}: {@code tableRowOI} itself when there are neither virtual
 *       columns nor partitions, otherwise a union struct inspector over the table row inspector,
 *       the partition inspector (if partitioned) and the virtual-column inspector (if any).
 * </ol>
 *
 * <p>NOTE(review): the partition-column-types property is dereferenced without a null check, so
 * a table that declares partition columns but no partition column types would fail with a
 * NullPointerException here -- confirm that this cannot happen upstream.
 *
 * @param hconf configuration passed to {@code pd.getDeserializer}
 * @param opCtx context to populate; its {@code partDesc} and {@code op} are read
 * @param tableRowOI inspector describing the table-level row
 * @return {@code opCtx}, after it has been populated
 * @throws HiveException if there are more partition keys than partition key types
 * @throws Exception declared by the signature; not all sources are visible in this method
 */
private MapOpCtx initObjectInspector( Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception { PartitionDesc pd = opCtx.partDesc; TableDesc td = pd.getTableDesc(); // Use table properties in case of unpartitioned tables, // and the union of table properties and partition properties, with partition // taking precedence, in the case of partitioned tables Properties overlayedProps = SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties()); Map<String, String> partSpec = pd.getPartSpec(); opCtx.tableName = String.valueOf(overlayedProps.getProperty("name")); opCtx.partName = String.valueOf(partSpec); opCtx.deserializer = pd.getDeserializer(hconf); StructObjectInspector partRawRowObjectInspector; if (Utilities.isInputFileFormatSelfDescribing(pd)) { partRawRowObjectInspector = tableRowOI; } else { partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector(); } opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI); // Next check if this table has partitions and if so // get the list of partition names as well as allocate // the serdes for the partition columns String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); if (pcols != null && pcols.length() > 0) { String[] partKeys = pcols.trim().split("/"); String pcolTypes = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); String[] partKeyTypes = pcolTypes.trim().split(":"); if (partKeys.length > partKeyTypes.length) { throw new HiveException( "Internal error : partKeys length, " + partKeys.length + " greater than partKeyTypes length, " + partKeyTypes.length); } List<String> partNames = new ArrayList<String>(partKeys.length); Object[] partValues = new Object[partKeys.length]; List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length); for (int i = 0; i < partKeys.length; 
i++) { String key = partKeys[i]; partNames.add(key); ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i])); // Partitions do not exist for this table if (partSpec == null) { // for partitionless table, initialize partValue to null partValues[i] = null; } else { partValues[i] = ObjectInspectorConverters.getConverter( PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi) .convert(partSpec.get(key)); } partObjectInspectors.add(oi); } opCtx.rowWithPart = new Object[] {null, partValues}; opCtx.partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors); } // The op may not be a TableScan for mapjoins // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key; // In that case, it will be a Select, but the rowOI need not be amended if (opCtx.op instanceof TableScanOperator) { TableScanOperator tsOp = (TableScanOperator) opCtx.op; TableScanDesc tsDesc = tsOp.getConf(); if (tsDesc != null && tsDesc.hasVirtualCols()) { opCtx.vcs = tsDesc.getVirtualCols(); opCtx.vcValues = new Object[opCtx.vcs.size()]; opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs); if (opCtx.isPartitioned()) { opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3); } else { opCtx.rowWithPartAndVC = new Object[2]; } } } if (!opCtx.hasVC() && !opCtx.isPartitioned()) { opCtx.rowObjectInspector = tableRowOI; return opCtx; } List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>(); inspectors.add(tableRowOI); if (opCtx.isPartitioned()) { inspectors.add(opCtx.partObjectInspector); } if (opCtx.hasVC()) { inspectors.add(opCtx.vcsObjectInspector); } opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors); return opCtx; }