@Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // Get column names and sort order String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } assert (columnNames.size() == columnTypes.size()); // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); rowObjectInspector = (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo); row = new ArrayList<Object>(columnNames.size()); for (int i = 0; i < columnNames.size(); i++) { row.add(null); } // Get the sort order String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER); columnSortOrderIsDesc = new boolean[columnNames.size()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-'); } }
/**
 * Builds an {@link ArrayWritableObjectInspector} for a row made of the given columns.
 *
 * <p>The type string is converted with {@code createHiveTypeInfoFrom} and the name string with
 * {@code createHiveColumnsFrom}; the two resulting lists are combined into a struct type.
 *
 * @param columnNames string describing the column names
 * @param columnTypes string describing the column types
 * @return an object inspector over the resulting struct type
 */
private ArrayWritableObjectInspector getObjectInspector(
    final String columnNames, final String columnTypes) {
  // Types are resolved before names, as in the original call order.
  final List<TypeInfo> types = createHiveTypeInfoFrom(columnTypes);
  final List<String> names = createHiveColumnsFrom(columnNames);
  return new ArrayWritableObjectInspector(
      (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types));
}
/**
 * Builds a struct type with one field per primitive kind.
 *
 * <p>The fields are, in order: {@code int}, {@code long}, {@code boolean}, {@code float},
 * {@code double}, {@code bytes} (binary) and {@code string}.
 *
 * @return the struct type info describing those seven fields
 */
public static TypeInfo buildPrimitiveOrcSchema() {
  // Field names and field types are kept in matching positions.
  final String[] fieldNames = {"int", "long", "boolean", "float", "double", "bytes", "string"};
  final TypeInfo[] fieldTypes = {
    TypeInfoCreator.createInt(),
    TypeInfoCreator.createLong(),
    TypeInfoCreator.createBoolean(),
    TypeInfoCreator.createFloat(),
    TypeInfoCreator.createDouble(),
    TypeInfoCreator.createBinary(),
    TypeInfoCreator.createString()
  };
  return TypeInfoFactory.getStructTypeInfo(Arrays.asList(fieldNames), Arrays.asList(fieldTypes));
}
/** * Initializes the SerDe. Gets the list of columns and their types from the table properties. Will * use them to look into/create JSON data. * * @param conf Hadoop configuration object * @param tbl Table Properties * @throws SerDeException */ @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { LOG.debug("Initializing SerDe"); // Get column names and sort order String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty); // all table column names if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } // all column types if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } assert (columnNames.size() == columnTypes.size()); stats = new SerDeStats(); // Create row related objects rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); // build options options = new JsonStructOIOptions(getMappings(tbl)); rowObjectInspector = (StructObjectInspector) JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options); // Get the sort order String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER); columnSortOrderIsDesc = new boolean[columnNames.size()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-'); } // other configuration ignoreMalformedJson = Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false")); }
/**
 * Checks that a record schema with an int field, a long field (default 1) and an
 * array-of-string field is converted by {@code NiFiOrcUtils.getOrcField} into the matching
 * struct type.
 */
@Test
public void test_getOrcField_record() throws Exception {
  // Assemble the input record schema field by field.
  final SchemaBuilder.FieldAssembler<Schema> fields =
      SchemaBuilder.record("testRecord").namespace("any.data").fields();
  fields.name("int").type().intType().noDefault();
  fields.name("long").type().longType().longDefault(1L);
  fields.name("array").type().array().items().stringType().noDefault();
  final Schema recordSchema = fields.endRecord();

  // Convert first, then build the expectation, mirroring the original evaluation order.
  final TypeInfo actual = NiFiOrcUtils.getOrcField(recordSchema);

  final TypeInfo stringList = TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString());
  final TypeInfo expected =
      TypeInfoFactory.getStructTypeInfo(
          Arrays.asList("int", "long", "array"),
          Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createLong(), stringList));

  assertEquals(expected, actual);
}
@Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { final TypeInfo rowTypeInfo; final List<String> columnNames; final List<TypeInfo> columnTypes; // Get column names and sort order final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException( "ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); // Stats part stats = new SerDeStats(); serializedSize = 0; deserializedSize = 0; status = LAST_OPERATION.UNKNOWN; }