@Override
public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
  // Column names and types come from the table properties
  List<String> columnNames = Arrays.asList(tbl.getProperty(Constants.LIST_COLUMNS).split(","));
  List<TypeInfo> columnTypes =
      TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(Constants.LIST_COLUMN_TYPES));
  numCols = columnNames.size();

  // Every column is exposed as a string; CSV fields are not type-converted here
  List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);
  for (int i = 0; i < numCols; i++) {
    columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  }
  this.inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

  this.outputFields = new String[numCols];

  // Reusable row object for deserialization
  row = new ArrayList<String>(numCols);
  for (int i = 0; i < numCols; i++) {
    row.add(null);
  }

  // CSV control characters, with OpenCSV defaults as fallback
  separatorChar = getProperty(tbl, "separatorChar", CSVWriter.DEFAULT_SEPARATOR);
  quoteChar = getProperty(tbl, "quoteChar", CSVWriter.DEFAULT_QUOTE_CHARACTER);
  escapeChar = getProperty(tbl, "escapeChar", CSVWriter.DEFAULT_ESCAPE_CHARACTER);
}
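// The getProperty(tbl, name, default) helper used above is not shown in this
// snippet. A minimal sketch of what such a helper likely looks like (assumed,
// not taken from the original source): read a one-character table property and
// fall back to the supplied OpenCSV default when the property is absent.
private char getProperty(final Properties tbl, final String property, final char def) {
  final String val = tbl.getProperty(property);
  if (val != null) {
    return val.charAt(0);
  }
  return def;
}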
// Initialize this SerDe with the system properties and table properties
@Override
public void initialize(Configuration sysProps, Properties tblProps) throws SerDeException {
  LOG.debug("Initializing QueryStringSerDe");

  // Get the names of the columns for the table this SerDe is being used with
  String columnNameProperty = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
  columnNames = Arrays.asList(columnNameProperty.split(","));

  // Convert column types from text to TypeInfo objects
  String columnTypeProperty = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  assert columnNames.size() == columnTypes.size();
  numColumns = columnNames.size();

  // Create ObjectInspectors from the type information for each column
  List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
  ObjectInspector oi;
  for (int c = 0; c < numColumns; c++) {
    oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
    columnOIs.add(oi);
  }
  rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

  // Create an empty row object to be reused during deserialization
  row = new ArrayList<Object>(numColumns);
  for (int c = 0; c < numColumns; c++) {
    row.add(null);
  }

  LOG.debug("QueryStringSerDe initialization complete");
}
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // Get column names and types
  String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  assert (columnNames.size() == columnTypes.size());

  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  rowObjectInspector = (StructObjectInspector)
      TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
  row = new ArrayList<Object>(columnNames.size());
  for (int i = 0; i < columnNames.size(); i++) {
    row.add(null);
  }

  // Get the sort order: one '+' (ascending) or '-' (descending) per column
  String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
  columnSortOrderIsDesc = new boolean[columnNames.size()];
  for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
    columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
  }
}
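// For illustration only (property values assumed, not taken from the snippet
// above): the sort-order property carries one character per column, '+' for
// ascending and '-' for descending, which is what the charAt(i) == '-' check
// above relies on.
Properties tbl = new Properties();
tbl.setProperty(Constants.LIST_COLUMNS, "id,name,score");
tbl.setProperty(Constants.LIST_COLUMN_TYPES, "int:string:double");
tbl.setProperty(Constants.SERIALIZATION_SORT_ORDER, "+-+"); // id asc, name desc, score asc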
private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
  List<TypeInfo> columnTypes;
  if (columnsTypeStr.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
  }
  return columnTypes;
}
/**
 * Reads the following SERDEPROPERTIES
 *
 * <p>
 *
 * <ul>
 *   <li>{@code voltdb.servers} (required) comma separated list of VoltDB servers that comprise a
 *       VoltDB cluster
 *   <li>{@code voltdb.table} (required) destination VoltDB table
 *   <li>{@code voltdb.user} (optional) VoltDB user name
 *   <li>{@code voltdb.password} (optional) VoltDB user password
 * </ul>
 *
 * <p>and makes sure that the Hive table column types match the destination VoltDB column types
 */
@Override
public void initialize(Configuration conf, Properties props) throws SerDeException {
  String columnNamesPropVal = props.getProperty(serdeConstants.LIST_COLUMNS, "");
  String columnTypesPropVal = props.getProperty(serdeConstants.LIST_COLUMN_TYPES, "");
  String serversPropVal = props.getProperty(SERVERS_PROP, "");

  String table = props.getProperty(TABLE_PROP, "");
  String user = props.getProperty(USER_PROP);
  String password = props.getProperty(PASSWORD_PROP);

  if (serversPropVal.trim().isEmpty() || table.trim().isEmpty()) {
    throw new VoltSerdeException(
        "properties \"" + SERVERS_PROP + "\", and \"" + TABLE_PROP
            + "\" must be minimally defined");
  }

  List<String> columnNames = m_splitter.splitToList(columnNamesPropVal);
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesPropVal);

  String[] servers = m_splitter.splitToList(serversPropVal).toArray(new String[0]);
  if (servers.length == 0) {
    throw new VoltSerdeException(
        "property \"" + SERVERS_PROP + "\" must list at least one VoltDB server");
  }

  if (conf != null) {
    VoltConfiguration.configureVoltDB(conf, servers, user, password, table);
  }

  VoltType[] voltTypes = null;
  m_voltConf = new VoltConfiguration(table, servers, user, password);
  try {
    m_voltConf.isMinimallyConfigured();
    voltTypes = m_voltConf.getTableColumnTypes();
  } catch (IOException e) {
    throw new VoltSerdeException("unable to setup a VoltDB context", e);
  }
  m_oig = new VoltObjectInspectorGenerator(columnNames, columnTypes, voltTypes);
}
/**
 * Initializes the SerDe. Gets the list of columns and their types from the table properties;
 * they are used to parse and generate the JSON data.
 *
 * @param conf Hadoop configuration object
 * @param tbl Table Properties
 * @throws SerDeException
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  LOG.debug("Initializing SerDe");

  // Get column names and types
  String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
  LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty);

  // all table column names
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }

  // all column types
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  assert (columnNames.size() == columnTypes.size());

  stats = new SerDeStats();

  // Create row related objects
  rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);

  // build options
  options = new JsonStructOIOptions(getMappings(tbl));
  rowObjectInspector = (StructObjectInspector)
      JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options);

  // Get the sort order
  String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
  columnSortOrderIsDesc = new boolean[columnNames.size()];
  for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
    columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
  }

  // other configuration
  ignoreMalformedJson =
      Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));
}
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  final List<TypeInfo> columnTypes;

  // Get column names and types
  final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);

  // Get compression properties
  compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION);

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException(
        "ParquetHiveSerde initialization failed. Number of column names and column types"
            + " differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
  }

  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Stats part
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#initialize(org.apache.hadoop.conf.Configuration,
 * java.util.Properties)
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  String columnNameProperty = tbl.getProperty(LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(LIST_COLUMN_TYPES);
  List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

  List<ObjectInspector> columnObjectInspectors =
      new ArrayList<ObjectInspector>(columnNames.size());
  ObjectInspector colObjectInspector;
  for (int col = 0; col < columnNames.size(); col++) {
    colObjectInspector =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(col));
    columnObjectInspectors.add(colObjectInspector);
  }
  cachedObjectInspector = ObjectInspectorFactory.getColumnarStructObjectInspector(
      columnNames, columnObjectInspectors);
}
@Override
public ObjectInspector getObjectInspector() {
  // Read the configuration parameters
  String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
  // NOTE: if "columns.types" is missing, all columns will be of String type
  String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);

  // Parse the configuration parameters
  ArrayList<String> columnNames = new ArrayList<String>();
  Deque<Integer> virtualColumns = new ArrayDeque<Integer>();
  if (columnNameProperty != null && columnNameProperty.length() > 0) {
    String[] colNames = columnNameProperty.split(",");
    for (int i = 0; i < colNames.length; i++) {
      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) {
        virtualColumns.addLast(i);
      } else {
        columnNames.add(colNames[i]);
      }
    }
  }
  if (columnTypeProperty == null) {
    // Default type: all string
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < columnNames.size(); i++) {
      if (i > 0) {
        sb.append(":");
      }
      sb.append("string");
    }
    columnTypeProperty = sb.toString();
  }

  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  // Drop the type slots of virtual columns, highest index first so earlier indices stay valid;
  // intValue() forces remove-by-index rather than remove-by-object
  while (virtualColumns.size() > 0) {
    fieldTypes.remove(virtualColumns.removeLast().intValue());
  }
  StructTypeInfo rowType = new StructTypeInfo();
  rowType.setAllStructFieldNames(columnNames);
  rowType.setAllStructFieldTypeInfos(fieldTypes);
  return OrcRecordUpdater.createEventSchema(OrcStruct.createObjectInspector(rowType));
}
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
  // extract column info - don't use Hive constants as they were renamed in 0.9, breaking
  // compatibility;
  // the column names are saved here because the inspector passed to #serialize doesn't preserve
  // them (maybe because it's an external table)
  // use the class since StructType requires it ...
  List<String> columnNames =
      StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
  List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(
      tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

  // create a standard writable Object Inspector - used later on by serialization/deserialization
  List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();
  for (TypeInfo typeInfo : colTypes) {
    inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
  }

  return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
@Override
public void initialize(Configuration cfg, Properties props) throws SerDeException {
  String columnNameProperty = props.getProperty(serdeConstants.LIST_COLUMNS);
  columnNames = Arrays.asList(columnNameProperty.split(","));
  numColumns = columnNames.size();

  String columnTypeProperty = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

  // Ensure we have the same number of column names and types
  assert numColumns == columnTypes.size();

  List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(numColumns);
  row = new ArrayList<Object>(numColumns);
  for (int c = 0; c < numColumns; c++) {
    ObjectInspector oi =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
    inspectors.add(oi);
    row.add(null);
  }
  inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
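// Java assertions are disabled unless the JVM runs with -ea, so the assert above
// is a no-op in most deployments. A stricter sketch (not part of the original
// snippet, reusing the same field names) would fail initialization explicitly:
if (numColumns != columnTypes.size()) {
  throw new SerDeException("Mismatched column metadata: " + numColumns
      + " column names but " + columnTypes.size() + " column types");
}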
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
  OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
  if (props != null) {
    final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
    if (columnNameProperty != null && !columnNameProperty.isEmpty()
        && columnTypeProperty != null && !columnTypeProperty.isEmpty()) {
      List<String> columnNames;
      List<TypeInfo> columnTypes;
      if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
      } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
      }
      if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
      } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
      }
      TypeDescription schema = TypeDescription.createStruct();
      for (int i = 0; i < columnNames.size(); ++i) {
        schema.addField(columnNames.get(i), convertTypeInfo(columnTypes.get(i)));
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("ORC schema = " + schema);
      }
      result.setSchema(schema);
    }
  }
  return result;
}
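// convertTypeInfo is referenced above but not shown. A minimal sketch of a
// TypeInfo-to-TypeDescription mapping for a few primitive types (an assumption
// for illustration; the real converter also handles decimal, char/varchar,
// date/timestamp, and complex types):
static TypeDescription convertTypeInfo(TypeInfo info) {
  switch (info.getCategory()) {
    case PRIMITIVE:
      PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info;
      switch (pinfo.getPrimitiveCategory()) {
        case BOOLEAN: return TypeDescription.createBoolean();
        case INT:     return TypeDescription.createInt();
        case LONG:    return TypeDescription.createLong();
        case DOUBLE:  return TypeDescription.createDouble();
        case STRING:  return TypeDescription.createString();
        default:
          throw new IllegalArgumentException("Unhandled primitive type " + pinfo);
      }
    default:
      throw new IllegalArgumentException("Unhandled category " + info.getCategory());
  }
}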
@Override
public RecordWriter getHiveRecordWriter(
    JobConf jc,
    Path finalOutPath,
    Class<? extends Writable> valueClass,
    boolean isCompressed,
    Properties tbl,
    Progressable progress) throws IOException {
  boolean usenewformat = jc.getBoolean("fdf.newformat", false);
  IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);

  String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
  IFieldMap map = new IFieldMap();
  ArrayList<TypeInfo> types;
  if (columnTypeProperty == null) {
    types = new ArrayList<TypeInfo>();
    map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
  } else {
    types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  String compress = tbl.getProperty(ConstVar.Compress);
  if (compress != null && compress.equalsIgnoreCase("true")) {
    head.setCompress((byte) 1);
  }

  // Map each Hive primitive type to the corresponding FormatStorage field type
  int i = 0;
  for (TypeInfo type : types) {
    byte fdftype = 0;
    String name = type.getTypeName();
    if (name.equals(Constants.TINYINT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Byte;
    } else if (name.equals(Constants.SMALLINT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Short;
    } else if (name.equals(Constants.INT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Int;
    } else if (name.equals(Constants.BIGINT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Long;
    } else if (name.equals(Constants.FLOAT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Float;
    } else if (name.equals(Constants.DOUBLE_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Double;
    } else if (name.equals(Constants.STRING_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_String;
    }
    map.addFieldType(new IRecord.IFType(fdftype, i++));
  }
  head.setFieldMap(map);

  // Parse the optional column projection: records are split first, then fields
  ArrayList<ArrayList<Integer>> columnprojects = null;
  String projectionString = jc.get(ConstVar.Projection);
  if (projectionString != null) {
    columnprojects = new ArrayList<ArrayList<Integer>>();
    String[] projectionList = projectionString.split(ConstVar.RecordSplit);
    for (String str : projectionList) {
      ArrayList<Integer> cp = new ArrayList<Integer>();
      String[] item = str.split(ConstVar.FieldSplit);
      for (String s : item) {
        cp.add(Integer.valueOf(s));
      }
      columnprojects.add(cp);
    }
  }

  if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
    final Configuration conf = new Configuration(jc);
    final IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.create(finalOutPath.toString(), head);
    return new RecordWriter() {
      @Override
      public void write(Writable w) throws IOException {
      }

      @Override
      public void close(boolean abort) throws IOException {
        ifdf.close();
      }
    };
  }

  final IColumnDataFile icdf = new IColumnDataFile(jc);
  icdf.create(finalOutPath.toString(), head, columnprojects);
  LOG.info(finalOutPath.toString());
  LOG.info("output file compress?\t" + compress);
  LOG.info("head:\t" + head.toStr());
  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      icdf.addRecord((IRecord) w);
    }

    @Override
    public void close(boolean abort) throws IOException {
      icdf.close();
    }
  };
}