@Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // Get column names and sort order String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } assert (columnNames.size() == columnTypes.size()); // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); rowObjectInspector = (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo); row = new ArrayList<Object>(columnNames.size()); for (int i = 0; i < columnNames.size(); i++) { row.add(null); } // Get the sort order String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER); columnSortOrderIsDesc = new boolean[columnNames.size()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-'); } }
// Initialize this SerDe with the system properties and table properties @Override public void initialize(Configuration sysProps, Properties tblProps) throws SerDeException { LOG.debug("Initializing QueryStringSerDe"); // Get the names of the columns for the table this SerDe is being used // with String columnNameProperty = tblProps.getProperty(serdeConstants.LIST_COLUMNS); columnNames = Arrays.asList(columnNameProperty.split(",")); // Convert column types from text to TypeInfo objects String columnTypeProperty = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES); columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); assert columnNames.size() == columnTypes.size(); numColumns = columnNames.size(); // Create ObjectInspectors from the type information for each column List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size()); ObjectInspector oi; for (int c = 0; c < numColumns; c++) { oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c)); columnOIs.add(oi); } rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs); // Create an empty row object to be reused during deserialization row = new ArrayList<Object>(numColumns); for (int c = 0; c < numColumns; c++) { row.add(null); } LOG.debug("QueryStringSerDe initialization complete"); }
/**
 * Turns a skewed value, stored as a string in the metadata, into a constant expression whose
 * type is the type of the given join key column.
 *
 * @param skewedValue the skewed value in its string form
 * @param keyCol the join key column whose type the constant takes
 * @return an expression node descriptor of the appropriate constant
 */
private ExprNodeConstantDesc createConstDesc(String skewedValue, ExprNodeColumnDesc keyCol) {
  // Source side is always a Java string; target side follows the key column's type.
  ObjectInspector stringOI =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo);
  ObjectInspector keyOI =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(keyCol.getTypeInfo());

  Object typedValue =
      ObjectInspectorConverters.getConverter(stringOI, keyOI).convert(skewedValue);
  return new ExprNodeConstantDesc(keyCol.getTypeInfo(), typedValue);
}
/**
 * Initializes this SerDe from the table properties.
 *
 * <p>Every column is exposed through the Java string object inspector regardless of its declared
 * type. The separator, quote and escape characters are read from the table properties with the
 * {@code CSVWriter} defaults as fallback.
 *
 * @param conf configuration (not read by this method)
 * @param tbl table properties
 * @throws SerDeException declared by the overridden method
 */
@Override
public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
  final String namesCsv = tbl.getProperty(Constants.LIST_COLUMNS);
  List<String> columnNames = Arrays.asList(namesCsv.split(","));
  // The parsed types are not used below; the call is kept so behaviour (including any failure
  // raised while parsing the type string) stays exactly as before.
  TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(Constants.LIST_COLUMN_TYPES));

  numCols = columnNames.size();

  // All fields are inspected as plain Java strings.
  List<ObjectInspector> stringInspectors = new ArrayList<ObjectInspector>(numCols);
  for (int col = 0; col < numCols; col++) {
    stringInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  }
  this.inspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, stringInspectors);

  this.outputFields = new String[numCols];

  // Reusable row, one null slot per column
  row = new ArrayList<String>(numCols);
  for (int col = 0; col < numCols; col++) {
    row.add(null);
  }

  separatorChar = getProperty(tbl, "separatorChar", CSVWriter.DEFAULT_SEPARATOR);
  quoteChar = getProperty(tbl, "quoteChar", CSVWriter.DEFAULT_QUOTE_CHARACTER);
  escapeChar = getProperty(tbl, "escapeChar", CSVWriter.DEFAULT_ESCAPE_CHARACTER);
}
/**
 * Reads the constant held by a numeric object inspector and returns it as a {@code double}.
 *
 * <p>Accepted type names are double, float, int, bigint, smallint and tinyint; the constant is
 * fetched through {@code getConstValue} and widened.
 *
 * @param numberOI object inspector of the constant numeric argument
 * @return the constant value as a double
 * @throws UDFArgumentException if the type name is none of the accepted ones
 */
public static double getAsConstDouble(@Nonnull final ObjectInspector numberOI)
    throws UDFArgumentException {
  final String type = numberOI.getTypeName();

  if (DOUBLE_TYPE_NAME.equals(type)) {
    DoubleWritable d = getConstValue(numberOI);
    return d.get();
  }
  if (FLOAT_TYPE_NAME.equals(type)) {
    FloatWritable f = getConstValue(numberOI);
    return f.get();
  }
  if (INT_TYPE_NAME.equals(type)) {
    IntWritable n = getConstValue(numberOI);
    return n.get();
  }
  if (BIGINT_TYPE_NAME.equals(type)) {
    LongWritable l = getConstValue(numberOI);
    return l.get();
  }
  if (SMALLINT_TYPE_NAME.equals(type)) {
    ShortWritable s = getConstValue(numberOI);
    return s.get();
  }
  if (TINYINT_TYPE_NAME.equals(type)) {
    ByteWritable b = getConstValue(numberOI);
    return b.get();
  }

  throw new UDFArgumentException(
      "Unexpected argument type to cast as double: "
          + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
}
/**
 * Casts the given object inspector to {@link PrimitiveObjectInspector}.
 *
 * @param oi the object inspector to cast
 * @return {@code oi} as a primitive object inspector
 * @throws UDFArgumentException if the category of {@code oi} is not {@code PRIMITIVE}
 */
public static PrimitiveObjectInspector asPrimitiveObjectInspector(
    @Nonnull final ObjectInspector oi) throws UDFArgumentException {
  if (oi.getCategory() == Category.PRIMITIVE) {
    return (PrimitiveObjectInspector) oi;
  }
  throw new UDFArgumentException(
      "Is not PrimitiveObjectInspector: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
}
/**
 * Returns the constant {@code long} held by a bigint object inspector.
 *
 * @param oi object inspector of the constant argument
 * @return the constant value
 * @throws UDFArgumentException if {@code oi} is not a bigint object inspector, or if
 *     {@code getConstValue} rejects it
 */
public static long getConstLong(@Nonnull final ObjectInspector oi)
    throws UDFArgumentException {
  if (isBigIntOI(oi)) {
    LongWritable constant = getConstValue(oi);
    return constant.get();
  }
  throw new UDFArgumentException(
      "argument must be a BigInt value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
}
/**
 * Returns the constant {@code boolean} held by a boolean object inspector.
 *
 * @param oi object inspector of the constant argument
 * @return the constant value
 * @throws UDFArgumentException if {@code oi} is not a boolean object inspector, or if
 *     {@code getConstValue} rejects it
 */
public static boolean getConstBoolean(@Nonnull final ObjectInspector oi)
    throws UDFArgumentException {
  if (isBooleanOI(oi)) {
    BooleanWritable constant = getConstValue(oi);
    return constant.get();
  }
  throw new UDFArgumentException(
      "argument must be a Boolean value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
}
/**
 * Returns the constant string held by a string object inspector.
 *
 * @param oi object inspector of the constant argument
 * @return the constant as a {@code String}, or {@code null} when the constant value is null
 * @throws UDFArgumentException if {@code oi} is not a string object inspector, or if
 *     {@code getConstValue} rejects it
 */
public static String getConstString(@Nonnull final ObjectInspector oi)
    throws UDFArgumentException {
  if (!isStringOI(oi)) {
    throw new UDFArgumentException(
        "argument must be a Text value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
  }
  Text constant = getConstValue(oi);
  if (constant == null) {
    return null;
  }
  return constant.toString();
}
/**
 * Casts the given object inspector to {@link ConstantObjectInspector}.
 *
 * @param oi the object inspector to cast
 * @return {@code oi} as a constant object inspector
 * @throws UDFArgumentException if {@code oi} is not a constant object inspector
 */
@Nonnull
public static ConstantObjectInspector asConstantObjectInspector(
    @Nonnull final ObjectInspector oi) throws UDFArgumentException {
  if (ObjectInspectorUtils.isConstantObjectInspector(oi)) {
    return (ConstantObjectInspector) oi;
  }
  throw new UDFArgumentException(
      "argument must be a constant value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
}
@Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { // Verify that the first parameter supports comparisons. ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]); if (!ObjectInspectorUtils.compareSupported(oi)) { throw new UDFArgumentTypeException( 0, "Cannot support comparison of map<> type or complex type containing map<>."); } return new GenericUDAFFirstRowEvaluator(); }
/**
 * Parses a Hive column type string into a list of {@code TypeInfo}.
 *
 * @param columnsTypeStr the column type string; an empty string yields an empty list
 * @return the parsed types, or a new empty list when the string is empty
 */
private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
  if (columnsTypeStr.length() == 0) {
    return new ArrayList<TypeInfo>();
  }
  return TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
}
/*
 * (non-Javadoc)
 *
 * Builds the cached columnar struct object inspector from the column names and column types
 * found in the table properties, using one standard Java object inspector per column.
 *
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#initialize(org.apache.hadoop.conf.Configuration,
 * java.util.Properties)
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  final String namesCsv = tbl.getProperty(LIST_COLUMNS);
  final String typeSpec = tbl.getProperty(LIST_COLUMN_TYPES);

  List<String> names = Arrays.asList(namesCsv.split(","));
  List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(typeSpec);

  // Column count is driven by the names; types are looked up by the same position.
  final int columnCount = names.size();
  List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(columnCount);
  for (int idx = 0; idx < columnCount; idx++) {
    inspectors.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(types.get(idx)));
  }

  cachedObjectInspector =
      ObjectInspectorFactory.getColumnarStructObjectInspector(names, inspectors);
}
/**
 * Returns the writable constant value carried by a constant object inspector.
 *
 * <p>The result is cast unchecked to the type requested by the caller.
 *
 * @param oi object inspector of the constant argument
 * @param <T> the writable type the caller expects
 * @return the writable constant value (may be {@code null})
 * @throws UDFArgumentException if {@code oi} is not a constant object inspector
 */
@SuppressWarnings("unchecked")
@Nullable
public static <T extends Writable> T getConstValue(@Nonnull final ObjectInspector oi)
    throws UDFArgumentException {
  if (ObjectInspectorUtils.isConstantObjectInspector(oi)) {
    Object constant = ((ConstantObjectInspector) oi).getWritableConstantValue();
    return (T) constant;
  }
  throw new UDFArgumentException(
      "argument must be a constant value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
}
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) { // extract column info - don't use Hive constants as they were renamed in 0.9 breaking // compatibility // the column names are saved as the given inspector to #serialize doesn't preserves them (maybe // because it's an external table) // use the class since StructType requires it ... List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ","); List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString( tableProperties.getProperty(HiveConstants.COLUMNS_TYPES)); // create a standard writable Object Inspector - used later on by serialization/deserialization List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(); for (TypeInfo typeInfo : colTypes) { inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo)); } return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors); }
/**
 * Returns the type names of all fields of a struct object inspector, joined with {@code ':'} in
 * field order.
 *
 * @param soi the struct object inspector to describe
 * @return the colon-separated field type names; empty when the struct has no fields
 */
public static String getFieldTypes(StructObjectInspector soi) {
  StringBuilder joined = new StringBuilder();
  String separator = "";
  for (StructField field : soi.getAllStructFieldRefs()) {
    joined.append(separator);
    joined.append(
        TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector())
            .getTypeName());
    // Every field after the first is preceded by a colon.
    separator = ":";
  }
  return joined.toString();
}
@Override public void initialize(Configuration cfg, Properties props) throws SerDeException { String columnNameProperty = props.getProperty(serdeConstants.LIST_COLUMNS); columnNames = Arrays.asList(columnNameProperty.split(",")); numColumns = columnNames.size(); String columnTypeProperty = props.getProperty(serdeConstants.LIST_COLUMN_TYPES); List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); // Ensure we have the same number of column names and types assert numColumns == columnTypes.size(); List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(numColumns); row = new ArrayList<Object>(numColumns); for (int c = 0; c < numColumns; c++) { ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c)); inspectors.add(oi); row.add(null); } inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors); }
/**
 * Returns the evaluator for this aggregate after validating its single argument.
 *
 * @param parameters the argument types of the aggregate call
 * @return a new {@code GenericUDAFMaxEvaluator}
 * @throws SemanticException (as {@code UDFArgumentTypeException}) if the argument count is not
 *     exactly one or the argument's type does not support comparison
 */
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
  final int argCount = parameters.length;
  if (argCount != 1) {
    throw new UDFArgumentTypeException(argCount - 1, "Exactly one argument is expected.");
  }

  ObjectInspector argInspector =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
  if (ObjectInspectorUtils.compareSupported(argInspector)) {
    return new GenericUDAFMaxEvaluator();
  }
  throw new UDFArgumentTypeException(
      argCount - 1,
      "Cannot support comparison of map<> type or complex type containing map<>.");
}
/*
 * Builds the select-list row resolver: every input column is carried over, and an
 * array<struct> column (named PATHATTR_NAME) whose struct mirrors the input columns is added
 * at the end. Also records, on the evaluator, the mapping from each column's internal name to
 * the alias used for it inside that struct.
 */
protected static RowResolver createSelectListRR(MatchPath evaluator, PTFInputDef inpDef)
    throws SemanticException {
  RowResolver result = new RowResolver();
  RowResolver inputRR = inpDef.getOutputShape().getRr();

  evaluator.inputColumnNamesMap = new HashMap<String, String>();
  ArrayList<String> structFieldNames = new ArrayList<String>();
  ArrayList<ObjectInspector> structFieldOIs = new ArrayList<ObjectInspector>();

  for (ColumnInfo source : inputRR.getColumnInfos()) {
    ColumnInfo copy = new ColumnInfo(source);

    // Start from the column's own alias; prefer the column alias from the reverse lookup.
    String alias = copy.getAlias();
    String[] tableAndColumn = inputRR.reverseLookup(source.getInternalName());
    if (tableAndColumn != null) {
      alias = tableAndColumn[1];
    }

    ASTNode expression = PTFTranslator.getASTNode(source, inputRR);
    if (expression == null) {
      // Plain column: fall back to the internal name when no alias is known.
      if (alias == null) {
        alias = copy.getInternalName();
      }
      result.put(copy.getTabAlias(), alias, copy);
    } else {
      // Expression column: registered by expression, named by its lower-cased tree string.
      result.putExpression(expression, copy);
      alias = expression.toStringTree().toLowerCase();
    }

    evaluator.inputColumnNamesMap.put(copy.getInternalName(), alias);
    structFieldNames.add(alias);
    structFieldOIs.add(copy.getObjectInspector());
  }

  StandardListObjectInspector pathAttrOI =
      ObjectInspectorFactory.getStandardListObjectInspector(
          ObjectInspectorFactory.getStandardStructObjectInspector(
              structFieldNames, structFieldOIs));
  ColumnInfo pathColumn =
      new ColumnInfo(
          PATHATTR_NAME,
          TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI),
          null,
          false,
          false);
  result.put(null, PATHATTR_NAME, pathColumn);

  return result;
}
/** * Initializes the SerDe. Gets the list of columns and their types from the table properties. Will * use them to look into/create JSON data. * * @param conf Hadoop configuration object * @param tbl Table Properties * @throws SerDeException */ @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { LOG.debug("Initializing SerDe"); // Get column names and sort order String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty); // all table column names if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } // all column types if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } assert (columnNames.size() == columnTypes.size()); stats = new SerDeStats(); // Create row related objects rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); // build options options = new JsonStructOIOptions(getMappings(tbl)); rowObjectInspector = (StructObjectInspector) JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options); // Get the sort order String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER); columnSortOrderIsDesc = new boolean[columnNames.size()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-'); } // other configuration ignoreMalformedJson = Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false")); }
/**
 * Reads the following SERDEPROPERTIES
 *
 * <ul>
 *   <li>{@code voltdb.servers} (required) comma separated list of VoltDB servers that comprise a
 *       VoltDB cluster
 *   <li>{@code voltdb.table} (required) destination VoltDB table
 *   <li>{@code voltdb.user} (optional) VoltDB user name
 *   <li>{@code voltdb.password} (optional) VoltDB user password
 * </ul>
 *
 * <p>and makes sure that the Hive table column types match the destination VoltDB column types
 *
 * @param conf job configuration; when non-null it is populated with the VoltDB settings
 * @param props table/SerDe properties
 * @throws SerDeException (as {@code VoltSerdeException}) when the servers or table property is
 *     blank, when no server remains after splitting, or when the VoltDB context cannot be set up
 */
@Override
public void initialize(Configuration conf, Properties props) throws SerDeException {
  final String namesValue = props.getProperty(serdeConstants.LIST_COLUMNS, "");
  final String typesValue = props.getProperty(serdeConstants.LIST_COLUMN_TYPES, "");
  final String serversValue = props.getProperty(SERVERS_PROP, "");
  final String table = props.getProperty(TABLE_PROP, "");
  final String user = props.getProperty(USER_PROP);
  final String password = props.getProperty(PASSWORD_PROP);

  // Both failure paths below report the same message.
  final String missingRequired =
      "properties \""
          + SERVERS_PROP
          + "\", and \""
          + TABLE_PROP
          + "\" must be minimally defined";

  final boolean serversBlank = serversValue.trim().isEmpty();
  if (serversBlank || table.trim().isEmpty()) {
    throw new VoltSerdeException(missingRequired);
  }

  List<String> columnNames = m_splitter.splitToList(namesValue);
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typesValue);

  List<String> serverList = m_splitter.splitToList(serversValue);
  String[] servers = serverList.toArray(new String[0]);
  if (servers.length == 0) {
    throw new VoltSerdeException(missingRequired);
  }

  if (conf != null) {
    VoltConfiguration.configureVoltDB(conf, servers, user, password, table);
  }

  m_voltConf = new VoltConfiguration(table, servers, user, password);
  VoltType[] voltTypes = null;
  try {
    m_voltConf.isMinimallyConfigured();
    voltTypes = m_voltConf.getTableColumnTypes();
  } catch (IOException e) {
    throw new VoltSerdeException("uanble to setup a VoltDB context", e);
  }

  m_oig = new VoltObjectInspectorGenerator(columnNames, columnTypes, voltTypes);
}
private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx) throws SerDeException { // We are going to use LBSerDe to serialize values; create OI for retrieval. List<? extends StructField> fields = ((StructObjectInspector) valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs(); List<String> colNames = new ArrayList<String>(fields.size()); List<ObjectInspector> colOis = new ArrayList<ObjectInspector>(fields.size()); for (int i = 0; i < fields.size(); ++i) { StructField field = fields.get(i); colNames.add(field.getFieldName()); // It would be nice if OI could return typeInfo... TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()); colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo)); } return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(colNames, colOis); }
@Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { final TypeInfo rowTypeInfo; final List<String> columnNames; final List<TypeInfo> columnTypes; // Get column names and sort order final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException( "ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); // Stats part stats = new SerDeStats(); serializedSize = 0; deserializedSize = 0; status = LAST_OPERATION.UNKNOWN; }
/**
 * Returns the constant list held by a constant object inspector as an array of strings.
 *
 * <p>Each non-null element is converted with {@code toString()}; null elements stay
 * {@code null} in the result.
 *
 * @param oi object inspector of the constant list argument
 * @return an array with one entry per list element
 * @throws UDFArgumentException if {@code oi} is not a constant object inspector
 */
@Nonnull
public static String[] getConstStringArray(@Nonnull final ObjectInspector oi)
    throws UDFArgumentException {
  if (!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
    throw new UDFArgumentException(
        "argument must be a constant value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
  }

  final List<?> elements = (List<?>) ((ConstantObjectInspector) oi).getWritableConstantValue();
  final int count = elements.size();
  final String[] strings = new String[count];
  for (int idx = 0; idx < count; idx++) {
    final Object element = elements.get(idx);
    strings[idx] = (element == null) ? null : element.toString();
  }
  return strings;
}
@Override public ObjectInspector getObjectInspector() { // Read the configuration parameters String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); // NOTE: if "columns.types" is missing, all columns will be of String type String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); // Parse the configuration parameters ArrayList<String> columnNames = new ArrayList<String>(); Deque<Integer> virtualColumns = new ArrayDeque<Integer>(); if (columnNameProperty != null && columnNameProperty.length() > 0) { String[] colNames = columnNameProperty.split(","); for (int i = 0; i < colNames.length; i++) { if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) { virtualColumns.addLast(i); } else { columnNames.add(colNames[i]); } } } if (columnTypeProperty == null) { // Default type: all string StringBuilder sb = new StringBuilder(); for (int i = 0; i < columnNames.size(); i++) { if (i > 0) { sb.append(":"); } sb.append("string"); } columnTypeProperty = sb.toString(); } ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); while (virtualColumns.size() > 0) { fieldTypes.remove(virtualColumns.removeLast()); } StructTypeInfo rowType = new StructTypeInfo(); rowType.setAllStructFieldNames(columnNames); rowType.setAllStructFieldTypeInfos(fieldTypes); return OrcRecordUpdater.createEventSchema(OrcStruct.createObjectInspector(rowType)); }
/**
 * Creates a standard struct object inspector that mirrors the given HCatalog schema.
 *
 * @param outputSchema the schema to mirror
 * @return a struct inspector with one field per schema field, in schema order
 * @throws IOException if {@code outputSchema} is {@code null}, or if a field inspector cannot
 *     be created
 */
static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema)
    throws IOException {
  if (outputSchema == null) {
    throw new IOException("Invalid output schema specified");
  }

  List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();
  List<String> names = new ArrayList<String>();

  for (HCatFieldSchema field : outputSchema.getFields()) {
    // The field's type string is parsed first, then name and inspector are recorded together.
    TypeInfo fieldType = TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString());
    names.add(field.getName());
    inspectors.add(getObjectInspector(fieldType));
  }

  return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
/**
 * Checks the result type of {@code NiFiOrcUtils.convertToORCObject} for scalars, arrays, lists
 * and a {@code map<string,float>}.
 */
@Test
public void test_getWritable() throws Exception {
  // Scalars
  Object fromInt = NiFiOrcUtils.convertToORCObject(null, 1);
  assertTrue(fromInt instanceof IntWritable);
  Object fromLong = NiFiOrcUtils.convertToORCObject(null, 1L);
  assertTrue(fromLong instanceof LongWritable);
  Object fromFloat = NiFiOrcUtils.convertToORCObject(null, 1.0f);
  assertTrue(fromFloat instanceof FloatWritable);
  Object fromDouble = NiFiOrcUtils.convertToORCObject(null, 1.0);
  assertTrue(fromDouble instanceof DoubleWritable);

  // Arrays and lists both come back as List
  Object fromArray = NiFiOrcUtils.convertToORCObject(null, new int[] {1, 2, 3});
  assertTrue(fromArray instanceof List);
  Object fromList = NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3));
  assertTrue(fromList instanceof List);

  // Map with an explicit type: keys become Text, values become FloatWritable
  Map<String, Float> input = new HashMap<>();
  input.put("Hello", 1.0f);
  input.put("World", 2.0f);
  Object converted =
      NiFiOrcUtils.convertToORCObject(
          TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), input);
  assertTrue(converted instanceof MapWritable);

  MapWritable convertedMap = (MapWritable) converted;
  for (Map.Entry<?, ?> entry : convertedMap.entrySet()) {
    assertTrue(entry.getKey() instanceof Text);
    assertTrue(entry.getValue() instanceof FloatWritable);
  }
}
/**
 * Creates the ORC writer options for the given job configuration and table properties.
 *
 * <p>When the properties carry non-empty column names and column types, a struct schema is
 * built from them (one field per column name, typed by the column type at the same position)
 * and set on the options. Otherwise the options are returned as created.
 *
 * @param conf job configuration
 * @param props table properties, may be {@code null}
 * @return the writer options, with a schema when one could be derived from {@code props}
 */
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
  OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
  if (props == null) {
    return result;
  }

  final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
  if (columnNameProperty == null
      || columnNameProperty.isEmpty()
      || columnTypeProperty == null
      || columnTypeProperty.isEmpty()) {
    // No usable column information: leave the schema unset.
    return result;
  }

  // Both properties are known to be non-empty here, so no empty-string special case is needed.
  List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

  TypeDescription schema = TypeDescription.createStruct();
  for (int i = 0; i < columnNames.size(); ++i) {
    schema.addField(columnNames.get(i), convertTypeInfo(columnTypes.get(i)));
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("ORC schema = " + schema);
  }
  result.setSchema(schema);
  return result;
}
/**
 * Creates the record writer for the given output path.
 *
 * <p>A file head is assembled from the job configuration and table properties: the format
 * version (chosen by {@code fdf.newformat}), the field map derived from the column types, and
 * the compression flag. When the {@code ConstVar.NeedPostfix} setting is {@code false}, a format
 * data file is created with that head and the returned writer ignores every record and only
 * closes the file. Otherwise a column data file is created (with the optional column
 * projections) and the returned writer appends each record to it.
 *
 * @param jc job configuration
 * @param finalOutPath path of the file to create
 * @param valueClass not read by this method
 * @param isCompressed not read by this method
 * @param tbl table properties (column types, compression flag)
 * @param progress not read by this method
 * @return the record writer
 * @throws IOException if creating the underlying file fails
 */
@Override
public RecordWriter getHiveRecordWriter(
    JobConf jc,
    Path finalOutPath,
    Class<? extends Writable> valueClass,
    boolean isCompressed,
    Properties tbl,
    Progressable progress)
    throws IOException {
  final boolean newFormat = jc.getBoolean("fdf.newformat", false);
  IHead head = new IHead(newFormat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);

  final String typeSpec = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
  IFieldMap fieldMap = new IFieldMap();
  ArrayList<TypeInfo> types;
  if (typeSpec != null) {
    types = TypeInfoUtils.getTypeInfosFromTypeString(typeSpec);
  } else {
    // No declared types: the field map gets a single int field at index 0.
    types = new ArrayList<TypeInfo>();
    fieldMap.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
  }

  final String compress = tbl.getProperty(ConstVar.Compress);
  if ("true".equalsIgnoreCase(compress)) {
    head.setCompress((byte) 1);
  }

  // One field entry per column type, indexed by column position. A type name that matches none
  // of the cases below keeps field type 0.
  for (int idx = 0; idx < types.size(); idx++) {
    final String name = types.get(idx).getTypeName();
    byte fieldType = 0;
    if (name.equals(Constants.TINYINT_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_Byte;
    } else if (name.equals(Constants.SMALLINT_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_Short;
    } else if (name.equals(Constants.INT_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_Int;
    } else if (name.equals(Constants.BIGINT_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_Long;
    } else if (name.equals(Constants.FLOAT_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_Float;
    } else if (name.equals(Constants.DOUBLE_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_Double;
    } else if (name.equals(Constants.STRING_TYPE_NAME)) {
      fieldType = ConstVar.FieldType_String;
    }
    fieldMap.addFieldType(new IRecord.IFType(fieldType, idx));
  }
  head.setFieldMap(fieldMap);

  // Optional projections: records separated by RecordSplit, column indexes by FieldSplit.
  ArrayList<ArrayList<Integer>> projections = null;
  final String projectionSpec = jc.get(ConstVar.Projection);
  if (projectionSpec != null) {
    projections = new ArrayList<ArrayList<Integer>>();
    for (String group : projectionSpec.split(ConstVar.RecordSplit)) {
      ArrayList<Integer> columns = new ArrayList<Integer>();
      for (String index : group.split(ConstVar.FieldSplit)) {
        columns.add(Integer.valueOf(index));
      }
      projections.add(columns);
    }
  }

  final boolean needPostfix = jc.getBoolean(ConstVar.NeedPostfix, true);
  if (!needPostfix) {
    final Configuration conf = new Configuration(jc);
    final IFormatDataFile formatFile = new IFormatDataFile(conf);
    formatFile.create(finalOutPath.toString(), head);
    return new RecordWriter() {
      @Override
      public void write(Writable w) throws IOException {}

      @Override
      public void close(boolean abort) throws IOException {
        formatFile.close();
      }
    };
  }

  final IColumnDataFile columnFile = new IColumnDataFile(jc);
  columnFile.create(finalOutPath.toString(), head, projections);

  LOG.info(finalOutPath.toString());
  LOG.info("output file compress?\t" + compress);
  LOG.info("head:\t" + head.toStr());

  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      columnFile.addRecord((IRecord) w);
    }

    @Override
    public void close(boolean abort) throws IOException {
      columnFile.close();
    }
  };
}
/**
 * Recursively splits a join condition into per-input join keys and a residual list of
 * conditions that cannot be used as keys.
 *
 * <p>An {@code AND} call is split operand by operand. For any other call, the comparison is
 * considered as a key candidate when it is {@code EQUALS}, or {@code IS_NOT_DISTINCT_FROM}
 * (only if {@code filterNulls} is non-null), or one of the four range comparisons (only if
 * {@code rangeOp} is non-null and still empty). A candidate becomes a key pair when each of its
 * two operands references fields of exactly one input and the two inputs differ in position;
 * the operands are then rebased to their own input's field numbering and, when their types
 * differ, cast to the common comparison type. Everything else is appended to
 * {@code nonEquiList}.
 *
 * @param sysFieldList system fields; only its size is used, as an offset for field ordinals
 * @param inputs the join inputs
 * @param condition the (sub-)condition to analyze
 * @param joinKeys output: one key list per input, indexed like {@code inputs}
 * @param filterNulls output, may be null: positions of keys added for {@code EQUALS}
 * @param rangeOp output, may be null: receives the operator of a non-equality key comparison
 * @param nonEquiList output: conditions that were not turned into join keys
 * @throws CalciteSemanticException if no common comparison type exists for a key pair
 */
private static void splitJoinCondition(
    List<RelDataTypeField> sysFieldList,
    List<RelNode> inputs,
    RexNode condition,
    List<List<RexNode>> joinKeys,
    List<Integer> filterNulls,
    List<SqlOperator> rangeOp,
    List<RexNode> nonEquiList)
    throws CalciteSemanticException {
  final int sysFieldCount = sysFieldList.size();
  final RelOptCluster cluster = inputs.get(0).getCluster();
  final RexBuilder rexBuilder = cluster.getRexBuilder();

  if (condition instanceof RexCall) {
    RexCall call = (RexCall) condition;
    if (call.getOperator() == SqlStdOperatorTable.AND) {
      // A conjunction is handled by splitting each operand independently.
      for (RexNode operand : call.getOperands()) {
        splitJoinCondition(
            sysFieldList, inputs, operand, joinKeys, filterNulls, rangeOp, nonEquiList);
      }
      return;
    }

    RexNode leftKey = null;
    RexNode rightKey = null;
    int leftInput = 0;
    int rightInput = 0;
    List<RelDataTypeField> leftFields = null;
    List<RelDataTypeField> rightFields = null;
    // Set when operand 0 ends up as the right key, i.e. the comparison has to be mirrored.
    boolean reverse = false;

    SqlKind kind = call.getKind();

    // Only consider range operators if we haven't already seen one
    if ((kind == SqlKind.EQUALS)
        || (filterNulls != null && kind == SqlKind.IS_NOT_DISTINCT_FROM)
        || (rangeOp != null
            && rangeOp.isEmpty()
            && (kind == SqlKind.GREATER_THAN
                || kind == SqlKind.GREATER_THAN_OR_EQUAL
                || kind == SqlKind.LESS_THAN
                || kind == SqlKind.LESS_THAN_OR_EQUAL))) {
      final List<RexNode> operands = call.getOperands();
      RexNode op0 = operands.get(0);
      RexNode op1 = operands.get(1);

      // Field ordinals referenced by each operand
      final ImmutableBitSet projRefs0 = InputFinder.bits(op0);
      final ImmutableBitSet projRefs1 = InputFinder.bits(op1);

      // Range of field ordinals covered by each input
      final ImmutableBitSet[] inputsRange = new ImmutableBitSet[inputs.size()];
      int totalFieldCount = 0;
      for (int i = 0; i < inputs.size(); i++) {
        final int firstField = totalFieldCount + sysFieldCount;
        totalFieldCount = firstField + inputs.get(i).getRowType().getFieldCount();
        inputsRange[i] = ImmutableBitSet.range(firstField, totalFieldCount);
      }

      // Walk the inputs in order; the first operand found to lie entirely inside one input's
      // range becomes the left key, the next one the right key.
      boolean foundBothInputs = false;
      for (int i = 0; i < inputs.size() && !foundBothInputs; i++) {
        if (projRefs0.intersects(inputsRange[i])
            && projRefs0.union(inputsRange[i]).equals(inputsRange[i])) {
          if (leftKey == null) {
            leftKey = op0;
            leftInput = i;
            leftFields = inputs.get(leftInput).getRowType().getFieldList();
          } else {
            rightKey = op0;
            rightInput = i;
            rightFields = inputs.get(rightInput).getRowType().getFieldList();
            reverse = true;
            foundBothInputs = true;
          }
        } else if (projRefs1.intersects(inputsRange[i])
            && projRefs1.union(inputsRange[i]).equals(inputsRange[i])) {
          if (leftKey == null) {
            leftKey = op1;
            leftInput = i;
            leftFields = inputs.get(leftInput).getRowType().getFieldList();
          } else {
            rightKey = op1;
            rightInput = i;
            rightFields = inputs.get(rightInput).getRowType().getFieldList();
            foundBothInputs = true;
          }
        }
      }

      if ((leftKey != null) && (rightKey != null)) {
        // adjustment array: shifts every field ordinal back by the first ordinal of its input
        int[] adjustments = new int[totalFieldCount];
        for (int i = 0; i < inputs.size(); i++) {
          final int adjustment = inputsRange[i].nextSetBit(0);
          for (int j = adjustment; j < inputsRange[i].length(); j++) {
            adjustments[j] = -adjustment;
          }
        }

        // replace right Key input ref
        rightKey =
            rightKey.accept(
                new RelOptUtil.RexInputConverter(
                    rexBuilder, rightFields, rightFields, adjustments));

        // left key only needs to be adjusted if there are system
        // fields, but do it for uniformity
        leftKey =
            leftKey.accept(
                new RelOptUtil.RexInputConverter(
                    rexBuilder, leftFields, leftFields, adjustments));

        RelDataType leftKeyType = leftKey.getType();
        RelDataType rightKeyType = rightKey.getType();

        if (leftKeyType != rightKeyType) {
          // perform casting using Hive rules
          TypeInfo rType = TypeConverter.convert(rightKeyType);
          TypeInfo lType = TypeConverter.convert(leftKeyType);
          TypeInfo tgtType = FunctionRegistry.getCommonClassForComparison(lType, rType);

          if (tgtType == null) {
            throw new CalciteSemanticException(
                "Cannot find common type for join keys "
                    + leftKey
                    + " (type "
                    + leftKeyType
                    + ") and "
                    + rightKey
                    + " (type "
                    + rightKeyType
                    + ")");
          }
          RelDataType targetKeyType = TypeConverter.convert(tgtType, rexBuilder.getTypeFactory());

          // Cast only the side(s) whose type differs from the common type and for which a
          // conversion is reported as required.
          if (leftKeyType != targetKeyType
              && TypeInfoUtils.isConversionRequiredForComparison(tgtType, lType)) {
            leftKey = rexBuilder.makeCast(targetKeyType, leftKey);
          }

          if (rightKeyType != targetKeyType
              && TypeInfoUtils.isConversionRequiredForComparison(tgtType, rType)) {
            rightKey = rexBuilder.makeCast(targetKeyType, rightKey);
          }
        }
      }
    }

    if ((leftKey != null) && (rightKey != null)) {
      // found suitable join keys
      // add them to key list, ensuring that if there is a
      // non-equi join predicate, it appears at the end of the
      // key list; also mark the null filtering property
      addJoinKey(joinKeys.get(leftInput), leftKey, (rangeOp != null) && !rangeOp.isEmpty());
      addJoinKey(joinKeys.get(rightInput), rightKey, (rangeOp != null) && !rangeOp.isEmpty());
      if (filterNulls != null && kind == SqlKind.EQUALS) {
        // nulls are considered not matching for equality comparison
        // add the position of the most recently inserted key
        filterNulls.add(joinKeys.get(leftInput).size() - 1);
      }
      // NOTE(review): the candidate test above admits IS_NOT_DISTINCT_FROM, but this check
      // excludes IS_DISTINCT_FROM; as written, an IS_NOT_DISTINCT_FROM key is also recorded in
      // rangeOp when rangeOp is non-null. Confirm whether that is intended.
      if (rangeOp != null && kind != SqlKind.EQUALS && kind != SqlKind.IS_DISTINCT_FROM) {
        if (reverse) {
          kind = reverse(kind);
        }
        rangeOp.add(op(kind, call.getOperator()));
      }
      return;
    } // else fall through and add this condition as nonEqui condition
  }

  // The operator is not of RexCall type
  // So we fail. Fall through.
  // Add this condition to the list of non-equi-join conditions.
  nonEquiList.add(condition);
}