private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx) throws SerDeException { // We are going to use LBSerDe to serialize values; create OI for retrieval. List<? extends StructField> fields = ((StructObjectInspector) valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs(); List<String> colNames = new ArrayList<String>(fields.size()); List<ObjectInspector> colOis = new ArrayList<ObjectInspector>(fields.size()); for (int i = 0; i < fields.size(); ++i) { StructField field = fields.get(i); colNames.add(field.getFieldName()); // It would be nice if OI could return typeInfo... TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()); colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo)); } return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(colNames, colOis); }
/**
 * Creates a standard struct object inspector with one field per field of the given schema.
 * Field names come from the schema; field inspectors are obtained from each field's type string.
 *
 * @param outputSchema schema to convert; must not be {@code null}
 * @return struct object inspector over the schema's fields, in schema order
 * @throws IOException if {@code outputSchema} is {@code null}
 */
static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema)
    throws IOException {
  if (outputSchema == null) {
    throw new IOException("Invalid output schema specified");
  }

  List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();
  List<String> names = new ArrayList<String>();

  for (HCatFieldSchema column : outputSchema.getFields()) {
    String typeString = column.getTypeString();
    TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
    names.add(column.getName());
    inspectors.add(getObjectInspector(columnType));
  }

  return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
/**
 * Checks the runtime type returned by {@code NiFiOrcUtils.convertToORCObject} for primitive
 * values, an int array, a list, and a {@code map<string,float>}.
 */
@Test
public void test_getWritable() throws Exception {
  // Primitive values, converted without type information.
  Object fromInt = NiFiOrcUtils.convertToORCObject(null, 1);
  assertTrue(fromInt instanceof IntWritable);

  Object fromLong = NiFiOrcUtils.convertToORCObject(null, 1L);
  assertTrue(fromLong instanceof LongWritable);

  Object fromFloat = NiFiOrcUtils.convertToORCObject(null, 1.0f);
  assertTrue(fromFloat instanceof FloatWritable);

  Object fromDouble = NiFiOrcUtils.convertToORCObject(null, 1.0);
  assertTrue(fromDouble instanceof DoubleWritable);

  // Arrays and lists are both expected to come back as a List.
  Object fromArray = NiFiOrcUtils.convertToORCObject(null, new int[] {1, 2, 3});
  assertTrue(fromArray instanceof List);

  Object fromList = NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3));
  assertTrue(fromList instanceof List);

  // Map conversion is driven by an explicit map<string,float> type.
  Map<String, Float> source = new HashMap<>();
  source.put("Hello", 1.0f);
  source.put("World", 2.0f);

  Object converted =
      NiFiOrcUtils.convertToORCObject(
          TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), source);
  assertTrue(converted instanceof MapWritable);

  MapWritable convertedMap = (MapWritable) converted;
  for (Map.Entry<?, ?> entry : convertedMap.entrySet()) {
    assertTrue(entry.getKey() instanceof Text);
    assertTrue(entry.getValue() instanceof FloatWritable);
  }
}
/**
 * Returns the type info parsed from a fixed struct type string with five fields: an int, a
 * map of string to double, a string, a single-member union, and an array of int.
 *
 * @return the parsed struct type info
 */
public static TypeInfo buildComplexOrcSchema() {
  final String complexStructType =
      "struct<myInt:int,myMap:map<string,double>,myEnum:string,myLongOrFloat:uniontype<int>,myIntList:array<int>>";
  return TypeInfoUtils.getTypeInfoFromTypeString(complexStructType);
}
/** * Validate partition schema, checks if the column types match between the partition and the * existing table schema. Returns the list of columns present in the partition but not in the * table. * * @param table the table * @param partitionSchema the partition schema * @return the list of newly added fields * @throws IOException Signals that an I/O exception has occurred. */ public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema) throws IOException { Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>(); for (FieldSchema field : table.getPartitionKeys()) { partitionKeyMap.put(field.getName().toLowerCase(), field); } List<FieldSchema> tableCols = table.getCols(); List<FieldSchema> newFields = new ArrayList<FieldSchema>(); for (int i = 0; i < partitionSchema.getFields().size(); i++) { FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i)); FieldSchema tableField; if (i < tableCols.size()) { tableField = tableCols.get(i); if (!tableField.getName().equalsIgnoreCase(field.getName())) { throw new HCatException( ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, "Expected column <" + tableField.getName() + "> at position " + (i + 1) + ", found column <" + field.getName() + ">"); } } else { tableField = partitionKeyMap.get(field.getName().toLowerCase()); if (tableField != null) { throw new HCatException( ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">"); } } if (tableField == null) { // field present in partition but not in table newFields.add(field); } else { // field present in both. 
validate type has not changed TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType()); TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType()); if (!partitionType.equals(tableType)) { throw new HCatException( ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + field.getName() + ">, expected <" + tableType.getTypeName() + ">, got <" + partitionType.getTypeName() + ">"); } } } return newFields; }
@Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { super.evaluateChildren(batch); } if (!integerPrimitiveCategoryKnown) { String typeName = getOutputType().toLowerCase(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); integerPrimitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); integerPrimitiveCategoryKnown = true; } DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; if (n == 0) { // Nothing to do return; } if (inV.noNulls) { outV.noNulls = true; if (inV.isRepeating) { outV.isRepeating = true; func(outV, inV, 0); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; func(outV, inV, i); } outV.isRepeating = false; } else { for (int i = 0; i != n; i++) { func(outV, inV, i); } outV.isRepeating = false; } } else { // Handle case with nulls. Don't do function if the value is null, // because the data may be undefined for a null value. outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; if (!inV.isNull[0]) { func(outV, inV, 0); } } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { func(outV, inV, i); } } outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for (int i = 0; i != n; i++) { if (!inV.isNull[i]) { func(outV, inV, i); } } outV.isRepeating = false; } } }