/**
 * Serializes a struct row into an {@link ArrayWritable}.
 *
 * <p>On success, stores the length of the array held by the produced writable in
 * {@code serializedSize} and marks the last operation as {@code SERIALIZE}.
 *
 * @param obj the row object to serialize
 * @param objInspector inspector describing {@code obj}; its category must be {@code STRUCT}
 * @return the writable built by {@code createStruct}
 * @throws SerDeException if {@code objInspector} is not a struct inspector
 */
@Override
public Writable serialize(final Object obj, final ObjectInspector objInspector)
    throws SerDeException {
  final Category category = objInspector.getCategory();
  if (!category.equals(Category.STRUCT)) {
    throw new SerDeException("Cannot serialize " + category + ". Can only serialize a struct");
  }

  final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
  final ArrayWritable row = createStruct(obj, structInspector);

  // Bookkeeping read elsewhere in the class.
  serializedSize = row.get().length;
  status = LAST_OPERATION.SERIALIZE;
  return row;
}
/**
 * Returns the deserializer's object inspector as a {@link StructObjectInspector}.
 *
 * @param deserializer the deserializer whose inspector is requested
 * @return the deserializer's inspector, cast to a struct inspector
 * @throws IllegalArgumentException (via {@code checkArgument}) if the inspector's category is not
 *     {@code STRUCT}
 * @throws RuntimeException wrapping any {@link SerDeException} raised while obtaining the
 *     inspector
 */
public static StructObjectInspector getTableObjectInspector(
    @SuppressWarnings("deprecation") Deserializer deserializer) {
  final ObjectInspector inspector;
  try {
    inspector = deserializer.getObjectInspector();
  } catch (SerDeException e) {
    throw Throwables.propagate(e);
  }

  final Category category = inspector.getCategory();
  checkArgument(category == Category.STRUCT, "expected STRUCT: %s", category);
  return (StructObjectInspector) inspector;
}
/**
 * Dispatches serialization of {@code object} to the helper matching the inspector's category.
 *
 * @param type the target type passed through to the category-specific helper
 * @param builder the block builder passed through to the category-specific helper
 * @param object the value to serialize
 * @param inspector inspector describing {@code object}
 * @return {@code null} for primitives; otherwise whatever the list/map/struct helper returns
 * @throws RuntimeException if the inspector's category is not one of the four handled here
 */
public static Block serializeObject(
    Type type, BlockBuilder builder, Object object, ObjectInspector inspector) {
  switch (inspector.getCategory()) {
    case PRIMITIVE: {
      // Primitives are written straight into the builder; there is no block to hand back.
      serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector);
      return null;
    }
    case LIST: {
      return serializeList(type, builder, object, (ListObjectInspector) inspector);
    }
    case MAP: {
      return serializeMap(type, builder, object, (MapObjectInspector) inspector);
    }
    case STRUCT: {
      return serializeStruct(type, builder, object, (StructObjectInspector) inspector);
    }
    default:
      throw new RuntimeException(
          "Unknown object inspector category: " + inspector.getCategory());
  }
}
public void printFileInfo() throws Exception { System.out.println("Reader: " + m_reader); System.out.println("# Rows: " + m_reader.getNumberOfRows()); System.out.println("# Types in the file: " + m_types.size()); for (int i = 0; i < m_types.size(); i++) { System.out.println("Type " + i + ": " + m_types.get(i).getKind()); } System.out.println("Compression: " + m_reader.getCompression()); if (m_reader.getCompression() != CompressionKind.NONE) { System.out.println("Compression size: " + m_reader.getCompressionSize()); } m_oi = (StructObjectInspector) m_reader.getObjectInspector(); System.out.println("object inspector type category: " + m_oi.getCategory()); System.out.println("object inspector type name : " + m_oi.getTypeName()); System.out.println("Number of columns in the table: " + m_fields.size()); // Print the type info: for (int i = 0; i < m_fields.size(); i++) { System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName()); ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector(); System.out.println("Column " + i + " type category: " + lv_foi.getCategory()); System.out.println("Column " + i + " type name: " + lv_foi.getTypeName()); } }
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { if (objInspector.getCategory() != Category.STRUCT) { throw new SerDeException( getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName()); } StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < fields.size(); i++) { if (i > 0) sb.append(separator); Object column = soi.getStructFieldData(obj, fields.get(i)); if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) { // For primitive object, serialize to plain string sb.append(column == null ? nullString : column.toString()); } else { // For complex object, serialize to JSON format sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector())); } } serializeCache.set(sb.toString()); return serializeCache; }
private ObjectInspector solveOi(ObjectInspector arg) { switch (arg.getCategory()) { case PRIMITIVE: // VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, DECIMAL, // UNKNOWN PrimitiveObjectInspector poi = (PrimitiveObjectInspector) arg; return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( poi.getPrimitiveCategory()); case LIST: return ObjectInspectorFactory.getStandardListObjectInspector( solveOi(((ListObjectInspector) arg).getListElementObjectInspector())); case MAP: return ObjectInspectorFactory.getStandardMapObjectInspector( solveOi(((MapObjectInspector) arg).getMapKeyObjectInspector()), solveOi(((MapObjectInspector) arg).getMapValueObjectInspector())); case STRUCT: StructObjectInspector soi = (StructObjectInspector) arg; int size = soi.getAllStructFieldRefs().size(); ArrayList<String> fnl = new ArrayList<String>(size); ArrayList<ObjectInspector> foil = new ArrayList<ObjectInspector>(size); for (StructField sf : ((StructObjectInspector) arg).getAllStructFieldRefs()) { fnl.add(sf.getFieldName()); foil.add(solveOi(sf.getFieldObjectInspector())); } return JsonStructObjectInspector.getJsonStructObjectInspector(fnl, foil); default: return arg; } }
@Override protected String extractField(Object target) { if (target instanceof HiveType) { HiveType type = (HiveType) target; ObjectInspector inspector = type.getObjectInspector(); if (inspector instanceof StructObjectInspector) { StructObjectInspector soi = (StructObjectInspector) inspector; StructField field = soi.getStructFieldRef(fieldName); ObjectInspector foi = field.getFieldObjectInspector(); Assert.isTrue( foi.getCategory() == ObjectInspector.Category.PRIMITIVE, String.format( "Field [%s] needs to be a primitive; found [%s]", fieldName, foi.getTypeName())); // expecting a writeable - simply do a toString Object data = soi.getStructFieldData(type.getObject(), field); if (data == null || data instanceof NullWritable) { return StringUtils.EMPTY; } return data.toString(); } } return null; }
/**
 * Validates that an inspector is primitive with a category accepted here, and returns it as a
 * {@link PrimitiveObjectInspector}.
 *
 * <p>Accepted primitive categories: BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP.
 *
 * @param argOI the inspector to validate
 * @return {@code argOI} cast to {@link PrimitiveObjectInspector}
 * @throws UDFArgumentTypeException (argument index 0) if {@code argOI} is not primitive or its
 *     primitive category is not in the accepted list
 */
public static PrimitiveObjectInspector asDoubleCompatibleOI(@Nonnull final ObjectInspector argOI)
    throws UDFArgumentTypeException {
  if (argOI.getCategory() != Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(
        0,
        "Only primitive type arguments are accepted but " + argOI.getTypeName() + " is passed.");
  }

  final PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) argOI;
  switch (primitiveOI.getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
    case STRING:
    case TIMESTAMP:
      return primitiveOI;
    default:
      throw new UDFArgumentTypeException(
          0,
          "Only numeric or string type arguments are accepted but "
              + argOI.getTypeName()
              + " is passed.");
  }
}
/**
 * Serializes a struct row into a {@code VoltRecord} for the configured table.
 *
 * <p>The number of fields written is taken from {@code m_oig.getColumnTypes()}. Every field
 * inspector is cast to {@link PrimitiveObjectInspector}; timestamp values are converted from
 * {@code Timestamp} to {@code Date} before being added.
 *
 * @param obj the row object to serialize
 * @param oi inspector describing {@code obj}; its category must be {@code STRUCT}
 * @return the populated record
 * @throws SerDeException ({@code VoltSerdeException}) if {@code oi} is not a struct inspector
 */
@Override
public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
  if (oi.getCategory() != Category.STRUCT) {
    throw new VoltSerdeException(
        getClass().toString() + " can only serialize struct types, but we got: " + oi.getTypeName());
  }

  final VoltRecord record = new VoltRecord(m_voltConf.getTableName());
  final StructObjectInspector structInspector = (StructObjectInspector) oi;
  final List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
  final List<Object> rawValues = structInspector.getStructFieldsDataAsList(obj);

  final int columnCount = m_oig.getColumnTypes().size();
  for (int col = 0; col < columnCount; ++col) {
    final PrimitiveObjectInspector columnInspector =
        (PrimitiveObjectInspector) fieldRefs.get(col).getFieldObjectInspector();
    Object value = columnInspector.getPrimitiveJavaObject(rawValues.get(col));

    final boolean isTimestamp =
        columnInspector.getTypeInfo().equals(TypeInfoFactory.timestampTypeInfo);
    if (isTimestamp && value != null) {
      value = new Date(((Timestamp) value).getTime());
    }
    record.add(value);
  }
  return record;
}
/**
 * Returns the given inspector as a {@link PrimitiveObjectInspector}.
 *
 * @param oi the inspector to check
 * @return {@code oi} cast to {@link PrimitiveObjectInspector}
 * @throws UDFArgumentException if the category of {@code oi} is not {@code PRIMITIVE}
 */
public static PrimitiveObjectInspector asPrimitiveObjectInspector(
    @Nonnull final ObjectInspector oi) throws UDFArgumentException {
  final boolean primitive = oi.getCategory() == Category.PRIMITIVE;
  if (primitive) {
    return (PrimitiveObjectInspector) oi;
  }
  throw new UDFArgumentException(
      "Is not PrimitiveObjectInspector: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
}
/**
 * Returns the given inspector as a {@link ListObjectInspector}.
 *
 * @param oi the inspector to check
 * @return {@code oi} cast to {@link ListObjectInspector}
 * @throws UDFArgumentException if the category of {@code oi} is not {@code LIST}
 */
@Nonnull
public static ListObjectInspector asListOI(@Nonnull final ObjectInspector oi)
    throws UDFArgumentException {
  if (oi.getCategory() == Category.LIST) {
    return (ListObjectInspector) oi;
  }
  throw new UDFArgumentException("Expected List OI but was: " + oi);
}
/**
 * Builds the writable for {@code obj} by delegating to the helper matching the inspector's
 * category.
 *
 * @param obj the value to convert; may be {@code null}
 * @param inspector inspector describing {@code obj}
 * @return {@code null} when {@code obj} is {@code null}, otherwise the helper's result
 * @throws SerDeException if the inspector's category is not STRUCT, LIST, MAP or PRIMITIVE
 */
private Writable createObject(final Object obj, final ObjectInspector inspector)
    throws SerDeException {
  if (obj == null) {
    return null;
  }

  final Writable converted;
  switch (inspector.getCategory()) {
    case STRUCT:
      converted = createStruct(obj, (StructObjectInspector) inspector);
      break;
    case LIST:
      converted = createArray(obj, (ListObjectInspector) inspector);
      break;
    case MAP:
      converted = createMap(obj, (MapObjectInspector) inspector);
      break;
    case PRIMITIVE:
      converted = createPrimitive(obj, (PrimitiveObjectInspector) inspector);
      break;
    default:
      throw new SerDeException("Unknown data type" + inspector.getCategory());
  }
  return converted;
}
/**
 * Serializes a field. Since structures can be nested, this may be called recursively, for
 * instance when serializing a {@code list<struct<>>}.
 *
 * <p>Primitive categories handled: VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE and
 * STRING. Any other primitive category (except UNKNOWN, which throws) falls through the inner
 * switch and yields {@code null}.
 *
 * @param obj Object holding the field's content
 * @param oi The field's object inspector
 * @return the serialized object, or {@code null} if {@code obj} is {@code null}
 * @throws RuntimeException if the primitive category is {@code UNKNOWN}
 */
Object serializeField(Object obj, ObjectInspector oi) {
  if (obj == null) {
    return null;
  }

  Object result = null;
  switch (oi.getCategory()) {
    case PRIMITIVE:
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      switch (poi.getPrimitiveCategory()) {
        case VOID:
          result = null;
          break;
        case BOOLEAN:
          result = (((BooleanObjectInspector) poi).get(obj) ? Boolean.TRUE : Boolean.FALSE);
          break;
        case BYTE:
          // A byte inspector is not a ShortObjectInspector, so the previous cast threw
          // ClassCastException for every TINYINT column. Read the boxed value through the
          // generic primitive accessor instead.
          result = poi.getPrimitiveJavaObject(obj);
          break;
        case DOUBLE:
          result = (((DoubleObjectInspector) poi).get(obj));
          break;
        case FLOAT:
          result = (((FloatObjectInspector) poi).get(obj));
          break;
        case INT:
          result = (((IntObjectInspector) poi).get(obj));
          break;
        case LONG:
          result = (((LongObjectInspector) poi).get(obj));
          break;
        case SHORT:
          result = (((ShortObjectInspector) poi).get(obj));
          break;
        case STRING:
          result = (((StringObjectInspector) poi).getPrimitiveJavaObject(obj));
          break;
        case UNKNOWN:
          throw new RuntimeException("Unknown primitive");
      }
      break;
    case MAP:
      result = serializeMap(obj, (MapObjectInspector) oi);
      break;
    case LIST:
      result = serializeList(obj, (ListObjectInspector) oi);
      break;
    case STRUCT:
      result = serializeStruct(obj, (StructObjectInspector) oi, null);
      break;
  }
  return result;
}
/**
 * Validates the (symbol name, symbol expression) argument pairs and registers them on the
 * evaluator.
 *
 * <p>The pairs occupy {@code args[1 .. argsNum - 2]}. Each name must be a constant string and
 * each expression must have a boolean primitive inspector; violations are reported through
 * {@code throwErrorWithSignature}.
 *
 * @param evaluator the evaluator whose {@code symInfo} is created and filled
 * @param args all arguments of the function call
 * @param argsNum the number of arguments
 * @throws SemanticException declared by this method; presumably raised by
 *     {@code throwErrorWithSignature} on invalid arguments
 */
private void validateAndSetupSymbolInfo(
    MatchPath evaluator, List<PTFExpressionDef> args, int argsNum) throws SemanticException {
  final int symbolArgsSz = argsNum - 2;
  if (symbolArgsSz % 2 != 0) {
    throwErrorWithSignature(
        "Symbol Name, Expression need to be specified in pairs: "
            + "there are odd number of symbol args");
  }

  evaluator.symInfo = new SymbolsInfo(symbolArgsSz / 2);

  for (int nameIdx = 1; nameIdx <= symbolArgsSz; nameIdx += 2) {
    // --- symbol name: must be a constant string ---
    final PTFExpressionDef nameArg = args.get(nameIdx);
    final ObjectInspector nameOI = nameArg.getOI();
    final boolean nameIsConstantString =
        ObjectInspectorUtils.isConstantObjectInspector(nameOI)
            && nameOI.getCategory() == ObjectInspector.Category.PRIMITIVE
            && ((PrimitiveObjectInspector) nameOI).getPrimitiveCategory()
                == PrimitiveObjectInspector.PrimitiveCategory.STRING;
    if (!nameIsConstantString) {
      throwErrorWithSignature(
          String.format(
              "Currently a Symbol Name(%s) must be a Constant String",
              nameArg.getExpressionTreeString()));
    }
    final String symbolName =
        ((ConstantObjectInspector) nameOI).getWritableConstantValue().toString();

    // --- symbol expression: must be boolean ---
    final PTFExpressionDef exprArg = args.get(nameIdx + 1);
    final ObjectInspector exprOI = exprArg.getOI();
    final boolean exprIsBoolean =
        exprOI.getCategory() == ObjectInspector.Category.PRIMITIVE
            && ((PrimitiveObjectInspector) exprOI).getPrimitiveCategory()
                == PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN;
    if (!exprIsBoolean) {
      throwErrorWithSignature(
          String.format(
              "Currently a Symbol Expression(%s) " + "must be a boolean expression",
              exprArg.getExpressionTreeString()));
    }

    evaluator.symInfo.add(symbolName, exprArg);
  }
}
/**
 * Returns a converter from {@code arg} to its output-side inspector.
 *
 * <p>Primitives convert to the same inspector; lists, maps and structs convert to the inspector
 * produced by {@code solveOi}.
 *
 * @param arg the source inspector
 * @return the converter, or {@code null} for any other category
 */
private ObjectInspectorConverters.Converter getConverter(ObjectInspector arg) {
  final ObjectInspector target;
  switch (arg.getCategory()) {
    case PRIMITIVE:
      target = arg;
      break;
    case LIST:
    case MAP:
    case STRUCT:
      target = solveOi(arg);
      break;
    default:
      return null;
  }
  return ObjectInspectorConverters.getConverter(arg, target);
}
/**
 * Creates the {@code LazyBinaryObject} matching the category of the given object inspector.
 *
 * @param oi the inspector describing the object to create; for MAP, LIST and STRUCT it is cast
 *     to the corresponding LazyBinary inspector type
 * @return a new lazy binary primitive, map, array or struct
 * @throws RuntimeException if the category is none of PRIMITIVE, MAP, LIST or STRUCT
 */
public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) {
  final ObjectInspector.Category category = oi.getCategory();
  switch (category) {
    case PRIMITIVE: {
      return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector) oi);
    }
    case MAP: {
      return new LazyBinaryMap((LazyBinaryMapObjectInspector) oi);
    }
    case LIST: {
      return new LazyBinaryArray((LazyBinaryListObjectInspector) oi);
    }
    case STRUCT: {
      return new LazyBinaryStruct((LazyBinaryStructObjectInspector) oi);
    }
    default:
      throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
  }
}
/**
 * Validates that the first argument is a constant string and stores it as the evaluator's
 * pattern string.
 *
 * @param evaluator the evaluator whose {@code patternStr} is set
 * @param args all arguments of the function call; element 0 is the pattern
 * @throws SemanticException declared by this method; presumably raised by
 *     {@code throwErrorWithSignature} when the argument is not a constant string
 */
private void validateAndSetupPatternStr(MatchPath evaluator, List<PTFExpressionDef> args)
    throws SemanticException {
  final PTFExpressionDef patternArg = args.get(0);
  final ObjectInspector patternOI = patternArg.getOI();

  final boolean isConstantString =
      ObjectInspectorUtils.isConstantObjectInspector(patternOI)
          && patternOI.getCategory() == ObjectInspector.Category.PRIMITIVE
          && ((PrimitiveObjectInspector) patternOI).getPrimitiveCategory()
              == PrimitiveObjectInspector.PrimitiveCategory.STRING;
  if (!isConstantString) {
    throwErrorWithSignature("Currently the symbol Pattern must be a Constant String.");
  }

  final ConstantObjectInspector constantOI = (ConstantObjectInspector) patternOI;
  evaluator.patternStr = constantOI.getWritableConstantValue().toString();
}
/**
 * Validates that the last argument is a constant string and stores it as the evaluator's result
 * expression string.
 *
 * @param evaluator the evaluator whose {@code resultExprStr} is set
 * @param args all arguments of the function call
 * @param argsNum the number of arguments; the result expression is at index {@code argsNum - 1}
 * @throws SemanticException declared by this method; presumably raised by
 *     {@code throwErrorWithSignature} when the argument is not a constant string
 */
private void validateAndSetupResultExprStr(
    MatchPath evaluator, List<PTFExpressionDef> args, int argsNum) throws SemanticException {
  final PTFExpressionDef resultArg = args.get(argsNum - 1);
  final ObjectInspector resultOI = resultArg.getOI();

  final boolean isConstantString =
      ObjectInspectorUtils.isConstantObjectInspector(resultOI)
          && resultOI.getCategory() == ObjectInspector.Category.PRIMITIVE
          && ((PrimitiveObjectInspector) resultOI).getPrimitiveCategory()
              == PrimitiveObjectInspector.PrimitiveCategory.STRING;
  if (!isConstantString) {
    throwErrorWithSignature("Currently the result Expr parameter must be a Constant String.");
  }

  final ConstantObjectInspector constantOI = (ConstantObjectInspector) resultOI;
  evaluator.resultExprStr = constantOI.getWritableConstantValue().toString();
}
/** * Convert a LazyObject to a standard Java object in compliance with JDBC 3.0 (see JDBC 3.0 * Specification, Table B-3: Mapping from JDBC Types to Java Object Types). * * <p>This method is kept consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}. */ private static Object convertLazyToJava(Object o, ObjectInspector oi) { Object obj = ObjectInspectorUtils.copyToStandardObject(o, oi, ObjectInspectorCopyOption.JAVA); if (obj == null) { return null; } if (oi.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) { return new String((byte[]) obj); } // for now, expose non-primitive as a string // TODO: expose non-primitive as a structured object while maintaining JDBC compliance if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) { return SerDeUtils.getJSONString(o, oi); } return obj; }
/** * Hive will call this to serialize an object. Returns a writable object of the same class * returned by <a href="#getSerializedClass">getSerializedClass</a> * * @param obj The object to serialize * @param objInspector The ObjectInspector that knows about the object's structure * @return a serialized object in form of a Writable. Must be the same type returned by <a * href="#getSerializedClass">getSerializedClass</a> * @throws SerDeException */ @Override public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { // make sure it is a struct record if (objInspector.getCategory() != Category.STRUCT) { throw new SerDeException( getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName()); } JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames); Text t = new Text(serializer.toString()); serializedDataSize = t.getBytes().length; return t; }
/** * Convert a Object to a standard Java object in compliance with JDBC 3.0 (see JDBC 3.0 * Specification, Table B-3: Mapping from JDBC Types to Java Object Types). * * <p>This method is kept consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}. */ public static Object toThriftPayload(Object val, ObjectInspector valOI, int version) { if (valOI.getCategory() == ObjectInspector.Category.PRIMITIVE) { if (val == null) { return null; } Object obj = ObjectInspectorUtils.copyToStandardObject( val, valOI, ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA); // uses string type for binary before HIVE_CLI_SERVICE_PROTOCOL_V6 if (version < 5 && ((PrimitiveObjectInspector) valOI).getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.BINARY) { // todo HIVE-5269 return new String((byte[]) obj); } return obj; } // for now, expose non-primitive as a string // TODO: expose non-primitive as a structured object while maintaining JDBC compliance return SerDeUtils.getJSONString(val, valOI); }
/**
 * Fills {@code objectArrayBuffer} with the fields of a lazy binary struct.
 *
 * <p>Primitive fields are stored as the LazyBinary object itself; complex fields are stored as a
 * standard object copied with the {@code WRITABLE} option. Slots are overwritten with
 * {@code set}, so the buffer must already contain at least as many elements as the struct has
 * fields.
 *
 * @param lazyBinaryStruct the struct to read fields from
 * @param objectArrayBuffer the pre-sized buffer to overwrite
 * @param lazyBinaryStructObjectInspector inspector describing the struct's fields
 * @return {@code objectArrayBuffer}
 */
public static List<Object> getComplexFieldsAsList(
    LazyBinaryStruct lazyBinaryStruct,
    ArrayList<Object> objectArrayBuffer,
    LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
  final List<? extends StructField> fieldRefs =
      lazyBinaryStructObjectInspector.getAllStructFieldRefs();

  for (int idx = 0; idx < fieldRefs.size(); idx++) {
    final ObjectInspector fieldInspector = fieldRefs.get(idx).getFieldObjectInspector();
    final boolean primitive = fieldInspector.getCategory() == Category.PRIMITIVE;
    final Object lazyValue = lazyBinaryStruct.getField(idx);

    final Object stored =
        primitive
            ? lazyValue
            : ObjectInspectorUtils.copyToStandardObject(
                lazyValue, fieldInspector, ObjectInspectorCopyOption.WRITABLE);
    objectArrayBuffer.set(idx, stored);
  }
  return objectArrayBuffer;
}
/** * Check a particular field and set its size and offset in bytes based on the field type and the * bytes arrays. * * <p>For void, boolean, byte, short, int, long, float and double, there is no offset and the size * is fixed. For string, map, list, struct, the first four bytes are used to store the size. So * the offset is 4 and the size is computed by concating the first four bytes together. The first * four bytes are defined with respect to the offset in the bytes arrays. For timestamp, if the * first bit is 0, the record length is 4, otherwise a VInt begins at the 5th byte and its length * is added to 4. * * @param objectInspector object inspector of the field * @param bytes bytes arrays store the table row * @param offset offset of this field * @param recordInfo modify this byteinfo object and return it */ public static void checkObjectByteInfo( ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo, VInt vInt) { Category category = objectInspector.getCategory(); switch (category) { case PRIMITIVE: PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory(); switch (primitiveCategory) { case VOID: recordInfo.elementOffset = 0; recordInfo.elementSize = 0; break; case BOOLEAN: case BYTE: recordInfo.elementOffset = 0; recordInfo.elementSize = 1; break; case SHORT: recordInfo.elementOffset = 0; recordInfo.elementSize = 2; break; case FLOAT: recordInfo.elementOffset = 0; recordInfo.elementSize = 4; break; case DOUBLE: recordInfo.elementOffset = 0; recordInfo.elementSize = 8; break; case INT: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case LONG: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case STRING: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case 
CHAR: case VARCHAR: LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case BINARY: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; case DATE: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case TIMESTAMP: recordInfo.elementOffset = 0; recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset); break; case DECIMAL: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = 0; recordInfo.elementSize = vInt.length; LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt); recordInfo.elementSize += vInt.length + vInt.value; break; default: { throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory); } } break; case LIST: case MAP: case STRUCT: case UNION: recordInfo.elementOffset = 4; recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset); break; default: { throw new RuntimeException("Unrecognized non-primitive type: " + category); } } }
/**
 * Appends the JSON representation of {@code o} to {@code sb}.
 *
 * <p>A null value at this level is written as {@code nullStr}; nested nulls (list elements, map
 * keys and values, struct fields, union members) are written as {@code JSON_NULL}. Strings,
 * chars, varchars, dates and timestamps are wrapped in double quotes; only string, char and
 * varchar values go through {@code escapeString}. Binary and decimal values are appended
 * without quotes.
 *
 * @param sb the builder to append to
 * @param o the value to render; may be {@code null}
 * @param oi inspector describing {@code o}
 * @param nullStr text to emit when the value at this level is null
 * @throws RuntimeException if the category or primitive category is not handled
 */
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
  switch (oi.getCategory()) {
    case PRIMITIVE: {
      final PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        appendPrimitiveJSON(sb, o, primitiveOI);
      }
      break;
    }
    case LIST: {
      final ListObjectInspector listOI = (ListObjectInspector) oi;
      final ObjectInspector elementOI = listOI.getListElementObjectInspector();
      final List<?> elements = listOI.getList(o);
      if (elements == null) {
        sb.append(nullStr);
        break;
      }
      sb.append(LBRACKET);
      for (int idx = 0; idx < elements.size(); idx++) {
        if (idx > 0) {
          sb.append(COMMA);
        }
        buildJSONString(sb, elements.get(idx), elementOI, JSON_NULL);
      }
      sb.append(RBRACKET);
      break;
    }
    case MAP: {
      final MapObjectInspector mapOI = (MapObjectInspector) oi;
      final ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
      final ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
      final Map<?, ?> entries = mapOI.getMap(o);
      if (entries == null) {
        sb.append(nullStr);
        break;
      }
      sb.append(LBRACE);
      boolean needComma = false;
      for (Map.Entry<?, ?> entry : entries.entrySet()) {
        if (needComma) {
          sb.append(COMMA);
        }
        needComma = true;
        buildJSONString(sb, entry.getKey(), keyOI, JSON_NULL);
        sb.append(COLON);
        buildJSONString(sb, entry.getValue(), valueOI, JSON_NULL);
      }
      sb.append(RBRACE);
      break;
    }
    case STRUCT: {
      final StructObjectInspector structOI = (StructObjectInspector) oi;
      final List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
      if (o == null) {
        sb.append(nullStr);
        break;
      }
      sb.append(LBRACE);
      for (int idx = 0; idx < fieldRefs.size(); idx++) {
        final StructField field = fieldRefs.get(idx);
        if (idx > 0) {
          sb.append(COMMA);
        }
        sb.append(QUOTE);
        sb.append(field.getFieldName());
        sb.append(QUOTE);
        sb.append(COLON);
        buildJSONString(
            sb,
            structOI.getStructFieldData(o, field),
            field.getFieldObjectInspector(),
            JSON_NULL);
      }
      sb.append(RBRACE);
      break;
    }
    case UNION: {
      final UnionObjectInspector unionOI = (UnionObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
        break;
      }
      // Rendered as {tag:value}.
      sb.append(LBRACE);
      sb.append(unionOI.getTag(o));
      sb.append(COLON);
      buildJSONString(
          sb,
          unionOI.getField(o),
          unionOI.getObjectInspectors().get(unionOI.getTag(o)),
          JSON_NULL);
      sb.append(RBRACE);
      break;
    }
    default:
      throw new RuntimeException("Unknown type in ObjectInspector!");
  }
}

/** Appends the JSON form of a non-null primitive value; see {@code buildJSONString}. */
private static void appendPrimitiveJSON(
    StringBuilder sb, Object o, PrimitiveObjectInspector poi) {
  switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
      if (((BooleanObjectInspector) poi).get(o)) {
        sb.append("true");
      } else {
        sb.append("false");
      }
      break;
    case BYTE:
      sb.append(((ByteObjectInspector) poi).get(o));
      break;
    case SHORT:
      sb.append(((ShortObjectInspector) poi).get(o));
      break;
    case INT:
      sb.append(((IntObjectInspector) poi).get(o));
      break;
    case LONG:
      sb.append(((LongObjectInspector) poi).get(o));
      break;
    case FLOAT:
      sb.append(((FloatObjectInspector) poi).get(o));
      break;
    case DOUBLE:
      sb.append(((DoubleObjectInspector) poi).get(o));
      break;
    case STRING:
      sb.append('"')
          .append(escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(o)))
          .append('"');
      break;
    case CHAR:
      sb.append('"')
          .append(
              escapeString(((HiveCharObjectInspector) poi).getPrimitiveJavaObject(o).toString()))
          .append('"');
      break;
    case VARCHAR:
      sb.append('"')
          .append(
              escapeString(
                  ((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(o).toString()))
          .append('"');
      break;
    case DATE:
      sb.append('"');
      sb.append(((DateObjectInspector) poi).getPrimitiveWritableObject(o));
      sb.append('"');
      break;
    case TIMESTAMP:
      sb.append('"');
      sb.append(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o));
      sb.append('"');
      break;
    case BINARY: {
      // Only the valid prefix of the backing array is decoded, through a Text.
      final BytesWritable bytes = ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o);
      final Text text = new Text();
      text.set(bytes.getBytes(), 0, bytes.getLength());
      sb.append(text.toString());
      break;
    }
    case DECIMAL:
      sb.append(((HiveDecimalObjectInspector) poi).getPrimitiveJavaObject(o));
      break;
    default:
      throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory());
  }
}
// This method is just for experimentation. public void testRead() throws Exception { m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf)); System.out.println("Reader: " + m_reader); System.out.println("# Rows: " + m_reader.getNumberOfRows()); m_types = m_reader.getTypes(); System.out.println("# Types in the file: " + m_types.size()); for (int i = 0; i < m_types.size(); i++) { System.out.println("Type " + i + ": " + m_types.get(i).getKind()); } System.out.println("Compression: " + m_reader.getCompression()); if (m_reader.getCompression() != CompressionKind.NONE) { System.out.println("Compression size: " + m_reader.getCompressionSize()); } StructObjectInspector m_oi = (StructObjectInspector) m_reader.getObjectInspector(); System.out.println("object inspector type category: " + m_oi.getCategory()); System.out.println("object inspector type name : " + m_oi.getTypeName()); m_fields = m_oi.getAllStructFieldRefs(); System.out.println("Number of columns in the table: " + m_fields.size()); RecordReader m_rr = m_reader.rows(); // Print the type info: for (int i = 0; i < m_fields.size(); i++) { System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName()); ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector(); System.out.println("Column " + i + " type category: " + lv_foi.getCategory()); System.out.println("Column " + i + " type name: " + lv_foi.getTypeName()); // Object lv_column_val = m_oi.getStructFieldData(lv_row, m_fields.get(i)); // System.out.print("Column " + i + " value: " + lv_row.getFieldValue(i)); } OrcStruct lv_row = null; Object lv_field_val = null; StringBuilder lv_row_string = new StringBuilder(1024); while (m_rr.hasNext()) { lv_row = (OrcStruct) m_rr.next(lv_row); lv_row_string.setLength(0); for (int i = 0; i < m_fields.size(); i++) { lv_field_val = lv_row.getFieldValue(i); if (lv_field_val != null) { lv_row_string.append(lv_field_val); } lv_row_string.append('|'); } System.out.println(lv_row_string); } 
/** * Typecasting to appropriate type based on the 'kind' if (OrcProto.Type.Kind.INT == * m_types.get(1).getKind()) { IntWritable lvf_1_val = (IntWritable) lv_row.getFieldValue(0); * System.out.println("Column 1 value: " + lvf_1_val); } */ }
/** * True if Object passed is representing null object. * * @param o The object * @param oi The ObjectInspector * @return true if the object passed is representing NULL object false otherwise */ public static boolean hasAnyNullObject(Object o, ObjectInspector oi) { switch (oi.getCategory()) { case PRIMITIVE: { if (o == null) { return true; } return false; } case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector(); List<?> olist = loi.getList(o); if (olist == null) { return true; } else { // there are no elements in the list if (olist.size() == 0) { return false; } // if all the elements are representing null, then return true for (int i = 0; i < olist.size(); i++) { if (hasAnyNullObject(olist.get(i), listElementObjectInspector)) { return true; } } return false; } } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector(); Map<?, ?> omap = moi.getMap(o); if (omap == null) { return true; } else { // there are no elements in the map if (omap.entrySet().size() == 0) { return false; } // if all the entries of map are representing null, then return true for (Map.Entry<?, ?> entry : omap.entrySet()) { if (hasAnyNullObject(entry.getKey(), mapKeyObjectInspector) || hasAnyNullObject(entry.getValue(), mapValueObjectInspector)) { return true; } } return false; } } case STRUCT: { StructObjectInspector soi = (StructObjectInspector) oi; List<? 
extends StructField> structFields = soi.getAllStructFieldRefs(); if (o == null) { return true; } else { // there are no fields in the struct if (structFields.size() == 0) { return false; } // if any the fields of struct are representing null, then return true for (int i = 0; i < structFields.size(); i++) { if (hasAnyNullObject( soi.getStructFieldData(o, structFields.get(i)), structFields.get(i).getFieldObjectInspector())) { return true; } } return false; } } case UNION: { UnionObjectInspector uoi = (UnionObjectInspector) oi; if (o == null) { return true; } else { // there are no elements in the union if (uoi.getObjectInspectors().size() == 0) { return false; } return hasAnyNullObject(uoi.getField(o), uoi.getObjectInspectors().get(uoi.getTag(o))); } } default: throw new RuntimeException("Unknown type in ObjectInspector!"); } }
/**
 * Validates the UDF arguments and declares the UDF's result type.
 *
 * <p>Exactly one argument with the primitive category STRING is accepted. The result is a
 * struct with the fields {@code project_class}, {@code project}, {@code qualifiers} (a list of
 * strings) and {@code tld}; the position of each field is recorded in the matching
 * {@code IDX_*} member, and {@code result} is allocated with one slot per field.
 *
 * @param arguments the inspectors of the arguments passed to the UDF
 * @return the struct inspector describing the UDF's result
 * @throws UDFArgumentException if the number of arguments is not 1 or the argument is not a
 *     string
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 1) {
    throw new UDFArgumentLengthException(
        "The HostNormalizerUDF takes an array with only 1 element as argument");
  }

  // The single parameter must be a primitive of category STRING. The primitive category is
  // only looked at once the inspector is known to be primitive.
  final int argIndex = 0;
  final ObjectInspector arg = arguments[argIndex];
  final boolean isString =
      arg.getCategory() == Category.PRIMITIVE
          && ((PrimitiveObjectInspector) arg).getPrimitiveCategory() == PrimitiveCategory.STRING;
  if (!isString) {
    throw new UDFArgumentTypeException(
        argIndex,
        "A string argument was expected but an argument of type "
            + arg.getTypeName()
            + " was given.");
  }

  // Instantiate the Webrequest
  webrequest = Webrequest.getInstance();
  argumentOI = (StringObjectInspector) arg;

  final List<String> fieldNames = new LinkedList<>();
  final List<ObjectInspector> fieldOIs = new LinkedList<>();

  // Each IDX_* is the position the field is about to take in the struct.
  IDX_PROJECT_CLASS = fieldNames.size();
  fieldNames.add("project_class");
  fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

  IDX_PROJECT = fieldNames.size();
  fieldNames.add("project");
  fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

  IDX_QUALIFIERS = fieldNames.size();
  fieldNames.add("qualifiers");
  fieldOIs.add(
      ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.javaStringObjectInspector));

  IDX_TLD = fieldNames.size();
  fieldNames.add("tld");
  fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

  result = new Object[fieldNames.size()];
  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) { // Is this field a null? if (o == null) { buffer.write((byte) 0, invert); return; } // This field is not a null. buffer.write((byte) 1, invert); switch (oi.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; switch (poi.getPrimitiveCategory()) { case VOID: { return; } case BOOLEAN: { boolean v = ((BooleanObjectInspector) poi).get(o); buffer.write((byte) (v ? 2 : 1), invert); return; } case BYTE: { ByteObjectInspector boi = (ByteObjectInspector) poi; byte v = boi.get(o); buffer.write((byte) (v ^ 0x80), invert); return; } case SHORT: { ShortObjectInspector spoi = (ShortObjectInspector) poi; short v = spoi.get(o); buffer.write((byte) ((v >> 8) ^ 0x80), invert); buffer.write((byte) v, invert); return; } case INT: { IntObjectInspector ioi = (IntObjectInspector) poi; int v = ioi.get(o); buffer.write((byte) ((v >> 24) ^ 0x80), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case LONG: { LongObjectInspector loi = (LongObjectInspector) poi; long v = loi.get(o); buffer.write((byte) ((v >> 56) ^ 0x80), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case FLOAT: { FloatObjectInspector foi = (FloatObjectInspector) poi; int v = Float.floatToIntBits(foi.get(o)); if ((v & (1 << 31)) != 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1 << 31); } buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case DOUBLE: { DoubleObjectInspector doi = (DoubleObjectInspector) 
poi; long v = Double.doubleToLongBits(doi.get(o)); if ((v & (1L << 63)) != 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1L << 63); } buffer.write((byte) (v >> 56), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case STRING: { StringObjectInspector soi = (StringObjectInspector) poi; Text t = soi.getPrimitiveWritableObject(o); serializeBytes(buffer, t.getBytes(), t.getLength(), invert); return; } case BINARY: { BinaryObjectInspector baoi = (BinaryObjectInspector) poi; BytesWritable ba = baoi.getPrimitiveWritableObject(o); byte[] toSer = new byte[ba.getLength()]; System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength()); serializeBytes(buffer, toSer, ba.getLength(), invert); return; } case DATE: { DateObjectInspector doi = (DateObjectInspector) poi; long v = doi.getPrimitiveWritableObject(o).getTimeInSeconds(); buffer.write((byte) ((v >> 56) ^ 0x80), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case TIMESTAMP: { TimestampObjectInspector toi = (TimestampObjectInspector) poi; TimestampWritable t = toi.getPrimitiveWritableObject(o); byte[] data = t.getBinarySortable(); for (int i = 0; i < data.length; i++) { buffer.write(data[i], invert); } return; } default: { throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); } } } case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; ObjectInspector eoi = loi.getListElementObjectInspector(); // \1 followed by each element int size = loi.getListLength(o); 
for (int eid = 0; eid < size; eid++) { buffer.write((byte) 1, invert); serialize(buffer, loi.getListElement(o, eid), eoi, invert); } // and \0 to terminate buffer.write((byte) 0, invert); return; } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); // \1 followed by each key and then each value Map<?, ?> map = moi.getMap(o); for (Map.Entry<?, ?> entry : map.entrySet()) { buffer.write((byte) 1, invert); serialize(buffer, entry.getKey(), koi, invert); serialize(buffer, entry.getValue(), voi, invert); } // and \0 to terminate buffer.write((byte) 0, invert); return; } case STRUCT: { StructObjectInspector soi = (StructObjectInspector) oi; List<? extends StructField> fields = soi.getAllStructFieldRefs(); for (int i = 0; i < fields.size(); i++) { serialize( buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(), invert); } return; } case UNION: { UnionObjectInspector uoi = (UnionObjectInspector) oi; byte tag = uoi.getTag(o); buffer.write(tag, invert); serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert); return; } default: { throw new RuntimeException("Unrecognized type: " + oi.getCategory()); } } }