/**
 * Checks that an argument inspector is a primitive whose category this helper accepts and
 * returns it as a {@link PrimitiveObjectInspector}.
 *
 * <p>Accepted primitive categories are BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and
 * TIMESTAMP. Failures are always reported against argument index 0.
 *
 * @param argOI inspector of the argument to validate
 * @return {@code argOI} cast to {@link PrimitiveObjectInspector}
 * @throws UDFArgumentTypeException if {@code argOI} is not primitive, or its primitive category
 *     is not one of the accepted ones
 */
public static PrimitiveObjectInspector asDoubleCompatibleOI(@Nonnull final ObjectInspector argOI)
        throws UDFArgumentTypeException {
    if (argOI.getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(
            0,
            "Only primitive type arguments are accepted but " + argOI.getTypeName() + " is passed.");
    }

    final PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) argOI;
    switch (primitiveOI.getPrimitiveCategory()) {
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case STRING:
        case TIMESTAMP:
            // Accepted category: hand the inspector back unchanged.
            return primitiveOI;
        default:
            throw new UDFArgumentTypeException(
                0,
                "Only numeric or string type arguments are accepted but "
                    + argOI.getTypeName()
                    + " is passed.");
    }
}
/**
 * Casts the given inspector to {@link IntObjectInspector} after checking its type name.
 *
 * @param argOI inspector to check
 * @return {@code argOI} as an {@link IntObjectInspector}
 * @throws UDFArgumentException if the inspector's type name is not {@code INT_TYPE_NAME}
 */
public static IntObjectInspector asIntOI(@Nonnull final ObjectInspector argOI)
        throws UDFArgumentException {
    final String typeName = argOI.getTypeName();
    if (INT_TYPE_NAME.equals(typeName)) {
        return (IntObjectInspector) argOI;
    }
    throw new UDFArgumentException("Argument type must be INT: " + typeName);
}
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { if (objInspector.getCategory() != Category.STRUCT) { throw new SerDeException( getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName()); } StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < fields.size(); i++) { if (i > 0) sb.append(separator); Object column = soi.getStructFieldData(obj, fields.get(i)); if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) { // For primitive object, serialize to plain string sb.append(column == null ? nullString : column.toString()); } else { // For complex object, serialize to JSON format sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector())); } } serializeCache.set(sb.toString()); return serializeCache; }
public void printFileInfo() throws Exception { System.out.println("Reader: " + m_reader); System.out.println("# Rows: " + m_reader.getNumberOfRows()); System.out.println("# Types in the file: " + m_types.size()); for (int i = 0; i < m_types.size(); i++) { System.out.println("Type " + i + ": " + m_types.get(i).getKind()); } System.out.println("Compression: " + m_reader.getCompression()); if (m_reader.getCompression() != CompressionKind.NONE) { System.out.println("Compression size: " + m_reader.getCompressionSize()); } m_oi = (StructObjectInspector) m_reader.getObjectInspector(); System.out.println("object inspector type category: " + m_oi.getCategory()); System.out.println("object inspector type name : " + m_oi.getTypeName()); System.out.println("Number of columns in the table: " + m_fields.size()); // Print the type info: for (int i = 0; i < m_fields.size(); i++) { System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName()); ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector(); System.out.println("Column " + i + " type category: " + lv_foi.getCategory()); System.out.println("Column " + i + " type name: " + lv_foi.getTypeName()); } }
/**
 * Reads the constant value behind a numeric inspector and returns it widened to {@code double}.
 *
 * <p>The inspector's type name selects which writable type the constant is read as: double,
 * float, int, bigint, smallint or tinyint.
 *
 * @param numberOI inspector of the constant argument
 * @return the constant's value as a {@code double}
 * @throws UDFArgumentException if the type name is none of the supported numeric type names
 *     (or, presumably, if {@code getConstValue} itself rejects the inspector)
 */
public static double getAsConstDouble(@Nonnull final ObjectInspector numberOI)
        throws UDFArgumentException {
    final String typeName = numberOI.getTypeName();

    if (DOUBLE_TYPE_NAME.equals(typeName)) {
        final DoubleWritable doubleValue = getConstValue(numberOI);
        return doubleValue.get();
    }
    if (FLOAT_TYPE_NAME.equals(typeName)) {
        final FloatWritable floatValue = getConstValue(numberOI);
        return floatValue.get();
    }
    if (INT_TYPE_NAME.equals(typeName)) {
        final IntWritable intValue = getConstValue(numberOI);
        return intValue.get();
    }
    if (BIGINT_TYPE_NAME.equals(typeName)) {
        final LongWritable longValue = getConstValue(numberOI);
        return longValue.get();
    }
    if (SMALLINT_TYPE_NAME.equals(typeName)) {
        final ShortWritable shortValue = getConstValue(numberOI);
        return shortValue.get();
    }
    if (TINYINT_TYPE_NAME.equals(typeName)) {
        final ByteWritable byteValue = getConstValue(numberOI);
        return byteValue.get();
    }

    throw new UDFArgumentException(
        "Unexpected argument type to cast as double: "
            + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
}
/**
 * Serializes a struct row into a {@code VoltRecord} for the configured table.
 *
 * <p>The number of columns written is taken from {@code m_oig.getColumnTypes()}, not from the
 * incoming struct. Every field is read through its {@link PrimitiveObjectInspector}; non-null
 * timestamp values are converted to {@code Date} before being added.
 *
 * @param obj the row to serialize
 * @param oi inspector describing {@code obj}; must be of category STRUCT
 * @return the populated record
 * @throws SerDeException (as {@code VoltSerdeException}) if {@code oi} is not a struct inspector
 */
@Override
public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
    if (oi.getCategory() != Category.STRUCT) {
        throw new VoltSerdeException(
            getClass().toString() + " can only serialize struct types, but we got: " + oi.getTypeName());
    }

    final VoltRecord record = new VoltRecord(m_voltConf.getTableName());
    final StructObjectInspector structOI = (StructObjectInspector) oi;
    final List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
    final List<Object> rawValues = structOI.getStructFieldsDataAsList(obj);

    final int columnCount = m_oig.getColumnTypes().size();
    for (int col = 0; col < columnCount; ++col) {
        final PrimitiveObjectInspector primitiveOI =
            (PrimitiveObjectInspector) fieldRefs.get(col).getFieldObjectInspector();
        Object value = primitiveOI.getPrimitiveJavaObject(rawValues.get(col));

        final boolean isTimestamp =
            primitiveOI.getTypeInfo().equals(TypeInfoFactory.timestampTypeInfo);
        if (isTimestamp && value != null) {
            // Timestamps are handed over as Date built from the same epoch milliseconds.
            value = new Date(((Timestamp) value).getTime());
        }
        record.add(value);
    }
    return record;
}
@Override protected String extractField(Object target) { if (target instanceof HiveType) { HiveType type = (HiveType) target; ObjectInspector inspector = type.getObjectInspector(); if (inspector instanceof StructObjectInspector) { StructObjectInspector soi = (StructObjectInspector) inspector; StructField field = soi.getStructFieldRef(fieldName); ObjectInspector foi = field.getFieldObjectInspector(); Assert.isTrue( foi.getCategory() == ObjectInspector.Category.PRIMITIVE, String.format( "Field [%s] needs to be a primitive; found [%s]", fieldName, foi.getTypeName())); // expecting a writeable - simply do a toString Object data = soi.getStructFieldData(type.getObject(), field); if (data == null || data instanceof NullWritable) { return StringUtils.EMPTY; } return data.toString(); } } return null; }
// array<类型对象> @Override public String getTypeName() { return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME + "<" + listElementObjectInspector.getTypeName() + ">"; }
/** * Convert a LazyObject to a standard Java object in compliance with JDBC 3.0 (see JDBC 3.0 * Specification, Table B-3: Mapping from JDBC Types to Java Object Types). * * <p>This method is kept consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}. */ private static Object convertLazyToJava(Object o, ObjectInspector oi) { Object obj = ObjectInspectorUtils.copyToStandardObject(o, oi, ObjectInspectorCopyOption.JAVA); if (obj == null) { return null; } if (oi.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) { return new String((byte[]) obj); } // for now, expose non-primitive as a string // TODO: expose non-primitive as a structured object while maintaining JDBC compliance if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) { return SerDeUtils.getJSONString(o, oi); } return obj; }
/** * Hive will call this to serialize an object. Returns a writable object of the same class * returned by <a href="#getSerializedClass">getSerializedClass</a> * * @param obj The object to serialize * @param objInspector The ObjectInspector that knows about the object's structure * @return a serialized object in form of a Writable. Must be the same type returned by <a * href="#getSerializedClass">getSerializedClass</a> * @throws SerDeException */ @Override public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { // make sure it is a struct record if (objInspector.getCategory() != Category.STRUCT) { throw new SerDeException( getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName()); } JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames); Text t = new Text(serializer.toString()); serializedDataSize = t.getBytes().length; return t; }
// This method is just for experimentation. public void testRead() throws Exception { m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf)); System.out.println("Reader: " + m_reader); System.out.println("# Rows: " + m_reader.getNumberOfRows()); m_types = m_reader.getTypes(); System.out.println("# Types in the file: " + m_types.size()); for (int i = 0; i < m_types.size(); i++) { System.out.println("Type " + i + ": " + m_types.get(i).getKind()); } System.out.println("Compression: " + m_reader.getCompression()); if (m_reader.getCompression() != CompressionKind.NONE) { System.out.println("Compression size: " + m_reader.getCompressionSize()); } StructObjectInspector m_oi = (StructObjectInspector) m_reader.getObjectInspector(); System.out.println("object inspector type category: " + m_oi.getCategory()); System.out.println("object inspector type name : " + m_oi.getTypeName()); m_fields = m_oi.getAllStructFieldRefs(); System.out.println("Number of columns in the table: " + m_fields.size()); RecordReader m_rr = m_reader.rows(); // Print the type info: for (int i = 0; i < m_fields.size(); i++) { System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName()); ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector(); System.out.println("Column " + i + " type category: " + lv_foi.getCategory()); System.out.println("Column " + i + " type name: " + lv_foi.getTypeName()); // Object lv_column_val = m_oi.getStructFieldData(lv_row, m_fields.get(i)); // System.out.print("Column " + i + " value: " + lv_row.getFieldValue(i)); } OrcStruct lv_row = null; Object lv_field_val = null; StringBuilder lv_row_string = new StringBuilder(1024); while (m_rr.hasNext()) { lv_row = (OrcStruct) m_rr.next(lv_row); lv_row_string.setLength(0); for (int i = 0; i < m_fields.size(); i++) { lv_field_val = lv_row.getFieldValue(i); if (lv_field_val != null) { lv_row_string.append(lv_field_val); } lv_row_string.append('|'); } System.out.println(lv_row_string); } 
/** * Typecasting to appropriate type based on the 'kind' if (OrcProto.Type.Kind.INT == * m_types.get(1).getKind()) { IntWritable lvf_1_val = (IntWritable) lv_row.getFieldValue(0); * System.out.println("Column 1 value: " + lvf_1_val); } */ }
/**
 * Tells whether the given inspector's type name equals {@code BOOLEAN_TYPE_NAME}.
 *
 * @param oi inspector to test
 * @return {@code true} if the type name matches, {@code false} otherwise
 */
public static boolean isBooleanOI(@Nonnull final ObjectInspector oi) {
    return BOOLEAN_TYPE_NAME.equals(oi.getTypeName());
}
/**
 * Returns the type name reported by the wrapped {@code objectInspector}.
 *
 * @return the inspector's type name
 */
public String getType() {
    final String typeName = objectInspector.getTypeName();
    return typeName;
}
/**
 * Validates the UDF's arguments and declares its return type.
 *
 * <p>Exactly one argument of primitive category STRING is accepted. On success this method
 * stores the {@code Webrequest} instance and the argument inspector, records the position of
 * each output field in the {@code IDX_*} fields, and allocates the {@code result} array with
 * one slot per output field.
 *
 * <p>The declared return type is a struct with the fields {@code project_class} (string),
 * {@code project} (string), {@code qualifiers} (list of string) and {@code tld} (string), in
 * that order.
 *
 * @param arguments inspectors of the arguments passed to the UDF
 * @return a standard struct inspector describing the result
 * @throws UDFArgumentException if the argument count is not 1 or the argument is not a string
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
        throw new UDFArgumentLengthException(
            "The HostNormalizerUDF takes an array with only 1 element as argument");
    }

    final int argIndex = 0;
    final ObjectInspector arg = arguments[argIndex];

    // The primitive category can only be inspected once the argument is known to be primitive.
    final boolean isString =
        arg.getCategory() == Category.PRIMITIVE
            && ((PrimitiveObjectInspector) arg).getPrimitiveCategory() == PrimitiveCategory.STRING;
    if (!isString) {
        throw new UDFArgumentTypeException(
            argIndex,
            "A string argument was expected but an argument of type "
                + arg.getTypeName()
                + " was given.");
    }

    webrequest = Webrequest.getInstance();
    argumentOI = (StringObjectInspector) arg;

    final ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final List<String> fieldNames = new LinkedList<>();
    final List<ObjectInspector> fieldOIs = new LinkedList<>();
    int nextIndex = 0;

    fieldNames.add("project_class");
    fieldOIs.add(stringOI);
    IDX_PROJECT_CLASS = nextIndex++;

    fieldNames.add("project");
    fieldOIs.add(stringOI);
    IDX_PROJECT = nextIndex++;

    fieldNames.add("qualifiers");
    fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(stringOI));
    IDX_QUALIFIERS = nextIndex++;

    fieldNames.add("tld");
    fieldOIs.add(stringOI);
    IDX_TLD = nextIndex++;

    // One result slot per declared output field.
    result = new Object[nextIndex];

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}