private ObjectInspector solveOi(ObjectInspector arg) { switch (arg.getCategory()) { case PRIMITIVE: // VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, DECIMAL, // UNKNOWN PrimitiveObjectInspector poi = (PrimitiveObjectInspector) arg; return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( poi.getPrimitiveCategory()); case LIST: return ObjectInspectorFactory.getStandardListObjectInspector( solveOi(((ListObjectInspector) arg).getListElementObjectInspector())); case MAP: return ObjectInspectorFactory.getStandardMapObjectInspector( solveOi(((MapObjectInspector) arg).getMapKeyObjectInspector()), solveOi(((MapObjectInspector) arg).getMapValueObjectInspector())); case STRUCT: StructObjectInspector soi = (StructObjectInspector) arg; int size = soi.getAllStructFieldRefs().size(); ArrayList<String> fnl = new ArrayList<String>(size); ArrayList<ObjectInspector> foil = new ArrayList<ObjectInspector>(size); for (StructField sf : ((StructObjectInspector) arg).getAllStructFieldRefs()) { fnl.add(sf.getFieldName()); foil.add(solveOi(sf.getFieldObjectInspector())); } return JsonStructObjectInspector.getJsonStructObjectInspector(fnl, foil); default: return arg; } }
/** * @param oi - Input object inspector * @param oiSettableProperties - Lookup map to cache the result.(If no caching, pass null) * @return - true if : (1) oi is an instance of settable<DataType>OI. (2) All the embedded object * inspectors are instances of settable<DataType>OI. If (1) or (2) is false, return false. */ public static boolean hasAllFieldsSettable( ObjectInspector oi, Map<ObjectInspector, Boolean> oiSettableProperties) { // If the result is already present in the cache, return it. if (!(oiSettableProperties == null) && oiSettableProperties.containsKey(oi)) { return oiSettableProperties.get(oi).booleanValue(); } // If the top-level object inspector is non-settable return false if (!(isInstanceOfSettableOI(oi))) { return setOISettablePropertiesMap(oi, oiSettableProperties, false); } Boolean returnValue = true; switch (oi.getCategory()) { case PRIMITIVE: break; case STRUCT: StructObjectInspector structOutputOI = (StructObjectInspector) oi; List<? extends StructField> listFields = structOutputOI.getAllStructFieldRefs(); for (StructField listField : listFields) { if (!hasAllFieldsSettable(listField.getFieldObjectInspector(), oiSettableProperties)) { returnValue = false; break; } } break; case LIST: ListObjectInspector listOutputOI = (ListObjectInspector) oi; returnValue = hasAllFieldsSettable( listOutputOI.getListElementObjectInspector(), oiSettableProperties); break; case MAP: MapObjectInspector mapOutputOI = (MapObjectInspector) oi; returnValue = hasAllFieldsSettable(mapOutputOI.getMapKeyObjectInspector(), oiSettableProperties) && hasAllFieldsSettable( mapOutputOI.getMapValueObjectInspector(), oiSettableProperties); break; case UNION: UnionObjectInspector unionOutputOI = (UnionObjectInspector) oi; List<ObjectInspector> unionListFields = unionOutputOI.getObjectInspectors(); for (ObjectInspector listField : unionListFields) { if (!hasAllFieldsSettable(listField, oiSettableProperties)) { returnValue = false; break; } } break; default: throw new 
RuntimeException( "Hive internal error inside hasAllFieldsSettable : " + oi.getTypeName() + " not supported yet."); } return setOISettablePropertiesMap(oi, oiSettableProperties, returnValue); }
/**
 * Appends a standard-object copy of every field of the input row to {@code result}.
 *
 * <p>Fields are visited in the order reported by {@code soi.getAllStructFieldRefs()}, and each
 * one is copied with the single-object {@code copyToStandardObject} overload.
 *
 * @param result output list the copied field values are appended to.
 * @param row input row.
 * @param soi Object inspector for the to-be-copied columns.
 * @param objectInspectorOption copy option passed unchanged to every per-field copy.
 */
public static void copyToStandardObject(
    List<Object> result,
    Object row,
    StructObjectInspector soi,
    ObjectInspectorCopyOption objectInspectorOption) {
  for (StructField field : soi.getAllStructFieldRefs()) {
    Object fieldData = soi.getStructFieldData(row, field);
    ObjectInspector fieldInspector = field.getFieldObjectInspector();
    result.add(copyToStandardObject(fieldData, fieldInspector, objectInspectorOption));
  }
}
/**
 * Copy specified fields in the input row to the output array of standard objects.
 *
 * <p>Columns {@code startCol} (inclusive) through {@code startCol + numCols} (exclusive) are
 * copied, stopping early if the row has fewer fields. A non-positive {@code numCols} copies
 * nothing.
 *
 * @param result output list of standard objects; copied values are appended to it.
 * @param row input row.
 * @param startCol starting column number from the input row.
 * @param numCols number of columns to copy.
 * @param soi Object inspector for the to-be-copied columns.
 * @param objectInspectorOption copy option passed unchanged to every per-field copy.
 */
public static void partialCopyToStandardObject(
    List<Object> result,
    Object row,
    int startCol,
    int numCols,
    StructObjectInspector soi,
    ObjectInspectorCopyOption objectInspectorOption) {
  // Without this guard the "++copied == numCols" stop condition below can never be met for
  // numCols <= 0, and every column from startCol onwards would be copied instead of none.
  if (numCols <= 0) {
    return;
  }
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  int column = 0;
  int copied = 0;
  for (StructField f : fields) {
    if (column++ < startCol) {
      continue; // still before the requested window
    }
    result.add(
        copyToStandardObject(
            soi.getStructFieldData(row, f), f.getFieldObjectInspector(), objectInspectorOption));
    if (++copied == numCols) {
      break;
    }
  }
}
/**
 * Copies every field of a struct into consecutive slots of {@code dest}, starting at
 * {@code offset}.
 *
 * <p>A null {@code o} leaves {@code dest} untouched. No bounds check is performed here, so
 * {@code dest} must have room for all fields from {@code offset} onwards.
 *
 * @param o struct object to copy; may be null
 * @param oi inspector describing {@code o}; must be of category STRUCT
 * @param objectInspectorOption copy option passed unchanged to every per-field copy
 * @param dest destination array
 * @param offset index in {@code dest} that receives the first field
 * @throws SerDeException if {@code o} is non-null and {@code oi} is not a STRUCT inspector
 */
public static void copyStructToArray(
    Object o,
    ObjectInspector oi,
    ObjectInspectorCopyOption objectInspectorOption,
    Object[] dest,
    int offset)
    throws SerDeException {
  if (o == null) {
    return;
  }
  if (oi.getCategory() != Category.STRUCT) {
    throw new SerDeException("Unexpected category " + oi.getCategory());
  }

  StructObjectInspector structOI = (StructObjectInspector) oi;
  int slot = offset;
  for (StructField field : structOI.getAllStructFieldRefs()) {
    dest[slot++] =
        copyToStandardObject(
            structOI.getStructFieldData(o, field),
            field.getFieldObjectInspector(),
            objectInspectorOption);
  }
}
/** * Compares two types identified by the given object inspectors. This method compares the types as * follows: * * <ol> * <li>If the given inspectors do not belong to same category, the result is negative. * <li>If the given inspectors are for <code>PRIMITIVE</code> type, the result is the comparison * of their type names. * <li>If the given inspectors are for <code>LIST</code> type, then the result is recursive call * to compare the type of list elements. * <li>If the given inspectors are <code>MAP</code> type, then the result is a recursive call to * compare the map key and value types. * <li>If the given inspectors are <code>STRUCT</code> type, then the result is negative if they * do not have the same number of fields. If they do have the same number of fields, the * result is a recursive call to compare each of the field types. * <li>If none of the above, the result is negative. * </ol> * * @param o1 * @param o2 * @return true if the given object inspectors represent the same types. */ public static boolean compareTypes(ObjectInspector o1, ObjectInspector o2) { Category c1 = o1.getCategory(); Category c2 = o2.getCategory(); // Return false if categories are not equal if (!c1.equals(c2)) { return false; } // If both categories are primitive return the comparison of type names. 
if (c1.equals(Category.PRIMITIVE)) { return o1.getTypeName().equals(o2.getTypeName()); } // If lists, recursively compare the list element types if (c1.equals(Category.LIST)) { ObjectInspector child1 = ((ListObjectInspector) o1).getListElementObjectInspector(); ObjectInspector child2 = ((ListObjectInspector) o2).getListElementObjectInspector(); return compareTypes(child1, child2); } // If maps, recursively compare the key and value types if (c1.equals(Category.MAP)) { MapObjectInspector mapOI1 = (MapObjectInspector) o1; MapObjectInspector mapOI2 = (MapObjectInspector) o2; ObjectInspector childKey1 = mapOI1.getMapKeyObjectInspector(); ObjectInspector childKey2 = mapOI2.getMapKeyObjectInspector(); if (compareTypes(childKey1, childKey2)) { ObjectInspector childVal1 = mapOI1.getMapValueObjectInspector(); ObjectInspector childVal2 = mapOI2.getMapValueObjectInspector(); if (compareTypes(childVal1, childVal2)) { return true; } } return false; } // If structs, recursively compare the fields if (c1.equals(Category.STRUCT)) { StructObjectInspector structOI1 = (StructObjectInspector) o1; StructObjectInspector structOI2 = (StructObjectInspector) o2; List<? extends StructField> childFieldsList1 = structOI1.getAllStructFieldRefs(); List<? extends StructField> childFieldsList2 = structOI2.getAllStructFieldRefs(); if (childFieldsList1 == null && childFieldsList2 == null) { return true; } else if (childFieldsList1 == null || childFieldsList2 == null) { return false; } else if (childFieldsList1.size() != childFieldsList2.size()) { return false; } Iterator<? extends StructField> it1 = childFieldsList1.iterator(); Iterator<? 
extends StructField> it2 = childFieldsList2.iterator(); while (it1.hasNext()) { StructField field1 = it1.next(); StructField field2 = it2.next(); if (!compareTypes(field1.getFieldObjectInspector(), field2.getFieldObjectInspector())) { return false; } } return true; } if (c1.equals(Category.UNION)) { UnionObjectInspector uoi1 = (UnionObjectInspector) o1; UnionObjectInspector uoi2 = (UnionObjectInspector) o2; List<ObjectInspector> ois1 = uoi1.getObjectInspectors(); List<ObjectInspector> ois2 = uoi2.getObjectInspectors(); if (ois1 == null && ois2 == null) { return true; } else if (ois1 == null || ois2 == null) { return false; } else if (ois1.size() != ois2.size()) { return false; } Iterator<? extends ObjectInspector> it1 = ois1.iterator(); Iterator<? extends ObjectInspector> it2 = ois2.iterator(); while (it1.hasNext()) { if (!compareTypes(it1.next(), it2.next())) { return false; } } return true; } // Unknown category throw new RuntimeException("Unknown category encountered: " + c1); }
/**
 * Computes a hash code for {@code o}, interpreting it through {@code objIns}.
 *
 * <ul>
 *   <li>A null object hashes to 0, as does any VOID value.
 *   <li>LONG and DOUBLE values fold their 64 bits into 32 by XOR-ing the two halves.
 *   <li>STRING values use a base-31 polynomial over the raw bytes of the writable text.
 *   <li>The remaining primitive categories use the {@code hashCode()} of their writable object.
 *   <li>LIST and STRUCT combine element / field hashes with a base-31 polynomial, in order.
 *   <li>MAP sums {@code keyHash ^ valueHash} over all entries, so entry order does not matter.
 *   <li>UNION hashes the current field with the inspector selected by the tag.
 * </ul>
 *
 * @param o object to hash; may be null
 * @param objIns inspector describing {@code o}
 * @return the hash code
 * @throws RuntimeException if the category or primitive category is not handled
 */
public static int hashCode(Object o, ObjectInspector objIns) {
  if (o == null) {
    return 0;
  }

  switch (objIns.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) objIns;
      switch (primitiveOI.getPrimitiveCategory()) {
        case VOID:
          return 0;
        case BOOLEAN: {
          boolean flag = ((BooleanObjectInspector) primitiveOI).get(o);
          return flag ? 1 : 0;
        }
        case BYTE:
          return ((ByteObjectInspector) primitiveOI).get(o);
        case SHORT:
          return ((ShortObjectInspector) primitiveOI).get(o);
        case INT:
          return ((IntObjectInspector) primitiveOI).get(o);
        case LONG: {
          long value = ((LongObjectInspector) primitiveOI).get(o);
          return (int) (value ^ (value >>> 32));
        }
        case FLOAT: {
          float value = ((FloatObjectInspector) primitiveOI).get(o);
          return Float.floatToIntBits(value);
        }
        case DOUBLE: {
          // This hash function returns the same result as Double.hashCode()
          // while DoubleWritable.hashCode returns a different result.
          long bits = Double.doubleToLongBits(((DoubleObjectInspector) primitiveOI).get(o));
          return (int) (bits ^ (bits >>> 32));
        }
        case STRING: {
          // This hash function returns the same result as String.hashCode() when
          // all characters are ASCII, while Text.hashCode() always returns a
          // different result.
          Text text = ((StringObjectInspector) primitiveOI).getPrimitiveWritableObject(o);
          int length = text.getLength();
          byte[] bytes = text.getBytes();
          int hash = 0;
          for (int pos = 0; pos < length; pos++) {
            hash = 31 * hash + bytes[pos];
          }
          return hash;
        }
        case CHAR:
          return ((HiveCharObjectInspector) primitiveOI).getPrimitiveWritableObject(o).hashCode();
        case VARCHAR:
          return ((HiveVarcharObjectInspector) primitiveOI)
              .getPrimitiveWritableObject(o)
              .hashCode();
        case BINARY:
          return ((BinaryObjectInspector) primitiveOI).getPrimitiveWritableObject(o).hashCode();
        case DATE:
          return ((DateObjectInspector) primitiveOI).getPrimitiveWritableObject(o).hashCode();
        case TIMESTAMP:
          return ((TimestampObjectInspector) primitiveOI)
              .getPrimitiveWritableObject(o)
              .hashCode();
        case INTERVAL_YEAR_MONTH:
          return ((HiveIntervalYearMonthObjectInspector) primitiveOI)
              .getPrimitiveWritableObject(o)
              .hashCode();
        case INTERVAL_DAY_TIME:
          return ((HiveIntervalDayTimeObjectInspector) primitiveOI)
              .getPrimitiveWritableObject(o)
              .hashCode();
        case DECIMAL:
          return ((HiveDecimalObjectInspector) primitiveOI)
              .getPrimitiveWritableObject(o)
              .hashCode();
        default:
          throw new RuntimeException("Unknown type: " + primitiveOI.getPrimitiveCategory());
      }
    }

    case LIST: {
      ListObjectInspector listOI = (ListObjectInspector) objIns;
      ObjectInspector elementOI = listOI.getListElementObjectInspector();
      int hash = 0;
      for (int index = 0; index < listOI.getListLength(o); index++) {
        hash = 31 * hash + hashCode(listOI.getListElement(o, index), elementOI);
      }
      return hash;
    }

    case MAP: {
      MapObjectInspector mapOI = (MapObjectInspector) objIns;
      ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
      ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
      int hash = 0;
      for (Map.Entry<?, ?> entry : mapOI.getMap(o).entrySet()) {
        int keyHash = hashCode(entry.getKey(), keyOI);
        int valueHash = hashCode(entry.getValue(), valueOI);
        hash += (keyHash ^ valueHash);
      }
      return hash;
    }

    case STRUCT: {
      StructObjectInspector structOI = (StructObjectInspector) objIns;
      int hash = 0;
      for (StructField field : structOI.getAllStructFieldRefs()) {
        Object fieldData = structOI.getStructFieldData(o, field);
        hash = 31 * hash + hashCode(fieldData, field.getFieldObjectInspector());
      }
      return hash;
    }

    case UNION: {
      UnionObjectInspector unionOI = (UnionObjectInspector) objIns;
      byte tag = unionOI.getTag(o);
      Object activeField = unionOI.getField(o);
      return hashCode(activeField, unionOI.getObjectInspectors().get(tag));
    }

    default:
      throw new RuntimeException("Unknown type: " + objIns.getTypeName());
  }
}
/**
 * Returns a copy of {@code o} built from standard containers, recursing into nested values.
 *
 * <ul>
 *   <li>PRIMITIVE: the Java object or the writable object, depending on the copy option.
 *       {@code DEFAULT} is resolved through the inspector's {@code preferWritable()}.
 *   <li>LIST / STRUCT: an {@code ArrayList} of copied elements / field values.
 *   <li>MAP: a {@code HashMap} of copied keys and values.
 *   <li>UNION: the copy of the current field only; the tag is not part of the result.
 * </ul>
 *
 * @param o object to copy; may be null
 * @param oi inspector describing {@code o}
 * @param objectInspectorOption whether primitives are copied as Java or writable objects
 * @return the copy, or null when {@code o} is null
 * @throws RuntimeException if the inspector's category is not handled
 */
public static Object copyToStandardObject(
    Object o, ObjectInspector oi, ObjectInspectorCopyOption objectInspectorOption) {
  if (o == null) {
    return null;
  }

  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
      // Resolve DEFAULT to whichever representation this inspector prefers.
      ObjectInspectorCopyOption effectiveOption = objectInspectorOption;
      if (effectiveOption == ObjectInspectorCopyOption.DEFAULT) {
        effectiveOption =
            primitiveOI.preferWritable()
                ? ObjectInspectorCopyOption.WRITABLE
                : ObjectInspectorCopyOption.JAVA;
      }
      switch (effectiveOption) {
        case JAVA: {
          Object javaValue = primitiveOI.getPrimitiveJavaObject(o);
          // Timestamp values get an additional copy through the Java timestamp inspector.
          if (primitiveOI.getPrimitiveCategory()
              == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP) {
            javaValue =
                PrimitiveObjectInspectorFactory.javaTimestampObjectInspector.copyObject(javaValue);
          }
          return javaValue;
        }
        case WRITABLE:
          return primitiveOI.getPrimitiveWritableObject(primitiveOI.copyObject(o));
        default:
          // No other option produces a value.
          return null;
      }
    }

    case LIST: {
      ListObjectInspector listOI = (ListObjectInspector) oi;
      int length = listOI.getListLength(o);
      ArrayList<Object> copiedList = new ArrayList<Object>(length);
      for (int index = 0; index < length; index++) {
        Object element = listOI.getListElement(o, index);
        ObjectInspector elementOI = listOI.getListElementObjectInspector();
        copiedList.add(copyToStandardObject(element, elementOI, objectInspectorOption));
      }
      return copiedList;
    }

    case MAP: {
      MapObjectInspector mapOI = (MapObjectInspector) oi;
      HashMap<Object, Object> copiedMap = new HashMap<Object, Object>();
      Map<?, ?> sourceMap = mapOI.getMap(o);
      for (Map.Entry<?, ?> entry : sourceMap.entrySet()) {
        Object copiedKey =
            copyToStandardObject(
                entry.getKey(), mapOI.getMapKeyObjectInspector(), objectInspectorOption);
        Object copiedValue =
            copyToStandardObject(
                entry.getValue(), mapOI.getMapValueObjectInspector(), objectInspectorOption);
        copiedMap.put(copiedKey, copiedValue);
      }
      return copiedMap;
    }

    case STRUCT: {
      StructObjectInspector structOI = (StructObjectInspector) oi;
      List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
      ArrayList<Object> copiedStruct = new ArrayList<Object>(fieldRefs.size());
      for (StructField field : fieldRefs) {
        Object fieldData = structOI.getStructFieldData(o, field);
        copiedStruct.add(
            copyToStandardObject(
                fieldData, field.getFieldObjectInspector(), objectInspectorOption));
      }
      return copiedStruct;
    }

    case UNION: {
      UnionObjectInspector unionOI = (UnionObjectInspector) oi;
      List<ObjectInspector> alternatives = unionOI.getObjectInspectors();
      Object activeField = unionOI.getField(o);
      return copyToStandardObject(
          activeField, alternatives.get(unionOI.getTag(o)), objectInspectorOption);
    }

    default:
      throw new RuntimeException("Unknown ObjectInspector category!");
  }
}
/**
 * Builds the standard object inspector that corresponds to {@code oi}, recursing into nested
 * inspectors.
 *
 * <ul>
 *   <li>PRIMITIVE: the writable or Java primitive inspector for the same type info, chosen by
 *       {@code objectInspectorOption}; {@code DEFAULT} follows the inspector's
 *       {@code preferWritable()}.
 *   <li>LIST / MAP / STRUCT / UNION: the standard inspector of that category over the converted
 *       child inspectors. Struct field names are carried over in field order.
 * </ul>
 *
 * @param oi inspector to convert
 * @param objectInspectorOption which primitive representation the result should use
 * @return the standard inspector
 * @throws RuntimeException if the inspector's category is not handled
 */
public static ObjectInspector getStandardObjectInspector(
    ObjectInspector oi, ObjectInspectorCopyOption objectInspectorOption) {
  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
      boolean useWritable;
      switch (objectInspectorOption) {
        case DEFAULT:
          useWritable = primitiveOI.preferWritable();
          break;
        case JAVA:
          useWritable = false;
          break;
        case WRITABLE:
          useWritable = true;
          break;
        default:
          // No other option selects an inspector.
          return null;
      }
      if (useWritable) {
        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            primitiveOI.getTypeInfo());
      }
      return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
          primitiveOI.getTypeInfo());
    }

    case LIST: {
      ListObjectInspector listOI = (ListObjectInspector) oi;
      ObjectInspector elementOI =
          getStandardObjectInspector(
              listOI.getListElementObjectInspector(), objectInspectorOption);
      return ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
    }

    case MAP: {
      MapObjectInspector mapOI = (MapObjectInspector) oi;
      ObjectInspector keyOI =
          getStandardObjectInspector(mapOI.getMapKeyObjectInspector(), objectInspectorOption);
      ObjectInspector valueOI =
          getStandardObjectInspector(mapOI.getMapValueObjectInspector(), objectInspectorOption);
      return ObjectInspectorFactory.getStandardMapObjectInspector(keyOI, valueOI);
    }

    case STRUCT: {
      StructObjectInspector structOI = (StructObjectInspector) oi;
      List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
      List<String> names = new ArrayList<String>(fieldRefs.size());
      List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fieldRefs.size());
      for (StructField field : fieldRefs) {
        names.add(field.getFieldName());
        inspectors.add(
            getStandardObjectInspector(field.getFieldObjectInspector(), objectInspectorOption));
      }
      return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
    }

    case UNION: {
      UnionObjectInspector unionOI = (UnionObjectInspector) oi;
      List<ObjectInspector> alternatives = new ArrayList<ObjectInspector>();
      for (ObjectInspector alternative : unionOI.getObjectInspectors()) {
        alternatives.add(getStandardObjectInspector(alternative, objectInspectorOption));
      }
      return ObjectInspectorFactory.getStandardUnionObjectInspector(alternatives);
    }

    default:
      throw new RuntimeException("Unknown ObjectInspector category!");
  }
}