@Override public boolean equals(Object other) { if (this == other) { return true; } if (!(other instanceof UnionTypeInfo)) { return false; } UnionTypeInfo o = (UnionTypeInfo) other; // Compare the field types return o.getAllUnionObjectTypeInfos().equals(getAllUnionObjectTypeInfos()); }
static TypeDescription convertTypeInfo(TypeInfo info) { switch (info.getCategory()) { case PRIMITIVE: { PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info; switch (pinfo.getPrimitiveCategory()) { case BOOLEAN: return TypeDescription.createBoolean(); case BYTE: return TypeDescription.createByte(); case SHORT: return TypeDescription.createShort(); case INT: return TypeDescription.createInt(); case LONG: return TypeDescription.createLong(); case FLOAT: return TypeDescription.createFloat(); case DOUBLE: return TypeDescription.createDouble(); case STRING: return TypeDescription.createString(); case DATE: return TypeDescription.createDate(); case TIMESTAMP: return TypeDescription.createTimestamp(); case BINARY: return TypeDescription.createBinary(); case DECIMAL: { DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo; return TypeDescription.createDecimal() .withScale(dinfo.getScale()) .withPrecision(dinfo.getPrecision()); } case VARCHAR: { BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo; return TypeDescription.createVarchar().withMaxLength(cinfo.getLength()); } case CHAR: { BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo; return TypeDescription.createChar().withMaxLength(cinfo.getLength()); } default: throw new IllegalArgumentException( "ORC doesn't handle primitive" + " category " + pinfo.getPrimitiveCategory()); } } case LIST: { ListTypeInfo linfo = (ListTypeInfo) info; return TypeDescription.createList(convertTypeInfo(linfo.getListElementTypeInfo())); } case MAP: { MapTypeInfo minfo = (MapTypeInfo) info; return TypeDescription.createMap( convertTypeInfo(minfo.getMapKeyTypeInfo()), convertTypeInfo(minfo.getMapValueTypeInfo())); } case UNION: { UnionTypeInfo minfo = (UnionTypeInfo) info; TypeDescription result = TypeDescription.createUnion(); for (TypeInfo child : minfo.getAllUnionObjectTypeInfos()) { result.addUnionChild(convertTypeInfo(child)); } return result; } case STRUCT: { StructTypeInfo sinfo = (StructTypeInfo) info; TypeDescription result = TypeDescription.createStruct(); for (String fieldName : sinfo.getAllStructFieldNames()) { result.addField(fieldName, convertTypeInfo(sinfo.getStructFieldTypeInfo(fieldName))); } return result; } default: throw new IllegalArgumentException("ORC doesn't handle " + info.getCategory()); } }
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse) throws IOException { // Is this field a null? byte isNull = buffer.read(invert); if (isNull == 0) { return null; } assert (isNull == 1); switch (type.getCategory()) { case PRIMITIVE: { PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type; switch (ptype.getPrimitiveCategory()) { case VOID: { return null; } case BOOLEAN: { BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse; byte b = buffer.read(invert); assert (b == 1 || b == 2); r.set(b == 2); return r; } case BYTE: { ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse; r.set((byte) (buffer.read(invert) ^ 0x80)); return r; } case SHORT: { ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse; int v = buffer.read(invert) ^ 0x80; v = (v << 8) + (buffer.read(invert) & 0xff); r.set((short) v); return r; } case INT: { IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse; int v = buffer.read(invert) ^ 0x80; for (int i = 0; i < 3; i++) { v = (v << 8) + (buffer.read(invert) & 0xff); } r.set(v); return r; } case LONG: { LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse; long v = buffer.read(invert) ^ 0x80; for (int i = 0; i < 7; i++) { v = (v << 8) + (buffer.read(invert) & 0xff); } r.set(v); return r; } case FLOAT: { FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse; int v = 0; for (int i = 0; i < 4; i++) { v = (v << 8) + (buffer.read(invert) & 0xff); } if ((v & (1 << 31)) == 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1 << 31); } r.set(Float.intBitsToFloat(v)); return r; } case DOUBLE: { DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse; long v = 0; for (int i = 0; i < 8; i++) { v = (v << 8) + (buffer.read(invert) & 0xff); } if ((v & (1L << 63)) == 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1L << 63); } r.set(Double.longBitsToDouble(v)); return r; } case STRING: { Text r = reuse == null ? new Text() : (Text) reuse; // Get the actual length first int start = buffer.tell(); int length = 0; do { byte b = buffer.read(invert); if (b == 0) { // end of string break; } if (b == 1) { // the last char is an escape char. read the actual char buffer.read(invert); } length++; } while (true); if (length == buffer.tell() - start) { // No escaping happened, so we are already done. r.set(buffer.getData(), start, length); } else { // Escaping happened, we need to copy byte-by-byte. // 1. Set the length first. r.set(buffer.getData(), start, length); // 2. Reset the pointer. buffer.seek(start); // 3. Copy the data. byte[] rdata = r.getBytes(); for (int i = 0; i < length; i++) { byte b = buffer.read(invert); if (b == 1) { // The last char is an escape char, read the actual char. // The serialization format escape \0 to \1, and \1 to \2, // to make sure the string is null-terminated. b = (byte) (buffer.read(invert) - 1); } rdata[i] = b; } // 4. Read the null terminator. byte b = buffer.read(invert); assert (b == 0); } return r; } case BINARY: { BytesWritable bw = new BytesWritable(); // Get the actual length first int start = buffer.tell(); int length = 0; do { byte b = buffer.read(invert); if (b == 0) { // end of string break; } if (b == 1) { // the last char is an escape char. read the actual char buffer.read(invert); } length++; } while (true); if (length == buffer.tell() - start) { // No escaping happened, so we are already done. bw.set(buffer.getData(), start, length); } else { // Escaping happened, we need to copy byte-by-byte. // 1. Set the length first. bw.set(buffer.getData(), start, length); // 2. Reset the pointer. buffer.seek(start); // 3. Copy the data. byte[] rdata = bw.getBytes(); for (int i = 0; i < length; i++) { byte b = buffer.read(invert); if (b == 1) { // The last char is an escape char, read the actual char. // The serialization format escape \0 to \1, and \1 to \2, // to make sure the string is null-terminated. b = (byte) (buffer.read(invert) - 1); } rdata[i] = b; } // 4. Read the null terminator. byte b = buffer.read(invert); assert (b == 0); } return bw; } case DATE: { DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse; long v = buffer.read(invert) ^ 0x80; for (int i = 0; i < 7; i++) { v = (v << 8) + (buffer.read(invert) & 0xff); } d.set(DateWritable.timeToDate(v)); return d; } case TIMESTAMP: TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse); byte[] bytes = new byte[8]; for (int i = 0; i < bytes.length; i++) { bytes[i] = buffer.read(invert); } t.setBinarySortable(bytes, 0); return t; default: { throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory()); } } } case LIST: { ListTypeInfo ltype = (ListTypeInfo) type; TypeInfo etype = ltype.getListElementTypeInfo(); // Create the list if needed ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse; // Read the list int size = 0; while (true) { int more = buffer.read(invert); if (more == 0) { // \0 to terminate break; } // \1 followed by each element assert (more == 1); if (size == r.size()) { r.add(null); } r.set(size, deserialize(buffer, etype, invert, r.get(size))); size++; } // Remove additional elements if the list is reused while (r.size() > size) { r.remove(r.size() - 1); } return r; } case MAP: { MapTypeInfo mtype = (MapTypeInfo) type; TypeInfo ktype = mtype.getMapKeyTypeInfo(); TypeInfo vtype = mtype.getMapValueTypeInfo(); // Create the map if needed Map<Object, Object> r; if (reuse == null) { r = new HashMap<Object, Object>(); } else { r = (HashMap<Object, Object>) reuse; r.clear(); } while (true) { int more = buffer.read(invert); if (more == 0) { // \0 to terminate break; } // \1 followed by each key and then each value assert (more == 1); Object k = deserialize(buffer, ktype, invert, null); Object v = deserialize(buffer, vtype, invert, null); r.put(k, v); } return r; } case STRUCT: { StructTypeInfo stype = (StructTypeInfo) type; List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos(); int size = fieldTypes.size(); // Create the struct if needed ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse; assert (r.size() <= size); // Set the size of the struct while (r.size() < size) { r.add(null); } // Read one field by one field for (int eid = 0; eid < size; eid++) { r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid))); } return r; } case UNION: { UnionTypeInfo utype = (UnionTypeInfo) type; StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse; // Read the tag byte tag = buffer.read(invert); r.setTag(tag); r.setObject( deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null)); return r; } default: { throw new RuntimeException("Unrecognized type: " + type.getCategory()); } } }
/** * Returns the lazy binary object inspector that can be used to inspect an lazy binary object of * that typeInfo * * <p>For primitive types, we use the standard writable object inspector. */ public static ObjectInspector getLazyBinaryObjectInspectorFromTypeInfo(TypeInfo typeInfo) { ObjectInspector result = cachedLazyBinaryObjectInspector.get(typeInfo); if (result == null) { switch (typeInfo.getCategory()) { case PRIMITIVE: { result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( ((PrimitiveTypeInfo) typeInfo)); break; } case LIST: { ObjectInspector elementObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo( ((ListTypeInfo) typeInfo).getListElementTypeInfo()); result = LazyBinaryObjectInspectorFactory.getLazyBinaryListObjectInspector( elementObjectInspector); break; } case MAP: { MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; ObjectInspector keyObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo()); ObjectInspector valueObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo()); result = LazyBinaryObjectInspectorFactory.getLazyBinaryMapObjectInspector( keyObjectInspector, valueObjectInspector); break; } case STRUCT: { StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; List<String> fieldNames = structTypeInfo.getAllStructFieldNames(); List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size()); for (int i = 0; i < fieldTypeInfos.size(); i++) { fieldObjectInspectors.add( getLazyBinaryObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); } result = LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector( fieldNames, fieldObjectInspectors); break; } case UNION: { UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; final List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size()); for (int i = 0; i < fieldTypeInfos.size(); i++) { fieldObjectInspectors.add( getLazyBinaryObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); } result = LazyBinaryObjectInspectorFactory.getLazyBinaryUnionObjectInspector( fieldObjectInspectors); break; } default: { result = null; } } ObjectInspector prev = cachedLazyBinaryObjectInspector.putIfAbsent(typeInfo, result); if (prev != null) { result = prev; } } return result; }