/**
 * Builds the ORC {@code TypeDescription} that corresponds to a Hive {@code TypeInfo}.
 *
 * <p>Primitive categories are handled by {@code convertPrimitiveTypeInfo}; list, map, union and
 * struct types are converted recursively, one child at a time.
 *
 * @param info the type to convert
 * @return the matching ORC type description
 * @throws IllegalArgumentException if the category (or primitive category) has no mapping here
 */
static TypeDescription convertTypeInfo(TypeInfo info) {
  switch (info.getCategory()) {
    case PRIMITIVE:
      return convertPrimitiveTypeInfo((PrimitiveTypeInfo) info);
    case LIST: {
      ListTypeInfo listInfo = (ListTypeInfo) info;
      TypeDescription elementType = convertTypeInfo(listInfo.getListElementTypeInfo());
      return TypeDescription.createList(elementType);
    }
    case MAP: {
      MapTypeInfo mapInfo = (MapTypeInfo) info;
      // Key is converted before value, matching the argument evaluation order of a direct call.
      TypeDescription keyType = convertTypeInfo(mapInfo.getMapKeyTypeInfo());
      TypeDescription valueType = convertTypeInfo(mapInfo.getMapValueTypeInfo());
      return TypeDescription.createMap(keyType, valueType);
    }
    case UNION: {
      UnionTypeInfo unionInfo = (UnionTypeInfo) info;
      TypeDescription union = TypeDescription.createUnion();
      for (TypeInfo alternative : unionInfo.getAllUnionObjectTypeInfos()) {
        TypeDescription alternativeType = convertTypeInfo(alternative);
        union.addUnionChild(alternativeType);
      }
      return union;
    }
    case STRUCT: {
      StructTypeInfo structInfo = (StructTypeInfo) info;
      TypeDescription struct = TypeDescription.createStruct();
      for (String name : structInfo.getAllStructFieldNames()) {
        // Each field's type is looked up by its name.
        TypeInfo fieldInfo = structInfo.getStructFieldTypeInfo(name);
        struct.addField(name, convertTypeInfo(fieldInfo));
      }
      return struct;
    }
    default:
      throw new IllegalArgumentException("ORC doesn't handle " + info.getCategory());
  }
}

/** Maps a single primitive Hive type onto its ORC counterpart. */
private static TypeDescription convertPrimitiveTypeInfo(PrimitiveTypeInfo primitive) {
  switch (primitive.getPrimitiveCategory()) {
    case BOOLEAN:
      return TypeDescription.createBoolean();
    case BYTE:
      return TypeDescription.createByte();
    case SHORT:
      return TypeDescription.createShort();
    case INT:
      return TypeDescription.createInt();
    case LONG:
      return TypeDescription.createLong();
    case FLOAT:
      return TypeDescription.createFloat();
    case DOUBLE:
      return TypeDescription.createDouble();
    case STRING:
      return TypeDescription.createString();
    case DATE:
      return TypeDescription.createDate();
    case TIMESTAMP:
      return TypeDescription.createTimestamp();
    case BINARY:
      return TypeDescription.createBinary();
    case DECIMAL: {
      DecimalTypeInfo decimalInfo = (DecimalTypeInfo) primitive;
      // NOTE(review): scale is applied before precision on purpose; TypeDescription presumably
      // validates one against the other, so confirm before reordering these calls.
      TypeDescription decimal = TypeDescription.createDecimal();
      decimal = decimal.withScale(decimalInfo.getScale());
      return decimal.withPrecision(decimalInfo.getPrecision());
    }
    case VARCHAR: {
      BaseCharTypeInfo varcharInfo = (BaseCharTypeInfo) primitive;
      int maxLength = varcharInfo.getLength();
      return TypeDescription.createVarchar().withMaxLength(maxLength);
    }
    case CHAR: {
      BaseCharTypeInfo charInfo = (BaseCharTypeInfo) primitive;
      int maxLength = charInfo.getLength();
      return TypeDescription.createChar().withMaxLength(maxLength);
    }
    default:
      throw new IllegalArgumentException(
          "ORC doesn't handle primitive" + " category " + primitive.getPrimitiveCategory());
  }
}
/**
 * Wraps a single primitive value in the {@code Writable} matching the inspector's category.
 *
 * <p>Numeric, boolean and timestamp values get their dedicated writable. Strings, chars,
 * varchars, binaries and decimals are all returned as {@code BytesWritable}. A decimal is
 * rescaled to the declared scale and emitted as a big-endian two's-complement array whose length
 * is {@code PRECISION_TO_BYTE_COUNT[precision - 1]}, sign-extended on the left when shorter.
 *
 * @param obj the raw value to wrap; may be {@code null}
 * @param inspector the inspector used to read {@code obj}
 * @return the wrapped value, or {@code null} when {@code obj} is {@code null} or the category
 *     is {@code VOID}
 * @throws SerDeException if the category is not handled, a string cannot be UTF-8 encoded, the
 *     decimal precision has no entry in {@code PRECISION_TO_BYTE_COUNT}, or the rescaled decimal
 *     does not fit in the byte width for its precision
 */
private Writable createPrimitive(final Object obj, final PrimitiveObjectInspector inspector)
    throws SerDeException {
  if (obj == null) {
    return null;
  }
  switch (inspector.getPrimitiveCategory()) {
    case VOID:
      return null;
    case BOOLEAN:
      return new BooleanWritable(((BooleanObjectInspector) inspector).get(obj));
    case BYTE:
      return new ByteWritable(((ByteObjectInspector) inspector).get(obj));
    case DOUBLE:
      return new DoubleWritable(((DoubleObjectInspector) inspector).get(obj));
    case FLOAT:
      return new FloatWritable(((FloatObjectInspector) inspector).get(obj));
    case INT:
      return new IntWritable(((IntObjectInspector) inspector).get(obj));
    case LONG:
      return new LongWritable(((LongObjectInspector) inspector).get(obj));
    case SHORT:
      return new ShortWritable(((ShortObjectInspector) inspector).get(obj));
    case STRING: {
      final String text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
      try {
        return new BytesWritable(text.getBytes("UTF-8"));
      } catch (UnsupportedEncodingException e) {
        throw new SerDeException("Failed to encode string in UTF-8", e);
      }
    }
    case DECIMAL: {
      final HiveDecimal decimal = (HiveDecimal) inspector.getPrimitiveJavaObject(obj);
      final DecimalTypeInfo decimalType = (DecimalTypeInfo) inspector.getTypeInfo();
      final int precision = decimalType.precision();
      final int scale = decimalType.scale();
      final byte[] unscaled = decimal.setScale(scale).unscaledValue().toByteArray();

      if (precision < 1 || precision > PRECISION_TO_BYTE_COUNT.length) {
        // Report a bad precision through the declared exception instead of a raw index error.
        throw new SerDeException(
            "Decimal precision " + precision + " is outside the supported range 1.."
                + PRECISION_TO_BYTE_COUNT.length);
      }
      // Fixed number of bytes used for this precision.
      final int width = PRECISION_TO_BYTE_COUNT[precision - 1];
      if (unscaled.length == width) {
        // No padding needed.
        return new BytesWritable(unscaled);
      }
      if (unscaled.length > width) {
        // Copying would need a negative destination offset; fail with a meaningful message.
        throw new SerDeException(
            "Decimal value needs " + unscaled.length + " bytes but precision " + precision
                + " allows only " + width);
      }

      final byte[] padded = new byte[width];
      // Sign-extend from the bytes being padded, not from the value before rescaling: rescaling
      // can turn a small negative value into zero, which must be padded with zeroes.
      if (unscaled[0] < 0) {
        java.util.Arrays.fill(padded, (byte) 0xFF);
      }
      // Right-align the value; the leading bytes keep the zero/one padding.
      System.arraycopy(unscaled, 0, padded, width - unscaled.length, unscaled.length);
      return new BytesWritable(padded);
    }
    case TIMESTAMP:
      return new TimestampWritable(
          ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj));
    case CHAR: {
      final String strippedValue =
          ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(obj).getStrippedValue();
      return new BytesWritable(Binary.fromString(strippedValue).getBytes());
    }
    case VARCHAR: {
      final String value =
          ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(obj).getValue();
      return new BytesWritable(Binary.fromString(value).getBytes());
    }
    case BINARY:
      return new BytesWritable(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(obj));
    default:
      throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
  }
}