/**
 * Builds the standard Java {@link ObjectInspector} for a Hive type, recursing into the key and
 * value types of maps, the element type of lists, and the field types of structs.
 *
 * @param type the type to build an inspector for
 * @return a primitive Java inspector for PRIMITIVE types, otherwise a standard map, list, or
 *     struct inspector assembled from the inspectors of the nested types
 * @throws IOException if the category of {@code type} (or of any nested type) is not one of
 *     PRIMITIVE, MAP, LIST, or STRUCT; the message names the rejected category
 */
private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException {

    switch (type.getCategory()) {
      case PRIMITIVE:
        // NOTE(review): only the primitive category is handed to the factory, so any
        // parameters carried by the PrimitiveTypeInfo itself are not passed along.
        PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            primitiveType.getPrimitiveCategory());

      case MAP:
        MapTypeInfo mapType = (MapTypeInfo) type;
        return ObjectInspectorFactory.getStandardMapObjectInspector(
            getObjectInspector(mapType.getMapKeyTypeInfo()),
            getObjectInspector(mapType.getMapValueTypeInfo()));

      case LIST:
        ListTypeInfo listType = (ListTypeInfo) type;
        return ObjectInspectorFactory.getStandardListObjectInspector(
            getObjectInspector(listType.getListElementTypeInfo()));

      case STRUCT:
        StructTypeInfo structType = (StructTypeInfo) type;
        List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();

        // One inspector per field, in declaration order, so it lines up with the name list.
        List<ObjectInspector> fieldInspectors =
            new ArrayList<ObjectInspector>(fieldTypes.size());
        for (TypeInfo fieldType : fieldTypes) {
          fieldInspectors.add(getObjectInspector(fieldType));
        }

        return ObjectInspectorFactory.getStandardStructObjectInspector(
            structType.getAllStructFieldNames(), fieldInspectors);

      default:
        // Name the offending category so a schema failure can be diagnosed from the message.
        throw new IOException("Unknown field schema type: " + type.getCategory());
    }
  }
Example #2
0
 /**
  * Converts a Hive {@link TypeInfo} into the corresponding ORC {@link TypeDescription},
  * recursing into the children of list, map, union, and struct types.
  *
  * @param info the Hive type to convert
  * @return the ORC type description built for {@code info}
  * @throws IllegalArgumentException if the category, or the primitive category of a PRIMITIVE
  *     type, has no case in this method
  */
 static TypeDescription convertTypeInfo(TypeInfo info) {
   switch (info.getCategory()) {
     case PRIMITIVE:
       {
         PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info;
         switch (pinfo.getPrimitiveCategory()) {
           case BOOLEAN:
             return TypeDescription.createBoolean();
           case BYTE:
             return TypeDescription.createByte();
           case SHORT:
             return TypeDescription.createShort();
           case INT:
             return TypeDescription.createInt();
           case LONG:
             return TypeDescription.createLong();
           case FLOAT:
             return TypeDescription.createFloat();
           case DOUBLE:
             return TypeDescription.createDouble();
           case STRING:
             return TypeDescription.createString();
           case DATE:
             return TypeDescription.createDate();
           case TIMESTAMP:
             return TypeDescription.createTimestamp();
           case BINARY:
             return TypeDescription.createBinary();
           case DECIMAL:
             {
               DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
               // NOTE(review): scale is applied before precision, as in the original code;
               // presumably TypeDescription validates one against the other - keep this order
               // unless confirmed otherwise.
               return TypeDescription.createDecimal()
                   .withScale(dinfo.getScale())
                   .withPrecision(dinfo.getPrecision());
             }
           case VARCHAR:
             {
               BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
               return TypeDescription.createVarchar().withMaxLength(cinfo.getLength());
             }
           case CHAR:
             {
               BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
               return TypeDescription.createChar().withMaxLength(cinfo.getLength());
             }
           default:
             throw new IllegalArgumentException(
                 "ORC doesn't handle primitive" + " category " + pinfo.getPrimitiveCategory());
         }
       }
     case LIST:
       {
         ListTypeInfo linfo = (ListTypeInfo) info;
         return TypeDescription.createList(convertTypeInfo(linfo.getListElementTypeInfo()));
       }
     case MAP:
       {
         MapTypeInfo minfo = (MapTypeInfo) info;
         return TypeDescription.createMap(
             convertTypeInfo(minfo.getMapKeyTypeInfo()),
             convertTypeInfo(minfo.getMapValueTypeInfo()));
       }
     case UNION:
       {
         UnionTypeInfo uinfo = (UnionTypeInfo) info;
         TypeDescription result = TypeDescription.createUnion();
         for (TypeInfo child : uinfo.getAllUnionObjectTypeInfos()) {
           result.addUnionChild(convertTypeInfo(child));
         }
         return result;
       }
     case STRUCT:
       {
         StructTypeInfo sinfo = (StructTypeInfo) info;
         TypeDescription result = TypeDescription.createStruct();
         // Pair each field name with the type at the same position rather than looking the
         // type up again by name: this avoids a per-field search and keeps fields whose names
         // clash mapped to their own types.
         int fieldIndex = 0;
         for (String fieldName : sinfo.getAllStructFieldNames()) {
           result.addField(
               fieldName, convertTypeInfo(sinfo.getAllStructFieldTypeInfos().get(fieldIndex)));
           fieldIndex++;
         }
         return result;
       }
     default:
       throw new IllegalArgumentException("ORC doesn't handle " + info.getCategory());
   }
 }
  /**
   * Reads one value of the given type from {@code buffer} and returns it as a Writable
   * (primitives), an {@code ArrayList} (lists and structs), a {@code HashMap} (maps), or a
   * {@code StandardUnion} (unions).
   *
   * <p>Every value starts with a marker byte: 0 means the value is null; any other value is
   * asserted to be 1 and is followed by the type-specific payload decoded below.
   *
   * @param buffer source of the serialized bytes
   * @param type type of the value to read; selects which payload layout is decoded
   * @param invert passed unchanged to every {@code buffer.read} call (presumably selects
   *     bit-inverted reads for descending sort order - TODO confirm against InputByteBuffer)
   * @param reuse optional object to populate instead of allocating a new one; when non-null it
   *     is cast to the Writable or container class used for {@code type}. BINARY values, map keys
   *     and values, and union members are always freshly allocated.
   * @return the decoded value, or {@code null} for a null marker or a VOID primitive
   * @throws IOException declared for the buffer reads; no statement in this method throws it
   *     directly
   * @throws RuntimeException if the category or primitive category has no case here
   */
  static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
      throws IOException {

    // Is this field a null?
    byte isNull = buffer.read(invert);
    if (isNull == 0) {
      return null;
    }
    assert (isNull == 1);

    switch (type.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
          switch (ptype.getPrimitiveCategory()) {
            case VOID:
              {
                return null;
              }
            case BOOLEAN:
              {
                BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                // Stored as a single byte: 1 is false, 2 is true.
                byte b = buffer.read(invert);
                assert (b == 1 || b == 2);
                r.set(b == 2);
                return r;
              }
            case BYTE:
              {
                ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                // XOR with 0x80 flips the sign bit of the stored byte.
                r.set((byte) (buffer.read(invert) ^ 0x80));
                return r;
              }
            case SHORT:
              {
                ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                // First byte has its sign bit flipped; the second byte is appended below it.
                int v = buffer.read(invert) ^ 0x80;
                v = (v << 8) + (buffer.read(invert) & 0xff);
                r.set((short) v);
                return r;
              }
            case INT:
              {
                IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                // 4 bytes, most significant first; the first byte has its sign bit flipped.
                int v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                r.set(v);
                return r;
              }
            case LONG:
              {
                LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                // 8 bytes, most significant first; the first byte has its sign bit flipped.
                long v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                r.set(v);
                return r;
              }
            case FLOAT:
              {
                FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                // Assemble the 4 stored bytes, most significant first.
                int v = 0;
                for (int i = 0; i < 4; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                // The top stored bit tells which transformation to undo.
                if ((v & (1 << 31)) == 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1 << 31);
                }
                r.set(Float.intBitsToFloat(v));
                return r;
              }
            case DOUBLE:
              {
                DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                // Assemble the 8 stored bytes, most significant first.
                long v = 0;
                for (int i = 0; i < 8; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                // The top stored bit tells which transformation to undo.
                if ((v & (1L << 63)) == 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1L << 63);
                }
                r.set(Double.longBitsToDouble(v));
                return r;
              }
            case STRING:
              {
                Text r = reuse == null ? new Text() : (Text) reuse;
                // Get the actual length first
                // (first pass: count decoded bytes up to the 0 terminator; an escape byte 1
                // plus the byte after it count as one decoded byte).
                int start = buffer.tell();
                int length = 0;
                do {
                  byte b = buffer.read(invert);
                  if (b == 0) {
                    // end of string
                    break;
                  }
                  if (b == 1) {
                    // the last char is an escape char. read the actual char
                    buffer.read(invert);
                  }
                  length++;
                } while (true);

                // NOTE(review): the terminator byte has already been read at this point, so if
                // tell() counts bytes consumed, tell() - start is at least length + 1 and the
                // first branch would never run - confirm against InputByteBuffer before touching
                // this comparison.
                if (length == buffer.tell() - start) {
                  // No escaping happened, so we are already done.
                  r.set(buffer.getData(), start, length);
                } else {
                  // Escaping happened, we need to copy byte-by-byte.
                  // 1. Set the length first.
                  r.set(buffer.getData(), start, length);
                  // 2. Reset the pointer.
                  buffer.seek(start);
                  // 3. Copy the data.
                  byte[] rdata = r.getBytes();
                  for (int i = 0; i < length; i++) {
                    byte b = buffer.read(invert);
                    if (b == 1) {
                      // The last char is an escape char, read the actual char.
                      // The serialization format escape \0 to \1, and \1 to \2,
                      // to make sure the string is null-terminated.
                      b = (byte) (buffer.read(invert) - 1);
                    }
                    rdata[i] = b;
                  }
                  // 4. Read the null terminator.
                  byte b = buffer.read(invert);
                  assert (b == 0);
                }
                return r;
              }

            case BINARY:
              {
                // Same escaped, 0-terminated layout as STRING. Note that reuse is not
                // consulted here: a new BytesWritable is always returned.
                BytesWritable bw = new BytesWritable();
                // Get the actual length first
                int start = buffer.tell();
                int length = 0;
                do {
                  byte b = buffer.read(invert);
                  if (b == 0) {
                    // end of string
                    break;
                  }
                  if (b == 1) {
                    // the last char is an escape char. read the actual char
                    buffer.read(invert);
                  }
                  length++;
                } while (true);

                // NOTE(review): same comparison as in the STRING case - confirm whether the
                // first branch is reachable.
                if (length == buffer.tell() - start) {
                  // No escaping happened, so we are already done.
                  bw.set(buffer.getData(), start, length);
                } else {
                  // Escaping happened, we need to copy byte-by-byte.
                  // 1. Set the length first.
                  bw.set(buffer.getData(), start, length);
                  // 2. Reset the pointer.
                  buffer.seek(start);
                  // 3. Copy the data.
                  byte[] rdata = bw.getBytes();
                  for (int i = 0; i < length; i++) {
                    byte b = buffer.read(invert);
                    if (b == 1) {
                      // The last char is an escape char, read the actual char.
                      // The serialization format escape \0 to \1, and \1 to \2,
                      // to make sure the string is null-terminated.
                      b = (byte) (buffer.read(invert) - 1);
                    }
                    rdata[i] = b;
                  }
                  // 4. Read the null terminator.
                  byte b = buffer.read(invert);
                  assert (b == 0);
                }
                return bw;
              }

            case DATE:
              {
                DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
                // Read an 8-byte value exactly as in the LONG case, then convert it with
                // DateWritable.timeToDate before storing.
                long v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                  v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                d.set(DateWritable.timeToDate(v));
                return d;
              }

            case TIMESTAMP:
              TimestampWritable t =
                  (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
              // Copy 8 bytes out of the buffer and let TimestampWritable decode them.
              byte[] bytes = new byte[8];

              for (int i = 0; i < bytes.length; i++) {
                bytes[i] = buffer.read(invert);
              }
              t.setBinarySortable(bytes, 0);
              return t;

            default:
              {
                throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
              }
          }
        }
      case LIST:
        {
          ListTypeInfo ltype = (ListTypeInfo) type;
          TypeInfo etype = ltype.getListElementTypeInfo();

          // Create the list if needed
          ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;

          // Read the list
          int size = 0;
          while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
              // \0 to terminate
              break;
            }
            // \1 followed by each element
            assert (more == 1);
            // Grow the list by one slot when it has no existing entry at this index.
            if (size == r.size()) {
              r.add(null);
            }
            // The existing entry (possibly null) is offered as the reuse object for the element.
            r.set(size, deserialize(buffer, etype, invert, r.get(size)));
            size++;
          }
          // Remove additional elements if the list is reused
          while (r.size() > size) {
            r.remove(r.size() - 1);
          }
          return r;
        }
      case MAP:
        {
          MapTypeInfo mtype = (MapTypeInfo) type;
          TypeInfo ktype = mtype.getMapKeyTypeInfo();
          TypeInfo vtype = mtype.getMapValueTypeInfo();

          // Create the map if needed
          Map<Object, Object> r;
          if (reuse == null) {
            r = new HashMap<Object, Object>();
          } else {
            // A reused map is emptied first; its previous entries are not reused.
            r = (HashMap<Object, Object>) reuse;
            r.clear();
          }

          while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
              // \0 to terminate
              break;
            }
            // \1 followed by each key and then each value
            assert (more == 1);
            Object k = deserialize(buffer, ktype, invert, null);
            Object v = deserialize(buffer, vtype, invert, null);
            r.put(k, v);
          }
          return r;
        }
      case STRUCT:
        {
          StructTypeInfo stype = (StructTypeInfo) type;
          List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
          int size = fieldTypes.size();
          // Create the struct if needed
          ArrayList<Object> r =
              reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
          assert (r.size() <= size);
          // Set the size of the struct
          while (r.size() < size) {
            r.add(null);
          }
          // Read one field by one field
          // (fields are read back to back, each with its own null marker; the existing entry
          // is offered as the reuse object).
          for (int eid = 0; eid < size; eid++) {
            r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
          }
          return r;
        }
      case UNION:
        {
          UnionTypeInfo utype = (UnionTypeInfo) type;
          StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
          // Read the tag
          // (the tag byte is also used as the index into the union's member type list).
          byte tag = buffer.read(invert);
          r.setTag(tag);
          r.setObject(
              deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
          return r;
        }
      default:
        {
          throw new RuntimeException("Unrecognized type: " + type.getCategory());
        }
    }
  }