/**
 * Serializes a struct-typed row into an {@link ArrayWritable}.
 *
 * <p>On success the number of top-level entries of the produced array is stored in
 * {@code serializedSize} and {@code status} is set to {@code LAST_OPERATION.SERIALIZE}.
 *
 * @param obj the row to serialize
 * @param objInspector inspector describing {@code obj}; its category must be STRUCT
 * @return the writable built by {@code createStruct}
 * @throws SerDeException if the inspector's category is not STRUCT
 */
@Override
 public Writable serialize(final Object obj, final ObjectInspector objInspector)
     throws SerDeException {
   final Category category = objInspector.getCategory();
   if (category.equals(Category.STRUCT)) {
     final ArrayWritable row = createStruct(obj, (StructObjectInspector) objInspector);
     serializedSize = row.get().length;
     status = LAST_OPERATION.SERIALIZE;
     return row;
   }
   throw new SerDeException("Cannot serialize " + category + ". Can only serialize a struct");
 }
Esempio n. 2
0
 /**
  * Returns the deserializer's object inspector as a {@link StructObjectInspector}.
  *
  * <p>The inspector's category is verified with {@code checkArgument} before the cast, so a
  * non-struct inspector is reported with the message {@code "expected STRUCT: <category>"}.
  *
  * @param deserializer deserializer whose object inspector describes the table row
  * @return the deserializer's object inspector, cast to a struct inspector
  * @throws RuntimeException wrapping the {@code SerDeException} raised while obtaining the
  *     inspector
  */
 public static StructObjectInspector getTableObjectInspector(
     @SuppressWarnings("deprecation") Deserializer deserializer) {
   try {
     ObjectInspector inspector = deserializer.getObjectInspector();
     checkArgument(
         inspector.getCategory() == Category.STRUCT,
         "expected STRUCT: %s",
         inspector.getCategory());
     return (StructObjectInspector) inspector;
   } catch (SerDeException e) {
     // Throwables.propagate is deprecated; for a checked exception it is the same as
     // wrapping the cause directly, which also keeps the original stack trace attached.
     throw new RuntimeException(e);
   }
 }
Esempio n. 3
0
 /**
  * Serializes {@code object} by delegating to the helper that matches the inspector's category.
  *
  * @param type type passed through to the category-specific helper
  * @param builder block builder passed through to the category-specific helper
  * @param object value to serialize
  * @param inspector inspector describing {@code object}
  * @return the block returned by the list, map or struct helper; {@code null} for primitives,
  *     which are only written through {@code serializePrimitive}
  * @throws RuntimeException if the category is none of PRIMITIVE, LIST, MAP or STRUCT
  */
 public static Block serializeObject(
     Type type, BlockBuilder builder, Object object, ObjectInspector inspector) {
   switch (inspector.getCategory()) {
     case STRUCT:
       return serializeStruct(type, builder, object, (StructObjectInspector) inspector);
     case MAP:
       return serializeMap(type, builder, object, (MapObjectInspector) inspector);
     case LIST:
       return serializeList(type, builder, object, (ListObjectInspector) inspector);
     case PRIMITIVE:
       serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector);
       return null;
     default:
       throw new RuntimeException(
           "Unknown object inspector category: " + inspector.getCategory());
   }
 }
  /**
   * Prints a summary of the currently opened reader to standard output: row count, file types,
   * compression settings and, for every entry of {@code m_fields}, its name and type.
   *
   * <p>Also refreshes {@code m_oi} from the reader's object inspector.
   */
  public void printFileInfo() throws Exception {
    System.out.println("Reader: " + m_reader);
    System.out.println("# Rows: " + m_reader.getNumberOfRows());

    final int typeCount = m_types.size();
    System.out.println("# Types in the file: " + typeCount);
    for (int typeIdx = 0; typeIdx < typeCount; typeIdx++) {
      System.out.println("Type " + typeIdx + ": " + m_types.get(typeIdx).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    // The compression size is only reported when the file is actually compressed.
    if (m_reader.getCompression() != CompressionKind.NONE) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    m_oi = (StructObjectInspector) m_reader.getObjectInspector();
    System.out.println("object inspector type category: " + m_oi.getCategory());
    System.out.println("object inspector type name    : " + m_oi.getTypeName());

    final int columnCount = m_fields.size();
    System.out.println("Number of columns in the table: " + columnCount);

    // Per-column name and type information.
    for (int col = 0; col < columnCount; col++) {
      final String label = "Column " + col;
      System.out.println(label + " name: " + m_fields.get(col).getFieldName());
      ObjectInspector columnInspector = m_fields.get(col).getFieldObjectInspector();
      System.out.println(label + " type category: " + columnInspector.getCategory());
      System.out.println(label + " type name: " + columnInspector.getTypeName());
    }
  }
Esempio n. 5
0
  /**
   * Serializes a struct row into a single delimited line held by {@code serializeCache}.
   *
   * <p>Fields are joined with {@code separator}. Primitive fields are written with
   * {@code toString()} ({@code nullString} when the value is null); every other field is
   * written as the JSON produced by {@code SerDeUtils.getJSONString}.
   *
   * @param obj the row to serialize
   * @param objInspector inspector describing {@code obj}; its category must be STRUCT
   * @return {@code serializeCache}, updated with the serialized row
   * @throws SerDeException if the inspector's category is not STRUCT
   */
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }

    StructObjectInspector structInspector = (StructObjectInspector) objInspector;
    List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();

    StringBuilder row = new StringBuilder();
    boolean firstField = true;
    for (StructField fieldRef : fieldRefs) {
      if (!firstField) {
        row.append(separator);
      }
      firstField = false;

      Object value = structInspector.getStructFieldData(obj, fieldRef);
      ObjectInspector fieldInspector = fieldRef.getFieldObjectInspector();
      if (fieldInspector.getCategory() != Category.PRIMITIVE) {
        // Complex values are rendered as JSON.
        row.append(SerDeUtils.getJSONString(value, fieldInspector));
      } else {
        // Primitive values are rendered as plain strings.
        row.append(value == null ? nullString : value.toString());
      }
    }

    serializeCache.set(row.toString());
    return serializeCache;
  }
  /**
   * Recursively maps an object inspector to a replacement inspector.
   *
   * <p>Primitives become the primitive Java object inspector for the same primitive category,
   * lists and maps become standard list/map inspectors over their converted element inspectors,
   * and structs become a {@code JsonStructObjectInspector} over the converted field inspectors.
   * Any other category is returned unchanged.
   *
   * @param arg inspector to convert
   * @return the converted inspector, or {@code arg} itself for unhandled categories
   */
  private ObjectInspector solveOi(ObjectInspector arg) {
    switch (arg.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) arg;
          return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
              primitive.getPrimitiveCategory());
        }
      case LIST:
        {
          ListObjectInspector list = (ListObjectInspector) arg;
          ObjectInspector elementOi = solveOi(list.getListElementObjectInspector());
          return ObjectInspectorFactory.getStandardListObjectInspector(elementOi);
        }
      case MAP:
        {
          MapObjectInspector map = (MapObjectInspector) arg;
          ObjectInspector keyOi = solveOi(map.getMapKeyObjectInspector());
          ObjectInspector valueOi = solveOi(map.getMapValueObjectInspector());
          return ObjectInspectorFactory.getStandardMapObjectInspector(keyOi, valueOi);
        }
      case STRUCT:
        {
          StructObjectInspector struct = (StructObjectInspector) arg;
          int fieldCount = struct.getAllStructFieldRefs().size();
          ArrayList<String> fieldNames = new ArrayList<String>(fieldCount);
          ArrayList<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>(fieldCount);

          // Names and converted inspectors are collected in field order.
          for (StructField field : struct.getAllStructFieldRefs()) {
            fieldNames.add(field.getFieldName());
            fieldInspectors.add(solveOi(field.getFieldObjectInspector()));
          }

          return JsonStructObjectInspector.getJsonStructObjectInspector(
              fieldNames, fieldInspectors);
        }
      default:
        return arg;
    }
  }
  /**
   * Extracts the value of the struct field named {@code fieldName} from a {@code HiveType}.
   *
   * @param target candidate object; only {@code HiveType} instances backed by a struct inspector
   *     are handled
   * @return the field value's {@code toString()}, {@code StringUtils.EMPTY} when the value is
   *     null or a {@code NullWritable}, or {@code null} when {@code target} is not handled
   */
  @Override
  protected String extractField(Object target) {
    if (!(target instanceof HiveType)) {
      return null;
    }
    HiveType hiveType = (HiveType) target;

    ObjectInspector rowInspector = hiveType.getObjectInspector();
    if (!(rowInspector instanceof StructObjectInspector)) {
      return null;
    }
    StructObjectInspector structInspector = (StructObjectInspector) rowInspector;

    StructField fieldRef = structInspector.getStructFieldRef(fieldName);
    ObjectInspector fieldInspector = fieldRef.getFieldObjectInspector();
    Assert.isTrue(
        fieldInspector.getCategory() == ObjectInspector.Category.PRIMITIVE,
        String.format(
            "Field [%s] needs to be a primitive; found [%s]",
            fieldName, fieldInspector.getTypeName()));

    // The value is rendered through toString(); null-like values map to the empty string.
    Object value = structInspector.getStructFieldData(hiveType.getObject(), fieldRef);
    boolean absent = value == null || value instanceof NullWritable;
    return absent ? StringUtils.EMPTY : value.toString();
  }
Esempio n. 8
0
 /**
  * Casts the given inspector to a {@link PrimitiveObjectInspector} after checking that its
  * primitive category is one of BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING or TIMESTAMP.
  *
  * @param argOI inspector of the argument to validate
  * @return {@code argOI} cast to a primitive inspector
  * @throws UDFArgumentTypeException (argument index 0) if the inspector is not primitive or its
  *     primitive category is not in the accepted set
  */
 public static PrimitiveObjectInspector asDoubleCompatibleOI(@Nonnull final ObjectInspector argOI)
     throws UDFArgumentTypeException {
   if (argOI.getCategory() != Category.PRIMITIVE) {
     throw new UDFArgumentTypeException(
         0,
         "Only primitive type arguments are accepted but " + argOI.getTypeName() + " is passed.");
   }

   final PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) argOI;
   switch (primitiveOI.getPrimitiveCategory()) {
     case BYTE:
     case SHORT:
     case INT:
     case LONG:
     case FLOAT:
     case DOUBLE:
     case STRING:
     case TIMESTAMP:
       return primitiveOI;
     default:
       throw new UDFArgumentTypeException(
           0,
           "Only numeric or string type arguments are accepted but "
               + argOI.getTypeName()
               + " is passed.");
   }
 }
  /**
   * Serializes a struct row into a {@code VoltRecord} for the configured table.
   *
   * <p>The first {@code m_oig.getColumnTypes().size()} fields are converted to their primitive
   * Java objects and appended in order; timestamp values are re-wrapped as {@code Date}.
   *
   * @param obj the row to serialize
   * @param oi inspector describing {@code obj}; its category must be STRUCT
   * @return the populated record
   * @throws SerDeException (as {@code VoltSerdeException}) if the category is not STRUCT
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
    if (oi.getCategory() != Category.STRUCT) {
      throw new VoltSerdeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + oi.getTypeName());
    }

    VoltRecord record = new VoltRecord(m_voltConf.getTableName());
    StructObjectInspector structInspector = (StructObjectInspector) oi;
    List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
    List<Object> rawValues = structInspector.getStructFieldsDataAsList(obj);

    final int columnCount = m_oig.getColumnTypes().size();
    for (int col = 0; col < columnCount; ++col) {
      PrimitiveObjectInspector primitiveInspector =
          (PrimitiveObjectInspector) fieldRefs.get(col).getFieldObjectInspector();

      Object value = primitiveInspector.getPrimitiveJavaObject(rawValues.get(col));
      boolean isTimestamp =
          primitiveInspector.getTypeInfo().equals(TypeInfoFactory.timestampTypeInfo);
      if (isTimestamp && value != null) {
        // Timestamps are handed to the record as Date instances.
        value = new Date(((Timestamp) value).getTime());
      }
      record.add(value);
    }

    return record;
  }
Esempio n. 10
0
 /**
  * Casts the given inspector to a {@link PrimitiveObjectInspector}.
  *
  * @param oi inspector to cast
  * @return {@code oi} as a primitive inspector
  * @throws UDFArgumentException if the inspector's category is not PRIMITIVE
  */
 public static PrimitiveObjectInspector asPrimitiveObjectInspector(
     @Nonnull final ObjectInspector oi) throws UDFArgumentException {
   final boolean primitive = oi.getCategory() == Category.PRIMITIVE;
   if (primitive) {
     return (PrimitiveObjectInspector) oi;
   }
   throw new UDFArgumentException(
       "Is not PrimitiveObjectInspector: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
 }
Esempio n. 11
0
 /**
  * Casts the given inspector to a {@link ListObjectInspector}.
  *
  * @param oi inspector to cast
  * @return {@code oi} as a list inspector
  * @throws UDFArgumentException if the inspector's category is not LIST
  */
 @Nonnull
 public static ListObjectInspector asListOI(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (oi.getCategory() == Category.LIST) {
     return (ListObjectInspector) oi;
   }
   throw new UDFArgumentException("Expected List OI but was: " + oi);
 }
  /**
   * Converts a value into its {@link Writable} form by dispatching on the inspector's category.
   *
   * @param obj value to convert; {@code null} yields {@code null}
   * @param inspector inspector describing {@code obj}
   * @return the writable produced by the category-specific helper, or {@code null} when
   *     {@code obj} is null
   * @throws SerDeException if the category is none of STRUCT, LIST, MAP or PRIMITIVE
   */
  private Writable createObject(final Object obj, final ObjectInspector inspector)
      throws SerDeException {
    if (obj == null) {
      return null;
    }

    switch (inspector.getCategory()) {
      case STRUCT:
        return createStruct(obj, (StructObjectInspector) inspector);
      case LIST:
        return createArray(obj, (ListObjectInspector) inspector);
      case MAP:
        return createMap(obj, (MapObjectInspector) inspector);
      case PRIMITIVE:
        return createPrimitive(obj, (PrimitiveObjectInspector) inspector);
      default:
        // Trailing space keeps the category readable instead of fused to the message text.
        throw new SerDeException("Unknown data type " + inspector.getCategory());
    }
  }
Esempio n. 13
0
  /**
   * Serializes a field. Since structures can be nested, this may be called recursively, for
   * instance when serializing a {@code list<struct<>>}.
   *
   * <p>Primitive categories that are not listed in the inner switch fall through and produce
   * {@code null}.
   *
   * @param obj object holding the field's content; {@code null} yields {@code null}
   * @param oi the field's object inspector
   * @return the serialized object, or {@code null} for null/void/unhandled values
   * @throws RuntimeException if the primitive category is UNKNOWN
   */
  Object serializeField(Object obj, ObjectInspector oi) {
    if (obj == null) {
      return null;
    }

    Object result = null;
    switch (oi.getCategory()) {
      case PRIMITIVE:
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        switch (poi.getPrimitiveCategory()) {
          case VOID:
            result = null;
            break;
          case BOOLEAN:
            result = (((BooleanObjectInspector) poi).get(obj) ? Boolean.TRUE : Boolean.FALSE);
            break;
          case BYTE:
            // A byte inspector is not a ShortObjectInspector; casting it to one throws
            // ClassCastException. Fully qualified because the import block is not visible here.
            result =
                ((org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) poi)
                    .get(obj);
            break;
          case DOUBLE:
            result = (((DoubleObjectInspector) poi).get(obj));
            break;
          case FLOAT:
            result = (((FloatObjectInspector) poi).get(obj));
            break;
          case INT:
            result = (((IntObjectInspector) poi).get(obj));
            break;
          case LONG:
            result = (((LongObjectInspector) poi).get(obj));
            break;
          case SHORT:
            result = (((ShortObjectInspector) poi).get(obj));
            break;
          case STRING:
            result = (((StringObjectInspector) poi).getPrimitiveJavaObject(obj));
            break;
          case UNKNOWN:
            throw new RuntimeException("Unknown primitive");
        }
        break;
      case MAP:
        result = serializeMap(obj, (MapObjectInspector) oi);
        break;
      case LIST:
        result = serializeList(obj, (ListObjectInspector) oi);
        break;
      case STRUCT:
        result = serializeStruct(obj, (StructObjectInspector) oi, null);
        break;
    }
    return result;
  }
Esempio n. 14
0
    /*
     * Validates the (symbol name, symbol expression) argument pairs found between the first and
     * the last argument and registers each pair in evaluator.symInfo. A symbol name must be a
     * constant string; a symbol expression must be a boolean primitive.
     */
    private void validateAndSetupSymbolInfo(
        MatchPath evaluator, List<PTFExpressionDef> args, int argsNum) throws SemanticException {
      final int symbolArgCount = argsNum - 2;
      if (symbolArgCount % 2 != 0) {
        throwErrorWithSignature(
            "Symbol Name, Expression need to be specified in pairs: "
                + "there are odd number of symbol args");
      }

      evaluator.symInfo = new SymbolsInfo(symbolArgCount / 2);
      for (int nameIdx = 1; nameIdx <= symbolArgCount; nameIdx += 2) {
        // Symbol name: constant string.
        PTFExpressionDef nameArg = args.get(nameIdx);
        ObjectInspector nameOI = nameArg.getOI();
        boolean nameIsConstantString =
            ObjectInspectorUtils.isConstantObjectInspector(nameOI)
                && nameOI.getCategory() == ObjectInspector.Category.PRIMITIVE
                && ((PrimitiveObjectInspector) nameOI).getPrimitiveCategory()
                    == PrimitiveObjectInspector.PrimitiveCategory.STRING;
        if (!nameIsConstantString) {
          throwErrorWithSignature(
              String.format(
                  "Currently a Symbol Name(%s) must be a Constant String",
                  nameArg.getExpressionTreeString()));
        }
        String symbolName =
            ((ConstantObjectInspector) nameOI).getWritableConstantValue().toString();

        // Symbol expression: boolean primitive, located right after its name.
        PTFExpressionDef exprArg = args.get(nameIdx + 1);
        ObjectInspector exprOI = exprArg.getOI();
        boolean exprIsBoolean =
            exprOI.getCategory() == ObjectInspector.Category.PRIMITIVE
                && ((PrimitiveObjectInspector) exprOI).getPrimitiveCategory()
                    == PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN;
        if (!exprIsBoolean) {
          throwErrorWithSignature(
              String.format(
                  "Currently a Symbol Expression(%s) must be a boolean expression",
                  exprArg.getExpressionTreeString()));
        }

        evaluator.symInfo.add(symbolName, exprArg);
      }
    }
  /**
   * Builds a converter from {@code arg} to its target inspector.
   *
   * <p>Primitives convert to themselves; lists, maps and structs convert to the inspector
   * returned by {@code solveOi}.
   *
   * @param arg source inspector
   * @return the converter, or {@code null} for any other category
   */
  private ObjectInspectorConverters.Converter getConverter(ObjectInspector arg) {
    final ObjectInspector target;
    switch (arg.getCategory()) {
      case PRIMITIVE:
        target = arg;
        break;
      case LIST:
      case MAP:
      case STRUCT:
        target = solveOi(arg);
        break;
      default:
        return null;
    }
    return ObjectInspectorConverters.getConverter(arg, target);
  }
Esempio n. 16
0
  /**
   * Creates the LazyBinary object that matches the category of the given object inspector.
   *
   * @param oi inspector selecting which LazyBinary object to create; for MAP, LIST and STRUCT it
   *     is cast to the corresponding LazyBinary inspector type
   * @return the newly created LazyBinary object
   * @throws RuntimeException if the category is none of PRIMITIVE, MAP, LIST or STRUCT
   */
  public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) {
    switch (oi.getCategory()) {
      case STRUCT:
        return new LazyBinaryStruct((LazyBinaryStructObjectInspector) oi);
      case LIST:
        return new LazyBinaryArray((LazyBinaryListObjectInspector) oi);
      case MAP:
        return new LazyBinaryMap((LazyBinaryMapObjectInspector) oi);
      case PRIMITIVE:
        return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector) oi);
      default:
        throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
    }
  }
Esempio n. 17
0
    /*
     * Checks that the first argument is a constant string and stores its value in
     * evaluator.patternStr.
     */
    private void validateAndSetupPatternStr(MatchPath evaluator, List<PTFExpressionDef> args)
        throws SemanticException {
      ObjectInspector patternOI = args.get(0).getOI();

      boolean isConstantString =
          ObjectInspectorUtils.isConstantObjectInspector(patternOI)
              && patternOI.getCategory() == ObjectInspector.Category.PRIMITIVE
              && ((PrimitiveObjectInspector) patternOI).getPrimitiveCategory()
                  == PrimitiveObjectInspector.PrimitiveCategory.STRING;
      if (!isConstantString) {
        throwErrorWithSignature("Currently the symbol Pattern must be a Constant String.");
      }

      ConstantObjectInspector constantOI = (ConstantObjectInspector) patternOI;
      evaluator.patternStr = constantOI.getWritableConstantValue().toString();
    }
Esempio n. 18
0
    /*
     * Checks that the last argument (index argsNum - 1) is a constant string and stores its
     * value in evaluator.resultExprStr.
     */
    private void validateAndSetupResultExprStr(
        MatchPath evaluator, List<PTFExpressionDef> args, int argsNum) throws SemanticException {
      ObjectInspector resultOI = args.get(argsNum - 1).getOI();

      boolean isConstantString =
          ObjectInspectorUtils.isConstantObjectInspector(resultOI)
              && resultOI.getCategory() == ObjectInspector.Category.PRIMITIVE
              && ((PrimitiveObjectInspector) resultOI).getPrimitiveCategory()
                  == PrimitiveObjectInspector.PrimitiveCategory.STRING;
      if (!isConstantString) {
        throwErrorWithSignature("Currently the result Expr parameter must be a Constant String.");
      }

      ConstantObjectInspector constantOI = (ConstantObjectInspector) resultOI;
      evaluator.resultExprStr = constantOI.getWritableConstantValue().toString();
    }
Esempio n. 19
0
  /**
   * Converts a lazily deserialized value into a standard Java object.
   *
   * <p>The value is first copied with {@code ObjectInspectorCopyOption.JAVA}. Binary values are
   * then returned as a {@code String} built from their bytes, non-primitive values as the JSON
   * string of the original object, and every other primitive as the copied Java object.
   *
   * <p>NOTE(review): {@code new String(byte[])} decodes with the platform default charset —
   * confirm that this is intended.
   *
   * @param o the value to convert
   * @param oi inspector describing {@code o}
   * @return the converted value, or {@code null} when the copied object is null
   */
  private static Object convertLazyToJava(Object o, ObjectInspector oi) {
    final Object standard =
        ObjectInspectorUtils.copyToStandardObject(o, oi, ObjectInspectorCopyOption.JAVA);
    if (standard == null) {
      return null;
    }

    if (oi.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
      return new String((byte[]) standard);
    }

    // Non-primitive values are exposed as JSON strings for now.
    final boolean primitive = oi.getCategory() == ObjectInspector.Category.PRIMITIVE;
    return primitive ? standard : SerDeUtils.getJSONString(o, oi);
  }
Esempio n. 20
0
  /**
   * Hive will call this to serialize an object. Returns a writable object of the same class
   * returned by <a href="#getSerializedClass">getSerializedClass</a>.
   *
   * <p>The row is rendered as JSON via {@code serializeStruct} and wrapped in a {@link Text};
   * {@code serializedDataSize} is updated with the number of valid bytes of that text.
   *
   * @param obj The object to serialize
   * @param objInspector The ObjectInspector that knows about the object's structure
   * @return a serialized object in form of a Writable. Must be the same type returned by <a
   *     href="#getSerializedClass">getSerializedClass</a>
   * @throws SerDeException if the inspector's category is not STRUCT
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // make sure it is a struct record
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }

    JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames);

    Text t = new Text(serializer.toString());

    // Text.getBytes() exposes the backing array, whose length can exceed the encoded data;
    // getLength() is the number of valid bytes.
    serializedDataSize = t.getLength();
    return t;
  }
Esempio n. 21
0
 /**
  * Converts a value into the object placed in a Thrift payload.
  *
  * <p>Non-primitive values are returned as their JSON string. Primitive values are copied to
  * standard Java objects; a binary value is additionally turned into a {@code String} when
  * {@code version < 5}.
  *
  * <p>NOTE(review): the inline comment mentions HIVE_CLI_SERVICE_PROTOCOL_V6 while the code
  * compares against the literal 5 — confirm the constant's numeric value.
  *
  * @param val the value to convert
  * @param valOI inspector describing {@code val}
  * @param version protocol version used to decide how binary values are represented
  * @return the converted value; {@code null} for a null primitive
  */
 public static Object toThriftPayload(Object val, ObjectInspector valOI, int version) {
   if (valOI.getCategory() != ObjectInspector.Category.PRIMITIVE) {
     // for now, expose non-primitive as a string
     // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
     return SerDeUtils.getJSONString(val, valOI);
   }
   if (val == null) {
     return null;
   }

   final Object javaValue =
       ObjectInspectorUtils.copyToStandardObject(
           val, valOI, ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);

   // uses string type for binary before HIVE_CLI_SERVICE_PROTOCOL_V6 (todo HIVE-5269)
   final boolean binaryAsString =
       version < 5
           && ((PrimitiveObjectInspector) valOI).getPrimitiveCategory()
               == PrimitiveObjectInspector.PrimitiveCategory.BINARY;
   return binaryAsString ? new String((byte[]) javaValue) : javaValue;
 }
Esempio n. 22
0
  /*
   * Fills objectArrayBuffer with one entry per struct field and returns it.
   * Primitive fields keep LazyBinary's own object; complex fields are replaced by a standard
   * copy made with ObjectInspectorCopyOption.WRITABLE.
   * The buffer is written with set(i, ...), so it must already hold a slot for every field.
   */
  public static List<Object> getComplexFieldsAsList(
      LazyBinaryStruct lazyBinaryStruct,
      ArrayList<Object> objectArrayBuffer,
      LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {

    List<? extends StructField> fieldRefs =
        lazyBinaryStructObjectInspector.getAllStructFieldRefs();
    for (int slot = 0; slot < fieldRefs.size(); slot++) {
      ObjectInspector fieldInspector = fieldRefs.get(slot).getFieldObjectInspector();
      boolean primitive = fieldInspector.getCategory() == Category.PRIMITIVE;
      Object lazyValue = lazyBinaryStruct.getField(slot);

      Object stored =
          primitive
              ? lazyValue
              : ObjectInspectorUtils.copyToStandardObject(
                  lazyValue, fieldInspector, ObjectInspectorCopyOption.WRITABLE);
      objectArrayBuffer.set(slot, stored);
    }
    return objectArrayBuffer;
  }
 /**
  * Determines, for the field starting at {@code offset}, how many leading bytes precede its
  * data ({@code recordInfo.elementOffset}) and how many bytes it occupies
  * ({@code recordInfo.elementSize}).
  *
  * <ul>
  *   <li>void, boolean, byte, short, float and double: offset 0 and a fixed size of 0, 1, 1, 2,
  *       4 and 8 bytes respectively;
  *   <li>int, long and date: offset 0, size taken from {@code WritableUtils.decodeVIntSize} of
  *       the first byte;
  *   <li>string, char, varchar and binary: a VInt is read at {@code offset}; its encoded length
  *       becomes the element offset and its value the element size;
  *   <li>timestamp: offset 0, size from {@code TimestampWritable.getTotalLength};
  *   <li>decimal: offset 0; the size is the encoded length of a first VInt plus the encoded
  *       length and the value of a second VInt that follows it;
  *   <li>list, map, struct and union: offset 4, size read as an int from the four bytes at
  *       {@code offset}.
  * </ul>
  *
  * @param objectInspector object inspector of the field
  * @param bytes byte array storing the table row
  * @param offset offset of this field within {@code bytes}
  * @param recordInfo receives the computed element offset and size
  * @param vInt scratch object overwritten whenever a VInt has to be read
  * @throws RuntimeException if the category or the primitive category is not recognized
  */
 public static void checkObjectByteInfo(
     ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo, VInt vInt) {
   Category category = objectInspector.getCategory();
   switch (category) {
     case PRIMITIVE:
       PrimitiveCategory primitiveCategory =
           ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
       switch (primitiveCategory) {
         // Fixed-width types.
         case VOID:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 0;
           break;
         case BOOLEAN:
         case BYTE:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 1;
           break;
         case SHORT:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 2;
           break;
         case FLOAT:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 4;
           break;
         case DOUBLE:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = 8;
           break;
         // Types whose size is given by the first byte of their variable-length encoding.
         case INT:
         case LONG:
         case DATE:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
           break;
         // Types prefixed with a VInt holding the payload size (instead of 4 bytes).
         case STRING:
         case CHAR:
         case VARCHAR:
         case BINARY:
           LazyBinaryUtils.readVInt(bytes, offset, vInt);
           recordInfo.elementOffset = vInt.length;
           recordInfo.elementSize = vInt.value;
           break;
         case TIMESTAMP:
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset);
           break;
         case DECIMAL:
           // Two consecutive VInts: the first contributes only its encoded length, the second
           // its encoded length plus its value.
           LazyBinaryUtils.readVInt(bytes, offset, vInt);
           recordInfo.elementOffset = 0;
           recordInfo.elementSize = vInt.length;
           LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt);
           recordInfo.elementSize += vInt.length + vInt.value;
           break;
         default:
           throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
       }
       break;
     case LIST:
     case MAP:
     case STRUCT:
     case UNION:
       // Complex types carry their size in a four-byte prefix.
       recordInfo.elementOffset = 4;
       recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
       break;
     default:
       throw new RuntimeException("Unrecognized non-primitive type: " + category);
   }
 }
Esempio n. 24
0
  /**
   * Appends a JSON rendering of {@code o}, as described by {@code oi}, to {@code sb}.
   *
   * <p>A null value at this level is written as {@code nullStr}; nested elements, keys, values
   * and fields are rendered recursively with {@code JSON_NULL} as their null text. String, char,
   * varchar, date and timestamp values are wrapped in double quotes (the first three after
   * {@code escapeString}); binary and decimal values are appended without quotes.
   *
   * @param sb builder receiving the output
   * @param o value to render
   * @param oi inspector describing {@code o}
   * @param nullStr text appended when {@code o} (or the list/map it represents) is null
   * @throws RuntimeException if the category or primitive category is not handled
   */
  static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
    switch (oi.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
          if (o == null) {
            sb.append(nullStr);
            break;
          }
          switch (primitiveOI.getPrimitiveCategory()) {
            case BOOLEAN:
              sb.append(((BooleanObjectInspector) primitiveOI).get(o) ? "true" : "false");
              break;
            case BYTE:
              sb.append(((ByteObjectInspector) primitiveOI).get(o));
              break;
            case SHORT:
              sb.append(((ShortObjectInspector) primitiveOI).get(o));
              break;
            case INT:
              sb.append(((IntObjectInspector) primitiveOI).get(o));
              break;
            case LONG:
              sb.append(((LongObjectInspector) primitiveOI).get(o));
              break;
            case FLOAT:
              sb.append(((FloatObjectInspector) primitiveOI).get(o));
              break;
            case DOUBLE:
              sb.append(((DoubleObjectInspector) primitiveOI).get(o));
              break;
            case STRING:
              sb.append('"')
                  .append(
                      escapeString(((StringObjectInspector) primitiveOI).getPrimitiveJavaObject(o)))
                  .append('"');
              break;
            case CHAR:
              sb.append('"')
                  .append(
                      escapeString(
                          ((HiveCharObjectInspector) primitiveOI)
                              .getPrimitiveJavaObject(o)
                              .toString()))
                  .append('"');
              break;
            case VARCHAR:
              sb.append('"')
                  .append(
                      escapeString(
                          ((HiveVarcharObjectInspector) primitiveOI)
                              .getPrimitiveJavaObject(o)
                              .toString()))
                  .append('"');
              break;
            case DATE:
              sb.append('"')
                  .append(((DateObjectInspector) primitiveOI).getPrimitiveWritableObject(o))
                  .append('"');
              break;
            case TIMESTAMP:
              sb.append('"')
                  .append(((TimestampObjectInspector) primitiveOI).getPrimitiveWritableObject(o))
                  .append('"');
              break;
            case BINARY:
              {
                // Only the valid prefix of the writable's backing array is decoded.
                BytesWritable binary =
                    ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
                Text decoded = new Text();
                decoded.set(binary.getBytes(), 0, binary.getLength());
                sb.append(decoded.toString());
                break;
              }
            case DECIMAL:
              sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
              break;
            default:
              throw new RuntimeException(
                  "Unknown primitive type: " + primitiveOI.getPrimitiveCategory());
          }
          break;
        }
      case LIST:
        {
          ListObjectInspector listOI = (ListObjectInspector) oi;
          ObjectInspector elementOI = listOI.getListElementObjectInspector();
          List<?> elements = listOI.getList(o);
          if (elements == null) {
            sb.append(nullStr);
            break;
          }
          sb.append(LBRACKET);
          for (int idx = 0; idx < elements.size(); idx++) {
            if (idx > 0) {
              sb.append(COMMA);
            }
            buildJSONString(sb, elements.get(idx), elementOI, JSON_NULL);
          }
          sb.append(RBRACKET);
          break;
        }
      case MAP:
        {
          MapObjectInspector mapOI = (MapObjectInspector) oi;
          ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
          ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
          Map<?, ?> entries = mapOI.getMap(o);
          if (entries == null) {
            sb.append(nullStr);
            break;
          }
          sb.append(LBRACE);
          boolean needsComma = false;
          for (Map.Entry<?, ?> entry : entries.entrySet()) {
            if (needsComma) {
              sb.append(COMMA);
            }
            needsComma = true;
            buildJSONString(sb, entry.getKey(), keyOI, JSON_NULL);
            sb.append(COLON);
            buildJSONString(sb, entry.getValue(), valueOI, JSON_NULL);
          }
          sb.append(RBRACE);
          break;
        }
      case STRUCT:
        {
          StructObjectInspector structOI = (StructObjectInspector) oi;
          List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
          if (o == null) {
            sb.append(nullStr);
            break;
          }
          sb.append(LBRACE);
          for (int idx = 0; idx < fieldRefs.size(); idx++) {
            StructField fieldRef = fieldRefs.get(idx);
            if (idx > 0) {
              sb.append(COMMA);
            }
            sb.append(QUOTE);
            sb.append(fieldRef.getFieldName());
            sb.append(QUOTE);
            sb.append(COLON);
            buildJSONString(
                sb,
                structOI.getStructFieldData(o, fieldRef),
                fieldRef.getFieldObjectInspector(),
                JSON_NULL);
          }
          sb.append(RBRACE);
          break;
        }
      case UNION:
        {
          UnionObjectInspector unionOI = (UnionObjectInspector) oi;
          if (o == null) {
            sb.append(nullStr);
            break;
          }
          // Rendered as {tag:value}, the value using the inspector selected by the tag.
          sb.append(LBRACE);
          sb.append(unionOI.getTag(o));
          sb.append(COLON);
          buildJSONString(
              sb,
              unionOI.getField(o),
              unionOI.getObjectInspectors().get(unionOI.getTag(o)),
              JSON_NULL);
          sb.append(RBRACE);
          break;
        }
      default:
        throw new RuntimeException("Unknown type in ObjectInspector!");
    }
  }
Esempio n. 25
0
  /**
   * Prints the metadata and every row of the ORC file at {@code m_file_path} to standard output.
   *
   * <p>This method is just for experimentation. As a side effect it assigns {@code m_reader},
   * {@code m_types} and {@code m_fields}. Output consists of the row count, the kind of each type,
   * the compression settings, the name and type of each column, and finally one line per row in
   * which every field value is followed by {@code '|'} and null values are left empty.
   *
   * @throws Exception propagated from the ORC reader and record-reader calls
   */
  public void testRead() throws Exception {

    m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf));

    System.out.println("Reader: " + m_reader);

    System.out.println("# Rows: " + m_reader.getNumberOfRows());
    m_types = m_reader.getTypes();
    System.out.println("# Types in the file: " + m_types.size());

    for (int i = 0; i < m_types.size(); i++) {
      System.out.println("Type " + i + ": " + m_types.get(i).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    if (m_reader.getCompression() != CompressionKind.NONE) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    StructObjectInspector rowInspector = (StructObjectInspector) m_reader.getObjectInspector();

    System.out.println("object inspector type category: " + rowInspector.getCategory());
    System.out.println("object inspector type name    : " + rowInspector.getTypeName());

    m_fields = rowInspector.getAllStructFieldRefs();
    System.out.println("Number of columns in the table: " + m_fields.size());

    RecordReader rows = m_reader.rows();
    try {
      // Print the type info:
      for (int i = 0; i < m_fields.size(); i++) {
        System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName());
        ObjectInspector fieldInspector = m_fields.get(i).getFieldObjectInspector();
        System.out.println("Column " + i + " type category: " + fieldInspector.getCategory());
        System.out.println("Column " + i + " type name: " + fieldInspector.getTypeName());
      }

      OrcStruct row = null;
      StringBuilder line = new StringBuilder(1024);
      while (rows.hasNext()) {
        // The previous row is handed back to next(); presumably the reader reuses it.
        row = (OrcStruct) rows.next(row);
        line.setLength(0);
        for (int i = 0; i < m_fields.size(); i++) {
          Object fieldValue = row.getFieldValue(i);
          if (fieldValue != null) {
            line.append(fieldValue);
          }
          line.append('|');
        }
        System.out.println(line);
      }
      // NOTE(review): field values can presumably be cast to a concrete writable according to
      // the type kind (e.g. IntWritable when the kind is OrcProto.Type.Kind.INT) - confirm
      // before relying on it.
    } finally {
      // Release the record reader even when reading a row fails.
      rows.close();
    }
  }
Esempio n. 26
0
 /**
  * Reports whether the given object, or anything nested inside it, is null.
  *
  * <p>A primitive is null when the object itself is null. A list or map counts as null when the
  * inspector yields no list/map for it, and otherwise when any element, key or value is
  * (recursively) null. A struct or union counts as null when the object is null, and otherwise
  * when any struct field, or the union's current field, is (recursively) null. Empty containers
  * contain no nulls.
  *
  * @param o The object
  * @param oi The ObjectInspector
  * @return true if the object or any value nested inside it is null, false otherwise
  * @throws RuntimeException if the inspector's category is not handled here
  */
 public static boolean hasAnyNullObject(Object o, ObjectInspector oi) {
   switch (oi.getCategory()) {
     case PRIMITIVE:
       return o == null;
     case LIST:
       {
         ListObjectInspector listInspector = (ListObjectInspector) oi;
         ObjectInspector elementInspector = listInspector.getListElementObjectInspector();
         List<?> elements = listInspector.getList(o);
         if (elements == null) {
           return true;
         }
         // An empty list never enters the loop, so it reports no nulls.
         for (Object element : elements) {
           if (hasAnyNullObject(element, elementInspector)) {
             return true;
           }
         }
         return false;
       }
     case MAP:
       {
         MapObjectInspector mapInspector = (MapObjectInspector) oi;
         ObjectInspector keyInspector = mapInspector.getMapKeyObjectInspector();
         ObjectInspector valueInspector = mapInspector.getMapValueObjectInspector();
         Map<?, ?> entries = mapInspector.getMap(o);
         if (entries == null) {
           return true;
         }
         // An empty map never enters the loop, so it reports no nulls.
         for (Map.Entry<?, ?> entry : entries.entrySet()) {
           boolean keyHasNull = hasAnyNullObject(entry.getKey(), keyInspector);
           if (keyHasNull || hasAnyNullObject(entry.getValue(), valueInspector)) {
             return true;
           }
         }
         return false;
       }
     case STRUCT:
       {
         StructObjectInspector structInspector = (StructObjectInspector) oi;
         List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
         if (o == null) {
           return true;
         }
         // A struct without fields never enters the loop, so it reports no nulls.
         for (StructField field : fields) {
           Object fieldData = structInspector.getStructFieldData(o, field);
           if (hasAnyNullObject(fieldData, field.getFieldObjectInspector())) {
             return true;
           }
         }
         return false;
       }
     case UNION:
       {
         UnionObjectInspector unionInspector = (UnionObjectInspector) oi;
         if (o == null) {
           return true;
         }
         // A union that declares no alternatives has nothing to inspect.
         if (unionInspector.getObjectInspectors().size() == 0) {
           return false;
         }
         // Only the alternative selected by the tag is examined.
         Object currentField = unionInspector.getField(o);
         ObjectInspector currentInspector =
             unionInspector.getObjectInspectors().get(unionInspector.getTag(o));
         return hasAnyNullObject(currentField, currentInspector);
       }
     default:
       throw new RuntimeException("Unknown type in ObjectInspector!");
   }
 }
  /**
   * Validates the UDF argument and declares the shape of the UDF result.
   *
   * <p>The initialize method is called only once during the lifetime of the UDF. It requires
   * exactly one argument whose inspector is a primitive string inspector, stores that inspector
   * and the {@code Webrequest} instance for later use, records the position of every output field
   * and allocates the {@code result} array with one slot per field.
   *
   * <p>The returned inspector describes a struct with the fields {@code project_class} (string),
   * {@code project} (string), {@code qualifiers} (list of strings) and {@code tld} (string), in
   * that order.
   *
   * @param arguments inspectors of the arguments passed to the UDF
   * @return a standard struct ObjectInspector for the four output fields
   * @throws UDFArgumentException if the number of arguments is not 1 or the argument is not a
   *     string
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {

    if (arguments.length != 1) {
      throw new UDFArgumentLengthException(
          "The HostNormalizerUDF takes an array with only 1 element as argument");
    }

    int argIndex = 0;
    ObjectInspector arg = arguments[0];

    // The primitive category is only looked up once the inspector is known to be primitive;
    // a non-primitive and a non-string primitive are rejected with the same message.
    boolean isStringArgument =
        arg.getCategory() == Category.PRIMITIVE
            && ((PrimitiveObjectInspector) arg).getPrimitiveCategory() == PrimitiveCategory.STRING;
    if (!isStringArgument) {
      throw new UDFArgumentTypeException(
          argIndex,
          "A string argument was expected but an argument of type "
              + arg.getTypeName()
              + " was given.");
    }

    // Instantiate the Webrequest
    webrequest = Webrequest.getInstance();
    argumentOI = (StringObjectInspector) arg;

    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    List<String> fieldNames = new LinkedList<>();
    List<ObjectInspector> fieldOIs = new LinkedList<>();

    // Each index is the position the field is about to take in the struct.
    IDX_PROJECT_CLASS = fieldNames.size();
    fieldNames.add("project_class");
    fieldOIs.add(stringOI);

    IDX_PROJECT = fieldNames.size();
    fieldNames.add("project");
    fieldOIs.add(stringOI);

    IDX_QUALIFIERS = fieldNames.size();
    fieldNames.add("qualifiers");
    fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(stringOI));

    IDX_TLD = fieldNames.size();
    fieldNames.add("tld");
    fieldOIs.add(stringOI);

    result = new Object[fieldNames.size()];

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  }
  /**
   * Appends the binary encoding of {@code o} to {@code buffer}.
   *
   * <p>A null value is written as a single 0 byte. Any other value is written as a 1 byte followed
   * by an encoding selected by the inspector's category:
   *
   * <ul>
   *   <li>PRIMITIVE: a type-specific encoding; integral types are written most significant byte
   *       first with the sign bit flipped, floating point types with all bits flipped when
   *       negative and only the sign bit flipped otherwise.
   *   <li>LIST: a 1 byte before each (recursively encoded) element, then a terminating 0 byte.
   *   <li>MAP: a 1 byte before each (recursively encoded) key/value pair, then a terminating 0
   *       byte.
   *   <li>STRUCT: every field, recursively encoded, in declaration order.
   *   <li>UNION: the tag byte followed by the recursively encoded current field.
   * </ul>
   *
   * @param buffer destination of the encoded bytes
   * @param o the value to encode, may be null
   * @param oi inspector describing {@code o}
   * @param invert forwarded unchanged to every write and to every nested value; presumably
   *     requests bit-inverted output for descending order - confirm against OutputByteBuffer
   * @throws RuntimeException if the category or primitive category is not handled
   */
  static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) {
    // Is this field a null?
    if (o == null) {
      buffer.write((byte) 0, invert);
      return;
    }
    // This field is not a null.
    buffer.write((byte) 1, invert);

    switch (oi.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
          switch (poi.getPrimitiveCategory()) {
            case VOID:
              {
                return;
              }
            case BOOLEAN:
              {
                boolean v = ((BooleanObjectInspector) poi).get(o);
                buffer.write((byte) (v ? 2 : 1), invert);
                return;
              }
            case BYTE:
              {
                byte v = ((ByteObjectInspector) poi).get(o);
                buffer.write((byte) (v ^ 0x80), invert);
                return;
              }
            case SHORT:
              {
                short v = ((ShortObjectInspector) poi).get(o);
                buffer.write((byte) ((v >> 8) ^ 0x80), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case INT:
              {
                int v = ((IntObjectInspector) poi).get(o);
                // XOR with MIN_VALUE flips only the sign bit, i.e. bit 7 of the first byte.
                writeBigEndianInt(buffer, v ^ Integer.MIN_VALUE, invert);
                return;
              }
            case LONG:
              {
                long v = ((LongObjectInspector) poi).get(o);
                // XOR with MIN_VALUE flips only the sign bit, i.e. bit 7 of the first byte.
                writeBigEndianLong(buffer, v ^ Long.MIN_VALUE, invert);
                return;
              }
            case FLOAT:
              {
                int v = Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
                if ((v & (1 << 31)) != 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1 << 31);
                }
                writeBigEndianInt(buffer, v, invert);
                return;
              }
            case DOUBLE:
              {
                long v = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
                if ((v & (1L << 63)) != 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1L << 63);
                }
                writeBigEndianLong(buffer, v, invert);
                return;
              }
            case STRING:
              {
                Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
                serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                return;
              }
            case BINARY:
              {
                BytesWritable ba = ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o);
                // The length is passed explicitly, exactly as for STRING, so the backing array
                // can be handed over directly without copying it first.
                serializeBytes(buffer, ba.getBytes(), ba.getLength(), invert);
                return;
              }
            case DATE:
              {
                long v =
                    ((DateObjectInspector) poi).getPrimitiveWritableObject(o).getTimeInSeconds();
                // Same layout as LONG: sign bit flipped, most significant byte first.
                writeBigEndianLong(buffer, v ^ Long.MIN_VALUE, invert);
                return;
              }
            case TIMESTAMP:
              {
                TimestampWritable t =
                    ((TimestampObjectInspector) poi).getPrimitiveWritableObject(o);
                for (byte b : t.getBinarySortable()) {
                  buffer.write(b, invert);
                }
                return;
              }
            default:
              {
                throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
              }
          }
        }
      case LIST:
        {
          ListObjectInspector loi = (ListObjectInspector) oi;
          ObjectInspector eoi = loi.getListElementObjectInspector();

          // \1 followed by each element
          int size = loi.getListLength(o);
          for (int eid = 0; eid < size; eid++) {
            buffer.write((byte) 1, invert);
            serialize(buffer, loi.getListElement(o, eid), eoi, invert);
          }
          // and \0 to terminate
          buffer.write((byte) 0, invert);
          return;
        }
      case MAP:
        {
          MapObjectInspector moi = (MapObjectInspector) oi;
          ObjectInspector koi = moi.getMapKeyObjectInspector();
          ObjectInspector voi = moi.getMapValueObjectInspector();

          // \1 followed by each key and then each value
          Map<?, ?> map = moi.getMap(o);
          for (Map.Entry<?, ?> entry : map.entrySet()) {
            buffer.write((byte) 1, invert);
            serialize(buffer, entry.getKey(), koi, invert);
            serialize(buffer, entry.getValue(), voi, invert);
          }
          // and \0 to terminate
          buffer.write((byte) 0, invert);
          return;
        }
      case STRUCT:
        {
          StructObjectInspector soi = (StructObjectInspector) oi;
          List<? extends StructField> fields = soi.getAllStructFieldRefs();

          // Fields are written back to back with no separator or terminator.
          for (StructField field : fields) {
            serialize(
                buffer,
                soi.getStructFieldData(o, field),
                field.getFieldObjectInspector(),
                invert);
          }
          return;
        }
      case UNION:
        {
          UnionObjectInspector uoi = (UnionObjectInspector) oi;
          byte tag = uoi.getTag(o);
          buffer.write(tag, invert);
          serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert);
          return;
        }
      default:
        {
          throw new RuntimeException("Unrecognized type: " + oi.getCategory());
        }
    }
  }

  /** Writes the four bytes of {@code v} to {@code buffer}, most significant byte first. */
  private static void writeBigEndianInt(OutputByteBuffer buffer, int v, boolean invert) {
    buffer.write((byte) (v >> 24), invert);
    buffer.write((byte) (v >> 16), invert);
    buffer.write((byte) (v >> 8), invert);
    buffer.write((byte) v, invert);
  }

  /** Writes the eight bytes of {@code v} to {@code buffer}, most significant byte first. */
  private static void writeBigEndianLong(OutputByteBuffer buffer, long v, boolean invert) {
    buffer.write((byte) (v >> 56), invert);
    buffer.write((byte) (v >> 48), invert);
    buffer.write((byte) (v >> 40), invert);
    buffer.write((byte) (v >> 32), invert);
    buffer.write((byte) (v >> 24), invert);
    buffer.write((byte) (v >> 16), invert);
    buffer.write((byte) (v >> 8), invert);
    buffer.write((byte) v, invert);
  }