Esempio n. 1
0
 /**
  * Narrows an inspector to a {@link PrimitiveObjectInspector} after checking that its primitive
  * category is one of the accepted ones.
  *
  * <p>Accepted primitive categories: BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and TIMESTAMP.
  *
  * @param argOI the inspector to check
  * @return {@code argOI} cast to {@link PrimitiveObjectInspector}
  * @throws UDFArgumentTypeException (reported against argument index 0) if {@code argOI} is not of
  *     the PRIMITIVE category, or its primitive category is not one of the accepted ones
  */
 public static PrimitiveObjectInspector asDoubleCompatibleOI(@Nonnull final ObjectInspector argOI)
     throws UDFArgumentTypeException {
   if (argOI.getCategory() == Category.PRIMITIVE) {
     final PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) argOI;
     switch (primitiveOI.getPrimitiveCategory()) {
       case BYTE:
       case SHORT:
       case INT:
       case LONG:
       case FLOAT:
       case DOUBLE:
       case STRING:
       case TIMESTAMP:
         return primitiveOI;
       default:
         throw new UDFArgumentTypeException(
             0,
             "Only numeric or string type arguments are accepted but "
                 + argOI.getTypeName()
                 + " is passed.");
     }
   }
   // Reached only when the inspector is not of the PRIMITIVE category.
   throw new UDFArgumentTypeException(
       0,
       "Only primitive type arguments are accepted but " + argOI.getTypeName() + " is passed.");
 }
Esempio n. 2
0
 /**
  * Casts an inspector to {@link IntObjectInspector} when its type name equals
  * {@code INT_TYPE_NAME}.
  *
  * @param argOI the inspector to check
  * @return {@code argOI} cast to {@link IntObjectInspector}
  * @throws UDFArgumentException if the type name of {@code argOI} is not {@code INT_TYPE_NAME}
  */
 public static IntObjectInspector asIntOI(@Nonnull final ObjectInspector argOI)
     throws UDFArgumentException {
   final boolean isInt = INT_TYPE_NAME.equals(argOI.getTypeName());
   if (isInt) {
     return (IntObjectInspector) argOI;
   }
   throw new UDFArgumentException("Argument type must be INT: " + argOI.getTypeName());
 }
Esempio n. 3
0
  /**
   * Renders a struct row as one line of text, with columns joined by {@code separator}.
   *
   * <p>A column whose inspector is of the PRIMITIVE category is written with {@code toString()},
   * or as {@code nullString} when its value is null. Any other column is written as the JSON
   * produced by {@code SerDeUtils.getJSONString}. The text is stored into {@code serializeCache},
   * and that same instance is returned on every call.
   *
   * @param obj the row to serialize
   * @param objInspector inspector describing {@code obj}; must be of the STRUCT category
   * @return {@code serializeCache}, holding the rendered line
   * @throws SerDeException if {@code objInspector} is not of the STRUCT category
   */
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }

    final StructObjectInspector structOI = (StructObjectInspector) objInspector;
    final List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
    final StringBuilder line = new StringBuilder();

    int position = 0;
    for (StructField fieldRef : fieldRefs) {
      // The separator goes between columns, never before the first one.
      if (position > 0) {
        line.append(separator);
      }
      position++;

      final Object value = structOI.getStructFieldData(obj, fieldRef);
      final ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
      if (fieldOI.getCategory() == Category.PRIMITIVE) {
        // Primitive values are emitted as plain strings.
        line.append(value == null ? nullString : value.toString());
      } else {
        // Complex values are emitted as JSON.
        line.append(SerDeUtils.getJSONString(value, fieldOI));
      }
    }

    serializeCache.set(line.toString());
    return serializeCache;
  }
  /**
   * Prints a summary of the file behind {@code m_reader} to standard output.
   *
   * <p>The output covers the reader itself, its row count, the kind of every entry in
   * {@code m_types}, the compression kind (plus the compression size unless the kind is
   * {@code NONE}), the category and type name of the reader's object inspector, and the name,
   * type category and type name of every entry in {@code m_fields}.
   *
   * <p>Side effect: {@code m_oi} is reassigned to the reader's object inspector.
   * {@code m_types} and {@code m_fields} are only read here, never assigned.
   *
   * @throws Exception declared by the signature; this method does not throw it explicitly
   */
  public void printFileInfo() throws Exception {
    System.out.println("Reader: " + m_reader);
    System.out.println("# Rows: " + m_reader.getNumberOfRows());

    final int typeCount = m_types.size();
    System.out.println("# Types in the file: " + typeCount);
    for (int typeIdx = 0; typeIdx < typeCount; typeIdx++) {
      System.out.println("Type " + typeIdx + ": " + m_types.get(typeIdx).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    // The compression size is only reported for compressed files.
    if (CompressionKind.NONE != m_reader.getCompression()) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    m_oi = (StructObjectInspector) m_reader.getObjectInspector();
    System.out.println("object inspector type category: " + m_oi.getCategory());
    System.out.println("object inspector type name    : " + m_oi.getTypeName());

    final int columnCount = m_fields.size();
    System.out.println("Number of columns in the table: " + columnCount);

    // Per-column name and type information.
    for (int col = 0; col < columnCount; col++) {
      final String label = "Column " + col;
      System.out.println(label + " name: " + m_fields.get(col).getFieldName());
      ObjectInspector columnOI = m_fields.get(col).getFieldObjectInspector();
      System.out.println(label + " type category: " + columnOI.getCategory());
      System.out.println(label + " type name: " + columnOI.getTypeName());
    }
  }
Esempio n. 5
0
 /**
  * Reads the value returned by {@code getConstValue} for the given inspector and widens it to
  * {@code double}.
  *
  * <p>The writable type to read is chosen from the inspector's type name. Supported names are
  * {@code DOUBLE_TYPE_NAME}, {@code FLOAT_TYPE_NAME}, {@code INT_TYPE_NAME},
  * {@code BIGINT_TYPE_NAME}, {@code SMALLINT_TYPE_NAME} and {@code TINYINT_TYPE_NAME}.
  *
  * @param numberOI inspector handed to {@code getConstValue}
  * @return the value as a {@code double}
  * @throws UDFArgumentException if the type name is none of the supported ones
  */
 public static double getAsConstDouble(@Nonnull final ObjectInspector numberOI)
     throws UDFArgumentException {
   final String name = numberOI.getTypeName();

   if (DOUBLE_TYPE_NAME.equals(name)) {
     final DoubleWritable doubleValue = getConstValue(numberOI);
     return doubleValue.get();
   }
   if (FLOAT_TYPE_NAME.equals(name)) {
     final FloatWritable floatValue = getConstValue(numberOI);
     return floatValue.get();
   }
   if (INT_TYPE_NAME.equals(name)) {
     final IntWritable intValue = getConstValue(numberOI);
     return intValue.get();
   }
   if (BIGINT_TYPE_NAME.equals(name)) {
     final LongWritable longValue = getConstValue(numberOI);
     return longValue.get();
   }
   if (SMALLINT_TYPE_NAME.equals(name)) {
     final ShortWritable shortValue = getConstValue(numberOI);
     return shortValue.get();
   }
   if (TINYINT_TYPE_NAME.equals(name)) {
     final ByteWritable byteValue = getConstValue(numberOI);
     return byteValue.get();
   }

   throw new UDFArgumentException(
       "Unexpected argument type to cast as double: "
           + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
 }
  /**
   * Copies the fields of a struct row into a new {@code VoltRecord} for the table named by
   * {@code m_voltConf}.
   *
   * <p>The number of fields copied is the size of {@code m_oig.getColumnTypes()}. Each field
   * inspector is cast to {@link PrimitiveObjectInspector} and the field is added as its primitive
   * Java object. Non-null values of fields whose type info is the timestamp type info are
   * converted to a {@code Date} with the same epoch-millisecond value.
   *
   * @param obj the row to serialize
   * @param oi inspector describing {@code obj}; must be of the STRUCT category
   * @return the populated {@code VoltRecord}
   * @throws SerDeException declared by the signature; a {@code VoltSerdeException} is thrown when
   *     {@code oi} is not of the STRUCT category
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
    if (oi.getCategory() != Category.STRUCT) {
      throw new VoltSerdeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + oi.getTypeName());
    }

    final VoltRecord record = new VoltRecord(m_voltConf.getTableName());
    final StructObjectInspector structOI = (StructObjectInspector) oi;
    final List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
    final List<Object> rowValues = structOI.getStructFieldsDataAsList(obj);

    final int columnCount = m_oig.getColumnTypes().size();
    for (int col = 0; col < columnCount; ++col) {
      final PrimitiveObjectInspector columnOI =
          (PrimitiveObjectInspector) fieldRefs.get(col).getFieldObjectInspector();

      Object javaValue = columnOI.getPrimitiveJavaObject(rowValues.get(col));
      // Timestamp columns are handed over as Date; null values stay null.
      if (columnOI.getTypeInfo().equals(TypeInfoFactory.timestampTypeInfo) && javaValue != null) {
        javaValue = new Date(((Timestamp) javaValue).getTime());
      }
      record.add(javaValue);
    }

    return record;
  }
  /**
   * Extracts the value of the field named {@code fieldName} from a {@code HiveType} as a string.
   *
   * @param target the object to read from
   * @return {@code null} when {@code target} is not a {@code HiveType} or its inspector is not a
   *     {@link StructObjectInspector}; {@code StringUtils.EMPTY} when the field data is null or a
   *     {@code NullWritable}; otherwise the field data's {@code toString()}
   */
  @Override
  protected String extractField(Object target) {
    if (!(target instanceof HiveType)) {
      return null;
    }
    final HiveType hiveType = (HiveType) target;

    final ObjectInspector inspector = hiveType.getObjectInspector();
    if (!(inspector instanceof StructObjectInspector)) {
      return null;
    }
    final StructObjectInspector structOI = (StructObjectInspector) inspector;

    final StructField fieldRef = structOI.getStructFieldRef(fieldName);
    final ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
    // Only primitive fields are supported.
    Assert.isTrue(
        fieldOI.getCategory() == ObjectInspector.Category.PRIMITIVE,
        String.format(
            "Field [%s] needs to be a primitive; found [%s]", fieldName, fieldOI.getTypeName()));

    // The value is expected to be a writable, so toString() is enough.
    final Object value = structOI.getStructFieldData(hiveType.getObject(), fieldRef);
    final boolean missing = value == null || value instanceof NullWritable;
    return missing ? StringUtils.EMPTY : value.toString();
  }
 /**
  * Builds this list type's name: {@code LIST_TYPE_NAME} followed by the element inspector's type
  * name in angle brackets, i.e. {@code array<element type>}.
  *
  * @return the composed type name
  */
 @Override
 public String getTypeName() {
   final String elementTypeName = listElementObjectInspector.getTypeName();
   return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME
       + "<"
       + elementTypeName
       + ">";
 }
Esempio n. 9
0
  /**
   * Converts a LazyObject into a standard Java object following JDBC 3.0 (see JDBC 3.0
   * Specification, Table B-3: Mapping from JDBC Types to Java Object Types).
   *
   * <p>This method is kept consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}.
   *
   * @param o the object to convert
   * @param oi the inspector describing {@code o}
   * @return {@code null} if the standard copy of {@code o} is null; a {@code String} built from
   *     the bytes when the type name is {@code BINARY_TYPE_NAME}; a JSON string when {@code oi} is
   *     not of the PRIMITIVE category; otherwise the standard Java copy itself
   */
  private static Object convertLazyToJava(Object o, ObjectInspector oi) {
    final Object standardCopy =
        ObjectInspectorUtils.copyToStandardObject(o, oi, ObjectInspectorCopyOption.JAVA);

    if (standardCopy == null) {
      return null;
    } else if (oi.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
      // NOTE(review): decodes with the platform default charset - confirm this is intended.
      return new String((byte[]) standardCopy);
    } else if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) {
      // For now, non-primitive values are exposed as a string.
      // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
      return SerDeUtils.getJSONString(o, oi);
    } else {
      return standardCopy;
    }
  }
Esempio n. 10
0
  /**
   * Hive will call this to serialize an object. Returns a writable object of the same class
   * returned by <a href="#getSerializedClass">getSerializedClass</a>.
   *
   * <p>The row is converted to a {@code JSONObject} by {@code serializeStruct} and its string form
   * is wrapped in a new {@code Text}. As a side effect, {@code serializedDataSize} is set to the
   * number of valid encoded bytes in that {@code Text}.
   *
   * @param obj The object to serialize
   * @param objInspector The ObjectInspector that knows about the object's structure; must be of
   *     the STRUCT category
   * @return a serialized object in form of a Writable. Must be the same type returned by <a
   *     href="#getSerializedClass">getSerializedClass</a>
   * @throws SerDeException if {@code objInspector} is not of the STRUCT category
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // make sure it is a struct record
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }

    JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames);

    Text t = new Text(serializer.toString());

    // Text.getBytes() exposes the backing array, which may be longer than the encoded content;
    // only the first getLength() bytes are valid, so that is the real serialized size.
    serializedDataSize = t.getLength();
    return t;
  }
Esempio n. 11
0
  /**
   * Experimental helper: opens the ORC file at {@code m_file_path}, prints its metadata and then
   * prints every row to standard output.
   *
   * <p>Each row is printed as its field values, each followed by {@code '|'}; null field values
   * are printed as an empty string. The row reader is closed before the method returns, whether
   * or not reading succeeds.
   *
   * <p>Side effects: assigns {@code m_reader}, {@code m_types} and {@code m_fields}.
   *
   * @throws Exception if creating the reader, reading rows or closing the row reader fails
   */
  public void testRead() throws Exception {

    m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf));

    System.out.println("Reader: " + m_reader);

    System.out.println("# Rows: " + m_reader.getNumberOfRows());
    m_types = m_reader.getTypes();
    System.out.println("# Types in the file: " + m_types.size());

    for (int i = 0; i < m_types.size(); i++) {
      System.out.println("Type " + i + ": " + m_types.get(i).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    if (m_reader.getCompression() != CompressionKind.NONE) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    // Local variable: named with the lv_ prefix like the other locals in this method, since the
    // m_ prefix is used for fields.
    StructObjectInspector lv_oi = (StructObjectInspector) m_reader.getObjectInspector();

    System.out.println("object inspector type category: " + lv_oi.getCategory());
    System.out.println("object inspector type name    : " + lv_oi.getTypeName());

    m_fields = lv_oi.getAllStructFieldRefs();
    System.out.println("Number of columns in the table: " + m_fields.size());

    RecordReader lv_rr = m_reader.rows();
    try {
      // Print the type info:
      for (int i = 0; i < m_fields.size(); i++) {
        System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName());
        ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector();
        System.out.println("Column " + i + " type category: " + lv_foi.getCategory());
        System.out.println("Column " + i + " type name: " + lv_foi.getTypeName());
      }

      OrcStruct lv_row = null;
      Object lv_field_val = null;
      StringBuilder lv_row_string = new StringBuilder(1024);
      while (lv_rr.hasNext()) {
        // The previous row object is handed back to next() on each iteration.
        lv_row = (OrcStruct) lv_rr.next(lv_row);
        lv_row_string.setLength(0);
        for (int i = 0; i < m_fields.size(); i++) {
          lv_field_val = lv_row.getFieldValue(i);
          if (lv_field_val != null) {
            lv_row_string.append(lv_field_val);
          }
          lv_row_string.append('|');
        }
        System.out.println(lv_row_string);
      }
    } finally {
      // Release the row reader even when printing or reading fails.
      lv_rr.close();
    }

    // To typecast a field value based on its 'kind', compare the kind first, e.g.:
    //   if (OrcProto.Type.Kind.INT == m_types.get(1).getKind()) {
    //     IntWritable lvf_1_val = (IntWritable) lv_row.getFieldValue(0);
    //     System.out.println("Column 1 value: " + lvf_1_val);
    //   }
  }
Esempio n. 12
0
 /**
  * Tells whether the inspector's type name equals {@code BOOLEAN_TYPE_NAME}.
  *
  * @param oi the inspector to check
  * @return {@code true} if the type name of {@code oi} equals {@code BOOLEAN_TYPE_NAME}
  */
 public static boolean isBooleanOI(@Nonnull final ObjectInspector oi) {
   return BOOLEAN_TYPE_NAME.equals(oi.getTypeName());
 }
 /**
  * Returns the type name reported by {@code objectInspector}.
  *
  * @return the result of {@code objectInspector.getTypeName()}
  */
 public String getType() {
   final String typeName = objectInspector.getTypeName();
   return typeName;
 }
  /**
   * The initialize method is called only once during the lifetime of the UDF.
   *
   * <p>It checks that exactly one argument is passed and that it is a primitive of the STRING
   * category, obtains the {@code Webrequest} instance, stores the argument inspector in
   * {@code argumentOI} and allocates {@code result} with one slot per output field.
   *
   * <p>The returned inspector describes a struct with four fields, whose positions are recorded
   * in the matching {@code IDX_*} members:
   *
   * <ul>
   *   <li>{@code project_class} - string
   *   <li>{@code project} - string
   *   <li>{@code qualifiers} - list of strings
   *   <li>{@code tld} - string
   * </ul>
   *
   * @param arguments the inspectors of the arguments passed to the UDF
   * @return a standard struct ObjectInspector for the four fields listed above
   * @throws UDFArgumentException a {@code UDFArgumentLengthException} if the number of arguments
   *     is not 1, or a {@code UDFArgumentTypeException} if the argument is not a string
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
      throw new UDFArgumentLengthException(
          "The HostNormalizerUDF takes an array with only 1 element as argument");
    }

    final int argIndex = 0;
    final ObjectInspector arg = arguments[argIndex];

    // The primitive category is only inspected once the argument is known to be primitive; both
    // failures are reported with the same message.
    final boolean isString =
        arg.getCategory() == Category.PRIMITIVE
            && ((PrimitiveObjectInspector) arg).getPrimitiveCategory() == PrimitiveCategory.STRING;
    if (!isString) {
      throw new UDFArgumentTypeException(
          argIndex,
          "A string argument was expected but an argument of type "
              + arg.getTypeName()
              + " was given.");
    }

    // Instantiate the Webrequest
    webrequest = Webrequest.getInstance();

    argumentOI = (StringObjectInspector) arg;

    final ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final List<String> fieldNames = new LinkedList<>();
    final List<ObjectInspector> fieldOIs = new LinkedList<>();

    // Each index is the position the field is about to take in the struct.
    IDX_PROJECT_CLASS = fieldNames.size();
    fieldNames.add("project_class");
    fieldOIs.add(stringOI);

    IDX_PROJECT = fieldNames.size();
    fieldNames.add("project");
    fieldOIs.add(stringOI);

    IDX_QUALIFIERS = fieldNames.size();
    fieldNames.add("qualifiers");
    fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(stringOI));

    IDX_TLD = fieldNames.size();
    fieldNames.add("tld");
    fieldOIs.add(stringOI);

    result = new Object[fieldNames.size()];

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  }