コード例 #1
0
  /**
   * Builds a {@link VoltRecord} for the configured table from one struct row.
   *
   * <p>Each of the first {@code m_oig.getColumnTypes().size()} struct fields is read as a
   * primitive Java object and appended to the record in order. Non-null timestamp values are
   * converted to {@code Date} before being appended.
   *
   * @param obj the row to serialize
   * @param oi inspector describing {@code obj}; must be of category STRUCT
   * @return the populated record
   * @throws SerDeException if {@code oi} is not a struct inspector
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
    if (oi.getCategory() != Category.STRUCT) {
      throw new VoltSerdeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + oi.getTypeName());
    }

    final VoltRecord record = new VoltRecord(m_voltConf.getTableName());
    final StructObjectInspector rowInspector = (StructObjectInspector) oi;
    final List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
    final List<Object> rowValues = rowInspector.getStructFieldsDataAsList(obj);

    final int columnCount = m_oig.getColumnTypes().size();
    int column = 0;
    while (column < columnCount) {
      // Every field is treated as a primitive; the cast fails for any other category.
      final PrimitiveObjectInspector columnInspector =
          (PrimitiveObjectInspector) fieldRefs.get(column).getFieldObjectInspector();

      Object value = columnInspector.getPrimitiveJavaObject(rowValues.get(column));
      if (columnInspector.getTypeInfo().equals(TypeInfoFactory.timestampTypeInfo)
          && value != null) {
        value = new Date(((Timestamp) value).getTime());
      }
      record.add(value);
      ++column;
    }

    return record;
  }
コード例 #2
0
ファイル: HiveUtils.java プロジェクト: naritta/hivemall
 /**
  * Narrows an inspector to a {@link PrimitiveObjectInspector} whose primitive category is one
  * of BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING or TIMESTAMP.
  *
  * @param argOI the inspector to check
  * @return {@code argOI} cast to {@link PrimitiveObjectInspector}
  * @throws UDFArgumentTypeException if {@code argOI} is not primitive, or its primitive
  *     category is not one of the accepted ones
  */
 public static PrimitiveObjectInspector asDoubleCompatibleOI(@Nonnull final ObjectInspector argOI)
     throws UDFArgumentTypeException {
   if (argOI.getCategory() != Category.PRIMITIVE) {
     throw new UDFArgumentTypeException(
         0,
         "Only primitive type arguments are accepted but " + argOI.getTypeName() + " is passed.");
   }

   final PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) argOI;
   switch (primitiveOI.getPrimitiveCategory()) {
     case BYTE:
     case SHORT:
     case INT:
     case LONG:
     case FLOAT:
     case DOUBLE:
     case STRING:
     case TIMESTAMP:
       return primitiveOI;
     default:
       throw new UDFArgumentTypeException(
           0,
           "Only numeric or string type arguments are accepted but "
               + argOI.getTypeName()
               + " is passed.");
   }
 }
コード例 #3
0
  /**
   * Prints a summary of the current reader to standard output: the reader itself, row count,
   * the kind of each entry in {@code m_types}, compression settings, and the name, category and
   * type name of each entry in {@code m_fields}.
   *
   * <p>Assigns {@code m_oi} from the reader's object inspector. {@code m_reader},
   * {@code m_types} and {@code m_fields} are read but not assigned here.
   *
   * @throws Exception propagated from the reader calls
   */
  public void printFileInfo() throws Exception {

    System.out.println("Reader: " + m_reader);
    System.out.println("# Rows: " + m_reader.getNumberOfRows());

    final int typeCount = m_types.size();
    System.out.println("# Types in the file: " + typeCount);
    for (int typeIdx = 0; typeIdx < typeCount; typeIdx++) {
      System.out.println("Type " + typeIdx + ": " + m_types.get(typeIdx).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    // The compression size is only reported when the file is actually compressed.
    final boolean compressed = m_reader.getCompression() != CompressionKind.NONE;
    if (compressed) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    m_oi = (StructObjectInspector) m_reader.getObjectInspector();

    System.out.println("object inspector type category: " + m_oi.getCategory());
    System.out.println("object inspector type name    : " + m_oi.getTypeName());

    final int columnCount = m_fields.size();
    System.out.println("Number of columns in the table: " + columnCount);

    // Per-column name and inspector details.
    for (int col = 0; col < columnCount; col++) {
      final String label = "Column " + col;
      System.out.println(label + " name: " + m_fields.get(col).getFieldName());
      final ObjectInspector columnInspector = m_fields.get(col).getFieldObjectInspector();
      System.out.println(label + " type category: " + columnInspector.getCategory());
      System.out.println(label + " type name: " + columnInspector.getTypeName());
    }
  }
コード例 #4
0
  /**
   * Renders a struct row as one delimited line of text.
   *
   * <p>Fields are joined with {@code separator}. A primitive field is written with its
   * {@code toString()} (or {@code nullString} when the value is null); a field of any other
   * category is written with {@code SerDeUtils.getJSONString}.
   *
   * @param obj the row to serialize
   * @param objInspector inspector describing {@code obj}; must be of category STRUCT
   * @return the {@code serializeCache} instance, updated to hold the rendered line
   * @throws SerDeException if {@code objInspector} is not a struct inspector
   */
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {

    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }
    final StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
    final List<? extends StructField> columns = rowInspector.getAllStructFieldRefs();

    final StringBuilder line = new StringBuilder();
    boolean first = true;
    for (StructField columnRef : columns) {
      if (!first) {
        line.append(separator);
      }
      first = false;

      final Object value = rowInspector.getStructFieldData(obj, columnRef);
      final ObjectInspector columnInspector = columnRef.getFieldObjectInspector();
      if (columnInspector.getCategory() != Category.PRIMITIVE) {
        // Complex values are emitted as JSON.
        line.append(SerDeUtils.getJSONString(value, columnRef.getFieldObjectInspector()));
      } else {
        // Primitive values are emitted as plain strings.
        line.append(value == null ? nullString : value.toString());
      }
    }
    serializeCache.set(line.toString());
    return serializeCache;
  }
コード例 #5
0
  /**
   * Reads the field named {@code fieldName} out of a struct-backed {@code HiveType} as a string.
   *
   * @param target the candidate object
   * @return the field's {@code toString()}; {@code StringUtils.EMPTY} when the field data is
   *     null or a {@code NullWritable}; {@code null} when {@code target} is not a
   *     {@code HiveType} or its inspector is not a {@link StructObjectInspector}
   */
  @Override
  protected String extractField(Object target) {
    if (!(target instanceof HiveType)) {
      return null;
    }
    final HiveType hiveType = (HiveType) target;

    final ObjectInspector typeInspector = hiveType.getObjectInspector();
    if (!(typeInspector instanceof StructObjectInspector)) {
      return null;
    }
    final StructObjectInspector structInspector = (StructObjectInspector) typeInspector;

    final StructField fieldRef = structInspector.getStructFieldRef(fieldName);
    final ObjectInspector fieldInspector = fieldRef.getFieldObjectInspector();
    Assert.isTrue(
        fieldInspector.getCategory() == ObjectInspector.Category.PRIMITIVE,
        String.format(
            "Field [%s] needs to be a primitive; found [%s]",
            fieldName,
            fieldInspector.getTypeName()));

    // The field data is rendered through toString(); absent values become the empty string.
    final Object value = structInspector.getStructFieldData(hiveType.getObject(), fieldRef);
    final boolean absent = value == null || value instanceof NullWritable;
    return absent ? StringUtils.EMPTY : value.toString();
  }
コード例 #6
0
ファイル: HiveUtils.java プロジェクト: naritta/hivemall
 /**
  * Narrows an inspector to {@link IntObjectInspector} after checking its type name.
  *
  * @param argOI the inspector to check
  * @return {@code argOI} cast to {@link IntObjectInspector}
  * @throws UDFArgumentException if the inspector's type name does not equal
  *     {@code INT_TYPE_NAME}
  */
 public static IntObjectInspector asIntOI(@Nonnull final ObjectInspector argOI)
     throws UDFArgumentException {
   final boolean isInt = INT_TYPE_NAME.equals(argOI.getTypeName());
   if (isInt) {
     return (IntObjectInspector) argOI;
   }
   throw new UDFArgumentException("Argument type must be INT: " + argOI.getTypeName());
 }
コード例 #7
0
 /**
  * Serializes a struct row by delegating to {@code createStruct}, then records the element
  * count of the result in {@code serializedSize} and marks the last operation as SERIALIZE.
  *
  * @param obj the row to serialize
  * @param objInspector inspector describing {@code obj}; must be of category STRUCT
  * @return the array produced by {@code createStruct}
  * @throws SerDeException if {@code objInspector} is not a struct inspector
  */
 @Override
 public Writable serialize(final Object obj, final ObjectInspector objInspector)
     throws SerDeException {
   final boolean isStruct = objInspector.getCategory().equals(Category.STRUCT);
   if (!isStruct) {
     throw new SerDeException(
         "Cannot serialize " + objInspector.getCategory() + ". Can only serialize a struct");
   }

   final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
   final ArrayWritable row = createStruct(obj, structInspector);
   serializedSize = row.get().length;
   status = LAST_OPERATION.SERIALIZE;
   return row;
 }
コード例 #8
0
ファイル: HiveUtil.java プロジェクト: albertocsm/presto
 /**
  * Fetches the deserializer's object inspector and returns it as a struct inspector.
  *
  * <p>A {@code SerDeException} raised while fetching the inspector is rethrown through
  * {@code Throwables.propagate}.
  *
  * @param deserializer source of the object inspector
  * @return the deserializer's inspector, cast to {@link StructObjectInspector}
  */
 public static StructObjectInspector getTableObjectInspector(
     @SuppressWarnings("deprecation") Deserializer deserializer) {
   final ObjectInspector tableInspector;
   try {
     tableInspector = deserializer.getObjectInspector();
   } catch (SerDeException e) {
     throw Throwables.propagate(e);
   }

   // The argument check runs before the cast, so a non-struct inspector is rejected here.
   final boolean isStruct = tableInspector.getCategory() == Category.STRUCT;
   checkArgument(isStruct, "expected STRUCT: %s", tableInspector.getCategory());
   return (StructObjectInspector) tableInspector;
 }
コード例 #9
0
ファイル: MatchPath.java プロジェクト: joellove/hive-udf
    /*
     * Checks that the last argument (index argsNum - 1) is a constant of primitive category
     * STRING and stores its constant value, as a string, in evaluator.resultExprStr.
     * Any other argument is reported through throwErrorWithSignature.
     */
    private void validateAndSetupResultExprStr(
        MatchPath evaluator, List<PTFExpressionDef> args, int argsNum) throws SemanticException {
      final PTFExpressionDef lastArg = args.get(argsNum - 1);
      final ObjectInspector lastArgOI = lastArg.getOI();

      // Evaluated left to right, so the primitive cast only happens for primitive inspectors.
      final boolean isConstantString =
          ObjectInspectorUtils.isConstantObjectInspector(lastArgOI)
              && lastArgOI.getCategory() == ObjectInspector.Category.PRIMITIVE
              && ((PrimitiveObjectInspector) lastArgOI).getPrimitiveCategory()
                  == PrimitiveObjectInspector.PrimitiveCategory.STRING;
      if (!isConstantString) {
        throwErrorWithSignature("Currently the result Expr parameter must be a Constant String.");
      }

      final ConstantObjectInspector constantOI = (ConstantObjectInspector) lastArgOI;
      evaluator.resultExprStr = constantOI.getWritableConstantValue().toString();
    }
コード例 #10
0
ファイル: SerDeUtils.java プロジェクト: nileema/presto
 /**
  * Dispatches serialization of {@code object} according to the inspector's category.
  *
  * @param type passed through to the category-specific serializer
  * @param builder passed through to the category-specific serializer
  * @param object the value to serialize
  * @param inspector inspector describing {@code object}
  * @return {@code null} for PRIMITIVE; otherwise whatever the LIST, MAP or STRUCT serializer
  *     returns
  * @throws RuntimeException if the category is none of PRIMITIVE, LIST, MAP, STRUCT
  */
 public static Block serializeObject(
     Type type, BlockBuilder builder, Object object, ObjectInspector inspector) {
   switch (inspector.getCategory()) {
     case STRUCT:
       return serializeStruct(type, builder, object, (StructObjectInspector) inspector);
     case MAP:
       return serializeMap(type, builder, object, (MapObjectInspector) inspector);
     case LIST:
       return serializeList(type, builder, object, (ListObjectInspector) inspector);
     case PRIMITIVE:
       serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector);
       return null;
     default:
       throw new RuntimeException(
           "Unknown object inspector category: " + inspector.getCategory());
   }
 }
コード例 #11
0
ファイル: MatchPath.java プロジェクト: joellove/hive-udf
    /*
     * Checks that the first argument (index 0) is a constant of primitive category STRING and
     * stores its constant value, as a string, in evaluator.patternStr.
     * Any other argument is reported through throwErrorWithSignature.
     */
    private void validateAndSetupPatternStr(MatchPath evaluator, List<PTFExpressionDef> args)
        throws SemanticException {
      final PTFExpressionDef patternArg = args.get(0);
      final ObjectInspector patternArgOI = patternArg.getOI();

      // Evaluated left to right, so the primitive cast only happens for primitive inspectors.
      final boolean isConstantString =
          ObjectInspectorUtils.isConstantObjectInspector(patternArgOI)
              && patternArgOI.getCategory() == ObjectInspector.Category.PRIMITIVE
              && ((PrimitiveObjectInspector) patternArgOI).getPrimitiveCategory()
                  == PrimitiveObjectInspector.PrimitiveCategory.STRING;
      if (!isConstantString) {
        throwErrorWithSignature("Currently the symbol Pattern must be a Constant String.");
      }

      final ConstantObjectInspector constantOI = (ConstantObjectInspector) patternArgOI;
      evaluator.patternStr = constantOI.getWritableConstantValue().toString();
    }
コード例 #12
0
  /**
   * Recursively maps an inspector to a replacement inspector.
   *
   * <ul>
   *   <li>PRIMITIVE: the primitive Java object inspector for the same primitive category.
   *   <li>LIST / MAP: a standard list / map inspector over the recursively mapped element, key
   *       and value inspectors.
   *   <li>STRUCT: a {@code JsonStructObjectInspector} over the same field names and the
   *       recursively mapped field inspectors.
   *   <li>Any other category: {@code arg} unchanged.
   * </ul>
   *
   * @param arg the inspector to map
   * @return the mapped inspector
   */
  private ObjectInspector solveOi(ObjectInspector arg) {

    switch (arg.getCategory()) {
      case PRIMITIVE: {
        final PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) arg;
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            primitive.getPrimitiveCategory());
      }
      case LIST: {
        final ListObjectInspector list = (ListObjectInspector) arg;
        final ObjectInspector element = solveOi(list.getListElementObjectInspector());
        return ObjectInspectorFactory.getStandardListObjectInspector(element);
      }
      case MAP: {
        final ObjectInspector key = solveOi(((MapObjectInspector) arg).getMapKeyObjectInspector());
        final ObjectInspector value =
            solveOi(((MapObjectInspector) arg).getMapValueObjectInspector());
        return ObjectInspectorFactory.getStandardMapObjectInspector(key, value);
      }
      case STRUCT: {
        final StructObjectInspector struct = (StructObjectInspector) arg;
        final int fieldCount = struct.getAllStructFieldRefs().size();
        final ArrayList<String> fieldNames = new ArrayList<String>(fieldCount);
        final ArrayList<ObjectInspector> fieldInspectors =
            new ArrayList<ObjectInspector>(fieldCount);

        for (StructField field : struct.getAllStructFieldRefs()) {
          fieldNames.add(field.getFieldName());
          fieldInspectors.add(solveOi(field.getFieldObjectInspector()));
        }

        return JsonStructObjectInspector.getJsonStructObjectInspector(fieldNames, fieldInspectors);
      }
      default:
        return arg;
    }
  }
コード例 #13
0
 // The type name has the form <list type name><element type name>, wrapped in angle brackets.
 @Override
 public String getTypeName() {
   final String elementTypeName = listElementObjectInspector.getTypeName();
   return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME
       + "<"
       + elementTypeName
       + ">";
 }
コード例 #14
0
ファイル: HiveUtils.java プロジェクト: naritta/hivemall
 /**
  * Reads the constant held by a numeric inspector and widens it to {@code double}.
  *
  * <p>The inspector's type name selects the writable type to read: DOUBLE, FLOAT, INT, BIGINT,
  * SMALLINT or TINYINT.
  *
  * @param numberOI the inspector to read the constant from
  * @return the constant value as a {@code double}
  * @throws UDFArgumentException if the type name is none of the supported ones, or if thrown
  *     by {@code getConstValue}
  */
 public static double getAsConstDouble(@Nonnull final ObjectInspector numberOI)
     throws UDFArgumentException {
   final String typeName = numberOI.getTypeName();

   if (DOUBLE_TYPE_NAME.equals(typeName)) {
     final DoubleWritable doubleValue = getConstValue(numberOI);
     return doubleValue.get();
   }
   if (FLOAT_TYPE_NAME.equals(typeName)) {
     final FloatWritable floatValue = getConstValue(numberOI);
     return floatValue.get();
   }
   if (INT_TYPE_NAME.equals(typeName)) {
     final IntWritable intValue = getConstValue(numberOI);
     return intValue.get();
   }
   if (BIGINT_TYPE_NAME.equals(typeName)) {
     final LongWritable longValue = getConstValue(numberOI);
     return longValue.get();
   }
   if (SMALLINT_TYPE_NAME.equals(typeName)) {
     final ShortWritable shortValue = getConstValue(numberOI);
     return shortValue.get();
   }
   if (TINYINT_TYPE_NAME.equals(typeName)) {
     final ByteWritable byteValue = getConstValue(numberOI);
     return byteValue.get();
   }

   throw new UDFArgumentException(
       "Unexpected argument type to cast as double: "
           + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
 }
コード例 #15
0
ファイル: SQLOperation.java プロジェクト: uclaabs/absHive
  /**
   * Converts a value to the form exposed to callers.
   *
   * <p>The value is first copied to a standard Java object. A null copy yields {@code null}; a
   * value whose type name is the binary type name is returned as a {@code String} built from
   * its bytes; any non-primitive value is returned as its JSON string; every other value is
   * returned as the copied Java object.
   *
   * <p>Per the original note, this is meant to follow the JDBC 3.0 type mapping (Table B-3) and
   * to stay consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}.
   *
   * @param o the value to convert
   * @param oi inspector describing {@code o}
   * @return the converted value, or {@code null}
   */
  private static Object convertLazyToJava(Object o, ObjectInspector oi) {
    final Object javaValue =
        ObjectInspectorUtils.copyToStandardObject(o, oi, ObjectInspectorCopyOption.JAVA);
    if (javaValue == null) {
      return null;
    }

    final boolean isBinary = oi.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME);
    if (isBinary) {
      return new String((byte[]) javaValue);
    }

    // for now, expose non-primitive as a string
    // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
    final boolean isPrimitive = oi.getCategory() == ObjectInspector.Category.PRIMITIVE;
    return isPrimitive ? javaValue : SerDeUtils.getJSONString(o, oi);
  }
コード例 #16
0
  // Logs the row and its inspector at INFO level; no serialized form is produced and the
  // method always returns null.
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    final String rowText = obj.toString();
    LOG.info(rowText);

    final String inspectorText = objInspector.toString();
    LOG.info(inspectorText);

    return null;
  }
コード例 #17
0
  /**
   * Hive will call this to serialize an object. Returns a writable object of the same class
   * returned by <a href="#getSerializedClass">getSerializedClass</a>
   *
   * <p>The row is converted to a JSON object via {@code serializeStruct} and wrapped in a
   * {@code Text}. The number of valid bytes in that {@code Text} is recorded in
   * {@code serializedDataSize}.
   *
   * @param obj The object to serialize
   * @param objInspector The ObjectInspector that knows about the object's structure
   * @return a serialized object in form of a Writable. Must be the same type returned by <a
   *     href="#getSerializedClass">getSerializedClass</a>
   * @throws SerDeException if {@code objInspector} is not a struct inspector
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // make sure it is a struct record
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }

    JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames);

    Text t = new Text(serializer.toString());

    // Text.getBytes() exposes the backing buffer, which may be longer than the encoded data;
    // getLength() is the number of valid bytes.
    serializedDataSize = t.getLength();
    return t;
  }
コード例 #18
0
ファイル: HiveUtils.java プロジェクト: naritta/hivemall
 /**
  * Narrows an inspector to {@link PrimitiveObjectInspector}.
  *
  * @param oi the inspector to check
  * @return {@code oi} cast to {@link PrimitiveObjectInspector}
  * @throws UDFArgumentException if the inspector's category is not PRIMITIVE
  */
 public static PrimitiveObjectInspector asPrimitiveObjectInspector(
     @Nonnull final ObjectInspector oi) throws UDFArgumentException {
   final boolean isPrimitive = oi.getCategory() == Category.PRIMITIVE;
   if (isPrimitive) {
     return (PrimitiveObjectInspector) oi;
   }
   throw new UDFArgumentException(
       "Is not PrimitiveObjectInspector: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
 }
コード例 #19
0
ファイル: HiveUtils.java プロジェクト: naritta/hivemall
 /**
  * Narrows an inspector to {@link ListObjectInspector}.
  *
  * @param oi the inspector to check
  * @return {@code oi} cast to {@link ListObjectInspector}
  * @throws UDFArgumentException if the inspector's category is not LIST
  */
 @Nonnull
 public static ListObjectInspector asListOI(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (oi.getCategory() == Category.LIST) {
     return (ListObjectInspector) oi;
   }
   throw new UDFArgumentException("Expected List OI but was: " + oi);
 }
コード例 #20
0
  /**
   * Converts a value to its {@link Writable} form by dispatching on the inspector's category
   * to {@code createStruct}, {@code createArray}, {@code createMap} or {@code createPrimitive}.
   *
   * @param obj the value to convert; may be null
   * @param inspector inspector describing {@code obj}
   * @return the converted value, or {@code null} when {@code obj} is null
   * @throws SerDeException if the category is none of STRUCT, LIST, MAP, PRIMITIVE
   */
  private Writable createObject(final Object obj, final ObjectInspector inspector)
      throws SerDeException {
    if (obj == null) {
      return null;
    }

    final Writable converted;
    switch (inspector.getCategory()) {
      case PRIMITIVE:
        converted = createPrimitive(obj, (PrimitiveObjectInspector) inspector);
        break;
      case MAP:
        converted = createMap(obj, (MapObjectInspector) inspector);
        break;
      case LIST:
        converted = createArray(obj, (ListObjectInspector) inspector);
        break;
      case STRUCT:
        converted = createStruct(obj, (StructObjectInspector) inspector);
        break;
      default:
        throw new SerDeException("Unknown data type" + inspector.getCategory());
    }
    return converted;
  }
コード例 #21
0
  /**
   * Serializes a field. Since we have nested structures, it may be called recursively for instance
   * when defining a list<struct<>>
   *
   * <p>Primitive values are unwrapped through their typed inspector; maps, lists and structs are
   * delegated to {@code serializeMap}, {@code serializeList} and {@code serializeStruct}.
   * Primitive categories without an explicit case below, and non-primitive categories other
   * than MAP, LIST and STRUCT, yield {@code null}.
   *
   * @param obj Object holding the fields' content
   * @param oi The field's object inspector
   * @return the serialized object, or {@code null} when {@code obj} is null
   * @throws RuntimeException if the primitive category is UNKNOWN
   */
  Object serializeField(Object obj, ObjectInspector oi) {
    if (obj == null) {
      return null;
    }

    Object result = null;
    switch (oi.getCategory()) {
      case PRIMITIVE:
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        switch (poi.getPrimitiveCategory()) {
          case VOID:
            result = null;
            break;
          case BOOLEAN:
            result = (((BooleanObjectInspector) poi).get(obj) ? Boolean.TRUE : Boolean.FALSE);
            break;
          case BYTE:
            // A byte inspector is not a ShortObjectInspector, so casting to one would fail.
            // Read the boxed value through the generic primitive accessor instead.
            result = poi.getPrimitiveJavaObject(obj);
            break;
          case DOUBLE:
            result = (((DoubleObjectInspector) poi).get(obj));
            break;
          case FLOAT:
            result = (((FloatObjectInspector) poi).get(obj));
            break;
          case INT:
            result = (((IntObjectInspector) poi).get(obj));
            break;
          case LONG:
            result = (((LongObjectInspector) poi).get(obj));
            break;
          case SHORT:
            result = (((ShortObjectInspector) poi).get(obj));
            break;
          case STRING:
            result = (((StringObjectInspector) poi).getPrimitiveJavaObject(obj));
            break;
          case UNKNOWN:
            throw new RuntimeException("Unknown primitive");
        }
        break;
      case MAP:
        result = serializeMap(obj, (MapObjectInspector) oi);
        break;
      case LIST:
        result = serializeList(obj, (ListObjectInspector) oi);
        break;
      case STRUCT:
        result = serializeStruct(obj, (StructObjectInspector) oi, null);
        break;
    }
    return result;
  }
コード例 #22
0
  /**
   * Picks a converter for the given inspector.
   *
   * <p>PRIMITIVE inspectors convert to themselves; LIST, MAP and STRUCT inspectors convert to
   * the inspector produced by {@code solveOi}.
   *
   * @param arg the source inspector
   * @return the converter, or {@code null} for any other category
   */
  private ObjectInspectorConverters.Converter getConverter(ObjectInspector arg) {

    final ObjectInspector target;
    switch (arg.getCategory()) {
      case LIST:
      case MAP:
      case STRUCT:
        target = solveOi(arg);
        break;
      case PRIMITIVE:
        target = arg;
        break;
      default:
        return null;
    }
    return ObjectInspectorConverters.getConverter(arg, target);
  }
コード例 #23
0
ファイル: MatchPath.java プロジェクト: joellove/hive-udf
    /*
     * Validates the symbol arguments and registers them in evaluator.symInfo.
     *
     * The symbol arguments occupy indices 1 .. argsNum - 2 and are read as (name, expression)
     * pairs. Each name must be a constant of primitive category STRING; each expression must be
     * of primitive category BOOLEAN. Violations are reported through throwErrorWithSignature.
     */
    private void validateAndSetupSymbolInfo(
        MatchPath evaluator, List<PTFExpressionDef> args, int argsNum) throws SemanticException {
      final int symbolArgCount = argsNum - 2;
      final boolean pairedUp = symbolArgCount % 2 == 0;
      if (!pairedUp) {
        throwErrorWithSignature(
            "Symbol Name, Expression need to be specified in pairs: "
                + "there are odd number of symbol args");
      }

      evaluator.symInfo = new SymbolsInfo(symbolArgCount / 2);
      for (int nameIdx = 1; nameIdx <= symbolArgCount; nameIdx += 2) {
        final PTFExpressionDef nameArg = args.get(nameIdx);
        final ObjectInspector nameOI = nameArg.getOI();

        // Evaluated left to right, so the primitive cast only happens for primitive inspectors.
        final boolean nameIsConstantString =
            ObjectInspectorUtils.isConstantObjectInspector(nameOI)
                && nameOI.getCategory() == ObjectInspector.Category.PRIMITIVE
                && ((PrimitiveObjectInspector) nameOI).getPrimitiveCategory()
                    == PrimitiveObjectInspector.PrimitiveCategory.STRING;
        if (!nameIsConstantString) {
          throwErrorWithSignature(
              String.format(
                  "Currently a Symbol Name(%s) must be a Constant String",
                  nameArg.getExpressionTreeString()));
        }
        final ConstantObjectInspector constantNameOI = (ConstantObjectInspector) nameOI;
        final String symbolName = constantNameOI.getWritableConstantValue().toString();

        final PTFExpressionDef exprArg = args.get(nameIdx + 1);
        final ObjectInspector exprOI = exprArg.getOI();
        final boolean exprIsBoolean =
            exprOI.getCategory() == ObjectInspector.Category.PRIMITIVE
                && ((PrimitiveObjectInspector) exprOI).getPrimitiveCategory()
                    == PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN;
        if (!exprIsBoolean) {
          throwErrorWithSignature(
              String.format(
                  "Currently a Symbol Expression(%s) must be a boolean expression",
                  exprArg.getExpressionTreeString()));
        }
        evaluator.symInfo.add(symbolName, exprArg);
      }
    }
コード例 #24
0
  /*
   * Fills objectArrayBuffer, position by position, from the fields of lazyBinaryStruct and
   * returns that same buffer.
   *
   * Primitive fields are stored as the LazyBinary object itself; fields of any other category
   * are first copied to a standard object using the WRITABLE copy option. The buffer is written
   * with set(), so it must already hold at least as many elements as there are fields.
   */
  public static List<Object> getComplexFieldsAsList(
      LazyBinaryStruct lazyBinaryStruct,
      ArrayList<Object> objectArrayBuffer,
      LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {

    final List<? extends StructField> fieldRefs =
        lazyBinaryStructObjectInspector.getAllStructFieldRefs();
    final int fieldCount = fieldRefs.size();

    for (int idx = 0; idx < fieldCount; idx++) {
      final ObjectInspector fieldInspector = fieldRefs.get(idx).getFieldObjectInspector();
      final boolean primitive = fieldInspector.getCategory() == Category.PRIMITIVE;
      final Object raw = lazyBinaryStruct.getField(idx);

      final Object stored =
          primitive
              ? raw
              : ObjectInspectorUtils.copyToStandardObject(
                  raw, fieldInspector, ObjectInspectorCopyOption.WRITABLE);
      objectArrayBuffer.set(idx, stored);
    }
    return objectArrayBuffer;
  }
コード例 #25
0
ファイル: LazyBinaryFactory.java プロジェクト: rohegde/hive
  /**
   * Creates the LazyBinary object that matches the category of the given inspector.
   *
   * @param oi the inspector; for MAP, LIST and STRUCT it must be the corresponding LazyBinary
   *     inspector type, since it is cast to it
   * @return a LazyBinary primitive, map, array or struct
   * @throws RuntimeException if the category is none of PRIMITIVE, MAP, LIST, STRUCT
   */
  public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) {
    switch (oi.getCategory()) {
      case STRUCT:
        return new LazyBinaryStruct((LazyBinaryStructObjectInspector) oi);
      case LIST:
        return new LazyBinaryArray((LazyBinaryListObjectInspector) oi);
      case MAP:
        return new LazyBinaryMap((LazyBinaryMapObjectInspector) oi);
      case PRIMITIVE:
        return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector) oi);
      default:
        throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
    }
  }
コード例 #26
0
ファイル: SerDeUtils.java プロジェクト: jnthm/hive
 /**
  * Converts a value to the object placed in a Thrift payload.
  *
  * <p>Non-primitive values are returned as their JSON string. Primitive values are copied to a
  * standard Java object; a null primitive yields {@code null}. When {@code version < 5}, a
  * BINARY primitive is returned as a {@code String} built from its bytes.
  *
  * <p>Per the original note, this is meant to follow the JDBC 3.0 type mapping (Table B-3) and
  * to stay consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}.
  *
  * @param val the value to convert
  * @param valOI inspector describing {@code val}
  * @param version protocol version used to decide how BINARY values are exposed
  * @return the converted value, or {@code null}
  */
 public static Object toThriftPayload(Object val, ObjectInspector valOI, int version) {
   // for now, expose non-primitive as a string
   // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
   if (valOI.getCategory() != ObjectInspector.Category.PRIMITIVE) {
     return SerDeUtils.getJSONString(val, valOI);
   }
   if (val == null) {
     return null;
   }

   final Object javaValue =
       ObjectInspectorUtils.copyToStandardObject(
           val, valOI, ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);

   // uses string type for binary before HIVE_CLI_SERVICE_PROTOCOL_V6
   // NOTE(review): the check is `version < 5`; confirm this matches the protocol constant.
   final boolean binaryAsString =
       version < 5
           && ((PrimitiveObjectInspector) valOI).getPrimitiveCategory()
               == PrimitiveObjectInspector.PrimitiveCategory.BINARY;
   if (binaryAsString) {
     // todo HIVE-5269
     return new String((byte[]) javaValue);
   }
   return javaValue;
 }
コード例 #27
0
ファイル: ReduceSinkOperator.java プロジェクト: hugh-han/hive
  /**
   * Processes one input row for the given tag.
   *
   * <p>On the first row, the key/value/partition (and optionally bucket) object inspectors and
   * the {@code cachedKeys}/{@code cachedValues} buffers are initialized from the inspector at
   * {@code inputObjInspectors[tag]}. For every row, the first key is built, given a hash code,
   * and offered to {@code reducerHash}; depending on the returned index the row is dropped, its
   * value is forwarded via {@code collect}, or its value is stored in {@code reducerHash}. Keys
   * for any further distinct expressions are then forwarded with the same hash code and value.
   *
   * @param row the input row
   * @param tag index into {@code inputObjInspectors}, also passed to {@code toHiveKey}
   * @throws HiveException rethrown as-is, or wrapping any other exception raised while
   *     processing
   */
  @Override
  @SuppressWarnings("unchecked")
  public void process(Object row, int tag) throws HiveException {
    try {
      ObjectInspector rowInspector = inputObjInspectors[tag];
      if (firstRow) {
        firstRow = false;
        // TODO: this is fishy - we init object inspectors based on first tag. We
        //       should either init for each tag, or if rowInspector doesn't really
        //       matter, then we can create this in ctor and get rid of firstRow.
        if (conf.getWriteType() == AcidUtils.Operation.UPDATE
            || conf.getWriteType() == AcidUtils.Operation.DELETE) {
          assert rowInspector instanceof StructObjectInspector
              : "Exptected rowInspector to be instance of StructObjectInspector but it is a "
                  + rowInspector.getClass().getName();
          acidRowInspector = (StructObjectInspector) rowInspector;
          // The record identifier is always in the first column
          recIdField = acidRowInspector.getAllStructFieldRefs().get(0);
          recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
          // The bucket field is in the second position
          bucketField = recIdInspector.getAllStructFieldRefs().get(1);
          bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
        }

        if (isLogInfoEnabled) {
          LOG.info(
              "keys are "
                  + conf.getOutputKeyColumnNames()
                  + " num distributions: "
                  + conf.getNumDistributionKeys());
        }
        keyObjectInspector =
            initEvaluatorsAndReturnStruct(
                keyEval,
                distinctColIndices,
                conf.getOutputKeyColumnNames(),
                numDistributionKeys,
                rowInspector);
        valueObjectInspector =
            initEvaluatorsAndReturnStruct(
                valueEval, conf.getOutputValueColumnNames(), rowInspector);
        partitionObjectInspectors = initEvaluators(partitionEval, rowInspector);
        if (bucketEval != null) {
          bucketObjectInspectors = initEvaluators(bucketEval, rowInspector);
        }
        // One key row per distinct expression (at least one); with distinct expressions each
        // key row has one extra slot beyond the distribution keys.
        int numKeys = numDistinctExprs > 0 ? numDistinctExprs : 1;
        int keyLen = numDistinctExprs > 0 ? numDistributionKeys + 1 : numDistributionKeys;
        cachedKeys = new Object[numKeys][keyLen];
        cachedValues = new Object[valueEval.length];
      }

      // Determine distKeyLength (w/o distincts), and then add the first if present.
      populateCachedDistributionKeys(row, 0);

      // replace bucketing columns with hashcode % numBuckets
      int bucketNumber = -1;
      if (bucketEval != null) {
        bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
        cachedKeys[0][buckColIdxInKey] = new Text(String.valueOf(bucketNumber));
      } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE
          || conf.getWriteType() == AcidUtils.Operation.DELETE) {
        // In the non-partitioned case we still want to compute the bucket number for updates and
        // deletes.
        bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
      }

      // The first key is built once without distinct columns to learn distKeyLength, then
      // rebuilt with the first distinct key populated when distinct expressions exist.
      HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
      int distKeyLength = firstKey.getDistKeyLength();
      if (numDistinctExprs > 0) {
        populateCachedDistinctKeys(row, 0);
        firstKey = toHiveKey(cachedKeys[0], tag, distKeyLength);
      }

      final int hashCode;

      // distKeyLength doesn't include tag, but includes buckNum in cachedKeys[0]
      if (useUniformHash && partitionEval.length > 0) {
        hashCode = computeMurmurHash(firstKey);
      } else {
        hashCode = computeHashCode(row, bucketNumber);
      }

      firstKey.setHashCode(hashCode);

      /*
       * in case of TopN for windowing, we need to distinguish between rows with
       * null partition keys and rows with value 0 for partition keys.
       */
      boolean partKeyNull = conf.isPTFReduceSink() && partitionKeysAreNull(row);

      // Try to store the first key. If it's not excluded, we will proceed.
      int firstIndex = reducerHash.tryStoreKey(firstKey, partKeyNull);
      if (firstIndex == TopNHash.EXCLUDE) return; // Nothing to do.
      // Compute value and hashcode - we'd either store or forward them.
      BytesWritable value = makeValueWritable(row);

      if (firstIndex == TopNHash.FORWARD) {
        collect(firstKey, value);
      } else {
        assert firstIndex >= 0;
        reducerHash.storeValue(firstIndex, firstKey.hashCode(), value, false);
      }

      // All other distinct keys will just be forwarded. This could be optimized...
      for (int i = 1; i < numDistinctExprs; i++) {
        // Reuse the distribution-key prefix of the first key row for each further distinct key.
        System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
        populateCachedDistinctKeys(row, i);
        HiveKey hiveKey = toHiveKey(cachedKeys[i], tag, distKeyLength);
        hiveKey.setHashCode(hashCode);
        collect(hiveKey, value);
      }
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
コード例 #28
0
 /** Returns the type name reported by the wrapped {@code objectInspector}. */
 public String getType() {
   final String typeName = objectInspector.getTypeName();
   return typeName;
 }
コード例 #29
0
  /**
   * Experimental helper that dumps an ORC file to standard output.
   *
   * <p>Creates a reader for {@code m_file_path}, prints the reader, row count, type kinds,
   * compression settings and per-column inspector details, then prints every row with each
   * field followed by {@code '|'} (null fields print as empty). The row reader is closed before
   * the method returns, even when reading fails.
   *
   * <p>Assigns the fields {@code m_reader}, {@code m_types} and {@code m_fields}.
   *
   * @throws Exception propagated from the ORC reader calls
   */
  public void testRead() throws Exception {

    m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf));

    System.out.println("Reader: " + m_reader);

    System.out.println("# Rows: " + m_reader.getNumberOfRows());
    m_types = m_reader.getTypes();
    System.out.println("# Types in the file: " + m_types.size());

    for (int i = 0; i < m_types.size(); i++) {
      System.out.println("Type " + i + ": " + m_types.get(i).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    if (m_reader.getCompression() != CompressionKind.NONE) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    // Locals use the lv_ prefix so they are not mistaken for the m_-prefixed members.
    StructObjectInspector lv_oi = (StructObjectInspector) m_reader.getObjectInspector();

    System.out.println("object inspector type category: " + lv_oi.getCategory());
    System.out.println("object inspector type name    : " + lv_oi.getTypeName());

    m_fields = lv_oi.getAllStructFieldRefs();
    System.out.println("Number of columns in the table: " + m_fields.size());

    RecordReader lv_rr = m_reader.rows();
    try {
      // Print the type info:
      for (int i = 0; i < m_fields.size(); i++) {
        System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName());
        ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector();
        System.out.println("Column " + i + " type category: " + lv_foi.getCategory());
        System.out.println("Column " + i + " type name: " + lv_foi.getTypeName());
      }

      OrcStruct lv_row = null;
      StringBuilder lv_row_string = new StringBuilder(1024);
      while (lv_rr.hasNext()) {
        // The previous row object is handed back to the reader for reuse.
        lv_row = (OrcStruct) lv_rr.next(lv_row);
        lv_row_string.setLength(0);
        for (int i = 0; i < m_fields.size(); i++) {
          Object lv_field_val = lv_row.getFieldValue(i);
          if (lv_field_val != null) {
            lv_row_string.append(lv_field_val);
          }
          lv_row_string.append('|');
        }
        System.out.println(lv_row_string);
      }
    } finally {
      // Release the row reader's resources regardless of how the loop ended.
      lv_rr.close();
    }

    /*
     * Typecasting to appropriate type based on the 'kind' if (OrcProto.Type.Kind.INT ==
     * m_types.get(1).getKind()) { IntWritable lvf_1_val = (IntWritable) lv_row.getFieldValue(0);
     * System.out.println("Column 1 value: " + lvf_1_val); }
     */
  }
コード例 #30
0
ファイル: HiveUtils.java プロジェクト: naritta/hivemall
 /**
  * Tells whether the inspector's type name equals {@code BOOLEAN_TYPE_NAME}.
  *
  * @param oi the inspector to check
  * @return {@code true} when the type names match
  */
 public static boolean isBooleanOI(@Nonnull final ObjectInspector oi) {
   return BOOLEAN_TYPE_NAME.equals(oi.getTypeName());
 }