@Override
 public void setKeyValue(Writable key, Writable val) throws SerDeException {
   Object keyObj = keySerDe.deserialize(key), valObj = valSerDe.deserialize(val);
   List<? extends StructField> keyFields = keySoi.getAllStructFieldRefs(),
       valFields = valSoi.getAllStructFieldRefs();
   for (int i = 0; i < keyFields.size(); ++i) {
     keyObjs[i] = keySoi.getStructFieldData(keyObj, keyFields.get(i));
   }
   for (int i = 0; i < valFields.size(); ++i) {
     valObjs[i] = valSoi.getStructFieldData(valObj, valFields.get(i));
   }
 }
  /**
   * Serializing means getting every field, and setting the appropriate JSONObject field. Actual
   * serialization is done at the end when the whole JSON object is built
   *
   * @param serializer
   * @param obj
   * @param structObjectInspector
   */
  private JSONObject serializeStruct(
      Object obj, StructObjectInspector soi, List<String> columnNames) {
    // do nothing for null struct
    if (null == obj) {
      return null;
    }

    JSONObject result = new JSONObject();

    List<? extends StructField> fields = soi.getAllStructFieldRefs();

    for (int i = 0; i < fields.size(); i++) {
      StructField sf = fields.get(i);
      Object data = soi.getStructFieldData(obj, sf);

      if (null != data) {
        try {
          // we want to serialize columns with their proper HIVE name,
          // not the _col2 kind of name usually generated upstream
          result.put(
              getSerializedFieldName(columnNames, i, sf),
              serializeField(data, sf.getFieldObjectInspector()));

        } catch (JSONException ex) {
          LOG.warn("Problem serializing", ex);
          throw new RuntimeException(ex);
        }
      }
    }
    return result;
  }
Esempio n. 3
0
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {

    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fields.size(); i++) {
      if (i > 0) sb.append(separator);
      Object column = soi.getStructFieldData(obj, fields.get(i));
      if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
        // For primitive object, serialize to plain string
        sb.append(column == null ? nullString : column.toString());
      } else {
        // For complex object, serialize to JSON format
        sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
      }
    }
    serializeCache.set(sb.toString());
    return serializeCache;
  }
Esempio n. 4
0
  private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else {
      Object fieldValue =
          ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");
      Slice value;
      if (fieldValue instanceof String) {
        value = Slices.utf8Slice((String) fieldValue);
      } else if (fieldValue instanceof byte[]) {
        value = Slices.wrappedBuffer((byte[]) fieldValue);
      } else if (fieldValue instanceof HiveVarchar) {
        value = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
      } else {
        throw new IllegalStateException(
            "unsupported string field type: " + fieldValue.getClass().getName());
      }
      Type type = types[column];
      if (isVarcharType(type)) {
        value = truncateToLength(value, type);
      }
      slices[column] = value;
      nulls[column] = false;
    }
  }
  private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else if (hiveTypes[column] == HiveType.MAP
        || hiveTypes[column] == HiveType.LIST
        || hiveTypes[column] == HiveType.STRUCT) {
      // temporarily special case MAP, LIST, and STRUCT types as strings
      slices[column] =
          Slices.wrappedBuffer(
              SerDeUtils.getJsonBytes(sessionTimeZone, fieldData, fieldInspectors[column]));
      nulls[column] = false;
    } else {
      Object fieldValue =
          ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");
      if (fieldValue instanceof String) {
        slices[column] = Slices.utf8Slice((String) fieldValue);
      } else if (fieldValue instanceof byte[]) {
        slices[column] = Slices.wrappedBuffer((byte[]) fieldValue);
      } else {
        throw new IllegalStateException(
            "unsupported string field type: " + fieldValue.getClass().getName());
      }
      nulls[column] = false;
    }
  }
Esempio n. 6
0
 private void stringifyObject(StringBuilder buffer, Object obj, ObjectInspector inspector)
     throws IOException {
   if (inspector instanceof StructObjectInspector) {
     buffer.append("{ ");
     StructObjectInspector soi = (StructObjectInspector) inspector;
     boolean isFirst = true;
     for (StructField field : soi.getAllStructFieldRefs()) {
       if (isFirst) {
         isFirst = false;
       } else {
         buffer.append(", ");
       }
       buffer.append(field.getFieldName());
       buffer.append(": ");
       stringifyObject(
           buffer, soi.getStructFieldData(obj, field), field.getFieldObjectInspector());
     }
     buffer.append(" }");
   } else if (inspector instanceof PrimitiveObjectInspector) {
     PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
     buffer.append(poi.getPrimitiveJavaObject(obj).toString());
   } else {
     buffer.append("*unknown*");
   }
 }
Esempio n. 7
0
  private void parseDecimalColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else {
      Object fieldValue =
          ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");

      HiveDecimal decimal = (HiveDecimal) fieldValue;
      DecimalType columnType = (DecimalType) types[column];
      BigInteger unscaledDecimal =
          rescale(decimal.unscaledValue(), decimal.scale(), columnType.getScale());

      if (columnType.isShort()) {
        longs[column] = unscaledDecimal.longValue();
      } else {
        slices[column] = Decimals.encodeUnscaledValue(unscaledDecimal);
      }
      nulls[column] = false;
    }
  }
  @Override
  protected String extractField(Object target) {
    if (target instanceof HiveType) {
      HiveType type = (HiveType) target;
      ObjectInspector inspector = type.getObjectInspector();
      if (inspector instanceof StructObjectInspector) {
        StructObjectInspector soi = (StructObjectInspector) inspector;
        StructField field = soi.getStructFieldRef(fieldName);
        ObjectInspector foi = field.getFieldObjectInspector();
        Assert.isTrue(
            foi.getCategory() == ObjectInspector.Category.PRIMITIVE,
            String.format(
                "Field [%s] needs to be a primitive; found [%s]", fieldName, foi.getTypeName()));

        // expecting a writeable - simply do a toString
        Object data = soi.getStructFieldData(type.getObject(), field);
        if (data == null || data instanceof NullWritable) {
          return StringUtils.EMPTY;
        }
        return data.toString();
      }
    }

    return null;
  }
Esempio n. 9
0
  private RowSet decodeFromString(List<Object> rows, RowSet rowSet)
      throws SQLException, SerDeException {
    getSerDe();
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object[] deserializedFields = new Object[fieldRefs.size()];
    Object rowObj;
    ObjectInspector fieldOI;

    int protocol = getProtocolVersion().getValue();
    for (Object rowString : rows) {
      try {
        rowObj = serde.deserialize(new BytesWritable(((String) rowString).getBytes("UTF-8")));
      } catch (UnsupportedEncodingException e) {
        throw new SerDeException(e);
      }
      for (int i = 0; i < fieldRefs.size(); i++) {
        StructField fieldRef = fieldRefs.get(i);
        fieldOI = fieldRef.getFieldObjectInspector();
        Object fieldData = soi.getStructFieldData(rowObj, fieldRef);
        deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol);
      }
      rowSet.addRow(deserializedFields);
    }
    return rowSet;
  }
Esempio n. 10
0
  @Override
  public Writable serialize(final Object obj, final ObjectInspector inspector)
      throws SerDeException {

    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != columnNames.size()) {
      throw new SerDeException(
          String.format("Required %d columns, received %d.", columnNames.size(), fields.size()));
    }

    cachedWritable.clear();
    for (int c = 0; c < fieldCount; c++) {
      StructField structField = fields.get(c);

      LOG.debug("fieldId=" + c + ",structField=" + structField.toString());

      if (structField != null) {
        final Object field = structInspector.getStructFieldData(obj, fields.get(c));

        final AbstractPrimitiveObjectInspector fieldOI =
            (AbstractPrimitiveObjectInspector) fields.get(c).getFieldObjectInspector();

        Writable value = (Writable) fieldOI.getPrimitiveWritableObject(field);

        if (value == null) {
          continue;
        }

        LOG.debug("fieldCount=" + fieldCount + ",value=" + value.toString());
        if (value instanceof IntWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), value);
        } else if (value instanceof Text) {
          cachedWritable.put(new Text(columnNames.get(c)), ((Text) value));
        } else if (value instanceof LongWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), ((LongWritable) value));
        } else if (value instanceof DoubleWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), ((DoubleWritable) value));
        } else if (value instanceof FloatWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), ((FloatWritable) value));
        } else if (value instanceof BooleanWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), ((BooleanWritable) value));
        } else if (value instanceof ByteWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), ((ByteWritable) value));
        } else if (value instanceof BytesWritable) {
          cachedWritable.put(new Text(columnNames.get(c)), ((BytesWritable) value));
        } else {
          LOG.warn("fieldCount=" + fieldCount + ",type=" + value.getClass().getName());
        }
      }
    }

    return cachedWritable;
  }
 private ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector)
     throws SerDeException {
   final List<? extends StructField> fields = inspector.getAllStructFieldRefs();
   final Writable[] arr = new Writable[fields.size()];
   for (int i = 0; i < fields.size(); i++) {
     final StructField field = fields.get(i);
     final Object subObj = inspector.getStructFieldData(obj, field);
     final ObjectInspector subInspector = field.getFieldObjectInspector();
     arr[i] = createObject(subObj, subInspector);
   }
   return new ArrayWritable(Writable.class, arr);
 }
 /**
  * Copy fields in the input row to the output array of standard objects.
  *
  * @param result output list of standard objects.
  * @param row input row.
  * @param soi Object inspector for the to-be-copied columns.
  * @param objectInspectorOption
  */
 public static void copyToStandardObject(
     List<Object> result,
     Object row,
     StructObjectInspector soi,
     ObjectInspectorCopyOption objectInspectorOption) {
   List<? extends StructField> fields = soi.getAllStructFieldRefs();
   for (StructField f : fields) {
     result.add(
         copyToStandardObject(
             soi.getStructFieldData(row, f), f.getFieldObjectInspector(), objectInspectorOption));
   }
 }
Esempio n. 13
0
 private int computeBucketNumber(Object row, int numBuckets) throws HiveException {
   if (conf.getWriteType() == AcidUtils.Operation.UPDATE
       || conf.getWriteType() == AcidUtils.Operation.DELETE) {
     // We don't need to evaluate the hash code.  Instead read the bucket number directly from
     // the row.  I don't need to evaluate any expressions as I know I am reading the ROW__ID
     // column directly.
     Object recIdValue = acidRowInspector.getStructFieldData(row, recIdField);
     int buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField));
     if (isLogTraceEnabled) {
       LOG.trace("Acid choosing bucket number " + buckNum);
     }
     return buckNum;
   } else {
     Object[] bucketFieldValues = new Object[bucketEval.length];
     for (int i = 0; i < bucketEval.length; i++) {
       bucketFieldValues[i] = bucketEval[i].evaluate(row);
     }
     return ObjectInspectorUtils.getBucketNumber(
         bucketFieldValues, bucketObjectInspectors, numBuckets);
   }
 }
Esempio n. 14
0
  private void parseObjectColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else {
      objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column]);
      nulls[column] = false;
    }
  }
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    outputByteBuffer.reset();
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();

    for (int i = 0; i < columnNames.size(); i++) {
      serialize(
          outputByteBuffer,
          soi.getStructFieldData(obj, fields.get(i)),
          fields.get(i).getFieldObjectInspector(),
          columnSortOrderIsDesc[i]);
    }

    serializeBytesWritable.set(outputByteBuffer.getData(), 0, outputByteBuffer.getLength());
    return serializeBytesWritable;
  }
Esempio n. 16
0
  private void parseDoubleColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else {
      Object fieldValue =
          ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");
      doubles[column] = ((Number) fieldValue).doubleValue();
      nulls[column] = false;
    }
  }
Esempio n. 17
0
  private void partialReadTest(FileSystem fs, int count, Path file)
      throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
    readCols.add(Integer.valueOf(2));
    readCols.add(Integer.valueOf(3));
    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();

    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());

      for (int i : readCols) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        Object standardWritableData =
            ObjectInspectorUtils.copyToStandardObject(
                fieldData,
                fieldRefs.get(i).getFieldObjectInspector(),
                ObjectInspectorCopyOption.WRITABLE);
        assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
      }

      assertEquals(
          "Class of the serialized object should be BytesRefArrayWritable",
          BytesRefArrayWritable.class,
          serDe.getSerializedClass());
      BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
      assertEquals("Serialized data", patialS, serializedBytes);
    }
    reader.close();
    long cost = System.currentTimeMillis() - start;
    LOG.debug("reading fully costs:" + cost + " milliseconds");
  }
Esempio n. 18
0
  @Override
  public synchronized void process(Object row, int tag) throws HiveException {

    StructObjectInspector soi = parentObjInspectors[tag];
    List<? extends StructField> fields = parentFields[tag];

    if (needsTransform[tag]) {
      for (int c = 0; c < fields.size(); c++) {
        outputRow.set(
            c,
            columnTypeResolvers[c].convertIfNecessary(
                soi.getStructFieldData(row, fields.get(c)),
                fields.get(c).getFieldObjectInspector()));
      }
      forward(outputRow, outputObjInspector);
    } else {
      forward(row, inputObjInspectors[tag]);
    }
  }
Esempio n. 19
0
  public void fullyReadTest(FileSystem fs, int count, Path file)
      throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();
    int actualRead = 0;
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        Object standardWritableData =
            ObjectInspectorUtils.copyToStandardObject(
                fieldData,
                fieldRefs.get(i).getFieldObjectInspector(),
                ObjectInspectorCopyOption.WRITABLE);
        assertEquals("Field " + i, standardWritableData, expectedFieldsData[i]);
      }
      // Serialize
      assertEquals(
          "Class of the serialized object should be BytesRefArrayWritable",
          BytesRefArrayWritable.class,
          serDe.getSerializedClass());
      BytesRefArrayWritable serializedText = (BytesRefArrayWritable) serDe.serialize(row, oi);
      assertEquals("Serialized data", s, serializedText);
      actualRead++;
    }
    reader.close();
    assertEquals("Expect " + count + " rows, actual read " + actualRead, actualRead, count);
    long cost = System.currentTimeMillis() - start;
    LOG.debug("reading fully costs:" + cost + " milliseconds");
  }
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
    final List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs();

    if (outputFieldRefs.size() != numCols) {
      throw new SerDeException(
          "Cannot serialize the object because there are "
              + outputFieldRefs.size()
              + " fields but the table has "
              + numCols
              + " columns.");
    }

    // Get all data out.
    for (int c = 0; c < numCols; c++) {
      final Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c));
      final ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector();

      // The data must be of type String
      final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;

      // Convert the field to Java class String, because objects of String
      // type
      // can be stored in String, Text, or some other classes.
      outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
    }

    final StringWriter writer = new StringWriter();
    final CSVWriter csv = newWriter(writer, separatorChar, quoteChar, escapeChar);

    try {
      csv.writeNext(outputFields);
      csv.close();

      return new Text(writer.toString());
    } catch (final IOException ioe) {
      throw new SerDeException(ioe);
    }
  }
  /**
   * Copy specified fields in the input row to the output array of standard objects.
   *
   * @param result output list of standard objects.
   * @param row input row.
   * @param startCol starting column number from the input row.
   * @param numCols number of columns to copy.
   * @param soi Object inspector for the to-be-copied columns.
   */
  public static void partialCopyToStandardObject(
      List<Object> result,
      Object row,
      int startCol,
      int numCols,
      StructObjectInspector soi,
      ObjectInspectorCopyOption objectInspectorOption) {

    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    int i = 0, j = 0;
    for (StructField f : fields) {
      if (i++ >= startCol) {
        result.add(
            copyToStandardObject(
                soi.getStructFieldData(row, f),
                f.getFieldObjectInspector(),
                objectInspectorOption));
        if (++j == numCols) {
          break;
        }
      }
    }
  }
Esempio n. 22
0
 /**
  * If we received data with tags from ReduceSinkOperators, no keys will match. This should not
  * happen, but is important enough that we want to find out and work around it if some optimized
  * change causes RSO to pass on tags.
  */
 private void sanityCheckKeyForTag() throws SerDeException {
   if (hasTag != null) return;
   BinaryComparable b = (BinaryComparable) key;
   Object o = keySerDe.deserialize(key);
   StructObjectInspector soi = (StructObjectInspector) keySerDe.getObjectInspector();
   List<? extends StructField> fields = soi.getAllStructFieldRefs();
   Object[] data = new Object[fields.size()];
   List<ObjectInspector> fois = new ArrayList<ObjectInspector>(fields.size());
   for (int i = 0; i < fields.size(); i++) {
     data[i] = soi.getStructFieldData(o, fields.get(i));
     fois.add(fields.get(i).getFieldObjectInspector());
   }
   Output output = new Output();
   BinarySortableSerDe.serializeStruct(output, data, fois, new boolean[fields.size()]);
   hasTag = (output.getLength() != b.getLength());
   if (hasTag) {
     LOG.error("Tag found in keys and will be removed. This should not happen.");
     if (output.getLength() != (b.getLength() - 1)) {
       throw new SerDeException(
           "Unexpected tag: " + b.getLength() + " reserialized to " + output.getLength());
     }
   }
 }
Esempio n. 23
0
  @Override
  public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException {
    assertState(OperationState.FINISHED);
    ArrayList<String> rows = new ArrayList<String>();
    driver.setMaxRows((int) maxRows);

    try {
      driver.getResults(rows);

      getSerDe();
      StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
      List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
      RowSet rowSet = new RowSet();

      Object[] deserializedFields = new Object[fieldRefs.size()];
      Object rowObj;
      ObjectInspector fieldOI;

      for (String rowString : rows) {
        rowObj = serde.deserialize(new BytesWritable(rowString.getBytes()));
        for (int i = 0; i < fieldRefs.size(); i++) {
          StructField fieldRef = fieldRefs.get(i);
          fieldOI = fieldRef.getFieldObjectInspector();
          deserializedFields[i] =
              convertLazyToJava(soi.getStructFieldData(rowObj, fieldRef), fieldOI);
        }
        rowSet.addRow(resultSchema, deserializedFields);
      }
      return rowSet;
    } catch (IOException e) {
      throw new HiveSQLException(e);
    } catch (CommandNeedRetryException e) {
      throw new HiveSQLException(e);
    } catch (Exception e) {
      throw new HiveSQLException(e);
    }
  }
  public static void copyStructToArray(
      Object o,
      ObjectInspector oi,
      ObjectInspectorCopyOption objectInspectorOption,
      Object[] dest,
      int offset)
      throws SerDeException {
    if (o == null) {
      return;
    }

    if (oi.getCategory() != Category.STRUCT) {
      throw new SerDeException("Unexpected category " + oi.getCategory());
    }

    StructObjectInspector soi = (StructObjectInspector) oi;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    for (int i = 0; i < fields.size(); ++i) {
      StructField f = fields.get(i);
      dest[offset + i] =
          copyToStandardObject(
              soi.getStructFieldData(o, f), f.getFieldObjectInspector(), objectInspectorOption);
    }
  }
Esempio n. 25
0
  private static Block serializeStruct(
      Type type, BlockBuilder builder, Object object, StructObjectInspector inspector) {
    if (object == null) {
      requireNonNull(builder, "parent builder is null").appendNull();
      return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    List<? extends StructField> allStructFieldRefs = inspector.getAllStructFieldRefs();
    checkArgument(typeParameters.size() == allStructFieldRefs.size());
    BlockBuilder currentBuilder;
    if (builder != null) {
      currentBuilder = builder.beginBlockEntry();
    } else {
      currentBuilder =
          new InterleavedBlockBuilder(
              typeParameters, new BlockBuilderStatus(), typeParameters.size());
    }

    for (int i = 0; i < typeParameters.size(); i++) {
      StructField field = allStructFieldRefs.get(i);
      serializeObject(
          typeParameters.get(i),
          currentBuilder,
          inspector.getStructFieldData(object, field),
          field.getFieldObjectInspector());
    }

    if (builder != null) {
      builder.closeEntry();
      return null;
    } else {
      Block resultBlock = currentBuilder.build();
      return resultBlock;
    }
  }
Esempio n. 26
0
  @Override
  public void processOp(Object row, int tag) throws HiveException {
    try {
      reportProgress();

      // get alias
      alias = (byte) tag;

      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      ArrayList<Object> nr =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      if (handleSkewJoin) {
        skewJoinKeyContext.handleSkew(tag);
      }

      // number of rows for the key in the given table
      int sz = storage.get(alias).size();
      StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
      Object keyObject = soi.getStructFieldData(row, sf);

      // Are we consuming too much memory
      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
        if (sz == joinEmitInterval) {
          // The input is sorted by alias, so if we are already in the last join
          // operand,
          // we can emit some results now.
          // Note this has to be done before adding the current row to the
          // storage,
          // to preserve the correctness for outer joins.
          checkAndGenObject();
          storage.get(alias).clear();
        }
      } else {
        if (sz == nextSz) {
          // Output a warning if we reached at least 1000 rows for a join
          // operand
          // We won't output a warning for the last join operand since the size
          // will never goes to joinEmitInterval.
          LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject);
          nextSz = getNextSize(nextSz);
        }
      }

      // Add the value to the vector
      storage.get(alias).add(nr);
      // if join-key is null, process each row in different group.
      if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) {
        endGroup();
        startGroup();
      }
    } catch (Exception e) {
      e.printStackTrace();
      throw new HiveException(e);
    }
  }
  public static Object copyToStandardObject(
      Object o, ObjectInspector oi, ObjectInspectorCopyOption objectInspectorOption) {
    if (o == null) {
      return null;
    }

    Object result = null;
    switch (oi.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector loi = (PrimitiveObjectInspector) oi;
          if (objectInspectorOption == ObjectInspectorCopyOption.DEFAULT) {
            objectInspectorOption =
                loi.preferWritable()
                    ? ObjectInspectorCopyOption.WRITABLE
                    : ObjectInspectorCopyOption.JAVA;
          }
          switch (objectInspectorOption) {
            case JAVA:
              result = loi.getPrimitiveJavaObject(o);
              if (loi.getPrimitiveCategory()
                  == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP) {
                result =
                    PrimitiveObjectInspectorFactory.javaTimestampObjectInspector.copyObject(result);
              }
              break;
            case WRITABLE:
              result = loi.getPrimitiveWritableObject(loi.copyObject(o));
              break;
          }
          break;
        }
      case LIST:
        {
          ListObjectInspector loi = (ListObjectInspector) oi;
          int length = loi.getListLength(o);
          ArrayList<Object> list = new ArrayList<Object>(length);
          for (int i = 0; i < length; i++) {
            list.add(
                copyToStandardObject(
                    loi.getListElement(o, i),
                    loi.getListElementObjectInspector(),
                    objectInspectorOption));
          }
          result = list;
          break;
        }
      case MAP:
        {
          MapObjectInspector moi = (MapObjectInspector) oi;
          HashMap<Object, Object> map = new HashMap<Object, Object>();
          Map<? extends Object, ? extends Object> omap = moi.getMap(o);
          for (Map.Entry<? extends Object, ? extends Object> entry : omap.entrySet()) {
            map.put(
                copyToStandardObject(
                    entry.getKey(), moi.getMapKeyObjectInspector(), objectInspectorOption),
                copyToStandardObject(
                    entry.getValue(), moi.getMapValueObjectInspector(), objectInspectorOption));
          }
          result = map;
          break;
        }
      case STRUCT:
        {
          StructObjectInspector soi = (StructObjectInspector) oi;
          List<? extends StructField> fields = soi.getAllStructFieldRefs();
          ArrayList<Object> struct = new ArrayList<Object>(fields.size());
          for (StructField f : fields) {
            struct.add(
                copyToStandardObject(
                    soi.getStructFieldData(o, f),
                    f.getFieldObjectInspector(),
                    objectInspectorOption));
          }
          result = struct;
          break;
        }
      case UNION:
        {
          UnionObjectInspector uoi = (UnionObjectInspector) oi;
          List<ObjectInspector> objectInspectors = uoi.getObjectInspectors();
          Object object =
              copyToStandardObject(
                  uoi.getField(o), objectInspectors.get(uoi.getTag(o)), objectInspectorOption);
          result = object;
          break;
        }
      default:
        {
          throw new RuntimeException("Unknown ObjectInspector category!");
        }
    }
    return result;
  }
  public static int hashCode(Object o, ObjectInspector objIns) {
    if (o == null) {
      return 0;
    }
    switch (objIns.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objIns);
          switch (poi.getPrimitiveCategory()) {
            case VOID:
              return 0;
            case BOOLEAN:
              return ((BooleanObjectInspector) poi).get(o) ? 1 : 0;
            case BYTE:
              return ((ByteObjectInspector) poi).get(o);
            case SHORT:
              return ((ShortObjectInspector) poi).get(o);
            case INT:
              return ((IntObjectInspector) poi).get(o);
            case LONG:
              {
                long a = ((LongObjectInspector) poi).get(o);
                return (int) ((a >>> 32) ^ a);
              }
            case FLOAT:
              return Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
            case DOUBLE:
              {
                // This hash function returns the same result as Double.hashCode()
                // while DoubleWritable.hashCode returns a different result.
                long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
                return (int) ((a >>> 32) ^ a);
              }
            case STRING:
              {
                // This hash function returns the same result as String.hashCode() when
                // all characters are ASCII, while Text.hashCode() always returns a
                // different result.
                Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
                int r = 0;
                for (int i = 0; i < t.getLength(); i++) {
                  r = r * 31 + t.getBytes()[i];
                }
                return r;
              }
            case CHAR:
              return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
            case VARCHAR:
              return ((HiveVarcharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
            case BINARY:
              return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();

            case DATE:
              return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
            case TIMESTAMP:
              TimestampWritable t = ((TimestampObjectInspector) poi).getPrimitiveWritableObject(o);
              return t.hashCode();
            case INTERVAL_YEAR_MONTH:
              HiveIntervalYearMonthWritable intervalYearMonth =
                  ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(o);
              return intervalYearMonth.hashCode();
            case INTERVAL_DAY_TIME:
              HiveIntervalDayTimeWritable intervalDayTime =
                  ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(o);
              return intervalDayTime.hashCode();
            case DECIMAL:
              return ((HiveDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();

            default:
              {
                throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory());
              }
          }
        }
      case LIST:
        {
          int r = 0;
          ListObjectInspector listOI = (ListObjectInspector) objIns;
          ObjectInspector elemOI = listOI.getListElementObjectInspector();
          for (int ii = 0; ii < listOI.getListLength(o); ++ii) {
            r = 31 * r + hashCode(listOI.getListElement(o, ii), elemOI);
          }
          return r;
        }
      case MAP:
        {
          int r = 0;
          MapObjectInspector mapOI = (MapObjectInspector) objIns;
          ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
          ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
          Map<?, ?> map = mapOI.getMap(o);
          for (Map.Entry<?, ?> entry : map.entrySet()) {
            r += hashCode(entry.getKey(), keyOI) ^ hashCode(entry.getValue(), valueOI);
          }
          return r;
        }
      case STRUCT:
        int r = 0;
        StructObjectInspector structOI = (StructObjectInspector) objIns;
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        for (StructField field : fields) {
          r =
              31 * r
                  + hashCode(
                      structOI.getStructFieldData(o, field), field.getFieldObjectInspector());
        }
        return r;

      case UNION:
        UnionObjectInspector uOI = (UnionObjectInspector) objIns;
        byte tag = uOI.getTag(o);
        return hashCode(uOI.getField(o), uOI.getObjectInspectors().get(tag));

      default:
        throw new RuntimeException("Unknown type: " + objIns.getTypeName());
    }
  }
  /** Compare two objects with their respective ObjectInspectors. */
  public static int compare(
      Object o1,
      ObjectInspector oi1,
      Object o2,
      ObjectInspector oi2,
      MapEqualComparer mapEqualComparer) {
    if (oi1.getCategory() != oi2.getCategory()) {
      return oi1.getCategory().compareTo(oi2.getCategory());
    }

    if (o1 == null) {
      return o2 == null ? 0 : -1;
    } else if (o2 == null) {
      return 1;
    }

    switch (oi1.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector poi1 = ((PrimitiveObjectInspector) oi1);
          PrimitiveObjectInspector poi2 = ((PrimitiveObjectInspector) oi2);
          if (poi1.getPrimitiveCategory() != poi2.getPrimitiveCategory()) {
            return poi1.getPrimitiveCategory().compareTo(poi2.getPrimitiveCategory());
          }
          switch (poi1.getPrimitiveCategory()) {
            case VOID:
              return 0;
            case BOOLEAN:
              {
                int v1 = ((BooleanObjectInspector) poi1).get(o1) ? 1 : 0;
                int v2 = ((BooleanObjectInspector) poi2).get(o2) ? 1 : 0;
                return v1 - v2;
              }
            case BYTE:
              {
                int v1 = ((ByteObjectInspector) poi1).get(o1);
                int v2 = ((ByteObjectInspector) poi2).get(o2);
                return v1 - v2;
              }
            case SHORT:
              {
                int v1 = ((ShortObjectInspector) poi1).get(o1);
                int v2 = ((ShortObjectInspector) poi2).get(o2);
                return v1 - v2;
              }
            case INT:
              {
                int v1 = ((IntObjectInspector) poi1).get(o1);
                int v2 = ((IntObjectInspector) poi2).get(o2);
                return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0);
              }
            case LONG:
              {
                long v1 = ((LongObjectInspector) poi1).get(o1);
                long v2 = ((LongObjectInspector) poi2).get(o2);
                return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0);
              }
            case FLOAT:
              {
                float v1 = ((FloatObjectInspector) poi1).get(o1);
                float v2 = ((FloatObjectInspector) poi2).get(o2);
                return Float.compare(v1, v2);
              }
            case DOUBLE:
              {
                double v1 = ((DoubleObjectInspector) poi1).get(o1);
                double v2 = ((DoubleObjectInspector) poi2).get(o2);
                return Double.compare(v1, v2);
              }
            case STRING:
              {
                if (poi1.preferWritable() || poi2.preferWritable()) {
                  Text t1 = (Text) poi1.getPrimitiveWritableObject(o1);
                  Text t2 = (Text) poi2.getPrimitiveWritableObject(o2);
                  return t1 == null ? (t2 == null ? 0 : -1) : (t2 == null ? 1 : t1.compareTo(t2));
                } else {
                  String s1 = (String) poi1.getPrimitiveJavaObject(o1);
                  String s2 = (String) poi2.getPrimitiveJavaObject(o2);
                  return s1 == null ? (s2 == null ? 0 : -1) : (s2 == null ? 1 : s1.compareTo(s2));
                }
              }
            case CHAR:
              {
                HiveCharWritable t1 =
                    ((HiveCharObjectInspector) poi1).getPrimitiveWritableObject(o1);
                HiveCharWritable t2 =
                    ((HiveCharObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return t1.compareTo(t2);
              }
            case VARCHAR:
              {
                HiveVarcharWritable t1 =
                    ((HiveVarcharObjectInspector) poi1).getPrimitiveWritableObject(o1);
                HiveVarcharWritable t2 =
                    ((HiveVarcharObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return t1.compareTo(t2);
              }
            case BINARY:
              {
                BytesWritable bw1 = ((BinaryObjectInspector) poi1).getPrimitiveWritableObject(o1);
                BytesWritable bw2 = ((BinaryObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return bw1.compareTo(bw2);
              }

            case DATE:
              {
                DateWritable d1 = ((DateObjectInspector) poi1).getPrimitiveWritableObject(o1);
                DateWritable d2 = ((DateObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return d1.compareTo(d2);
              }
            case TIMESTAMP:
              {
                TimestampWritable t1 =
                    ((TimestampObjectInspector) poi1).getPrimitiveWritableObject(o1);
                TimestampWritable t2 =
                    ((TimestampObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return t1.compareTo(t2);
              }
            case INTERVAL_YEAR_MONTH:
              {
                HiveIntervalYearMonthWritable i1 =
                    ((HiveIntervalYearMonthObjectInspector) poi1).getPrimitiveWritableObject(o1);
                HiveIntervalYearMonthWritable i2 =
                    ((HiveIntervalYearMonthObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return i1.compareTo(i2);
              }
            case INTERVAL_DAY_TIME:
              {
                HiveIntervalDayTimeWritable i1 =
                    ((HiveIntervalDayTimeObjectInspector) poi1).getPrimitiveWritableObject(o1);
                HiveIntervalDayTimeWritable i2 =
                    ((HiveIntervalDayTimeObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return i1.compareTo(i2);
              }
            case DECIMAL:
              {
                HiveDecimalWritable t1 =
                    ((HiveDecimalObjectInspector) poi1).getPrimitiveWritableObject(o1);
                HiveDecimalWritable t2 =
                    ((HiveDecimalObjectInspector) poi2).getPrimitiveWritableObject(o2);
                return t1.compareTo(t2);
              }
            default:
              {
                throw new RuntimeException("Unknown type: " + poi1.getPrimitiveCategory());
              }
          }
        }
      case STRUCT:
        {
          StructObjectInspector soi1 = (StructObjectInspector) oi1;
          StructObjectInspector soi2 = (StructObjectInspector) oi2;
          List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
          List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
          int minimum = Math.min(fields1.size(), fields2.size());
          for (int i = 0; i < minimum; i++) {
            int r =
                compare(
                    soi1.getStructFieldData(o1, fields1.get(i)),
                    fields1.get(i).getFieldObjectInspector(),
                    soi2.getStructFieldData(o2, fields2.get(i)),
                    fields2.get(i).getFieldObjectInspector(),
                    mapEqualComparer);
            if (r != 0) {
              return r;
            }
          }
          return fields1.size() - fields2.size();
        }
      case LIST:
        {
          ListObjectInspector loi1 = (ListObjectInspector) oi1;
          ListObjectInspector loi2 = (ListObjectInspector) oi2;
          int minimum = Math.min(loi1.getListLength(o1), loi2.getListLength(o2));
          for (int i = 0; i < minimum; i++) {
            int r =
                compare(
                    loi1.getListElement(o1, i),
                    loi1.getListElementObjectInspector(),
                    loi2.getListElement(o2, i),
                    loi2.getListElementObjectInspector(),
                    mapEqualComparer);
            if (r != 0) {
              return r;
            }
          }
          return loi1.getListLength(o1) - loi2.getListLength(o2);
        }
      case MAP:
        {
          if (mapEqualComparer == null) {
            throw new RuntimeException("Compare on map type not supported!");
          } else {
            return mapEqualComparer.compare(
                o1, (MapObjectInspector) oi1, o2, (MapObjectInspector) oi2);
          }
        }
      case UNION:
        {
          UnionObjectInspector uoi1 = (UnionObjectInspector) oi1;
          UnionObjectInspector uoi2 = (UnionObjectInspector) oi2;
          byte tag1 = uoi1.getTag(o1);
          byte tag2 = uoi2.getTag(o2);
          if (tag1 != tag2) {
            return tag1 - tag2;
          }
          return compare(
              uoi1.getField(o1),
              uoi1.getObjectInspectors().get(tag1),
              uoi2.getField(o2),
              uoi2.getObjectInspectors().get(tag2),
              mapEqualComparer);
        }
      default:
        throw new RuntimeException("Compare on unknown type: " + oi1.getCategory());
    }
  }
Esempio n. 30
0
  public void testSimpleReadAndWrite() throws IOException, SerDeException {
    fs.delete(file, true);

    byte[][] record_1 = {
      "123".getBytes("UTF-8"),
      "456".getBytes("UTF-8"),
      "789".getBytes("UTF-8"),
      "1000".getBytes("UTF-8"),
      "5.3".getBytes("UTF-8"),
      "hive and hadoop".getBytes("UTF-8"),
      new byte[0],
      "NULL".getBytes("UTF-8")
    };
    byte[][] record_2 = {
      "100".getBytes("UTF-8"),
      "200".getBytes("UTF-8"),
      "123".getBytes("UTF-8"),
      "1000".getBytes("UTF-8"),
      "5.3".getBytes("UTF-8"),
      "hive and hadoop".getBytes("UTF-8"),
      new byte[0],
      "NULL".getBytes("UTF-8")
    };

    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    Object[] expectedRecord_1 = {
      new ByteWritable((byte) 123),
      new ShortWritable((short) 456),
      new IntWritable(789),
      new LongWritable(1000),
      new DoubleWritable(5.3),
      new Text("hive and hadoop"),
      null,
      null
    };

    Object[] expectedRecord_2 = {
      new ByteWritable((byte) 100),
      new ShortWritable((short) 200),
      new IntWritable(123),
      new LongWritable(1000),
      new DoubleWritable(5.3),
      new Text("hive and hadoop"),
      null,
      null
    };

    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();

    for (int i = 0; i < 2; i++) {
      reader.next(rowID);
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
        Object standardWritableData =
            ObjectInspectorUtils.copyToStandardObject(
                fieldData,
                fieldRefs.get(j).getFieldObjectInspector(),
                ObjectInspectorCopyOption.WRITABLE);
        if (i == 0) {
          assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
        } else {
          assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
        }
      }
    }

    reader.close();
  }