Ejemplo n.º 1
0
  /**
   * Writes a map group and all of its key/value entries to the Parquet RecordConsumer. This is
   * called when the original type (MAP) is detected by writeValue().
   *
   * <p>The repeated key/value field is only started when the map holds at least one entry. An
   * empty map is written as an empty group, because a startField/endField pair that encloses no
   * value can be rejected by the Parquet record consumer as an illegal empty field.
   *
   * @param value Wrapper whose first element is the ArrayWritable holding the key/value entries
   * @param type Group schema of the map; its first field is the repeated key/value group
   * @throws RuntimeException if an entry is null or is not an ArrayWritable
   */
  private void writeMap(final ArrayWritable value, final GroupType type) {
    GroupType repeatedType = type.getType(0).asGroupType();
    ArrayWritable repeatedValue = (ArrayWritable) value.get()[0];

    recordConsumer.startGroup();

    Writable[] mapValues = repeatedValue.get();

    // Omit the repeated field completely when there is nothing to write into it.
    if (mapValues.length > 0) {
      String repeatedName = repeatedType.getName();
      recordConsumer.startField(repeatedName, 0);

      for (int record = 0; record < mapValues.length; record++) {
        Writable keyValuePair = mapValues[record];
        if (keyValuePair == null) {
          throw new RuntimeException("Map key-value pair is null on record " + record);
        }
        // Hive wraps a map key-pair into an ArrayWritable
        if (!(keyValuePair instanceof ArrayWritable)) {
          throw new RuntimeException(
              "Map key-value pair is not an ArrayWritable object on record " + record);
        }
        writeGroup((ArrayWritable) keyValuePair, repeatedType);
      }

      recordConsumer.endField(repeatedName, 0);
    }

    recordConsumer.endGroup();
  }
Ejemplo n.º 2
0
 /**
  * Writes one complete record as a Parquet message through the RecordConsumer. A null record is
  * ignored and nothing is written for it.
  *
  * @param record Holds the values of the record that is going to be written
  * @throws RuntimeException if writing the record fields fails; the original exception is logged
  *     and attached as the cause
  */
 public void write(final ArrayWritable record) {
   if (record == null) {
     return;
   }

   recordConsumer.startMessage();
   try {
     writeGroupFields(record, schema);
   } catch (RuntimeException cause) {
     final String errorMessage = "Parquet record is malformed: " + cause.getMessage();
     LOG.error(errorMessage, cause);
     throw new RuntimeException(errorMessage, cause);
   }
   // Only reached when every field was written without an exception.
   recordConsumer.endMessage();
 }
Ejemplo n.º 3
0
 /**
  * Writes a primitive value to the Parquet RecordConsumer, choosing the consumer method from the
  * runtime type of the writable. A null value is ignored.
  *
  * <p>Short and byte values are written as integers. Binary values are written using only the
  * valid part of the writable's buffer.
  *
  * @param value The writable object that contains the primitive value.
  * @throws UnsupportedOperationException if the value is a HiveDecimalWritable
  * @throws IllegalArgumentException if the value is of an unknown writable type
  */
 private void writePrimitive(final Writable value) {
   if (value == null) {
     return;
   }
   if (value instanceof DoubleWritable) {
     recordConsumer.addDouble(((DoubleWritable) value).get());
   } else if (value instanceof BooleanWritable) {
     recordConsumer.addBoolean(((BooleanWritable) value).get());
   } else if (value instanceof FloatWritable) {
     recordConsumer.addFloat(((FloatWritable) value).get());
   } else if (value instanceof IntWritable) {
     recordConsumer.addInteger(((IntWritable) value).get());
   } else if (value instanceof LongWritable) {
     recordConsumer.addLong(((LongWritable) value).get());
   } else if (value instanceof ShortWritable) {
     recordConsumer.addInteger(((ShortWritable) value).get());
   } else if (value instanceof ByteWritable) {
     recordConsumer.addInteger(((ByteWritable) value).get());
   } else if (value instanceof HiveDecimalWritable) {
     throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
   } else if (value instanceof BytesWritable) {
     // getBytes() hands out the backing buffer, which may be longer than the valid data, so the
     // written range is limited to the first getLength() bytes.
     BytesWritable bytes = (BytesWritable) value;
     recordConsumer.addBinary(Binary.fromByteArray(bytes.getBytes(), 0, bytes.getLength()));
   } else if (value instanceof TimestampWritable) {
     Timestamp ts = ((TimestampWritable) value).getTimestamp();
     NanoTime nt = NanoTimeUtils.getNanoTime(ts);
     nt.writeValue(recordConsumer);
   } else {
     throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
   }
 }
Ejemplo n.º 4
0
  /**
   * Writes a list group and all of its elements to the Parquet RecordConsumer. This is called
   * when the original type (LIST) is detected by writeValue().
   *
   * <p>The repeated field is only started when the array holds at least one element. An empty
   * array is written as an empty group, because a startField/endField pair that encloses no value
   * can be rejected by the Parquet record consumer as an illegal empty field. Every element,
   * including a null one, gets its own startGroup/endGroup pair.
   *
   * @param array Wrapper whose first element is the ArrayWritable holding the array elements
   * @param type Group schema of the list; its first field is the repeated element group
   */
  private void writeArray(final ArrayWritable array, final GroupType type) {
    GroupType repeatedType = type.getType(0).asGroupType();
    ArrayWritable repeatedValue = (ArrayWritable) array.get()[0];

    recordConsumer.startGroup();

    Writable[] arrayValues = repeatedValue.get();

    // Omit the repeated field completely when there is nothing to write into it.
    if (arrayValues.length > 0) {
      String repeatedName = repeatedType.getName();
      recordConsumer.startField(repeatedName, 0);

      for (Writable element : arrayValues) {
        recordConsumer.startGroup();

        // Null values must be wrapped into startGroup/endGroup
        if (element != null) {
          // NOTE(review): the bound comes from the outer list type while the fields are looked up
          // in the repeated group; kept as in the original - confirm which count is intended.
          for (int i = 0; i < type.getFieldCount(); i++) {
            Type fieldType = repeatedType.getType(i);
            String fieldName = fieldType.getName();

            recordConsumer.startField(fieldName, i);
            writeValue(element, fieldType);
            recordConsumer.endField(fieldName, i);
          }
        }

        recordConsumer.endGroup();
      }

      recordConsumer.endField(repeatedName, 0);
    }

    recordConsumer.endGroup();
  }
Ejemplo n.º 5
0
  /**
   * Writes each field of a group to the RecordConsumer, walking the fields in schema order. A null
   * group is ignored, and a null field value is skipped without starting its field.
   *
   * @param value The list of values contained in the group.
   * @param type Type that contains information about the group schema.
   */
  public void writeGroupFields(final ArrayWritable value, final GroupType type) {
    if (value == null) {
      return;
    }

    for (int index = 0; index < type.getFieldCount(); index++) {
      final Type fieldType = type.getType(index);
      final String fieldName = fieldType.getName();
      final Writable fieldValue = value.get()[index];

      // Parquet does not write null elements
      if (fieldValue == null) {
        continue;
      }

      recordConsumer.startField(fieldName, index);
      writeValue(fieldValue, fieldType);
      recordConsumer.endField(fieldName, index);
    }
  }
Ejemplo n.º 6
0
 /**
  * Hands {@code value} to the given consumer through {@code addFloat}.
  *
  * <p>NOTE(review): {@code value} is declared outside this snippet; presumably it is a float held
  * by the enclosing type - confirm.
  *
  * @param recordConsumer Consumer that receives the value
  */
 @Override
 public void writeValue(RecordConsumer recordConsumer) {
   recordConsumer.addFloat(value);
 }
Ejemplo n.º 7
0
 /**
  * It writes a group type and all its values to the Parquet RecordConsumer. This is used only for
  * optional and required groups.
  *
  * <p>The fields are written by writeGroupFields() and are enclosed in a startGroup/endGroup
  * pair, so the group markers are emitted even when no field is written.
  *
  * @param value ArrayWritable object that contains the group values
  * @param type Type that contains information about the group schema
  */
 private void writeGroup(final ArrayWritable value, final GroupType type) {
   recordConsumer.startGroup();
   writeGroupFields(value, type);
   recordConsumer.endGroup();
 }