/** * It writes a map type and its key-pair values to the Parquet RecordConsumer. This is called when * the original type (MAP) is detected by writeValue() * * @param value The list of map values that contains the repeated KEY_PAIR_VALUE group type * @param type Type that contains information about the group schema */ private void writeMap(final ArrayWritable value, final GroupType type) { GroupType repeatedType = type.getType(0).asGroupType(); ArrayWritable repeatedValue = (ArrayWritable) value.get()[0]; recordConsumer.startGroup(); recordConsumer.startField(repeatedType.getName(), 0); Writable[] map_values = repeatedValue.get(); for (int record = 0; record < map_values.length; record++) { Writable key_value_pair = map_values[record]; if (key_value_pair != null) { // Hive wraps a map key-pair into an ArrayWritable if (key_value_pair instanceof ArrayWritable) { writeGroup((ArrayWritable) key_value_pair, repeatedType); } else { throw new RuntimeException( "Map key-value pair is not an ArrayWritable object on record " + record); } } else { throw new RuntimeException("Map key-value pair is null on record " + record); } } recordConsumer.endField(repeatedType.getName(), 0); recordConsumer.endGroup(); }
/**
 * Emits one complete record to the Parquet RecordConsumer as a single message.
 *
 * A null record is ignored. Any RuntimeException raised while writing the record's fields is
 * logged and rethrown wrapped in a new RuntimeException that keeps the original as its cause;
 * in that case the message is not ended.
 *
 * @param record Contains the record of values that are going to be written
 */
public void write(final ArrayWritable record) {
  if (record == null) {
    return;
  }

  recordConsumer.startMessage();
  try {
    writeGroupFields(record, schema);
  } catch (RuntimeException cause) {
    final String description = "Parquet record is malformed: " + cause.getMessage();
    LOG.error(description, cause);
    throw new RuntimeException(description, cause);
  }
  recordConsumer.endMessage();
}
/**
 * Writes a primitive value to the Parquet RecordConsumer, choosing the consumer call from the
 * runtime class of the writable. A null value is silently skipped.
 *
 * Short and byte values are written as integers. Timestamps are converted to a NanoTime and
 * written through it.
 *
 * @param value The writable object that contains the primitive value.
 * @throws UnsupportedOperationException if the value is a HiveDecimalWritable
 * @throws IllegalArgumentException if the writable class is not one of the handled types
 */
private void writePrimitive(final Writable value) {
  if (value == null) {
    return;
  }
  if (value instanceof DoubleWritable) {
    recordConsumer.addDouble(((DoubleWritable) value).get());
  } else if (value instanceof BooleanWritable) {
    recordConsumer.addBoolean(((BooleanWritable) value).get());
  } else if (value instanceof FloatWritable) {
    recordConsumer.addFloat(((FloatWritable) value).get());
  } else if (value instanceof IntWritable) {
    recordConsumer.addInteger(((IntWritable) value).get());
  } else if (value instanceof LongWritable) {
    recordConsumer.addLong(((LongWritable) value).get());
  } else if (value instanceof ShortWritable) {
    recordConsumer.addInteger(((ShortWritable) value).get());
  } else if (value instanceof ByteWritable) {
    recordConsumer.addInteger(((ByteWritable) value).get());
  } else if (value instanceof HiveDecimalWritable) {
    throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
  } else if (value instanceof BytesWritable) {
    BytesWritable bytes = (BytesWritable) value;
    // getBytes() exposes the backing buffer, which can be longer than the valid data;
    // only the first getLength() bytes belong to the value.
    recordConsumer.addBinary(Binary.fromByteArray(bytes.getBytes(), 0, bytes.getLength()));
  } else if (value instanceof TimestampWritable) {
    Timestamp ts = ((TimestampWritable) value).getTimestamp();
    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
    nt.writeValue(recordConsumer);
  } else {
    throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
  }
}
/** * It writes a list type and its array elements to the Parquet RecordConsumer. This is called when * the original type (LIST) is detected by writeValue() * * @param array The list of array values that contains the repeated array group type * @param type Type that contains information about the group schema */ private void writeArray(final ArrayWritable array, final GroupType type) { GroupType repeatedType = type.getType(0).asGroupType(); ArrayWritable repeatedValue = (ArrayWritable) array.get()[0]; recordConsumer.startGroup(); recordConsumer.startField(repeatedType.getName(), 0); Writable[] array_values = repeatedValue.get(); for (int record = 0; record < array_values.length; record++) { recordConsumer.startGroup(); // Null values must be wrapped into startGroup/endGroup Writable element = array_values[record]; if (element != null) { for (int i = 0; i < type.getFieldCount(); i++) { Type fieldType = repeatedType.getType(i); String fieldName = fieldType.getName(); recordConsumer.startField(fieldName, i); writeValue(element, fieldType); recordConsumer.endField(fieldName, i); } } recordConsumer.endGroup(); } recordConsumer.endField(repeatedType.getName(), 0); recordConsumer.endGroup(); }
/** * It writes all the fields contained inside a group to the RecordConsumer. * * @param value The list of values contained in the group. * @param type Type that contains information about the group schema. */ public void writeGroupFields(final ArrayWritable value, final GroupType type) { if (value != null) { for (int i = 0; i < type.getFieldCount(); i++) { Type fieldType = type.getType(i); String fieldName = fieldType.getName(); Writable fieldValue = value.get()[i]; // Parquet does not write null elements if (fieldValue != null) { recordConsumer.startField(fieldName, i); writeValue(fieldValue, fieldType); recordConsumer.endField(fieldName, i); } } } }
/**
 * Passes {@code value} to the given consumer through {@code addFloat}.
 *
 * NOTE(review): {@code value} is declared outside this method and is not visible here;
 * presumably it is the float held by the enclosing object -- confirm against the declaration.
 *
 * @param recordConsumer the consumer that receives the float value
 */
@Override public void writeValue(RecordConsumer recordConsumer) { recordConsumer.addFloat(value); }
/**
 * Writes one group to the Parquet RecordConsumer: opens the group, writes its fields through
 * writeGroupFields(), then closes the group. Per the original documentation this is used only
 * for optional and required groups.
 *
 * @param value ArrayWritable object that contains the group values
 * @param type Type that contains information about the group schema
 */
private void writeGroup(final ArrayWritable value, final GroupType type) {
  // Open the group boundary before any of its fields are emitted.
  recordConsumer.startGroup();

  // Field-level writing (including skipping null fields) is delegated.
  writeGroupFields(value, type);

  // Close the boundary opened above.
  recordConsumer.endGroup();
}