private void parseStringColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (fieldData == null) {
        nulls[column] = true;
    }
    else if (hiveTypes[column] == HiveType.MAP || hiveTypes[column] == HiveType.LIST || hiveTypes[column] == HiveType.STRUCT) {
        // temporarily special case MAP, LIST, and STRUCT types as strings
        slices[column] = Slices.wrappedBuffer(SerDeUtils.getJsonBytes(sessionTimeZone, fieldData, fieldInspectors[column]));
        nulls[column] = false;
    }
    else {
        Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
        checkState(fieldValue != null, "fieldValue should not be null");
        if (fieldValue instanceof String) {
            slices[column] = Slices.utf8Slice((String) fieldValue);
        }
        else if (fieldValue instanceof byte[]) {
            slices[column] = Slices.wrappedBuffer((byte[]) fieldValue);
        }
        else {
            throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
        }
        nulls[column] = false;
    }
}
private void parseStringColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (fieldData == null) {
        nulls[column] = true;
    }
    else {
        Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
        checkState(fieldValue != null, "fieldValue should not be null");
        Slice value;
        if (fieldValue instanceof String) {
            value = Slices.utf8Slice((String) fieldValue);
        }
        else if (fieldValue instanceof byte[]) {
            value = Slices.wrappedBuffer((byte[]) fieldValue);
        }
        else if (fieldValue instanceof HiveVarchar) {
            value = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
        }
        else {
            throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
        }
        Type type = types[column];
        if (isVarcharType(type)) {
            value = truncateToLength(value, type);
        }
        slices[column] = value;
        nulls[column] = false;
    }
}
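A minimal standalone sketch (not the Presto implementation) of what the truncateToLength step above does for VARCHAR(n): cap the value at n code points, not n bytes. The helper name truncateUtf8 and the String round-trip are illustrative; the real helper works on the raw UTF-8 bytes.

// Illustrative only: caps a UTF-8 slice at maxLength code points,
// mirroring the VARCHAR truncation above.
static Slice truncateUtf8(Slice slice, int maxLength)
{
    String text = slice.toStringUtf8();
    if (text.codePointCount(0, text.length()) <= maxLength) {
        return slice;
    }
    return Slices.utf8Slice(text.substring(0, text.offsetByCodePoints(0, maxLength)));
}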
private void newBuffer()
{
    this.buffer = allocator.allocate(PageFormat.PAGE_HEADER_SIZE + fixedRecordSize);
    this.bufferSlice = Slices.wrappedBuffer(buffer.array(), buffer.offset(), buffer.capacity());
    this.count = 0;
    this.position = PageFormat.PAGE_HEADER_SIZE;
    this.stringReferences.clear();
    this.stringReferenceSize = 0;
}
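The three-argument Slices.wrappedBuffer(array, offset, length) used above wraps the backing array rather than copying it, so writes through the slice are visible in the buffer. A small sketch with illustrative names:

byte[] backing = new byte[64];
Slice window = Slices.wrappedBuffer(backing, 16, 32); // 32-byte view at offset 16; no copy
window.setByte(0, 7);                                 // writes through: backing[16] is now 7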
private static Slice formatDatetime(ISOChronology chronology, Locale locale, long timestamp, Slice formatString)
{
    String pattern = formatString.toString(Charsets.UTF_8);
    DateTimeFormatter formatter = DateTimeFormat.forPattern(pattern)
            .withChronology(chronology)
            .withLocale(locale);
    String datetimeString = formatter.print(timestamp);
    return Slices.wrappedBuffer(datetimeString.getBytes(Charsets.UTF_8));
}
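A hedged usage sketch of the same Joda-Time pipeline; Slices.utf8Slice(s) yields the same bytes as wrapping s.getBytes(UTF_8):

DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm")
        .withChronology(ISOChronology.getInstanceUTC())
        .withLocale(Locale.ENGLISH);
Slice formatted = Slices.utf8Slice(formatter.print(0L)); // epoch millis -> "1970-01-01 00:00"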
private Page generateZeroPage(List<Type> types, int rowsCount, int fieldLength)
{
    byte[] constantBytes = new byte[fieldLength];
    Arrays.fill(constantBytes, (byte) 42);
    Slice constantSlice = Slices.wrappedBuffer(constantBytes);

    Block[] blocks = new Block[types.size()];
    for (int i = 0; i < blocks.length; i++) {
        blocks[i] = createZeroBlock(types.get(i), rowsCount, constantSlice);
    }

    return new Page(rowsCount, blocks);
}
public GenericHiveRecordCursor(
        RecordReader<K, V> recordReader,
        long totalBytes,
        Properties splitSchema,
        List<HivePartitionKey> partitionKeys,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone,
        DateTimeZone sessionTimeZone)
{
    checkNotNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    checkNotNull(splitSchema, "splitSchema is null");
    checkNotNull(partitionKeys, "partitionKeys is null");
    checkNotNull(columns, "columns is null");
    checkArgument(!columns.isEmpty(), "columns is empty");
    checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    checkNotNull(sessionTimeZone, "sessionTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;
    this.sessionTimeZone = sessionTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();
    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];
    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];
    this.isPartitionColumn = new boolean[size];
    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);

        names[i] = column.getName();
        types[i] = column.getType();
        hiveTypes[i] = column.getHiveType();

        if (!column.isPartitionKey()) {
            StructField field = rowInspector.getStructFieldRef(column.getName());
            structFields[i] = field;
            fieldInspectors[i] = field.getFieldObjectInspector();
        }

        isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey.nameGetter());
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        if (column.isPartitionKey()) {
            HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
            checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

            byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8);

            Type type = types[columnIndex];
            if (BOOLEAN.equals(type)) {
                if (isTrue(bytes, 0, bytes.length)) {
                    booleans[columnIndex] = true;
                }
                else if (isFalse(bytes, 0, bytes.length)) {
                    booleans[columnIndex] = false;
                }
                else {
                    String valueString = new String(bytes, Charsets.UTF_8);
                    throw new IllegalArgumentException(String.format("Invalid partition value '%s' for BOOLEAN partition key %s", valueString, names[columnIndex]));
                }
            }
            else if (BIGINT.equals(type)) {
                if (bytes.length == 0) {
                    throw new IllegalArgumentException(String.format("Invalid partition value '' for BIGINT partition key %s", names[columnIndex]));
                }
                longs[columnIndex] = parseLong(bytes, 0, bytes.length);
            }
            else if (DOUBLE.equals(type)) {
                if (bytes.length == 0) {
                    throw new IllegalArgumentException(String.format("Invalid partition value '' for DOUBLE partition key %s", names[columnIndex]));
                }
                doubles[columnIndex] = parseDouble(bytes, 0, bytes.length);
            }
            else if (VARCHAR.equals(type)) {
                slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length));
            }
            else {
                throw new UnsupportedOperationException("Unsupported column type: " + type);
            }
        }
    }
}
private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector)
{
    requireNonNull(builder, "parent builder is null");

    if (object == null) {
        builder.appendNull();
        return;
    }

    switch (inspector.getPrimitiveCategory()) {
        case BOOLEAN:
            BooleanType.BOOLEAN.writeBoolean(builder, ((BooleanObjectInspector) inspector).get(object));
            return;
        case BYTE:
            TinyintType.TINYINT.writeLong(builder, ((ByteObjectInspector) inspector).get(object));
            return;
        case SHORT:
            SmallintType.SMALLINT.writeLong(builder, ((ShortObjectInspector) inspector).get(object));
            return;
        case INT:
            IntegerType.INTEGER.writeLong(builder, ((IntObjectInspector) inspector).get(object));
            return;
        case LONG:
            BigintType.BIGINT.writeLong(builder, ((LongObjectInspector) inspector).get(object));
            return;
        case FLOAT:
            DoubleType.DOUBLE.writeDouble(builder, ((FloatObjectInspector) inspector).get(object));
            return;
        case DOUBLE:
            DoubleType.DOUBLE.writeDouble(builder, ((DoubleObjectInspector) inspector).get(object));
            return;
        case STRING:
            type.writeSlice(builder, Slices.utf8Slice(((StringObjectInspector) inspector).getPrimitiveJavaObject(object)));
            return;
        case VARCHAR:
            type.writeSlice(builder, Slices.utf8Slice(((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(object).getValue()));
            return;
        case CHAR:
            CharType charType = checkType(type, CharType.class, "type");
            HiveChar hiveChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
            type.writeSlice(builder, trimSpacesAndTruncateToLength(Slices.utf8Slice(hiveChar.getValue()), charType.getLength()));
            return;
        case DATE:
            DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector));
            return;
        case TIMESTAMP:
            TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector));
            return;
        case BINARY:
            VARBINARY.writeSlice(builder, Slices.wrappedBuffer(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object)));
            return;
        case DECIMAL:
            DecimalType decimalType = checkType(type, DecimalType.class, "type");
            HiveDecimalWritable hiveDecimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
            if (decimalType.isShort()) {
                decimalType.writeLong(builder, DecimalUtils.getShortDecimalValue(hiveDecimal, decimalType.getScale()));
            }
            else {
                decimalType.writeSlice(builder, DecimalUtils.getLongDecimalValue(hiveDecimal, decimalType.getScale()));
            }
            return;
    }
    throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
}
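A sketch of exercising one branch above directly; new BlockBuilderStatus() and the expected-entries argument are assumptions about the Presto SPI version in play:

BlockBuilder builder = BigintType.BIGINT.createBlockBuilder(new BlockBuilderStatus(), 1);
BigintType.BIGINT.writeLong(builder, 42L); // what the LONG case writes from its inspector
Block block = builder.build();             // single-position BIGINT block holding 42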
private StringLiteral(Optional<NodeLocation> location, String value)
{
    super(location);
    requireNonNull(value, "value is null");
    this.value = value;
    this.slice = Slices.wrappedBuffer(value.getBytes(UTF_8));
}
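Worth noting: wrapping value.getBytes(UTF_8) is equivalent to Slices.utf8Slice(value). A tiny check under that assumption:

String value = "naïve";
Slice wrapped = Slices.wrappedBuffer(value.getBytes(UTF_8));
Slice direct = Slices.utf8Slice(value);
assert wrapped.equals(direct); // identical UTF-8 bytes either way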
@Override
public void addBinary(Binary value)
{
    nulls[fieldIndex] = false;
    slices[fieldIndex] = Slices.wrappedBuffer(value.getBytes());
}
@Override
public void addBinary(Binary value)
{
    VARBINARY.writeSlice(builder, Slices.wrappedBuffer(value.getBytes()));
    wroteValue = true;
}
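Both addBinary overrides implement Parquet's PrimitiveConverter callback. A minimal sketch of that shape; the class and field names are illustrative, and the package is parquet.io.api or org.apache.parquet.io.api depending on the Parquet version:

import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.PrimitiveConverter;

class SliceCollectingConverter
        extends PrimitiveConverter
{
    private Slice lastValue;

    @Override
    public void addBinary(Binary value)
    {
        // Binary.getBytes() materializes a byte[]; wrappedBuffer wraps it without another copy
        lastValue = Slices.wrappedBuffer(value.getBytes());
    }
}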
public ParquetHiveRecordCursor(
        Configuration configuration,
        Path path,
        long start,
        long length,
        Properties splitSchema,
        List<HivePartitionKey> partitionKeys,
        List<HiveColumnHandle> columns,
        boolean useParquetColumnNames,
        TypeManager typeManager)
{
    requireNonNull(path, "path is null");
    checkArgument(length >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");

    this.totalBytes = length;

    int size = columns.size();
    this.names = new String[size];
    this.types = new Type[size];
    this.isPartitionColumn = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];
    this.nullsRowDefault = new boolean[size];

    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);

        names[i] = column.getName();
        types[i] = typeManager.getType(column.getTypeSignature());

        isPartitionColumn[i] = column.isPartitionKey();
        nullsRowDefault[i] = !column.isPartitionKey();
    }

    this.recordReader = createParquetRecordReader(configuration, path, start, length, columns, useParquetColumnNames);

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        if (column.isPartitionKey()) {
            HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
            checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

            byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

            String name = names[columnIndex];
            Type type = types[columnIndex];
            if (HiveUtil.isHiveNull(bytes)) {
                nullsRowDefault[columnIndex] = true;
            }
            else if (type.equals(BOOLEAN)) {
                booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
            }
            else if (type.equals(BIGINT)) {
                longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
            }
            else if (type.equals(DOUBLE)) {
                doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
            }
            else if (type.equals(VARCHAR)) {
                slices[columnIndex] = Slices.wrappedBuffer(bytes);
            }
            else {
                throw new PrestoException(NOT_SUPPORTED, format("Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
            }
        }
    }
}
public static Slice base64Decode(byte[] bytes)
{
    return Slices.wrappedBuffer(Base64.getDecoder().decode(bytes));
}
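A round-trip usage sketch with the JDK codec:

byte[] encoded = Base64.getEncoder().encode("hello".getBytes(StandardCharsets.US_ASCII));
Slice decoded = base64Decode(encoded);
assert decoded.equals(Slices.utf8Slice("hello")); // decodes back to the original bytes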