public GenericHiveRecordCursor(
    RecordReader<K, V> recordReader,
    long totalBytes,
    Properties splitSchema,
    List<HivePartitionKey> partitionKeys,
    List<HiveColumnHandle> columns,
    DateTimeZone hiveStorageTimeZone,
    TypeManager typeManager) {
  requireNonNull(recordReader, "recordReader is null");
  checkArgument(totalBytes >= 0, "totalBytes is negative");
  requireNonNull(splitSchema, "splitSchema is null");
  requireNonNull(partitionKeys, "partitionKeys is null");
  requireNonNull(columns, "columns is null");
  requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

  this.recordReader = recordReader;
  this.totalBytes = totalBytes;
  this.key = recordReader.createKey();
  this.value = recordReader.createValue();
  this.hiveStorageTimeZone = hiveStorageTimeZone;

  this.deserializer = getDeserializer(splitSchema);
  this.rowInspector = getTableObjectInspector(deserializer);

  int size = columns.size();
  String[] names = new String[size];
  this.types = new Type[size];
  this.hiveTypes = new HiveType[size];

  this.structFields = new StructField[size];
  this.fieldInspectors = new ObjectInspector[size];

  this.isPartitionColumn = new boolean[size];

  this.loaded = new boolean[size];
  this.booleans = new boolean[size];
  this.longs = new long[size];
  this.doubles = new double[size];
  this.slices = new Slice[size];
  this.objects = new Object[size];
  this.nulls = new boolean[size];

  // initialize data columns
  for (int i = 0; i < columns.size(); i++) {
    HiveColumnHandle column = columns.get(i);

    names[i] = column.getName();
    types[i] = typeManager.getType(column.getTypeSignature());
    hiveTypes[i] = column.getHiveType();

    if (!column.isPartitionKey()) {
      StructField field = rowInspector.getStructFieldRef(column.getName());
      structFields[i] = field;
      fieldInspectors[i] = field.getFieldObjectInspector();
    }

    isPartitionColumn[i] = column.isPartitionKey();
  }

  // parse requested partition columns
  Map<String, HivePartitionKey> partitionKeysByName =
      uniqueIndex(partitionKeys, HivePartitionKey::getName);
  for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
    HiveColumnHandle column = columns.get(columnIndex);
    if (column.isPartitionKey()) {
      HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

      byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

      String name = names[columnIndex];
      Type type = types[columnIndex];
      if (HiveUtil.isHiveNull(bytes)) {
        nulls[columnIndex] = true;
      } else if (BOOLEAN.equals(type)) {
        booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
      } else if (BIGINT.equals(type)) {
        longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
      } else if (INTEGER.equals(type)) {
        longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name);
      } else if (SMALLINT.equals(type)) {
        longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name);
      } else if (TINYINT.equals(type)) {
        longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name);
      } else if (DOUBLE.equals(type)) {
        doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
      } else if (isVarcharType(type)) {
        slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type);
      } else if (DATE.equals(type)) {
        longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name);
      } else if (TIMESTAMP.equals(type)) {
        longs[columnIndex] = timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name);
      } else if (isShortDecimal(type)) {
        longs[columnIndex] = shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
      } else if (isLongDecimal(type)) {
        slices[columnIndex] = longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
      } else {
        throw new PrestoException(
            NOT_SUPPORTED,
            format("Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
      }
    }
  }
}
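/*
 * The constructor above decodes every partition key string once, up front, into primitive
 * slots (booleans/longs/doubles/slices) indexed by column position, so the per-row getters
 * can answer with a plain array read and never re-parse the value. The class below is a
 * minimal, self-contained sketch of that parse-once-into-typed-slots pattern; PartitionSlots
 * and its method names are hypothetical illustrations, not part of the Presto API.
 */
final class PartitionSlots {
  private final boolean[] booleans;
  private final long[] longs;
  private final double[] doubles;
  private final String[] strings;
  private final boolean[] nulls;

  PartitionSlots(int columnCount) {
    booleans = new boolean[columnCount];
    longs = new long[columnCount];
    doubles = new double[columnCount];
    strings = new String[columnCount];
    nulls = new boolean[columnCount];
  }

  // decode a single partition key value into the slot for its column (done once, at construction time)
  void parse(int column, Class<?> javaType, String rawValue) {
    if (rawValue == null) {
      nulls[column] = true;
    } else if (javaType == boolean.class) {
      booleans[column] = Boolean.parseBoolean(rawValue);
    } else if (javaType == long.class) {
      longs[column] = Long.parseLong(rawValue);
    } else if (javaType == double.class) {
      doubles[column] = Double.parseDouble(rawValue);
    } else {
      strings[column] = rawValue;
    }
  }

  // per-row access: a plain array read, no parsing
  long getLong(int column) {
    return longs[column];
  }
}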
public static Expression toExpression(Object object, Type type) {
  requireNonNull(type, "type is null");

  if (object instanceof Expression) {
    return (Expression) object;
  }

  if (object == null) {
    if (type.equals(UNKNOWN)) {
      return new NullLiteral();
    }
    return new Cast(new NullLiteral(), type.getTypeSignature().toString(), false, true);
  }

  checkArgument(
      Primitives.wrap(type.getJavaType()).isInstance(object),
      "object.getClass (%s) and type.getJavaType (%s) do not agree",
      object.getClass(),
      type.getJavaType());

  if (type.equals(BIGINT)) {
    return new LongLiteral(object.toString());
  }

  if (type.equals(DOUBLE)) {
    Double value = (Double) object;
    // WARNING: the ORC predicate code depends on NaN and infinity not appearing in a tuple
    // domain, so if you remove this, you will need to update the TupleDomainOrcPredicate
    if (value.isNaN()) {
      return new FunctionCall(new QualifiedName("nan"), ImmutableList.<Expression>of());
    } else if (value.equals(Double.NEGATIVE_INFINITY)) {
      return ArithmeticUnaryExpression.negative(
          new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of()));
    } else if (value.equals(Double.POSITIVE_INFINITY)) {
      return new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of());
    } else {
      return new DoubleLiteral(object.toString());
    }
  }

  if (type instanceof VarcharType) {
    if (object instanceof String) {
      object = Slices.utf8Slice((String) object);
    }
    if (object instanceof Slice) {
      Slice value = (Slice) object;
      int length = SliceUtf8.countCodePoints(value);
      if (length == ((VarcharType) type).getLength()) {
        return new StringLiteral(value.toStringUtf8());
      }
      return new Cast(new StringLiteral(value.toStringUtf8()), type.getDisplayName(), false, true);
    }
    throw new IllegalArgumentException(
        "object must be instance of Slice or String when type is VARCHAR");
  }

  if (type.equals(BOOLEAN)) {
    return new BooleanLiteral(object.toString());
  }

  if (object instanceof Block) {
    SliceOutput output = new DynamicSliceOutput(((Block) object).getSizeInBytes());
    BlockSerdeUtil.writeBlock(output, (Block) object);
    object = output.slice();
    // This if condition will evaluate to true: object instanceof Slice && !type.equals(VARCHAR)
  }

  if (object instanceof Slice) {
    // HACK: we need to serialize VARBINARY in a format that can be embedded in an expression
    // to be able to encode it in the plan that gets sent to workers.
    // We do this by transforming the in-memory varbinary into a call to
    // from_base64(<base64-encoded value>)
    FunctionCall fromBase64 =
        new FunctionCall(
            new QualifiedName("from_base64"),
            ImmutableList.of(
                new StringLiteral(VarbinaryFunctions.toBase64((Slice) object).toStringUtf8())));
    Signature signature = FunctionRegistry.getMagicLiteralFunctionSignature(type);
    return new FunctionCall(new QualifiedName(signature.getName()), ImmutableList.of(fromBase64));
  }

  Signature signature = FunctionRegistry.getMagicLiteralFunctionSignature(type);
  Expression rawLiteral = toExpression(object, FunctionRegistry.typeForMagicLiteral(type));

  return new FunctionCall(new QualifiedName(signature.getName()), ImmutableList.of(rawLiteral));
}
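/*
 * A minimal sketch of the idea behind the VARBINARY branch above: raw bytes cannot be written
 * directly into SQL text, so they are base64-encoded and wrapped in a from_base64(...) call
 * that workers can evaluate back into the original bytes. The renderFromBase64Call helper is a
 * hypothetical illustration that renders the call as a string; the real code builds an AST
 * FunctionCall node instead.
 */
final class VarbinaryLiteralSketch {
  static String renderFromBase64Call(byte[] value) {
    String encoded = java.util.Base64.getEncoder().encodeToString(value);
    return "from_base64('" + encoded + "')";
  }

  public static void main(String[] args) {
    byte[] value = new byte[] {0x01, 0x02, (byte) 0xFF};
    // prints: from_base64('AQL/')
    System.out.println(renderFromBase64Call(value));
  }
}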
public ParquetHiveRecordCursor(
    Configuration configuration,
    Path path,
    long start,
    long length,
    Properties splitSchema,
    List<HivePartitionKey> partitionKeys,
    List<HiveColumnHandle> columns,
    boolean useParquetColumnNames,
    TypeManager typeManager) {
  requireNonNull(path, "path is null");
  checkArgument(length >= 0, "totalBytes is negative");
  requireNonNull(splitSchema, "splitSchema is null");
  requireNonNull(partitionKeys, "partitionKeys is null");
  requireNonNull(columns, "columns is null");

  this.totalBytes = length;

  int size = columns.size();
  this.names = new String[size];
  this.types = new Type[size];

  this.isPartitionColumn = new boolean[size];

  this.booleans = new boolean[size];
  this.longs = new long[size];
  this.doubles = new double[size];
  this.slices = new Slice[size];
  this.objects = new Object[size];
  this.nulls = new boolean[size];
  this.nullsRowDefault = new boolean[size];

  for (int i = 0; i < columns.size(); i++) {
    HiveColumnHandle column = columns.get(i);

    names[i] = column.getName();
    types[i] = typeManager.getType(column.getTypeSignature());

    isPartitionColumn[i] = column.isPartitionKey();
    nullsRowDefault[i] = !column.isPartitionKey();
  }

  this.recordReader =
      createParquetRecordReader(configuration, path, start, length, columns, useParquetColumnNames);

  // parse requested partition columns
  Map<String, HivePartitionKey> partitionKeysByName =
      uniqueIndex(partitionKeys, HivePartitionKey::getName);
  for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
    HiveColumnHandle column = columns.get(columnIndex);
    if (column.isPartitionKey()) {
      HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

      byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

      String name = names[columnIndex];
      Type type = types[columnIndex];
      if (HiveUtil.isHiveNull(bytes)) {
        nullsRowDefault[columnIndex] = true;
      } else if (type.equals(BOOLEAN)) {
        booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
      } else if (type.equals(BIGINT)) {
        longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
      } else if (type.equals(DOUBLE)) {
        doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
      } else if (type.equals(VARCHAR)) {
        slices[columnIndex] = Slices.wrappedBuffer(bytes);
      } else {
        throw new PrestoException(
            NOT_SUPPORTED,
            format("Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
      }
    }
  }
}
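/*
 * Unlike the generic cursor, this constructor also records a per-column default null mask
 * (nullsRowDefault): data columns start each row as null until the Parquet reader fills them
 * in, while partition columns keep their pre-parsed, non-null values. A plausible per-row
 * reset is sketched below with hypothetical names; the actual advance logic is not shown in
 * this excerpt, so this is an assumption about how the two arrays interact.
 */
final class RowNullResetSketch {
  private final boolean[] nullsRowDefault;
  private final boolean[] nulls;

  RowNullResetSketch(boolean[] nullsRowDefault) {
    this.nullsRowDefault = nullsRowDefault.clone();
    this.nulls = nullsRowDefault.clone();
  }

  // called once per row, before the reader populates the data columns
  void resetForNextRow() {
    System.arraycopy(nullsRowDefault, 0, nulls, 0, nulls.length);
  }
}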