/**
 * Prepares the benchmark state: an IN-list filter over column 0, a single input page of
 * random values of the configured {@code type}, and the compiled page processor.
 *
 * @throws IllegalStateException if {@code type} is not BIGINT, DOUBLE or VARCHAR
 */
@Setup
public void setup() {
    Random rng = new Random();

    // Slot 0 is reserved for the probed column reference; slots 1..inListCount hold the
    // random constants that make up the IN list.
    RowExpression[] inArguments = new RowExpression[1 + inListCount];
    switch (type) {
        case StandardTypes.BIGINT:
            prestoType = BIGINT;
            for (int slot = 1; slot <= inListCount; slot++) {
                long literal = rng.nextInt();
                inArguments[slot] = constant(literal, BIGINT);
            }
            break;
        case StandardTypes.DOUBLE:
            prestoType = DOUBLE;
            for (int slot = 1; slot <= inListCount; slot++) {
                double literal = rng.nextDouble();
                inArguments[slot] = constant(literal, DOUBLE);
            }
            break;
        case StandardTypes.VARCHAR:
            prestoType = VARCHAR;
            for (int slot = 1; slot <= inListCount; slot++) {
                String literal = Long.toString(rng.nextLong());
                inArguments[slot] = constant(Slices.utf8Slice(literal), VARCHAR);
            }
            break;
        default:
            throw new IllegalStateException();
    }
    inArguments[0] = field(0, prestoType);

    // The projection simply passes column 0 through.
    RowExpression project = field(0, prestoType);

    // Fill one page with random values drawn the same way as the IN-list constants.
    int positionCount = 10_000;
    PageBuilder pageBuilder = new PageBuilder(ImmutableList.of(prestoType));
    for (int position = 0; position < positionCount; position++) {
        pageBuilder.declarePosition();
        switch (type) {
            case StandardTypes.BIGINT:
                BIGINT.writeLong(pageBuilder.getBlockBuilder(0), rng.nextInt());
                break;
            case StandardTypes.DOUBLE:
                DOUBLE.writeDouble(pageBuilder.getBlockBuilder(0), rng.nextDouble());
                break;
            case StandardTypes.VARCHAR:
                VARCHAR.writeSlice(
                        pageBuilder.getBlockBuilder(0),
                        Slices.utf8Slice(Long.toString(rng.nextLong())));
                break;
        }
    }
    inputPage = pageBuilder.build();

    Signature inSignature = new Signature(IN, SCALAR, parseTypeSignature(StandardTypes.BOOLEAN));
    RowExpression filter = call(inSignature, BOOLEAN, inArguments);

    ExpressionCompiler compiler = new ExpressionCompiler(MetadataManager.createTestMetadataManager());
    processor = compiler.compilePageProcessor(filter, ImmutableList.of(project)).get();
}
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true; Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]); if (fieldData == null) { nulls[column] = true; } else { Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData); checkState(fieldValue != null, "fieldValue should not be null"); Slice value; if (fieldValue instanceof String) { value = Slices.utf8Slice((String) fieldValue); } else if (fieldValue instanceof byte[]) { value = Slices.wrappedBuffer((byte[]) fieldValue); } else if (fieldValue instanceof HiveVarchar) { value = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue()); } else { throw new IllegalStateException( "unsupported string field type: " + fieldValue.getClass().getName()); } Type type = types[column]; if (isVarcharType(type)) { value = truncateToLength(value, type); } slices[column] = value; nulls[column] = false; } }
/**
 * Checks that {@code Slices.utf8Slice} agrees with {@code Slices.copiedBuffer(..., UTF_8)}
 * and that both round-trip back to the original string, including a non-ASCII character.
 */
@Test
public void testUtf8Conversion() {
    String text = "apple \u2603 snowman";
    Slice copied = Slices.copiedBuffer(text, UTF_8);

    // Same bytes regardless of which factory produced the slice.
    Slice encoded = Slices.utf8Slice(text);
    assertEquals(encoded, copied);

    // Decoding the copied buffer gives back the original text.
    String decodedFromCopy = copied.toStringUtf8();
    assertEquals(decodedFromCopy, text);

    // Encoding and decoding through utf8Slice also round-trips.
    String roundTripped = Slices.utf8Slice(text).toStringUtf8();
    assertEquals(roundTripped, text);
}
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true; Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]); if (fieldData == null) { nulls[column] = true; } else if (hiveTypes[column] == HiveType.MAP || hiveTypes[column] == HiveType.LIST || hiveTypes[column] == HiveType.STRUCT) { // temporarily special case MAP, LIST, and STRUCT types as strings slices[column] = Slices.wrappedBuffer( SerDeUtils.getJsonBytes(sessionTimeZone, fieldData, fieldInspectors[column])); nulls[column] = false; } else { Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData); checkState(fieldValue != null, "fieldValue should not be null"); if (fieldValue instanceof String) { slices[column] = Slices.utf8Slice((String) fieldValue); } else if (fieldValue instanceof byte[]) { slices[column] = Slices.wrappedBuffer((byte[]) fieldValue); } else { throw new IllegalStateException( "unsupported string field type: " + fieldValue.getClass().getName()); } nulls[column] = false; } }
/**
 * Converts a serialized row value to its JSON text.
 *
 * <p>The row slice is turned into a block via {@code createBlock}, materialized as an
 * object through {@code rowType.getObjectValue}, and written out with the shared object
 * mapper.
 *
 * @param rowType type used to decode the row
 * @param session session passed to {@code getObjectValue}
 * @param row serialized row
 * @return the JSON text as a UTF-8 slice
 */
public static Slice toJson(Type rowType, ConnectorSession session, Slice row) {
    Object rowValue = rowType.getObjectValue(session, createBlock(rowType, row), 0);

    String json;
    try {
        json = OBJECT_MAPPER.get().writeValueAsString(rowValue);
    } catch (JsonProcessingException e) {
        throw Throwables.propagate(e);
    }
    return Slices.utf8Slice(json);
}
/**
 * Builds the slice for a CHAR partition key: the value is UTF-8 encoded and passed through
 * {@code trimSpaces}, then validated against the declared CHAR length (in code points).
 *
 * @param value raw partition value
 * @param name partition key name, used only in the error message
 * @param columnType column type; must be a {@code CharType}
 * @return the trimmed partition key
 * @throws PrestoException with {@code HIVE_INVALID_PARTITION_VALUE} if the trimmed value has
 *     more code points than the CHAR length
 */
public static Slice charPartitionKey(String value, String name, Type columnType) {
    Slice trimmed = trimSpaces(Slices.utf8Slice(value));
    CharType charType = checkType(columnType, CharType.class, "columnType");

    int codePoints = SliceUtf8.countCodePoints(trimmed);
    if (codePoints <= charType.getLength()) {
        return trimmed;
    }

    String message = format(
            "Invalid partition value '%s' for %s partition key: %s",
            value,
            columnType.toString(),
            name);
    throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, message);
}
public static Expression toExpression(Object object, Type type) { requireNonNull(type, "type is null"); if (object instanceof Expression) { return (Expression) object; } if (object == null) { if (type.equals(UNKNOWN)) { return new NullLiteral(); } return new Cast(new NullLiteral(), type.getTypeSignature().toString(), false, true); } checkArgument( Primitives.wrap(type.getJavaType()).isInstance(object), "object.getClass (%s) and type.getJavaType (%s) do not agree", object.getClass(), type.getJavaType()); if (type.equals(BIGINT)) { return new LongLiteral(object.toString()); } if (type.equals(DOUBLE)) { Double value = (Double) object; // WARNING: the ORC predicate code depends on NaN and infinity not appearing in a tuple // domain, so // if you remove this, you will need to update the TupleDomainOrcPredicate if (value.isNaN()) { return new FunctionCall(new QualifiedName("nan"), ImmutableList.<Expression>of()); } else if (value.equals(Double.NEGATIVE_INFINITY)) { return ArithmeticUnaryExpression.negative( new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of())); } else if (value.equals(Double.POSITIVE_INFINITY)) { return new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of()); } else { return new DoubleLiteral(object.toString()); } } if (type instanceof VarcharType) { if (object instanceof String) { object = Slices.utf8Slice((String) object); } if (object instanceof Slice) { Slice value = (Slice) object; int length = SliceUtf8.countCodePoints(value); if (length == ((VarcharType) type).getLength()) { return new StringLiteral(value.toStringUtf8()); } return new Cast( new StringLiteral(value.toStringUtf8()), type.getDisplayName(), false, true); } throw new IllegalArgumentException( "object must be instance of Slice or String when type is VARCHAR"); } if (type.equals(BOOLEAN)) { return new BooleanLiteral(object.toString()); } if (object instanceof Block) { SliceOutput output = new DynamicSliceOutput(((Block) 
object).getSizeInBytes()); BlockSerdeUtil.writeBlock(output, (Block) object); object = output.slice(); // This if condition will evaluate to true: object instanceof Slice && !type.equals(VARCHAR) } if (object instanceof Slice) { // HACK: we need to serialize VARBINARY in a format that can be embedded in an expression to // be // able to encode it in the plan that gets sent to workers. // We do this by transforming the in-memory varbinary into a call to // from_base64(<base64-encoded value>) FunctionCall fromBase64 = new FunctionCall( new QualifiedName("from_base64"), ImmutableList.of( new StringLiteral(VarbinaryFunctions.toBase64((Slice) object).toStringUtf8()))); Signature signature = FunctionRegistry.getMagicLiteralFunctionSignature(type); return new FunctionCall(new QualifiedName(signature.getName()), ImmutableList.of(fromBase64)); } Signature signature = FunctionRegistry.getMagicLiteralFunctionSignature(type); Expression rawLiteral = toExpression(object, FunctionRegistry.typeForMagicLiteral(type)); return new FunctionCall(new QualifiedName(signature.getName()), ImmutableList.of(rawLiteral)); }
/**
 * Appends one Hive primitive value to {@code builder}, choosing the write call from the
 * inspector's primitive category. A null {@code object} is appended as null.
 *
 * @param type Presto type of the value; used directly for STRING, VARCHAR, CHAR and DECIMAL
 * @param builder destination block builder; must not be null
 * @param object Hive value to serialize; may be null
 * @param inspector inspector used to read {@code object}
 * @throws RuntimeException if the primitive category is not handled
 */
private static void serializePrimitive(
        Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) {
    requireNonNull(builder, "parent builder is null");

    if (object == null) {
        builder.appendNull();
        return;
    }

    switch (inspector.getPrimitiveCategory()) {
        case BOOLEAN: {
            boolean flag = ((BooleanObjectInspector) inspector).get(object);
            BooleanType.BOOLEAN.writeBoolean(builder, flag);
            break;
        }
        case BYTE: {
            long number = ((ByteObjectInspector) inspector).get(object);
            TinyintType.TINYINT.writeLong(builder, number);
            break;
        }
        case SHORT: {
            long number = ((ShortObjectInspector) inspector).get(object);
            SmallintType.SMALLINT.writeLong(builder, number);
            break;
        }
        case INT: {
            long number = ((IntObjectInspector) inspector).get(object);
            IntegerType.INTEGER.writeLong(builder, number);
            break;
        }
        case LONG: {
            long number = ((LongObjectInspector) inspector).get(object);
            BigintType.BIGINT.writeLong(builder, number);
            break;
        }
        case FLOAT: {
            // Hive floats are widened and written with the DOUBLE type.
            double number = ((FloatObjectInspector) inspector).get(object);
            DoubleType.DOUBLE.writeDouble(builder, number);
            break;
        }
        case DOUBLE: {
            double number = ((DoubleObjectInspector) inspector).get(object);
            DoubleType.DOUBLE.writeDouble(builder, number);
            break;
        }
        case STRING: {
            String text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(object);
            type.writeSlice(builder, Slices.utf8Slice(text));
            break;
        }
        case VARCHAR: {
            String text =
                    ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(object).getValue();
            type.writeSlice(builder, Slices.utf8Slice(text));
            break;
        }
        case CHAR: {
            CharType charType = checkType(type, CharType.class, "type");
            HiveChar hiveChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
            Slice padded = Slices.utf8Slice(hiveChar.getValue());
            type.writeSlice(builder, trimSpacesAndTruncateToLength(padded, charType.getLength()));
            break;
        }
        case DATE:
            DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector));
            break;
        case TIMESTAMP:
            TimestampType.TIMESTAMP.writeLong(
                    builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector));
            break;
        case BINARY: {
            byte[] bytes = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object);
            VARBINARY.writeSlice(builder, Slices.wrappedBuffer(bytes));
            break;
        }
        case DECIMAL: {
            DecimalType decimalType = checkType(type, DecimalType.class, "type");
            HiveDecimalWritable hiveDecimal =
                    ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
            // Short decimals are written as a long, long decimals as a slice.
            if (decimalType.isShort()) {
                decimalType.writeLong(
                        builder, DecimalUtils.getShortDecimalValue(hiveDecimal, decimalType.getScale()));
            } else {
                decimalType.writeSlice(
                        builder, DecimalUtils.getLongDecimalValue(hiveDecimal, decimalType.getScale()));
            }
            break;
        }
        default:
            throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
    }
}
/**
 * Shared fixture for Hive file format tests: a fixed list of test columns (partition keys,
 * primitives, maps, arrays and structs), a helper that writes them to a file through a
 * given output format and SerDe, and helpers that verify the values read back through a
 * {@code RecordCursor} or a {@code ConnectorPageSource}.
 */
@Test(groups = "hive")
public abstract class AbstractTestHiveFileFormats {
    // Tolerance used when comparing float/double columns.
    private static final double EPSILON = 0.001;

    private static final TypeManager TYPE_MANAGER = new TypeRegistry();

    // The same calendar date (2011-05-06) expressed as UTC millis, days, a formatted string,
    // and a java.sql.Date built from the millis shifted into the default time zone.
    private static final long DATE_MILLIS_UTC = new DateTime(2011, 5, 6, 0, 0, UTC).getMillis();
    private static final long DATE_DAYS = TimeUnit.MILLISECONDS.toDays(DATE_MILLIS_UTC);
    private static final String DATE_STRING =
            DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC().print(DATE_MILLIS_UTC);
    private static final Date SQL_DATE =
            new Date(UTC.getMillisKeepLocal(DateTimeZone.getDefault(), DATE_MILLIS_UTC));

    // NOTE(review): no zone is passed to DateTime or the formatter here, unlike the date
    // constants above — presumably the default time zone is intended; confirm.
    public static final long TIMESTAMP = new DateTime(2011, 5, 6, 7, 8, 9, 123).getMillis();
    public static final String TIMESTAMP_STRING =
            DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS").print(TIMESTAMP);

    // TODO: support null values and determine if timestamp and binary are allowed as partition keys
    public static final int NUM_ROWS = 1000;

    // Columns named p_* are partition keys (last constructor argument true); columns named
    // t_* are regular columns. Each entry pairs the value to write with the value expected
    // when reading it back.
    public static final List<TestColumn> TEST_COLUMNS =
            ImmutableList.<TestColumn>builder()
                    .add(new TestColumn("p_empty_string", javaStringObjectInspector, "", Slices.EMPTY_SLICE, true))
                    .add(new TestColumn("p_string", javaStringObjectInspector, "test", Slices.utf8Slice("test"), true))
                    .add(new TestColumn("p_tinyint", javaByteObjectInspector, "1", 1L, true))
                    .add(new TestColumn("p_smallint", javaShortObjectInspector, "2", 2L, true))
                    .add(new TestColumn("p_int", javaIntObjectInspector, "3", 3L, true))
                    .add(new TestColumn("p_bigint", javaLongObjectInspector, "4", 4L, true))
                    .add(new TestColumn("p_float", javaFloatObjectInspector, "5.1", 5.1, true))
                    .add(new TestColumn("p_double", javaDoubleObjectInspector, "6.2", 6.2, true))
                    .add(new TestColumn("p_boolean", javaBooleanObjectInspector, "true", true, true))
                    .add(new TestColumn("p_date", javaDateObjectInspector, DATE_STRING, DATE_DAYS, true))
                    .add(new TestColumn("p_timestamp", javaTimestampObjectInspector, TIMESTAMP_STRING, TIMESTAMP, true))
                    // .add(new TestColumn("p_binary", javaByteArrayObjectInspector, "test2",
                    // Slices.utf8Slice("test2"), true))
                    .add(new TestColumn("p_null_string", javaStringObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_tinyint", javaByteObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_smallint", javaShortObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_int", javaIntObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_bigint", javaLongObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_float", javaFloatObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_double", javaDoubleObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_boolean", javaBooleanObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_date", javaDateObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("p_null_timestamp", javaTimestampObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    // .add(new TestColumn("p_null_binary", javaByteArrayObjectInspector,
                    // HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
                    .add(new TestColumn("t_null_string", javaStringObjectInspector, null, null))
                    .add(new TestColumn("t_null_array_int", getStandardListObjectInspector(javaIntObjectInspector), null, null))
                    .add(new TestColumn("t_empty_string", javaStringObjectInspector, "", Slices.EMPTY_SLICE))
                    .add(new TestColumn("t_string", javaStringObjectInspector, "test", Slices.utf8Slice("test")))
                    .add(new TestColumn("t_tinyint", javaByteObjectInspector, (byte) 1, 1L))
                    .add(new TestColumn("t_smallint", javaShortObjectInspector, (short) 2, 2L))
                    .add(new TestColumn("t_int", javaIntObjectInspector, 3, 3L))
                    .add(new TestColumn("t_bigint", javaLongObjectInspector, 4L, 4L))
                    .add(new TestColumn("t_float", javaFloatObjectInspector, 5.1f, 5.1))
                    .add(new TestColumn("t_double", javaDoubleObjectInspector, 6.2, 6.2))
                    .add(new TestColumn("t_boolean_true", javaBooleanObjectInspector, true, true))
                    .add(new TestColumn("t_boolean_false", javaBooleanObjectInspector, false, false))
                    .add(new TestColumn("t_date", javaDateObjectInspector, SQL_DATE, DATE_DAYS))
                    .add(new TestColumn("t_timestamp", javaTimestampObjectInspector, new Timestamp(TIMESTAMP), TIMESTAMP))
                    .add(new TestColumn("t_binary", javaByteArrayObjectInspector, Slices.utf8Slice("test2"), Slices.utf8Slice("test2")))
                    .add(
                            new TestColumn(
                                    "t_map_string",
                                    getStandardMapObjectInspector(javaStringObjectInspector, javaStringObjectInspector),
                                    ImmutableMap.of("test", "test"),
                                    mapBlockOf(VARCHAR, VARCHAR, "test", "test")))
                    .add(
                            new TestColumn(
                                    "t_map_tinyint",
                                    getStandardMapObjectInspector(javaByteObjectInspector, javaByteObjectInspector),
                                    ImmutableMap.of((byte) 1, (byte) 1),
                                    mapBlockOf(BIGINT, BIGINT, 1, 1)))
                    .add(
                            new TestColumn(
                                    "t_map_smallint",
                                    getStandardMapObjectInspector(javaShortObjectInspector, javaShortObjectInspector),
                                    ImmutableMap.of((short) 2, (short) 2),
                                    mapBlockOf(BIGINT, BIGINT, 2, 2)))
                    // The written map contains a null key; the expected block holds only the
                    // non-null entry.
                    .add(
                            new TestColumn(
                                    "t_map_null_key",
                                    getStandardMapObjectInspector(javaIntObjectInspector, javaIntObjectInspector),
                                    asMap(null, 0, 2, 3),
                                    mapBlockOf(BIGINT, BIGINT, 2, 3)))
                    .add(
                            new TestColumn(
                                    "t_map_int",
                                    getStandardMapObjectInspector(javaIntObjectInspector, javaIntObjectInspector),
                                    ImmutableMap.of(3, 3),
                                    mapBlockOf(BIGINT, BIGINT, 3, 3)))
                    .add(
                            new TestColumn(
                                    "t_map_bigint",
                                    getStandardMapObjectInspector(javaLongObjectInspector, javaLongObjectInspector),
                                    ImmutableMap.of(4L, 4L),
                                    mapBlockOf(BIGINT, BIGINT, 4L, 4L)))
                    .add(
                            new TestColumn(
                                    "t_map_float",
                                    getStandardMapObjectInspector(javaFloatObjectInspector, javaFloatObjectInspector),
                                    ImmutableMap.of(5.0f, 5.0f),
                                    mapBlockOf(DOUBLE, DOUBLE, 5.0f, 5.0f)))
                    .add(
                            new TestColumn(
                                    "t_map_double",
                                    getStandardMapObjectInspector(javaDoubleObjectInspector, javaDoubleObjectInspector),
                                    ImmutableMap.of(6.0, 6.0),
                                    mapBlockOf(DOUBLE, DOUBLE, 6.0, 6.0)))
                    .add(
                            new TestColumn(
                                    "t_map_boolean",
                                    getStandardMapObjectInspector(javaBooleanObjectInspector, javaBooleanObjectInspector),
                                    ImmutableMap.of(true, true),
                                    mapBlockOf(BOOLEAN, BOOLEAN, true, true)))
                    .add(
                            new TestColumn(
                                    "t_map_date",
                                    getStandardMapObjectInspector(javaDateObjectInspector, javaDateObjectInspector),
                                    ImmutableMap.of(SQL_DATE, SQL_DATE),
                                    mapBlockOf(DateType.DATE, DateType.DATE, DATE_DAYS, DATE_DAYS)))
                    .add(
                            new TestColumn(
                                    "t_map_timestamp",
                                    getStandardMapObjectInspector(javaTimestampObjectInspector, javaTimestampObjectInspector),
                                    ImmutableMap.of(new Timestamp(TIMESTAMP), new Timestamp(TIMESTAMP)),
                                    mapBlockOf(TimestampType.TIMESTAMP, TimestampType.TIMESTAMP, TIMESTAMP, TIMESTAMP)))
                    .add(
                            new TestColumn(
                                    "t_array_empty",
                                    getStandardListObjectInspector(javaStringObjectInspector),
                                    ImmutableList.of(),
                                    arrayBlockOf(VARCHAR)))
                    .add(
                            new TestColumn(
                                    "t_array_string",
                                    getStandardListObjectInspector(javaStringObjectInspector),
                                    ImmutableList.of("test"),
                                    arrayBlockOf(VARCHAR, "test")))
                    .add(
                            new TestColumn(
                                    "t_array_tinyint",
                                    getStandardListObjectInspector(javaByteObjectInspector),
                                    ImmutableList.of((byte) 1),
                                    arrayBlockOf(BIGINT, 1)))
                    .add(
                            new TestColumn(
                                    "t_array_smallint",
                                    getStandardListObjectInspector(javaShortObjectInspector),
                                    ImmutableList.of((short) 2),
                                    arrayBlockOf(BIGINT, 2)))
                    .add(
                            new TestColumn(
                                    "t_array_int",
                                    getStandardListObjectInspector(javaIntObjectInspector),
                                    ImmutableList.of(3),
                                    arrayBlockOf(BIGINT, 3)))
                    .add(
                            new TestColumn(
                                    "t_array_bigint",
                                    getStandardListObjectInspector(javaLongObjectInspector),
                                    ImmutableList.of(4L),
                                    arrayBlockOf(BIGINT, 4L)))
                    .add(
                            new TestColumn(
                                    "t_array_float",
                                    getStandardListObjectInspector(javaFloatObjectInspector),
                                    ImmutableList.of(5.0f),
                                    arrayBlockOf(DOUBLE, 5.0f)))
                    .add(
                            new TestColumn(
                                    "t_array_double",
                                    getStandardListObjectInspector(javaDoubleObjectInspector),
                                    ImmutableList.of(6.0),
                                    arrayBlockOf(DOUBLE, 6.0)))
                    .add(
                            new TestColumn(
                                    "t_array_boolean",
                                    getStandardListObjectInspector(javaBooleanObjectInspector),
                                    ImmutableList.of(true),
                                    arrayBlockOf(BOOLEAN, true)))
                    .add(
                            new TestColumn(
                                    "t_array_date",
                                    getStandardListObjectInspector(javaDateObjectInspector),
                                    ImmutableList.of(SQL_DATE),
                                    arrayBlockOf(DateType.DATE, DATE_DAYS)))
                    .add(
                            new TestColumn(
                                    "t_array_timestamp",
                                    getStandardListObjectInspector(javaTimestampObjectInspector),
                                    ImmutableList.of(new Timestamp(TIMESTAMP)),
                                    arrayBlockOf(TimestampType.TIMESTAMP, TIMESTAMP)))
                    .add(
                            new TestColumn(
                                    "t_struct_bigint",
                                    getStandardStructObjectInspector(
                                            ImmutableList.of("s_bigint"), ImmutableList.of(javaLongObjectInspector)),
                                    new Long[] {1L},
                                    rowBlockOf(ImmutableList.of(BIGINT), 1)))
                    // map<string, array<struct<s_int:int>>>
                    .add(
                            new TestColumn(
                                    "t_complex",
                                    getStandardMapObjectInspector(
                                            javaStringObjectInspector,
                                            getStandardListObjectInspector(
                                                    getStandardStructObjectInspector(
                                                            ImmutableList.of("s_int"),
                                                            ImmutableList.<ObjectInspector>of(javaIntObjectInspector)))),
                                    ImmutableMap.of("test", ImmutableList.<Object>of(new Integer[] {1})),
                                    mapBlockOf(
                                            VARCHAR,
                                            new ArrayType(new RowType(ImmutableList.of(BIGINT), Optional.empty())),
                                            "test",
                                            arrayBlockOf(
                                                    new RowType(ImmutableList.of(BIGINT), Optional.empty()),
                                                    rowBlockOf(ImmutableList.of(BIGINT), 1)))))
                    // As with t_map_null_key, the null-keyed entry is absent from the expected block.
                    .add(
                            new TestColumn(
                                    "t_map_null_key_complex_value",
                                    getStandardMapObjectInspector(
                                            javaStringObjectInspector,
                                            getStandardMapObjectInspector(javaLongObjectInspector, javaBooleanObjectInspector)),
                                    asMap(null, ImmutableMap.of(15L, true), "k", ImmutableMap.of(16L, false)),
                                    mapBlockOf(
                                            VARCHAR,
                                            new MapType(BIGINT, BOOLEAN),
                                            "k",
                                            mapBlockOf(BIGINT, BOOLEAN, 16L, false))))
                    .add(
                            new TestColumn(
                                    "t_map_null_key_complex_key_value",
                                    getStandardMapObjectInspector(
                                            getStandardListObjectInspector(javaStringObjectInspector),
                                            getStandardMapObjectInspector(javaLongObjectInspector, javaBooleanObjectInspector)),
                                    asMap(
                                            null,
                                            ImmutableMap.of(15L, true),
                                            ImmutableList.of("k", "ka"),
                                            ImmutableMap.of(16L, false)),
                                    mapBlockOf(
                                            new ArrayType(VARCHAR),
                                            new MapType(BIGINT, BOOLEAN),
                                            arrayBlockOf(VARCHAR, "k", "ka"),
                                            mapBlockOf(BIGINT, BOOLEAN, 16L, false))))
                    .add(
                            new TestColumn(
                                    "t_struct_nested",
                                    getStandardStructObjectInspector(
                                            ImmutableList.of("struct_field"),
                                            ImmutableList.of(getStandardListObjectInspector(javaStringObjectInspector))),
                                    ImmutableList.of(ImmutableList.of("1", "2", "3")),
                                    rowBlockOf(
                                            ImmutableList.of(new ArrayType(VARCHAR)),
                                            arrayBlockOf(VARCHAR, "1", "2", "3"))))
                    .add(
                            new TestColumn(
                                    "t_struct_null",
                                    getStandardStructObjectInspector(
                                            ImmutableList.of("struct_field", "struct_field2"),
                                            ImmutableList.of(javaStringObjectInspector, javaStringObjectInspector)),
                                    Arrays.asList(null, null),
                                    rowBlockOf(ImmutableList.of(VARCHAR, VARCHAR), null, null)))
                    .add(
                            new TestColumn(
                                    "t_struct_non_nulls_after_nulls",
                                    getStandardStructObjectInspector(
                                            ImmutableList.of("struct_field1", "struct_field2"),
                                            ImmutableList.of(javaIntObjectInspector, javaStringObjectInspector)),
                                    Arrays.asList(null, "some string"),
                                    rowBlockOf(ImmutableList.of(BIGINT, VARCHAR), null, "some string")))
                    .add(
                            new TestColumn(
                                    "t_nested_struct_non_nulls_after_nulls",
                                    getStandardStructObjectInspector(
                                            ImmutableList.of("struct_field1", "struct_field2", "strict_field3"),
                                            ImmutableList.of(
                                                    javaIntObjectInspector,
                                                    javaStringObjectInspector,
                                                    getStandardStructObjectInspector(
                                                            ImmutableList.of("nested_struct_field1", "nested_struct_field2"),
                                                            ImmutableList.of(javaIntObjectInspector, javaStringObjectInspector)))),
                                    Arrays.asList(null, "some string", Arrays.asList(null, "nested_string2")),
                                    rowBlockOf(
                                            ImmutableList.of(
                                                    BIGINT,
                                                    VARCHAR,
                                                    new RowType(ImmutableList.of(BIGINT, VARCHAR), Optional.empty())),
                                            null,
                                            "some string",
                                            rowBlockOf(ImmutableList.of(BIGINT, VARCHAR), null, "nested_string2"))))
                    .build();

    /**
     * Returns a mutable map with the entries {@code null -> 0} and {@code 2 -> 3}.
     *
     * <p>NOTE(review): nothing in this class calls this method; TEST_COLUMNS uses
     * {@link #asMap} instead. Looks like a removal candidate.
     */
    private static Map<Integer, Integer> mapWithNullKey() {
        Map<Integer, Integer> map = new HashMap<>();
        map.put(null, 0);
        map.put(2, 3);
        return map;
    }

    /**
     * Builds a two-entry {@link HashMap}. Used instead of {@code ImmutableMap.of} where a
     * null key is needed.
     */
    private static <K, V> Map<K, V> asMap(K k1, V v1, K k2, V v2) {
        Map<K, V> map = new HashMap<>();
        map.put(k1, v1);
        map.put(k2, v2);
        return map;
    }

    /**
     * Creates one {@code HiveColumnHandle} per test column, in list order.
     *
     * <p>Partition keys get Hive column index -1; regular columns are numbered consecutively
     * from 0 in the order they appear.
     */
    protected List<HiveColumnHandle> getColumnHandles(List<TestColumn> testColumns) {
        List<HiveColumnHandle> columns = new ArrayList<>();
        int nextHiveColumnIndex = 0;
        for (int i = 0; i < testColumns.size(); i++) {
            TestColumn testColumn = testColumns.get(i);
            // Partition keys are not stored in the file, so they do not consume a file column index.
            int columnIndex = testColumn.isPartitionKey() ? -1 : nextHiveColumnIndex++;
            HiveType hiveType = HiveType.valueOf(testColumn.getObjectInspector().getTypeName());
            columns.add(
                    new HiveColumnHandle(
                            "client_id",
                            testColumn.getName(),
                            hiveType,
                            hiveType.getTypeSignature(),
                            columnIndex,
                            testColumn.isPartitionKey()));
        }
        return columns;
    }

    /**
     * Writes {@code numRows} identical rows, built from the write values of the non-partition
     * test columns, to {@code filePath} using the given output format and SerDe.
     *
     * @param filePath destination file
     * @param outputFormat Hive output format that supplies the record writer
     * @param serDe SerDe used to serialize each row; it is initialized here
     * @param compressionCodec codec name, or null for no compression
     * @param testColumns columns to write; partition keys are filtered out
     * @param numRows number of rows to write
     * @return a split starting at offset 0 whose length is the written file's length
     */
    public static FileSplit createTestFile(
            String filePath,
            HiveOutputFormat<?, ?> outputFormat,
            @SuppressWarnings("deprecation") SerDe serDe,
            String compressionCodec,
            List<TestColumn> testColumns,
            int numRows)
            throws Exception {
        // filter out partition keys, which are not written to the file
        testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

        JobConf jobConf = new JobConf();
        ReaderWriterProfiler.setProfilerOptions(jobConf);

        // Table schema handed to both the SerDe and the record writer.
        Properties tableProperties = new Properties();
        tableProperties.setProperty(
                "columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
        tableProperties.setProperty(
                "columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
        serDe.initialize(new Configuration(), tableProperties);

        if (compressionCodec != null) {
            CompressionCodec codec =
                    new CompressionCodecFactory(new Configuration()).getCodecByName(compressionCodec);
            jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
            jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
            jobConf.set("parquet.compression", compressionCodec);
            jobConf.set("parquet.enable.dictionary", "true");
        }

        RecordWriter recordWriter =
                outputFormat.getHiveRecordWriter(
                        jobConf,
                        new Path(filePath),
                        Text.class,
                        compressionCodec != null,
                        tableProperties,
                        new Progressable() {
                            @Override
                            public void progress() {}
                        });

        try {
            // NOTE(review): the SerDe was already initialized with the same properties above;
            // confirm whether this second call is required.
            serDe.initialize(new Configuration(), tableProperties);

            SettableStructObjectInspector objectInspector =
                    getStandardStructObjectInspector(
                            ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                            ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

            // A single row object is reused and refilled for every row written.
            Object row = objectInspector.create();

            List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

            for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
                for (int i = 0; i < testColumns.size(); i++) {
                    Object writeValue = testColumns.get(i).getWriteValue();
                    // Slice write values are handed to Hive as raw bytes.
                    if (writeValue instanceof Slice) {
                        writeValue = ((Slice) writeValue).getBytes();
                    }
                    objectInspector.setStructFieldData(row, fields.get(i), writeValue);
                }

                Writable record = serDe.serialize(row, objectInspector);
                recordWriter.write(record);
            }
        } finally {
            recordWriter.close(false);
        }

        Path path = new Path(filePath);
        path.getFileSystem(new Configuration()).setVerifyChecksum(true);
        File file = new File(filePath);
        return new FileSplit(path, 0, file.length(), new String[0]);
    }

    /**
     * Advances {@code cursor} through {@code numRows} rows and asserts that every field matches
     * the expected value of the corresponding test column.
     *
     * <p>The getter used for each field is chosen from the Presto type derived from the
     * column's object inspector type name. Float/double columns are compared within
     * {@code EPSILON}; non-primitive columns are compared as serialized blocks.
     */
    protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int numRows)
            throws IOException {
        for (int row = 0; row < numRows; row++) {
            assertTrue(cursor.advanceNextPosition());
            for (int i = 0, testColumnsSize = testColumns.size(); i < testColumnsSize; i++) {
                TestColumn testColumn = testColumns.get(i);

                Object fieldFromCursor;
                Type type = HiveType.valueOf(testColumn.getObjectInspector().getTypeName()).getType(TYPE_MANAGER);
                if (cursor.isNull(i)) {
                    fieldFromCursor = null;
                } else if (BOOLEAN.equals(type)) {
                    fieldFromCursor = cursor.getBoolean(i);
                } else if (BIGINT.equals(type)) {
                    fieldFromCursor = cursor.getLong(i);
                } else if (DOUBLE.equals(type)) {
                    fieldFromCursor = cursor.getDouble(i);
                } else if (VARCHAR.equals(type)) {
                    fieldFromCursor = cursor.getSlice(i);
                } else if (VARBINARY.equals(type)) {
                    fieldFromCursor = cursor.getSlice(i);
                } else if (DateType.DATE.equals(type)) {
                    fieldFromCursor = cursor.getLong(i);
                } else if (TimestampType.TIMESTAMP.equals(type)) {
                    fieldFromCursor = cursor.getLong(i);
                } else if (isStructuralType(type)) {
                    fieldFromCursor = cursor.getObject(i);
                } else {
                    throw new RuntimeException("unknown type");
                }

                if (fieldFromCursor == null) {
                    assertEquals(
                            null,
                            testColumn.getExpectedValue(),
                            String.format("Expected null for column %s", testColumn.getName()));
                } else if (testColumn.getObjectInspector().getTypeName().equals("float")
                        || testColumn.getObjectInspector().getTypeName().equals("double")) {
                    assertEquals((double) fieldFromCursor, (double) testColumn.getExpectedValue(), EPSILON);
                } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) {
                    assertEquals(
                            fieldFromCursor,
                            testColumn.getExpectedValue(),
                            String.format("Wrong value for column %s", testColumn.getName()));
                } else {
                    Block expected = (Block) testColumn.getExpectedValue();
                    Block actual = (Block) fieldFromCursor;
                    assertBlockEquals(
                            actual, expected, String.format("Wrong value for column %s", testColumn.getName()));
                }
            }
        }
    }

    /**
     * Materializes {@code pageSource} and asserts that every field of every row matches the
     * expected value of the corresponding test column. The page source is always closed.
     *
     * @param pageSource source to read and close
     * @param testColumns expected columns, in the same order as the fields of each row
     * @param types Presto types of the columns, in the same order
     */
    protected void checkPageSource(
            ConnectorPageSource pageSource, List<TestColumn> testColumns, List<Type> types)
            throws IOException {
        try {
            MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, types);

            for (MaterializedRow row : result) {
                for (int i = 0, testColumnsSize = testColumns.size(); i < testColumnsSize; i++) {
                    TestColumn testColumn = testColumns.get(i);
                    Type type = types.get(i);

                    Object actualValue = row.getField(i);
                    Object expectedValue = testColumn.getExpectedValue();
                    if (actualValue == null) {
                        assertEquals(null, expectedValue, String.format("Expected non-null for column %d", i));
                    } else if (testColumn.getObjectInspector().getTypeName().equals("float")
                            || testColumn.getObjectInspector().getTypeName().equals("double")) {
                        assertEquals((double) actualValue, (double) expectedValue, EPSILON);
                    } else if (testColumn.getObjectInspector().getTypeName().equals("date")) {
                        // Expected dates are stored as a day count; wrap it to match the materialized type.
                        SqlDate expectedDate = new SqlDate(((Long) expectedValue).intValue());
                        assertEquals(actualValue, expectedDate);
                    } else if (testColumn.getObjectInspector().getTypeName().equals("timestamp")) {
                        SqlTimestamp expectedTimestamp =
                                new SqlTimestamp((Long) expectedValue, SESSION.getTimeZoneKey());
                        assertEquals(actualValue, expectedTimestamp);
                    } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) {
                        // Normalize slices and varbinary to strings so both sides compare as text.
                        if (expectedValue instanceof Slice) {
                            expectedValue = ((Slice) expectedValue).toStringUtf8();
                        }
                        if (actualValue instanceof Slice) {
                            actualValue = ((Slice) actualValue).toStringUtf8();
                        }
                        if (actualValue instanceof SqlVarbinary) {
                            actualValue = new String(((SqlVarbinary) actualValue).getBytes(), UTF_8);
                        }
                        assertEquals(actualValue, expectedValue, String.format("Wrong value for column %d", i));
                    } else {
                        // Convert the expected block to its object form by writing it into a
                        // one-position block and reading it back through the type.
                        BlockBuilder builder = type.createBlockBuilder(new BlockBuilderStatus(), 1);
                        type.writeObject(builder, expectedValue);
                        expectedValue = type.getObjectValue(SESSION, builder.build(), 0);
                        assertEquals(
                                actualValue,
                                expectedValue,
                                String.format("Wrong value for column %s", testColumn.getName()));
                    }
                }
            }
        } finally {
            pageSource.close();
        }
    }

    /** Asserts that two blocks serialize to equal slices. */
    private static void assertBlockEquals(Block actual, Block expected, String message) {
        assertEquals(blockToSlice(actual), blockToSlice(expected), message);
    }

    /** Serializes a copy of the whole block with {@code BlockSerdeUtil.writeBlock}. */
    private static Slice blockToSlice(Block block) {
        // This function is strictly for testing use only
        SliceOutput sliceOutput = new DynamicSliceOutput(1000);
        BlockSerdeUtil.writeBlock(sliceOutput, block.copyRegion(0, block.getPositionCount()));
        return sliceOutput.slice();
    }

    /**
     * Describes one column of the test table: its name, the Hive object inspector, the value to
     * write, the value expected when reading it back, and whether it is a partition key.
     */
    public static final class TestColumn {
        private final String name;
        private final ObjectInspector objectInspector;
        private final Object writeValue;
        private final Object expectedValue;
        private final boolean partitionKey;

        /** Creates a regular (non-partition) column. */
        public TestColumn(
                String name, ObjectInspector objectInspector, Object writeValue, Object expectedValue) {
            this(name, objectInspector, writeValue, expectedValue, false);
        }

        /**
         * Creates a column. {@code name} and {@code objectInspector} must not be null;
         * {@code writeValue} and {@code expectedValue} may be null.
         */
        public TestColumn(
                String name,
                ObjectInspector objectInspector,
                Object writeValue,
                Object expectedValue,
                boolean partitionKey) {
            this.name = requireNonNull(name, "name is null");
            this.objectInspector = requireNonNull(objectInspector, "objectInspector is null");
            this.writeValue = writeValue;
            this.expectedValue = expectedValue;
            this.partitionKey = partitionKey;
        }

        public String getName() {
            return name;
        }

        /** Returns the type name reported by the object inspector. */
        public String getType() {
            return objectInspector.getTypeName();
        }

        public ObjectInspector getObjectInspector() {
            return objectInspector;
        }

        public Object getWriteValue() {
            return writeValue;
        }

        public Object getExpectedValue() {
            return expectedValue;
        }

        public boolean isPartitionKey() {
            return partitionKey;
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder("TestColumn{");
            sb.append("name='").append(name).append('\'');
            sb.append(", objectInspector=").append(objectInspector);
            sb.append(", writeValue=").append(writeValue);
            sb.append(", expectedValue=").append(expectedValue);
            sb.append(", partitionKey=").append(partitionKey);
            sb.append('}');
            return sb.toString();
        }
    }
}
/**
 * Returns the value of a VARCHAR field as a UTF-8 slice.
 *
 * <p>The field's type is verified with {@code checkFieldType} before the value is read.
 */
@Override
public Slice getSlice(int field) {
    checkFieldType(field, VARCHAR);
    String text = getFieldValue(field);
    return Slices.utf8Slice(text);
}