@Override public Block getSequenceBlock(int start, int length) { BlockBuilder blockBuilder = BIGINT.createBlockBuilder(new BlockBuilderStatus()); for (int i = start; i < start + length; i++) { BIGINT.writeLong(blockBuilder, i); } return blockBuilder.build(); }
private static Block getRowNumberColumn(List<Page> pages) { BlockBuilder builder = BIGINT.createBlockBuilder(new BlockBuilderStatus()); for (Page page : pages) { int rowNumberChannel = page.getChannelCount() - 1; for (int i = 0; i < page.getPositionCount(); i++) { BIGINT.writeLong(builder, page.getLong(BIGINT, rowNumberChannel, i)); } } return builder.build(); }
@Test public void testStackRepresentation() throws Exception { Block array = arrayBlockOf(BIGINT, 1L, 2L); Block actualBlock = mapBlockOf(DOUBLE, new ArrayType(BIGINT), ImmutableMap.of(1.0, array)); DynamicSliceOutput actualSliceOutput = new DynamicSliceOutput(100); writeBlock(actualSliceOutput, actualBlock); Block expectedBlock = new InterleavedBlockBuilder( ImmutableList.<Type>of(DOUBLE, new ArrayType(BIGINT)), new BlockBuilderStatus(), 3) .writeDouble(1.0) .closeEntry() .writeObject( BIGINT .createBlockBuilder(new BlockBuilderStatus(), 1) .writeLong(1L) .closeEntry() .writeLong(2L) .closeEntry() .build()) .closeEntry() .build(); DynamicSliceOutput expectedSliceOutput = new DynamicSliceOutput(100); writeBlock(expectedSliceOutput, expectedBlock); assertEquals(actualSliceOutput.slice(), expectedSliceOutput.slice()); }
private static void assertReadFields(RecordCursor cursor, List<ColumnMetadata> schema) { for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) { ColumnMetadata column = schema.get(columnIndex); if (!cursor.isNull(columnIndex)) { Type type = column.getType(); if (BOOLEAN.equals(type)) { cursor.getBoolean(columnIndex); } else if (BIGINT.equals(type)) { cursor.getLong(columnIndex); } else if (TIMESTAMP.equals(type)) { cursor.getLong(columnIndex); } else if (DOUBLE.equals(type)) { cursor.getDouble(columnIndex); } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) { try { cursor.getSlice(columnIndex); } catch (RuntimeException e) { throw new RuntimeException("column " + column, e); } } else { fail("Unknown primitive type " + type + " at column index " + columnIndex); } } } }
private void parseColumn(int column) { Type type = types[column]; if (BOOLEAN.equals(type)) { parseBooleanColumn(column); } else if (BIGINT.equals(type)) { parseLongColumn(column); } else if (INTEGER.equals(type)) { parseLongColumn(column); } else if (SMALLINT.equals(type)) { parseLongColumn(column); } else if (TINYINT.equals(type)) { parseLongColumn(column); } else if (DOUBLE.equals(type)) { parseDoubleColumn(column); } else if (isVarcharType(type) || VARBINARY.equals(type)) { parseStringColumn(column); } else if (isStructuralType(hiveTypes[column])) { parseObjectColumn(column); } else if (DATE.equals(type)) { parseLongColumn(column); } else if (TIMESTAMP.equals(type)) { parseLongColumn(column); } else if (type instanceof DecimalType) { parseDecimalColumn(column); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } }
private void testCorrectnessOfErrorFunction(List<Number> inputList) throws Exception { int inRange = 0; int numberOfRuns = 1000; double sampleRatio = 1 / (double) WEIGHT; double actual = getExpectedValue(inputList); Random rand = new Random(1); for (int i = 0; i < numberOfRuns; i++) { // Compute Sampled Value using sampledList (numberOfRuns times) ImmutableList.Builder<Number> sampledList = ImmutableList.builder(); for (Number x : inputList) { if (rand.nextDouble() < sampleRatio) { sampledList.add(x); } } BlockBuilder builder = getType().createBlockBuilder(new BlockBuilderStatus()); for (Number sample : sampledList.build()) { if (getType() == BIGINT) { BIGINT.writeLong(builder, sample.longValue()); } else if (getType() == DOUBLE) { DOUBLE.writeDouble(builder, sample.doubleValue()); } else { throw new AssertionError("Can only handle longs and doubles"); } } Page page = new Page(builder.build()); page = OperatorAssertion.appendSampleWeight(ImmutableList.of(page), WEIGHT).get(0); Accumulator accumulator = getFunction() .bind( ImmutableList.of(0), Optional.<Integer>absent(), Optional.of(page.getChannelCount() - 1), getConfidence()) .createAccumulator(); accumulator.addInput(page); Block result = accumulator.evaluateFinal(); String approxValue = BlockAssertions.toValues(accumulator.getFinalType(), result).get(0).toString(); double approx = Double.parseDouble(approxValue.split(" ")[0]); double error = Double.parseDouble(approxValue.split(" ")[2]); // Check if actual answer lies within [approxAnswer - error, approxAnswer + error] if (Math.abs(approx - actual) <= error) { inRange++; } } BinomialDistribution binomial = new BinomialDistribution(numberOfRuns, getConfidence()); int lowerBound = binomial.inverseCumulativeProbability(0.01); int upperBound = binomial.inverseCumulativeProbability(0.99); assertTrue( lowerBound < inRange && inRange < upperBound, String.format( "%d out of %d passed. Expected [%d, %d]", inRange, numberOfRuns, lowerBound, upperBound)); }
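// Aside (illustration, not part of the test): why the acceptance band above works. Each run lands inside its error bar independently with probability roughly equal to the confidence, so inRange is approximately Binomial(numberOfRuns, confidence). A minimal sketch, assuming confidence = 0.99 and numberOfRuns = 1000 (the source leaves getConfidence() unspecified), using org.apache.commons.math3.distribution.BinomialDistribution:
BinomialDistribution binomial = new BinomialDistribution(1000, 0.99);
int lowerBound = binomial.inverseCumulativeProbability(0.01); // roughly 982 successes
int upperBound = binomial.inverseCumulativeProbability(0.99); // roughly 997 successes
// A correct implementation should fall outside [lowerBound, upperBound] only about 2% of the time.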
@Setup public void setup() { Random random = new Random(); RowExpression[] arguments = new RowExpression[1 + inListCount]; switch (type) { case StandardTypes.BIGINT: prestoType = BIGINT; for (int i = 1; i <= inListCount; i++) { arguments[i] = constant((long) random.nextInt(), BIGINT); } break; case StandardTypes.DOUBLE: prestoType = DOUBLE; for (int i = 1; i <= inListCount; i++) { arguments[i] = constant(random.nextDouble(), DOUBLE); } break; case StandardTypes.VARCHAR: prestoType = VARCHAR; for (int i = 1; i <= inListCount; i++) { arguments[i] = constant(Slices.utf8Slice(Long.toString(random.nextLong())), VARCHAR); } break; default: throw new IllegalStateException(); } arguments[0] = field(0, prestoType); RowExpression project = field(0, prestoType); PageBuilder pageBuilder = new PageBuilder(ImmutableList.of(prestoType)); for (int i = 0; i < 10_000; i++) { pageBuilder.declarePosition(); switch (type) { case StandardTypes.BIGINT: BIGINT.writeLong(pageBuilder.getBlockBuilder(0), random.nextInt()); break; case StandardTypes.DOUBLE: DOUBLE.writeDouble(pageBuilder.getBlockBuilder(0), random.nextDouble()); break; case StandardTypes.VARCHAR: VARCHAR.writeSlice( pageBuilder.getBlockBuilder(0), Slices.utf8Slice(Long.toString(random.nextLong()))); break; } } inputPage = pageBuilder.build(); RowExpression filter = call( new Signature(IN, SCALAR, parseTypeSignature(StandardTypes.BOOLEAN)), BOOLEAN, arguments); processor = new ExpressionCompiler(MetadataManager.createTestMetadataManager()) .compilePageProcessor(filter, ImmutableList.of(project)) .get(); }
protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int numRows) throws IOException { for (int row = 0; row < numRows; row++) { assertTrue(cursor.advanceNextPosition()); for (int i = 0, testColumnsSize = testColumns.size(); i < testColumnsSize; i++) { TestColumn testColumn = testColumns.get(i); Object fieldFromCursor; Type type = HiveType.valueOf(testColumn.getObjectInspector().getTypeName()).getType(TYPE_MANAGER); if (cursor.isNull(i)) { fieldFromCursor = null; } else if (BOOLEAN.equals(type)) { fieldFromCursor = cursor.getBoolean(i); } else if (BIGINT.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (DOUBLE.equals(type)) { fieldFromCursor = cursor.getDouble(i); } else if (VARCHAR.equals(type)) { fieldFromCursor = cursor.getSlice(i); } else if (VARBINARY.equals(type)) { fieldFromCursor = cursor.getSlice(i); } else if (DateType.DATE.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (TimestampType.TIMESTAMP.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (isStructuralType(type)) { fieldFromCursor = cursor.getObject(i); } else { throw new RuntimeException("unknown type " + type); } if (fieldFromCursor == null) { assertEquals( null, testColumn.getExpectedValue(), String.format("Expected null for column %s", testColumn.getName())); } else if (testColumn.getObjectInspector().getTypeName().equals("float") || testColumn.getObjectInspector().getTypeName().equals("double")) { assertEquals((double) fieldFromCursor, (double) testColumn.getExpectedValue(), EPSILON); } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) { assertEquals( fieldFromCursor, testColumn.getExpectedValue(), String.format("Wrong value for column %s", testColumn.getName())); } else { Block expected = (Block) testColumn.getExpectedValue(); Block actual = (Block) fieldFromCursor; assertBlockEquals( actual, expected, String.format("Wrong value for column %s", testColumn.getName())); } } } }
@SuppressWarnings("NumericCastThatLosesPrecision") @Override public int getBucket(Page page, int position) { long hash = 0; for (Block block : page.getBlocks()) { long value = BIGINT.getLong(block, position); hash = (hash * 31) + XxHash64.hash(value); } int value = (int) (hash & Integer.MAX_VALUE); return value % bucketCount; }
@Override public Block getSequenceBlock(int start, int length) { BlockBuilder blockBuilder = getType().createBlockBuilder(new BlockBuilderStatus()); for (int i = start; i < start + length; i++) { if (getType() == BIGINT) { BIGINT.writeLong(blockBuilder, (long) i); } else { DOUBLE.writeDouble(blockBuilder, (double) i); } } return blockBuilder.build(); }
@Test public void testBackwardsCompatible() { ClientTypeSignature signature = new ClientTypeSignature( StandardTypes.ARRAY, ImmutableList.of( new ClientTypeSignatureParameter( TypeSignatureParameter.of(BIGINT.getTypeSignature())))); ClientTypeSignature legacy = CLIENT_TYPE_SIGNATURE_CODEC.fromJson( "{\"rawType\":\"array\",\"literalArguments\":[],\"typeArguments\":[{\"rawType\":\"bigint\",\"literalArguments\":[],\"typeArguments\":[]}]}"); assertEquals(legacy, signature); }
private static void insertRows( ConnectorTableMetadata tableMetadata, Handle handle, RecordSet data) { List<ColumnMetadata> columns = ImmutableList.copyOf( Iterables.filter( tableMetadata.getColumns(), new Predicate<ColumnMetadata>() { @Override public boolean apply(ColumnMetadata columnMetadata) { return !columnMetadata.isHidden(); } })); String vars = Joiner.on(',').join(nCopies(columns.size(), "?")); String sql = format("INSERT INTO %s VALUES (%s)", tableMetadata.getTable().getTableName(), vars); RecordCursor cursor = data.cursor(); while (true) { // insert 1000 rows at a time PreparedBatch batch = handle.prepareBatch(sql); for (int row = 0; row < 1000; row++) { if (!cursor.advanceNextPosition()) { batch.execute(); return; } PreparedBatchPart part = batch.add(); for (int column = 0; column < columns.size(); column++) { Type type = columns.get(column).getType(); if (BOOLEAN.equals(type)) { part.bind(column, cursor.getBoolean(column)); } else if (BIGINT.equals(type)) { part.bind(column, cursor.getLong(column)); } else if (DOUBLE.equals(type)) { part.bind(column, cursor.getDouble(column)); } else if (VARCHAR.equals(type)) { part.bind(column, cursor.getSlice(column).toStringUtf8()); } else if (DATE.equals(type)) { long millisUtc = TimeUnit.DAYS.toMillis(cursor.getLong(column)); // H2 expects dates to be millis at midnight in the JVM timezone long localMillis = DateTimeZone.UTC.getMillisKeepLocal(DateTimeZone.getDefault(), millisUtc); part.bind(column, new Date(localMillis)); } else { throw new IllegalArgumentException("Unsupported type " + type); } } } batch.execute(); } }
private void parseColumn(int column) { Type type = types[column]; if (BOOLEAN.equals(type)) { parseBooleanColumn(column); } else if (BIGINT.equals(type)) { parseLongColumn(column); } else if (DOUBLE.equals(type)) { parseDoubleColumn(column); } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) { parseStringColumn(column); } else if (TIMESTAMP.equals(type)) { parseLongColumn(column); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } }
@Test public void testJsonRoundTrip() { TypeSignature bigint = BIGINT.getTypeSignature(); assertJsonRoundTrip(new ClientTypeSignature(bigint)); assertJsonRoundTrip( new ClientTypeSignature( "array", ImmutableList.of(new ClientTypeSignatureParameter(TypeSignatureParameter.of(bigint))))); assertJsonRoundTrip( new ClientTypeSignature( "foo", ImmutableList.of(new ClientTypeSignatureParameter(TypeSignatureParameter.of(42))))); assertJsonRoundTrip( new ClientTypeSignature( "row", ImmutableList.of( new ClientTypeSignatureParameter( TypeSignatureParameter.of(new NamedTypeSignature("foo", bigint))), new ClientTypeSignatureParameter( TypeSignatureParameter.of(new NamedTypeSignature("bar", bigint)))))); }
@Override public void addInput(Page page) { Block block = page.getBlock(labelChannel); for (int position = 0; position < block.getPositionCount(); position++) { if (labelIsLong) { labels.add((double) BIGINT.getLong(block, position)); } else { labels.add(DOUBLE.getDouble(block, position)); } } block = page.getBlock(featuresChannel); for (int position = 0; position < block.getPositionCount(); position++) { FeatureVector featureVector = ModelUtils.jsonToFeatures(VARCHAR.getSlice(block, position)); rowsSize += featureVector.getEstimatedSize(); rows.add(featureVector); } if (params == null) { block = page.getBlock(paramsChannel); params = LibSvmUtils.parseParameters(VARCHAR.getSlice(block, 0).toStringUtf8()); } }
@Test public void testWriter() throws Exception { OrcStorageManager manager = createOrcStorageManager(); List<Long> columnIds = ImmutableList.of(3L, 7L); List<Type> columnTypes = ImmutableList.<Type>of(BIGINT, createVarcharType(10)); StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes); List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").build(); sink.appendPages(pages); // shard is not recorded until flush assertEquals(shardRecorder.getShards().size(), 0); sink.flush(); // shard is recorded after flush List<RecordedShard> recordedShards = shardRecorder.getShards(); assertEquals(recordedShards.size(), 1); List<ShardInfo> shards = getFutureValue(sink.commit()); assertEquals(shards.size(), 1); ShardInfo shardInfo = Iterables.getOnlyElement(shards); UUID shardUuid = shardInfo.getShardUuid(); File file = storageService.getStorageFile(shardUuid); File backupFile = fileBackupStore.getBackupFile(shardUuid); assertEquals(recordedShards.get(0).getTransactionId(), TRANSACTION_ID); assertEquals(recordedShards.get(0).getShardUuid(), shardUuid); assertEquals(shardInfo.getRowCount(), 2); assertEquals(shardInfo.getCompressedSize(), file.length()); // verify primary and backup shard exist assertFile(file, "primary shard"); assertFile(backupFile, "backup shard"); assertFileEquals(file, backupFile); // remove primary shard to force recovery from backup assertTrue(file.delete()); assertTrue(file.getParentFile().delete()); assertFalse(file.exists()); recoveryManager.restoreFromBackup(shardUuid, OptionalLong.empty()); try (OrcDataSource dataSource = manager.openShard(shardUuid, READER_ATTRIBUTES)) { OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes); assertEquals(reader.nextBatch(), 2); Block column0 = reader.readBlock(BIGINT, 0); assertEquals(column0.isNull(0), false); assertEquals(column0.isNull(1), false); assertEquals(BIGINT.getLong(column0, 0), 123L); assertEquals(BIGINT.getLong(column0, 1), 456L); Block column1 = reader.readBlock(createVarcharType(10), 1); assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello")); assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("bye")); assertEquals(reader.nextBatch(), -1); } }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, DateTimeZone sessionTimeZone) { checkNotNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); checkNotNull(splitSchema, "splitSchema is null"); checkNotNull(partitionKeys, "partitionKeys is null"); checkNotNull(columns, "columns is null"); checkArgument(!columns.isEmpty(), "columns is empty"); checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); checkNotNull(sessionTimeZone, "sessionTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.sessionTimeZone = sessionTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = column.getType(); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey.nameGetter()); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8); Type type = types[columnIndex]; if (BOOLEAN.equals(type)) { if (isTrue(bytes, 0, bytes.length)) { booleans[columnIndex] = true; } else if (isFalse(bytes, 0, bytes.length)) { booleans[columnIndex] = false; } else { String valueString = new String(bytes, Charsets.UTF_8); throw new IllegalArgumentException( String.format( "Invalid partition value '%s' for BOOLEAN partition key %s", valueString, names[columnIndex])); } } else if (BIGINT.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for BIGINT partition key %s", names[columnIndex])); } longs[columnIndex] = parseLong(bytes, 0, bytes.length); } else if (DOUBLE.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for DOUBLE partition key %s", names[columnIndex])); } doubles[columnIndex] = parseDouble(bytes, 0, bytes.length); } else if (VARCHAR.equals(type)) { slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length)); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } } } }
@Test public void testWriter() throws Exception { List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L); ArrayType arrayType = new ArrayType(BIGINT); ArrayType arrayOfArrayType = new ArrayType(arrayType); MapType mapType = new MapType(createVarcharType(10), BOOLEAN); List<Type> columnTypes = ImmutableList.of( BIGINT, createVarcharType(10), VARBINARY, DOUBLE, BOOLEAN, arrayType, mapType, arrayOfArrayType); File file = new File(directory, System.nanoTime() + ".orc"); byte[] bytes1 = octets(0x00, 0xFE, 0xFF); byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80); RowPagesBuilder rowPagesBuilder = RowPagesBuilder.rowPagesBuilder(columnTypes) .row( 123L, "hello", wrappedBuffer(bytes1), 123.456, true, arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))) .row( null, "world", null, Double.POSITIVE_INFINITY, null, arrayBlockOf(BIGINT, 3, null), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))) .row( 456L, "bye \u2603", wrappedBuffer(bytes3), Double.NaN, false, arrayBlockOf(BIGINT), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT))); try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader()); OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) { writer.appendPages(rowPagesBuilder.build()); } try (OrcDataSource dataSource = fileOrcDataSource(file)) { OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes); assertEquals(reader.getReaderRowCount(), 3); assertEquals(reader.getReaderPosition(), 0); assertEquals(reader.getFileRowCount(), reader.getReaderRowCount()); assertEquals(reader.getFilePosition(), reader.getReaderPosition()); assertEquals(reader.nextBatch(), 3); assertEquals(reader.getReaderPosition(), 0); assertEquals(reader.getFilePosition(), reader.getReaderPosition()); Block column0 = reader.readBlock(BIGINT, 0); assertEquals(column0.isNull(0), false); assertEquals(column0.isNull(1), true); assertEquals(column0.isNull(2), false); assertEquals(BIGINT.getLong(column0, 0), 123L); assertEquals(BIGINT.getLong(column0, 2), 456L); Block column1 = reader.readBlock(createVarcharType(10), 1); assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello")); assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world")); assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye \u2603")); Block column2 = reader.readBlock(VARBINARY, 2); assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1)); assertEquals(column2.isNull(1), true); assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3)); Block column3 = reader.readBlock(DOUBLE, 3); assertEquals(column3.isNull(0), false); assertEquals(column3.isNull(1), false); assertEquals(column3.isNull(2), false); assertEquals(DOUBLE.getDouble(column3, 0), 123.456); assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY); assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN); Block column4 = reader.readBlock(BOOLEAN, 4); assertEquals(column4.isNull(0), false); assertEquals(column4.isNull(1), true); assertEquals(column4.isNull(2), false); assertEquals(BOOLEAN.getBoolean(column4, 0), true); assertEquals(BOOLEAN.getBoolean(column4, 2), false); Block column5 = reader.readBlock(arrayType, 5); assertEquals(column5.getPositionCount(), 3); assertTrue( arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2))); assertTrue( arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null))); assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT))); Block column6 = reader.readBlock(mapType, 6); assertEquals(column6.getPositionCount(), 3); assertTrue( mapBlocksEqual( createVarcharType(5), BOOLEAN, mapType.getObject(column6, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true))); assertTrue( mapBlocksEqual( createVarcharType(5), BOOLEAN, mapType.getObject(column6, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null))); assertTrue( mapBlocksEqual( createVarcharType(5), BOOLEAN, mapType.getObject(column6, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false))); Block column7 = reader.readBlock(arrayOfArrayType, 7); assertEquals(column7.getPositionCount(), 3); assertTrue( arrayBlocksEqual( arrayType, arrayOfArrayType.getObject(column7, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)))); assertTrue( arrayBlocksEqual( arrayType, arrayOfArrayType.getObject(column7, 1), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7)))); assertTrue( arrayBlocksEqual( arrayType, arrayOfArrayType.getObject(column7, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT)))); assertEquals(reader.nextBatch(), -1); assertEquals(reader.getReaderPosition(), 3); assertEquals(reader.getFilePosition(), reader.getReaderPosition()); OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes()); assertEquals( orcFileMetadata, new OrcFileMetadata( ImmutableMap.<Long, TypeSignature>builder() .put(1L, BIGINT.getTypeSignature()) .put(2L, createVarcharType(10).getTypeSignature()) .put(4L, VARBINARY.getTypeSignature()) .put(6L, DOUBLE.getTypeSignature()) .put(7L, BOOLEAN.getTypeSignature()) .put(8L, arrayType.getTypeSignature()) .put(9L, mapType.getTypeSignature()) .put(10L, arrayOfArrayType.getTypeSignature()) .build())); } File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc"); assertFalse(crcFile.exists()); }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) { requireNonNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); requireNonNull(splitSchema, "splitSchema is null"); requireNonNull(partitionKeys, "partitionKeys is null"); requireNonNull(columns, "columns is null"); requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.objects = new Object[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = typeManager.getType(column.getTypeSignature()); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(UTF_8); String name = names[columnIndex]; Type type = types[columnIndex]; if (HiveUtil.isHiveNull(bytes)) { nulls[columnIndex] = true; } else if (BOOLEAN.equals(type)) { booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name); } else if (BIGINT.equals(type)) { longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name); } else if (INTEGER.equals(type)) { longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name); } else if (SMALLINT.equals(type)) { longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name); } else if (TINYINT.equals(type)) { longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name); } else if (DOUBLE.equals(type)) { doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name); } else if (isVarcharType(type)) { slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type); } else if (DATE.equals(type)) { longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name); } else if (TIMESTAMP.equals(type)) { longs[columnIndex] = timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name); } else if (isShortDecimal(type)) { longs[columnIndex] = shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else if (isLongDecimal(type)) { slices[columnIndex] = longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else { throw new PrestoException( NOT_SUPPORTED, format( "Unsupported column type %s for partition key: %s", type.getDisplayName(), name)); } } } }
@Override public void readFields(DataInput in) throws IOException { /* * extract pkt len. * * GPSQL-1107: * The DataInput might already be empty (EOF), but we can't check it beforehand. * If that's the case, pktlen is updated to -1, to mark that the object is still empty. * (can be checked with isEmpty()). */ pktlen = readPktLen(in); if (isEmpty()) { return; } /* extract the version and col cnt */ int version = in.readShort(); int curOffset = 4 + 2; int colCnt; /* !!! Check VERSION !!! */ if (version != GPDBWritable.VERSION && version != GPDBWritable.PREV_VERSION) { throw new IOException( "Current GPDBWritable version(" + GPDBWritable.VERSION + ") does not match input version(" + version + ")"); } if (version == GPDBWritable.VERSION) { errorFlag = in.readByte(); curOffset += 1; } colCnt = in.readShort(); curOffset += 2; /* Extract Column Type */ colType = new int[colCnt]; DBType[] coldbtype = new DBType[colCnt]; for (int i = 0; i < colCnt; i++) { int enumType = in.readByte(); curOffset += 1; if (enumType == DBType.BIGINT.ordinal()) { colType[i] = BIGINT.getOID(); coldbtype[i] = DBType.BIGINT; } else if (enumType == DBType.BOOLEAN.ordinal()) { colType[i] = BOOLEAN.getOID(); coldbtype[i] = DBType.BOOLEAN; } else if (enumType == DBType.FLOAT8.ordinal()) { colType[i] = FLOAT8.getOID(); coldbtype[i] = DBType.FLOAT8; } else if (enumType == DBType.INTEGER.ordinal()) { colType[i] = INTEGER.getOID(); coldbtype[i] = DBType.INTEGER; } else if (enumType == DBType.REAL.ordinal()) { colType[i] = REAL.getOID(); coldbtype[i] = DBType.REAL; } else if (enumType == DBType.SMALLINT.ordinal()) { colType[i] = SMALLINT.getOID(); coldbtype[i] = DBType.SMALLINT; } else if (enumType == DBType.BYTEA.ordinal()) { colType[i] = BYTEA.getOID(); coldbtype[i] = DBType.BYTEA; } else if (enumType == DBType.TEXT.ordinal()) { colType[i] = TEXT.getOID(); coldbtype[i] = DBType.TEXT; } else { throw new IOException("Unknown GPDBWritable.DBType ordinal value"); } } /* Extract null bit array */ byte[] nullbytes = new byte[getNullByteArraySize(colCnt)]; in.readFully(nullbytes); curOffset += nullbytes.length; boolean[] colIsNull = byteArrayToBooleanArray(nullbytes, colCnt); /* extract column value */ colValue = new Object[colCnt]; for (int i = 0; i < colCnt; i++) { if (!colIsNull[i]) { /* Skip the alignment padding */ int skipbytes = roundUpAlignment(curOffset, coldbtype[i].getAlignment()) - curOffset; for (int j = 0; j < skipbytes; j++) { in.readByte(); } curOffset += skipbytes; /* For fixed-length types, increment the offset according to the type length here. * For var-length types (BYTEA, TEXT), we'll read the 4-byte length header and the * actual payload. */ int varcollen = -1; if (coldbtype[i].isVarLength()) { varcollen = in.readInt(); curOffset += 4 + varcollen; } else { curOffset += coldbtype[i].getTypeLength(); } switch (DataType.get(colType[i])) { case BIGINT: { colValue[i] = in.readLong(); break; } case BOOLEAN: { colValue[i] = in.readBoolean(); break; } case FLOAT8: { colValue[i] = in.readDouble(); break; } case INTEGER: { colValue[i] = in.readInt(); break; } case REAL: { colValue[i] = in.readFloat(); break; } case SMALLINT: { colValue[i] = in.readShort(); break; } /* For a BYTEA column, it has a 4-byte var length header. */ case BYTEA: { colValue[i] = new byte[varcollen]; in.readFully((byte[]) colValue[i]); break; } /* For a text formatted column, it has a 4-byte var length header * and it's always a null-terminated string. * So, we can remove the last "\0" when constructing the string. */ case TEXT: { byte[] data = new byte[varcollen]; in.readFully(data, 0, varcollen); colValue[i] = new String(data, 0, varcollen - 1, CHARSET); break; } default: throw new IOException("Unknown GPDBWritable ColType"); } } } /* Skip the ending alignment padding */ int skipbytes = roundUpAlignment(curOffset, 8) - curOffset; for (int j = 0; j < skipbytes; j++) { in.readByte(); } curOffset += skipbytes; if (errorFlag != 0) { throw new IOException("Received error value " + errorFlag + " from format"); } }
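// Aside (assumption, not copied from GPDBWritable): the padding arithmetic above depends on a round-up-to-multiple helper. A minimal sketch of the standard formula, under the assumption that roundUpAlignment behaves this way:
static int roundUpAlignment(int offset, int alignment)
{
    // round offset up to the next multiple of alignment,
    // e.g. roundUpAlignment(10, 8) == 16 and roundUpAlignment(16, 8) == 16
    return ((offset + alignment - 1) / alignment) * alignment;
}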
@Override public void addInt(int value) { BIGINT.writeLong(builder, value); wroteValue = true; }
public static NullableValue parsePartitionValue( String partitionName, String value, Type type, DateTimeZone timeZone) { boolean isNull = HIVE_DEFAULT_DYNAMIC_PARTITION.equals(value); if (type instanceof DecimalType) { DecimalType decimalType = (DecimalType) type; if (isNull) { return NullableValue.asNull(decimalType); } if (decimalType.isShort()) { if (value.isEmpty()) { return NullableValue.of(decimalType, 0L); } return NullableValue.of( decimalType, shortDecimalPartitionKey(value, decimalType, partitionName)); } else { if (value.isEmpty()) { return NullableValue.of(decimalType, Decimals.encodeUnscaledValue(BigInteger.ZERO)); } return NullableValue.of( decimalType, longDecimalPartitionKey(value, decimalType, partitionName)); } } if (BOOLEAN.equals(type)) { if (isNull) { return NullableValue.asNull(BOOLEAN); } if (value.isEmpty()) { return NullableValue.of(BOOLEAN, false); } return NullableValue.of(BOOLEAN, booleanPartitionKey(value, partitionName)); } if (TINYINT.equals(type)) { if (isNull) { return NullableValue.asNull(TINYINT); } if (value.isEmpty()) { return NullableValue.of(TINYINT, 0L); } return NullableValue.of(TINYINT, tinyintPartitionKey(value, partitionName)); } if (SMALLINT.equals(type)) { if (isNull) { return NullableValue.asNull(SMALLINT); } if (value.isEmpty()) { return NullableValue.of(SMALLINT, 0L); } return NullableValue.of(SMALLINT, smallintPartitionKey(value, partitionName)); } if (INTEGER.equals(type)) { if (isNull) { return NullableValue.asNull(INTEGER); } if (value.isEmpty()) { return NullableValue.of(INTEGER, 0L); } return NullableValue.of(INTEGER, integerPartitionKey(value, partitionName)); } if (BIGINT.equals(type)) { if (isNull) { return NullableValue.asNull(BIGINT); } if (value.isEmpty()) { return NullableValue.of(BIGINT, 0L); } return NullableValue.of(BIGINT, bigintPartitionKey(value, partitionName)); } if (DATE.equals(type)) { if (isNull) { return NullableValue.asNull(DATE); } return NullableValue.of(DATE, datePartitionKey(value, partitionName)); } if (TIMESTAMP.equals(type)) { if (isNull) { return NullableValue.asNull(TIMESTAMP); } return NullableValue.of(TIMESTAMP, timestampPartitionKey(value, timeZone, partitionName)); } if (REAL.equals(type)) { if (isNull) { return NullableValue.asNull(REAL); } if (value.isEmpty()) { return NullableValue.of(REAL, (long) floatToRawIntBits(0.0f)); } return NullableValue.of(REAL, floatPartitionKey(value, partitionName)); } if (DOUBLE.equals(type)) { if (isNull) { return NullableValue.asNull(DOUBLE); } if (value.isEmpty()) { return NullableValue.of(DOUBLE, 0.0); } return NullableValue.of(DOUBLE, doublePartitionKey(value, partitionName)); } if (type instanceof VarcharType) { if (isNull) { return NullableValue.asNull(type); } return NullableValue.of(type, varcharPartitionKey(value, partitionName, type)); } if (isCharType(type)) { if (isNull) { return NullableValue.asNull(type); } return NullableValue.of(type, charPartitionKey(value, partitionName, type)); } throw new PrestoException( NOT_SUPPORTED, format("Unsupported Type [%s] for partition: %s", type, partitionName)); }
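// Aside (illustration, not from the source): REAL values are stored widened to a long holding the raw IEEE-754 bits, which is why the empty-value default above is (long) floatToRawIntBits(0.0f). A round-trip sketch using only java.lang.Float:
float original = 1.5f;
long stored = (long) Float.floatToRawIntBits(original); // the long representation passed to NullableValue.of(REAL, ...)
float recovered = Float.intBitsToFloat((int) stored);
assert recovered == original;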
public class TestApproximatePercentileAggregation { private static final Block EMPTY_DOUBLE_BLOCK = DOUBLE.createBlockBuilder(new BlockBuilderStatus(), 0).build(); private static final Block EMPTY_LONG_BLOCK = BIGINT.createBlockBuilder(new BlockBuilderStatus(), 0).build(); @Test public void testLongPartialStep() throws Exception { // regular approx_percentile assertAggregation( LONG_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, null, createLongsBlock(null, null), createRLEBlock(0.5, 2)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 1L, createLongsBlock(null, 1L), createRLEBlock(0.5, 2)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 2L, createLongsBlock(null, 1L, 2L, 3L), createRLEBlock(0.5, 4)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 2L, createLongsBlock(1L, 2L, 3L), createRLEBlock(0.5, 3)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 3L, createLongsBlock( 1L, null, 2L, 2L, null, 2L, 2L, null, 2L, 2L, null, 3L, 3L, null, 3L, null, 3L, 4L, 5L, 6L, 7L), createRLEBlock(0.5, 21)); // weighted approx_percentile assertAggregation( LONG_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, null, createLongsBlock(null, null), createLongsBlock(1L, 1L), createRLEBlock(0.5, 2)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 1L, createLongsBlock(null, 1L), createLongsBlock(1L, 1L), createRLEBlock(0.5, 2)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 2L, createLongsBlock(null, 1L, 2L, 3L), createLongsBlock(1L, 1L, 1L, 1L), createRLEBlock(0.5, 4)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 2L, createLongsBlock(1L, 2L, 3L), createLongsBlock(1L, 1L, 1L), createRLEBlock(0.5, 3)); assertAggregation( LONG_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 3L, createLongsBlock( 1L, null, 2L, null, 2L, null, 2L, null, 3L, null, 3L, null, 3L, 4L, 5L, 6L, 7L), createLongsBlock(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), createRLEBlock(0.5, 17)); } @Test public void testDoublePartialStep() throws Exception { // regular approx_percentile assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, null, createDoublesBlock(null, null), createRLEBlock(0.5, 2)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 1.0, createDoublesBlock(null, 1.0), createRLEBlock(0.5, 2)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 2.0, createDoublesBlock(null, 1.0, 2.0, 3.0), createRLEBlock(0.5, 4)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 2.0, createDoublesBlock(1.0, 2.0, 3.0), createRLEBlock(0.5, 3)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_AGGREGATION, 1.0, 3.0, createDoublesBlock( 1.0, null, 2.0, 2.0, null, 2.0, 2.0, null, 2.0, 2.0, null, 3.0, 3.0, null, 3.0, null, 3.0, 4.0, 5.0, 6.0, 7.0), createRLEBlock(0.5, 21)); // weighted approx_percentile assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, null, createDoublesBlock(null, null), createLongsBlock(1L, 1L), createRLEBlock(0.5, 2)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 1.0, createDoublesBlock(null, 1.0), createLongsBlock(1L, 1L), createRLEBlock(0.5, 2)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 2.0, createDoublesBlock(null, 1.0, 2.0, 3.0), createLongsBlock(1L, 1L, 1L, 1L), createRLEBlock(0.5, 4)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 2.0, createDoublesBlock(1.0, 2.0, 3.0), createLongsBlock(1L, 1L, 1L), createRLEBlock(0.5, 3)); assertAggregation( DOUBLE_APPROXIMATE_PERCENTILE_WEIGHTED_AGGREGATION, 1.0, 3.0, createDoublesBlock( 1.0, null, 2.0, null, 2.0, null, 2.0, null, 3.0, null, 3.0, null, 3.0, 4.0, 5.0, 6.0, 7.0), createLongsBlock(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), createRLEBlock(0.5, 17)); } }
@OutputFunction(StandardTypes.BIGINT) public static void output(LongState state, BlockBuilder out) { BIGINT.writeLong(out, state.getLong()); }
@Override public void project(int position, Block[] blocks, BlockBuilder output) { BIGINT.writeLong(output, TypeUtils.getHashPosition(columnTypes, blocks, position)); }