@Test(dataProvider = "hashEnabledValues")
public void testDistinctLimit(boolean hashEnabled)
        throws Exception
{
    // Input is two overlapping sequences, 1..3 and 2..6, so values 2 and 3 repeat.
    RowPagesBuilder pages = rowPagesBuilder(hashEnabled, Ints.asList(0), BIGINT);
    List<Page> input = pages
            .addSequencePage(3, 1)
            .addSequencePage(5, 2)
            .build();

    OperatorFactory operatorFactory = new DistinctLimitOperator.DistinctLimitOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            Ints.asList(0),
            5,
            pages.getHashChannel());

    // Distinct values are 1..6; the limit of 5 keeps only the first five seen.
    MaterializedResult expected = resultBuilder(driverContext.getSession(), BIGINT)
            .row(1L)
            .row(2L)
            .row(3L)
            .row(4L)
            .row(5L)
            .build();

    assertOperatorEquals(operatorFactory, driverContext, input, expected);
}
/**
 * Returns the precomputed-hash channel indexes of a joined probe/build output page.
 * The probe's hash channel keeps its index; the build's hash channel is shifted past
 * all probe channels, matching the probe-then-build column layout of join output.
 */
private static List<Integer> getHashChannels(RowPagesBuilder probe, RowPagesBuilder build)
{
    ImmutableList.Builder<Integer> channels = ImmutableList.builder();
    int probeWidth = probe.getTypes().size();
    if (probe.getHashChannel().isPresent()) {
        channels.add(probe.getHashChannel().get());
    }
    if (build.getHashChannel().isPresent()) {
        channels.add(probeWidth + build.getHashChannel().get());
    }
    return channels.build();
}
@Test(dataProvider = "hashEnabledValues") public void testProbeOuterJoin( boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) throws Exception { TaskContext taskContext = createTaskContext(); // build List<Type> buildTypes = ImmutableList.<Type>of(VARCHAR, BIGINT, BIGINT); RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT, BIGINT)) .addSequencePage(10, 20, 30, 40); LookupSourceSupplier lookupSourceSupplier = buildHash(parallelBuild, taskContext, Ints.asList(0), buildPages); // probe List<Type> probeTypes = ImmutableList.<Type>of(VARCHAR, BIGINT, BIGINT); RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes); List<Page> probeInput = probePages.addSequencePage(15, 20, 1020, 2020).build(); OperatorFactory joinOperatorFactory = LookupJoinOperators.probeOuterJoin( 0, new PlanNodeId("test"), lookupSourceSupplier, probePages.getTypes(), Ints.asList(0), probePages.getHashChannel()); Operator joinOperator = joinOperatorFactory.createOperator( taskContext.addPipelineContext(true, true).addDriverContext()); // expected // expected MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)) .row("20", 1020, 2020, "20", 30, 40) .row("21", 1021, 2021, "21", 31, 41) .row("22", 1022, 2022, "22", 32, 42) .row("23", 1023, 2023, "23", 33, 43) .row("24", 1024, 2024, "24", 34, 44) .row("25", 1025, 2025, "25", 35, 45) .row("26", 1026, 2026, "26", 36, 46) .row("27", 1027, 2027, "27", 37, 47) .row("28", 1028, 2028, "28", 38, 48) .row("29", 1029, 2029, "29", 39, 49) .row("30", 1030, 2030, null, null, null) .row("31", 1031, 2031, null, null, null) .row("32", 1032, 2032, null, null, null) .row("33", 1033, 2033, null, null, null) .row("34", 1034, 2034, null, null, null) .build(); assertOperatorEquals( joinOperator, probeInput, expected, true, getHashChannels(probePages, buildPages)); }
@Test(dataProvider = "hashEnabledValues") public void testOuterJoinWithNullBuild( boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) throws Exception { TaskContext taskContext = createTaskContext(); // build List<Type> buildTypes = ImmutableList.<Type>of(VARCHAR); RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR)) .row("a") .row((String) null) .row((String) null) .row("a") .row("b"); LookupSourceSupplier lookupSourceSupplier = buildHash(parallelBuild, taskContext, Ints.asList(0), buildPages); // probe List<Type> probeTypes = ImmutableList.<Type>of(VARCHAR); RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes); List<Page> probeInput = probePages.row("a").row("b").row("c").build(); OperatorFactory joinOperatorFactory = LookupJoinOperators.probeOuterJoin( 0, new PlanNodeId("test"), lookupSourceSupplier, probePages.getTypes(), Ints.asList(0), probePages.getHashChannel()); Operator joinOperator = joinOperatorFactory.createOperator( taskContext.addPipelineContext(true, true).addDriverContext()); // expected MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)) .row("a", "a") .row("a", "a") .row("b", "b") .row("c", null) .build(); assertOperatorEquals( joinOperator, probeInput, expected, true, getHashChannels(probePages, buildPages)); }
// FIX: added the missing @Test annotation - every other test method in this file is
// annotated, and without it TestNG silently skips this method.
@Test
public void testGroupId()
        throws Exception
{
    RowPagesBuilder rowPagesBuilder = rowPagesBuilder(false, ImmutableList.of(), BIGINT, VARCHAR, BOOLEAN, BIGINT);
    List<Page> input = rowPagesBuilder
            .addSequencePage(3, 100, 400, 0, 1000)
            .addSequencePage(3, 200, 500, 0, 1100)
            .build();

    // Two grouping sets over input channels: {1, 2} and {3}; channel 0 is an aggregation
    // argument copied into every output row; the final column is the group id.
    GroupIdOperatorFactory operatorFactory = new GroupIdOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(VARCHAR, BOOLEAN, BIGINT, BIGINT, BIGINT),
            ImmutableList.of(ImmutableList.of(1, 2), ImmutableList.of(3)),
            ImmutableList.of(1, 2, 3),
            ImmutableList.of(0));

    // Each input row appears once per grouping set: channels outside the set are null,
    // and the trailing column is the grouping-set index (0 or 1).
    MaterializedResult expected = resultBuilder(driverContext.getSession(), VARCHAR, BOOLEAN, BIGINT, BIGINT, BIGINT)
            .row("400", true, null, 100L, 0L)
            .row("401", false, null, 101L, 0L)
            .row("402", true, null, 102L, 0L)
            .row("500", true, null, 200L, 0L)
            .row("501", false, null, 201L, 0L)
            .row("502", true, null, 202L, 0L)
            .row(null, null, 1000L, 100L, 1L)
            .row(null, null, 1001L, 101L, 1L)
            .row(null, null, 1002L, 102L, 1L)
            .row(null, null, 1100L, 200L, 1L)
            .row(null, null, 1101L, 201L, 1L)
            .row(null, null, 1102L, 202L, 1L)
            .build();

    assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected);
}
@Test
public void testWriter()
        throws Exception
{
    // Column ids are deliberately sparse, mimicking a table that has had columns dropped.
    List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L);
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    // NOTE(review): the map column is declared with varchar(10) keys but the row data
    // below builds map blocks with varchar(5) keys - confirm this widening is intentional.
    MapType mapType = new MapType(createVarcharType(10), BOOLEAN);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10), VARBINARY, DOUBLE, BOOLEAN, arrayType, mapType, arrayOfArrayType);

    File file = new File(directory, System.nanoTime() + ".orc");

    byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
    byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);

    // Three rows covering non-null, null, and edge values (infinity, NaN, empty array/map).
    RowPagesBuilder rowPagesBuilder = RowPagesBuilder.rowPagesBuilder(columnTypes)
            .row(123L,
                    "hello",
                    wrappedBuffer(bytes1),
                    123.456,
                    true,
                    arrayBlockOf(BIGINT, 1, 2),
                    mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true),
                    arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)))
            .row(null,
                    "world",
                    null,
                    Double.POSITIVE_INFINITY,
                    null,
                    arrayBlockOf(BIGINT, 3, null),
                    mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null),
                    arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7)))
            .row(456L,
                    "bye \u2603",
                    wrappedBuffer(bytes3),
                    Double.NaN,
                    false,
                    arrayBlockOf(BIGINT),
                    mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false),
                    arrayBlockOf(arrayType, arrayBlockOf(BIGINT)));

    // Write under an empty context class loader to verify the writer does not depend on it.
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
            OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
        writer.appendPages(rowPagesBuilder.build());
    }

    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 3);
        assertEquals(reader.getReaderPosition(), 0);
        assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
        // FIX: was assertEquals(reader.getFilePosition(), reader.getFilePosition()) -
        // comparing a value to itself is always true; compare the file position against
        // the reader position, mirroring the row-count check above. (Same fix applied at
        // the two later occurrences.)
        assertEquals(reader.getFilePosition(), reader.getReaderPosition());

        assertEquals(reader.nextBatch(), 3);
        assertEquals(reader.getReaderPosition(), 0);
        assertEquals(reader.getFilePosition(), reader.getReaderPosition());

        Block column0 = reader.readBlock(BIGINT, 0);
        assertEquals(column0.isNull(0), false);
        assertEquals(column0.isNull(1), true);
        assertEquals(column0.isNull(2), false);
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);

        Block column1 = reader.readBlock(createVarcharType(10), 1);
        assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
        assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye \u2603"));

        Block column2 = reader.readBlock(VARBINARY, 2);
        assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
        assertEquals(column2.isNull(1), true);
        assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));

        Block column3 = reader.readBlock(DOUBLE, 3);
        assertEquals(column3.isNull(0), false);
        assertEquals(column3.isNull(1), false);
        assertEquals(column3.isNull(2), false);
        assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
        assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
        assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);

        Block column4 = reader.readBlock(BOOLEAN, 4);
        assertEquals(column4.isNull(0), false);
        assertEquals(column4.isNull(1), true);
        assertEquals(column4.isNull(2), false);
        assertEquals(BOOLEAN.getBoolean(column4, 0), true);
        assertEquals(BOOLEAN.getBoolean(column4, 2), false);

        Block column5 = reader.readBlock(arrayType, 5);
        assertEquals(column5.getPositionCount(), 3);
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));

        // FIX: column 6 is the map column - read its positions with mapType, not arrayType
        // (was arrayType.getObject(column6, ...) in all three assertions).
        Block column6 = reader.readBlock(mapType, 6);
        assertEquals(column6.getPositionCount(), 3);
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, mapType.getObject(column6, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, mapType.getObject(column6, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, mapType.getObject(column6, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));

        Block column7 = reader.readBlock(arrayOfArrayType, 7);
        assertEquals(column7.getPositionCount(), 3);
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 1), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));

        assertEquals(reader.nextBatch(), -1);
        assertEquals(reader.getReaderPosition(), 3);
        assertEquals(reader.getFilePosition(), reader.getReaderPosition());

        // The writer must record every column id -> type mapping in the user metadata.
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder()
                .put(1L, BIGINT.getTypeSignature())
                .put(2L, createVarcharType(10).getTypeSignature())
                .put(4L, VARBINARY.getTypeSignature())
                .put(6L, DOUBLE.getTypeSignature())
                .put(7L, BOOLEAN.getTypeSignature())
                .put(8L, arrayType.getTypeSignature())
                .put(9L, mapType.getTypeSignature())
                .put(10L, arrayOfArrayType.getTypeSignature())
                .build()));
    }

    // The local filesystem must not leave a Hadoop CRC side-file behind.
    File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
    assertFalse(crcFile.exists());
}
/**
 * Builds a lookup source from {@code buildPages} for the join tests, either with the
 * parallel (partitioned) hash builder or the single-threaded one.
 *
 * @param parallelBuild when true, collect input once and build PARTITION_COUNT partitioned
 *        hash tables via ParallelHashBuilder; otherwise run a single HashBuilderOperator
 * @param taskContext task context used to create the pipelines/drivers that do the build
 * @param hashChannels channels of the build pages to hash on
 * @param buildPages the build-side input rows
 * @return supplier of the completed lookup source for the probe side
 */
private static LookupSourceSupplier buildHash(boolean parallelBuild, TaskContext taskContext, List<Integer> hashChannels, RowPagesBuilder buildPages)
{
    if (parallelBuild) {
        // 100 is the expected-positions hint passed to the hash builder
        ParallelHashBuilder parallelHashBuilder = new ParallelHashBuilder(
                buildPages.getTypes(),
                hashChannels,
                buildPages.getHashChannel(),
                100,
                PARTITION_COUNT);

        // collect input data: drive a Values operator into the collect operator until done
        DriverContext collectDriverContext = taskContext.addPipelineContext(true, true).addDriverContext();
        ValuesOperatorFactory valuesOperatorFactory = new ValuesOperatorFactory(0, new PlanNodeId("test"), buildPages.getTypes(), buildPages.build());
        OperatorFactory collectOperatorFactory = parallelHashBuilder.getCollectOperatorFactory(1, new PlanNodeId("test"));
        Driver driver = new Driver(collectDriverContext,
                valuesOperatorFactory.createOperator(collectDriverContext),
                collectOperatorFactory.createOperator(collectDriverContext));
        while (!driver.isFinished()) {
            driver.process();
        }

        // build hash tables: one single-operator driver per partition, run to completion
        PipelineContext buildPipeline = taskContext.addPipelineContext(true, true);
        OperatorFactory buildOperatorFactory = parallelHashBuilder.getBuildOperatorFactory(new PlanNodeId("test"));
        for (int i = 0; i < PARTITION_COUNT; i++) {
            DriverContext buildDriverContext = buildPipeline.addDriverContext();
            Driver buildDriver = new Driver(buildDriverContext,
                    buildOperatorFactory.createOperator(buildDriverContext));
            while (!buildDriver.isFinished()) {
                buildDriver.process();
            }
        }

        return parallelHashBuilder.getLookupSourceSupplier();
    }
    else {
        // single-threaded build: Values feeds HashBuilderOperator directly in one driver
        DriverContext driverContext = taskContext.addPipelineContext(true, true).addDriverContext();
        ValuesOperatorFactory valuesOperatorFactory = new ValuesOperatorFactory(0, new PlanNodeId("test"), buildPages.getTypes(), buildPages.build());
        HashBuilderOperatorFactory hashBuilderOperatorFactory = new HashBuilderOperatorFactory(
                1,
                new PlanNodeId("test"),
                buildPages.getTypes(),
                hashChannels,
                buildPages.getHashChannel(),
                100);
        Driver driver = new Driver(driverContext,
                valuesOperatorFactory.createOperator(driverContext),
                hashBuilderOperatorFactory.createOperator(driverContext));
        while (!driver.isFinished()) {
            driver.process();
        }
        return hashBuilderOperatorFactory.getLookupSourceSupplier();
    }
}