@Test(dataProvider = "hashEnabledValues")
  public void testDistinctLimit(boolean hashEnabled) throws Exception {
    RowPagesBuilder rowPagesBuilder = rowPagesBuilder(hashEnabled, Ints.asList(0), BIGINT);
    List<Page> input = rowPagesBuilder.addSequencePage(3, 1).addSequencePage(5, 2).build();

    OperatorFactory operatorFactory =
        new DistinctLimitOperator.DistinctLimitOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            Ints.asList(0),
            5,
            rowPagesBuilder.getHashChannel());

    MaterializedResult expected =
        resultBuilder(driverContext.getSession(), BIGINT)
            .row(1L)
            .row(2L)
            .row(3L)
            .row(4L)
            .row(5L)
            .build();

    assertOperatorEquals(operatorFactory, driverContext, input, expected);
  }
Exemplo n.º 2
0
 private static List<Integer> getHashChannels(RowPagesBuilder probe, RowPagesBuilder build) {
   ImmutableList.Builder<Integer> hashChannels = ImmutableList.builder();
   if (probe.getHashChannel().isPresent()) {
     hashChannels.add(probe.getHashChannel().get());
   }
   if (build.getHashChannel().isPresent()) {
     hashChannels.add(probe.getTypes().size() + build.getHashChannel().get());
   }
   return hashChannels.build();
 }
Exemplo n.º 3
0
  @Test(dataProvider = "hashEnabledValues")
  public void testProbeOuterJoin(
      boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) throws Exception {
    TaskContext taskContext = createTaskContext();

    // build
    List<Type> buildTypes = ImmutableList.<Type>of(VARCHAR, BIGINT, BIGINT);
    RowPagesBuilder buildPages =
        rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT, BIGINT))
            .addSequencePage(10, 20, 30, 40);
    LookupSourceSupplier lookupSourceSupplier =
        buildHash(parallelBuild, taskContext, Ints.asList(0), buildPages);

    // probe
    List<Type> probeTypes = ImmutableList.<Type>of(VARCHAR, BIGINT, BIGINT);
    RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
    List<Page> probeInput = probePages.addSequencePage(15, 20, 1020, 2020).build();
    OperatorFactory joinOperatorFactory =
        LookupJoinOperators.probeOuterJoin(
            0,
            new PlanNodeId("test"),
            lookupSourceSupplier,
            probePages.getTypes(),
            Ints.asList(0),
            probePages.getHashChannel());
    Operator joinOperator =
        joinOperatorFactory.createOperator(
            taskContext.addPipelineContext(true, true).addDriverContext());

    // expected
    // expected
    MaterializedResult expected =
        MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes))
            .row("20", 1020, 2020, "20", 30, 40)
            .row("21", 1021, 2021, "21", 31, 41)
            .row("22", 1022, 2022, "22", 32, 42)
            .row("23", 1023, 2023, "23", 33, 43)
            .row("24", 1024, 2024, "24", 34, 44)
            .row("25", 1025, 2025, "25", 35, 45)
            .row("26", 1026, 2026, "26", 36, 46)
            .row("27", 1027, 2027, "27", 37, 47)
            .row("28", 1028, 2028, "28", 38, 48)
            .row("29", 1029, 2029, "29", 39, 49)
            .row("30", 1030, 2030, null, null, null)
            .row("31", 1031, 2031, null, null, null)
            .row("32", 1032, 2032, null, null, null)
            .row("33", 1033, 2033, null, null, null)
            .row("34", 1034, 2034, null, null, null)
            .build();

    assertOperatorEquals(
        joinOperator, probeInput, expected, true, getHashChannels(probePages, buildPages));
  }
Exemplo n.º 4
0
  @Test(dataProvider = "hashEnabledValues")
  public void testOuterJoinWithNullBuild(
      boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) throws Exception {
    TaskContext taskContext = createTaskContext();

    // build
    List<Type> buildTypes = ImmutableList.<Type>of(VARCHAR);
    RowPagesBuilder buildPages =
        rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR))
            .row("a")
            .row((String) null)
            .row((String) null)
            .row("a")
            .row("b");
    LookupSourceSupplier lookupSourceSupplier =
        buildHash(parallelBuild, taskContext, Ints.asList(0), buildPages);

    // probe
    List<Type> probeTypes = ImmutableList.<Type>of(VARCHAR);
    RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
    List<Page> probeInput = probePages.row("a").row("b").row("c").build();
    OperatorFactory joinOperatorFactory =
        LookupJoinOperators.probeOuterJoin(
            0,
            new PlanNodeId("test"),
            lookupSourceSupplier,
            probePages.getTypes(),
            Ints.asList(0),
            probePages.getHashChannel());
    Operator joinOperator =
        joinOperatorFactory.createOperator(
            taskContext.addPipelineContext(true, true).addDriverContext());

    // expected
    MaterializedResult expected =
        MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes))
            .row("a", "a")
            .row("a", "a")
            .row("b", "b")
            .row("c", null)
            .build();

    assertOperatorEquals(
        joinOperator, probeInput, expected, true, getHashChannels(probePages, buildPages));
  }
  public void testGroupId() throws Exception {
    RowPagesBuilder rowPagesBuilder =
        rowPagesBuilder(false, ImmutableList.of(), BIGINT, VARCHAR, BOOLEAN, BIGINT);
    List<Page> input =
        rowPagesBuilder
            .addSequencePage(3, 100, 400, 0, 1000)
            .addSequencePage(3, 200, 500, 0, 1100)
            .build();

    GroupIdOperatorFactory operatorFactory =
        new GroupIdOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(VARCHAR, BOOLEAN, BIGINT, BIGINT, BIGINT),
            ImmutableList.of(ImmutableList.of(1, 2), ImmutableList.of(3)),
            ImmutableList.of(1, 2, 3),
            ImmutableList.of(0));

    MaterializedResult expected =
        resultBuilder(driverContext.getSession(), VARCHAR, BOOLEAN, BIGINT, BIGINT, BIGINT)
            .row("400", true, null, 100L, 0L)
            .row("401", false, null, 101L, 0L)
            .row("402", true, null, 102L, 0L)
            .row("500", true, null, 200L, 0L)
            .row("501", false, null, 201L, 0L)
            .row("502", true, null, 202L, 0L)
            .row(null, null, 1000L, 100L, 1L)
            .row(null, null, 1001L, 101L, 1L)
            .row(null, null, 1002L, 102L, 1L)
            .row(null, null, 1100L, 200L, 1L)
            .row(null, null, 1101L, 201L, 1L)
            .row(null, null, 1102L, 202L, 1L)
            .build();

    assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected);
  }
Exemplo n.º 6
0
  @Test
  public void testWriter() throws Exception {
    List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L);
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    MapType mapType = new MapType(createVarcharType(10), BOOLEAN);
    List<Type> columnTypes =
        ImmutableList.of(
            BIGINT,
            createVarcharType(10),
            VARBINARY,
            DOUBLE,
            BOOLEAN,
            arrayType,
            mapType,
            arrayOfArrayType);
    File file = new File(directory, System.nanoTime() + ".orc");

    byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
    byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);

    RowPagesBuilder rowPagesBuilder =
        RowPagesBuilder.rowPagesBuilder(columnTypes)
            .row(
                123L,
                "hello",
                wrappedBuffer(bytes1),
                123.456,
                true,
                arrayBlockOf(BIGINT, 1, 2),
                mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true),
                arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)))
            .row(
                null,
                "world",
                null,
                Double.POSITIVE_INFINITY,
                null,
                arrayBlockOf(BIGINT, 3, null),
                mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null),
                arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7)))
            .row(
                456L,
                "bye \u2603",
                wrappedBuffer(bytes3),
                Double.NaN,
                false,
                arrayBlockOf(BIGINT),
                mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false),
                arrayBlockOf(arrayType, arrayBlockOf(BIGINT)));

    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
        OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
      writer.appendPages(rowPagesBuilder.build());
    }

    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
      OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
      assertEquals(reader.getReaderRowCount(), 3);
      assertEquals(reader.getReaderPosition(), 0);
      assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
      assertEquals(reader.getFilePosition(), reader.getFilePosition());

      assertEquals(reader.nextBatch(), 3);
      assertEquals(reader.getReaderPosition(), 0);
      assertEquals(reader.getFilePosition(), reader.getFilePosition());

      Block column0 = reader.readBlock(BIGINT, 0);
      assertEquals(column0.isNull(0), false);
      assertEquals(column0.isNull(1), true);
      assertEquals(column0.isNull(2), false);
      assertEquals(BIGINT.getLong(column0, 0), 123L);
      assertEquals(BIGINT.getLong(column0, 2), 456L);

      Block column1 = reader.readBlock(createVarcharType(10), 1);
      assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
      assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
      assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye \u2603"));

      Block column2 = reader.readBlock(VARBINARY, 2);
      assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
      assertEquals(column2.isNull(1), true);
      assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));

      Block column3 = reader.readBlock(DOUBLE, 3);
      assertEquals(column3.isNull(0), false);
      assertEquals(column3.isNull(1), false);
      assertEquals(column3.isNull(2), false);
      assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
      assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
      assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);

      Block column4 = reader.readBlock(BOOLEAN, 4);
      assertEquals(column4.isNull(0), false);
      assertEquals(column4.isNull(1), true);
      assertEquals(column4.isNull(2), false);
      assertEquals(BOOLEAN.getBoolean(column4, 0), true);
      assertEquals(BOOLEAN.getBoolean(column4, 2), false);

      Block column5 = reader.readBlock(arrayType, 5);
      assertEquals(column5.getPositionCount(), 3);

      assertTrue(
          arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
      assertTrue(
          arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
      assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));

      Block column6 = reader.readBlock(mapType, 6);
      assertEquals(column6.getPositionCount(), 3);

      assertTrue(
          mapBlocksEqual(
              createVarcharType(5),
              BOOLEAN,
              arrayType.getObject(column6, 0),
              mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
      assertTrue(
          mapBlocksEqual(
              createVarcharType(5),
              BOOLEAN,
              arrayType.getObject(column6, 1),
              mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null)));
      assertTrue(
          mapBlocksEqual(
              createVarcharType(5),
              BOOLEAN,
              arrayType.getObject(column6, 2),
              mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));

      Block column7 = reader.readBlock(arrayOfArrayType, 7);
      assertEquals(column7.getPositionCount(), 3);

      assertTrue(
          arrayBlocksEqual(
              arrayType,
              arrayOfArrayType.getObject(column7, 0),
              arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
      assertTrue(
          arrayBlocksEqual(
              arrayType,
              arrayOfArrayType.getObject(column7, 1),
              arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
      assertTrue(
          arrayBlocksEqual(
              arrayType,
              arrayOfArrayType.getObject(column7, 2),
              arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));

      assertEquals(reader.nextBatch(), -1);
      assertEquals(reader.getReaderPosition(), 3);
      assertEquals(reader.getFilePosition(), reader.getFilePosition());

      OrcFileMetadata orcFileMetadata =
          METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
      assertEquals(
          orcFileMetadata,
          new OrcFileMetadata(
              ImmutableMap.<Long, TypeSignature>builder()
                  .put(1L, BIGINT.getTypeSignature())
                  .put(2L, createVarcharType(10).getTypeSignature())
                  .put(4L, VARBINARY.getTypeSignature())
                  .put(6L, DOUBLE.getTypeSignature())
                  .put(7L, BOOLEAN.getTypeSignature())
                  .put(8L, arrayType.getTypeSignature())
                  .put(9L, mapType.getTypeSignature())
                  .put(10L, arrayOfArrayType.getTypeSignature())
                  .build()));
    }

    File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
    assertFalse(crcFile.exists());
  }
Exemplo n.º 7
0
  private static LookupSourceSupplier buildHash(
      boolean parallelBuild,
      TaskContext taskContext,
      List<Integer> hashChannels,
      RowPagesBuilder buildPages) {
    if (parallelBuild) {
      ParallelHashBuilder parallelHashBuilder =
          new ParallelHashBuilder(
              buildPages.getTypes(),
              hashChannels,
              buildPages.getHashChannel(),
              100,
              PARTITION_COUNT);

      // collect input data
      DriverContext collectDriverContext =
          taskContext.addPipelineContext(true, true).addDriverContext();
      ValuesOperatorFactory valuesOperatorFactory =
          new ValuesOperatorFactory(
              0, new PlanNodeId("test"), buildPages.getTypes(), buildPages.build());
      OperatorFactory collectOperatorFactory =
          parallelHashBuilder.getCollectOperatorFactory(1, new PlanNodeId("test"));
      Driver driver =
          new Driver(
              collectDriverContext,
              valuesOperatorFactory.createOperator(collectDriverContext),
              collectOperatorFactory.createOperator(collectDriverContext));

      while (!driver.isFinished()) {
        driver.process();
      }

      // build hash tables
      PipelineContext buildPipeline = taskContext.addPipelineContext(true, true);
      OperatorFactory buildOperatorFactory =
          parallelHashBuilder.getBuildOperatorFactory(new PlanNodeId("test"));
      for (int i = 0; i < PARTITION_COUNT; i++) {
        DriverContext buildDriverContext = buildPipeline.addDriverContext();
        Driver buildDriver =
            new Driver(buildDriverContext, buildOperatorFactory.createOperator(buildDriverContext));

        while (!buildDriver.isFinished()) {
          buildDriver.process();
        }
      }

      return parallelHashBuilder.getLookupSourceSupplier();
    } else {
      DriverContext driverContext = taskContext.addPipelineContext(true, true).addDriverContext();

      ValuesOperatorFactory valuesOperatorFactory =
          new ValuesOperatorFactory(
              0, new PlanNodeId("test"), buildPages.getTypes(), buildPages.build());
      HashBuilderOperatorFactory hashBuilderOperatorFactory =
          new HashBuilderOperatorFactory(
              1,
              new PlanNodeId("test"),
              buildPages.getTypes(),
              hashChannels,
              buildPages.getHashChannel(),
              100);

      Driver driver =
          new Driver(
              driverContext,
              valuesOperatorFactory.createOperator(driverContext),
              hashBuilderOperatorFactory.createOperator(driverContext));

      while (!driver.isFinished()) {
        driver.process();
      }
      return hashBuilderOperatorFactory.getLookupSourceSupplier();
    }
  }