Ejemplo n.º 1
0
  /**
   * Writes three rows covering eight column types (BIGINT, VARCHAR, VARBINARY, DOUBLE, BOOLEAN,
   * array, map and array-of-array) with {@code OrcFileWriter}, then reads the file back and checks
   * row counts, reader positions, every column value (including nulls), the stored {@code
   * OrcFileMetadata}, and that no {@code .crc} file was left next to the output.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  @Test
  public void testWriter() throws Exception {
    List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L);
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    MapType mapType = new MapType(createVarcharType(10), BOOLEAN);
    List<Type> columnTypes =
        ImmutableList.of(
            BIGINT,
            createVarcharType(10),
            VARBINARY,
            DOUBLE,
            BOOLEAN,
            arrayType,
            mapType,
            arrayOfArrayType);
    File file = new File(directory, System.nanoTime() + ".orc");

    // Binary payloads for the first and third rows; the second row has a null VARBINARY value.
    byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
    byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);

    // The second row carries nulls in the BIGINT, VARBINARY and BOOLEAN columns, plus null
    // elements inside its array, map and nested array values.
    RowPagesBuilder rowPagesBuilder =
        RowPagesBuilder.rowPagesBuilder(columnTypes)
            .row(
                123L,
                "hello",
                wrappedBuffer(bytes1),
                123.456,
                true,
                arrayBlockOf(BIGINT, 1, 2),
                mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true),
                arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)))
            .row(
                null,
                "world",
                null,
                Double.POSITIVE_INFINITY,
                null,
                arrayBlockOf(BIGINT, 3, null),
                mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null),
                arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7)))
            .row(
                456L,
                "bye \u2603",
                wrappedBuffer(bytes3),
                Double.NaN,
                false,
                arrayBlockOf(BIGINT),
                mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false),
                arrayBlockOf(arrayType, arrayBlockOf(BIGINT)));

    // The writer runs with an empty thread context class loader installed for its lifetime.
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
        OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
      writer.appendPages(rowPagesBuilder.build());
    }

    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
      OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
      assertEquals(reader.getReaderRowCount(), 3);
      assertEquals(reader.getReaderPosition(), 0);
      assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
      // NOTE(review): assumes createReader covers the whole file (row counts are asserted equal
      // just above), so the file position and the reader position must coincide.
      assertEquals(reader.getFilePosition(), reader.getReaderPosition());

      assertEquals(reader.nextBatch(), 3);
      assertEquals(reader.getReaderPosition(), 0);
      assertEquals(reader.getFilePosition(), reader.getReaderPosition());

      Block column0 = reader.readBlock(BIGINT, 0);
      assertFalse(column0.isNull(0));
      assertTrue(column0.isNull(1));
      assertFalse(column0.isNull(2));
      assertEquals(BIGINT.getLong(column0, 0), 123L);
      assertEquals(BIGINT.getLong(column0, 2), 456L);

      Block column1 = reader.readBlock(createVarcharType(10), 1);
      assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
      assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
      assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye \u2603"));

      Block column2 = reader.readBlock(VARBINARY, 2);
      assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
      assertTrue(column2.isNull(1));
      assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));

      Block column3 = reader.readBlock(DOUBLE, 3);
      assertFalse(column3.isNull(0));
      assertFalse(column3.isNull(1));
      assertFalse(column3.isNull(2));
      assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
      assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
      assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);

      Block column4 = reader.readBlock(BOOLEAN, 4);
      assertFalse(column4.isNull(0));
      assertTrue(column4.isNull(1));
      assertFalse(column4.isNull(2));
      assertTrue(BOOLEAN.getBoolean(column4, 0));
      assertFalse(BOOLEAN.getBoolean(column4, 2));

      Block column5 = reader.readBlock(arrayType, 5);
      assertEquals(column5.getPositionCount(), 3);

      assertTrue(
          arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
      assertTrue(
          arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
      assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));

      // The map column is decoded with the map type it was read with.
      Block column6 = reader.readBlock(mapType, 6);
      assertEquals(column6.getPositionCount(), 3);

      assertTrue(
          mapBlocksEqual(
              createVarcharType(5),
              BOOLEAN,
              mapType.getObject(column6, 0),
              mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
      assertTrue(
          mapBlocksEqual(
              createVarcharType(5),
              BOOLEAN,
              mapType.getObject(column6, 1),
              mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null)));
      assertTrue(
          mapBlocksEqual(
              createVarcharType(5),
              BOOLEAN,
              mapType.getObject(column6, 2),
              mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));

      Block column7 = reader.readBlock(arrayOfArrayType, 7);
      assertEquals(column7.getPositionCount(), 3);

      assertTrue(
          arrayBlocksEqual(
              arrayType,
              arrayOfArrayType.getObject(column7, 0),
              arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
      assertTrue(
          arrayBlocksEqual(
              arrayType,
              arrayOfArrayType.getObject(column7, 1),
              arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
      assertTrue(
          arrayBlocksEqual(
              arrayType,
              arrayOfArrayType.getObject(column7, 2),
              arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));

      // After the single batch the reader is exhausted and positioned past the last row.
      assertEquals(reader.nextBatch(), -1);
      assertEquals(reader.getReaderPosition(), 3);
      assertEquals(reader.getFilePosition(), reader.getReaderPosition());

      // The user metadata must map each column id to the type signature it was written with.
      OrcFileMetadata orcFileMetadata =
          METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
      assertEquals(
          orcFileMetadata,
          new OrcFileMetadata(
              ImmutableMap.<Long, TypeSignature>builder()
                  .put(1L, BIGINT.getTypeSignature())
                  .put(2L, createVarcharType(10).getTypeSignature())
                  .put(4L, VARBINARY.getTypeSignature())
                  .put(6L, DOUBLE.getTypeSignature())
                  .put(7L, BOOLEAN.getTypeSignature())
                  .put(8L, arrayType.getTypeSignature())
                  .put(9L, mapType.getTypeSignature())
                  .put(10L, arrayOfArrayType.getTypeSignature())
                  .build()));
    }

    // No hidden ".<name>.crc" file may exist beside the output.
    // NOTE(review): presumably guards against a checksum sidecar written by the file system
    // layer — confirm.
    File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
    assertFalse(crcFile.exists());
  }
Ejemplo n.º 2
0
  /**
   * Feeds the pages of {@code buildPages} through a hash-building pipeline and returns the
   * resulting lookup source supplier.
   *
   * <p>When {@code parallelBuild} is false, a single driver runs a values operator into a hash
   * builder operator. When it is true, a {@code ParallelHashBuilder} is used: one driver collects
   * the input, then one driver per partition (up to {@code PARTITION_COUNT}) runs the build
   * operator. Every driver is processed until it reports that it is finished.
   *
   * @param parallelBuild whether to use the partitioned {@code ParallelHashBuilder} path
   * @param taskContext task context that the pipeline and driver contexts are added to
   * @param hashChannels channels passed to the hash builder as the hash channels
   * @param buildPages supplies the types, the hash channel and the pages to index
   * @return the lookup source supplier exposed by the builder that was used
   */
  private static LookupSourceSupplier buildHash(
      boolean parallelBuild,
      TaskContext taskContext,
      List<Integer> hashChannels,
      RowPagesBuilder buildPages) {
    if (!parallelBuild) {
      // Single-driver path: values -> hash builder.
      PipelineContext pipeline = taskContext.addPipelineContext(true, true);
      DriverContext context = pipeline.addDriverContext();

      ValuesOperatorFactory sourceFactory =
          new ValuesOperatorFactory(
              0, new PlanNodeId("test"), buildPages.getTypes(), buildPages.build());
      HashBuilderOperatorFactory hashFactory =
          new HashBuilderOperatorFactory(
              1,
              new PlanNodeId("test"),
              buildPages.getTypes(),
              hashChannels,
              buildPages.getHashChannel(),
              100);

      Driver singleDriver =
          new Driver(
              context, sourceFactory.createOperator(context), hashFactory.createOperator(context));
      while (!singleDriver.isFinished()) {
        singleDriver.process();
      }
      return hashFactory.getLookupSourceSupplier();
    }

    ParallelHashBuilder partitionedBuilder =
        new ParallelHashBuilder(
            buildPages.getTypes(),
            hashChannels,
            buildPages.getHashChannel(),
            100,
            PARTITION_COUNT);

    // Phase 1: push all input pages into the collect operator.
    PipelineContext collectPipeline = taskContext.addPipelineContext(true, true);
    DriverContext collectContext = collectPipeline.addDriverContext();
    ValuesOperatorFactory inputFactory =
        new ValuesOperatorFactory(
            0, new PlanNodeId("test"), buildPages.getTypes(), buildPages.build());
    OperatorFactory collectFactory =
        partitionedBuilder.getCollectOperatorFactory(1, new PlanNodeId("test"));
    Driver collectDriver =
        new Driver(
            collectContext,
            inputFactory.createOperator(collectContext),
            collectFactory.createOperator(collectContext));
    while (!collectDriver.isFinished()) {
      collectDriver.process();
    }

    // Phase 2: run one build driver per partition, each to completion, in order.
    PipelineContext buildPipeline = taskContext.addPipelineContext(true, true);
    OperatorFactory buildFactory =
        partitionedBuilder.getBuildOperatorFactory(new PlanNodeId("test"));
    for (int partition = 0; partition < PARTITION_COUNT; partition++) {
      DriverContext partitionContext = buildPipeline.addDriverContext();
      Driver partitionDriver =
          new Driver(partitionContext, buildFactory.createOperator(partitionContext));
      while (!partitionDriver.isFinished()) {
        partitionDriver.process();
      }
    }

    return partitionedBuilder.getLookupSourceSupplier();
  }