Esempio n. 1
0
  /**
   * Binds fixed values to the {@code t_float} and {@code t_double} columns of the bucketed table
   * and checks that no splits are pruned: all 32 splits come back and together hold 300 rows.
   */
  @Test
  public void testBucketedTableDoubleFloat() throws Exception {
    ConnectorTableHandle handle = getTableHandle(tableBucketedDoubleFloat);
    List<ConnectorColumnHandle> handles =
        ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
    Map<String, Integer> positions = indexColumns(handles);

    assertTableIsBucketed(handle);

    ConnectorColumnHandle floatColumn = handles.get(positions.get("t_float"));
    ConnectorColumnHandle doubleColumn = handles.get(positions.get("t_double"));
    ImmutableMap<ConnectorColumnHandle, Comparable<?>> fixedValues =
        ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(floatColumn, 406.1000061035156)
            .put(doubleColumn, 407.2)
            .build();

    // Float and double bindings are not supported for bucket pruning, so every split is expected.
    ConnectorPartitionResult lookup =
        splitManager.getPartitions(handle, TupleDomain.withFixedValues(fixedValues));
    List<ConnectorSplit> allSplits =
        getAllSplits(splitManager.getPartitionSplits(handle, lookup.getPartitions()));
    assertEquals(allSplits.size(), 32);

    // Drain every split and tally the rows across all of them.
    int totalRows = 0;
    for (ConnectorSplit each : allSplits) {
      try (RecordCursor rows = recordSetProvider.getRecordSet(each, handles).cursor()) {
        while (rows.advanceNextPosition()) {
          totalRows += 1;
        }
      }
    }
    assertEquals(totalRows, 300);
  }
Esempio n. 2
0
  /**
   * Binds fixed values to {@code t_string}, {@code t_bigint} and {@code t_boolean} on the bucketed
   * table and checks that exactly one split is returned and that it contains a row carrying
   * those three values.
   */
  @Test
  public void testBucketedTableBigintBoolean() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedBigintBoolean);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    String testString = "textfile test";
    // This needs to match one of the rows where t_string is not empty or null, and where t_bigint
    // is not null
    // (i.e. (testBigint - 604) % 19 > 1 and (testBigint - 604) % 13 != 0)
    //
    // Primitives are used on purpose: the values are compared with == below, which must be a
    // value comparison rather than a reference comparison of boxed objects. They are autoboxed
    // when placed into the bindings map.
    long testBigint = 608L;
    boolean testBoolean = true;

    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings =
        ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_string")), utf8Slice(testString))
            .put(columnHandles.get(columnIndex.get("t_bigint")), testBigint)
            .put(columnHandles.get(columnIndex.get("t_boolean")), testBoolean)
            .build();

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    boolean rowFound = false;
    try (RecordCursor cursor =
        recordSetProvider.getRecordSet(splits.get(0), columnHandles).cursor()) {
      while (cursor.advanceNextPosition()) {
        if (testString.equals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8())
            && testBigint == cursor.getLong(columnIndex.get("t_bigint"))
            && testBoolean == cursor.getBoolean(columnIndex.get("t_boolean"))) {
          // One matching row is enough; stop scanning.
          rowFound = true;
          break;
        }
      }
    }
    assertTrue(rowFound);
  }
Esempio n. 3
0
  /**
   * Reads the unpartitioned table through its single split and checks the cursor type, the
   * reported total bytes, the {@code t_string} pattern (null, empty or "unpartitioned" depending
   * on the 1-based row number modulo 19), the {@code t_tinyint} value, and a final count of
   * 100 rows.
   */
  @Test
  public void testGetRecordsUnpartitioned() throws Exception {
    ConnectorTableHandle handle = getTableHandle(tableUnpartitioned);
    List<ConnectorColumnHandle> handles =
        ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
    Map<String, Integer> positions = indexColumns(handles);

    ConnectorPartitionResult lookup =
        splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> allSplits =
        getAllSplits(splitManager.getPartitionSplits(handle, lookup.getPartitions()));
    assertEquals(allSplits.size(), 1);

    for (ConnectorSplit each : allSplits) {
      HiveSplit hiveSplit = (HiveSplit) each;

      // An unpartitioned table carries no partition keys on its split.
      assertEquals(hiveSplit.getPartitionKeys(), ImmutableList.of());

      long rowsSeen = 0;
      try (RecordCursor rows = recordSetProvider.getRecordSet(each, handles).cursor()) {
        assertRecordCursorType(rows, "textfile");
        assertEquals(rows.getTotalBytes(), hiveSplit.getLength());

        while (rows.advanceNextPosition()) {
          rowsSeen++;

          long stringCase = rowsSeen % 19;
          if (stringCase == 0) {
            assertTrue(rows.isNull(positions.get("t_string")));
          } else {
            String expectedString = (stringCase == 1) ? "" : "unpartitioned";
            assertEquals(rows.getSlice(positions.get("t_string")).toStringUtf8(), expectedString);
          }

          assertEquals(rows.getLong(positions.get("t_tinyint")), 1 + rowsSeen);
        }
      }
      assertEquals(rowsSeen, 100);
    }
  }
Esempio n. 4
0
  /**
   * Binds fixed values to {@code t_int}, {@code t_string} and {@code t_smallint} on the bucketed
   * table and checks that exactly one split is returned and that it contains a row carrying
   * those three values.
   */
  @Test
  public void testBucketedTableStringInt() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedStringInt);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    String testString = "sequencefile test";
    // Primitives are used on purpose: the values are compared with == below, which must be a
    // value comparison rather than a reference comparison of boxed objects. They are autoboxed
    // when placed into the bindings map.
    long testInt = 413L;
    long testSmallint = 412L;

    // Reverse the order of bindings as compared to bucketing order
    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings =
        ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_int")), testInt)
            .put(columnHandles.get(columnIndex.get("t_string")), utf8Slice(testString))
            .put(columnHandles.get(columnIndex.get("t_smallint")), testSmallint)
            .build();

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    boolean rowFound = false;
    try (RecordCursor cursor =
        recordSetProvider.getRecordSet(splits.get(0), columnHandles).cursor()) {
      while (cursor.advanceNextPosition()) {
        if (testString.equals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8())
            && testInt == cursor.getLong(columnIndex.get("t_int"))
            && testSmallint == cursor.getLong(columnIndex.get("t_smallint"))) {
          // One matching row is enough; stop scanning, as the sibling bucketed test does.
          rowFound = true;
          break;
        }
      }
    }
    assertTrue(rowFound);
  }
Esempio n. 5
0
  /**
   * Reads every split of the partitioned table but inspects only a subset of the columns:
   * {@code t_double} plus the three partition columns ({@code ds}, {@code file_format},
   * {@code dummy}). Each split is expected to yield exactly 100 rows.
   */
  @Test
  public void testGetPartialRecords() throws Exception {
    ConnectorTableHandle handle = getTableHandle(table);
    List<ConnectorColumnHandle> handles =
        ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
    Map<String, Integer> positions = indexColumns(handles);

    ConnectorPartitionResult lookup =
        splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> allSplits =
        getAllSplits(splitManager.getPartitionSplits(handle, lookup.getPartitions()));
    assertEquals(allSplits.size(), this.partitions.size());

    for (ConnectorSplit each : allSplits) {
      HiveSplit hiveSplit = (HiveSplit) each;

      // Partition key order: ds, file format, dummy.
      List<HivePartitionKey> keys = hiveSplit.getPartitionKeys();
      String expectedDs = keys.get(0).getValue();
      String format = keys.get(1).getValue();
      long expectedDummy = Long.parseLong(keys.get(2).getValue());

      long base = getBaseValueForFileType(format);

      long rowsSeen = 0;
      try (RecordCursor rows = recordSetProvider.getRecordSet(hiveSplit, handles).cursor()) {
        assertRecordCursorType(rows, format);
        while (rows.advanceNextPosition()) {
          rowsSeen++;

          double expectedDouble = base + 6.2 + rowsSeen;
          assertEquals(rows.getDouble(positions.get("t_double")), expectedDouble);
          assertEquals(rows.getSlice(positions.get("ds")).toStringUtf8(), expectedDs);
          assertEquals(rows.getSlice(positions.get("file_format")).toStringUtf8(), format);
          assertEquals(rows.getLong(positions.get("dummy")), expectedDummy);
        }
      }
      assertEquals(rowsSeen, 100);
    }
  }
  /**
   * Reads every split of the Cassandra test table and checks each row's typed columns against
   * values derived from the numeric id embedded in its {@code key} column ("key N"). Nine rows
   * are expected in total across all splits, and the cursor's completed byte count must never
   * decrease within a split.
   */
  @Test
  public void testGetRecords() throws Exception {
    ConnectorTableHandle handle = getTableHandle(table);
    ConnectorTableMetadata schema = metadata.getTableMetadata(SESSION, handle);
    List<ColumnHandle> handles =
        ImmutableList.copyOf(metadata.getColumnHandles(SESSION, handle).values());
    Map<String, Integer> positions = indexColumns(handles);

    ConnectorPartitionResult lookup =
        splitManager.getPartitions(SESSION, handle, TupleDomain.<ColumnHandle>all());
    List<ConnectorSplit> allSplits =
        getAllSplits(splitManager.getPartitionSplits(SESSION, handle, lookup.getPartitions()));

    // The row counter spans all splits; the byte counter restarts for each split.
    long rowsSeen = 0;
    for (ConnectorSplit each : allSplits) {
      CassandraSplit cassandraSplit = (CassandraSplit) each;

      long bytesSoFar = 0;
      try (RecordCursor rows =
          recordSetProvider.getRecordSet(SESSION, cassandraSplit, handles).cursor()) {
        while (rows.advanceNextPosition()) {
          try {
            assertReadFields(rows, schema.getColumns());
          } catch (RuntimeException e) {
            // Attach the row counter (value before this row is counted) to the failure.
            throw new RuntimeException("row " + rowsSeen, e);
          }

          rowsSeen++;

          String key = rows.getSlice(positions.get("key")).toStringUtf8();
          assertTrue(key.startsWith("key "));
          int rowId = Integer.parseInt(key.substring("key ".length()));

          assertEquals(key, String.format("key %d", rowId));

          String actualBytesHex =
              Bytes.toHexString(rows.getSlice(positions.get("typebytes")).getBytes());
          assertEquals(actualBytesHex, String.format("0x%08X", rowId));

          // VARINT is returned as a string
          String actualVarint = rows.getSlice(positions.get("typeinteger")).toStringUtf8();
          assertEquals(actualVarint, Integer.toString(rowId));

          assertEquals(rows.getLong(positions.get("typelong")), 1000 + rowId);

          String actualUuid = rows.getSlice(positions.get("typeuuid")).toStringUtf8();
          assertEquals(actualUuid, String.format("00000000-0000-0000-0000-%012d", rowId));

          String actualTimestamp = rows.getSlice(positions.get("typetimestamp")).toStringUtf8();
          assertEquals(actualTimestamp, Long.toString(DATE.getTime()));

          long bytesNow = rows.getCompletedBytes();
          assertTrue(bytesNow >= bytesSoFar);
          bytesSoFar = bytesNow;
        }
      }
    }
    assertEquals(rowsSeen, 9);
  }
 /**
  * Forwards the column-handle lookup to {@code delegate} while a {@link ThreadContextClassLoader}
  * built from {@code classLoader} is open; the resource is closed again before returning.
  *
  * <p>NOTE(review): presumably the resource swaps the thread's context class loader for the
  * duration of the call; confirm against {@code ThreadContextClassLoader}.
  */
 @Override
 public Map<String, ColumnHandle> getColumnHandles(TableHandle tableHandle) {
   try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
     Map<String, ColumnHandle> handles = delegate.getColumnHandles(tableHandle);
     return handles;
   }
 }
Esempio n. 8
0
  /**
   * Creates the temporary sampled table with a single {@code sales} column, writes three
   * weighted records through a {@link RecordSink}, commits the table, and then reads it back to
   * verify the owner, the column metadata and the stored (value, sample weight) pairs.
   *
   * @throws InterruptedException declared by the original signature; propagated from the calls
   *     made here
   */
  private void doCreateSampledTable() throws InterruptedException {
    // Each entry is {sample weight, sales value}, in the order the records are written.
    int[][] weightedSales = {{8, 2}, {5, 3}, {7, 4}};

    // begin creating the table
    List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("sales", BIGINT, 1, false));

    // NOTE(review): the trailing 'true' presumably marks the table as sampled; confirm against
    // ConnectorTableMetadata.
    ConnectorTableMetadata requestedMetadata =
        new ConnectorTableMetadata(temporaryCreateSampledTable, columns, tableOwner, true);
    ConnectorOutputTableHandle outputHandle =
        metadata.beginCreateTable(SESSION, requestedMetadata);

    // write the records
    RecordSink sink = recordSinkProvider.getRecordSink(outputHandle);
    for (int[] record : weightedSales) {
      sink.beginRecord(record[0]);
      sink.appendLong(record[1]);
      sink.finishRecord();
    }
    String fragment = sink.commit();

    // commit the table
    metadata.commitCreateTable(outputHandle, ImmutableList.of(fragment));

    // load the new table: regular columns followed by the sample weight column
    ConnectorTableHandle handle = getTableHandle(temporaryCreateSampledTable);
    List<ConnectorColumnHandle> handles =
        ImmutableList.<ConnectorColumnHandle>builder()
            .addAll(metadata.getColumnHandles(handle).values())
            .add(metadata.getSampleWeightColumnHandle(handle))
            .build();
    assertEquals(handles.size(), 2);

    // verify the metadata
    ConnectorTableMetadata storedMetadata =
        metadata.getTableMetadata(getTableHandle(temporaryCreateSampledTable));
    assertEquals(storedMetadata.getOwner(), tableOwner);

    Map<String, ColumnMetadata> columnsByName =
        uniqueIndex(storedMetadata.getColumns(), columnNameGetter());
    assertEquals(columnsByName.size(), 1);

    assertPrimitiveField(columnsByName, 0, "sales", BIGINT, false);

    // verify the data
    ConnectorPartitionResult lookup =
        splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(lookup.getPartitions().size(), 1);
    ConnectorSplitSource splitSource =
        splitManager.getPartitionSplits(handle, lookup.getPartitions());
    ConnectorSplit onlySplit = getOnlyElement(getAllSplits(splitSource));

    try (RecordCursor rows = recordSetProvider.getRecordSet(onlySplit, handles).cursor()) {
      assertRecordCursorType(rows, "rcfile-binary");

      // Field 0 is the sales value, field 1 the sample weight.
      for (int[] record : weightedSales) {
        assertTrue(rows.advanceNextPosition());
        assertEquals(rows.getLong(0), record[1]);
        assertEquals(rows.getLong(1), record[0]);
      }

      assertFalse(rows.advanceNextPosition());
    }
  }
Esempio n. 9
0
  /**
   * Reads every split of the partitioned test table and checks each row against the values
   * expected for its partition's file type.
   *
   * <p>Several columns are expected to be null on a fixed cadence of the 1-based row number (for
   * example {@code t_string} every 19th row and {@code t_bigint} every 13th); the numeric values
   * are derived from the per-file-type base value plus the row number. Each split must contain
   * exactly 100 rows, and the cursor's completed byte count must never decrease nor exceed the
   * split length.
   */
  @Test
  public void testGetRecords() throws Exception {
    ConnectorTableHandle handle = getTableHandle(table);
    ConnectorTableMetadata schema = metadata.getTableMetadata(handle);
    List<ConnectorColumnHandle> handles =
        ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
    Map<String, Integer> positions = indexColumns(handles);

    ConnectorPartitionResult lookup =
        splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> allSplits =
        getAllSplits(splitManager.getPartitionSplits(handle, lookup.getPartitions()));
    assertEquals(allSplits.size(), this.partitions.size());

    for (ConnectorSplit each : allSplits) {
      HiveSplit hiveSplit = (HiveSplit) each;

      // Partition key order: ds, file format, dummy.
      List<HivePartitionKey> keys = hiveSplit.getPartitionKeys();
      String expectedDs = keys.get(0).getValue();
      String fileType = keys.get(1).getValue();
      long expectedDummy = Long.parseLong(keys.get(2).getValue());

      long base = getBaseValueForFileType(fileType);
      assertEquals(expectedDummy * 100, base);

      // Text expected in the string and binary columns, and used as the timestamp failure message.
      String expectedText = fileType + " test";

      long rowsSeen = 0;
      long bytesSoFar = 0;
      try (RecordCursor rows = recordSetProvider.getRecordSet(hiveSplit, handles).cursor()) {
        assertRecordCursorType(rows, fileType);
        assertEquals(rows.getTotalBytes(), hiveSplit.getLength());

        while (rows.advanceNextPosition()) {
          try {
            assertReadFields(rows, schema.getColumns());
          } catch (RuntimeException e) {
            // Attach the row counter (value before this row is counted) to the failure.
            throw new RuntimeException("row " + rowsSeen, e);
          }

          rowsSeen++;

          long stringCase = rowsSeen % 19;
          if (stringCase == 0) {
            assertTrue(rows.isNull(positions.get("t_string")));
          } else {
            String expectedString = (stringCase == 1) ? "" : expectedText;
            assertEquals(rows.getSlice(positions.get("t_string")).toStringUtf8(), expectedString);
          }

          // The expected tinyint is narrowed to a byte first, so it wraps like the stored value.
          long expectedTinyint = (long) ((byte) (base + 1 + rowsSeen));
          assertEquals(rows.getLong(positions.get("t_tinyint")), expectedTinyint);
          assertEquals(rows.getLong(positions.get("t_smallint")), base + 2 + rowsSeen);
          assertEquals(rows.getLong(positions.get("t_int")), base + 3 + rowsSeen);

          if (rowsSeen % 13 != 0) {
            assertEquals(rows.getLong(positions.get("t_bigint")), base + 4 + rowsSeen);
          } else {
            assertTrue(rows.isNull(positions.get("t_bigint")));
          }

          assertEquals(rows.getDouble(positions.get("t_float")), base + 5.1 + rowsSeen, 0.001);
          assertEquals(rows.getDouble(positions.get("t_double")), base + 6.2 + rowsSeen);

          if (rowsSeen % 3 != 2) {
            assertEquals(rows.getBoolean(positions.get("t_boolean")), rowsSeen % 3 != 0);
          } else {
            assertTrue(rows.isNull(positions.get("t_boolean")));
          }

          if (rowsSeen % 17 != 0) {
            long millis = new DateTime(2011, 5, 6, 7, 8, 9, 123, timeZone).getMillis();
            assertEquals(rows.getLong(positions.get("t_timestamp")), millis, expectedText);
          } else {
            assertTrue(rows.isNull(positions.get("t_timestamp")));
          }

          if (rowsSeen % 23 != 0) {
            assertEquals(rows.getSlice(positions.get("t_binary")).toStringUtf8(), expectedText);
          } else {
            assertTrue(rows.isNull(positions.get("t_binary")));
          }

          if (rowsSeen % 29 != 0) {
            String expectedMap = "{\"format\":\"" + fileType + "\"}";
            assertEquals(rows.getSlice(positions.get("t_map")).toStringUtf8(), expectedMap);
          } else {
            assertTrue(rows.isNull(positions.get("t_map")));
          }

          if (rowsSeen % 27 != 0) {
            String expectedArray = "[\"" + fileType + "\",\"test\",\"data\"]";
            assertEquals(
                rows.getSlice(positions.get("t_array_string")).toStringUtf8(), expectedArray);
          } else {
            assertTrue(rows.isNull(positions.get("t_array_string")));
          }

          if (rowsSeen % 31 != 0) {
            String expectedComplex =
                "{\"1\":[{\"s_string\":\""
                    + fileType
                    + "-a\",\"s_double\":0.1},{\"s_string\":\""
                    + fileType
                    + "-b\",\"s_double\":0.2}]}";
            assertEquals(rows.getSlice(positions.get("t_complex")).toStringUtf8(), expectedComplex);
          } else {
            assertTrue(rows.isNull(positions.get("t_complex")));
          }

          // Partition columns must echo the split's partition keys.
          assertEquals(rows.getSlice(positions.get("ds")).toStringUtf8(), expectedDs);
          assertEquals(rows.getSlice(positions.get("file_format")).toStringUtf8(), fileType);
          assertEquals(rows.getLong(positions.get("dummy")), expectedDummy);

          long bytesNow = rows.getCompletedBytes();
          assertTrue(bytesNow >= bytesSoFar);
          assertTrue(bytesNow <= hiveSplit.getLength());
          bytesSoFar = bytesNow;
        }
      }
      assertTrue(bytesSoFar <= hiveSplit.getLength());
      assertEquals(rowsSeen, 100);
    }
  }
Esempio n. 10
0
  /**
   * Creates the temporary table with five columns, writes three records through a
   * {@link RecordSink} (the second one null in every column except {@code id}), commits the
   * table, and then reads it back to verify the owner, the column metadata and the stored rows.
   *
   * @throws InterruptedException declared by the original signature; propagated from the calls
   *     made here
   */
  private void doCreateTable() throws InterruptedException {
    // begin creating the table
    List<ColumnMetadata> columns =
        ImmutableList.of(
            new ColumnMetadata("id", BIGINT, 1, false),
            new ColumnMetadata("t_string", VARCHAR, 2, false),
            new ColumnMetadata("t_bigint", BIGINT, 3, false),
            new ColumnMetadata("t_double", DOUBLE, 4, false),
            new ColumnMetadata("t_boolean", BOOLEAN, 5, false));

    ConnectorTableMetadata requestedMetadata =
        new ConnectorTableMetadata(temporaryCreateTable, columns, tableOwner);
    ConnectorOutputTableHandle outputHandle =
        metadata.beginCreateTable(SESSION, requestedMetadata);

    // write the records
    RecordSink sink = recordSinkProvider.getRecordSink(outputHandle);

    // Record 1: every column populated.
    sink.beginRecord(1);
    sink.appendLong(1);
    sink.appendString("hello".getBytes(UTF_8));
    sink.appendLong(123);
    sink.appendDouble(43.5);
    sink.appendBoolean(true);
    sink.finishRecord();

    // Record 2: only the id is set; the four remaining columns are null.
    sink.beginRecord(1);
    sink.appendLong(2);
    for (int nulls = 0; nulls < 4; nulls++) {
      sink.appendNull();
    }
    sink.finishRecord();

    // Record 3: every column populated.
    sink.beginRecord(1);
    sink.appendLong(3);
    sink.appendString("bye".getBytes(UTF_8));
    sink.appendLong(456);
    sink.appendDouble(98.1);
    sink.appendBoolean(false);
    sink.finishRecord();

    String fragment = sink.commit();

    // commit the table
    metadata.commitCreateTable(outputHandle, ImmutableList.of(fragment));

    // load the new table
    ConnectorTableHandle handle = getTableHandle(temporaryCreateTable);
    List<ConnectorColumnHandle> handles =
        ImmutableList.copyOf(metadata.getColumnHandles(handle).values());

    // verify the metadata
    ConnectorTableMetadata storedMetadata =
        metadata.getTableMetadata(getTableHandle(temporaryCreateTable));
    assertEquals(storedMetadata.getOwner(), tableOwner);

    Map<String, ColumnMetadata> columnsByName =
        uniqueIndex(storedMetadata.getColumns(), columnNameGetter());

    assertPrimitiveField(columnsByName, 0, "id", BIGINT, false);
    assertPrimitiveField(columnsByName, 1, "t_string", VARCHAR, false);
    assertPrimitiveField(columnsByName, 2, "t_bigint", BIGINT, false);
    assertPrimitiveField(columnsByName, 3, "t_double", DOUBLE, false);
    assertPrimitiveField(columnsByName, 4, "t_boolean", BOOLEAN, false);

    // verify the data
    ConnectorPartitionResult lookup =
        splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(lookup.getPartitions().size(), 1);
    ConnectorSplitSource splitSource =
        splitManager.getPartitionSplits(handle, lookup.getPartitions());
    ConnectorSplit onlySplit = getOnlyElement(getAllSplits(splitSource));

    try (RecordCursor rows = recordSetProvider.getRecordSet(onlySplit, handles).cursor()) {
      assertRecordCursorType(rows, "rcfile-binary");

      // Row 1
      assertTrue(rows.advanceNextPosition());
      assertEquals(rows.getLong(0), 1);
      assertEquals(rows.getSlice(1).toStringUtf8(), "hello");
      assertEquals(rows.getLong(2), 123);
      assertEquals(rows.getDouble(3), 43.5);
      assertEquals(rows.getBoolean(4), true);

      // Row 2: fields 1 through 4 were written as nulls.
      assertTrue(rows.advanceNextPosition());
      assertEquals(rows.getLong(0), 2);
      for (int field = 1; field <= 4; field++) {
        assertTrue(rows.isNull(field));
      }

      // Row 3
      assertTrue(rows.advanceNextPosition());
      assertEquals(rows.getLong(0), 3);
      assertEquals(rows.getSlice(1).toStringUtf8(), "bye");
      assertEquals(rows.getLong(2), 456);
      assertEquals(rows.getDouble(3), 98.1);
      assertEquals(rows.getBoolean(4), false);

      assertFalse(rows.advanceNextPosition());
    }
  }