@Test
  public void testGetRecordsUnpartitioned() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      assertEquals(hiveSplit.getPartitionKeys(), ImmutableList.of());

      long rowNumber = 0;
      try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
        assertRecordCursorType(cursor, "textfile");
        assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

        while (cursor.advanceNextPosition()) {
          rowNumber++;

          if (rowNumber % 19 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_string")));
          } else if (rowNumber % 19 == 1) {
            assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "unpartitioned");
          }

          assertEquals(cursor.getLong(columnIndex.get("t_tinyint")), 1 + rowNumber);
        }
      }
      assertEquals(rowNumber, 100);
    }
  }
  @Test
  public void testGetPartialRecords() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(table);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());
    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
      String ds = partitionKeys.get(0).getValue();
      String fileType = partitionKeys.get(1).getValue();
      long dummy = Long.parseLong(partitionKeys.get(2).getValue());

      long baseValue = getBaseValueForFileType(fileType);

      long rowNumber = 0;
      try (RecordCursor cursor =
          recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
        assertRecordCursorType(cursor, fileType);
        while (cursor.advanceNextPosition()) {
          rowNumber++;

          assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);
          assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
          assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
          assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);
        }
      }
      assertEquals(rowNumber, 100);
    }
  }
Exemple #3
0
  @Test
  public void testJsonRoundTrip() {
    Properties schema = new Properties();
    schema.setProperty("foo", "bar");
    schema.setProperty("bar", "baz");

    ImmutableList<HivePartitionKey> partitionKeys =
        ImmutableList.of(
            new HivePartitionKey("a", HIVE_STRING, "apple"),
            new HivePartitionKey("b", HiveType.HIVE_LONG, "42"));
    ImmutableList<HostAddress> addresses =
        ImmutableList.of(
            HostAddress.fromParts("127.0.0.1", 44), HostAddress.fromParts("127.0.0.1", 45));
    HiveSplit expected =
        new HiveSplit(
            "clientId",
            "db",
            "table",
            "partitionId",
            "path",
            42,
            88,
            schema,
            partitionKeys,
            addresses,
            OptionalInt.empty(),
            true,
            TupleDomain.<HiveColumnHandle>all(),
            ImmutableMap.of(1, HIVE_STRING));

    String json = codec.toJson(expected);
    HiveSplit actual = codec.fromJson(json);

    assertEquals(actual.getClientId(), expected.getClientId());
    assertEquals(actual.getDatabase(), expected.getDatabase());
    assertEquals(actual.getTable(), expected.getTable());
    assertEquals(actual.getPartitionName(), expected.getPartitionName());
    assertEquals(actual.getPath(), expected.getPath());
    assertEquals(actual.getStart(), expected.getStart());
    assertEquals(actual.getLength(), expected.getLength());
    assertEquals(actual.getSchema(), expected.getSchema());
    assertEquals(actual.getPartitionKeys(), expected.getPartitionKeys());
    assertEquals(actual.getAddresses(), expected.getAddresses());
    assertEquals(actual.getColumnCoercions(), expected.getColumnCoercions());
    assertEquals(actual.isForceLocalScheduling(), expected.isForceLocalScheduling());
  }
  @Test
  public void testGetRecords() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(table);
    ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(tableHandle);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());
    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
      String ds = partitionKeys.get(0).getValue();
      String fileType = partitionKeys.get(1).getValue();
      long dummy = Long.parseLong(partitionKeys.get(2).getValue());

      long baseValue = getBaseValueForFileType(fileType);
      assertEquals(dummy * 100, baseValue);

      long rowNumber = 0;
      long completedBytes = 0;
      try (RecordCursor cursor =
          recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
        assertRecordCursorType(cursor, fileType);
        assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

        while (cursor.advanceNextPosition()) {
          try {
            assertReadFields(cursor, tableMetadata.getColumns());
          } catch (RuntimeException e) {
            throw new RuntimeException("row " + rowNumber, e);
          }

          rowNumber++;

          if (rowNumber % 19 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_string")));
          } else if (rowNumber % 19 == 1) {
            assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), (fileType + " test"));
          }

          assertEquals(
              cursor.getLong(columnIndex.get("t_tinyint")),
              (long) ((byte) (baseValue + 1 + rowNumber)));
          assertEquals(cursor.getLong(columnIndex.get("t_smallint")), baseValue + 2 + rowNumber);
          assertEquals(cursor.getLong(columnIndex.get("t_int")), baseValue + 3 + rowNumber);

          if (rowNumber % 13 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_bigint")));
          } else {
            assertEquals(cursor.getLong(columnIndex.get("t_bigint")), baseValue + 4 + rowNumber);
          }

          assertEquals(
              cursor.getDouble(columnIndex.get("t_float")), baseValue + 5.1 + rowNumber, 0.001);
          assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);

          if (rowNumber % 3 == 2) {
            assertTrue(cursor.isNull(columnIndex.get("t_boolean")));
          } else {
            assertEquals(cursor.getBoolean(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
          }

          if (rowNumber % 17 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_timestamp")));
          } else {
            long millis = new DateTime(2011, 5, 6, 7, 8, 9, 123, timeZone).getMillis();
            assertEquals(
                cursor.getLong(columnIndex.get("t_timestamp")), millis, (fileType + " test"));
          }

          if (rowNumber % 23 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_binary")));
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_binary")).toStringUtf8(), (fileType + " test"));
          }

          if (rowNumber % 29 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_map")));
          } else {
            String expectedJson = "{\"format\":\"" + fileType + "\"}";
            assertEquals(cursor.getSlice(columnIndex.get("t_map")).toStringUtf8(), expectedJson);
          }

          if (rowNumber % 27 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_array_string")));
          } else {
            String expectedJson = "[\"" + fileType + "\",\"test\",\"data\"]";
            assertEquals(
                cursor.getSlice(columnIndex.get("t_array_string")).toStringUtf8(), expectedJson);
          }

          if (rowNumber % 31 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_complex")));
          } else {
            String expectedJson =
                "{\"1\":[{\"s_string\":\""
                    + fileType
                    + "-a\",\"s_double\":0.1},{\"s_string\":\""
                    + fileType
                    + "-b\",\"s_double\":0.2}]}";
            assertEquals(
                cursor.getSlice(columnIndex.get("t_complex")).toStringUtf8(), expectedJson);
          }

          assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
          assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
          assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);

          long newCompletedBytes = cursor.getCompletedBytes();
          assertTrue(newCompletedBytes >= completedBytes);
          assertTrue(newCompletedBytes <= hiveSplit.getLength());
          completedBytes = newCompletedBytes;
        }
      }
      assertTrue(completedBytes <= hiveSplit.getLength());
      assertEquals(rowNumber, 100);
    }
  }