Exemplo n.º 1
0
  /**
   * Reads the unpartitioned test table through the record set provider and checks its rows.
   *
   * <p>The table must produce exactly one split with no partition keys. For every row the
   * {@code t_string} column is expected to be null, empty, or {@code "unpartitioned"} depending on
   * the 1-based row number modulo 19, and {@code t_tinyint} is expected to equal the row number
   * plus one. Each split must yield exactly 100 rows.
   *
   * @throws Exception propagated from any of the connector calls made by the test
   */
  @Test
  public void testGetRecordsUnpartitioned() throws Exception {
    ConnectorTableHandle handle = getTableHandle(tableUnpartitioned);
    List<ConnectorColumnHandle> columns =
        ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
    Map<String, Integer> positions = indexColumns(columns);

    ConnectorPartitionResult partitionLookup =
        splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> allSplits =
        getAllSplits(splitManager.getPartitionSplits(handle, partitionLookup.getPartitions()));
    assertEquals(allSplits.size(), 1);

    for (ConnectorSplit current : allSplits) {
      HiveSplit hiveSplit = (HiveSplit) current;

      // An unpartitioned table carries no partition keys on its split.
      assertEquals(hiveSplit.getPartitionKeys(), ImmutableList.of());

      long rowsRead = 0;
      try (RecordCursor cursor = recordSetProvider.getRecordSet(current, columns).cursor()) {
        assertRecordCursorType(cursor, "textfile");
        assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

        while (cursor.advanceNextPosition()) {
          rowsRead += 1;
          long remainder = rowsRead % 19;

          if (remainder == 0) {
            // Every 19th row has a null string.
            assertTrue(cursor.isNull(positions.get("t_string")));
          } else {
            // The row right after a null one holds the empty string; all others hold the marker.
            String expectedString = (remainder == 1) ? "" : "unpartitioned";
            assertEquals(
                cursor.getSlice(positions.get("t_string")).toStringUtf8(), expectedString);
          }

          assertEquals(cursor.getLong(positions.get("t_tinyint")), 1 + rowsRead);
        }
      }
      assertEquals(rowsRead, 100);
    }
  }
Exemplo n.º 2
0
  /**
   * Reads every split of the partitioned test table and checks each row's column values.
   *
   * <p>The number of splits must equal the size of {@code this.partitions}. For each split the
   * partition keys are read positionally as {@code ds}, file type and {@code dummy}; the expected
   * column values are derived from the file type's base value and the 1-based row number. Each
   * split must yield exactly 100 rows, and the cursor's completed-byte counter must never decrease
   * or exceed the split length.
   *
   * @throws Exception propagated from any of the connector calls made by the test
   */
  @Test
  public void testGetRecords() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(table);
    ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(tableHandle);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());

    // Every non-null t_timestamp cell is compared against this same instant, so it is computed
    // once here instead of building a DateTime for every row.
    long expectedTimestampMillis = new DateTime(2011, 5, 6, 7, 8, 9, 123, timeZone).getMillis();

    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      // Partition keys are read by position: ds, file format, dummy.
      List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
      String ds = partitionKeys.get(0).getValue();
      String fileType = partitionKeys.get(1).getValue();
      long dummy = Long.parseLong(partitionKeys.get(2).getValue());

      long baseValue = getBaseValueForFileType(fileType);
      assertEquals(dummy * 100, baseValue);

      // Expected values that depend only on the split's file type; built once per split rather
      // than once per row.
      String expectedText = fileType + " test";
      String expectedMapJson = "{\"format\":\"" + fileType + "\"}";
      String expectedArrayJson = "[\"" + fileType + "\",\"test\",\"data\"]";
      String expectedComplexJson =
          "{\"1\":[{\"s_string\":\""
              + fileType
              + "-a\",\"s_double\":0.1},{\"s_string\":\""
              + fileType
              + "-b\",\"s_double\":0.2}]}";

      long rowNumber = 0;
      long completedBytes = 0;
      try (RecordCursor cursor =
          recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
        assertRecordCursorType(cursor, fileType);
        assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

        while (cursor.advanceNextPosition()) {
          try {
            assertReadFields(cursor, tableMetadata.getColumns());
          } catch (RuntimeException e) {
            // rowNumber has not been incremented yet, so the reported row is zero-based.
            throw new RuntimeException("row " + rowNumber, e);
          }

          // From here on rowNumber is the 1-based index of the current row.
          rowNumber++;

          if (rowNumber % 19 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_string")));
          } else if (rowNumber % 19 == 1) {
            assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
          } else {
            assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), expectedText);
          }

          // The expected tinyint is narrowed to a byte and widened back, so it wraps around the
          // byte range.
          assertEquals(
              cursor.getLong(columnIndex.get("t_tinyint")),
              (long) ((byte) (baseValue + 1 + rowNumber)));
          assertEquals(cursor.getLong(columnIndex.get("t_smallint")), baseValue + 2 + rowNumber);
          assertEquals(cursor.getLong(columnIndex.get("t_int")), baseValue + 3 + rowNumber);

          if (rowNumber % 13 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_bigint")));
          } else {
            assertEquals(cursor.getLong(columnIndex.get("t_bigint")), baseValue + 4 + rowNumber);
          }

          // t_float is compared with a tolerance; t_double is compared exactly.
          assertEquals(
              cursor.getDouble(columnIndex.get("t_float")), baseValue + 5.1 + rowNumber, 0.001);
          assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);

          if (rowNumber % 3 == 2) {
            assertTrue(cursor.isNull(columnIndex.get("t_boolean")));
          } else {
            assertEquals(cursor.getBoolean(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
          }

          if (rowNumber % 17 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_timestamp")));
          } else {
            // The third argument is the assertion failure message, not a tolerance.
            assertEquals(
                cursor.getLong(columnIndex.get("t_timestamp")),
                expectedTimestampMillis,
                expectedText);
          }

          if (rowNumber % 23 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_binary")));
          } else {
            assertEquals(cursor.getSlice(columnIndex.get("t_binary")).toStringUtf8(), expectedText);
          }

          if (rowNumber % 29 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_map")));
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_map")).toStringUtf8(), expectedMapJson);
          }

          if (rowNumber % 27 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_array_string")));
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_array_string")).toStringUtf8(),
                expectedArrayJson);
          }

          if (rowNumber % 31 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_complex")));
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_complex")).toStringUtf8(), expectedComplexJson);
          }

          // Partition columns must echo the values taken from the split's partition keys.
          assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
          assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
          assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);

          // Completed bytes must be monotonically non-decreasing and bounded by the split length.
          long newCompletedBytes = cursor.getCompletedBytes();
          assertTrue(newCompletedBytes >= completedBytes);
          assertTrue(newCompletedBytes <= hiveSplit.getLength());
          completedBytes = newCompletedBytes;
        }
      }
      assertTrue(completedBytes <= hiveSplit.getLength());
      assertEquals(rowNumber, 100);
    }
  }