/**
 * Reads the unpartitioned table through a record cursor and checks the values of every row.
 *
 * <p>The table must produce exactly one split that carries no partition keys. The cursor for
 * that split is checked with {@code assertRecordCursorType(cursor, "textfile")} and its total
 * byte count must equal the split length. Rows are counted from 1: {@code t_string} must be
 * null when {@code row % 19 == 0}, the empty string when {@code row % 19 == 1}, and
 * {@code "unpartitioned"} otherwise; {@code t_tinyint} must equal {@code 1 + row}. The split
 * must yield exactly 100 rows.
 */
@Test
public void testGetRecordsUnpartitioned() throws Exception {
  ConnectorTableHandle handle = getTableHandle(tableUnpartitioned);
  List<ConnectorColumnHandle> columns =
      ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
  Map<String, Integer> channels = indexColumns(columns);

  ConnectorPartitionResult partitionResult =
      splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
  List<ConnectorSplit> allSplits =
      getAllSplits(splitManager.getPartitionSplits(handle, partitionResult.getPartitions()));
  assertEquals(allSplits.size(), 1);

  for (ConnectorSplit connectorSplit : allSplits) {
    HiveSplit typedSplit = (HiveSplit) connectorSplit;
    assertEquals(typedSplit.getPartitionKeys(), ImmutableList.of());

    long rowsSeen = 0;
    try (RecordCursor cursor =
        recordSetProvider.getRecordSet(connectorSplit, columns).cursor()) {
      assertRecordCursorType(cursor, "textfile");
      assertEquals(cursor.getTotalBytes(), typedSplit.getLength());

      while (cursor.advanceNextPosition()) {
        rowsSeen++;

        // t_string cycles with period 19: null, then empty, then the literal value.
        long stringPhase = rowsSeen % 19;
        if (stringPhase == 0) {
          assertTrue(cursor.isNull(channels.get("t_string")));
        } else {
          String expectedString = (stringPhase == 1) ? "" : "unpartitioned";
          assertEquals(cursor.getSlice(channels.get("t_string")).toStringUtf8(), expectedString);
        }

        assertEquals(cursor.getLong(channels.get("t_tinyint")), 1 + rowsSeen);
      }
    }
    assertEquals(rowsSeen, 100);
  }
}
/**
 * Opens a cursor over every split of the partitioned table but reads only a few columns.
 *
 * <p>The number of splits must equal the number of entries in {@code partitions}. For each
 * split the expected values are taken from its partition keys (index 0: {@code ds}, index 1:
 * file format, index 2: {@code dummy} parsed as a long). On every row only {@code t_double},
 * {@code ds}, {@code file_format} and {@code dummy} are read and compared; {@code t_double}
 * must equal {@code baseValue + 6.2 + row}, with rows counted from 1. Each split must yield
 * exactly 100 rows.
 */
@Test
public void testGetPartialRecords() throws Exception {
  ConnectorTableHandle handle = getTableHandle(table);
  List<ConnectorColumnHandle> columns =
      ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
  Map<String, Integer> channels = indexColumns(columns);

  ConnectorPartitionResult partitionResult =
      splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
  List<ConnectorSplit> allSplits =
      getAllSplits(splitManager.getPartitionSplits(handle, partitionResult.getPartitions()));
  assertEquals(allSplits.size(), this.partitions.size());

  for (ConnectorSplit connectorSplit : allSplits) {
    HiveSplit typedSplit = (HiveSplit) connectorSplit;

    // The partition keys supply the values every row of this split must report.
    List<HivePartitionKey> keys = typedSplit.getPartitionKeys();
    String expectedDs = keys.get(0).getValue();
    String expectedFormat = keys.get(1).getValue();
    long expectedDummy = Long.parseLong(keys.get(2).getValue());

    long base = getBaseValueForFileType(expectedFormat);

    long rowsSeen = 0;
    try (RecordCursor cursor = recordSetProvider.getRecordSet(typedSplit, columns).cursor()) {
      assertRecordCursorType(cursor, expectedFormat);

      while (cursor.advanceNextPosition()) {
        rowsSeen++;

        double expectedDouble = base + 6.2 + rowsSeen;
        assertEquals(cursor.getDouble(channels.get("t_double")), expectedDouble);

        String actualDs = cursor.getSlice(channels.get("ds")).toStringUtf8();
        assertEquals(actualDs, expectedDs);

        String actualFormat = cursor.getSlice(channels.get("file_format")).toStringUtf8();
        assertEquals(actualFormat, expectedFormat);

        assertEquals(cursor.getLong(channels.get("dummy")), expectedDummy);
      }
    }
    assertEquals(rowsSeen, 100);
  }
}
/**
 * Serializes a {@link HiveSplit} to JSON with {@code codec}, reads it back, and compares the
 * decoded split with the original getter by getter.
 *
 * <p>Compared properties: client id, database, table, partition name, path, start, length,
 * schema, partition keys, addresses, column coercions and the force-local-scheduling flag.
 */
@Test
public void testJsonRoundTrip() {
  Properties tableSchema = new Properties();
  tableSchema.setProperty("foo", "bar");
  tableSchema.setProperty("bar", "baz");

  ImmutableList<HivePartitionKey> keys =
      ImmutableList.of(
          new HivePartitionKey("a", HIVE_STRING, "apple"),
          new HivePartitionKey("b", HiveType.HIVE_LONG, "42"));
  ImmutableList<HostAddress> hosts =
      ImmutableList.of(
          HostAddress.fromParts("127.0.0.1", 44),
          HostAddress.fromParts("127.0.0.1", 45));

  HiveSplit original =
      new HiveSplit(
          "clientId",
          "db",
          "table",
          "partitionId",
          "path",
          42,
          88,
          tableSchema,
          keys,
          hosts,
          OptionalInt.empty(),
          true,
          TupleDomain.<HiveColumnHandle>all(),
          ImmutableMap.of(1, HIVE_STRING));

  HiveSplit decoded = codec.fromJson(codec.toJson(original));

  assertEquals(decoded.getClientId(), original.getClientId());
  assertEquals(decoded.getDatabase(), original.getDatabase());
  assertEquals(decoded.getTable(), original.getTable());
  assertEquals(decoded.getPartitionName(), original.getPartitionName());
  assertEquals(decoded.getPath(), original.getPath());
  assertEquals(decoded.getStart(), original.getStart());
  assertEquals(decoded.getLength(), original.getLength());
  assertEquals(decoded.getSchema(), original.getSchema());
  assertEquals(decoded.getPartitionKeys(), original.getPartitionKeys());
  assertEquals(decoded.getAddresses(), original.getAddresses());
  assertEquals(decoded.getColumnCoercions(), original.getColumnCoercions());
  assertEquals(decoded.isForceLocalScheduling(), original.isForceLocalScheduling());
}
/**
 * Reads every column of every row of the partitioned table and checks each value.
 *
 * <p>The number of splits must equal the number of entries in {@code partitions}. For each
 * split, the expected {@code ds}, file format and {@code dummy} values come from the split's
 * partition keys (indexes 0, 1 and 2), and {@code dummy * 100} must equal the base value
 * returned by {@code getBaseValueForFileType}. The cursor's total byte count must equal the
 * split length.
 *
 * <p>Rows are counted from 1. Columns are null on a fixed cadence ({@code t_string} every 19th
 * row, {@code t_bigint} every 13th, {@code t_boolean} when {@code row % 3 == 2},
 * {@code t_timestamp} every 17th, {@code t_binary} every 23rd, {@code t_map} every 29th,
 * {@code t_array_string} every 27th, {@code t_complex} every 31st) and otherwise hold values
 * derived from the base value, the row number and the file format. After each row the
 * cursor's completed byte count must not decrease and must not exceed the split length. Each
 * split must yield exactly 100 rows.
 */
@Test
public void testGetRecords() throws Exception {
  ConnectorTableHandle handle = getTableHandle(table);
  ConnectorTableMetadata tableMeta = metadata.getTableMetadata(handle);
  List<ConnectorColumnHandle> columns =
      ImmutableList.copyOf(metadata.getColumnHandles(handle).values());
  Map<String, Integer> channels = indexColumns(columns);

  ConnectorPartitionResult partitionResult =
      splitManager.getPartitions(handle, TupleDomain.<ConnectorColumnHandle>all());
  List<ConnectorSplit> allSplits =
      getAllSplits(splitManager.getPartitionSplits(handle, partitionResult.getPartitions()));
  assertEquals(allSplits.size(), this.partitions.size());

  for (ConnectorSplit connectorSplit : allSplits) {
    HiveSplit typedSplit = (HiveSplit) connectorSplit;

    // The partition keys supply the values every row of this split must report.
    List<HivePartitionKey> keys = typedSplit.getPartitionKeys();
    String expectedDs = keys.get(0).getValue();
    String format = keys.get(1).getValue();
    long expectedDummy = Long.parseLong(keys.get(2).getValue());

    long base = getBaseValueForFileType(format);
    assertEquals(expectedDummy * 100, base);

    // Shared expected text for t_string and t_binary, and the failure message for t_timestamp.
    String formatTestText = format + " test";

    long rowsSeen = 0;
    long bytesSoFar = 0;
    try (RecordCursor cursor = recordSetProvider.getRecordSet(typedSplit, columns).cursor()) {
      assertRecordCursorType(cursor, format);
      assertEquals(cursor.getTotalBytes(), typedSplit.getLength());

      while (cursor.advanceNextPosition()) {
        try {
          assertReadFields(cursor, tableMeta.getColumns());
        } catch (RuntimeException e) {
          // The counter is incremented only after this check, so the index reported here is
          // zero-based, whereas the modulo checks below use one-based row numbers.
          throw new RuntimeException("row " + rowsSeen, e);
        }
        rowsSeen++;

        // t_string cycles with period 19: null, then empty, then "<format> test".
        long stringPhase = rowsSeen % 19;
        if (stringPhase == 0) {
          assertTrue(cursor.isNull(channels.get("t_string")));
        } else {
          String expectedString = (stringPhase == 1) ? "" : formatTestText;
          assertEquals(cursor.getSlice(channels.get("t_string")).toStringUtf8(), expectedString);
        }

        // The expected tinyint is narrowed to a byte before being widened back to long.
        long expectedTinyint = (long) ((byte) (base + 1 + rowsSeen));
        assertEquals(cursor.getLong(channels.get("t_tinyint")), expectedTinyint);
        assertEquals(cursor.getLong(channels.get("t_smallint")), base + 2 + rowsSeen);
        assertEquals(cursor.getLong(channels.get("t_int")), base + 3 + rowsSeen);

        if (rowsSeen % 13 != 0) {
          assertEquals(cursor.getLong(channels.get("t_bigint")), base + 4 + rowsSeen);
        } else {
          assertTrue(cursor.isNull(channels.get("t_bigint")));
        }

        // t_float is compared with a tolerance; t_double is compared exactly.
        double expectedFloat = base + 5.1 + rowsSeen;
        assertEquals(cursor.getDouble(channels.get("t_float")), expectedFloat, 0.001);
        double expectedDouble = base + 6.2 + rowsSeen;
        assertEquals(cursor.getDouble(channels.get("t_double")), expectedDouble);

        if (rowsSeen % 3 == 2) {
          assertTrue(cursor.isNull(channels.get("t_boolean")));
        } else {
          boolean expectedBoolean = rowsSeen % 3 != 0;
          assertEquals(cursor.getBoolean(channels.get("t_boolean")), expectedBoolean);
        }

        if (rowsSeen % 17 != 0) {
          DateTime expectedTimestamp = new DateTime(2011, 5, 6, 7, 8, 9, 123, timeZone);
          assertEquals(
              cursor.getLong(channels.get("t_timestamp")),
              expectedTimestamp.getMillis(),
              formatTestText);
        } else {
          assertTrue(cursor.isNull(channels.get("t_timestamp")));
        }

        if (rowsSeen % 23 != 0) {
          assertEquals(cursor.getSlice(channels.get("t_binary")).toStringUtf8(), formatTestText);
        } else {
          assertTrue(cursor.isNull(channels.get("t_binary")));
        }

        if (rowsSeen % 29 != 0) {
          String expectedMapJson = "{\"format\":\"" + format + "\"}";
          assertEquals(cursor.getSlice(channels.get("t_map")).toStringUtf8(), expectedMapJson);
        } else {
          assertTrue(cursor.isNull(channels.get("t_map")));
        }

        if (rowsSeen % 27 != 0) {
          String expectedArrayJson = "[\"" + format + "\",\"test\",\"data\"]";
          assertEquals(
              cursor.getSlice(channels.get("t_array_string")).toStringUtf8(), expectedArrayJson);
        } else {
          assertTrue(cursor.isNull(channels.get("t_array_string")));
        }

        if (rowsSeen % 31 != 0) {
          String expectedComplexJson =
              "{\"1\":[{\"s_string\":\""
                  + format
                  + "-a\",\"s_double\":0.1},{\"s_string\":\""
                  + format
                  + "-b\",\"s_double\":0.2}]}";
          assertEquals(
              cursor.getSlice(channels.get("t_complex")).toStringUtf8(), expectedComplexJson);
        } else {
          assertTrue(cursor.isNull(channels.get("t_complex")));
        }

        // Partition columns must echo the values taken from the split's partition keys.
        assertEquals(cursor.getSlice(channels.get("ds")).toStringUtf8(), expectedDs);
        assertEquals(cursor.getSlice(channels.get("file_format")).toStringUtf8(), format);
        assertEquals(cursor.getLong(channels.get("dummy")), expectedDummy);

        // Completed bytes must never go backwards and never exceed the split length.
        long bytesNow = cursor.getCompletedBytes();
        assertTrue(bytesNow >= bytesSoFar);
        assertTrue(bytesNow <= typedSplit.getLength());
        bytesSoFar = bytesNow;
      }
    }
    assertTrue(bytesSoFar <= typedSplit.getLength());
    assertEquals(rowsSeen, 100);
  }
}