@Test
public void testBucketedTableDoubleFloat()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedDoubleFloat);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings = ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_float")), 406.1000061035156)
            .put(columnHandles.get(columnIndex.get("t_double")), 407.2)
            .build();

    // floats and doubles are not supported, so we should see all splits
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 32);

    int count = 0;
    for (ConnectorSplit split : splits) {
        try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
            while (cursor.advanceNextPosition()) {
                count++;
            }
        }
    }
    assertEquals(count, 300);
}
private List<CassandraPartition> getCassandraPartitions(CassandraTable table, TupleDomain<ColumnHandle> tupleDomain)
{
    if (tupleDomain.isNone()) {
        return ImmutableList.of();
    }

    Set<List<Comparable<?>>> partitionKeysSet = getPartitionKeysSet(table, tupleDomain);

    // an empty filter means all partitions
    if (partitionKeysSet.isEmpty()) {
        return schemaProvider.getAllPartitions(table);
    }

    // fetch the candidate partitions in parallel, one lookup per partition key tuple
    ImmutableList.Builder<ListenableFuture<List<CassandraPartition>>> getPartitionResults = ImmutableList.builder();
    for (List<Comparable<?>> partitionKeys : partitionKeysSet) {
        getPartitionResults.add(executor.submit(() -> schemaProvider.getPartitions(table, partitionKeys)));
    }

    ImmutableList.Builder<CassandraPartition> partitions = ImmutableList.builder();
    for (ListenableFuture<List<CassandraPartition>> result : getPartitionResults.build()) {
        try {
            partitions.addAll(result.get());
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        }
        catch (ExecutionException e) {
            throw new PrestoException(EXTERNAL, "Error fetching cassandra partitions", e);
        }
    }
    return partitions.build();
}
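// A minimal standalone sketch (not connector code; all names below are hypothetical)
// of the same fan-out/fan-in pattern used above: submit one task per partition key to
// a Guava ListeningExecutorService and gather the results. Futures.allAsList is shown
// as an equivalent way to block for all lookups at once instead of looping over the
// individual futures.
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;

public class ParallelPartitionFetchExample
{
    public static void main(String[] args)
            throws InterruptedException, ExecutionException
    {
        ListeningExecutorService executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
        try {
            // one lookup task per candidate partition key
            ImmutableList.Builder<ListenableFuture<String>> futures = ImmutableList.builder();
            for (int key = 0; key < 3; key++) {
                int partitionKey = key;
                futures.add(executor.submit(() -> "partition-" + partitionKey));
            }
            // collapse the per-task futures into one future of a list and block once
            List<String> partitions = Futures.allAsList(futures.build()).get();
            System.out.println(partitions); // [partition-0, partition-1, partition-2]
        }
        finally {
            executor.shutdown();
        }
    }
}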
@Test
public void testGetPartitionNames()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(table);
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    assertExpectedPartitions(partitionResult.getPartitions());
}
@Test
public void testGetPartitionSplitsTableOfflinePartition()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableOfflinePartition);
    assertNotNull(tableHandle);

    ConnectorColumnHandle dsColumn = metadata.getColumnHandle(tableHandle, "ds");
    assertNotNull(dsColumn);

    Domain domain = Domain.singleValue(utf8Slice("2012-12-30"));
    TupleDomain<ConnectorColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(dsColumn, domain));
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, tupleDomain);
    for (ConnectorPartition partition : partitionResult.getPartitions()) {
        if (domain.equals(partition.getTupleDomain().getDomains().get(dsColumn))) {
            try {
                getSplitCount(splitManager.getPartitionSplits(tableHandle, ImmutableList.of(partition)));
                fail("Expected PartitionOfflineException");
            }
            catch (PartitionOfflineException e) {
                assertEquals(e.getTableName(), tableOfflinePartition);
                assertEquals(e.getPartition(), "ds=2012-12-30");
            }
        }
        else {
            getSplitCount(splitManager.getPartitionSplits(tableHandle, ImmutableList.of(partition)));
        }
    }
}
private static Set<List<Comparable<?>>> getPartitionKeysSet(CassandraTable table, TupleDomain<ColumnHandle> tupleDomain)
{
    ImmutableList.Builder<Set<Comparable<?>>> partitionColumnValues = ImmutableList.builder();
    for (CassandraColumnHandle columnHandle : table.getPartitionKeyColumns()) {
        Domain domain = tupleDomain.getDomains().get(columnHandle);

        // if there is no constraint on a partition key, return an empty set
        if (domain == null) {
            return ImmutableSet.of();
        }

        // todo does cassandra allow null partition keys?
        if (domain.isNullAllowed()) {
            return ImmutableSet.of();
        }

        ImmutableSet.Builder<Comparable<?>> columnValues = ImmutableSet.builder();
        for (Range range : domain.getRanges()) {
            // if the range is not a single value, we cannot perform partition pruning
            if (!range.isSingleValue()) {
                return ImmutableSet.of();
            }
            Comparable<?> value = range.getSingleValue();

            CassandraType valueType = columnHandle.getCassandraType();
            columnValues.add(valueType.getValueForPartitionKey(value));
        }
        partitionColumnValues.add(columnValues.build());
    }
    return Sets.cartesianProduct(partitionColumnValues.build());
}
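// A standalone illustration (hypothetical class, not connector code) of how the
// Sets.cartesianProduct call above enumerates candidate partition key tuples when
// every partition key column is pinned to a small set of single values.
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.List;
import java.util.Set;

public class PartitionKeyCartesianProductExample
{
    public static void main(String[] args)
    {
        // suppose the predicate is: key1 IN (1, 2) AND key2 IN ('a', 'b')
        Set<Object> key1Values = ImmutableSet.<Object>of(1, 2);
        Set<Object> key2Values = ImmutableSet.<Object>of("a", "b");

        // one entry per combination, in order: [1, a], [1, b], [2, a], [2, b];
        // each tuple drives one partition lookup
        Set<List<Object>> keyTuples = Sets.cartesianProduct(ImmutableList.of(key1Values, key2Values));
        keyTuples.forEach(System.out::println);
    }
}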
private static TupleDomain spanTupleDomain(TupleDomain tupleDomain)
{
    if (tupleDomain.isNone()) {
        return tupleDomain;
    }
    Map<ColumnHandle, Domain> spannedDomains = Maps.transformValues(tupleDomain.getDomains(), new Function<Domain, Domain>()
    {
        @Override
        public Domain apply(Domain domain)
        {
            // Retain nullability, but collapse each SortedRangeSet into a single span
            return Domain.create(getSortedRangeSpan(domain.getRanges()), domain.isNullAllowed());
        }
    });
    return TupleDomain.withColumnDomains(spannedDomains);
}
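// A minimal sketch of what "collapsing a SortedRangeSet into a single span" means,
// using a hypothetical {low, high} pair instead of the real Range type: the span
// keeps only the overall lower and upper bounds and discards the gaps in between,
// which widens the domain but never drops a matching value.
import com.google.common.collect.ImmutableList;
import java.util.List;

public class RangeSpanExample
{
    // each range is a two-element array {low, high}; input is sorted and non-overlapping
    static long[] span(List<long[]> sortedRanges)
    {
        long low = sortedRanges.get(0)[0];
        long high = sortedRanges.get(sortedRanges.size() - 1)[1];
        return new long[] {low, high};
    }

    public static void main(String[] args)
    {
        // [1, 3], [7, 9], [20, 25] collapse to the single span [1, 25]
        long[] span = span(ImmutableList.of(new long[] {1, 3}, new long[] {7, 9}, new long[] {20, 25}));
        System.out.printf("[%d, %d]%n", span[0], span[1]);
    }
}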
@Test
public void testGetPartitionNamesUnpartitioned()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(partitionResult.getPartitions().size(), 1);
    assertEquals(partitionResult.getPartitions(), unpartitionedPartitions);
}
@Override
public ConnectorPartition fromPartitionDto(PartitionDto partitionDto)
{
    return new ConnectorPartitionDetailImpl(
            partitionDto.getName().getPartitionName(),
            TupleDomain.none(),
            fromStorageDto(partitionDto.getSerde()),
            partitionDto.getMetadata(),
            fromAuditDto(partitionDto.getAudit()));
}
@Test
public void testGetPartitionSplitsBatchUnpartitioned()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    ConnectorSplitSource splitSource = splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    assertEquals(getSplitCount(splitSource), 1);
}
@Test
public void testGetPartitionsWithBindings()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(table);
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(
            tableHandle,
            TupleDomain.withColumnDomains(ImmutableMap.<ConnectorColumnHandle, Domain>of(intColumn, Domain.singleValue(5L))));
    assertExpectedPartitions(partitionResult.getPartitions());
}
@Test
public void testGetPartitionTableOffline()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableOffline);
    try {
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
        fail("expected TableOfflineException");
    }
    catch (TableOfflineException e) {
        assertEquals(e.getTableName(), tableOffline);
    }
}
private void assertTableIsBucketed(ConnectorTableHandle tableHandle)
        throws Exception
{
    // the bucketed test tables should have exactly 32 splits
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 32);

    // verify all paths are unique
    Set<String> paths = new HashSet<>();
    for (ConnectorSplit split : splits) {
        assertTrue(paths.add(((HiveSplit) split).getPath()));
    }
}
@Test(expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = ".*" + INVALID_COLUMN + ".*")
public void testGetRecordsInvalidColumn()
        throws Exception
{
    ConnectorTableHandle table = getTableHandle(tableUnpartitioned);
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(table, TupleDomain.<ConnectorColumnHandle>all());
    ConnectorSplit split = Iterables.getFirst(getAllSplits(splitManager.getPartitionSplits(table, partitionResult.getPartitions())), null);
    RecordSet recordSet = recordSetProvider.getRecordSet(split, ImmutableList.of(invalidColumnHandle));
    recordSet.cursor();
}
@Override
protected RelationPlan visitTable(Table node, Void context)
{
    Query namedQuery = analysis.getNamedQuery(node);
    if (namedQuery != null) {
        RelationPlan subPlan = process(namedQuery, null);
        return new RelationPlan(subPlan.getRoot(), analysis.getOutputDescriptor(node), subPlan.getOutputSymbols(), subPlan.getSampleWeight());
    }

    TupleDescriptor descriptor = analysis.getOutputDescriptor(node);
    TableHandle handle = analysis.getTableHandle(node);

    ImmutableList.Builder<Symbol> outputSymbolsBuilder = ImmutableList.builder();
    ImmutableMap.Builder<Symbol, ColumnHandle> columns = ImmutableMap.builder();
    for (Field field : descriptor.getAllFields()) {
        Symbol symbol = symbolAllocator.newSymbol(field.getName().get(), field.getType());

        outputSymbolsBuilder.add(symbol);
        columns.put(symbol, analysis.getColumn(field));
    }

    List<Symbol> planOutputSymbols = outputSymbolsBuilder.build();
    Optional<ColumnHandle> sampleWeightColumn = metadata.getSampleWeightColumnHandle(session, handle);
    Symbol sampleWeightSymbol = null;
    if (sampleWeightColumn.isPresent()) {
        sampleWeightSymbol = symbolAllocator.newSymbol("$sampleWeight", BIGINT);
        outputSymbolsBuilder.add(sampleWeightSymbol);
        columns.put(sampleWeightSymbol, sampleWeightColumn.get());
    }

    List<Symbol> nodeOutputSymbols = outputSymbolsBuilder.build();

    PlanNode root = new TableScanNode(idAllocator.getNextId(), handle, nodeOutputSymbols, columns.build(), Optional.empty(), TupleDomain.all(), null);
    return new RelationPlan(root, descriptor, planOutputSymbols, Optional.ofNullable(sampleWeightSymbol));
}
@Test
public void testBucketedTableBigintBoolean()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedBigintBoolean);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    String testString = "textfile test";
    // This needs to match one of the rows where t_string is not empty or null, and where t_bigint is not null
    // (i.e. (testBigint - 604) % 19 > 1 and (testBigint - 604) % 13 != 0)
    Long testBigint = 608L;
    Boolean testBoolean = true;

    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings = ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_string")), utf8Slice(testString))
            .put(columnHandles.get(columnIndex.get("t_bigint")), testBigint)
            .put(columnHandles.get(columnIndex.get("t_boolean")), testBoolean)
            .build();

    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    boolean rowFound = false;
    try (RecordCursor cursor = recordSetProvider.getRecordSet(splits.get(0), columnHandles).cursor()) {
        while (cursor.advanceNextPosition()) {
            if (testString.equals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8())
                    && testBigint == cursor.getLong(columnIndex.get("t_bigint"))
                    && testBoolean == cursor.getBoolean(columnIndex.get("t_boolean"))) {
                rowFound = true;
                break;
            }
        }
        assertTrue(rowFound);
    }
}
@Test
public void testGetRecordsUnpartitioned()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    for (ConnectorSplit split : splits) {
        HiveSplit hiveSplit = (HiveSplit) split;

        assertEquals(hiveSplit.getPartitionKeys(), ImmutableList.of());

        long rowNumber = 0;
        try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
            assertRecordCursorType(cursor, "textfile");
            assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

            while (cursor.advanceNextPosition()) {
                rowNumber++;

                if (rowNumber % 19 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_string")));
                }
                else if (rowNumber % 19 == 1) {
                    assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
                }
                else {
                    assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "unpartitioned");
                }

                assertEquals(cursor.getLong(columnIndex.get("t_tinyint")), 1 + rowNumber);
            }
        }
        assertEquals(rowNumber, 100);
    }
}
// TODO: get the right partitions right here
@Override
public ConnectorPartitionResult getPartitions(ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain)
{
    checkArgument(tableHandle instanceof RiakTableHandle, "tableHandle is not an instance of RiakTableHandle");
    RiakTableHandle riakTableHandle = (RiakTableHandle) tableHandle;

    log.info("==========================tupleDomain=============================");
    log.info(tupleDomain.toString());

    try {
        String parentTable = PRSubTable.parentTableName(riakTableHandle.getTableName());
        SchemaTableName parentSchemaTable = new SchemaTableName(riakTableHandle.getSchemaName(), parentTable);
        PRTable table = riakClient.getTable(parentSchemaTable);
        List<String> indexedColumns = new LinkedList<>();
        for (RiakColumn riakColumn : table.getColumns()) {
            if (riakColumn.getIndex()) {
                indexedColumns.add(riakColumn.getName());
            }
        }

        // Riak connector has only one partition
        List<ConnectorPartition> partitions = ImmutableList.<ConnectorPartition>of(
                new RiakPartition(riakTableHandle.getSchemaName(), riakTableHandle.getTableName(), tupleDomain, indexedColumns));

        // Riak connector does not do any additional processing/filtering with the TupleDomain, so just return the whole TupleDomain
        return new ConnectorPartitionResult(partitions, tupleDomain);
    }
    catch (Exception e) {
        log.error("interrupted: %s", e.toString());
        throw new TableNotFoundException(riakTableHandle.toSchemaTableName());
    }
}
@Test
public void testBucketedTableStringInt()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedStringInt);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    String testString = "sequencefile test";
    Long testInt = 413L;
    Long testSmallint = 412L;

    // Reverse the order of bindings as compared to bucketing order
    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings = ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_int")), testInt)
            .put(columnHandles.get(columnIndex.get("t_string")), utf8Slice(testString))
            .put(columnHandles.get(columnIndex.get("t_smallint")), testSmallint)
            .build();

    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    boolean rowFound = false;
    try (RecordCursor cursor = recordSetProvider.getRecordSet(splits.get(0), columnHandles).cursor()) {
        while (cursor.advanceNextPosition()) {
            if (testString.equals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8())
                    && testInt == cursor.getLong(columnIndex.get("t_int"))
                    && testSmallint == cursor.getLong(columnIndex.get("t_smallint"))) {
                rowFound = true;
            }
        }
        assertTrue(rowFound);
    }
}
@Test
public void testGetPartialRecords()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(table);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());
    for (ConnectorSplit split : splits) {
        HiveSplit hiveSplit = (HiveSplit) split;

        List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
        String ds = partitionKeys.get(0).getValue();
        String fileType = partitionKeys.get(1).getValue();
        long dummy = Long.parseLong(partitionKeys.get(2).getValue());

        long baseValue = getBaseValueForFileType(fileType);

        long rowNumber = 0;
        try (RecordCursor cursor = recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
            assertRecordCursorType(cursor, fileType);
            while (cursor.advanceNextPosition()) {
                rowNumber++;

                assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);
                assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
                assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
                assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);
            }
        }
        assertEquals(rowNumber, 100);
    }
}
private InternalTable buildPartitions(Session session, String catalogName, Map<String, SerializableNativeValue> filters)
{
    QualifiedTableName tableName = extractQualifiedTableName(catalogName, filters);

    InternalTable.Builder table = InternalTable.builder(informationSchemaTableColumns(TABLE_INTERNAL_PARTITIONS));

    Optional<TableHandle> tableHandle = metadata.getTableHandle(session, tableName);
    checkArgument(tableHandle.isPresent(), "Table %s does not exist", tableName);
    Map<ColumnHandle, String> columnHandles = ImmutableBiMap.copyOf(metadata.getColumnHandles(session, tableHandle.get())).inverse();

    List<TableLayoutResult> layouts = metadata.getLayouts(session, tableHandle.get(), Constraint.<ColumnHandle>alwaysTrue(), Optional.empty());

    if (layouts.size() == 1) {
        TableLayout layout = Iterables.getOnlyElement(layouts).getLayout();

        layout.getDiscretePredicates().ifPresent(domains -> {
            int partitionNumber = 1;
            for (TupleDomain<ColumnHandle> domain : domains) {
                for (Entry<ColumnHandle, SerializableNativeValue> entry : domain.extractNullableFixedValues().entrySet()) {
                    ColumnHandle columnHandle = entry.getKey();
                    String columnName = columnHandles.get(columnHandle);
                    String value = null;
                    if (entry.getValue().getValue() != null) {
                        ColumnMetadata columnMetadata = metadata.getColumnMetadata(session, tableHandle.get(), columnHandle);
                        try {
                            FunctionInfo operator = metadata.getFunctionRegistry().getCoercion(columnMetadata.getType(), VARCHAR);
                            value = ((Slice) operator.getMethodHandle().invokeWithArguments(entry.getValue().getValue())).toStringUtf8();
                        }
                        catch (OperatorNotFoundException e) {
                            value = "<UNREPRESENTABLE VALUE>";
                        }
                        catch (Throwable throwable) {
                            throw Throwables.propagate(throwable);
                        }
                    }
                    table.add(catalogName, tableName.getSchemaName(), tableName.getTableName(), partitionNumber, columnName, value);
                }
                partitionNumber++;
            }
        });
    }
    return table.build();
}
@Test(expectedExceptions = TableNotFoundException.class)
public void testGetPartitionNamesException()
        throws Exception
{
    splitManager.getPartitions(invalidTableHandle, TupleDomain.<ConnectorColumnHandle>all());
}
@BeforeMethod
public void setUp()
        throws Exception
{
    DualMetadata dualMetadata = new DualMetadata();
    TableHandle tableHandle = dualMetadata.getTableHandle(new SchemaTableName("default", DualMetadata.NAME));
    assertNotNull(tableHandle, "tableHandle is null");

    ColumnHandle columnHandle = dualMetadata.getColumnHandle(tableHandle, DualMetadata.COLUMN_NAME);
    assertNotNull(columnHandle, "columnHandle is null");
    Symbol symbol = new Symbol(DualMetadata.COLUMN_NAME);

    MetadataManager metadata = new MetadataManager(new FeaturesConfig());
    metadata.addInternalSchemaMetadata(MetadataManager.INTERNAL_CONNECTOR_ID, dualMetadata);

    DualSplitManager dualSplitManager = new DualSplitManager(new InMemoryNodeManager());
    PartitionResult partitionResult = dualSplitManager.getPartitions(tableHandle, TupleDomain.all());

    SplitSource splitSource = dualSplitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    split = Iterables.getOnlyElement(splitSource.getNextBatch(1));
    assertTrue(splitSource.isFinished());

    LocalExecutionPlanner planner = new LocalExecutionPlanner(
            new NodeInfo("test"),
            metadata,
            new DataStreamManager(new DualDataStreamProvider()),
            new MockLocalStorageManager(new File("target/temp")),
            new RecordSinkManager(),
            new MockExchangeClientSupplier(),
            new ExpressionCompiler(metadata));

    taskExecutor = new TaskExecutor(8);
    taskExecutor.start();

    tableScanNodeId = new PlanNodeId("tableScan");
    PlanFragment testFragment = new PlanFragment(
            new PlanFragmentId("fragment"),
            new TableScanNode(
                    tableScanNodeId,
                    tableHandle,
                    ImmutableList.of(symbol),
                    ImmutableMap.of(symbol, columnHandle),
                    null,
                    Optional.<GeneratedPartitions>absent()),
            ImmutableMap.of(symbol, Type.VARCHAR),
            PlanDistribution.SOURCE,
            tableScanNodeId,
            OutputPartitioning.NONE,
            ImmutableList.<Symbol>of());

    TaskId taskId = new TaskId("query", "stage", "task");
    Session session = new Session("user", "test", "default", "default", "test", "test");

    taskNotificationExecutor = Executors.newCachedThreadPool(threadsNamed("task-notification-%d"));

    outputBuffers = OutputBuffers.INITIAL_EMPTY_OUTPUT_BUFFERS;

    taskExecution = SqlTaskExecution.createSqlTaskExecution(
            session,
            taskId,
            URI.create("fake://task/" + taskId),
            testFragment,
            ImmutableList.<TaskSource>of(),
            outputBuffers,
            planner,
            new DataSize(32, Unit.MEGABYTE),
            taskExecutor,
            taskNotificationExecutor,
            new DataSize(256, Unit.MEGABYTE),
            new DataSize(8, Unit.MEGABYTE),
            new QueryMonitor(new ObjectMapperProvider().get(), new NullEventClient(), new NodeInfo("test")),
            false);
}
private void doCreateTable()
        throws InterruptedException
{
    // begin creating the table
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("id", BIGINT, 1, false))
            .add(new ColumnMetadata("t_string", VARCHAR, 2, false))
            .add(new ColumnMetadata("t_bigint", BIGINT, 3, false))
            .add(new ColumnMetadata("t_double", DOUBLE, 4, false))
            .add(new ColumnMetadata("t_boolean", BOOLEAN, 5, false))
            .build();
    ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(temporaryCreateTable, columns, tableOwner);
    ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(SESSION, tableMetadata);

    // write the records
    RecordSink sink = recordSinkProvider.getRecordSink(outputHandle);

    sink.beginRecord(1);
    sink.appendLong(1);
    sink.appendString("hello".getBytes(UTF_8));
    sink.appendLong(123);
    sink.appendDouble(43.5);
    sink.appendBoolean(true);
    sink.finishRecord();

    sink.beginRecord(1);
    sink.appendLong(2);
    sink.appendNull();
    sink.appendNull();
    sink.appendNull();
    sink.appendNull();
    sink.finishRecord();

    sink.beginRecord(1);
    sink.appendLong(3);
    sink.appendString("bye".getBytes(UTF_8));
    sink.appendLong(456);
    sink.appendDouble(98.1);
    sink.appendBoolean(false);
    sink.finishRecord();

    String fragment = sink.commit();

    // commit the table
    metadata.commitCreateTable(outputHandle, ImmutableList.of(fragment));

    // load the new table
    ConnectorTableHandle tableHandle = getTableHandle(temporaryCreateTable);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());

    // verify the metadata
    tableMetadata = metadata.getTableMetadata(getTableHandle(temporaryCreateTable));
    assertEquals(tableMetadata.getOwner(), tableOwner);

    Map<String, ColumnMetadata> columnMap = uniqueIndex(tableMetadata.getColumns(), columnNameGetter());

    assertPrimitiveField(columnMap, 0, "id", BIGINT, false);
    assertPrimitiveField(columnMap, 1, "t_string", VARCHAR, false);
    assertPrimitiveField(columnMap, 2, "t_bigint", BIGINT, false);
    assertPrimitiveField(columnMap, 3, "t_double", DOUBLE, false);
    assertPrimitiveField(columnMap, 4, "t_boolean", BOOLEAN, false);

    // verify the data
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(partitionResult.getPartitions().size(), 1);
    ConnectorSplitSource splitSource = splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

    try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
        assertRecordCursorType(cursor, "rcfile-binary");

        assertTrue(cursor.advanceNextPosition());
        assertEquals(cursor.getLong(0), 1);
        assertEquals(cursor.getSlice(1).toStringUtf8(), "hello");
        assertEquals(cursor.getLong(2), 123);
        assertEquals(cursor.getDouble(3), 43.5);
        assertEquals(cursor.getBoolean(4), true);

        assertTrue(cursor.advanceNextPosition());
        assertEquals(cursor.getLong(0), 2);
        assertTrue(cursor.isNull(1));
        assertTrue(cursor.isNull(2));
        assertTrue(cursor.isNull(3));
        assertTrue(cursor.isNull(4));

        assertTrue(cursor.advanceNextPosition());
        assertEquals(cursor.getLong(0), 3);
        assertEquals(cursor.getSlice(1).toStringUtf8(), "bye");
        assertEquals(cursor.getLong(2), 456);
        assertEquals(cursor.getDouble(3), 98.1);
        assertEquals(cursor.getBoolean(4), false);

        assertFalse(cursor.advanceNextPosition());
    }
}
@Override
public PlanNode rewriteTableScan(TableScanNode node, Expression inheritedPredicate, PlanRewriter<Expression> planRewriter)
{
    DomainTranslator.ExtractionResult extractionResult = DomainTranslator.fromPredicate(inheritedPredicate, symbolAllocator.getTypes(), node.getAssignments());
    Expression extractionRemainingExpression = extractionResult.getRemainingExpression();
    TupleDomain tupleDomain = extractionResult.getTupleDomain();

    if (node.getGeneratedPartitions().isPresent()) {
        // Add back in the TupleDomain that was used to generate the previous set of Partitions if present
        // And just for kicks, throw in the domain summary too (as that can only help prune down the ranges)
        // The domains should never widen between each pass.
        tupleDomain = tupleDomain
                .intersect(node.getGeneratedPartitions().get().getTupleDomainInput())
                .intersect(node.getPartitionsDomainSummary());
    }

    PartitionResult matchingPartitions = splitManager.getPartitions(node.getTable(), Optional.of(tupleDomain));
    List<Partition> partitions = matchingPartitions.getPartitions();
    TupleDomain undeterminedTupleDomain = matchingPartitions.getUndeterminedTupleDomain();

    Expression unevaluatedDomainPredicate = DomainTranslator.toPredicate(undeterminedTupleDomain, ImmutableBiMap.copyOf(node.getAssignments()).inverse());

    // Construct the post scan predicate. Add the unevaluated TupleDomain back first since those
    // are generally cheaper to evaluate than anything we can't extract
    Expression postScanPredicate = combineConjuncts(unevaluatedDomainPredicate, extractionRemainingExpression);

    // Do some early partition pruning
    partitions = ImmutableList.copyOf(filter(partitions, not(shouldPrunePartition(postScanPredicate, node.getAssignments()))));
    GeneratedPartitions generatedPartitions = new GeneratedPartitions(tupleDomain, partitions);

    PlanNode output = node;
    if (!node.getGeneratedPartitions().equals(Optional.of(generatedPartitions))) {
        // Only overwrite the originalConstraint if it was previously null
        Expression originalConstraint = node.getOriginalConstraint() == null ? inheritedPredicate : node.getOriginalConstraint();
        output = new TableScanNode(node.getId(), node.getTable(), node.getOutputSymbols(), node.getAssignments(), originalConstraint, Optional.of(generatedPartitions));
    }
    if (!postScanPredicate.equals(BooleanLiteral.TRUE_LITERAL)) {
        output = new FilterNode(idAllocator.getNextId(), output, postScanPredicate);
    }
    return output;
}
private void doCreateSampledTable()
        throws InterruptedException
{
    // begin creating the table
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("sales", BIGINT, 1, false))
            .build();
    ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(temporaryCreateSampledTable, columns, tableOwner, true);
    ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(SESSION, tableMetadata);

    // write the records
    RecordSink sink = recordSinkProvider.getRecordSink(outputHandle);

    sink.beginRecord(8);
    sink.appendLong(2);
    sink.finishRecord();

    sink.beginRecord(5);
    sink.appendLong(3);
    sink.finishRecord();

    sink.beginRecord(7);
    sink.appendLong(4);
    sink.finishRecord();

    String fragment = sink.commit();

    // commit the table
    metadata.commitCreateTable(outputHandle, ImmutableList.of(fragment));

    // load the new table
    ConnectorTableHandle tableHandle = getTableHandle(temporaryCreateSampledTable);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.<ConnectorColumnHandle>builder()
            .addAll(metadata.getColumnHandles(tableHandle).values())
            .add(metadata.getSampleWeightColumnHandle(tableHandle))
            .build();
    assertEquals(columnHandles.size(), 2);

    // verify the metadata
    tableMetadata = metadata.getTableMetadata(getTableHandle(temporaryCreateSampledTable));
    assertEquals(tableMetadata.getOwner(), tableOwner);

    Map<String, ColumnMetadata> columnMap = uniqueIndex(tableMetadata.getColumns(), columnNameGetter());
    assertEquals(columnMap.size(), 1);

    assertPrimitiveField(columnMap, 0, "sales", BIGINT, false);

    // verify the data
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(partitionResult.getPartitions().size(), 1);
    ConnectorSplitSource splitSource = splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

    try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
        assertRecordCursorType(cursor, "rcfile-binary");

        assertTrue(cursor.advanceNextPosition());
        assertEquals(cursor.getLong(0), 2);
        assertEquals(cursor.getLong(1), 8);

        assertTrue(cursor.advanceNextPosition());
        assertEquals(cursor.getLong(0), 3);
        assertEquals(cursor.getLong(1), 5);

        assertTrue(cursor.advanceNextPosition());
        assertEquals(cursor.getLong(0), 4);
        assertEquals(cursor.getLong(1), 7);

        assertFalse(cursor.advanceNextPosition());
    }
}
@Test
public void testGetRecords()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(table);
    ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(tableHandle);
    List<ConnectorColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits = getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());
    for (ConnectorSplit split : splits) {
        HiveSplit hiveSplit = (HiveSplit) split;

        List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
        String ds = partitionKeys.get(0).getValue();
        String fileType = partitionKeys.get(1).getValue();
        long dummy = Long.parseLong(partitionKeys.get(2).getValue());

        long baseValue = getBaseValueForFileType(fileType);
        assertEquals(dummy * 100, baseValue);

        long rowNumber = 0;
        long completedBytes = 0;
        try (RecordCursor cursor = recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
            assertRecordCursorType(cursor, fileType);
            assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

            while (cursor.advanceNextPosition()) {
                try {
                    assertReadFields(cursor, tableMetadata.getColumns());
                }
                catch (RuntimeException e) {
                    throw new RuntimeException("row " + rowNumber, e);
                }

                rowNumber++;

                if (rowNumber % 19 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_string")));
                }
                else if (rowNumber % 19 == 1) {
                    assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
                }
                else {
                    assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), (fileType + " test"));
                }

                assertEquals(cursor.getLong(columnIndex.get("t_tinyint")), (long) ((byte) (baseValue + 1 + rowNumber)));
                assertEquals(cursor.getLong(columnIndex.get("t_smallint")), baseValue + 2 + rowNumber);
                assertEquals(cursor.getLong(columnIndex.get("t_int")), baseValue + 3 + rowNumber);

                if (rowNumber % 13 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_bigint")));
                }
                else {
                    assertEquals(cursor.getLong(columnIndex.get("t_bigint")), baseValue + 4 + rowNumber);
                }

                assertEquals(cursor.getDouble(columnIndex.get("t_float")), baseValue + 5.1 + rowNumber, 0.001);
                assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);

                if (rowNumber % 3 == 2) {
                    assertTrue(cursor.isNull(columnIndex.get("t_boolean")));
                }
                else {
                    assertEquals(cursor.getBoolean(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                }

                if (rowNumber % 17 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_timestamp")));
                }
                else {
                    long millis = new DateTime(2011, 5, 6, 7, 8, 9, 123, timeZone).getMillis();
                    assertEquals(cursor.getLong(columnIndex.get("t_timestamp")), millis, (fileType + " test"));
                }

                if (rowNumber % 23 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_binary")));
                }
                else {
                    assertEquals(cursor.getSlice(columnIndex.get("t_binary")).toStringUtf8(), (fileType + " test"));
                }

                if (rowNumber % 29 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_map")));
                }
                else {
                    String expectedJson = "{\"format\":\"" + fileType + "\"}";
                    assertEquals(cursor.getSlice(columnIndex.get("t_map")).toStringUtf8(), expectedJson);
                }

                if (rowNumber % 27 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_array_string")));
                }
                else {
                    String expectedJson = "[\"" + fileType + "\",\"test\",\"data\"]";
                    assertEquals(cursor.getSlice(columnIndex.get("t_array_string")).toStringUtf8(), expectedJson);
                }

                if (rowNumber % 31 == 0) {
                    assertTrue(cursor.isNull(columnIndex.get("t_complex")));
                }
                else {
                    String expectedJson = "{\"1\":[{\"s_string\":\"" + fileType + "-a\",\"s_double\":0.1},{\"s_string\":\"" + fileType + "-b\",\"s_double\":0.2}]}";
                    assertEquals(cursor.getSlice(columnIndex.get("t_complex")).toStringUtf8(), expectedJson);
                }

                assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
                assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
                assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);

                long newCompletedBytes = cursor.getCompletedBytes();
                assertTrue(newCompletedBytes >= completedBytes);
                assertTrue(newCompletedBytes <= hiveSplit.getLength());
                completedBytes = newCompletedBytes;
            }
        }
        assertTrue(completedBytes <= hiveSplit.getLength());
        assertEquals(rowNumber, 100);
    }
}
@Override
public TupleDomain getTupleDomain()
{
    return TupleDomain.all();
}
@Override
public ConnectorPartitionResult getPartitions(ConnectorSession session, ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain)
{
    CassandraTableHandle cassandraTableHandle = checkType(tableHandle, CassandraTableHandle.class, "tableHandle");
    checkNotNull(tupleDomain, "tupleDomain is null");

    CassandraTable table = schemaProvider.getTable(cassandraTableHandle);
    List<CassandraColumnHandle> partitionKeys = table.getPartitionKeyColumns();

    // fetch the partitions
    List<CassandraPartition> allPartitions = getCassandraPartitions(table, tupleDomain);
    log.debug("%s.%s #partitions: %d", cassandraTableHandle.getSchemaName(), cassandraTableHandle.getTableName(), allPartitions.size());

    // do a final pass to filter based on fields that could not be used to build the prefix
    List<ConnectorPartition> partitions = allPartitions.stream()
            .filter(partition -> tupleDomain.overlaps(partition.getTupleDomain()))
            .collect(toList());

    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.none();
    if (!tupleDomain.isNone()) {
        if (partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned()) {
            remainingTupleDomain = tupleDomain;
        }
        else {
            @SuppressWarnings({"rawtypes", "unchecked"})
            List<ColumnHandle> partitionColumns = (List) partitionKeys;
            remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(tupleDomain.getDomains(), not(in(partitionColumns))));
        }
    }

    // push down indexed column fixed value predicates only for the unpartitioned partition, which uses a token range query
    if (partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned()) {
        Map<ColumnHandle, Domain> domains = tupleDomain.getDomains();
        List<ColumnHandle> indexedColumns = new ArrayList<>();
        // compose the partitionId using the indexed column
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) {
            CassandraColumnHandle column = (CassandraColumnHandle) entry.getKey();
            Domain domain = entry.getValue();
            if (column.isIndexed() && domain.isSingleValue()) {
                sb.append(CassandraCqlUtils.validColumnName(column.getName()))
                        .append(" = ")
                        .append(CassandraCqlUtils.cqlValue(toCQLCompatibleString(entry.getValue().getSingleValue()), column.getCassandraType()));
                indexedColumns.add(column);
                // Only one indexed column predicate can be pushed down.
                break;
            }
        }
        if (sb.length() > 0) {
            CassandraPartition partition = (CassandraPartition) partitions.get(0);
            TupleDomain<ColumnHandle> filterIndexedColumn = TupleDomain.withColumnDomains(Maps.filterKeys(remainingTupleDomain.getDomains(), not(in(indexedColumns))));
            partitions = new ArrayList<>();
            partitions.add(new CassandraPartition(partition.getKey(), sb.toString(), filterIndexedColumn, true));
            return new ConnectorPartitionResult(partitions, filterIndexedColumn);
        }
    }
    return new ConnectorPartitionResult(partitions, remainingTupleDomain);
}