/** * @return the unused section of the page, or null if fully applied. pagesIndex guaranteed to have * at least one row after this method returns */ private Page updatePagesIndex(Page page) { checkArgument(page.getPositionCount() > 0); // TODO: Fix pagesHashStrategy to allow specifying channels for comparison, it currently // requires us to rearrange the right side blocks in consecutive channel order Page preGroupedPage = rearrangePage(page, preGroupedChannels); if (pagesIndex.getPositionCount() == 0 || pagesIndex.positionEqualsRow( preGroupedPartitionHashStrategy, 0, 0, preGroupedPage.getBlocks())) { // Find the position where the pre-grouped columns change int groupEnd = findGroupEnd(preGroupedPage, preGroupedPartitionHashStrategy, 0); // Add the section of the page that contains values for the current group pagesIndex.addPage(page.getRegion(0, groupEnd)); if (page.getPositionCount() - groupEnd > 0) { // Save the remaining page, which may contain multiple partitions return page.getRegion(groupEnd, page.getPositionCount() - groupEnd); } else { // Page fully consumed return null; } } else { // We had previous results buffered, but the new page starts with new group values return page; } }
// Assumes input grouped on relevant pagesHashStrategy columns private static int findGroupEnd( Page page, PagesHashStrategy pagesHashStrategy, int startPosition) { checkArgument(page.getPositionCount() > 0, "Must have at least one position"); checkPositionIndex(startPosition, page.getPositionCount(), "startPosition out of bounds"); // Short circuit if the whole page has the same value if (pagesHashStrategy.rowEqualsRow( startPosition, page.getBlocks(), page.getPositionCount() - 1, page.getBlocks())) { return page.getPositionCount(); } // TODO: do position binary search int endPosition = startPosition + 1; while (endPosition < page.getPositionCount() && pagesHashStrategy.rowEqualsRow( endPosition - 1, page.getBlocks(), endPosition, page.getBlocks())) { endPosition++; } return endPosition; }
/**
 * Computes the bucket of the row at {@code position} by reading a BIGINT value from every block
 * of the page, hashing each with XxHash64 and folding the hashes together with a multiplier
 * of 31.
 *
 * @param page the page holding the row
 * @param position the row's position within the page
 * @return the combined hash, reduced to a non-negative int, modulo {@code bucketCount}
 */
@SuppressWarnings("NumericCastThatLosesPrecision")
@Override
public int getBucket(Page page, int position) {
    long combinedHash = 0;
    for (Block channelBlock : page.getBlocks()) {
        long channelHash = XxHash64.hash(BIGINT.getLong(channelBlock, position));
        combinedHash = (31 * combinedHash) + channelHash;
    }

    // Keep only the low 31 bits so the int is never negative before taking the remainder
    int nonNegativeHash = (int) (combinedHash & Integer.MAX_VALUE);
    return nonNegativeHash % bucketCount;
}
@Override public void addInput(Page page) { requireNonNull(page, "page is null"); checkState(!finishing, "Operator is finishing"); checkState(channelSet != null, "Set has not been built yet"); checkState(outputPage == null, "Operator still has pending output"); // create the block builder for the new boolean column // we know the exact size required for the block BlockBuilder blockBuilder = BOOLEAN.createFixedSizeBlockBuilder(page.getPositionCount()); Page probeJoinPage = new Page(page.getBlock(probeJoinChannel)); // update hashing strategy to use probe cursor for (int position = 0; position < page.getPositionCount(); position++) { if (probeJoinPage.getBlock(0).isNull(position)) { throw new PrestoException( NOT_SUPPORTED, "NULL values are not allowed on the probe side of SemiJoin operator. See the query plan for details."); } else { boolean contains = channelSet.contains(position, probeJoinPage); if (!contains && channelSet.containsNull()) { blockBuilder.appendNull(); } else { BOOLEAN.writeBoolean(blockBuilder, contains); } } } // add the new boolean column to the page Block[] sourceBlocks = page.getBlocks(); Block[] outputBlocks = new Block[sourceBlocks.length + 1]; // +1 for the single boolean output channel System.arraycopy(sourceBlocks, 0, outputBlocks, 0, sourceBlocks.length); outputBlocks[sourceBlocks.length] = blockBuilder.build(); outputPage = new Page(outputBlocks); }