// For testing
public static ScanRanges createSingleSpan(RowKeySchema schema, List<List<KeyRange>> ranges) {
  return create(
      schema,
      ranges,
      ScanUtil.getDefaultSlotSpans(ranges.size()),
      KeyRange.EVERYTHING_RANGE,
      null,
      true,
      -1);
}
/**
 * Converts a partially qualified KeyRange into a KeyRange with an inclusive lower bound and an
 * exclusive upper bound, widening as necessary.
 */
public static KeyRange convertToInclusiveExclusiveRange(
    KeyRange partialRange, RowKeySchema schema, ImmutableBytesWritable ptr) {
  // Ensure minMaxRange is lower inclusive and upper exclusive, as that's
  // what we need to intersect against for the HBase scan.
  byte[] lowerRange = partialRange.getLowerRange();
  if (!partialRange.lowerUnbound()) {
    if (!partialRange.isLowerInclusive()) {
      lowerRange = ScanUtil.nextKey(lowerRange, schema, ptr);
    }
  }
  byte[] upperRange = partialRange.getUpperRange();
  if (!partialRange.upperUnbound()) {
    if (partialRange.isUpperInclusive()) {
      upperRange = ScanUtil.nextKey(upperRange, schema, ptr);
    }
  }
  if (partialRange.getLowerRange() != lowerRange || partialRange.getUpperRange() != upperRange) {
    partialRange = KeyRange.getKeyRange(lowerRange, upperRange);
  }
  return partialRange;
}
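/*
 * A minimal, standalone sketch of the [inclusive, exclusive) widening performed above, assuming
 * simple keys where "next key" means incrementing the last byte with carry: an exclusive lower
 * bound and an inclusive upper bound are each replaced by the next key. It does not model what
 * the schema-aware ScanUtil.nextKey used above has to account for (variable-width fields,
 * separator bytes, sort order), so it is illustrative only.
 */
import java.util.Arrays;

public class InclusiveExclusiveSketch {
  static byte[] nextKey(byte[] key) {
    byte[] next = Arrays.copyOf(key, key.length);
    for (int i = next.length - 1; i >= 0; i--) {
      if (++next[i] != 0) {
        return next; // no carry needed, done
      }
      // 0xFF rolled over to 0x00, keep carrying to the left
    }
    return next; // all bytes were 0xFF; a caller would treat this as unbounded
  }

  public static void main(String[] args) {
    byte[] lower = {0x01, 0x02}; // exclusive lower bound
    byte[] upper = {0x01, 0x05}; // inclusive upper bound
    byte[] newLower = nextKey(lower); // {0x01, 0x03} becomes the inclusive lower bound
    byte[] newUpper = nextKey(upper); // {0x01, 0x06} becomes the exclusive upper bound
    System.out.println(Arrays.toString(newLower) + " .. " + Arrays.toString(newUpper));
  }
}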
private static Collection<?> foreach(
    KeyRange[][] ranges, int[] widths, KeyRange[] expectedSplits) {
  RowKeySchema schema = buildSchema(widths);
  List<List<KeyRange>> slots = Lists.transform(Lists.newArrayList(ranges), ARRAY_TO_LIST);
  SkipScanFilter filter = new SkipScanFilter(slots, schema);
  // Always set start and stop key to max to verify we are using the information in skipscan
  // filter over the scan's KMIN and KMAX.
  Scan scan = new Scan().setFilter(filter);
  ScanRanges scanRanges =
      ScanRanges.create(schema, slots, ScanUtil.getDefaultSlotSpans(ranges.length));
  List<Object> ret = Lists.newArrayList();
  ret.add(new Object[] {scan, scanRanges, Arrays.<KeyRange>asList(expectedSplits)});
  return ret;
}
private static List<byte[]> getPointKeys(
    List<List<KeyRange>> ranges, int[] slotSpan, RowKeySchema schema, Integer bucketNum) {
  if (ranges == null || ranges.isEmpty()) {
    return Collections.emptyList();
  }
  boolean isSalted = bucketNum != null;
  int count = 1;
  int offset = isSalted ? 1 : 0; // Skip salt byte range in the first position if salted
  for (int i = offset; i < ranges.size(); i++) {
    count *= ranges.get(i).size();
  }
  List<byte[]> keys = Lists.newArrayListWithExpectedSize(count);
  int[] position = new int[ranges.size()];
  int maxKeyLength = SchemaUtil.getMaxKeyLength(schema, ranges);
  int length;
  byte[] key = new byte[maxKeyLength];
  do {
    length =
        ScanUtil.setKey(
            schema, ranges, slotSpan, position, Bound.LOWER, key, offset, offset, ranges.size(),
            offset);
    if (isSalted) {
      key[0] = SaltingUtil.getSaltingByte(key, offset, length, bucketNum);
    }
    keys.add(Arrays.copyOf(key, length + offset));
  } while (incrementKey(ranges, position));
  return keys;
}
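/*
 * Standalone sketch of the cartesian enumeration getPointKeys relies on, assuming nothing beyond
 * the JDK: a position array is advanced odometer-style across the per-slot value lists, mirroring
 * the private incrementKey helper used above. Plain strings stand in for KeyRanges, and the salt
 * byte handling is ignored.
 */
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PointKeyEnumerationSketch {
  static boolean increment(List<List<String>> slots, int[] position) {
    int idx = slots.size() - 1;
    while (idx >= 0 && ++position[idx] >= slots.get(idx).size()) {
      position[idx] = 0; // roll this slot over and carry into the slot to its left
      idx--;
    }
    return idx >= 0; // false once every combination has been produced
  }

  public static void main(String[] args) {
    List<List<String>> slots =
        Arrays.asList(Arrays.asList("a", "b"), Arrays.asList("1", "2", "3"));
    int[] position = new int[slots.size()];
    List<String> keys = new ArrayList<>();
    do {
      StringBuilder key = new StringBuilder();
      for (int i = 0; i < slots.size(); i++) {
        key.append(slots.get(i).get(position[i]));
      }
      keys.add(key.toString());
    } while (increment(slots, position));
    System.out.println(keys); // [a1, a2, a3, b1, b2, b3]
  }
}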
/**
 * Performs a binary search over the list of KeyRanges for the earliest (tightest) slot whose
 * upper bound is greater than or equal to the bound pointed to by ptr.
 *
 * @return the index of the first slot whose upper bound is greater than or equal to ptr, or
 *     slots.size() if every slot's upper bound is smaller than ptr.
 */
public static int searchClosestKeyRangeWithUpperHigherThanPtr(
    List<KeyRange> slots, ImmutableBytesWritable ptr, int lower, Field field) {
  int upper = slots.size() - 1;
  int mid;
  BytesComparator comparator =
      ScanUtil.getComparator(field.getDataType().isFixedWidth(), field.getSortOrder());
  while (lower <= upper) {
    mid = (lower + upper) / 2;
    int cmp = slots.get(mid).compareUpperToLowerBound(ptr, true, comparator);
    if (cmp < 0) {
      lower = mid + 1;
    } else if (cmp > 0) {
      upper = mid - 1;
    } else {
      return mid;
    }
  }
  mid = (lower + upper) / 2;
  if (mid == 0 && slots.get(mid).compareUpperToLowerBound(ptr, true, comparator) > 0) {
    return mid;
  } else {
    return ++mid;
  }
}
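/*
 * Standalone sketch of the binary search semantics above, with plain integers standing in for
 * KeyRange upper bounds and the byte[] pointed to by ptr: find the first range whose upper bound
 * is greater than or equal to the probe, or size() when every range lies below it. It uses the
 * conventional "return lower" formulation, which yields the same result as the tail logic above.
 */
import java.util.Arrays;
import java.util.List;

public class ClosestRangeSearchSketch {
  static int searchClosest(List<Integer> upperBounds, int probe) {
    int lower = 0;
    int upper = upperBounds.size() - 1;
    while (lower <= upper) {
      int mid = (lower + upper) >>> 1;
      int cmp = Integer.compare(upperBounds.get(mid), probe);
      if (cmp < 0) {
        lower = mid + 1;
      } else if (cmp > 0) {
        upper = mid - 1;
      } else {
        return mid; // exact match
      }
    }
    return lower; // index of the first upper bound greater than the probe, or size()
  }

  public static void main(String[] args) {
    List<Integer> uppers = Arrays.asList(10, 20, 30, 40);
    System.out.println(searchClosest(uppers, 25)); // 2 -> the range whose upper bound is 30
    System.out.println(searchClosest(uppers, 50)); // 4 -> past the last range
  }
}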
public static int setKey(
    RowKeySchema schema,
    List<List<KeyRange>> slots,
    int[] slotSpan,
    int[] position,
    Bound bound,
    byte[] key,
    int byteOffset,
    int slotStartIndex,
    int slotEndIndex,
    int schemaStartIndex) {
  int offset = byteOffset;
  boolean lastInclusiveUpperSingleKey = false;
  boolean anyInclusiveUpperRangeKey = false;
  // The index used for slots should be incremented by 1,
  // but the index for the field it represents in the schema
  // should be incremented by 1 + value in the current slotSpan index
  // slotSpan stores the number of columns beyond one that the range spans
  Field field = null;
  int i = slotStartIndex, fieldIndex = ScanUtil.getRowKeyPosition(slotSpan, slotStartIndex);
  for (i = slotStartIndex; i < slotEndIndex; i++) {
    // Build up the key by appending the bound of each key range
    // from the current position of each slot.
    KeyRange range = slots.get(i).get(position[i]);
    // Use last slot in a multi-span column to determine if fixed width
    field = schema.getField(fieldIndex + slotSpan[i]);
    boolean isFixedWidth = field.getDataType().isFixedWidth();
    fieldIndex += slotSpan[i] + 1;
    /*
     * If the current slot is unbound then stop if:
     * 1) setting the upper bound. There's no value in
     *    continuing because nothing will be filtered.
     * 2) setting the lower bound when the type is fixed length
     *    for the same reason. However, if the type is variable width
     *    continue building the key because null values will be filtered
     *    since our separator byte will be appended and incremented.
     */
    if (range.isUnbound(bound) && (bound == Bound.UPPER || isFixedWidth)) {
      break;
    }
    byte[] bytes = range.getRange(bound);
    System.arraycopy(bytes, 0, key, offset, bytes.length);
    offset += bytes.length;
    /*
     * We must add a terminator to a variable length key even for the last PK column if
     * the lower key is non inclusive or the upper key is inclusive. Otherwise, we'd be
     * incrementing the key value itself, and thus bumping it up too much.
     */
    boolean inclusiveUpper = range.isInclusive(bound) && bound == Bound.UPPER;
    boolean exclusiveLower = !range.isInclusive(bound) && bound == Bound.LOWER;
    // If we are setting the upper bound using an inclusive single key, remember
    // to increment the key if we exit the loop after this iteration.
    //
    // We remember to increment the last slot if we are setting the upper bound with an
    // inclusive range key.
    //
    // We cannot combine the two flags because of the case of a single inclusive key
    // followed by an exclusive range key: there the key must not be incremented at the
    // end, but a combined flag would be set to true by the single inclusive key in the
    // middle of the key slots.
    lastInclusiveUpperSingleKey = range.isSingleKey() && inclusiveUpper;
    anyInclusiveUpperRangeKey |= !range.isSingleKey() && inclusiveUpper;
    // A null or empty byte array is always represented as a zero byte
    byte sepByte =
        SchemaUtil.getSeparatorByte(schema.rowKeyOrderOptimizable(), bytes.length == 0, field);
    if (!isFixedWidth
        && (fieldIndex < schema.getMaxFields()
            || inclusiveUpper
            || exclusiveLower
            || sepByte == QueryConstants.DESC_SEPARATOR_BYTE)) {
      key[offset++] = sepByte;
      // Set lastInclusiveUpperSingleKey back to false if this is the last pk column
      // as we don't want to increment the null byte in this case
      lastInclusiveUpperSingleKey &= i < schema.getMaxFields() - 1;
    }
    // If we are setting the lower bound with an exclusive range key, we need to bump the
    // slot up for each key part. For an upper bound, we bump up an inclusive key, but
    // only after the last key part.
    if (exclusiveLower) {
      if (!ByteUtil.nextKey(key, offset)) {
        // Special case for not being able to increment.
        // In this case we return a negative byteOffset to
        // remove this part from the key being formed. Since the
        // key has overflowed, this means that we should not
        // have an end key specified.
        return -byteOffset;
      }
      // We're filtering on values being non null here, but we still need the 0xFF
      // terminator, since DESC keys ignore the last byte as it's expected to be
      // the terminator. Without this, we'd ignore the separator byte that was
      // just added and incremented.
      if (!isFixedWidth
          && bytes.length == 0
          && SchemaUtil.getSeparatorByte(schema.rowKeyOrderOptimizable(), false, field)
              == QueryConstants.DESC_SEPARATOR_BYTE) {
        key[offset++] = QueryConstants.DESC_SEPARATOR_BYTE;
      }
    }
  }
  if (lastInclusiveUpperSingleKey || anyInclusiveUpperRangeKey) {
    if (!ByteUtil.nextKey(key, offset)) {
      // Special case for not being able to increment.
      // In this case we return a negative byteOffset to
      // remove this part from the key being formed. Since the
      // key has overflowed, this means that we should not
      // have an end key specified.
      return -byteOffset;
    }
  }
  // Remove trailing separator bytes, since the columns may have been added
  // after the table has data, in which case there won't be a separator
  // byte.
  if (bound == Bound.LOWER) {
    while (--i >= schemaStartIndex
        && offset > byteOffset
        && !(field = schema.getField(--fieldIndex)).getDataType().isFixedWidth()
        && field.getSortOrder() == SortOrder.ASC
        && key[offset - 1] == QueryConstants.SEPARATOR_BYTE) {
      offset--;
      fieldIndex -= slotSpan[i];
    }
  }
  return offset - byteOffset;
}
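/*
 * Greatly simplified, standalone sketch of what setKey does when building a LOWER bound: append
 * the chosen bound of each slot, add a zero separator after variable-width values, and advance
 * the key built so far when the bound is exclusive. Slot spans, descending sort order, overflow
 * handling, and the trailing-separator trimming above are all omitted; the helper name is
 * hypothetical.
 */
import java.util.Arrays;

public class LowerKeySketch {
  static int appendLowerBound(
      byte[] key, int offset, byte[] bound, boolean isFixedWidth, boolean inclusive) {
    System.arraycopy(bound, 0, key, offset, bound.length);
    offset += bound.length;
    if (!isFixedWidth) {
      key[offset++] = 0; // separator byte terminating a variable-width value
    }
    if (!inclusive) {
      // Exclusive lower bound: advance the key built so far to the next possible key.
      for (int i = offset - 1; i >= 0 && ++key[i] == 0; i--) {}
    }
    return offset;
  }

  public static void main(String[] args) {
    byte[] key = new byte[32];
    int offset = 0;
    offset = appendLowerBound(key, offset, "abc".getBytes(), false, true); // variable, inclusive
    offset = appendLowerBound(key, offset, new byte[] {0x05}, true, false); // fixed, exclusive
    System.out.println(Arrays.toString(Arrays.copyOf(key, offset))); // [97, 98, 99, 0, 6]
  }
}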
public static boolean intersectScanRange(
    Scan scan, byte[] startKey, byte[] stopKey, boolean useSkipScan) {
  boolean mayHaveRows = false;
  int offset = 0;
  if (ScanUtil.isLocalIndex(scan)) {
    offset = startKey.length != 0 ? startKey.length : stopKey.length;
  }
  byte[] existingStartKey = scan.getStartRow();
  byte[] existingStopKey = scan.getStopRow();
  if (existingStartKey.length > 0) {
    if (startKey.length == 0 || Bytes.compareTo(existingStartKey, startKey) > 0) {
      startKey = existingStartKey;
    }
  } else {
    mayHaveRows = true;
  }
  if (existingStopKey.length > 0) {
    if (stopKey.length == 0 || Bytes.compareTo(existingStopKey, stopKey) < 0) {
      stopKey = existingStopKey;
    }
  } else {
    mayHaveRows = true;
  }
  scan.setStartRow(startKey);
  scan.setStopRow(stopKey);
  if (offset > 0 && useSkipScan) {
    byte[] temp = null;
    if (startKey.length != 0) {
      temp = new byte[startKey.length - offset];
      System.arraycopy(startKey, offset, temp, 0, startKey.length - offset);
      startKey = temp;
    }
    if (stopKey.length != 0) {
      temp = new byte[stopKey.length - offset];
      System.arraycopy(stopKey, offset, temp, 0, stopKey.length - offset);
      stopKey = temp;
    }
  }
  mayHaveRows = mayHaveRows || Bytes.compareTo(scan.getStartRow(), scan.getStopRow()) < 0;
  // If the scan is using skip scan filter, intersect and replace the filter.
  if (mayHaveRows && useSkipScan) {
    Filter filter = scan.getFilter();
    if (filter instanceof SkipScanFilter) {
      SkipScanFilter oldFilter = (SkipScanFilter) filter;
      SkipScanFilter newFilter = oldFilter.intersect(startKey, stopKey);
      if (newFilter == null) {
        return false;
      }
      // Intersect found: replace skip scan with intersected one
      scan.setFilter(newFilter);
    } else if (filter instanceof FilterList) {
      FilterList oldList = (FilterList) filter;
      FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      for (Filter f : oldList.getFilters()) {
        if (f instanceof SkipScanFilter) {
          SkipScanFilter newFilter = ((SkipScanFilter) f).intersect(startKey, stopKey);
          if (newFilter == null) {
            return false;
          }
          newList.addFilter(newFilter);
        } else {
          newList.addFilter(f);
        }
      }
      scan.setFilter(newList);
    }
  }
  return mayHaveRows;
}
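/*
 * Minimal standalone sketch of the start/stop intersection arithmetic above: an empty key means
 * "unbounded", so the intersection takes the larger of the two start keys and the smaller of the
 * two stop keys, then checks that start < stop. Unsigned byte comparison mirrors HBase's
 * Bytes.compareTo; the skip-scan filter handling and local-index offset logic are omitted.
 */
public class ScanRangeIntersectionSketch {
  static int compareUnsigned(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
      int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
      if (cmp != 0) {
        return cmp;
      }
    }
    return a.length - b.length;
  }

  /** Returns {start, stop} of the intersection, or null if it is provably empty. */
  static byte[][] intersect(byte[] startA, byte[] stopA, byte[] startB, byte[] stopB) {
    byte[] start =
        startA.length == 0
            ? startB
            : (startB.length == 0 || compareUnsigned(startA, startB) >= 0) ? startA : startB;
    byte[] stop =
        stopA.length == 0
            ? stopB
            : (stopB.length == 0 || compareUnsigned(stopA, stopB) <= 0) ? stopA : stopB;
    if (start.length > 0 && stop.length > 0 && compareUnsigned(start, stop) >= 0) {
      return null; // provably empty intersection
    }
    return new byte[][] {start, stop};
  }

  public static void main(String[] args) {
    byte[][] r = intersect(new byte[] {2}, new byte[] {9}, new byte[] {5}, new byte[0]);
    if (r == null) {
      System.out.println("empty");
    } else {
      // Prints "5 .. 9": the tighter start key and the tighter stop key win.
      System.out.println(
          r[0][0] + " .. " + (r[1].length == 0 ? "unbounded" : String.valueOf(r[1][0])));
    }
  }
}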
public ServerCache addServerCache( ScanRanges keyRanges, final ImmutableBytesWritable cachePtr, final byte[] txState, final ServerCacheFactory cacheFactory, final TableRef cacheUsingTableRef) throws SQLException { ConnectionQueryServices services = connection.getQueryServices(); MemoryChunk chunk = services.getMemoryManager().allocate(cachePtr.getLength()); List<Closeable> closeables = new ArrayList<Closeable>(); closeables.add(chunk); ServerCache hashCacheSpec = null; SQLException firstException = null; final byte[] cacheId = generateId(); /** Execute EndPoint in parallel on each server to send compressed hash cache */ // TODO: generalize and package as a per region server EndPoint caller // (ideally this would be functionality provided by the coprocessor framework) boolean success = false; ExecutorService executor = services.getExecutor(); List<Future<Boolean>> futures = Collections.emptyList(); try { final PTable cacheUsingTable = cacheUsingTableRef.getTable(); List<HRegionLocation> locations = services.getAllTableRegions(cacheUsingTable.getPhysicalName().getBytes()); int nRegions = locations.size(); // Size these based on worst case futures = new ArrayList<Future<Boolean>>(nRegions); Set<HRegionLocation> servers = new HashSet<HRegionLocation>(nRegions); for (HRegionLocation entry : locations) { // Keep track of servers we've sent to and only send once byte[] regionStartKey = entry.getRegionInfo().getStartKey(); byte[] regionEndKey = entry.getRegionInfo().getEndKey(); if (!servers.contains(entry) && keyRanges.intersects( regionStartKey, regionEndKey, cacheUsingTable.getIndexType() == IndexType.LOCAL ? ScanUtil.getRowKeyOffset(regionStartKey, regionEndKey) : 0, true)) { // Call RPC once per server servers.add(entry); if (LOG.isDebugEnabled()) { LOG.debug( addCustomAnnotations("Adding cache entry to be sent for " + entry, connection)); } final byte[] key = entry.getRegionInfo().getStartKey(); final HTableInterface htable = services.getTable(cacheUsingTableRef.getTable().getPhysicalName().getBytes()); closeables.add(htable); futures.add( executor.submit( new JobCallable<Boolean>() { @Override public Boolean call() throws Exception { final Map<byte[], AddServerCacheResponse> results; try { results = htable.coprocessorService( ServerCachingService.class, key, key, new Batch.Call<ServerCachingService, AddServerCacheResponse>() { @Override public AddServerCacheResponse call(ServerCachingService instance) throws IOException { ServerRpcController controller = new ServerRpcController(); BlockingRpcCallback<AddServerCacheResponse> rpcCallback = new BlockingRpcCallback<AddServerCacheResponse>(); AddServerCacheRequest.Builder builder = AddServerCacheRequest.newBuilder(); if (connection.getTenantId() != null) { try { byte[] tenantIdBytes = ScanUtil.getTenantIdBytes( cacheUsingTable.getRowKeySchema(), cacheUsingTable.getBucketNum() != null, connection.getTenantId(), cacheUsingTable.isMultiTenant()); builder.setTenantId(ByteStringer.wrap(tenantIdBytes)); } catch (SQLException e) { throw new IOException(e); } } builder.setCacheId(ByteStringer.wrap(cacheId)); builder.setCachePtr( org.apache.phoenix.protobuf.ProtobufUtil.toProto(cachePtr)); ServerCacheFactoryProtos.ServerCacheFactory.Builder svrCacheFactoryBuilder = ServerCacheFactoryProtos.ServerCacheFactory .newBuilder(); svrCacheFactoryBuilder.setClassName( cacheFactory.getClass().getName()); builder.setCacheFactory(svrCacheFactoryBuilder.build()); builder.setTxState(HBaseZeroCopyByteString.wrap(txState)); instance.addServerCache( controller, builder.build(), 
rpcCallback); if (controller.getFailedOn() != null) { throw controller.getFailedOn(); } return rpcCallback.get(); } }); } catch (Throwable t) { throw new Exception(t); } if (results != null && results.size() == 1) { return results.values().iterator().next().getReturn(); } return false; } /** * Defines the grouping for round robin behavior. All threads spawned to process * this scan will be grouped together and time sliced with other simultaneously * executing parallel scans. */ @Override public Object getJobId() { return ServerCacheClient.this; } @Override public TaskExecutionMetricsHolder getTaskExecutionMetric() { return NO_OP_INSTANCE; } })); } else { if (LOG.isDebugEnabled()) { LOG.debug( addCustomAnnotations( "NOT adding cache entry to be sent for " + entry + " since one already exists for that entry", connection)); } } } hashCacheSpec = new ServerCache(cacheId, servers, cachePtr.getLength()); // Execute in parallel int timeoutMs = services .getProps() .getInt( QueryServices.THREAD_TIMEOUT_MS_ATTRIB, QueryServicesOptions.DEFAULT_THREAD_TIMEOUT_MS); for (Future<Boolean> future : futures) { future.get(timeoutMs, TimeUnit.MILLISECONDS); } cacheUsingTableRefMap.put(Bytes.mapKey(cacheId), cacheUsingTableRef); success = true; } catch (SQLException e) { firstException = e; } catch (Exception e) { firstException = new SQLException(e); } finally { try { if (!success) { SQLCloseables.closeAllQuietly(Collections.singletonList(hashCacheSpec)); for (Future<Boolean> future : futures) { future.cancel(true); } } } finally { try { Closeables.closeAll(closeables); } catch (IOException e) { if (firstException == null) { firstException = new SQLException(e); } } finally { if (firstException != null) { throw firstException; } } } } if (LOG.isDebugEnabled()) { LOG.debug( addCustomAnnotations("Cache " + cacheId + " successfully added to servers.", connection)); } return hashCacheSpec; }
@Override protected RegionScanner doPostScannerOpen( final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s) throws IOException { byte[] isUngroupedAgg = scan.getAttribute(BaseScannerRegionObserver.UNGROUPED_AGG); if (isUngroupedAgg == null) { return s; } final ScanProjector p = ScanProjector.deserializeProjectorFromScan(scan); final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan); RegionScanner theScanner = s; if (p != null || j != null) { theScanner = new HashJoinRegionScanner(s, p, j, ScanUtil.getTenantId(scan), c.getEnvironment()); } final RegionScanner innerScanner = theScanner; byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID); PTable projectedTable = null; List<Expression> selectExpressions = null; byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE); boolean isUpsert = false; boolean isDelete = false; byte[] deleteCQ = null; byte[] deleteCF = null; byte[][] values = null; byte[] emptyCF = null; ImmutableBytesWritable ptr = null; if (upsertSelectTable != null) { isUpsert = true; projectedTable = deserializeTable(upsertSelectTable); selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS)); values = new byte[projectedTable.getPKColumns().size()][]; ptr = new ImmutableBytesWritable(); } else { byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG); isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0; if (!isDelete) { deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF); deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ); } emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF); } int batchSize = 0; long ts = scan.getTimeRange().getMax(); HRegion region = c.getEnvironment().getRegion(); List<Mutation> mutations = Collections.emptyList(); if (isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null) { // TODO: size better mutations = Lists.newArrayListWithExpectedSize(1024); batchSize = c.getEnvironment() .getConfiguration() .getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE); } Aggregators aggregators = ServerAggregators.deserialize( scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), c.getEnvironment().getConfiguration()); Aggregator[] rowAggregators = aggregators.getAggregators(); boolean hasMore; boolean hasAny = false; MultiKeyValueTuple result = new MultiKeyValueTuple(); if (logger.isInfoEnabled()) { logger.info("Starting ungrouped coprocessor scan " + scan); } long rowCount = 0; region.startRegionOperation(); try { do { List<Cell> results = new ArrayList<Cell>(); // Results are potentially returned even when the return value of s.next is false // since this is an indication of whether or not there are more values after the // ones returned hasMore = innerScanner.nextRaw(results); if (!results.isEmpty()) { rowCount++; result.setKeyValues(results); try { if (isDelete) { // FIXME: the version of the Delete constructor without the lock args was introduced // in 0.94.4, thus if we try to use it here we can no longer use the 0.94.2 version // of the client. 
Cell firstKV = results.get(0); Delete delete = new Delete( firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength(), ts); mutations.add(delete); } else if (isUpsert) { Arrays.fill(values, null); int i = 0; List<PColumn> projectedColumns = projectedTable.getColumns(); for (; i < projectedTable.getPKColumns().size(); i++) { Expression expression = selectExpressions.get(i); if (expression.evaluate(result, ptr)) { values[i] = ptr.copyBytes(); // If SortOrder from expression in SELECT doesn't match the // column being projected into then invert the bits. if (expression.getSortOrder() != projectedColumns.get(i).getSortOrder()) { SortOrder.invert(values[i], 0, values[i], 0, values[i].length); } } } projectedTable.newKey(ptr, values); PRow row = projectedTable.newRow(kvBuilder, ts, ptr); for (; i < projectedColumns.size(); i++) { Expression expression = selectExpressions.get(i); if (expression.evaluate(result, ptr)) { PColumn column = projectedColumns.get(i); Object value = expression.getDataType().toObject(ptr, column.getSortOrder()); // We are guaranteed that the two columns will have the same type. if (!column .getDataType() .isSizeCompatible( ptr, value, column.getDataType(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) { throw new ValueTypeIncompatibleException( column.getDataType(), column.getMaxLength(), column.getScale()); } column .getDataType() .coerceBytes( ptr, value, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder()); byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr); row.setValue(column, bytes); } } for (Mutation mutation : row.toRowMutations()) { mutations.add(mutation); } } else if (deleteCF != null && deleteCQ != null) { // No need to search for delete column, since we project only it // if no empty key value is being set if (emptyCF == null || result.getValue(deleteCF, deleteCQ) != null) { Delete delete = new Delete( results.get(0).getRowArray(), results.get(0).getRowOffset(), results.get(0).getRowLength()); delete.deleteColumns(deleteCF, deleteCQ, ts); mutations.add(delete); } } if (emptyCF != null) { /* * If we've specified an emptyCF, then we need to insert an empty * key value "retroactively" for any key value that is visible at * the timestamp that the DDL was issued. Key values that are not * visible at this timestamp will not ever be projected up to * scans past this timestamp, so don't need to be considered. * We insert one empty key value per row per timestamp. 
*/ Set<Long> timeStamps = Sets.newHashSetWithExpectedSize(results.size()); for (Cell kv : results) { long kvts = kv.getTimestamp(); if (!timeStamps.contains(kvts)) { Put put = new Put(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()); put.add( emptyCF, QueryConstants.EMPTY_COLUMN_BYTES, kvts, ByteUtil.EMPTY_BYTE_ARRAY); mutations.add(put); } } } // Commit in batches based on UPSERT_BATCH_SIZE_ATTRIB in config if (!mutations.isEmpty() && batchSize > 0 && mutations.size() % batchSize == 0) { commitBatch(region, mutations, indexUUID); mutations.clear(); } } catch (ConstraintViolationException e) { // Log and ignore in count logger.error( "Failed to create row in " + region.getRegionNameAsString() + " with values " + SchemaUtil.toString(values), e); continue; } aggregators.aggregate(rowAggregators, result); hasAny = true; } } while (hasMore); } finally { innerScanner.close(); region.closeRegionOperation(); } if (logger.isInfoEnabled()) { logger.info("Finished scanning " + rowCount + " rows for ungrouped coprocessor scan " + scan); } if (!mutations.isEmpty()) { commitBatch(region, mutations, indexUUID); } final boolean hadAny = hasAny; KeyValue keyValue = null; if (hadAny) { byte[] value = aggregators.toBytes(rowAggregators); keyValue = KeyValueUtil.newKeyValue( UNGROUPED_AGG_ROW_KEY, SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length); } final KeyValue aggKeyValue = keyValue; RegionScanner scanner = new BaseRegionScanner() { private boolean done = !hadAny; @Override public HRegionInfo getRegionInfo() { return innerScanner.getRegionInfo(); } @Override public boolean isFilterDone() { return done; } @Override public void close() throws IOException { innerScanner.close(); } @Override public boolean next(List<Cell> results) throws IOException { if (done) return false; done = true; results.add(aggKeyValue); return false; } @Override public long getMaxResultSize() { return scan.getMaxResultSize(); } }; return scanner; }
public int getBoundPkColumnCount() { return this.useSkipScanFilter ? ScanUtil.getRowKeyPosition(slotSpan, ranges.size()) : getBoundPkSpan(ranges, slotSpan); }
public Scan intersectScan( Scan scan, final byte[] originalStartKey, final byte[] originalStopKey, final int keyOffset, boolean crossesRegionBoundary) { byte[] startKey = originalStartKey; byte[] stopKey = originalStopKey; if (stopKey.length > 0 && Bytes.compareTo(startKey, stopKey) >= 0) { return null; } boolean mayHaveRows = false; // Keep the keys as they are if we have a point lookup, as we've already resolved the // salt bytes in that case. final int scanKeyOffset = this.isSalted && !this.isPointLookup ? SaltingUtil.NUM_SALTING_BYTES : 0; assert (scanKeyOffset == 0 || keyOffset == 0); // Total offset for startKey/stopKey. Either 1 for salted tables or the prefix length // of the current region for local indexes. We'll never have a case where a table is // both salted and local. final int totalKeyOffset = scanKeyOffset + keyOffset; byte[] prefixBytes = ByteUtil.EMPTY_BYTE_ARRAY; if (totalKeyOffset > 0) { prefixBytes = ScanUtil.getPrefix(startKey, totalKeyOffset); /* * If our startKey to stopKey crosses a region boundary consider everything after the startKey as our scan * is always done within a single region. This prevents us from having to prefix the key prior to knowing * whether or not there may be an intersection. We can't calculate whether or not we've crossed a region * boundary for local indexes, because we don't know the key offset of the next region, but only for the * current one (which is the one passed in). If the next prefix happened to be a subset of the previous * prefix, then this wouldn't detect that we crossed a region boundary. */ if (crossesRegionBoundary) { stopKey = ByteUtil.EMPTY_BYTE_ARRAY; } } int scanStartKeyOffset = scanKeyOffset; byte[] scanStartKey = scan == null ? ByteUtil.EMPTY_BYTE_ARRAY : scan.getStartRow(); // Compare ignoring key prefix and salt byte if (scanStartKey.length > 0) { if (startKey.length > 0 && Bytes.compareTo( scanStartKey, scanKeyOffset, scanStartKey.length - scanKeyOffset, startKey, totalKeyOffset, startKey.length - totalKeyOffset) < 0) { scanStartKey = startKey; scanStartKeyOffset = totalKeyOffset; } } else { scanStartKey = startKey; scanStartKeyOffset = totalKeyOffset; mayHaveRows = true; } int scanStopKeyOffset = scanKeyOffset; byte[] scanStopKey = scan == null ? ByteUtil.EMPTY_BYTE_ARRAY : scan.getStopRow(); if (scanStopKey.length > 0) { if (stopKey.length > 0 && Bytes.compareTo( scanStopKey, scanKeyOffset, scanStopKey.length - scanKeyOffset, stopKey, totalKeyOffset, stopKey.length - totalKeyOffset) > 0) { scanStopKey = stopKey; scanStopKeyOffset = totalKeyOffset; } } else { scanStopKey = stopKey; scanStopKeyOffset = totalKeyOffset; mayHaveRows = true; } mayHaveRows = mayHaveRows || Bytes.compareTo( scanStartKey, scanStartKeyOffset, scanStartKey.length - scanStartKeyOffset, scanStopKey, scanStopKeyOffset, scanStopKey.length - scanStopKeyOffset) < 0; if (!mayHaveRows) { return null; } if (originalStopKey.length != 0 && scanStopKey.length == 0) { scanStopKey = originalStopKey; } Filter newFilter = null; // If the scan is using skip scan filter, intersect and replace the filter. if (scan == null || this.useSkipScanFilter()) { byte[] skipScanStartKey = scanStartKey; byte[] skipScanStopKey = scanStopKey; // If we have a keyOffset and we've used the startKey/stopKey that // were passed in (which have the prefix) for the above range check, // we need to remove the prefix before running our intersect method. 
// TODO: we could use skipScanFilter.setOffset(keyOffset) if both // the startKey and stopKey were used above *and* our intersect // method honored the skipScanFilter.offset variable. if (scanKeyOffset > 0) { if (skipScanStartKey != originalStartKey) { // original already has correct salt byte skipScanStartKey = replaceSaltByte(skipScanStartKey, prefixBytes); } if (skipScanStopKey != originalStopKey) { skipScanStopKey = replaceSaltByte(skipScanStopKey, prefixBytes); } } else if (keyOffset > 0) { if (skipScanStartKey == originalStartKey) { skipScanStartKey = stripPrefix(skipScanStartKey, keyOffset); } if (skipScanStopKey == originalStopKey) { skipScanStopKey = stripPrefix(skipScanStopKey, keyOffset); } } if (scan == null) { return filter.hasIntersect(skipScanStartKey, skipScanStopKey) ? HAS_INTERSECTION : null; } Filter filter = scan.getFilter(); SkipScanFilter newSkipScanFilter = null; if (filter instanceof SkipScanFilter) { SkipScanFilter oldSkipScanFilter = (SkipScanFilter) filter; newFilter = newSkipScanFilter = oldSkipScanFilter.intersect(skipScanStartKey, skipScanStopKey); if (newFilter == null) { return null; } } else if (filter instanceof FilterList) { FilterList oldList = (FilterList) filter; FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL); newFilter = newList; for (Filter f : oldList.getFilters()) { if (f instanceof SkipScanFilter) { newSkipScanFilter = ((SkipScanFilter) f).intersect(skipScanStartKey, skipScanStopKey); if (newSkipScanFilter == null) { return null; } newList.addFilter(newSkipScanFilter); } else { newList.addFilter(f); } } } // TODO: it seems that our SkipScanFilter or HBase runs into problems if we don't // have an enclosing range when we do a point lookup. if (isPointLookup) { scanStartKey = ScanUtil.getMinKey(schema, newSkipScanFilter.getSlots(), slotSpan); scanStopKey = ScanUtil.getMaxKey(schema, newSkipScanFilter.getSlots(), slotSpan); } } if (newFilter == null) { newFilter = scan.getFilter(); } Scan newScan = ScanUtil.newScan(scan); newScan.setFilter(newFilter); // If we have an offset (salted table or local index), we need to make sure to // prefix our scan start/stop row by the prefix of the startKey or stopKey that // were passed in. Our scan either doesn't have the prefix or has a placeholder // for it. if (totalKeyOffset > 0) { if (scanStartKey != originalStartKey) { scanStartKey = prefixKey(scanStartKey, scanKeyOffset, prefixBytes, keyOffset); } if (scanStopKey != originalStopKey) { scanStopKey = prefixKey(scanStopKey, scanKeyOffset, prefixBytes, keyOffset); } } // Don't let the stopRow of the scan go beyond the originalStopKey if (originalStopKey.length > 0 && Bytes.compareTo(scanStopKey, originalStopKey) > 0) { scanStopKey = originalStopKey; } if (scanStopKey.length > 0 && Bytes.compareTo(scanStartKey, scanStopKey) >= 0) { return null; } newScan.setAttribute(SCAN_ACTUAL_START_ROW, scanStartKey); newScan.setStartRow(scanStartKey); newScan.setStopRow(scanStopKey); if (keyOffset > 0) { newScan.setAttribute(STARTKEY_OFFSET, Bytes.toBytes(keyOffset)); } return newScan; }
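/*
 * Small standalone sketch of the prefix juggling intersectScan performs for salted tables and
 * local indexes: keys coming from the ranges lack the salt byte / region prefix while keys coming
 * from the scan carry it, so one side has to be stripped or re-prefixed before comparison or
 * before being handed to the skip-scan filter. The helper names are hypothetical stand-ins for
 * ScanRanges' private stripPrefix/prefixKey/replaceSaltByte methods.
 */
import java.util.Arrays;

public class KeyPrefixSketch {
  static byte[] stripPrefix(byte[] key, int prefixLength) {
    return key.length <= prefixLength
        ? new byte[0]
        : Arrays.copyOfRange(key, prefixLength, key.length);
  }

  static byte[] prefixKey(byte[] key, byte[] prefix) {
    byte[] result = new byte[prefix.length + key.length];
    System.arraycopy(prefix, 0, result, 0, prefix.length);
    System.arraycopy(key, 0, result, prefix.length, key.length);
    return result;
  }

  public static void main(String[] args) {
    byte[] salted = {0x0A, 'k', 'e', 'y'}; // 1-byte salt followed by the row key
    byte[] unsalted = stripPrefix(salted, 1); // "key"
    byte[] resalted = prefixKey(unsalted, new byte[] {0x0A});
    System.out.println(Arrays.equals(salted, resalted)); // true
  }
}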
public static ScanRanges create(
    RowKeySchema schema,
    List<List<KeyRange>> ranges,
    int[] slotSpan,
    KeyRange minMaxRange,
    Integer nBuckets,
    boolean useSkipScan,
    int rowTimestampColIndex) {
  int offset = nBuckets == null ? 0 : SaltingUtil.NUM_SALTING_BYTES;
  int nSlots = ranges.size();
  if (nSlots == offset && minMaxRange == KeyRange.EVERYTHING_RANGE) {
    return EVERYTHING;
  } else if (minMaxRange == KeyRange.EMPTY_RANGE
      || (nSlots == 1 + offset
          && ranges.get(offset).size() == 1
          && ranges.get(offset).get(0) == KeyRange.EMPTY_RANGE)) {
    return NOTHING;
  }
  TimeRange rowTimestampRange = getRowTimestampColumnRange(ranges, schema, rowTimestampColIndex);
  boolean isPointLookup = isPointLookup(schema, ranges, slotSpan, useSkipScan);
  if (isPointLookup) {
    // TODO: consider keeping original to use for serialization as it would be smaller?
    List<byte[]> keys = ScanRanges.getPointKeys(ranges, slotSpan, schema, nBuckets);
    List<KeyRange> keyRanges = Lists.newArrayListWithExpectedSize(keys.size());
    KeyRange unsaltedMinMaxRange = minMaxRange;
    if (nBuckets != null && minMaxRange != KeyRange.EVERYTHING_RANGE) {
      unsaltedMinMaxRange =
          KeyRange.getKeyRange(
              stripPrefix(minMaxRange.getLowerRange(), offset),
              minMaxRange.lowerUnbound(),
              stripPrefix(minMaxRange.getUpperRange(), offset),
              minMaxRange.upperUnbound());
    }
    // We have full keys here, so use field from our varbinary schema
    BytesComparator comparator =
        ScanUtil.getComparator(SchemaUtil.VAR_BINARY_SCHEMA.getField(0));
    for (byte[] key : keys) {
      // Filter now based on unsalted minMaxRange and ignore the point key salt byte
      if (unsaltedMinMaxRange.compareLowerToUpperBound(
                  key, offset, key.length - offset, true, comparator)
              <= 0
          && unsaltedMinMaxRange.compareUpperToLowerBound(
                  key, offset, key.length - offset, true, comparator)
              >= 0) {
        keyRanges.add(KeyRange.getKeyRange(key));
      }
    }
    ranges = Collections.singletonList(keyRanges);
    useSkipScan = keyRanges.size() > 1;
    // Treat as binary if descending because we've got a separator byte at the end
    // which is not part of the value.
    if (keys.size() > 1
        || SchemaUtil.getSeparatorByte(schema.rowKeyOrderOptimizable(), false, schema.getField(0))
            == QueryConstants.DESC_SEPARATOR_BYTE) {
      schema = SchemaUtil.VAR_BINARY_SCHEMA;
      slotSpan = ScanUtil.SINGLE_COLUMN_SLOT_SPAN;
    } else {
      // Keep original schema and don't use skip scan as it's not necessary
      // when there's a single key.
      slotSpan = new int[] {schema.getMaxFields() - 1};
    }
  }
  List<List<KeyRange>> sortedRanges = Lists.newArrayListWithExpectedSize(ranges.size());
  for (int i = 0; i < ranges.size(); i++) {
    List<KeyRange> sorted = Lists.newArrayList(ranges.get(i));
    Collections.sort(sorted, KeyRange.COMPARATOR);
    sortedRanges.add(ImmutableList.copyOf(sorted));
  }
  // Don't set minMaxRange for point lookup because it causes issues during intersect
  // by going across region boundaries
  KeyRange scanRange = KeyRange.EVERYTHING_RANGE;
  // if (!isPointLookup && (nBuckets == null || !useSkipScanFilter)) {
  // if (! ( isPointLookup || (nBuckets != null && useSkipScanFilter) ) ) {
  // if (nBuckets == null || (nBuckets != null && (!isPointLookup || !useSkipScanFilter))) {
  if (nBuckets == null || !isPointLookup || !useSkipScan) {
    byte[] minKey = ScanUtil.getMinKey(schema, sortedRanges, slotSpan);
    byte[] maxKey = ScanUtil.getMaxKey(schema, sortedRanges, slotSpan);
    // If the maxKey has crossed the salt byte boundary, then we do not
    // have anything to filter at the upper end of the range
    if (ScanUtil.crossesPrefixBoundary(maxKey, ScanUtil.getPrefix(minKey, offset), offset)) {
      maxKey = KeyRange.UNBOUND;
    }
    // We won't filter anything at the low end of the range if we just have the salt byte
    if (minKey.length <= offset) {
      minKey = KeyRange.UNBOUND;
    }
    scanRange = KeyRange.getKeyRange(minKey, maxKey);
  }
  if (minMaxRange != KeyRange.EVERYTHING_RANGE) {
    minMaxRange =
        ScanUtil.convertToInclusiveExclusiveRange(
            minMaxRange, schema, new ImmutableBytesWritable());
    scanRange = scanRange.intersect(minMaxRange);
  }
  if (scanRange == KeyRange.EMPTY_RANGE) {
    return NOTHING;
  }
  return new ScanRanges(
      schema,
      slotSpan,
      sortedRanges,
      scanRange,
      minMaxRange,
      useSkipScan,
      isPointLookup,
      nBuckets,
      rowTimestampRange);
}
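/*
 * Minimal standalone sketch of the point-key filtering step in create(): a candidate point key is
 * kept only when it falls inside the supplied min/max range. Both ends are treated as inclusive
 * here for simplicity, whereas the real code delegates the exact bound semantics to
 * KeyRange.compareLowerToUpperBound / compareUpperToLowerBound and skips the salt byte.
 */
public class PointKeyFilterSketch {
  static int compareUnsigned(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
      int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
      if (cmp != 0) {
        return cmp;
      }
    }
    return a.length - b.length;
  }

  /** Empty lower/upper arrays mean "unbounded" on that side. */
  static boolean inRange(byte[] key, byte[] lower, byte[] upper) {
    boolean aboveLower = lower.length == 0 || compareUnsigned(key, lower) >= 0;
    boolean belowUpper = upper.length == 0 || compareUnsigned(key, upper) <= 0;
    return aboveLower && belowUpper;
  }

  public static void main(String[] args) {
    byte[] lower = {0x02};
    byte[] upper = {0x08};
    System.out.println(inRange(new byte[] {0x05}, lower, upper)); // true
    System.out.println(inRange(new byte[] {0x09}, lower, upper)); // false
  }
}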