protected static List<ByteArrayRange> getQueryRanges( final BinnedNumericDataset[] binnedQueries, final SpaceFillingCurve sfc, final int maxRanges, final byte tier) { final List<ByteArrayRange> queryRanges = new ArrayList<ByteArrayRange>(); int maxRangeDecompositionPerBin = maxRanges; if ((maxRanges > 1) && (binnedQueries.length > 1)) { maxRangeDecompositionPerBin = (int) Math.ceil((double) maxRanges / (double) binnedQueries.length); } for (final BinnedNumericDataset binnedQuery : binnedQueries) { final RangeDecomposition rangeDecomp = sfc.decomposeRange(binnedQuery, true, maxRangeDecompositionPerBin); final byte[] tierAndBinId = ByteArrayUtils.combineArrays( new byte[] {tier // we're assuming tiers only go to 127 (the max byte // value) }, binnedQuery.getBinId()); for (final ByteArrayRange range : rangeDecomp.getRanges()) { queryRanges.add( new ByteArrayRange( new ByteArrayId( ByteArrayUtils.combineArrays(tierAndBinId, range.getStart().getBytes())), new ByteArrayId( ByteArrayUtils.combineArrays(tierAndBinId, range.getEnd().getBytes())))); } } return queryRanges; }
protected static List<ByteArrayId> decomposeRangesForEntry( final BinnedNumericDataset index, final byte tierId, final SpaceFillingCurve sfc) { final List<ByteArrayId> retVal = new ArrayList<ByteArrayId>(); final byte[] tierAndBinId = ByteArrayUtils.combineArrays(new byte[] {tierId}, index.getBinId()); final RangeDecomposition rangeDecomp = sfc.decomposeRange(index, false, DEFAULT_MAX_RANGES); // this range does not fit into a single row ID at the lowest // tier, decompose it for (final ByteArrayRange range : rangeDecomp.getRanges()) { final byte[] currentRowId = Arrays.copyOf(range.getStart().getBytes(), range.getStart().getBytes().length); retVal.add(new ByteArrayId(ByteArrayUtils.combineArrays(tierAndBinId, currentRowId))); while (!Arrays.equals(currentRowId, range.getEnd().getBytes())) { // increment until we reach the end row ID boolean overflow = !ByteArrayUtils.increment(currentRowId); if (!overflow) { retVal.add(new ByteArrayId(ByteArrayUtils.combineArrays(tierAndBinId, currentRowId))); } else { // the increment caused an overflow which shouldn't // ever happen assuming the start row ID is less // than the end row ID LOGGER.warn( "Row IDs overflowed when ingesting data; start of range decomposition must be less than or equal to end of range. This may be because the start of the decomposed range is higher than the end of the range."); overflow = true; break; } } } return retVal; }
/**
 * Computes the row IDs for an entry at one tier, or signals that this tier should not be used.
 *
 * <ul>
 *   <li>If the curve estimates exactly one ID for the entry, a single row ID is returned, built
 *       from the tier byte, the bin ID and the curve ID of the entry's per-dimension maximum
 *       values.
 *   <li>Otherwise, if no duplicate limit was given, the estimate is within the limit, or
 *       {@code sfcIndex} is 0, the entry is expanded via {@code decomposeRangesForEntry}.
 *   <li>Otherwise {@code null} is returned.
 * </ul>
 *
 * @param index the binned entry to index
 * @param tierId the tier ID byte placed at the front of every row ID
 * @param sfc the space filling curve for this tier
 * @param maxEstimatedDuplicateIds upper bound on the estimated ID count, or {@code null} for no
 *     bound
 * @param sfcIndex position of {@code sfc} among the ordered curves; 0 always yields a result
 * @return the row IDs at this tier, or {@code null} when the estimate exceeds the limit and
 *     {@code sfcIndex} is not 0
 */
protected static List<ByteArrayId> getRowIdsAtTier(
    final BinnedNumericDataset index,
    final byte tierId,
    final SpaceFillingCurve sfc,
    final BigInteger maxEstimatedDuplicateIds,
    final int sfcIndex) {
  final BigInteger estimatedIds = sfc.getEstimatedIdCount(index);

  if (estimatedIds.equals(BigInteger.ONE)) {
    // the whole entry maps onto one ID at this tier
    final byte[] keyPrefix = ByteArrayUtils.combineArrays(new byte[] {tierId}, index.getBinId());
    final double[] upperBounds = index.getMaxValuesPerDimension();
    final List<ByteArrayId> singleRow = new ArrayList<ByteArrayId>();
    singleRow.add(new ByteArrayId(ByteArrayUtils.combineArrays(keyPrefix, sfc.getId(upperBounds))));
    return singleRow;
  }

  // too many duplicates for this tier and it is not the tier at index 0: let the caller decide
  if ((maxEstimatedDuplicateIds != null)
      && (estimatedIds.compareTo(maxEstimatedDuplicateIds) > 0)
      && (sfcIndex != 0)) {
    return null;
  }

  return decomposeRangesForEntry(index, tierId, sfc);
}
private List<ByteArrayId> internalGetInsertionIds( final MultiDimensionalNumericData indexedData, final BigInteger maxDuplicateInsertionIds) { final BinnedNumericDataset[] ranges = BinnedNumericDataset.applyBins(indexedData, baseDefinitions); // place each of these indices into a single row ID at a tier that will // fit its min and max final List<ByteArrayId> rowIds = new ArrayList<ByteArrayId>(ranges.length); for (final BinnedNumericDataset range : ranges) { rowIds.addAll(getRowIds(range, maxDuplicateInsertionIds)); } return rowIds; }
@Override public List<ByteArrayRange> getQueryRanges( final MultiDimensionalNumericData indexedRange, final int maxRangeDecomposition) { // TODO don't just pass max ranges along to the SFC, take tiering and // binning into account to limit the number of ranges correctly final List<ByteArrayRange> queryRanges = new ArrayList<ByteArrayRange>(); final BinnedNumericDataset[] binnedQueries = BinnedNumericDataset.applyBins(indexedRange, baseDefinitions); int maxRangeDecompositionPerSfc = maxRangeDecomposition; if ((maxRangeDecomposition > 1) && (orderedSfcs.length > 1)) { maxRangeDecompositionPerSfc = (int) Math.ceil((double) maxRangeDecomposition / (double) orderedSfcs.length); } for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) { final SpaceFillingCurve sfc = orderedSfcs[sfcIndex]; final Byte tier = orderedSfcIndexToTierId.get(sfcIndex); queryRanges.addAll(getQueryRanges(binnedQueries, sfc, maxRangeDecompositionPerSfc, tier)); } return queryRanges; }