コード例 #1
0
  /**
   * Decomposes every binned query against a single space filling curve and prefixes the
   * boundaries of each resulting range with the tier byte followed by the bin ID.
   *
   * <p>When both {@code maxRanges} and the number of bins are greater than one, the range budget
   * is divided by the number of bins (rounded up) before it is handed to the curve; otherwise
   * {@code maxRanges} is passed through unchanged for every bin.
   *
   * @param binnedQueries the query, already split into bins
   * @param sfc the space filling curve that decomposes each bin into ranges
   * @param maxRanges the requested range budget for this curve
   * @param tier the tier byte written at the front of every range boundary
   * @return the prefixed ranges of all bins, appended in the order the bins are given
   */
  protected static List<ByteArrayRange> getQueryRanges(
      final BinnedNumericDataset[] binnedQueries,
      final SpaceFillingCurve sfc,
      final int maxRanges,
      final byte tier) {
    final int binCount = binnedQueries.length;
    final int rangesPerBin =
        ((maxRanges > 1) && (binCount > 1))
            ? (int) Math.ceil((double) maxRanges / (double) binCount)
            : maxRanges;

    final List<ByteArrayRange> prefixedRanges = new ArrayList<ByteArrayRange>();
    for (final BinnedNumericDataset bin : binnedQueries) {
      final RangeDecomposition decomposition = sfc.decomposeRange(bin, true, rangesPerBin);
      // the tier occupies a single byte, so tiers are assumed to never exceed 127 (the max
      // byte value)
      final byte[] prefix = ByteArrayUtils.combineArrays(new byte[] {tier}, bin.getBinId());
      for (final ByteArrayRange sfcRange : decomposition.getRanges()) {
        final ByteArrayId prefixedStart =
            new ByteArrayId(ByteArrayUtils.combineArrays(prefix, sfcRange.getStart().getBytes()));
        final ByteArrayId prefixedEnd =
            new ByteArrayId(ByteArrayUtils.combineArrays(prefix, sfcRange.getEnd().getBytes()));
        prefixedRanges.add(new ByteArrayRange(prefixedStart, prefixedEnd));
      }
    }
    return prefixedRanges;
  }
コード例 #2
0
 /**
  * Enumerates every row ID covered by the range decomposition of an entry on a single tier.
  *
  * <p>The entry is decomposed with {@code DEFAULT_MAX_RANGES} as the range limit. For each
  * resulting range, the start row ID is emitted first and then incremented one step at a time,
  * emitting each value, until it equals the end row ID. Every emitted ID is prefixed with the
  * tier byte followed by the bin ID.
  *
  * <p>If incrementing overflows before the end row ID is reached, a warning is logged and the
  * remainder of that range is skipped; IDs already collected are kept and the following ranges
  * are still processed.
  *
  * @param index the binned entry to decompose
  * @param tierId the tier byte written at the front of every row ID
  * @param sfc the space filling curve used for the decomposition
  * @return the prefixed row IDs of all ranges, in decomposition order
  */
 protected static List<ByteArrayId> decomposeRangesForEntry(
     final BinnedNumericDataset index, final byte tierId, final SpaceFillingCurve sfc) {
   final List<ByteArrayId> retVal = new ArrayList<ByteArrayId>();
   final byte[] tierAndBinId = ByteArrayUtils.combineArrays(new byte[] {tierId}, index.getBinId());
   final RangeDecomposition rangeDecomp = sfc.decomposeRange(index, false, DEFAULT_MAX_RANGES);
   for (final ByteArrayRange range : rangeDecomp.getRanges()) {
     // work on a private copy of the start so that incrementing never touches the range itself
     final byte[] startRowId = range.getStart().getBytes();
     final byte[] currentRowId = Arrays.copyOf(startRowId, startRowId.length);
     // the end of the range does not change while walking it, so look it up only once
     final byte[] endRowId = range.getEnd().getBytes();
     retVal.add(new ByteArrayId(ByteArrayUtils.combineArrays(tierAndBinId, currentRowId)));
     while (!Arrays.equals(currentRowId, endRowId)) {
       // increment until we reach the end row ID
       if (!ByteArrayUtils.increment(currentRowId)) {
         // the increment caused an overflow which shouldn't ever happen assuming the
         // start row ID is less than the end row ID
         LOGGER.warn(
             "Row IDs overflowed when ingesting data; start of range decomposition must be less than or equal to end of range. This may be because the start of the decomposed range is higher than the end of the range.");
         break;
       }
       retVal.add(new ByteArrayId(ByteArrayUtils.combineArrays(tierAndBinId, currentRowId)));
     }
   }
   return retVal;
 }
コード例 #3
0
 /**
  * Produces the row IDs of an entry for one tier, or {@code null} when the entry is not placed
  * on this tier.
  *
  * <ul>
  *   <li>If the curve estimates exactly one ID for the entry, a single row ID is built from the
  *       entry's per-dimension maximum values.
  *   <li>Otherwise, if no duplicate limit is given, the estimate does not exceed the limit, or
  *       {@code sfcIndex} is {@code 0}, the entry is fully decomposed via
  *       {@code decomposeRangesForEntry}.
  *   <li>Otherwise {@code null} is returned.
  * </ul>
  *
  * @param index the binned entry
  * @param tierId the tier byte written at the front of every row ID
  * @param sfc the space filling curve of this tier
  * @param maxEstimatedDuplicateIds upper bound on the estimated ID count, or {@code null} for no
  *     bound
  * @param sfcIndex position of {@code sfc} among the ordered curves; {@code 0} always accepts
  * @return the row IDs for this tier, or {@code null} if the estimate exceeds the limit and
  *     {@code sfcIndex} is not {@code 0}
  */
 protected static List<ByteArrayId> getRowIdsAtTier(
     final BinnedNumericDataset index,
     final byte tierId,
     final SpaceFillingCurve sfc,
     final BigInteger maxEstimatedDuplicateIds,
     final int sfcIndex) {
   final BigInteger estimatedIds = sfc.getEstimatedIdCount(index);

   if (estimatedIds.equals(BigInteger.ONE)) {
     // the whole entry maps onto one ID on this tier
     final byte[] prefix = ByteArrayUtils.combineArrays(new byte[] {tierId}, index.getBinId());
     final byte[] sfcId = sfc.getId(index.getMaxValuesPerDimension());
     final List<ByteArrayId> singleRow = new ArrayList<ByteArrayId>();
     singleRow.add(new ByteArrayId(ByteArrayUtils.combineArrays(prefix, sfcId)));
     return singleRow;
   }

   final boolean exceedsDuplicateLimit =
       (maxEstimatedDuplicateIds != null) && (estimatedIds.compareTo(maxEstimatedDuplicateIds) > 0);
   if (exceedsDuplicateLimit && (sfcIndex != 0)) {
     // too many duplicates here and this is not the curve at index 0
     return null;
   }
   return decomposeRangesForEntry(index, tierId, sfc);
 }
コード例 #4
0
 /**
  * Bins the given data with {@code baseDefinitions} and gathers the row IDs of every bin.
  *
  * @param indexedData the data to compute insertion IDs for
  * @param maxDuplicateInsertionIds passed through unchanged to {@code getRowIds} for every bin
  * @return the row IDs of all bins, concatenated in bin order
  */
 private List<ByteArrayId> internalGetInsertionIds(
     final MultiDimensionalNumericData indexedData, final BigInteger maxDuplicateInsertionIds) {
   final BinnedNumericDataset[] bins =
       BinnedNumericDataset.applyBins(indexedData, baseDefinitions);
   // start with room for one row ID per bin; the list grows if a bin yields more
   final List<ByteArrayId> insertionIds = new ArrayList<ByteArrayId>(bins.length);
   for (int binIndex = 0; binIndex < bins.length; binIndex++) {
     insertionIds.addAll(getRowIds(bins[binIndex], maxDuplicateInsertionIds));
   }
   return insertionIds;
 }
コード例 #5
0
  /**
   * Builds the query ranges for the given data across every space filling curve in
   * {@code orderedSfcs}, visiting the curves from the highest index down to index 0.
   *
   * <p>When both {@code maxRangeDecomposition} and the number of curves are greater than one, the
   * range budget is divided by the number of curves (rounded up); otherwise it is passed to each
   * curve unchanged.
   *
   * @param indexedRange the query to decompose
   * @param maxRangeDecomposition the requested range budget
   * @return the ranges of all curves, appended in the order the curves are visited
   */
  @Override
  public List<ByteArrayRange> getQueryRanges(
      final MultiDimensionalNumericData indexedRange, final int maxRangeDecomposition) {
    // TODO don't just pass max ranges along to the SFC, take tiering and
    // binning into account to limit the number of ranges correctly

    final BinnedNumericDataset[] bins =
        BinnedNumericDataset.applyBins(indexedRange, baseDefinitions);
    final int sfcCount = orderedSfcs.length;
    final int rangesPerSfc =
        ((maxRangeDecomposition > 1) && (sfcCount > 1))
            ? (int) Math.ceil((double) maxRangeDecomposition / (double) sfcCount)
            : maxRangeDecomposition;

    final List<ByteArrayRange> allRanges = new ArrayList<ByteArrayRange>();
    int sfcIndex = sfcCount;
    while (--sfcIndex >= 0) {
      final SpaceFillingCurve curve = orderedSfcs[sfcIndex];
      final Byte tierId = orderedSfcIndexToTierId.get(sfcIndex);
      allRanges.addAll(getQueryRanges(bins, curve, rangesPerSfc, tierId));
    }
    return allRanges;
  }