Example #1
  /** Read the metadata table to get tablets and match up ranges to them. */
  public static List<InputSplit> getSplits(
      final BasicHBaseOperations operations,
      final DistributableQuery query,
      final QueryOptions queryOptions,
      final AdapterStore adapterStore,
      final DataStatisticsStore statsStore,
      final IndexStore indexStore,
      final AdapterIndexMappingStore adapterIndexMappingStore,
      final Integer minSplits,
      final Integer maxSplits)
      throws IOException, InterruptedException {

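    // per-index cache of row-range histogram statistics, reused when splitting ranges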
    final Map<PrimaryIndex, RowRangeHistogramStatistics<?>> statsCache =
        new HashMap<PrimaryIndex, RowRangeHistogramStatistics<?>>();

    final List<InputSplit> retVal = new ArrayList<InputSplit>();
    final TreeSet<IntermediateSplitInfo> splits = new TreeSet<IntermediateSplitInfo>();

    for (final Pair<PrimaryIndex, List<DataAdapter<Object>>> indexAdapterPair :
        queryOptions.getAdaptersWithMinimalSetOfIndices(
            adapterStore, adapterIndexMappingStore, indexStore)) {

      populateIntermediateSplits(
          splits,
          operations,
          indexAdapterPair.getLeft(),
          indexAdapterPair.getRight(),
          statsCache,
          adapterStore,
          statsStore,
          maxSplits,
          query,
          queryOptions.getAuthorizations());
    }

    // this is an incremental algorithm; it may be better to use the target
    // split count to drive it (i.e., to get 3 splits this will split 1 large
    // range into two down the middle and then split one of those ranges down
    // the middle to get 3, rather than splitting one range into thirds)
    if (!statsCache.isEmpty()
        && !splits.isEmpty()
        && (minSplits != null)
        && (splits.size() < minSplits)) {
      // set the ranges to at least min splits
      do {
        // remove the highest range, split it into 2, and add both back,
        // increasing the size by 1
        final IntermediateSplitInfo highestSplit = splits.pollLast();
        final IntermediateSplitInfo otherSplit = highestSplit.split(statsCache);
        splits.add(highestSplit);
        if (otherSplit == null) {
          LOGGER.warn("Cannot meet minimum splits");
          break;
        }
        splits.add(otherSplit);
      } while (splits.size() < minSplits);
    } else if (((maxSplits != null) && (maxSplits > 0)) && (splits.size() > maxSplits)) {
      // merge splits to fit within max splits
      do {
        // this is the naive approach: remove the lowest two ranges and merge
        // them, decreasing the size by 1

        // TODO Ideally merge takes into account locations (as well as
        // possibly the index as a secondary criterion) to limit the
        // number of locations/indices
        final IntermediateSplitInfo lowestSplit = splits.pollFirst();
        final IntermediateSplitInfo nextLowestSplit = splits.pollFirst();
        lowestSplit.merge(nextLowestSplit);
        splits.add(lowestSplit);
      } while (splits.size() > maxSplits);
    }

    for (final IntermediateSplitInfo split : splits) {
      retVal.add(split.toFinalSplit());
    }
    return retVal;
  }
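
The min/max balancing above is incremental: when there are too few splits it repeatedly halves the largest remaining range, and when there are too many it repeatedly merges the two smallest. The sketch below shows only that strategy on plain double range sizes; the class name and the halving/summing logic are hypothetical stand-ins for IntermediateSplitInfo's histogram-driven split() and merge(), not GeoWave code.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Hypothetical stand-ins: double "sizes" replace IntermediateSplitInfo, and
// halving/summing replaces the histogram-driven split()/merge() calls.
public final class SplitBalancingSketch {

  public static List<Double> balance(
      final List<Double> rangeSizes, final Integer minSplits, final Integer maxSplits) {
    final List<Double> sizes = new ArrayList<Double>(rangeSizes);
    if (!sizes.isEmpty() && (minSplits != null) && (sizes.size() < minSplits)) {
      // too few splits: repeatedly split the largest range down the middle
      while (sizes.size() < minSplits) {
        final Double largest = Collections.max(sizes);
        sizes.remove(largest);
        sizes.add(largest / 2);
        sizes.add(largest / 2);
      }
    } else if ((maxSplits != null) && (maxSplits > 0) && (sizes.size() > maxSplits)) {
      // too many splits: repeatedly merge the two smallest ranges
      while (sizes.size() > maxSplits) {
        Collections.sort(sizes);
        sizes.add(sizes.remove(0) + sizes.remove(0));
      }
    }
    return sizes;
  }
}

With these stand-ins, balance(Arrays.asList(10.0), 3, null) returns [5.0, 2.5, 2.5]: two halvings rather than one split into thirds, matching the comment above.
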
  /** Initialize a scanner over the given input split using this task attempt configuration. */
  @Override
  public void initialize(final InputSplit inSplit, final TaskAttemptContext attempt)
      throws IOException {
    split = (GeoWaveAccumuloInputSplit) inSplit;

    numKeysRead = 0;

    final Map<RangeLocationPair, CloseableIterator<?>> iteratorsPerRange =
        new LinkedHashMap<RangeLocationPair, CloseableIterator<?>>();

    final Set<PrimaryIndex> indices = split.getIndices();
    BigDecimal sum = BigDecimal.ZERO;

    final Map<RangeLocationPair, BigDecimal> incrementalRangeSums =
        new LinkedHashMap<RangeLocationPair, BigDecimal>();

    for (final PrimaryIndex i : indices) {
      final List<RangeLocationPair> ranges = split.getRanges(i);
      List<QueryFilter> queryFilters = null;
      if (query != null) {
        queryFilters = query.createFilters(i.getIndexModel());
      }
      for (final RangeLocationPair r : ranges) {
        final QueryOptions rangeQueryOptions = new QueryOptions(queryOptions);
        rangeQueryOptions.setIndex(i);
        iteratorsPerRange.put(
            r,
            new InputFormatAccumuloRangeQuery(
                    adapterStore,
                    i,
                    r.getRange(),
                    queryFilters,
                    isOutputWritable,
                    rangeQueryOptions)
                .query(
                    accumuloOperations,
                    adapterStore,
                    rangeQueryOptions.getMaxResolutionSubsamplingPerDimension(),
                    rangeQueryOptions.getLimit()));
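        // record the cumulative cardinality before this range so each range is
        // keyed to its starting offset within the overall progress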
        incrementalRangeSums.put(r, sum);
        sum = sum.add(BigDecimal.valueOf(r.getCardinality()));
      }
    }

    // finally we can compute percent progress: each range gets a slice of the
    // overall progress proportional to its cardinality
    progressPerRange = new LinkedHashMap<RangeLocationPair, ProgressPerRange>();
    RangeLocationPair prevRangeIndex = null;
    float prevProgress = 0f;
    for (final Entry<RangeLocationPair, BigDecimal> entry : incrementalRangeSums.entrySet()) {
      final BigDecimal value = entry.getValue();
      final float progress = value.divide(sum, RoundingMode.HALF_UP).floatValue();
      if (prevRangeIndex != null) {
        progressPerRange.put(prevRangeIndex, new ProgressPerRange(prevProgress, progress));
      }
      prevRangeIndex = entry.getKey();
      prevProgress = progress;
    }
    progressPerRange.put(prevRangeIndex, new ProgressPerRange(prevProgress, 1f));
    // concatenate iterators
    iterator =
        new CloseableIteratorWrapper<Object>(
            new Closeable() {
              @Override
              public void close() throws IOException {
                for (final CloseableIterator<?> it : iteratorsPerRange.values()) {
                  it.close();
                }
              }
            },
            concatenateWithCallback(
                iteratorsPerRange.entrySet().iterator(),
                new NextRangeCallback() {

                  @Override
                  public void setRange(final RangeLocationPair indexPair) {
                    currentGeoWaveRangeIndexPair = indexPair;
                  }
                }));
  }
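
The incrementalRangeSums bookkeeping above gives each range a start fraction (its cumulative cardinality divided by the total) and an end fraction (the next range's start, or 1f for the last range). A record reader's getProgress() can interpolate within that slice. The sketch below shows only that arithmetic with a hypothetical class; it is not the GeoWave ProgressPerRange API.

// Hypothetical sketch: startProgress/endProgress correspond to the
// cumulative-cardinality bounds computed above for a single range.
public final class RangeProgressSketch {

  private final float startProgress; // overall progress when this range begins
  private final float endProgress;   // overall progress once this range is exhausted

  public RangeProgressSketch(final float startProgress, final float endProgress) {
    this.startProgress = startProgress;
    this.endProgress = endProgress;
  }

  /** Map the fraction consumed within this range (0..1) onto overall progress. */
  public float overallProgress(final float fractionOfRangeConsumed) {
    return startProgress + (fractionOfRangeConsumed * (endProgress - startProgress));
  }

  public static void main(final String[] args) {
    // a range covering the middle 40% of the keyspace, half consumed -> 0.5 overall
    System.out.println(new RangeProgressSketch(0.3f, 0.7f).overallProgress(0.5f));
  }
}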