/**
 * Computes the numeric distance between the start and end row keys of a range.
 *
 * <p>Both keys are passed through {@code extractBytes} using the byte length of the longer key,
 * the results are read as {@link BigInteger} values, and the start value is subtracted from the
 * end value.
 *
 * @param range the row range whose start and end keys are measured
 * @return the end key value minus the start key value, converted to a {@code double}
 */
protected static double getRangeLength(final HBaseMRRowRange range) {
    final ByteArrayId lowerKey = range.getStart();
    final ByteArrayId upperKey = range.getEnd();

    // Use the longer of the two keys as the common extraction width.
    final int upperLength = upperKey.getBytes().length;
    final int lowerLength = lowerKey.getBytes().length;
    final int width = Math.max(upperLength, lowerLength);

    final BigInteger lowerValue = new BigInteger(extractBytes(lowerKey, width));
    final BigInteger upperValue = new BigInteger(extractBytes(upperKey, width));

    final BigInteger span = upperValue.subtract(lowerValue);
    return span.doubleValue();
}
private static List<HBaseMRRowRange> binRanges( final List<HBaseMRRowRange> inputRanges, final Map<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>> binnedRanges, final RegionLocator regionLocator) throws IOException { // Loop through ranges, getting RegionLocation and RegionInfo for // startKey, clipping range by that regionInfo's extent, and leaving // remainder in the List to be region'd final ListIterator<HBaseMRRowRange> i = inputRanges.listIterator(); while (i.hasNext()) { final HBaseMRRowRange range = i.next(); final HRegionLocation location = regionLocator.getRegionLocation(range.getStart().getBytes()); Map<HRegionInfo, List<HBaseMRRowRange>> regionInfoMap = binnedRanges.get(location); if (regionInfoMap == null) { regionInfoMap = new HashMap<HRegionInfo, List<HBaseMRRowRange>>(); binnedRanges.put(location, regionInfoMap); } final HRegionInfo regionInfo = location.getRegionInfo(); List<HBaseMRRowRange> rangeList = regionInfoMap.get(regionInfo); if (rangeList == null) { rangeList = new ArrayList<HBaseMRRowRange>(); regionInfoMap.put(regionInfo, rangeList); } if (regionInfo.containsRange(range.getStart().getBytes(), range.getEnd().getBytes())) { rangeList.add(range); i.remove(); } else { final ByteArrayRange overlappingRange = range.intersection( new ByteArrayRange( new ByteArrayId(regionInfo.getStartKey()), new ByteArrayId(regionInfo.getEndKey()))); rangeList.add(new HBaseMRRowRange(overlappingRange)); final HBaseMRRowRange uncoveredRange = new HBaseMRRowRange( new ByteArrayId(HBaseUtils.getNextPrefix(regionInfo.getEndKey())), range.getEnd()); i.add(uncoveredRange); } } return inputRanges; }
/**
 * Estimates the cardinality of a row range.
 *
 * @param rangeStats histogram statistics to query, or {@code null} when none are available
 * @param range the row range to estimate
 * @return the value reported by {@code rangeStats.cardinality} for the range's start and end
 *         keys, or the result of {@code getRangeLength(range)} when {@code rangeStats} is
 *         {@code null}
 */
private static double getCardinality(
        final RowRangeHistogramStatistics<?> rangeStats,
        final HBaseMRRowRange range) {
    if (rangeStats != null) {
        final byte[] startKey = range.getStart().getBytes();
        final byte[] endKey = range.getEnd().getBytes();
        return rangeStats.cardinality(startKey, endKey);
    }
    // No statistics available: fall back to the key distance of the range.
    return getRangeLength(range);
}
private static TreeSet<IntermediateSplitInfo> populateIntermediateSplits( final TreeSet<IntermediateSplitInfo> splits, final BasicHBaseOperations operations, final PrimaryIndex index, final List<DataAdapter<Object>> adapters, final Map<PrimaryIndex, RowRangeHistogramStatistics<?>> statsCache, final AdapterStore adapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final DistributableQuery query, final String[] authorizations) throws IOException { if ((query != null) && !query.isSupported(index)) { return splits; } final HBaseMRRowRange fullrange = getRangeMax(index, adapterStore, statsStore, authorizations); final String tableName = index.getId().getString(); final NumericIndexStrategy indexStrategy = index.getIndexStrategy(); // Build list of row ranges from query List<HBaseMRRowRange> ranges = new ArrayList<HBaseMRRowRange>(); final List<ByteArrayRange> constraintRanges; if (query != null) { final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(indexStrategy); if ((maxSplits != null) && (maxSplits > 0)) { constraintRanges = DataStoreUtils.constraintsToByteArrayRanges(indexConstraints, indexStrategy, maxSplits); } else { constraintRanges = DataStoreUtils.constraintsToByteArrayRanges(indexConstraints, indexStrategy, -1); } for (final ByteArrayRange constraintRange : constraintRanges) { ranges.add(new HBaseMRRowRange(constraintRange)); } } else { ranges.add(fullrange); if (LOGGER.isTraceEnabled()) { LOGGER.trace("Protected range: " + fullrange); } } final Map<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>> binnedRanges = new HashMap<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>>(); final RegionLocator regionLocator = operations.getRegionLocator(tableName); while (!ranges.isEmpty()) { ranges = binRanges(ranges, binnedRanges, regionLocator); } for (final Entry<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>> locationEntry : binnedRanges.entrySet()) { final String hostname = 
locationEntry.getKey().getHostname(); for (final Entry<HRegionInfo, List<HBaseMRRowRange>> regionEntry : locationEntry.getValue().entrySet()) { final Map<PrimaryIndex, List<RangeLocationPair>> splitInfo = new HashMap<PrimaryIndex, List<RangeLocationPair>>(); final List<RangeLocationPair> rangeList = new ArrayList<RangeLocationPair>(); for (final HBaseMRRowRange range : regionEntry.getValue()) { final double cardinality = getCardinality( getHistStats( index, adapters, adapterStore, statsStore, statsCache, authorizations), range); if (range.intersects(fullrange)) { rangeList.add( new RangeLocationPair(range, hostname, cardinality < 1 ? 1.0 : cardinality)); } else { LOGGER.info("Query split outside of range"); } if (LOGGER.isTraceEnabled()) { LOGGER.warn("Clipped range: " + rangeList.get(rangeList.size() - 1).getRange()); } } if (!rangeList.isEmpty()) { splitInfo.put(index, rangeList); splits.add(new IntermediateSplitInfo(splitInfo)); } } } return splits; }