@Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { if (valuesSource == null) { return LeafBucketCollector.NO_OP_COLLECTOR; } final SortedNumericDocValues values = valuesSource.longValues(ctx); return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { assert bucket == 0; values.setDocument(doc); final int valuesCount = values.count(); long previousKey = Long.MIN_VALUE; for (int i = 0; i < valuesCount; ++i) { long value = values.valueAt(i); long key = rounding.roundKey(value); assert key >= previousKey; if (key == previousKey) { continue; } long bucketOrd = bucketOrds.add(key); if (bucketOrd < 0) { // already seen bucketOrd = -1 - bucketOrd; collectExistingBucket(sub, doc, bucketOrd); } else { collectBucket(sub, doc, bucketOrd); } previousKey = key; } } }; }
private Collector createCollector(AtomicReaderContext reader) { // if rehash is false then the value source is either already hashed, or the user explicitly // requested not to hash the values (perhaps they already hashed the values themselves before // indexing the doc) // so we can just work with the original value source as is if (!rehash) { MurmurHash3Values hashValues = MurmurHash3Values.cast(((ValuesSource.Numeric) valuesSource).longValues()); return new DirectCollector(counts, hashValues); } if (valuesSource instanceof ValuesSource.Numeric) { ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource; MurmurHash3Values hashValues = source.isFloatingPoint() ? MurmurHash3Values.hash(source.doubleValues()) : MurmurHash3Values.hash(source.longValues()); return new DirectCollector(counts, hashValues); } if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource; final RandomAccessOrds ordinalValues = source.ordinalsValues(); final long maxOrd = ordinalValues.getValueCount(); if (maxOrd == 0) { return new EmptyCollector(); } final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd); final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision); // only use ordinals if they don't increase memory usage by more than 25% if (ordinalsMemoryUsage < countsMemoryUsage / 4) { return new OrdinalsCollector(counts, ordinalValues, bigArrays); } } return new DirectCollector(counts, MurmurHash3Values.hash(valuesSource.bytesValues())); }
/**
 * Reports whether scores are needed.
 *
 * @return {@code true} when a values source is configured and it needs scores,
 *         otherwise whatever the superclass reports
 */
@Override
public boolean needsScores() {
    // The values source is consulted first; the superclass is asked only when
    // the source is absent or does not need scores.
    if (valuesSource != null && valuesSource.needsScores()) {
        return true;
    }
    return super.needsScores();
}