@Override public InternalAggregation buildAggregation(long owningBucketOrdinal) { if (counts == null || owningBucketOrdinal >= counts.maxBucket() || counts.cardinality(owningBucketOrdinal) == 0) { return buildEmptyAggregation(); } // We need to build a copy because the returned Aggregation needs remain usable after // this Aggregator (and its HLL++ counters) is released. HyperLogLogPlusPlus copy = new HyperLogLogPlusPlus(precision, BigArrays.NON_RECYCLING_INSTANCE, 1); copy.merge(0, counts, owningBucketOrdinal); return new InternalCardinality(name, copy, formatter); }
@Override public void postCollect() { final FixedBitSet allVisitedOrds = new FixedBitSet(maxOrd); for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) { final FixedBitSet bits = visitedOrds.get(bucket); if (bits != null) { allVisitedOrds.or(bits); } } final org.elasticsearch.common.hash.MurmurHash3.Hash128 hash = new org.elasticsearch.common.hash.MurmurHash3.Hash128(); try (LongArray hashes = bigArrays.newLongArray(maxOrd, false)) { for (int ord = allVisitedOrds.nextSetBit(0); ord != -1; ord = ord + 1 < maxOrd ? allVisitedOrds.nextSetBit(ord + 1) : -1) { final BytesRef value = values.lookupOrd(ord); org.elasticsearch.common.hash.MurmurHash3.hash128( value.bytes, value.offset, value.length, 0, hash); hashes.set(ord, hash.h1); } for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) { final FixedBitSet bits = visitedOrds.get(bucket); if (bits != null) { for (int ord = bits.nextSetBit(0); ord != -1; ord = ord + 1 < maxOrd ? bits.nextSetBit(ord + 1) : -1) { counts.collect(bucket, hashes.get(ord)); } } } } }
@Override public void collect(int doc, long bucketOrd) { hashes.setDocument(doc); final int valueCount = hashes.count(); for (int i = 0; i < valueCount; ++i) { counts.collect(bucketOrd, hashes.valueAt(i)); } }
private Collector createCollector(AtomicReaderContext reader) { // if rehash is false then the value source is either already hashed, or the user explicitly // requested not to hash the values (perhaps they already hashed the values themselves before // indexing the doc) // so we can just work with the original value source as is if (!rehash) { MurmurHash3Values hashValues = MurmurHash3Values.cast(((ValuesSource.Numeric) valuesSource).longValues()); return new DirectCollector(counts, hashValues); } if (valuesSource instanceof ValuesSource.Numeric) { ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource; MurmurHash3Values hashValues = source.isFloatingPoint() ? MurmurHash3Values.hash(source.doubleValues()) : MurmurHash3Values.hash(source.longValues()); return new DirectCollector(counts, hashValues); } if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource; final RandomAccessOrds ordinalValues = source.ordinalsValues(); final long maxOrd = ordinalValues.getValueCount(); if (maxOrd == 0) { return new EmptyCollector(); } final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd); final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision); // only use ordinals if they don't increase memory usage by more than 25% if (ordinalsMemoryUsage < countsMemoryUsage / 4) { return new OrdinalsCollector(counts, ordinalValues, bigArrays); } } return new DirectCollector(counts, MurmurHash3Values.hash(valuesSource.bytesValues())); }
@Override public double metric(long owningBucketOrd) { return counts == null ? 0 : counts.cardinality(owningBucketOrd); }