/**
 * Builds the bit set of global ordinals selected by this filter's hash partition.
 *
 * <p>Every term of {@code globalOrdinals} is hashed with murmurhash3 (seeded with
 * {@code HASH_PARTITIONING_SEED}); its ordinal is marked when the hash, reduced with
 * {@link Math#floorMod} by {@code incNumPartitions}, equals {@code incZeroBasedPartition}.
 *
 * @param globalOrdinals the ordinals whose terms are enumerated
 * @return a bit set sized to the value count, with one bit set per accepted ordinal
 * @throws IOException if enumerating the terms fails
 */
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
    final LongBitSet accepted = new LongBitSet(globalOrdinals.getValueCount());
    final TermsEnum terms = globalOrdinals.termsEnum();
    for (BytesRef current = terms.next(); current != null; current = terms.next()) {
        final int hash = StringHelper.murmurhash3_x86_32(current, HASH_PARTITIONING_SEED);
        // floorMod keeps the bucket non-negative even for negative hashes
        if (Math.floorMod(hash, incNumPartitions) != incZeroBasedPartition) {
            continue;
        }
        accepted.set(terms.ord());
    }
    return accepted;
}
OrdinalsCollector(HyperLogLogPlusPlus counts, RandomAccessOrds values, BigArrays bigArrays) { Preconditions.checkArgument(values.getValueCount() <= Integer.MAX_VALUE); maxOrd = (int) values.getValueCount(); this.bigArrays = bigArrays; this.counts = counts; this.values = values; visitedOrds = bigArrays.newObjectArray(1); }
/**
 * Checks that, for each string field data format ("fst", "paged_bytes", "doc_values"), the
 * ordinals obtained through {@code loadGlobal} on the top-level reader expose the same value
 * count and the same term per ordinal as the ordinals loaded directly from
 * {@code readerContext}.
 *
 * <p>The index is filled with a random number of documents, each carrying up to three values
 * drawn from a random pool of 10-character ASCII strings; the reader is refreshed at random
 * points while indexing and once more at the end.
 */
public void testDuelGlobalOrdinals() throws Exception {
    Random random = getRandom();
    final int numDocs = scaledRandomIntBetween(10, 1000);
    final int numValues = scaledRandomIntBetween(10, 500);
    final String[] values = new String[numValues];
    for (int i = 0; i < numValues; ++i) {
        // the helper already returns a String; wrapping it in new String(...) only copies it
        values[i] = RandomStrings.randomAsciiOfLength(random, 10);
    }
    for (int i = 0; i < numDocs; i++) {
        Document d = new Document();
        final int numVals = randomInt(3);
        for (int j = 0; j < numVals; ++j) {
            // pick straight from the array instead of re-wrapping it in a List per iteration
            final String value = RandomPicks.randomFrom(random, values);
            d.add(new StringField("string", value, Field.Store.NO));
            d.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
        }
        writer.addDocument(d);
        if (randomInt(10) == 0) {
            refreshReader();
        }
    }
    refreshReader();

    Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>();
    typeMap.put(
            new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")),
            Type.Bytes);
    typeMap.put(
            new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")),
            Type.Bytes);
    typeMap.put(
            new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")),
            Type.Bytes);

    for (Map.Entry<FieldDataType, Type> entry : typeMap.entrySet()) {
        ifdService.clear();
        IndexOrdinalsFieldData fieldData =
                getForField(entry.getKey(), entry.getValue().name().toLowerCase(Locale.ROOT));
        RandomAccessOrds left = fieldData.load(readerContext).getOrdinalsValues();
        fieldData.clear();
        RandomAccessOrds right =
                fieldData
                        .loadGlobal(topLevelReader)
                        .load(topLevelReader.leaves().get(0))
                        .getOrdinalsValues();
        assertEquals(left.getValueCount(), right.getValueCount());
        for (long ord = 0; ord < left.getValueCount(); ++ord) {
            assertEquals(left.lookupOrd(ord), right.lookupOrd(ord));
        }
    }
}
/** Computes which global ordinals are accepted by this IncludeExclude instance. */ @Override public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); TermsEnum globalTermsEnum; Terms globalTerms = new DocValuesTerms(globalOrdinals); // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can // avoid i/o and just set bits. globalTermsEnum = compiled.getTermsEnum(globalTerms); for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) { acceptedGlobalOrdinals.set(globalTermsEnum.ord()); } return acceptedGlobalOrdinals; }
private Collector createCollector(AtomicReaderContext reader) { // if rehash is false then the value source is either already hashed, or the user explicitly // requested not to hash the values (perhaps they already hashed the values themselves before // indexing the doc) // so we can just work with the original value source as is if (!rehash) { MurmurHash3Values hashValues = MurmurHash3Values.cast(((ValuesSource.Numeric) valuesSource).longValues()); return new DirectCollector(counts, hashValues); } if (valuesSource instanceof ValuesSource.Numeric) { ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource; MurmurHash3Values hashValues = source.isFloatingPoint() ? MurmurHash3Values.hash(source.doubleValues()) : MurmurHash3Values.hash(source.longValues()); return new DirectCollector(counts, hashValues); } if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource; final RandomAccessOrds ordinalValues = source.ordinalsValues(); final long maxOrd = ordinalValues.getValueCount(); if (maxOrd == 0) { return new EmptyCollector(); } final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd); final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision); // only use ordinals if they don't increase memory usage by more than 25% if (ordinalsMemoryUsage < countsMemoryUsage / 4) { return new OrdinalsCollector(counts, ordinalValues, bigArrays); } } return new DirectCollector(counts, MurmurHash3Values.hash(valuesSource.bytesValues())); }
@Override public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); if (includeValues != null) { for (BytesRef term : includeValues) { long ord = globalOrdinals.lookupTerm(term); if (ord >= 0) { acceptedGlobalOrdinals.set(ord); } } } else if (acceptedGlobalOrdinals.length() > 0) { // default to all terms being acceptable acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length()); } if (excludeValues != null) { for (BytesRef term : excludeValues) { long ord = globalOrdinals.lookupTerm(term); if (ord >= 0) { acceptedGlobalOrdinals.clear(ord); } } } return acceptedGlobalOrdinals; }