예제 #1
1
    @Override
    public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
      final long numOrds = globalOrdinals.getValueCount();
      final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds);
      final TermsEnum termEnum = globalOrdinals.termsEnum();

      BytesRef term = termEnum.next();
      while (term != null) {
        if (Math.floorMod(
                StringHelper.murmurhash3_x86_32(term, HASH_PARTITIONING_SEED), incNumPartitions)
            == incZeroBasedPartition) {
          acceptedGlobalOrdinals.set(termEnum.ord());
        }
        term = termEnum.next();
      }
      return acceptedGlobalOrdinals;
    }
 OrdinalsCollector(HyperLogLogPlusPlus counts, RandomAccessOrds values, BigArrays bigArrays) {
   Preconditions.checkArgument(values.getValueCount() <= Integer.MAX_VALUE);
   maxOrd = (int) values.getValueCount();
   this.bigArrays = bigArrays;
   this.counts = counts;
   this.values = values;
   visitedOrds = bigArrays.newObjectArray(1);
 }
  public void testDuelGlobalOrdinals() throws Exception {
    Random random = getRandom();
    final int numDocs = scaledRandomIntBetween(10, 1000);
    final int numValues = scaledRandomIntBetween(10, 500);
    final String[] values = new String[numValues];
    for (int i = 0; i < numValues; ++i) {
      values[i] = new String(RandomStrings.randomAsciiOfLength(random, 10));
    }
    for (int i = 0; i < numDocs; i++) {
      Document d = new Document();
      final int numVals = randomInt(3);
      for (int j = 0; j < numVals; ++j) {
        final String value = RandomPicks.randomFrom(random, Arrays.asList(values));
        d.add(new StringField("string", value, Field.Store.NO));
        d.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
      }
      writer.addDocument(d);
      if (randomInt(10) == 0) {
        refreshReader();
      }
    }
    refreshReader();

    Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>();
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")),
        Type.Bytes);
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Bytes);

    for (Map.Entry<FieldDataType, Type> entry : typeMap.entrySet()) {
      ifdService.clear();
      IndexOrdinalsFieldData fieldData =
          getForField(entry.getKey(), entry.getValue().name().toLowerCase(Locale.ROOT));
      RandomAccessOrds left = fieldData.load(readerContext).getOrdinalsValues();
      fieldData.clear();
      RandomAccessOrds right =
          fieldData
              .loadGlobal(topLevelReader)
              .load(topLevelReader.leaves().get(0))
              .getOrdinalsValues();
      assertEquals(left.getValueCount(), right.getValueCount());
      for (long ord = 0; ord < left.getValueCount(); ++ord) {
        assertEquals(left.lookupOrd(ord), right.lookupOrd(ord));
      }
    }
  }
예제 #4
0
 /** Computes which global ordinals are accepted by this IncludeExclude instance. */
 @Override
 public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
   LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
   TermsEnum globalTermsEnum;
   Terms globalTerms = new DocValuesTerms(globalOrdinals);
   // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can
   // avoid i/o and just set bits.
   globalTermsEnum = compiled.getTermsEnum(globalTerms);
   for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
     acceptedGlobalOrdinals.set(globalTermsEnum.ord());
   }
   return acceptedGlobalOrdinals;
 }
  private Collector createCollector(AtomicReaderContext reader) {

    // if rehash is false then the value source is either already hashed, or the user explicitly
    // requested not to hash the values (perhaps they already hashed the values themselves before
    // indexing the doc)
    // so we can just work with the original value source as is
    if (!rehash) {
      MurmurHash3Values hashValues =
          MurmurHash3Values.cast(((ValuesSource.Numeric) valuesSource).longValues());
      return new DirectCollector(counts, hashValues);
    }

    if (valuesSource instanceof ValuesSource.Numeric) {
      ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource;
      MurmurHash3Values hashValues =
          source.isFloatingPoint()
              ? MurmurHash3Values.hash(source.doubleValues())
              : MurmurHash3Values.hash(source.longValues());
      return new DirectCollector(counts, hashValues);
    }

    if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) {
      ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource;
      final RandomAccessOrds ordinalValues = source.ordinalsValues();
      final long maxOrd = ordinalValues.getValueCount();
      if (maxOrd == 0) {
        return new EmptyCollector();
      }

      final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd);
      final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision);
      // only use ordinals if they don't increase memory usage by more than 25%
      if (ordinalsMemoryUsage < countsMemoryUsage / 4) {
        return new OrdinalsCollector(counts, ordinalValues, bigArrays);
      }
    }

    return new DirectCollector(counts, MurmurHash3Values.hash(valuesSource.bytesValues()));
  }
예제 #6
0
 @Override
 public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
   LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
   if (includeValues != null) {
     for (BytesRef term : includeValues) {
       long ord = globalOrdinals.lookupTerm(term);
       if (ord >= 0) {
         acceptedGlobalOrdinals.set(ord);
       }
     }
   } else if (acceptedGlobalOrdinals.length() > 0) {
     // default to all terms being acceptable
     acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
   }
   if (excludeValues != null) {
     for (BytesRef term : excludeValues) {
       long ord = globalOrdinals.lookupTerm(term);
       if (ord >= 0) {
         acceptedGlobalOrdinals.clear(ord);
       }
     }
   }
   return acceptedGlobalOrdinals;
 }