public void testIterator() throws IOException {
   int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
   BytesReference pbr = newBytesReference(length);
   BytesRefIterator iterator = pbr.iterator();
   BytesRef ref;
   BytesRefBuilder builder = new BytesRefBuilder();
   while ((ref = iterator.next()) != null) {
     builder.append(ref);
   }
   assertArrayEquals(pbr.toBytes(), BytesRef.deepCopyOf(builder.toBytesRef()).bytes);
 }
 public void testSliceIterator() throws IOException {
   int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
   BytesReference pbr = newBytesReference(length);
   int sliceOffset = randomIntBetween(0, pbr.length());
   int sliceLength = randomIntBetween(pbr.length() - sliceOffset, pbr.length() - sliceOffset);
   BytesReference slice = pbr.slice(sliceOffset, sliceLength);
   BytesRefIterator iterator = slice.iterator();
   BytesRef ref = null;
   BytesRefBuilder builder = new BytesRefBuilder();
   while ((ref = iterator.next()) != null) {
     builder.append(ref);
   }
   assertArrayEquals(slice.toBytes(), BytesRef.deepCopyOf(builder.toBytesRef()).bytes);
 }
Exemplo n.º 3
0
  private void check(BytesRefSorter sorter) throws Exception {
    for (int i = 0; i < 100; i++) {
      byte[] current = new byte[random().nextInt(256)];
      random().nextBytes(current);
      sorter.add(new BytesRef(current));
    }

    // Create two iterators and check that they're aligned with each other.
    BytesRefIterator i1 = sorter.iterator();
    BytesRefIterator i2 = sorter.iterator();

    // Verify sorter contract.
    expectThrows(
        IllegalStateException.class,
        () -> {
          sorter.add(new BytesRef(new byte[1]));
        });

    BytesRef spare1;
    BytesRef spare2;
    while ((spare1 = i1.next()) != null && (spare2 = i2.next()) != null) {
      assertEquals(spare1, spare2);
    }
    assertNull(i1.next());
    assertNull(i2.next());
  }
  @Override
  public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator =
        new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
      data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
      estimator.adjustForNoTerms(data.getMemorySizeInBytes());
      return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer
    // order so we know the sequence of
    // longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

    final float acceptableTransientOverheadRatio =
        fieldDataType
            .getSettings()
            .getAsFloat(
                "acceptable_transient_overhead_ratio",
                OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try (OrdinalsBuilder builder =
        new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
      BytesRefIterator iter = builder.buildFromTerms(termsEnum);
      BytesRef term;
      assert !getNumericType().isFloatingPoint();
      final boolean indexedAsLong = getNumericType().requiredBits() > 32;
      while ((term = iter.next()) != null) {
        final long value =
            indexedAsLong
                ? NumericUtils.prefixCodedToLong(term)
                : NumericUtils.prefixCodedToInt(term);
        assert values.size() == 0 || value > values.get(values.size() - 1);
        values.add(value);
      }
      Ordinals build = builder.build(fieldDataType.getSettings());

      if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
        Docs ordinals = build.ordinals();
        final FixedBitSet set = builder.buildDocsWithValuesSet();

        long minValue, maxValue;
        minValue = maxValue = 0;
        if (values.size() > 0) {
          minValue = values.get(0);
          maxValue = values.get(values.size() - 1);
        }

        // Encode document without a value with a special value
        long missingValue = 0;
        if (set != null) {
          if ((maxValue - minValue + 1) == values.size()) {
            // values are dense
            if (minValue > Long.MIN_VALUE) {
              missingValue = --minValue;
            } else {
              assert maxValue != Long.MAX_VALUE;
              missingValue = ++maxValue;
            }
          } else {
            for (long i = 1; i < values.size(); ++i) {
              if (values.get(i) > values.get(i - 1) + 1) {
                missingValue = values.get(i - 1) + 1;
                break;
              }
            }
          }
          missingValue -= minValue; // delta
        }

        final long delta = maxValue - minValue;
        final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
        final float acceptableOverheadRatio =
            fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
        final PackedInts.FormatAndBits formatAndBits =
            PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);

        // there's sweet spot where due to low unique value count, using ordinals will consume less
        // memory
        final long singleValuesSize =
            formatAndBits.format.longCount(
                    PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue)
                * 8L;
        final long uniqueValuesSize = values.ramBytesUsed();
        final long ordinalsSize = build.getMemorySizeInBytes();

        if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
          data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        } else {
          final PackedInts.Mutable sValues =
              PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
          if (missingValue != 0) {
            sValues.fill(0, sValues.size(), missingValue);
          }
          for (int i = 0; i < reader.maxDoc(); i++) {
            final long ord = ordinals.getOrd(i);
            if (ord != Ordinals.MISSING_ORDINAL) {
              sValues.set(i, values.get(ord - 1) - minValue);
            }
          }
          if (set == null) {
            data =
                new PackedArrayAtomicFieldData.Single(
                    sValues, minValue, reader.maxDoc(), ordinals.getNumOrds());
          } else {
            data =
                new PackedArrayAtomicFieldData.SingleSparse(
                    sValues, minValue, reader.maxDoc(), missingValue, ordinals.getNumOrds());
          }
        }
      } else {
        data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
      }

      success = true;
      return data;
    } finally {
      if (!success) {
        // If something went wrong, unwind any current estimations we've made
        estimator.afterLoad(termsEnum, 0);
      } else {
        // Adjust as usual, based on the actual size of the field data
        estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
      }
    }
  }
  @Override
  public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception {
    final LeafReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    AtomicNumericFieldData data = null;
    PackedArrayEstimator estimator =
        new PackedArrayEstimator(
            breakerService.getBreaker(CircuitBreaker.FIELDDATA),
            getNumericType(),
            getFieldNames().fullName());
    if (terms == null) {
      data = AtomicLongFieldData.empty(reader.maxDoc());
      estimator.adjustForNoTerms(data.ramBytesUsed());
      return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer
    // order so we know the sequence of
    // longs is going to be monotonically increasing
    final PackedLongValues.Builder valuesBuilder =
        PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

    final float acceptableTransientOverheadRatio =
        fieldDataType
            .getSettings()
            .getAsFloat(
                "acceptable_transient_overhead_ratio",
                OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    assert !getNumericType().isFloatingPoint();
    boolean success = false;
    try (OrdinalsBuilder builder =
        new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
      BytesRefIterator iter = builder.buildFromTerms(termsEnum);
      BytesRef term;
      while ((term = iter.next()) != null) {
        final long value = numericType.toLong(term);
        valuesBuilder.add(value);
      }
      final PackedLongValues values = valuesBuilder.build();
      final Ordinals build = builder.build(fieldDataType.getSettings());
      CommonSettings.MemoryStorageFormat formatHint =
          CommonSettings.getMemoryStorageHint(fieldDataType);

      RandomAccessOrds ordinals = build.ordinals();
      if (FieldData.isMultiValued(ordinals)
          || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) {
        final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
        data =
            new AtomicLongFieldData(ramBytesUsed) {

              @Override
              public SortedNumericDocValues getLongValues() {
                return withOrdinals(build, values, reader.maxDoc());
              }

              @Override
              public Collection<Accountable> getChildResources() {
                List<Accountable> resources = new ArrayList<>();
                resources.add(Accountables.namedAccountable("ordinals", build));
                resources.add(Accountables.namedAccountable("values", values));
                return Collections.unmodifiableList(resources);
              }
            };
      } else {
        final BitSet docsWithValues = builder.buildDocsWithValuesSet();

        long minV, maxV;
        minV = maxV = 0;
        if (values.size() > 0) {
          minV = values.get(0);
          maxV = values.get(values.size() - 1);
        }

        final float acceptableOverheadRatio =
            fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
        final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024);

        if (formatHint == null) {
          formatHint =
              chooseStorageFormat(
                  reader, values, build, ordinals, minV, maxV, acceptableOverheadRatio, pageSize);
        }

        logger.trace(
            "single value format for field [{}] set to [{}]",
            getFieldNames().fullName(),
            formatHint);

        switch (formatHint) {
          case PACKED:
            // Encode document without a value with a special value
            long missingV = 0;
            if (docsWithValues != null) {
              if ((maxV - minV + 1) == values.size()) {
                // values are dense
                if (minV > Long.MIN_VALUE) {
                  missingV = --minV;
                } else {
                  assert maxV != Long.MAX_VALUE;
                  missingV = ++maxV;
                }
              } else {
                for (long i = 1; i < values.size(); ++i) {
                  if (values.get(i) > values.get(i - 1) + 1) {
                    missingV = values.get(i - 1) + 1;
                    break;
                  }
                }
              }
              missingV -= minV;
            }
            final long missingValue = missingV;
            final long minValue = minV;
            final long maxValue = maxV;

            final long valuesDelta = maxValue - minValue;
            int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
            final PackedInts.Mutable sValues =
                PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);

            if (docsWithValues != null) {
              sValues.fill(0, sValues.size(), missingV);
            }

            for (int i = 0; i < reader.maxDoc(); i++) {
              ordinals.setDocument(i);
              if (ordinals.cardinality() > 0) {
                final long ord = ordinals.ordAt(0);
                long value = values.get(ord);
                sValues.set(i, value - minValue);
              }
            }
            long ramBytesUsed =
                values.ramBytesUsed()
                    + (docsWithValues == null ? 0 : docsWithValues.ramBytesUsed());
            data =
                new AtomicLongFieldData(ramBytesUsed) {

                  @Override
                  public SortedNumericDocValues getLongValues() {
                    if (docsWithValues == null) {
                      return singles(sValues, minValue);
                    } else {
                      return sparseSingles(sValues, minValue, missingValue, reader.maxDoc());
                    }
                  }

                  @Override
                  public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("values", sValues));
                    if (docsWithValues != null) {
                      resources.add(
                          Accountables.namedAccountable("missing bitset", docsWithValues));
                    }
                    return Collections.unmodifiableList(resources);
                  }
                };
            break;
          case PAGED:
            final PackedLongValues.Builder dpValues =
                PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);

            long lastValue = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
              ordinals.setDocument(i);
              if (ordinals.cardinality() > 0) {
                final long ord = ordinals.ordAt(i);
                lastValue = values.get(ord);
              }
              dpValues.add(lastValue);
            }
            final PackedLongValues pagedValues = dpValues.build();
            ramBytesUsed = pagedValues.ramBytesUsed();
            if (docsWithValues != null) {
              ramBytesUsed += docsWithValues.ramBytesUsed();
            }
            data =
                new AtomicLongFieldData(ramBytesUsed) {

                  @Override
                  public SortedNumericDocValues getLongValues() {
                    return pagedSingles(pagedValues, docsWithValues);
                  }

                  @Override
                  public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("values", pagedValues));
                    if (docsWithValues != null) {
                      resources.add(
                          Accountables.namedAccountable("missing bitset", docsWithValues));
                    }
                    return Collections.unmodifiableList(resources);
                  }
                };
            break;
          case ORDINALS:
            ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
            data =
                new AtomicLongFieldData(ramBytesUsed) {

                  @Override
                  public SortedNumericDocValues getLongValues() {
                    return withOrdinals(build, values, reader.maxDoc());
                  }

                  @Override
                  public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("ordinals", build));
                    resources.add(Accountables.namedAccountable("values", values));
                    return Collections.unmodifiableList(resources);
                  }
                };
            break;
          default:
            throw new ElasticsearchException("unknown memory format: " + formatHint);
        }
      }

      success = true;
      return data;
    } finally {
      if (!success) {
        // If something went wrong, unwind any current estimations we've made
        estimator.afterLoad(termsEnum, 0);
      } else {
        // Adjust as usual, based on the actual size of the field data
        estimator.afterLoad(termsEnum, data.ramBytesUsed());
      }
    }
  }