/**
 * Walks every token a {@code NumericTokenStream} produces for the int {@code ivalue} and
 * checks, per precision step, the reported shift, the hash returned by
 * {@code fillBytesRef()}, the prefix-coded term, the raw value and the token type. Also
 * checks that the value size is 32 and that the stream is exhausted after the last step.
 */
public void testIntStream() throws Exception {
  final NumericTokenStream tokenStream = new NumericTokenStream().setIntValue(ivalue);
  // getAttribute is used on purpose: it proves the attributes really exist, otherwise an
  // IAE will be thrown.
  final TermToBytesRefAttribute termBytesAttribute =
      tokenStream.getAttribute(TermToBytesRefAttribute.class);
  final TypeAttribute tokenTypeAttribute = tokenStream.getAttribute(TypeAttribute.class);
  final NumericTokenStream.NumericTermAttribute numericTermAttribute =
      tokenStream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  final BytesRef termBytes = termBytesAttribute.getBytesRef();

  tokenStream.reset();
  assertEquals(32, numericTermAttribute.getValueSize());

  int shift = 0;
  while (shift < 32) {
    // Expected values keep only the bits at or above the current shift.
    final int expectedTerm = ivalue & ~((1 << shift) - 1);
    final long expectedRawValue = ((long) ivalue) & ~((1L << shift) - 1L);

    assertTrue("New token is available", tokenStream.incrementToken());
    assertEquals("Shift value wrong", shift, numericTermAttribute.getShift());

    final int reportedHash = termBytesAttribute.fillBytesRef();
    assertEquals("Hash incorrect", termBytes.hashCode(), reportedHash);

    assertEquals(
        "Term is incorrectly encoded", expectedTerm, NumericUtils.prefixCodedToInt(termBytes));
    assertEquals(
        "Term raw value is incorrectly encoded",
        expectedRawValue,
        numericTermAttribute.getRawValue());

    // Only the very first token (shift 0) is expected to carry the full-precision type.
    assertEquals(
        "Type incorrect",
        (shift != 0)
            ? NumericTokenStream.TOKEN_TYPE_LOWER_PREC
            : NumericTokenStream.TOKEN_TYPE_FULL_PREC,
        tokenTypeAttribute.type());

    shift += NumericUtils.PRECISION_STEP_DEFAULT;
  }

  assertFalse("More tokens available", tokenStream.incrementToken());
  tokenStream.end();
  tokenStream.close();
}
Exemplo n.º 2
0
 /**
  * Converts the prefix-coded indexed form of a trie value into its human-readable string,
  * decoding it according to this field's {@code type}.
  *
  * @param indexedForm the prefix-coded term text as stored in the index
  * @return the decoded value rendered as a string
  * @throws SolrException with {@code SERVER_ERROR} if {@code type} is not one of the handled
  *     trie types
  */
 @Override
 public String indexedToReadable(String indexedForm) {
   final String readable;
   switch (type) {
     case INTEGER:
       final int intValue = NumericUtils.prefixCodedToInt(indexedForm);
       readable = Integer.toString(intValue);
       break;
     case FLOAT:
       // Floats are indexed as sortable ints; decode the int first, then map it back.
       final int sortableInt = NumericUtils.prefixCodedToInt(indexedForm);
       readable = Float.toString(NumericUtils.sortableIntToFloat(sortableInt));
       break;
     case LONG:
       final long longValue = NumericUtils.prefixCodedToLong(indexedForm);
       readable = Long.toString(longValue);
       break;
     case DOUBLE:
       // Doubles are indexed as sortable longs; decode the long first, then map it back.
       final long sortableLong = NumericUtils.prefixCodedToLong(indexedForm);
       readable = Double.toString(NumericUtils.sortableLongToDouble(sortableLong));
       break;
     case DATE:
       // The decoded long is handed to Date's constructor and formatted by the date field.
       final Date decodedDate = new Date(NumericUtils.prefixCodedToLong(indexedForm));
       readable = dateField.toExternal(decodedDate);
       break;
     default:
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
   }
   return readable;
 }
  /**
   * Loads the numeric field data of this field for the reader in {@code context}.
   *
   * <p>If the reader has no terms for the field, an empty instance sized to
   * {@code reader.maxDoc()} is returned. Otherwise every term is decoded to a {@code long}
   * (with {@code prefixCodedToLong} when the numeric type requires more than 32 bits,
   * {@code prefixCodedToInt} otherwise) and appended to a value buffer while an
   * {@code OrdinalsBuilder} collects the ordinals. The returned representation is one of:
   *
   * <ul>
   *   <li>{@code WithOrdinals}: when the built ordinals are multi-valued, when
   *       {@code CommonSettings.removeOrdsOnSingleValue} returns false, or when unique values
   *       plus ordinals are estimated to be smaller than a per-document packed array;
   *   <li>{@code Single}: a per-document packed array of {@code value - minValue}, used when
   *       {@code buildDocsWithValuesSet()} returned {@code null};
   *   <li>{@code SingleSparse}: the same packed array plus a reserved "missing" marker for
   *       documents without a value.
   * </ul>
   *
   * <p>The estimator is always notified through {@code afterLoad}: with the actual memory
   * size on success, with 0 on failure.
   *
   * @param context supplies the reader whose terms and {@code maxDoc()} are used
   * @return the loaded field data
   * @throws Exception if anything fails while reading terms or building the structures
   */
  @Override
  public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator =
        new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
      // No terms for this field: return an empty instance and report its size to the estimator.
      data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
      estimator.adjustForNoTerms(data.getMemorySizeInBytes());
      return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer
    // order, so we know the sequence of longs is going to be monotonically increasing.
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

    final float acceptableTransientOverheadRatio =
        fieldDataType
            .getSettings()
            .getAsFloat(
                "acceptable_transient_overhead_ratio",
                OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    // Tells the finally block which afterLoad adjustment to apply.
    boolean success = false;
    try (OrdinalsBuilder builder =
        new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
      BytesRefIterator iter = builder.buildFromTerms(termsEnum);
      BytesRef term;
      assert !getNumericType().isFloatingPoint();
      // Types needing more than 32 bits are decoded as longs, everything else as ints.
      final boolean indexedAsLong = getNumericType().requiredBits() > 32;
      while ((term = iter.next()) != null) {
        final long value =
            indexedAsLong
                ? NumericUtils.prefixCodedToLong(term)
                : NumericUtils.prefixCodedToInt(term);
        // Values must be strictly increasing; the min/max lookup below (first/last element)
        // relies on that ordering.
        assert values.size() == 0 || value > values.get(values.size() - 1);
        values.add(value);
      }
      Ordinals build = builder.build(fieldDataType.getSettings());

      if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
        // Single-valued field: consider storing one packed value per document instead of ordinals.
        Docs ordinals = build.ordinals();
        // NOTE(review): a null set is treated below as "no missing-value marker needed";
        // presumably it means every document has a value - confirm against OrdinalsBuilder.
        final FixedBitSet set = builder.buildDocsWithValuesSet();

        long minValue, maxValue;
        minValue = maxValue = 0;
        if (values.size() > 0) {
          // Values are sorted ascending, so the first is the minimum and the last the maximum.
          minValue = values.get(0);
          maxValue = values.get(values.size() - 1);
        }

        // Encode document without a value with a special value
        long missingValue = 0;
        if (set != null) {
          if ((maxValue - minValue + 1) == values.size()) {
            // values are dense: there is no unused value inside [minValue, maxValue], so widen
            // the range by one and use the new bound as the marker
            if (minValue > Long.MIN_VALUE) {
              missingValue = --minValue;
            } else {
              assert maxValue != Long.MAX_VALUE;
              missingValue = ++maxValue;
            }
          } else {
            // values are not dense: use the first unused value found right after a stored value
            for (long i = 1; i < values.size(); ++i) {
              if (values.get(i) > values.get(i - 1) + 1) {
                missingValue = values.get(i - 1) + 1;
                break;
              }
            }
          }
          // Per-document values are stored as offsets from minValue (see sValues.set below),
          // so the marker is converted to the same offset form.
          missingValue -= minValue; // delta
        }

        final long delta = maxValue - minValue;
        // A negative delta means the subtraction overflowed, so the full 64 bits are used.
        final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
        final float acceptableOverheadRatio =
            fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
        final PackedInts.FormatAndBits formatAndBits =
            PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);

        // there's a sweet spot where, due to a low unique value count, using ordinals will
        // consume less memory than one packed value per document
        final long singleValuesSize =
            formatAndBits.format.longCount(
                    PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue)
                * 8L;
        final long uniqueValuesSize = values.ramBytesUsed();
        final long ordinalsSize = build.getMemorySizeInBytes();

        if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
          // Ordinals plus unique values are estimated to be smaller: keep the ordinals.
          data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        } else {
          final PackedInts.Mutable sValues =
              PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
          // Pre-fill with the missing marker; skipped when the marker is 0.
          // NOTE(review): presumably a fresh PackedInts.Mutable already holds zeros - confirm.
          if (missingValue != 0) {
            sValues.fill(0, sValues.size(), missingValue);
          }
          for (int i = 0; i < reader.maxDoc(); i++) {
            final long ord = ordinals.getOrd(i);
            if (ord != Ordinals.MISSING_ORDINAL) {
              // The value for ordinal `ord` sits at index ord - 1 in the values buffer; it is
              // stored as an offset from minValue.
              sValues.set(i, values.get(ord - 1) - minValue);
            }
          }
          if (set == null) {
            data =
                new PackedArrayAtomicFieldData.Single(
                    sValues, minValue, reader.maxDoc(), ordinals.getNumOrds());
          } else {
            data =
                new PackedArrayAtomicFieldData.SingleSparse(
                    sValues, minValue, reader.maxDoc(), missingValue, ordinals.getNumOrds());
          }
        }
      } else {
        // Multi-valued field, or ordinal removal disabled: always keep the ordinals.
        data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
      }

      success = true;
      return data;
    } finally {
      if (!success) {
        // If something went wrong, unwind any current estimations we've made
        estimator.afterLoad(termsEnum, 0);
      } else {
        // Adjust as usual, based on the actual size of the field data
        estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
      }
    }
  }