@Override public void collect(int doc, long bucketOrd) { visitedOrds = bigArrays.grow(visitedOrds, bucketOrd + 1); FixedBitSet bits = visitedOrds.get(bucketOrd); if (bits == null) { bits = new FixedBitSet(maxOrd); visitedOrds.set(bucketOrd, bits); } values.setDocument(doc); final int valueCount = values.cardinality(); for (int i = 0; i < valueCount; ++i) { bits.set((int) values.ordAt(i)); } }
protected CommonSettings.MemoryStorageFormat chooseStorageFormat( LeafReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals, long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) { CommonSettings.MemoryStorageFormat format; // estimate memory usage for a single packed array long packedDelta = maxValue - minValue + 1; // allow for a missing value // valuesDelta can be negative if the difference between max and min values overflows the // positive side of longs. int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta); PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); final long singleValuesSize = formatAndBits.format.longCount( PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L; // ordinal memory usage final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed(); // estimate the memory signature of paged packing long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages int pageIndex = 0; long pageMinOrdinal = Long.MAX_VALUE; long pageMaxOrdinal = Long.MIN_VALUE; for (int i = 1; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) { ordinals.setDocument(i); if (ordinals.cardinality() > 0) { long ordinal = ordinals.ordAt(0); pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal); pageMinOrdinal = Math.min(ordinal, pageMinOrdinal); } if (pageIndex == pageSize - 1) { // end of page, we now know enough to estimate memory usage pagedSingleValuesSize += getPageMemoryUsage( values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal); pageMinOrdinal = Long.MAX_VALUE; pageMaxOrdinal = Long.MIN_VALUE; } } if (pageIndex > 0) { // last page estimation pageIndex++; pagedSingleValuesSize += getPageMemoryUsage( values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal); } if (ordinalsSize < singleValuesSize) { if (ordinalsSize < pagedSingleValuesSize) { format = CommonSettings.MemoryStorageFormat.ORDINALS; } else { format = CommonSettings.MemoryStorageFormat.PAGED; } } else { if (pagedSingleValuesSize < singleValuesSize) { format = CommonSettings.MemoryStorageFormat.PAGED; } else { format = CommonSettings.MemoryStorageFormat.PACKED; } } return format; }
@Override public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception { final LeafReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); AtomicNumericFieldData data = null; PackedArrayEstimator estimator = new PackedArrayEstimator( breakerService.getBreaker(CircuitBreaker.FIELDDATA), getNumericType(), getFieldNames().fullName()); if (terms == null) { data = AtomicLongFieldData.empty(reader.maxDoc()); estimator.adjustForNoTerms(data.ramBytesUsed()); return data; } // TODO: how can we guess the number of terms? numerics end up creating more terms per value... // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer // order so we know the sequence of // longs is going to be monotonically increasing final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); final float acceptableTransientOverheadRatio = fieldDataType .getSettings() .getAsFloat( "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); TermsEnum termsEnum = estimator.beforeLoad(terms); assert !getNumericType().isFloatingPoint(); boolean success = false; try (OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) { BytesRefIterator iter = builder.buildFromTerms(termsEnum); BytesRef term; while ((term = iter.next()) != null) { final long value = numericType.toLong(term); valuesBuilder.add(value); } final PackedLongValues values = valuesBuilder.build(); final Ordinals build = builder.build(fieldDataType.getSettings()); CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType); RandomAccessOrds ordinals = build.ordinals(); if (FieldData.isMultiValued(ordinals) || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) { final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed(); data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { return withOrdinals(build, values, reader.maxDoc()); } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("ordinals", build)); resources.add(Accountables.namedAccountable("values", values)); return Collections.unmodifiableList(resources); } }; } else { final BitSet docsWithValues = builder.buildDocsWithValuesSet(); long minV, maxV; minV = maxV = 0; if (values.size() > 0) { minV = values.get(0); maxV = values.get(values.size() - 1); } final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024); if (formatHint == null) { formatHint = chooseStorageFormat( reader, values, build, ordinals, minV, maxV, acceptableOverheadRatio, pageSize); } logger.trace( "single value format for field [{}] set to [{}]", getFieldNames().fullName(), formatHint); switch (formatHint) { case PACKED: // Encode document without a value with a special value long missingV = 0; if (docsWithValues != null) { if ((maxV - minV + 1) == values.size()) { // values are dense if (minV > Long.MIN_VALUE) { missingV = --minV; } else { assert maxV != Long.MAX_VALUE; missingV = ++maxV; } } else { for (long i = 1; i < values.size(); ++i) { if (values.get(i) > values.get(i - 1) + 1) { missingV = values.get(i - 1) + 1; break; } } } missingV -= minV; } final long missingValue = missingV; final long minValue = minV; final long maxValue = maxV; final long valuesDelta = maxValue - minValue; int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta); final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); if (docsWithValues != null) { sValues.fill(0, sValues.size(), missingV); } for (int i = 0; i < reader.maxDoc(); i++) { ordinals.setDocument(i); if (ordinals.cardinality() > 0) { final long ord = ordinals.ordAt(0); long value = values.get(ord); sValues.set(i, value - minValue); } } long ramBytesUsed = values.ramBytesUsed() + (docsWithValues == null ? 0 : docsWithValues.ramBytesUsed()); data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { if (docsWithValues == null) { return singles(sValues, minValue); } else { return sparseSingles(sValues, minValue, missingValue, reader.maxDoc()); } } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("values", sValues)); if (docsWithValues != null) { resources.add( Accountables.namedAccountable("missing bitset", docsWithValues)); } return Collections.unmodifiableList(resources); } }; break; case PAGED: final PackedLongValues.Builder dpValues = PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio); long lastValue = 0; for (int i = 0; i < reader.maxDoc(); i++) { ordinals.setDocument(i); if (ordinals.cardinality() > 0) { final long ord = ordinals.ordAt(i); lastValue = values.get(ord); } dpValues.add(lastValue); } final PackedLongValues pagedValues = dpValues.build(); ramBytesUsed = pagedValues.ramBytesUsed(); if (docsWithValues != null) { ramBytesUsed += docsWithValues.ramBytesUsed(); } data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { return pagedSingles(pagedValues, docsWithValues); } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("values", pagedValues)); if (docsWithValues != null) { resources.add( Accountables.namedAccountable("missing bitset", docsWithValues)); } return Collections.unmodifiableList(resources); } }; break; case ORDINALS: ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed(); data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { return withOrdinals(build, values, reader.maxDoc()); } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("ordinals", build)); resources.add(Accountables.namedAccountable("values", values)); return Collections.unmodifiableList(resources); } }; break; default: throw new ElasticsearchException("unknown memory format: " + formatHint); } } success = true; return data; } finally { if (!success) { // If something went wrong, unwind any current estimations we've made estimator.afterLoad(termsEnum, 0); } else { // Adjust as usual, based on the actual size of the field data estimator.afterLoad(termsEnum, data.ramBytesUsed()); } } }