public void testDuelGlobalOrdinals() throws Exception { Random random = getRandom(); final int numDocs = scaledRandomIntBetween(10, 1000); final int numValues = scaledRandomIntBetween(10, 500); final String[] values = new String[numValues]; for (int i = 0; i < numValues; ++i) { values[i] = new String(RandomStrings.randomAsciiOfLength(random, 10)); } for (int i = 0; i < numDocs; i++) { Document d = new Document(); final int numVals = randomInt(3); for (int j = 0; j < numVals; ++j) { final String value = RandomPicks.randomFrom(random, Arrays.asList(values)); d.add(new StringField("string", value, Field.Store.NO)); d.add(new SortedSetDocValuesField("bytes", new BytesRef(value))); } writer.addDocument(d); if (randomInt(10) == 0) { refreshReader(); } } refreshReader(); Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>(); typeMap.put( new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes); typeMap.put( new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes); typeMap.put( new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes); for (Map.Entry<FieldDataType, Type> entry : typeMap.entrySet()) { ifdService.clear(); IndexOrdinalsFieldData fieldData = getForField(entry.getKey(), entry.getValue().name().toLowerCase(Locale.ROOT)); RandomAccessOrds left = fieldData.load(readerContext).getOrdinalsValues(); fieldData.clear(); RandomAccessOrds right = fieldData .loadGlobal(topLevelReader) .load(topLevelReader.leaves().get(0)) .getOrdinalsValues(); assertEquals(left.getValueCount(), right.getValueCount()); for (long ord = 0; ord < left.getValueCount(); ++ord) { assertEquals(left.lookupOrd(ord), right.lookupOrd(ord)); } } }
protected CommonSettings.MemoryStorageFormat chooseStorageFormat( LeafReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals, long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) { CommonSettings.MemoryStorageFormat format; // estimate memory usage for a single packed array long packedDelta = maxValue - minValue + 1; // allow for a missing value // valuesDelta can be negative if the difference between max and min values overflows the // positive side of longs. int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta); PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); final long singleValuesSize = formatAndBits.format.longCount( PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L; // ordinal memory usage final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed(); // estimate the memory signature of paged packing long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages int pageIndex = 0; long pageMinOrdinal = Long.MAX_VALUE; long pageMaxOrdinal = Long.MIN_VALUE; for (int i = 1; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) { ordinals.setDocument(i); if (ordinals.cardinality() > 0) { long ordinal = ordinals.ordAt(0); pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal); pageMinOrdinal = Math.min(ordinal, pageMinOrdinal); } if (pageIndex == pageSize - 1) { // end of page, we now know enough to estimate memory usage pagedSingleValuesSize += getPageMemoryUsage( values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal); pageMinOrdinal = Long.MAX_VALUE; pageMaxOrdinal = Long.MIN_VALUE; } } if (pageIndex > 0) { // last page estimation pageIndex++; pagedSingleValuesSize += getPageMemoryUsage( values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal); } if (ordinalsSize < singleValuesSize) { if (ordinalsSize < pagedSingleValuesSize) { format = CommonSettings.MemoryStorageFormat.ORDINALS; } else { format = CommonSettings.MemoryStorageFormat.PAGED; } } else { if (pagedSingleValuesSize < singleValuesSize) { format = CommonSettings.MemoryStorageFormat.PAGED; } else { format = CommonSettings.MemoryStorageFormat.PACKED; } } return format; }
@Override public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception { final LeafReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); AtomicNumericFieldData data = null; PackedArrayEstimator estimator = new PackedArrayEstimator( breakerService.getBreaker(CircuitBreaker.FIELDDATA), getNumericType(), getFieldNames().fullName()); if (terms == null) { data = AtomicLongFieldData.empty(reader.maxDoc()); estimator.adjustForNoTerms(data.ramBytesUsed()); return data; } // TODO: how can we guess the number of terms? numerics end up creating more terms per value... // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer // order so we know the sequence of // longs is going to be monotonically increasing final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); final float acceptableTransientOverheadRatio = fieldDataType .getSettings() .getAsFloat( "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); TermsEnum termsEnum = estimator.beforeLoad(terms); assert !getNumericType().isFloatingPoint(); boolean success = false; try (OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) { BytesRefIterator iter = builder.buildFromTerms(termsEnum); BytesRef term; while ((term = iter.next()) != null) { final long value = numericType.toLong(term); valuesBuilder.add(value); } final PackedLongValues values = valuesBuilder.build(); final Ordinals build = builder.build(fieldDataType.getSettings()); CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType); RandomAccessOrds ordinals = build.ordinals(); if (FieldData.isMultiValued(ordinals) || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) { final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed(); data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { return withOrdinals(build, values, reader.maxDoc()); } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("ordinals", build)); resources.add(Accountables.namedAccountable("values", values)); return Collections.unmodifiableList(resources); } }; } else { final BitSet docsWithValues = builder.buildDocsWithValuesSet(); long minV, maxV; minV = maxV = 0; if (values.size() > 0) { minV = values.get(0); maxV = values.get(values.size() - 1); } final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024); if (formatHint == null) { formatHint = chooseStorageFormat( reader, values, build, ordinals, minV, maxV, acceptableOverheadRatio, pageSize); } logger.trace( "single value format for field [{}] set to [{}]", getFieldNames().fullName(), formatHint); switch (formatHint) { case PACKED: // Encode document without a value with a special value long missingV = 0; if (docsWithValues != null) { if ((maxV - minV + 1) == values.size()) { // values are dense if (minV > Long.MIN_VALUE) { missingV = --minV; } else { assert maxV != Long.MAX_VALUE; missingV = ++maxV; } } else { for (long i = 1; i < values.size(); ++i) { if (values.get(i) > values.get(i - 1) + 1) { missingV = values.get(i - 1) + 1; break; } } } missingV -= minV; } final long missingValue = missingV; final long minValue = minV; final long maxValue = maxV; final long valuesDelta = maxValue - minValue; int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta); final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); if (docsWithValues != null) { sValues.fill(0, sValues.size(), missingV); } for (int i = 0; i < reader.maxDoc(); i++) { ordinals.setDocument(i); if (ordinals.cardinality() > 0) { final long ord = ordinals.ordAt(0); long value = values.get(ord); sValues.set(i, value - minValue); } } long ramBytesUsed = values.ramBytesUsed() + (docsWithValues == null ? 0 : docsWithValues.ramBytesUsed()); data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { if (docsWithValues == null) { return singles(sValues, minValue); } else { return sparseSingles(sValues, minValue, missingValue, reader.maxDoc()); } } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("values", sValues)); if (docsWithValues != null) { resources.add( Accountables.namedAccountable("missing bitset", docsWithValues)); } return Collections.unmodifiableList(resources); } }; break; case PAGED: final PackedLongValues.Builder dpValues = PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio); long lastValue = 0; for (int i = 0; i < reader.maxDoc(); i++) { ordinals.setDocument(i); if (ordinals.cardinality() > 0) { final long ord = ordinals.ordAt(i); lastValue = values.get(ord); } dpValues.add(lastValue); } final PackedLongValues pagedValues = dpValues.build(); ramBytesUsed = pagedValues.ramBytesUsed(); if (docsWithValues != null) { ramBytesUsed += docsWithValues.ramBytesUsed(); } data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { return pagedSingles(pagedValues, docsWithValues); } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("values", pagedValues)); if (docsWithValues != null) { resources.add( Accountables.namedAccountable("missing bitset", docsWithValues)); } return Collections.unmodifiableList(resources); } }; break; case ORDINALS: ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed(); data = new AtomicLongFieldData(ramBytesUsed) { @Override public SortedNumericDocValues getLongValues() { return withOrdinals(build, values, reader.maxDoc()); } @Override public Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("ordinals", build)); resources.add(Accountables.namedAccountable("values", values)); return Collections.unmodifiableList(resources); } }; break; default: throw new ElasticsearchException("unknown memory format: " + formatHint); } } success = true; return data; } finally { if (!success) { // If something went wrong, unwind any current estimations we've made estimator.afterLoad(termsEnum, 0); } else { // Adjust as usual, based on the actual size of the field data estimator.afterLoad(termsEnum, data.ramBytesUsed()); } } }