@Test
public void testDateScripts() throws Exception {
    fillSingleValueAllSet();
    IndexNumericFieldData indexFieldData = getForField("value");
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    ScriptDocValues.Longs scriptValues = (ScriptDocValues.Longs) fieldData.getScriptValues();
    scriptValues.setNextDocId(0);
    assertThat(scriptValues.getValue(), equalTo(2L));
    assertThat(scriptValues.getDate().getMillis(), equalTo(2L));
    assertThat(scriptValues.getDate().getZone(), equalTo(DateTimeZone.UTC));
}
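// Hedged sketch of the fillSingleValueAllSet() fixture the test above relies on
// (hypothetical; the real helper lives in the surrounding test base class). It
// indexes one long "value" per document, with document 0 holding 2 so that the
// getValue()/getDate() assertions hold; `writer` is the test class's IndexWriter.
protected void fillSingleValueAllSet() throws Exception {
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new LongField("value", 2, Field.Store.NO)); // doc 0 -> value 2, read back as epoch millis
    writer.addDocument(d);
    // ... the real fixture may index further documents with other values ...
}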
@Override
@SuppressWarnings("rawtypes") // ValueSource uses a rawtype
public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
    AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
    NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
    return new DoubleDocValues(this) {
        @Override
        public double doubleVal(int doc) {
            return docValues.get(doc);
        }
    };
}
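// Hypothetical caller for the getValues(...) override above: FunctionValues
// exposes one selected double per document, so a consumer can iterate the
// leaf's documents directly (the names `valueSource` and `leaf` are assumed here).
FunctionValues fv = valueSource.getValues(Collections.emptyMap(), leaf);
for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
    double v = fv.doubleVal(doc); // docs without a value come back as the 0d default
    // ... use v for scoring or aggregation ...
}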
private void test(List<TLongSet> values) throws Exception {
    StringField id = new StringField("_id", "", Field.Store.NO);
    for (int i = 0; i < values.size(); ++i) {
        Document doc = new Document();
        id.setStringValue("" + i);
        doc.add(id);
        final TLongSet v = values.get(i);
        for (TLongIterator it = v.iterator(); it.hasNext(); ) {
            LongField value = new LongField("value", it.next(), Field.Store.NO);
            doc.add(value);
        }
        writer.addDocument(doc);
    }
    writer.forceMerge(1);

    final IndexNumericFieldData indexFieldData = getForField("value");
    final AtomicNumericFieldData atomicFieldData = indexFieldData.load(refreshReader());
    final LongValues data = atomicFieldData.getLongValues();
    final DoubleValues doubleData = atomicFieldData.getDoubleValues();
    final TLongSet set = new TLongHashSet();
    final TDoubleSet doubleSet = new TDoubleHashSet();
    for (int i = 0; i < values.size(); ++i) {
        final TLongSet v = values.get(i);

        assertThat(data.hasValue(i), equalTo(!v.isEmpty()));
        assertThat(doubleData.hasValue(i), equalTo(!v.isEmpty()));

        if (v.isEmpty()) {
            assertThat(data.getValue(i), equalTo(0L));
            assertThat(doubleData.getValue(i), equalTo(0d));
        }

        set.clear();
        for (LongValues.Iter iter = data.getIter(i); iter.hasNext(); ) {
            set.add(iter.next());
        }
        assertThat(set, equalTo(v));

        final TDoubleSet doubleV = new TDoubleHashSet();
        for (TLongIterator it = v.iterator(); it.hasNext(); ) {
            doubleV.add((double) it.next());
        }
        doubleSet.clear();
        for (DoubleValues.Iter iter = doubleData.getIter(i); iter.hasNext(); ) {
            doubleSet.add(iter.next());
        }
        assertThat(doubleSet, equalTo(doubleV));
    }
}
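// Hypothetical invocation of the test(...) helper above, covering the three
// interesting shapes: multi-valued, empty (no value), and single-valued documents.
List<TLongSet> docs = new ArrayList<>();
docs.add(new TLongHashSet(new long[] {3, 7})); // multi-valued document
docs.add(new TLongHashSet());                  // document without any value
docs.add(new TLongHashSet(new long[] {42}));   // single-valued document
test(docs);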
@Test
public void testOptimizeTypeLong() throws Exception {
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new LongField("value", Integer.MAX_VALUE + 1L, Field.Store.NO));
    writer.addDocument(d);

    d = new Document();
    d.add(new StringField("_id", "2", Field.Store.NO));
    d.add(new LongField("value", Integer.MIN_VALUE - 1L, Field.Store.NO));
    writer.addDocument(d);

    IndexNumericFieldData indexFieldData =
            ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
    assertThat(fieldData, instanceOf(PackedArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE + 1L));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE - 1L));
}
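// The 1L suffixes above matter: without them the arithmetic happens in 32-bit
// int space and overflows before it is widened to long.
long wrong = Integer.MAX_VALUE + 1;  // -2147483648: int overflow, then widened
long right = Integer.MAX_VALUE + 1L; //  2147483648: widened to long before adding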
@Override
public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception {
    final LeafReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    AtomicNumericFieldData data = null;
    PackedArrayEstimator estimator = new PackedArrayEstimator(
            breakerService.getBreaker(CircuitBreaker.FIELDDATA),
            getNumericType(),
            getFieldNames().fullName());
    if (terms == null) {
        data = AtomicLongFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.ramBytesUsed());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer
    // order, so we know the sequence of longs is going to be monotonically increasing.
    final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    assert !getNumericType().isFloatingPoint();
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        while ((term = iter.next()) != null) {
            final long value = numericType.toLong(term);
            valuesBuilder.add(value);
        }
        final PackedLongValues values = valuesBuilder.build();
        final Ordinals build = builder.build(fieldDataType.getSettings());
        CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType);
        RandomAccessOrds ordinals = build.ordinals();
        if (FieldData.isMultiValued(ordinals) || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) {
            final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
            data = new AtomicLongFieldData(ramBytesUsed) {
                @Override
                public SortedNumericDocValues getLongValues() {
                    return withOrdinals(build, values, reader.maxDoc());
                }

                @Override
                public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("ordinals", build));
                    resources.add(Accountables.namedAccountable("values", values));
                    return Collections.unmodifiableList(resources);
                }
            };
        } else {
            final BitSet docsWithValues = builder.buildDocsWithValuesSet();

            long minV, maxV;
            minV = maxV = 0;
            if (values.size() > 0) {
                minV = values.get(0);
                maxV = values.get(values.size() - 1);
            }

            final float acceptableOverheadRatio =
                    fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024);

            if (formatHint == null) {
                formatHint = chooseStorageFormat(reader, values, build, ordinals, minV, maxV,
                        acceptableOverheadRatio, pageSize);
            }

            logger.trace("single value format for field [{}] set to [{}]",
                    getFieldNames().fullName(), formatHint);

            switch (formatHint) {
                case PACKED:
                    // Encode documents that have no value with a special value
                    long missingV = 0;
                    if (docsWithValues != null) {
                        if ((maxV - minV + 1) == values.size()) {
                            // values are dense
                            if (minV > Long.MIN_VALUE) {
                                missingV = --minV;
                            } else {
                                assert maxV != Long.MAX_VALUE;
                                missingV = ++maxV;
                            }
                        } else {
                            for (long i = 1; i < values.size(); ++i) {
                                if (values.get(i) > values.get(i - 1) + 1) {
                                    missingV = values.get(i - 1) + 1;
                                    break;
                                }
                            }
                        }
                        missingV -= minV;
                    }
                    final long missingValue = missingV;
                    final long minValue = minV;
                    final long maxValue = maxV;

                    final long valuesDelta = maxValue - minValue;
                    int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
                    final PackedInts.Mutable sValues =
                            PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);

                    if (docsWithValues != null) {
                        sValues.fill(0, sValues.size(), missingV);
                    }

                    for (int i = 0; i < reader.maxDoc(); i++) {
                        ordinals.setDocument(i);
                        if (ordinals.cardinality() > 0) {
                            final long ord = ordinals.ordAt(0);
                            long value = values.get(ord);
                            sValues.set(i, value - minValue);
                        }
                    }
                    // Account for the structures the returned field data actually retains:
                    // the per-document packed array and the missing-docs bitset.
                    long ramBytesUsed = sValues.ramBytesUsed()
                            + (docsWithValues == null ? 0 : docsWithValues.ramBytesUsed());
                    data = new AtomicLongFieldData(ramBytesUsed) {
                        @Override
                        public SortedNumericDocValues getLongValues() {
                            if (docsWithValues == null) {
                                return singles(sValues, minValue);
                            } else {
                                return sparseSingles(sValues, minValue, missingValue, reader.maxDoc());
                            }
                        }

                        @Override
                        public Collection<Accountable> getChildResources() {
                            List<Accountable> resources = new ArrayList<>();
                            resources.add(Accountables.namedAccountable("values", sValues));
                            if (docsWithValues != null) {
                                resources.add(Accountables.namedAccountable("missing bitset", docsWithValues));
                            }
                            return Collections.unmodifiableList(resources);
                        }
                    };
                    break;
                case PAGED:
                    final PackedLongValues.Builder dpValues =
                            PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);

                    long lastValue = 0;
                    for (int i = 0; i < reader.maxDoc(); i++) {
                        ordinals.setDocument(i);
                        if (ordinals.cardinality() > 0) {
                            final long ord = ordinals.ordAt(0); // first (only) ordinal of this doc
                            lastValue = values.get(ord);
                        }
                        dpValues.add(lastValue);
                    }
                    final PackedLongValues pagedValues = dpValues.build();
                    ramBytesUsed = pagedValues.ramBytesUsed();
                    if (docsWithValues != null) {
                        ramBytesUsed += docsWithValues.ramBytesUsed();
                    }
                    data = new AtomicLongFieldData(ramBytesUsed) {
                        @Override
                        public SortedNumericDocValues getLongValues() {
                            return pagedSingles(pagedValues, docsWithValues);
                        }

                        @Override
                        public Collection<Accountable> getChildResources() {
                            List<Accountable> resources = new ArrayList<>();
                            resources.add(Accountables.namedAccountable("values", pagedValues));
                            if (docsWithValues != null) {
                                resources.add(Accountables.namedAccountable("missing bitset", docsWithValues));
                            }
                            return Collections.unmodifiableList(resources);
                        }
                    };
                    break;
                case ORDINALS:
                    ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
                    data = new AtomicLongFieldData(ramBytesUsed) {
                        @Override
                        public SortedNumericDocValues getLongValues() {
                            return withOrdinals(build, values, reader.maxDoc());
                        }

                        @Override
                        public Collection<Accountable> getChildResources() {
                            List<Accountable> resources = new ArrayList<>();
                            resources.add(Accountables.namedAccountable("ordinals", build));
                            resources.add(Accountables.namedAccountable("values", values));
                            return Collections.unmodifiableList(resources);
                        }
                    };
                    break;
                default:
                    throw new ElasticsearchException("unknown memory format: " + formatHint);
            }
        }
        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.ramBytesUsed());
        }
    }
}
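// A minimal, self-contained sketch of the PACKED idea used above (assumes only
// Lucene's org.apache.lucene.util.packed.PackedInts on the classpath): values
// are stored as offsets from the minimum, so each entry needs
// bitsRequired(max - min) bits rather than a full 64.
long[] raw = {1000L, 1003L, 1001L, 1002L};
long min = 1000L, max = 1003L;
int bits = PackedInts.bitsRequired(max - min); // 2 bits per value instead of 64
PackedInts.Mutable packed = PackedInts.getMutable(raw.length, bits, PackedInts.DEFAULT);
for (int i = 0; i < raw.length; i++) {
    packed.set(i, raw[i] - min); // store the delta from the minimum
}
long restored = packed.get(2) + min; // == 1001L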