/**
 * Walks {@code termsEnum} to exhaustion and offers one {@link TermStats} entry per term to
 * {@code tiq}.
 *
 * <p>The bytes of every term are copied into a freshly allocated {@link BytesRef} before the
 * entry is created, so the queued entry never shares the instance handed out by
 * {@code termsEnum.next()}.
 *
 * @param termsEnum source of terms; iterated until {@code next()} returns {@code null}
 * @param tiq queue receiving the entries through {@code insertWithOverflow}
 * @param field field name stored in every created {@link TermStats}
 * @throws Exception if the enum or the queue fails
 */
public static void fillQueue(TermsEnum termsEnum, TermStatsQueue tiq, String field)
    throws Exception {
  for (BytesRef current = termsEnum.next(); current != null; current = termsEnum.next()) {
    final BytesRef ownedCopy = new BytesRef();
    ownedCopy.copyBytes(current);
    // docFreq() is read while the enum is still positioned on the current term.
    tiq.insertWithOverflow(new TermStats(field, ownedCopy, termsEnum.docFreq()));
  }
}
/**
 * Consumes {@code stream} to exhaustion and returns a private copy of the bytes held by its
 * {@link TermToBytesRefAttribute} after the last token was filled.
 *
 * <p>The stream is always closed, including when attribute lookup, {@code reset()} or
 * {@code incrementToken()} throws. {@code end()} is invoked after the last token, following
 * the reset / incrementToken / end / close consumer sequence.
 *
 * @param stream the token stream to consume; it is closed by this method
 * @return a new {@link BytesRef} that does not share storage with the attribute
 * @throws IOException if the stream fails while being consumed, ended or closed
 */
private BytesRef bytesFromTokenStream(TokenStream stream) throws IOException {
  final BytesRef copy = new BytesRef();
  try {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytesRef = termAttr.getBytesRef();
    stream.reset();
    while (stream.incrementToken()) {
      // Each call refreshes the attribute's bytes; only the last token's bytes survive.
      termAttr.fillBytesRef();
    }
    // Snapshot before end() so end-of-stream processing cannot affect the returned bytes.
    copy.copyBytes(bytesRef);
    stream.end();
  } finally {
    // Release the stream even when consumption failed part-way through.
    stream.close();
  }
  return copy;
}
@Override public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException { // System.out.println("VGW: index term=" + text.utf8ToString()); // NOTE: we must force the first term per field to be // indexed, in case policy doesn't: if (policy.isIndexTerm(text, stats) || first) { first = false; // System.out.println(" YES"); return true; } else { lastTerm.copyBytes(text); return false; } }
/**
 * Asserts that two field data implementations expose the same bytes values for every
 * document of {@code context}.
 *
 * <p>For each document the value counts must match. For each value position the right-hand
 * values must be strictly increasing according to {@code pre.compare}, and the left and right
 * values must have equal {@code pre.toString} renderings.
 *
 * @param random chooses between {@code load} and {@code loadDirect}, first for the left side
 *     and then for the right side
 * @param context the reader context whose documents ({@code 0 .. maxDoc - 1}) are checked
 * @param left first field data under comparison
 * @param right second field data under comparison
 * @param pre supplies the comparison and string rendering used by the assertions
 * @throws Exception if loading or reading the field data fails
 */
private static void duelFieldDataBytes(
    Random random,
    AtomicReaderContext context,
    IndexFieldData<?> left,
    IndexFieldData<?> right,
    Preprocessor pre)
    throws Exception {
  final AtomicFieldData lhsData;
  if (random.nextBoolean()) {
    lhsData = left.load(context);
  } else {
    lhsData = left.loadDirect(context);
  }
  final AtomicFieldData rhsData;
  if (random.nextBoolean()) {
    rhsData = right.load(context);
  } else {
    rhsData = right.loadDirect(context);
  }

  final int maxDoc = context.reader().maxDoc();
  final SortedBinaryDocValues lhsValues = lhsData.getBytesValues();
  final SortedBinaryDocValues rhsValues = rhsData.getBytesValues();
  final BytesRef lhsScratch = new BytesRef();
  final BytesRef rhsScratch = new BytesRef();

  for (int doc = 0; doc < maxDoc; doc++) {
    lhsValues.setDocument(doc);
    rhsValues.setDocument(doc);
    final int valueCount = lhsValues.count();
    assertThat(valueCount, equalTo(rhsValues.count()));

    BytesRef previous = null;
    for (int idx = 0; idx < valueCount; idx++) {
      // Copy into scratch buffers so the values returned by valueAt() are not retained.
      rhsScratch.copyBytes(rhsValues.valueAt(idx));
      lhsScratch.copyBytes(lhsValues.valueAt(idx));
      if (previous != null) {
        // Values within one document must be strictly increasing.
        assertThat(pre.compare(previous, rhsScratch), lessThan(0));
      }
      previous = BytesRef.deepCopyOf(rhsScratch);
      // NOTE(review): the results of the next two calls are discarded; they look redundant
      // with the assertion below but are kept in case Preprocessor.toString has effects.
      pre.toString(rhsScratch);
      pre.toString(lhsScratch);
      assertThat(pre.toString(lhsScratch), equalTo(pre.toString(rhsScratch)));
    }
  }
}
@Override public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException { if (text.length == 0) { // We already added empty string in ctor assert termsFilePointer == startTermsFilePointer; return; } final int lengthSave = text.length; text.length = indexedTermPrefixLength(lastTerm, text); try { fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer); } finally { text.length = lengthSave; } lastTerm.copyBytes(text); }
public void testUpdateDelteSlices() { DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); final int size = 200 + random().nextInt(500) * RANDOM_MULTIPLIER; Integer[] ids = new Integer[size]; for (int i = 0; i < ids.length; i++) { ids[i] = random().nextInt(); } DeleteSlice slice1 = queue.newSlice(); DeleteSlice slice2 = queue.newSlice(); BufferedUpdates bd1 = new BufferedUpdates(); BufferedUpdates bd2 = new BufferedUpdates(); int last1 = 0; int last2 = 0; Set<Term> uniqueValues = new HashSet<>(); for (int j = 0; j < ids.length; j++) { Integer i = ids[j]; // create an array here since we compare identity below against tailItem Term[] term = new Term[] {new Term("id", i.toString())}; uniqueValues.add(term[0]); queue.addDelete(term); if (random().nextInt(20) == 0 || j == ids.length - 1) { queue.updateSlice(slice1); assertTrue(slice1.isTailItem(term)); slice1.apply(bd1, j); assertAllBetween(last1, j, bd1, ids); last1 = j + 1; } if (random().nextInt(10) == 5 || j == ids.length - 1) { queue.updateSlice(slice2); assertTrue(slice2.isTailItem(term)); slice2.apply(bd2, j); assertAllBetween(last2, j, bd2, ids); last2 = j + 1; } assertEquals(j + 1, queue.numGlobalTermDeletes()); } assertEquals(uniqueValues, bd1.terms.keySet()); assertEquals(uniqueValues, bd2.terms.keySet()); HashSet<Term> frozenSet = new HashSet<>(); for (Term t : queue.freezeGlobalBuffer(null).termsIterable()) { BytesRef bytesRef = new BytesRef(); bytesRef.copyBytes(t.bytes); frozenSet.add(new Term(t.field, bytesRef)); } assertEquals(uniqueValues, frozenSet); assertEquals("num deletes must be 0 after freeze", 0, queue.numGlobalTermDeletes()); }
@Override protected BytesRef nextSeekTerm(final BytesRef term) throws IOException { // System.out.println("ATE.nextSeekTerm term=" + term); if (term == null) { assert seekBytesRef.length == 0; // return the empty term, as its valid if (runAutomaton.isAccept(runAutomaton.getInitialState())) { return seekBytesRef; } } else { seekBytesRef.copyBytes(term); } // seek to the next possible string; if (nextString()) { return seekBytesRef; // reposition } else { return null; // no more possible strings can match } }
/**
 * Runs several {@code UpdateThread}s against one {@link DocumentsWriterDeleteQueue} over a
 * shared array of random ids.
 *
 * <p>After all threads have finished, each thread's slice is updated and applied and its
 * buffered deletes must contain every unique term. The global buffer is then frozen; the
 * frozen terms must equal the unique terms and the global delete count must be 0.
 *
 * @throws InterruptedException if interrupted while joining the worker threads
 */
public void testStressDeleteQueue() throws InterruptedException {
  final DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
  final Set<Term> uniqueValues = new HashSet<>();
  final int size = 10000 + random().nextInt(500) * RANDOM_MULTIPLIER;
  final Integer[] ids = new Integer[size];
  for (int k = 0; k < ids.length; k++) {
    ids[k] = random().nextInt();
    uniqueValues.add(new Term("id", ids[k].toString()));
  }

  final CountDownLatch startSignal = new CountDownLatch(1);
  final AtomicInteger sharedIndex = new AtomicInteger(0);
  final int numThreads = 2 + random().nextInt(5);
  final UpdateThread[] workers = new UpdateThread[numThreads];
  for (int t = 0; t < workers.length; t++) {
    workers[t] = new UpdateThread(queue, sharedIndex, ids, startSignal);
    workers[t].start();
  }

  // The latch is counted down only after every worker has been started.
  startSignal.countDown();
  for (UpdateThread worker : workers) {
    worker.join();
  }

  for (UpdateThread worker : workers) {
    final DeleteSlice slice = worker.slice;
    queue.updateSlice(slice);
    final BufferedUpdates deletes = worker.deletes;
    slice.apply(deletes, BufferedUpdates.MAX_INT);
    assertEquals(uniqueValues, deletes.terms.keySet());
  }

  queue.tryApplyGlobalSlice();

  // Rebuild each frozen term on top of its own copy of the bytes before collecting it.
  final Set<Term> frozenSet = new HashSet<>();
  for (Term frozen : queue.freezeGlobalBuffer(null).termsIterable()) {
    final BytesRef ownedBytes = new BytesRef();
    ownedBytes.copyBytes(frozen.bytes);
    frozenSet.add(new Term(frozen.field, ownedBytes));
  }
  assertEquals("num deletes must be 0 after freeze", 0, queue.numGlobalTermDeletes());
  assertEquals(uniqueValues.size(), frozenSet.size());
  assertEquals(uniqueValues, frozenSet);
}
@Override public void build(TermFreqIterator iterator) throws IOException { BytesRef scratch = new BytesRef(); TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); while ((scratch = iter.next()) != null) { long cost = iter.weight(); if (previous == null) { previous = new BytesRef(); } else if (scratch.equals(previous)) { continue; // for duplicate suggestions, the best weight is actually // added } Util.toIntsRef(scratch, scratchInts); builder.add(scratchInts, cost); previous.copyBytes(scratch); } fst = builder.finish(); }
/**
 * Copies the state of {@code source} into this value: the {@code exists} flag is assigned and
 * the bytes of {@code source.value} are copied into this instance's {@code value}.
 *
 * @param source the value to copy from; must be a {@code MutableValueStr}
 * @throws ClassCastException if {@code source} is not a {@code MutableValueStr}
 */
@Override
public void copy(MutableValue source) {
  final MutableValueStr other = (MutableValueStr) source;
  this.exists = other.exists;
  // Copy the bytes rather than sharing the other instance's BytesRef.
  this.value.copyBytes(other.value);
}