示例#1
0
 /**
  * Walks {@code termsEnum} to exhaustion and offers one {@link TermStats} per term to {@code tiq}.
  *
  * <p>The bytes of every term are copied into a freshly allocated {@link BytesRef} before the
  * entry is created, so the queue never holds the instance returned by the enumeration
  * (presumably because the enumeration may reuse it between calls).
  *
  * @param termsEnum enumeration to consume; iteration stops when {@code next()} returns null
  * @param tiq queue that receives each entry through {@code insertWithOverflow}
  * @param field field name stored in every created {@link TermStats}
  * @throws Exception if reading from the enumeration fails
  */
 public static void fillQueue(TermsEnum termsEnum, TermStatsQueue tiq, String field)
     throws Exception {
   for (BytesRef current = termsEnum.next(); current != null; current = termsEnum.next()) {
     final BytesRef owned = new BytesRef();
     owned.copyBytes(current);
     // docFreq() is read while the enumeration is still positioned on the current term.
     final TermStats entry = new TermStats(field, owned, termsEnum.docFreq());
     tiq.insertWithOverflow(entry);
   }
 }
 /**
  * Consumes {@code stream} completely and returns a private copy of the term bytes that were
  * filled for its last token.
  *
  * <p>The stream is driven through the full consumer workflow (reset, incrementToken until
  * exhausted, end, close). It is always closed, even when consuming it fails.
  *
  * @param stream token stream to consume; it must expose a {@link TermToBytesRefAttribute}
  * @return a newly allocated {@link BytesRef} holding a copy of the attribute's bytes; if the
  *     stream produced no token, the copy reflects whatever the attribute's bytes held initially
  * @throws IOException if resetting, advancing, ending or closing the stream fails
  */
 private BytesRef bytesFromTokenStream(TokenStream stream) throws IOException {
   try {
     TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
     BytesRef bytesRef = termAttr.getBytesRef();
     stream.reset();
     while (stream.incrementToken()) {
       // Each call overwrites bytesRef, so only the last token's bytes survive the loop.
       termAttr.fillBytesRef();
     }
     // Snapshot before end(), in case ending the stream resets attribute state.
     BytesRef copy = new BytesRef();
     copy.copyBytes(bytesRef);
     stream.end();
     return copy;
   } finally {
     // Release the stream on every path, including failures while consuming it.
     stream.close();
   }
 }
 /**
  * Decides whether {@code text} should be indexed.
  *
  * <p>A term is accepted when the policy selects it or when the {@code first} flag is still
  * set. On acceptance the flag is cleared; on rejection the term is remembered in
  * {@code lastTerm}.
  *
  * @param text candidate term
  * @param stats statistics handed to the policy together with the term
  * @return {@code true} if the term is accepted, {@code false} otherwise
  * @throws IOException declared by the overridden method
  */
 @Override
 public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
   // The policy is always consulted first. The flag only forces the first term per field to
   // be indexed in case the policy would not pick it.
   final boolean accepted = policy.isIndexTerm(text, stats) || first;
   if (!accepted) {
     lastTerm.copyBytes(text);
     return false;
   }
   first = false;
   return true;
 }
  /**
   * Compares the per-document binary values of two field data implementations and asserts that
   * they agree.
   *
   * <p>For every document up to {@code maxDoc} the method checks that both sides report the same
   * value count, that consecutive right-hand values are strictly increasing under
   * {@code pre.compare}, and that {@code pre.toString} yields equal results for the left and
   * right value at each position.
   *
   * @param random picks, independently per side, between {@code load} and {@code loadDirect}
   * @param context reader context whose documents are checked
   * @param left first field data implementation
   * @param right second field data implementation
   * @param pre comparison and string-conversion strategy applied to the values
   * @throws Exception if loading or reading values fails
   */
  private static void duelFieldDataBytes(
      Random random,
      AtomicReaderContext context,
      IndexFieldData<?> left,
      IndexFieldData<?> right,
      Preprocessor pre)
      throws Exception {
    // The left side draws its random boolean before the right side.
    final AtomicFieldData leftData;
    if (random.nextBoolean()) {
      leftData = left.load(context);
    } else {
      leftData = left.loadDirect(context);
    }
    final AtomicFieldData rightData;
    if (random.nextBoolean()) {
      rightData = right.load(context);
    } else {
      rightData = right.loadDirect(context);
    }

    final int maxDoc = context.reader().maxDoc();
    final SortedBinaryDocValues leftValues = leftData.getBytesValues();
    final SortedBinaryDocValues rightValues = rightData.getBytesValues();
    final BytesRef leftScratch = new BytesRef();
    final BytesRef rightScratch = new BytesRef();

    for (int docId = 0; docId < maxDoc; docId++) {
      leftValues.setDocument(docId);
      rightValues.setDocument(docId);
      final int valueCount = leftValues.count();
      assertThat(valueCount, equalTo(rightValues.count()));

      BytesRef previous = null;
      for (int valueIdx = 0; valueIdx < valueCount; valueIdx++) {
        rightScratch.copyBytes(rightValues.valueAt(valueIdx));
        leftScratch.copyBytes(leftValues.valueAt(valueIdx));
        if (previous != null) {
          // Values of one document must be strictly ascending.
          assertThat(pre.compare(previous, rightScratch), lessThan(0));
        }
        // The scratch is overwritten on the next round, so keep an independent copy.
        previous = BytesRef.deepCopyOf(rightScratch);
        // NOTE(review): the results of the next two calls are discarded; they look redundant
        // unless Preprocessor.toString has side effects - confirm before removing.
        pre.toString(rightScratch);
        pre.toString(leftScratch);
        assertThat(pre.toString(leftScratch), equalTo(pre.toString(rightScratch)));
      }
    }
  }
 /**
  * Adds an index term to the FST builder, keyed by a prefix of {@code text} and mapped to
  * {@code termsFilePointer}.
  *
  * <p>An empty term is skipped. Otherwise {@code text.length} is temporarily shortened to the
  * value returned by {@code indexedTermPrefixLength(lastTerm, text)} while the builder is
  * called, then restored, and the full term is remembered in {@code lastTerm}.
  *
  * @param text term to add; its {@code length} is modified temporarily and restored on exit
  * @param stats term statistics (not used by this method)
  * @param termsFilePointer value stored in the FST for this term
  * @throws IOException if adding to the FST builder fails
  */
 @Override
 public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
   if (text.length == 0) {
     // The empty string was already added in the constructor.
     assert termsFilePointer == startTermsFilePointer;
     return;
   }
   final int fullLength = text.length;
   // Truncate in place so no temporary BytesRef is needed for the prefix.
   text.length = indexedTermPrefixLength(lastTerm, text);
   try {
     fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
   } finally {
     // Hand the caller's term back at its original length even if the builder throws.
     text.length = fullLength;
   }
   lastTerm.copyBytes(text);
 }
 /**
  * Adds a random number of term deletes to a {@link DocumentsWriterDeleteQueue} and checks that
  * two independently advanced slices, as well as the frozen global buffer, each end up with
  * exactly the set of added terms.
  */
 public void testUpdateDelteSlices() {
   final DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
   final int size = 200 + random().nextInt(500) * RANDOM_MULTIPLIER;
   final Integer[] ids = new Integer[size];
   for (int n = 0; n < ids.length; n++) {
     ids[n] = random().nextInt();
   }

   final DeleteSlice slice1 = queue.newSlice();
   final DeleteSlice slice2 = queue.newSlice();
   final BufferedUpdates bd1 = new BufferedUpdates();
   final BufferedUpdates bd2 = new BufferedUpdates();
   // Index of the first id that each slice has not applied yet.
   int last1 = 0;
   int last2 = 0;
   final Set<Term> uniqueValues = new HashSet<>();

   final int lastIndex = ids.length - 1;
   for (int j = 0; j <= lastIndex; j++) {
     final boolean isLast = j == lastIndex;
     // Wrapped in an array because isTailItem below compares identity against the tail item.
     final Term[] term = {new Term("id", ids[j].toString())};
     uniqueValues.add(term[0]);
     queue.addDelete(term);

     // Each slice catches up at random points, and always on the final delete.
     if (random().nextInt(20) == 0 || isLast) {
       queue.updateSlice(slice1);
       assertTrue(slice1.isTailItem(term));
       slice1.apply(bd1, j);
       assertAllBetween(last1, j, bd1, ids);
       last1 = j + 1;
     }
     if (random().nextInt(10) == 5 || isLast) {
       queue.updateSlice(slice2);
       assertTrue(slice2.isTailItem(term));
       slice2.apply(bd2, j);
       assertAllBetween(last2, j, bd2, ids);
       last2 = j + 1;
     }
     assertEquals(j + 1, queue.numGlobalTermDeletes());
   }

   assertEquals(uniqueValues, bd1.terms.keySet());
   assertEquals(uniqueValues, bd2.terms.keySet());

   final Set<Term> frozenSet = new HashSet<>();
   for (Term frozen : queue.freezeGlobalBuffer(null).termsIterable()) {
     // Copy the bytes so the stored Term does not share them with the iterated one.
     final BytesRef ownedBytes = new BytesRef();
     ownedBytes.copyBytes(frozen.bytes);
     frozenSet.add(new Term(frozen.field, ownedBytes));
   }
   assertEquals(uniqueValues, frozenSet);
   assertEquals("num deletes must be 0 after freeze", 0, queue.numGlobalTermDeletes());
 }
示例#7
0
  /**
   * Computes the next term to seek to.
   *
   * <p>For a non-null {@code term}, its bytes are copied into {@code seekBytesRef} before
   * {@code nextString()} is called. For a null {@code term}, {@code seekBytesRef} is expected to
   * be empty and is returned as-is when the automaton accepts its initial state (the empty term
   * is valid); otherwise {@code nextString()} is called.
   *
   * @param term the current term, or {@code null} when no term has been seen yet
   * @return {@code seekBytesRef} positioned on the next candidate, or {@code null} when
   *     {@code nextString()} reports that no further string can match
   * @throws IOException declared by the overridden method
   */
  @Override
  protected BytesRef nextSeekTerm(final BytesRef term) throws IOException {
    if (term != null) {
      seekBytesRef.copyBytes(term);
    } else {
      assert seekBytesRef.length == 0;
      // The empty term is itself a valid match when the initial state is accepting.
      final boolean emptyTermAccepted = runAutomaton.isAccept(runAutomaton.getInitialState());
      if (emptyTermAccepted) {
        return seekBytesRef;
      }
    }

    // Advance to the next possible string: reposition on success, null once none are left.
    return nextString() ? seekBytesRef : null;
  }
  /**
   * Feeds a {@link DocumentsWriterDeleteQueue} from several {@code UpdateThread}s at once and
   * checks that every thread's slice, and the frozen global buffer, contain exactly the set of
   * generated terms.
   *
   * @throws InterruptedException if interrupted while joining the worker threads
   */
  public void testStressDeleteQueue() throws InterruptedException {
    final DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
    final Set<Term> uniqueValues = new HashSet<>();
    final int size = 10000 + random().nextInt(500) * RANDOM_MULTIPLIER;
    final Integer[] ids = new Integer[size];
    for (int n = 0; n < ids.length; n++) {
      ids[n] = random().nextInt();
      uniqueValues.add(new Term("id", ids[n].toString()));
    }

    final CountDownLatch latch = new CountDownLatch(1);
    final AtomicInteger index = new AtomicInteger(0);
    final int numThreads = 2 + random().nextInt(5);
    final UpdateThread[] threads = new UpdateThread[numThreads];
    for (int t = 0; t < numThreads; t++) {
      final UpdateThread worker = new UpdateThread(queue, index, ids, latch);
      threads[t] = worker;
      worker.start();
    }
    // The latch is released only after every thread has been started.
    latch.countDown();
    for (UpdateThread worker : threads) {
      worker.join();
    }

    for (UpdateThread worker : threads) {
      final DeleteSlice slice = worker.slice;
      queue.updateSlice(slice);
      final BufferedUpdates deletes = worker.deletes;
      slice.apply(deletes, BufferedUpdates.MAX_INT);
      assertEquals(uniqueValues, deletes.terms.keySet());
    }

    queue.tryApplyGlobalSlice();
    final Set<Term> frozenSet = new HashSet<>();
    for (Term frozen : queue.freezeGlobalBuffer(null).termsIterable()) {
      // Copy the bytes so the stored Term does not share them with the iterated one.
      final BytesRef ownedBytes = new BytesRef();
      ownedBytes.copyBytes(frozen.bytes);
      frozenSet.add(new Term(frozen.field, ownedBytes));
    }
    assertEquals("num deletes must be 0 after freeze", 0, queue.numGlobalTermDeletes());
    assertEquals(uniqueValues.size(), frozenSet.size());
    assertEquals(uniqueValues, frozenSet);
  }
  /**
   * Builds the FST from the supplied suggestions and stores it in {@code fst}.
   *
   * <p>The input is wrapped in a {@code WFSTTermFreqIteratorWrapper} created with the
   * UTF-8-sorted-as-Unicode comparator. Each distinct term is added to the builder with its
   * weight; a term equal to the one added immediately before it is skipped.
   *
   * @param iterator source of terms and weights
   * @throws IOException if reading the input or building the FST fails
   */
  @Override
  public void build(TermFreqIterator iterator) throws IOException {
    TermFreqIterator iter =
        new WFSTTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
    IntsRef scratchInts = new IntsRef();
    BytesRef previous = null;
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
    // Assigned from the iterator on every round, so no up-front allocation is needed.
    BytesRef current;
    while ((current = iter.next()) != null) {
      long cost = iter.weight();

      if (previous == null) {
        // First term: allocate the holder once; it is filled at the end of this round.
        previous = new BytesRef();
      } else if (current.equals(previous)) {
        // Duplicate suggestion: the entry added first is kept. Per the original note that is
        // the best-weighted one (presumably guaranteed by the wrapper's ordering).
        continue;
      }
      Util.toIntsRef(current, scratchInts);
      builder.add(scratchInts, cost);
      previous.copyBytes(current);
    }
    fst = builder.finish();
  }
 /**
  * Copies the state of {@code source} into this instance.
  *
  * <p>The {@code exists} flag is taken over and the bytes of the source value are copied into
  * this instance's own {@code value} rather than sharing the reference.
  *
  * @param source value to copy from; must be a {@code MutableValueStr}, otherwise the cast
  *     fails with a {@link ClassCastException}
  */
 @Override
 public void copy(MutableValue source) {
   final MutableValueStr other = (MutableValueStr) source;
   this.exists = other.exists;
   this.value.copyBytes(other.value);
 }