Beispiel #1
0
  public void testBackToTheFuture() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));

    Document doc = new Document();
    doc.add(newStringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(newStringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    DirectoryReader r1 = DirectoryReader.open(iw, true);

    iw.deleteDocuments(new Term("foo", "baz"));
    DirectoryReader r2 = DirectoryReader.open(iw, true);

    FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo");

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo");
    assertEquals(2, v.getValueCount());
    v.setDocument(1);
    assertEquals(1, v.nextOrd());

    iw.close();
    r1.close();
    r2.close();
    dir.close();
  }
 @Override
 public int nextDoc() throws IOException {
   assertThread("Sorted set doc values", creationThread);
   int docID = in.nextDoc();
   assert docID > lastDocID;
   assert docID == NO_MORE_DOCS || docID < maxDoc;
   assert docID == in.docID();
   lastDocID = docID;
   lastOrd = -2;
   return docID;
 }
 @Override
 public int advance(int target) throws IOException {
   assertThread("Sorted set doc values", creationThread);
   assert target >= 0;
   assert target >= in.docID();
   int docID = in.advance(target);
   assert docID == in.docID();
   assert docID >= target;
   assert docID == NO_MORE_DOCS || docID < maxDoc;
   lastDocID = docID;
   lastOrd = -2;
   return docID;
 }
 @Override
 public long getValueCount() {
   assertThread("Sorted set doc values", creationThread);
   long valueCount = in.getValueCount();
   assert valueCount == this.valueCount; // should not change
   return valueCount;
 }
 @Override
 public long cost() {
   assertThread("Sorted set doc values", creationThread);
   long cost = in.cost();
   assert cost >= 0;
   return cost;
 }
 @Override
 public BytesRef lookupOrd(long ord) {
   assertThread("Sorted set doc values", creationThread);
   assert ord >= 0 && ord < valueCount;
   final BytesRef result = in.lookupOrd(ord);
   assert result.isValid();
   return result;
 }
 @Override
 public long lookupTerm(BytesRef key) {
   assertThread("Sorted set doc values", creationThread);
   assert key.isValid();
   long result = in.lookupTerm(key);
   assert result < valueCount;
   assert key.isValid();
   return result;
 }
 @Override
 public long nextOrd() throws IOException {
   assertThread("Sorted set doc values", creationThread);
   assert lastOrd != NO_MORE_ORDS;
   long ord = in.nextOrd();
   assert ord < valueCount;
   assert ord == NO_MORE_ORDS || ord > lastOrd;
   lastOrd = ord;
   return ord;
 }
 @Override
 public int docID() {
   assertThread("Sorted set doc values", creationThread);
   return in.docID();
 }
 public AssertingSortedSetDocValues(SortedSetDocValues in, int maxDoc) {
   this.in = in;
   this.maxDoc = maxDoc;
   this.valueCount = in.getValueCount();
   assert valueCount >= 0;
 }
  // tries to make more dups than testSortedSet
  public void testSortedSetWithDups() throws Exception {
    Directory dir = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numValues = random().nextInt(5);
      for (int j = 0; j < numValues; j++) {
        doc.add(
            new SortedSetDocValuesField(
                "bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
      }
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
    SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
    if (multi == null) {
      assertNull(single);
    } else {
      assertEquals(single.getValueCount(), multi.getValueCount());
      // check values
      for (long i = 0; i < single.getValueCount(); i++) {
        final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
        final BytesRef actual = multi.lookupOrd(i);
        assertEquals(expected, actual);
      }
      // check ord list
      for (int i = 0; i < numDocs; i++) {
        single.setDocument(i);
        ArrayList<Long> expectedList = new ArrayList<>();
        long ord;
        while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          expectedList.add(ord);
        }

        multi.setDocument(i);
        int upto = 0;
        while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          assertEquals(expectedList.get(upto).longValue(), ord);
          upto++;
        }
        assertEquals(expectedList.size(), upto);
      }
    }

    ir.close();
    ir2.close();
    dir.close();
  }
Beispiel #12
0
  public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter w =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);

    field.setStringValue("d e f");
    w.addDocument(doc);

    field.setStringValue("a f");
    w.addDocument(doc);

    final IndexReader r = w.getReader();
    w.close();

    final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);

    iter.setDocument(0);
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    iter.setDocument(1);
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    iter.setDocument(2);
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    r.close();
    dir.close();
  }
Beispiel #13
0
  public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);

    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();

    AtomicReader ar = getOnlySegmentReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field");
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next()
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil()
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));

    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    ireader.close();
    directory.close();
  }
Beispiel #14
0
  private void verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
      throws Exception {

    final DocTermOrds dto =
        new DocTermOrds(
            r,
            r.getLiveDocs(),
            "field",
            prefixRef,
            Integer.MAX_VALUE,
            _TestUtil.nextInt(random(), 2, 10));

    final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(r, "id", false);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
      System.out.println(
          "TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
      System.out.println("TEST: all TERMS:");
      TermsEnum allTE = MultiFields.getTerms(r, "field").iterator(null);
      int ord = 0;
      while (allTE.next() != null) {
        System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
      }
    }

    // final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
      if (prefixRef == null) {
        assertNull(MultiFields.getTerms(r, "field"));
      } else {
        Terms terms = MultiFields.getTerms(r, "field");
        if (terms != null) {
          TermsEnum termsEnum = terms.iterator(null);
          TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef, false);
          if (result != TermsEnum.SeekStatus.END) {
            assertFalse(
                "term="
                    + termsEnum.term().utf8ToString()
                    + " matches prefix="
                    + prefixRef.utf8ToString(),
                StringHelper.startsWith(termsEnum.term(), prefixRef));
          } else {
            // ok
          }
        } else {
          // ok
        }
      }
      return;
    }

    if (VERBOSE) {
      System.out.println("TEST: TERMS:");
      te.seekExact(0);
      while (true) {
        System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
        if (te.next() == null) {
          break;
        }
      }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
      if (VERBOSE) {
        System.out.println(
            "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
      }
      iter.setDocument(docID);
      final int[] answers = idToOrds[docIDToID.get(docID)];
      int upto = 0;
      long ord;
      while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        te.seekExact(ord);
        final BytesRef expected = termsArray[answers[upto++]];
        if (VERBOSE) {
          System.out.println(
              "  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
        }
        assertEquals(
            "expected="
                + expected.utf8ToString()
                + " actual="
                + te.term().utf8ToString()
                + " ord="
                + ord,
            expected,
            te.term());
      }
      assertEquals(answers.length, upto);
    }
  }