/**
 * Populates the field cache for a newer reader (opened after a delete) before asking for the
 * same field on an older reader (opened before the delete), then checks that the older reader
 * still reports both values.
 *
 * <p>NOTE(review): presumably this guards against a cache entry built from the newer view being
 * reused for the older one -- confirm against the FieldCache implementation.
 */
public void testBackToTheFuture() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));

  // Two single-valued documents: foo=bar and foo=baz.
  Document barDoc = new Document();
  barDoc.add(newStringField("foo", "bar", Field.Store.NO));
  iw.addDocument(barDoc);

  Document bazDoc = new Document();
  bazDoc.add(newStringField("foo", "baz", Field.Store.NO));
  iw.addDocument(bazDoc);

  // Reader opened before the delete is issued.
  DirectoryReader r1 = DirectoryReader.open(iw, true);

  iw.deleteDocuments(new Term("foo", "baz"));

  // Reader opened after the delete is issued.
  DirectoryReader r2 = DirectoryReader.open(iw, true);

  // Ask the cache about the newer reader first; the result is intentionally ignored.
  FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo");

  // Now ask about the older reader and verify what it reports.
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo");
  assertEquals(2, v.getValueCount());
  v.setDocument(1);
  assertEquals(1, v.nextOrd());

  iw.close();
  r1.close();
  r2.close();
  dir.close();
}
/**
 * Forwards to the wrapped instance's {@code nextOrd()} and asserts the iteration invariants
 * around the call: correct thread, no call after exhaustion, ord below the value count, and
 * strictly increasing ords until {@code NO_MORE_ORDS}.
 */
@Override
public long nextOrd() throws IOException {
  assertThread("Sorted set doc values", creationThread);
  // Calling again after NO_MORE_ORDS has already been returned is a caller error.
  assert lastOrd != NO_MORE_ORDS;

  final long next = in.nextOrd();

  // The returned ord must stay below the value count.
  assert next < valueCount;
  // Either iteration is finished, or the ord advanced strictly past the previous one.
  assert next > lastOrd || next == NO_MORE_ORDS;

  lastOrd = next;
  return next;
}
/**
 * Indexes three small text documents into "field", builds a {@link DocTermOrds} over the
 * resulting reader, and checks the exact ordinals returned for each document.
 */
public void testSimple() throws Exception {
  Directory dir = newDirectory();
  final RandomIndexWriter w =
      new RandomIndexWriter(
          random(),
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setMergePolicy(newLogMergePolicy()));

  // One Document/Field pair is reused; only the field's text changes between adds.
  Document doc = new Document();
  Field field = newTextField("field", "", Field.Store.NO);
  doc.add(field);
  final String[] contents = {"a b c", "d e f", "a f"};
  for (String text : contents) {
    field.setStringValue(text);
    w.addDocument(doc);
  }

  final IndexReader r = w.getReader();
  w.close();

  final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
  final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
  SortedSetDocValues iter = dto.iterator(ar);

  // Expected ordinals per document, in the order nextOrd() must return them.
  final long[][] expectedOrds = {
    {0, 1, 2},
    {3, 4, 5},
    {0, 5},
  };
  for (int docID = 0; docID < expectedOrds.length; docID++) {
    iter.setDocument(docID);
    for (long expectedOrd : expectedOrds[docID]) {
      assertEquals(expectedOrd, iter.nextOrd());
    }
    // After the last expected ord the iterator must report exhaustion.
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
  }

  r.close();
  dir.close();
}
// tries to make more dups than testSortedSet public void testSortedSetWithDups() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = random().nextInt(5); for (int j = 0; j < numValues; j++) { doc.add( new SortedSetDocValuesField( "bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2)))); } iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes"); SortedSetDocValues single = merged.getSortedSetDocValues("bytes"); if (multi == null) { assertNull(single); } else { assertEquals(single.getValueCount(), multi.getValueCount()); // check values for (long i = 0; i < single.getValueCount(); i++) { final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i)); final BytesRef actual = multi.lookupOrd(i); assertEquals(expected, actual); } // check ord list for (int i = 0; i < numDocs; i++) { single.setDocument(i); ArrayList<Long> expectedList = new ArrayList<>(); long ord; while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { expectedList.add(ord); } multi.setDocument(i); int upto = 0; while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { assertEquals(expectedList.get(upto).longValue(), ord); upto++; } assertEquals(expectedList.size(), upto); } } ir.close(); ir2.close(); dir.close(); }
private void verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception { final DocTermOrds dto = new DocTermOrds( r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE, _TestUtil.nextInt(random(), 2, 10)); final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(r, "id", false); /* for(int docID=0;docID<subR.maxDoc();docID++) { System.out.println(" docID=" + docID + " id=" + docIDToID[docID]); } */ if (VERBOSE) { System.out.println( "TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString())); System.out.println("TEST: all TERMS:"); TermsEnum allTE = MultiFields.getTerms(r, "field").iterator(null); int ord = 0; while (allTE.next() != null) { System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString()); } } // final TermsEnum te = subR.fields().terms("field").iterator(); final TermsEnum te = dto.getOrdTermsEnum(r); if (dto.numTerms() == 0) { if (prefixRef == null) { assertNull(MultiFields.getTerms(r, "field")); } else { Terms terms = MultiFields.getTerms(r, "field"); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef, false); if (result != TermsEnum.SeekStatus.END) { assertFalse( "term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef)); } else { // ok } } else { // ok } } return; } if (VERBOSE) { System.out.println("TEST: TERMS:"); te.seekExact(0); while (true) { System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString()); if (te.next() == null) { break; } } } SortedSetDocValues iter = dto.iterator(r); for (int docID = 0; docID < r.maxDoc(); docID++) { if (VERBOSE) { System.out.println( "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")"); } iter.setDocument(docID); final int[] answers = idToOrds[docIDToID.get(docID)]; int upto = 0; long ord; while ((ord = iter.nextOrd()) 
!= SortedSetDocValues.NO_MORE_ORDS) { te.seekExact(ord); final BytesRef expected = termsArray[answers[upto++]]; if (VERBOSE) { System.out.println( " exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString()); } assertEquals( "expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term()); } assertEquals(answers.length, upto); } }