public void testNRTAndCommit() throws Exception {
  Directory dir = newDirectory();
  NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
  final LineFileDocs docs = new LineFileDocs(random(), true);
  final int numDocs = TestUtil.nextInt(random(), 100, 400);
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs);
  }
  final List<BytesRef> ids = new ArrayList<>();
  DirectoryReader r = null;
  for (int docCount = 0; docCount < numDocs; docCount++) {
    final Document doc = docs.nextDoc();
    ids.add(new BytesRef(doc.get("docid")));
    w.addDocument(doc);
    if (random().nextInt(20) == 17) {
      if (r == null) {
        r = DirectoryReader.open(w.w);
      } else {
        final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
        if (r2 != null) {
          r.close();
          r = r2;
        }
      }
      assertEquals(1 + docCount, r.numDocs());
      final IndexSearcher s = newSearcher(r);
      // Just make sure search can run; we can't assert
      // totHits since it could be 0
      TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
      // System.out.println("tot hits " + hits.totalHits);
    }
  }
  if (r != null) {
    r.close();
  }

  // Close should force cache to clear since all files are sync'd
  w.close();

  final String[] cachedFiles = cachedDir.listCachedFiles();
  for (String file : cachedFiles) {
    System.out.println("FAIL: cached file " + file + " remains after sync");
  }
  assertEquals(0, cachedFiles.length);

  r = DirectoryReader.open(dir);
  for (BytesRef id : ids) {
    assertEquals(1, r.docFreq(new Term("docid", id)));
  }
  r.close();
  cachedDir.close();
  docs.close();
}
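// For context only (not part of the original test): a minimal sketch of the production-style
// setup that testNRTAndCommit exercises. NRTCachingDirectory keeps small newly flushed segments
// in RAM so near-real-time reopens avoid hitting disk; once files are synced (commit/close) they
// live only in the wrapped directory. The helper name, index path, analyzer, and size thresholds
// below are illustrative assumptions, not taken from the test.
private IndexWriter openNRTCachingWriter(Path indexPath, Analyzer analyzer) throws IOException {
  Directory fsDir = FSDirectory.open(indexPath);
  // Cache segments up to 5 MB each, 60 MB total, in RAM until they are merged or committed:
  NRTCachingDirectory cachedDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
  return new IndexWriter(cachedDir, new IndexWriterConfig(analyzer));
}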
public void testDuellMemIndex() throws IOException {
  LineFileDocs lineFileDocs = new LineFileDocs(random());
  int numDocs = atLeast(10);
  MemoryIndex memory = randomMemoryIndex();
  for (int i = 0; i < numDocs; i++) {
    Directory dir = newDirectory();
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    Document nextDoc = lineFileDocs.nextDoc();
    Document doc = new Document();
    for (IndexableField field : nextDoc.getFields()) {
      if (field.fieldType().indexOptions() != IndexOptions.NONE) {
        doc.add(field);
        if (random().nextInt(3) == 0) {
          doc.add(field); // randomly add the same field twice
        }
      }
    }

    writer.addDocument(doc);
    writer.close();
    for (IndexableField field : doc) {
      memory.addField(field.name(), ((Field) field).stringValue(), mockAnalyzer);
    }
    DirectoryReader competitor = DirectoryReader.open(dir);
    LeafReader memIndexReader = (LeafReader) memory.createSearcher().getIndexReader();
    TestUtil.checkReader(memIndexReader);
    duellReaders(competitor, memIndexReader);
    IOUtils.close(competitor, memIndexReader);
    memory.reset();
    dir.close();
  }
  lineFileDocs.close();
}
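// For context only (not part of the original tests): the basic MemoryIndex flow that the duel
// tests in this file validate against a regular on-disk index. MemoryIndex holds a single
// in-memory document and scores a query against it, the typical "prospective search" use case.
// The helper name, "body" field name, and analyzer argument are illustrative assumptions.
private boolean singleDocMatches(String text, Query query, Analyzer analyzer) {
  MemoryIndex mi = new MemoryIndex();
  mi.addField("body", text, analyzer);
  // search() returns a relevance score; 0.0f means the query did not match the in-memory document.
  return mi.search(query) > 0.0f;
}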
// Force-merge down to one segment while a separate thread keeps updating documents;
// the concurrent updates/deletes must not make the merge cascade (mergeCount stays <= 1).
public void test() throws Exception {
  final Directory d = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

  // Try to make an index that requires merging:
  w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
  final int numStartDocs = atLeast(20);
  final LineFileDocs docs = new LineFileDocs(random(), true);
  for (int docIDX = 0; docIDX < numStartDocs; docIDX++) {
    w.addDocument(docs.nextDoc());
  }
  MergePolicy mp = w.getConfig().getMergePolicy();
  final int mergeAtOnce = 1 + w.segmentInfos.size();
  if (mp instanceof TieredMergePolicy) {
    ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeAtOnce);
  } else if (mp instanceof LogMergePolicy) {
    ((LogMergePolicy) mp).setMergeFactor(mergeAtOnce);
  } else {
    // skip test
    w.close();
    d.close();
    return;
  }

  final AtomicBoolean doStop = new AtomicBoolean();
  w.getConfig().setMaxBufferedDocs(2);
  Thread t =
      new Thread() {
        @Override
        public void run() {
          try {
            while (!doStop.get()) {
              w.updateDocument(
                  new Term("docid", "" + random().nextInt(numStartDocs)), docs.nextDoc());
              // Force deletes to apply
              w.getReader().close();
            }
          } catch (Throwable t) {
            throw new RuntimeException(t);
          }
        }
      };
  t.start();
  w.forceMerge(1);
  doStop.set(true);
  t.join();
  assertTrue("merge count is " + w.mergeCount.get(), w.mergeCount.get() <= 1);
  w.close();
  d.close();
  docs.close();
}
// make sure we never reuse from another reader even if it is the same field & codec etc
public void testReuseDocsEnumDifferentReader() throws IOException {
  Directory dir = newDirectory();
  Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  RandomIndexWriter writer =
      new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setCodec(cp));
  int numdocs = atLeast(20);
  createRandomIndex(numdocs, writer, random());
  writer.commit();

  DirectoryReader firstReader = DirectoryReader.open(dir);
  DirectoryReader secondReader = DirectoryReader.open(dir);
  List<LeafReaderContext> leaves = firstReader.leaves();
  List<LeafReaderContext> leaves2 = secondReader.leaves();

  for (LeafReaderContext ctx : leaves) {
    Terms terms = ctx.reader().terms("body");
    TermsEnum iterator = terms.iterator();
    IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
    MatchNoBits bits = new Bits.MatchNoBits(firstReader.maxDoc());
    iterator = terms.iterator();
    PostingsEnum docs = null;
    BytesRef term = null;
    while ((term = iterator.next()) != null) {
      docs =
          iterator.postings(
              null,
              randomDocsEnum("body", term, leaves2, bits),
              random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
      enums.put(docs, true);
    }
    assertEquals(terms.size(), enums.size());

    iterator = terms.iterator();
    enums.clear();
    docs = null;
    while ((term = iterator.next()) != null) {
      docs =
          iterator.postings(
              bits,
              randomDocsEnum("body", term, leaves2, bits),
              random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
      enums.put(docs, true);
    }
    assertEquals(terms.size(), enums.size());
  }
  writer.close();
  IOUtils.close(firstReader, secondReader, dir);
}
public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {
  final String field_name = "text";
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  if (random().nextBoolean()) {
    mockAnalyzer.setOffsetGap(random().nextInt(100));
  }
  // index into a random directory
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field(field_name, "la la", type));
  doc.add(new Field(field_name, "foo bar foo bar foo", type));

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.updateDocument(new Term("id", "1"), doc);
  writer.commit();
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);

  // Index document in Memory index
  MemoryIndex memIndex = new MemoryIndex(true);
  memIndex.addField(field_name, "la la", mockAnalyzer);
  memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

  // compare term vectors
  Terms ramTv = reader.getTermVector(0, field_name);
  IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
  TestUtil.checkReader(memIndexReader);
  Terms memTv = memIndexReader.getTermVector(0, field_name);

  compareTermVectors(ramTv, memTv, field_name);
  memIndexReader.close();
  reader.close();
  dir.close();
}
public void testSameFieldAddedMultipleTimes() throws IOException {
  MemoryIndex mindex = randomMemoryIndex();
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  mindex.addField("field", "the quick brown fox", mockAnalyzer);
  mindex.addField("field", "jumps over the", mockAnalyzer);
  LeafReader reader = (LeafReader) mindex.createSearcher().getIndexReader();
  TestUtil.checkReader(reader);
  assertEquals(7, reader.terms("field").getSumTotalTermFreq());
  PhraseQuery query = new PhraseQuery("field", "fox", "jumps");
  assertTrue(mindex.search(query) > 0.1);

  mindex.reset();
  mockAnalyzer.setPositionIncrementGap(1 + random().nextInt(10));
  mindex.addField("field", "the quick brown fox", mockAnalyzer);
  mindex.addField("field", "jumps over the", mockAnalyzer);
  assertEquals(0, mindex.search(query), 0.00001f);
  query = new PhraseQuery(10, "field", "fox", "jumps");
  assertTrue(
      "posGap" + mockAnalyzer.getPositionIncrementGap("field"), mindex.search(query) > 0.0001);
  TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
@Test
public void testRollingUpdates() throws Exception {
  Random random = new Random(random().nextLong());
  final BaseDirectoryWrapper dir = newDirectory();
  // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to
  // delete files"
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
  }
  final LineFileDocs docs = new LineFileDocs(random, true);

  // provider.register(new MemoryCodec());
  if (random().nextBoolean()) {
    Codec.setDefault(
        TestUtil.alwaysPostingsFormat(
            new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
  }

  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

  final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  final int SIZE = atLeast(20);
  int id = 0;
  IndexReader r = null;
  IndexSearcher s = null;
  final int numUpdates =
      (int) (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
  if (VERBOSE) {
    System.out.println("TEST: numUpdates=" + numUpdates);
  }
  int updateCount = 0;
  // TODO: sometimes update ids not in order...
  for (int docIter = 0; docIter < numUpdates; docIter++) {
    final Document doc = docs.nextDoc();
    final String myID = Integer.toString(id);
    if (id == SIZE - 1) {
      id = 0;
    } else {
      id++;
    }
    if (VERBOSE) {
      System.out.println(" docIter=" + docIter + " id=" + id);
    }
    ((Field) doc.getField("docid")).setStringValue(myID);

    Term idTerm = new Term("docid", myID);

    final boolean doUpdate;
    if (s != null && updateCount < SIZE) {
      TopDocs hits = s.search(new TermQuery(idTerm), 1);
      assertEquals(1, hits.totalHits);
      doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
      if (VERBOSE) {
        if (doUpdate) {
          System.out.println(" tryDeleteDocument failed");
        } else {
          System.out.println(" tryDeleteDocument succeeded");
        }
      }
    } else {
      doUpdate = true;
      if (VERBOSE) {
        System.out.println(" no searcher: doUpdate=true");
      }
    }

    updateCount++;

    if (doUpdate) {
      if (random().nextBoolean()) {
        w.updateDocument(idTerm, doc);
      } else {
        // It's OK to not be atomic for this test (no separate thread reopening readers):
        w.deleteDocuments(new TermQuery(idTerm));
        w.addDocument(doc);
      }
    } else {
      w.addDocument(doc);
    }

    if (docIter >= SIZE && random().nextInt(50) == 17) {
      if (r != null) {
        r.close();
      }

      final boolean applyDeletions = random().nextBoolean();

      if (VERBOSE) {
        System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
      }

      r = w.getReader(applyDeletions);
      if (applyDeletions) {
        s = newSearcher(r);
      } else {
        s = null;
      }
      assertTrue(
          "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
          !applyDeletions || r.numDocs() == SIZE);
      updateCount = 0;
    }
  }

  if (r != null) {
    r.close();
  }

  w.commit();
  assertEquals(SIZE, w.numDocs());

  w.close();

  TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

  docs.close();

  // LUCENE-4455:
  SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
  long totalBytes = 0;
  for (SegmentCommitInfo sipc : infos) {
    totalBytes += sipc.sizeInBytes();
  }
  long totalBytes2 = 0;
  for (String fileName : dir.listAll()) {
    if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
      totalBytes2 += dir.fileLength(fileName);
    }
  }
  assertEquals(totalBytes2, totalBytes);
  dir.close();
}
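// For context only (not part of the original test): the two update paths that testRollingUpdates
// alternates between. The helper name and the "docid" field follow the test's conventions.
private void updateById(IndexWriter writer, String id, Document doc, boolean atomic)
    throws IOException {
  Term idTerm = new Term("docid", id);
  if (atomic) {
    // Deletes any existing document matching the term and adds the new one as a single operation,
    // so a concurrently reopened reader never sees the old and new version side by side.
    writer.updateDocument(idTerm, doc);
  } else {
    // Separate delete + add; safe only when no other thread reopens readers in between,
    // which is why the test notes that non-atomicity is acceptable there.
    writer.deleteDocuments(idTerm);
    writer.addDocument(doc);
  }
}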
// Shared stress test: launches several indexing threads alongside concurrent searching, then
// verifies deletes, packed sub-document ordering, and final document counts against the tracked
// expectations before closing everything down.
public void runTest(String testName) throws Exception {

  failed.set(false);
  addCount.set(0);
  delCount.set(0);
  packCount.set(0);

  final long t0 = System.currentTimeMillis();

  Random random = new Random(random().nextLong());
  final LineFileDocs docs = new LineFileDocs(random, true);
  final Path tempDir = createTempDir(testName);
  dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
  if (dir instanceof BaseDirectoryWrapper) {
    ((BaseDirectoryWrapper) dir)
        .setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
  }
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
  conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
  if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
    ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
  }

  if (LuceneTestCase.TEST_NIGHTLY) {
    // newIWConfig makes smallish max seg size, which
    // results in tons and tons of segments for this test
    // when run nightly:
    MergePolicy mp = conf.getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
    } else if (mp instanceof LogByteSizeMergePolicy) {
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMaxMergeDocs(100000);
    }
  }

  conf.setMergedSegmentWarmer(
      new IndexWriter.IndexReaderWarmer() {
        @Override
        public void warm(LeafReader reader) throws IOException {
          if (VERBOSE) {
            System.out.println("TEST: now warm merged reader=" + reader);
          }
          warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
          final int maxDoc = reader.maxDoc();
          final Bits liveDocs = reader.getLiveDocs();
          int sum = 0;
          final int inc = Math.max(1, maxDoc / 50);
          for (int docID = 0; docID < maxDoc; docID += inc) {
            if (liveDocs == null || liveDocs.get(docID)) {
              final StoredDocument doc = reader.document(docID);
              sum += doc.getFields().size();
            }
          }

          IndexSearcher searcher = newSearcher(reader);
          sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;

          if (VERBOSE) {
            System.out.println("TEST: warm visited " + sum + " fields");
          }
        }
      });

  if (VERBOSE) {
    conf.setInfoStream(
        new PrintStreamInfoStream(System.out) {
          @Override
          public void message(String component, String message) {
            if ("TP".equals(component)) {
              return; // ignore test points!
            }
            super.message(component, message);
          }
        });
  }
  writer = new IndexWriter(dir, conf);
  TestUtil.reduceOpenFiles(writer);

  final ExecutorService es =
      random().nextBoolean()
          ? null
          : Executors.newCachedThreadPool(new NamedThreadFactory(testName));

  doAfterWriter(es);

  final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);

  final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

  final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
  final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
  final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());

  final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;

  final Thread[] indexThreads =
      launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

  if (VERBOSE) {
    System.out.println(
        "TEST: DONE start "
            + NUM_INDEX_THREADS
            + " indexing threads ["
            + (System.currentTimeMillis() - t0)
            + " ms]");
  }

  // Let index build up a bit
  Thread.sleep(100);

  doSearching(es, stopTime);

  if (VERBOSE) {
    System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
  }

  for (int thread = 0; thread < indexThreads.length; thread++) {
    indexThreads[thread].join();
  }

  if (VERBOSE) {
    System.out.println(
        "TEST: done join indexing threads ["
            + (System.currentTimeMillis() - t0)
            + " ms]; addCount="
            + addCount
            + " delCount="
            + delCount);
  }

  final IndexSearcher s = getFinalSearcher();
  if (VERBOSE) {
    System.out.println("TEST: finalSearcher=" + s);
  }

  assertFalse(failed.get());

  boolean doFail = false;

  // Verify: make sure delIDs are in fact deleted:
  for (String id : delIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println(
          "doc id="
              + id
              + " is supposed to be deleted, but got "
              + hits.totalHits
              + " hits; first docID="
              + hits.scoreDocs[0].doc);
      doFail = true;
    }
  }

  // Verify: make sure delPackIDs are in fact deleted:
  for (String id : delPackIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println(
          "packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
      doFail = true;
    }
  }

  // Verify: make sure each group of sub-docs are still in docID order:
  for (SubDocs subDocs : allSubDocs) {
    TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
    if (!subDocs.deleted) {
      // We sort by relevance but the scores should be identical so sort falls back to by docID:
      if (hits.totalHits != subDocs.subIDs.size()) {
        System.out.println(
            "packID="
                + subDocs.packID
                + ": expected "
                + subDocs.subIDs.size()
                + " hits but got "
                + hits.totalHits);
        doFail = true;
      } else {
        int lastDocID = -1;
        int startDocID = -1;
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
          final int docID = scoreDoc.doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          } else {
            startDocID = docID;
          }
          lastDocID = docID;
          final StoredDocument doc = s.doc(docID);
          assertEquals(subDocs.packID, doc.get("packID"));
        }

        lastDocID = startDocID - 1;
        for (String subID : subDocs.subIDs) {
          hits = s.search(new TermQuery(new Term("docid", subID)), 1);
          assertEquals(1, hits.totalHits);
          final int docID = hits.scoreDocs[0].doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          }
          lastDocID = docID;
        }
      }
    } else {
      // Pack was deleted -- make sure its docs are deleted. We can't verify packID is deleted
      // because we can re-use packID for update:
      for (String subID : subDocs.subIDs) {
        assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
      }
    }
  }

  // Verify: make sure all not-deleted docs are in fact
  // not deleted:
  final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
  docs.close();

  for (int id = 0; id < endID; id++) {
    String stringID = "" + id;
    if (!delIDs.contains(stringID)) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
      if (hits.totalHits != 1) {
        System.out.println(
            "doc id="
                + stringID
                + " is not supposed to be deleted, but got hitCount="
                + hits.totalHits
                + "; delIDs="
                + delIDs);
        doFail = true;
      }
    }
  }
  assertFalse(doFail);

  assertEquals(
      "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
      addCount.get() - delCount.get(),
      s.getIndexReader().numDocs());
  releaseSearcher(s);

  writer.commit();

  assertEquals(
      "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
      addCount.get() - delCount.get(),
      writer.numDocs());

  doClose();

  try {
    writer.commit();
  } finally {
    writer.close();
  }

  // Cannot close until after writer is closed because
  // writer has merged segment warmer that uses IS to run
  // searches, and that IS may be using this es!
  if (es != null) {
    es.shutdown();
    es.awaitTermination(1, TimeUnit.SECONDS);
  }

  TestUtil.checkIndex(dir);
  dir.close();
  IOUtils.rm(tempDir);

  if (VERBOSE) {
    System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
}