public void testNRTAndCommit() throws Exception {
  Directory dir = newDirectory();
  NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
  final LineFileDocs docs = new LineFileDocs(random(), true);
  final int numDocs = TestUtil.nextInt(random(), 100, 400);
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs);
  }
  final List<BytesRef> ids = new ArrayList<>();
  DirectoryReader r = null;
  for (int docCount = 0; docCount < numDocs; docCount++) {
    final Document doc = docs.nextDoc();
    ids.add(new BytesRef(doc.get("docid")));
    w.addDocument(doc);
    if (random().nextInt(20) == 17) {
      if (r == null) {
        r = DirectoryReader.open(w.w);
      } else {
        final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
        if (r2 != null) {
          r.close();
          r = r2;
        }
      }
      assertEquals(1 + docCount, r.numDocs());
      final IndexSearcher s = newSearcher(r);
      // Just make sure search can run; we can't assert
      // totHits since it could be 0
      TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
      // System.out.println("tot hits " + hits.totalHits);
    }
  }
  if (r != null) {
    r.close();
  }

  // Close should force cache to clear since all files are sync'd
  w.close();

  final String[] cachedFiles = cachedDir.listCachedFiles();
  for (String file : cachedFiles) {
    System.out.println("FAIL: cached file " + file + " remains after sync");
  }
  assertEquals(0, cachedFiles.length);

  r = DirectoryReader.open(dir);
  for (BytesRef id : ids) {
    assertEquals(1, r.docFreq(new Term("docid", id)));
  }
  r.close();
  cachedDir.close();
  docs.close();
}
public void testUpdateSameDoc() throws Exception {
  final Directory dir = newDirectory();
  final LineFileDocs docs = new LineFileDocs(random());
  for (int r = 0; r < 3; r++) {
    final IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
    final int numUpdates = atLeast(20);
    int numThreads = TestUtil.nextInt(random(), 2, 6);
    IndexingThread[] threads = new IndexingThread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new IndexingThread(docs, w, numUpdates);
      threads[i].start();
    }
    for (int i = 0; i < numThreads; i++) {
      threads[i].join();
    }
    w.close();
  }
  IndexReader open = DirectoryReader.open(dir);
  assertEquals(1, open.numDocs());
  open.close();
  docs.close();
  dir.close();
}
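// Hypothetical sketch (not part of the original source) of what the IndexingThread helper
// used by testUpdateSameDoc() might look like: every thread repeatedly updates the *same*
// id term with fresh LineFileDocs content, which is why the test expects exactly one live
// document at the end. Field names and types here are assumptions for illustration only.
static class IndexingThread extends Thread {
  private final LineFileDocs docs;
  private final IndexWriter writer;
  private final int num;

  IndexingThread(LineFileDocs docs, IndexWriter writer, int num) {
    this.docs = docs;
    this.writer = writer;
    this.num = num;
  }

  @Override
  public void run() {
    try {
      for (int i = 0; i < num; i++) {
        Document doc = new Document();
        // All threads share the same id, so each update replaces the previous document:
        doc.add(new StringField("id", "1", Field.Store.NO));
        doc.add(new TextField("body", docs.nextDoc().get("body"), Field.Store.NO));
        writer.updateDocument(new Term("id", "1"), doc);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}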
public void testDuellMemIndex() throws IOException {
  LineFileDocs lineFileDocs = new LineFileDocs(random());
  int numDocs = atLeast(10);
  MemoryIndex memory = randomMemoryIndex();
  for (int i = 0; i < numDocs; i++) {
    Directory dir = newDirectory();
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    Document nextDoc = lineFileDocs.nextDoc();
    Document doc = new Document();
    for (IndexableField field : nextDoc.getFields()) {
      if (field.fieldType().indexOptions() != IndexOptions.NONE) {
        doc.add(field);
        if (random().nextInt(3) == 0) {
          doc.add(field); // randomly add the same field twice
        }
      }
    }

    writer.addDocument(doc);
    writer.close();
    for (IndexableField field : doc) {
      memory.addField(field.name(), ((Field) field).stringValue(), mockAnalyzer);
    }
    DirectoryReader competitor = DirectoryReader.open(dir);
    LeafReader memIndexReader = (LeafReader) memory.createSearcher().getIndexReader();
    TestUtil.checkReader(memIndexReader);
    duellReaders(competitor, memIndexReader);
    IOUtils.close(competitor, memIndexReader);
    memory.reset();
    dir.close();
  }
  lineFileDocs.close();
}
/** Populates a writer with random stuff. This must be fully reproducible with the seed! */
public static void createRandomIndex(int numdocs, RandomIndexWriter writer, Random random)
    throws IOException {
  LineFileDocs lineFileDocs = new LineFileDocs(random);
  for (int i = 0; i < numdocs; i++) {
    writer.addDocument(lineFileDocs.nextDoc());
  }
  lineFileDocs.close();
}
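// Minimal usage sketch for createRandomIndex() above (an assumption, not from the original
// source): populate a scratch index with 100 random LineFileDocs documents inside a
// LuceneTestCase-style test, then open a reader over it.
// Directory dir = newDirectory();
// RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
// createRandomIndex(100, writer, random());
// IndexReader reader = writer.getReader();
// // ... run assertions against the reader ...
// IOUtils.close(reader, writer, dir);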
public void test() throws Exception {
  final Directory d = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final MyIndexWriter w =
      new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

  // Try to make an index that requires merging:
  w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
  final int numStartDocs = atLeast(20);
  final LineFileDocs docs = new LineFileDocs(random(), true);
  for (int docIDX = 0; docIDX < numStartDocs; docIDX++) {
    w.addDocument(docs.nextDoc());
  }
  MergePolicy mp = w.getConfig().getMergePolicy();
  final int mergeAtOnce = 1 + w.segmentInfos.size();
  if (mp instanceof TieredMergePolicy) {
    ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeAtOnce);
  } else if (mp instanceof LogMergePolicy) {
    ((LogMergePolicy) mp).setMergeFactor(mergeAtOnce);
  } else {
    // skip test
    w.close();
    d.close();
    return;
  }

  final AtomicBoolean doStop = new AtomicBoolean();
  w.getConfig().setMaxBufferedDocs(2);
  Thread t =
      new Thread() {
        @Override
        public void run() {
          try {
            while (!doStop.get()) {
              w.updateDocument(
                  new Term("docid", "" + random().nextInt(numStartDocs)), docs.nextDoc());
              // Force deletes to apply
              w.getReader().close();
            }
          } catch (Throwable t) {
            throw new RuntimeException(t);
          }
        }
      };
  t.start();
  w.forceMerge(1);
  doStop.set(true);
  t.join();
  assertTrue("merge count is " + w.mergeCount.get(), w.mergeCount.get() <= 1);
  w.close();
  d.close();
  docs.close();
}
@Test
public void testRollingUpdates() throws Exception {
  Random random = new Random(random().nextLong());
  final BaseDirectoryWrapper dir = newDirectory();
  // test checks for no unref'ed files with the IW helper method, which isn't aware of
  // "tried to delete files"
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
  }
  final LineFileDocs docs = new LineFileDocs(random, true);

  // provider.register(new MemoryCodec());
  if (random().nextBoolean()) {
    Codec.setDefault(
        TestUtil.alwaysPostingsFormat(
            new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
  }

  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

  final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  final int SIZE = atLeast(20);
  int id = 0;
  IndexReader r = null;
  IndexSearcher s = null;
  final int numUpdates =
      (int) (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
  if (VERBOSE) {
    System.out.println("TEST: numUpdates=" + numUpdates);
  }
  int updateCount = 0;
  // TODO: sometimes update ids not in order...
  for (int docIter = 0; docIter < numUpdates; docIter++) {
    final Document doc = docs.nextDoc();
    final String myID = Integer.toString(id);
    if (id == SIZE - 1) {
      id = 0;
    } else {
      id++;
    }
    if (VERBOSE) {
      System.out.println(" docIter=" + docIter + " id=" + id);
    }
    ((Field) doc.getField("docid")).setStringValue(myID);

    Term idTerm = new Term("docid", myID);

    final boolean doUpdate;
    if (s != null && updateCount < SIZE) {
      TopDocs hits = s.search(new TermQuery(idTerm), 1);
      assertEquals(1, hits.totalHits);
      doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
      if (VERBOSE) {
        if (doUpdate) {
          System.out.println(" tryDeleteDocument failed");
        } else {
          System.out.println(" tryDeleteDocument succeeded");
        }
      }
    } else {
      doUpdate = true;
      if (VERBOSE) {
        System.out.println(" no searcher: doUpdate=true");
      }
    }

    updateCount++;

    if (doUpdate) {
      if (random().nextBoolean()) {
        w.updateDocument(idTerm, doc);
      } else {
        // It's OK to not be atomic for this test (no separate thread reopening readers):
        w.deleteDocuments(new TermQuery(idTerm));
        w.addDocument(doc);
      }
    } else {
      w.addDocument(doc);
    }

    if (docIter >= SIZE && random().nextInt(50) == 17) {
      if (r != null) {
        r.close();
      }

      final boolean applyDeletions = random().nextBoolean();

      if (VERBOSE) {
        System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
      }

      r = w.getReader(applyDeletions);
      if (applyDeletions) {
        s = newSearcher(r);
      } else {
        s = null;
      }
      assertTrue(
          "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
          !applyDeletions || r.numDocs() == SIZE);
      updateCount = 0;
    }
  }

  if (r != null) {
    r.close();
  }

  w.commit();
  assertEquals(SIZE, w.numDocs());

  w.close();

  TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

  docs.close();

  // LUCENE-4455:
  SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
  long totalBytes = 0;
  for (SegmentCommitInfo sipc : infos) {
    totalBytes += sipc.sizeInBytes();
  }
  long totalBytes2 = 0;
  for (String fileName : dir.listAll()) {
    if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
      totalBytes2 += dir.fileLength(fileName);
    }
  }
  assertEquals(totalBytes2, totalBytes);
  dir.close();
}
@AfterClass
public static void afterClass() throws Exception {
  lineDocFile.close();
  lineDocFile = null;
}
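// A minimal sketch (an assumption, not from the original source) of the @BeforeClass
// counterpart implied by afterClass() above: the shared LineFileDocs instance is opened
// once for the test class and closed in afterClass(). The constructor form matches the
// other tests in this collection.
// @BeforeClass
// public static void beforeClass() throws Exception {
//   lineDocFile = new LineFileDocs(random(), true);
// }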
public void runTest(String testName) throws Exception {

  failed.set(false);
  addCount.set(0);
  delCount.set(0);
  packCount.set(0);

  final long t0 = System.currentTimeMillis();

  Random random = new Random(random().nextLong());
  final LineFileDocs docs = new LineFileDocs(random, true);
  final Path tempDir = createTempDir(testName);
  dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
  if (dir instanceof BaseDirectoryWrapper) {
    ((BaseDirectoryWrapper) dir)
        .setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
  }
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
  conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
  if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
    ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
  }

  if (LuceneTestCase.TEST_NIGHTLY) {
    // newIWConfig makes smallish max seg size, which
    // results in tons and tons of segments for this test
    // when run nightly:
    MergePolicy mp = conf.getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
    } else if (mp instanceof LogByteSizeMergePolicy) {
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMaxMergeDocs(100000);
    }
  }

  conf.setMergedSegmentWarmer(
      new IndexWriter.IndexReaderWarmer() {
        @Override
        public void warm(LeafReader reader) throws IOException {
          if (VERBOSE) {
            System.out.println("TEST: now warm merged reader=" + reader);
          }
          warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
          final int maxDoc = reader.maxDoc();
          final Bits liveDocs = reader.getLiveDocs();
          int sum = 0;
          final int inc = Math.max(1, maxDoc / 50);
          for (int docID = 0; docID < maxDoc; docID += inc) {
            if (liveDocs == null || liveDocs.get(docID)) {
              final StoredDocument doc = reader.document(docID);
              sum += doc.getFields().size();
            }
          }

          IndexSearcher searcher = newSearcher(reader);
          sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;

          if (VERBOSE) {
            System.out.println("TEST: warm visited " + sum + " fields");
          }
        }
      });

  if (VERBOSE) {
    conf.setInfoStream(
        new PrintStreamInfoStream(System.out) {
          @Override
          public void message(String component, String message) {
            if ("TP".equals(component)) {
              return; // ignore test points!
            }
            super.message(component, message);
          }
        });
  }
  writer = new IndexWriter(dir, conf);
  TestUtil.reduceOpenFiles(writer);

  final ExecutorService es =
      random().nextBoolean()
          ? null
          : Executors.newCachedThreadPool(new NamedThreadFactory(testName));

  doAfterWriter(es);

  final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);

  final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

  final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
  final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
  final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());

  final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;

  final Thread[] indexThreads =
      launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

  if (VERBOSE) {
    System.out.println(
        "TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads ["
            + (System.currentTimeMillis() - t0) + " ms]");
  }

  // Let index build up a bit
  Thread.sleep(100);

  doSearching(es, stopTime);

  if (VERBOSE) {
    System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
  }

  for (int thread = 0; thread < indexThreads.length; thread++) {
    indexThreads[thread].join();
  }

  if (VERBOSE) {
    System.out.println(
        "TEST: done join indexing threads [" + (System.currentTimeMillis() - t0)
            + " ms]; addCount=" + addCount + " delCount=" + delCount);
  }

  final IndexSearcher s = getFinalSearcher();
  if (VERBOSE) {
    System.out.println("TEST: finalSearcher=" + s);
  }

  assertFalse(failed.get());

  boolean doFail = false;

  // Verify: make sure delIDs are in fact deleted:
  for (String id : delIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println(
          "doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits
              + " hits; first docID=" + hits.scoreDocs[0].doc);
      doFail = true;
    }
  }

  // Verify: make sure delPackIDs are in fact deleted:
  for (String id : delPackIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println(
          "packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
      doFail = true;
    }
  }

  // Verify: make sure each group of sub-docs are still in docID order:
  for (SubDocs subDocs : allSubDocs) {
    TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
    if (!subDocs.deleted) {
      // We sort by relevance but the scores should be identical so sort falls back to by docID:
      if (hits.totalHits != subDocs.subIDs.size()) {
        System.out.println(
            "packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size()
                + " hits but got " + hits.totalHits);
        doFail = true;
      } else {
        int lastDocID = -1;
        int startDocID = -1;
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
          final int docID = scoreDoc.doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          } else {
            startDocID = docID;
          }
          lastDocID = docID;
          final StoredDocument doc = s.doc(docID);
          assertEquals(subDocs.packID, doc.get("packID"));
        }

        lastDocID = startDocID - 1;
        for (String subID : subDocs.subIDs) {
          hits = s.search(new TermQuery(new Term("docid", subID)), 1);
          assertEquals(1, hits.totalHits);
          final int docID = hits.scoreDocs[0].doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          }
          lastDocID = docID;
        }
      }
    } else {
      // Pack was deleted -- make sure its docs are
      // deleted. We can't verify packID is deleted
      // because we can re-use packID for update:
      for (String subID : subDocs.subIDs) {
        assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
      }
    }
  }

  // Verify: make sure all not-deleted docs are in fact
  // not deleted:
  final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
  docs.close();

  for (int id = 0; id < endID; id++) {
    String stringID = "" + id;
    if (!delIDs.contains(stringID)) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
      if (hits.totalHits != 1) {
        System.out.println(
            "doc id=" + stringID + " is not supposed to be deleted, but got hitCount="
                + hits.totalHits + "; delIDs=" + delIDs);
        doFail = true;
      }
    }
  }
  assertFalse(doFail);

  assertEquals(
      "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
      addCount.get() - delCount.get(),
      s.getIndexReader().numDocs());
  releaseSearcher(s);

  writer.commit();

  assertEquals(
      "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
      addCount.get() - delCount.get(),
      writer.numDocs());

  doClose();

  try {
    writer.commit();
  } finally {
    writer.close();
  }

  // Cannot close until after writer is closed because
  // writer has merged segment warmer that uses IS to run
  // searches, and that IS may be using this es!
  if (es != null) {
    es.shutdown();
    es.awaitTermination(1, TimeUnit.SECONDS);
  }

  TestUtil.checkIndex(dir);
  dir.close();
  IOUtils.rm(tempDir);

  if (VERBOSE) {
    System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
}
@Test
public void testDuellCompletions()
    throws IOException, NoSuchFieldException, SecurityException, IllegalArgumentException,
        IllegalAccessException {
  final boolean preserveSeparators = getRandom().nextBoolean();
  final boolean preservePositionIncrements = getRandom().nextBoolean();
  final boolean usePayloads = getRandom().nextBoolean();
  final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;

  XAnalyzingSuggester reference =
      new XAnalyzingSuggester(
          new StandardAnalyzer(), null, new StandardAnalyzer(), options, 256, -1,
          preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL,
          XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE,
          XAnalyzingSuggester.HOLE_CHARACTER);
  LineFileDocs docs = new LineFileDocs(getRandom());
  int num = scaledRandomIntBetween(150, 300);
  final String[] titles = new String[num];
  final long[] weights = new long[num];
  for (int i = 0; i < titles.length; i++) {
    Document nextDoc = docs.nextDoc();
    IndexableField field = nextDoc.getField("title");
    titles[i] = field.stringValue();
    weights[i] = between(0, 100);
  }
  docs.close();

  final InputIterator primaryIter =
      new InputIterator() {
        int index = 0;
        long currentWeight = -1;

        @Override
        public BytesRef next() throws IOException {
          if (index < titles.length) {
            currentWeight = weights[index];
            return new BytesRef(titles[index++]);
          }
          return null;
        }

        @Override
        public long weight() {
          return currentWeight;
        }

        @Override
        public BytesRef payload() {
          return null;
        }

        @Override
        public boolean hasPayloads() {
          return false;
        }

        @Override
        public Set<BytesRef> contexts() {
          return null;
        }

        @Override
        public boolean hasContexts() {
          return false;
        }
      };
  InputIterator iter;
  if (usePayloads) {
    iter =
        new InputIterator() {
          @Override
          public long weight() {
            return primaryIter.weight();
          }

          @Override
          public BytesRef next() throws IOException {
            return primaryIter.next();
          }

          @Override
          public BytesRef payload() {
            return new BytesRef(Long.toString(weight()));
          }

          @Override
          public boolean hasPayloads() {
            return true;
          }

          @Override
          public Set<BytesRef> contexts() {
            return null;
          }

          @Override
          public boolean hasContexts() {
            return false;
          }
        };
  } else {
    iter = primaryIter;
  }
  reference.build(iter);
  PostingsFormat provider = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
  NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer());
  final CompletionFieldMapper mapper =
      new CompletionFieldMapper(
          new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads,
          preserveSeparators, preservePositionIncrements, Integer.MAX_VALUE, indexSettings,
          AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING);
  Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);
  Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
  field.setAccessible(true);
  Field refField = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
  refField.setAccessible(true);
  assertThat(refField.get(reference), equalTo(field.get(buildAnalyzingLookup)));

  for (int i = 0; i < titles.length; i++) {
    int res = between(1, 10);
    final StringBuilder builder = new StringBuilder();
    SuggestUtils.analyze(
        namedAnalzyer.tokenStream("foo", titles[i]),
        new SuggestUtils.TokenConsumer() {
          @Override
          public void nextToken() throws IOException {
            if (builder.length() == 0) {
              builder.append(this.charTermAttr.toString());
            }
          }
        });
    String firstTerm = builder.toString();
    String prefix =
        firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
    List<LookupResult> refLookup = reference.lookup(prefix, false, res);
    List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
    assertThat(refLookup.toString(), lookup.size(), equalTo(refLookup.size()));
    for (int j = 0; j < refLookup.size(); j++) {
      assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
      assertThat(
          "prefix: " + prefix + " " + j + " -- mismatch cost: " + lookup.get(j).key + " - "
              + lookup.get(j).value + " | " + refLookup.get(j).key + " - "
              + refLookup.get(j).value,
          lookup.get(j).value,
          equalTo(refLookup.get(j).value));
      assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
      if (usePayloads) {
        assertThat(
            lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
      }
    }
  }
}