// LUCENE-1270 public void testHangOnClose() throws IOException { Directory dir = newDirectory(); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); lmp.setNoCFSRatio(0.0); lmp.setMergeFactor(100); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(5) .setMergePolicy(lmp)); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.setStoreTermVectors(true); customType.setStoreTermVectorPositions(true); customType.setStoreTermVectorOffsets(true); doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType)); for (int i = 0; i < 60; i++) writer.addDocument(doc); Document doc2 = new Document(); FieldType customType2 = new FieldType(); customType2.setStored(true); doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2)); doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2)); doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2)); doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2)); for (int i = 0; i < 10; i++) writer.addDocument(doc2); writer.close(); Directory dir2 = newDirectory(); lmp = new LogByteSizeMergePolicy(); lmp.setMinMergeMB(0.0001); lmp.setNoCFSRatio(0.0); lmp.setMergeFactor(4); writer = new IndexWriter( dir2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMergeScheduler(new SerialMergeScheduler()) .setMergePolicy(lmp)); writer.addIndexes(dir); writer.close(); dir.close(); dir2.close(); }
/**
 * Builds a very large single-field postings index in three stages:
 *
 * <ol>
 *   <li>index 1000 documents whose only field is fed by a {@code MyTokenStream} with
 *       {@code n} alternating between 65537 and 65536, then force-merge to one segment;
 *   <li>add that index 1000 times to a second directory and force-merge;
 *   <li>add the second index 2000 times to a third directory and force-merge.
 * </ol>
 *
 * <p>The test is skipped (via {@code assumeTrue}) when the default codec is a
 * {@code CompressingCodec} whose {@code maxDocsPerChunk * blockSize} is below 16. There are no
 * explicit assertions on the resulting index; success means every stage completed.
 *
 * @throws Exception if indexing, merging or closing fails
 */
public void test() throws Exception {
  // Inspect the very codec instance we are about to parse, rather than a second throwaway config.
  Codec defaultCodec = new IndexWriterConfig(null).getCodec();
  if (defaultCodec instanceof CompressingCodec) {
    String codecDescription = defaultCodec.toString();
    Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)");
    Matcher matcher = regex.matcher(codecDescription);
    assertTrue(
        "Unexpected CompressingCodec toString() output: " + codecDescription, matcher.find());
    int maxDocsPerChunk = Integer.parseInt(matcher.group(1));
    int blockSize = Integer.parseInt(matcher.group(2));
    // Widen before multiplying so large settings cannot wrap around and fake a small product.
    long product = (long) maxDocsPerChunk * blockSize;
    assumeTrue(
        defaultCodec.getName()
            + " maxDocsPerChunk ("
            + maxDocsPerChunk
            + ") * blockSize ("
            + blockSize
            + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g",
        product >= 16);
  }

  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }

  IndexWriter w =
      new IndexWriter(
          dir,
          new IndexWriterConfig(new MockAnalyzer(random()))
              .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
              .setRAMBufferSizeMB(256.0)
              .setMergeScheduler(new ConcurrentMergeScheduler())
              .setMergePolicy(newLogMergePolicy(false, 10))
              .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
              .setCodec(TestUtil.getDefaultCodec()));

  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
  }

  // Stage 1: one reusable document whose token stream length is changed before every add.
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setOmitNorms(true);
  MyTokenStream tokenStream = new MyTokenStream();
  Field field = new Field("field", tokenStream, ft);
  doc.add(field);

  final int numDocs = 1000;
  for (int i = 0; i < numDocs; i++) {
    if (i % 2 == 1) { // trick blockPF's little optimization
      tokenStream.n = 65536;
    } else {
      tokenStream.n = 65537;
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  // Stage 2: the same reader repeated 1000 times, copied into a fresh index.
  DirectoryReader oneThousand = DirectoryReader.open(dir);
  DirectoryReader[] subReaders = new DirectoryReader[1000];
  Arrays.fill(subReaders, oneThousand);
  BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2"));
  if (dir2 instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null));
  TestUtil.addIndexesSlowly(w2, subReaders);
  w2.forceMerge(1);
  w2.close();
  oneThousand.close();

  // Stage 3: the stage-2 reader repeated 2000 times, copied into a third index.
  DirectoryReader oneMillion = DirectoryReader.open(dir2);
  subReaders = new DirectoryReader[2000];
  Arrays.fill(subReaders, oneMillion);
  BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3"));
  if (dir3 instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null));
  TestUtil.addIndexesSlowly(w3, subReaders);
  w3.forceMerge(1);
  w3.close();
  oneMillion.close();

  dir.close();
  dir2.close();
  dir3.close();
}
@Nightly public void test() throws Exception { MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields"))); dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER); IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } final Document doc = new Document(); final FieldType ft = new FieldType(); ft.setIndexed(false); ft.setStored(true); ft.freeze(); final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20); final byte[] value = new byte[valueLength]; for (int i = 0; i < valueLength; ++i) { // random so that even compressing codecs can't compress it value[i] = (byte) random().nextInt(256); } final Field f = new Field("fld", value, ft); doc.add(f); final int numDocs = (int) ((1L << 32) / valueLength + 100); for (int i = 0; i < numDocs; ++i) { w.addDocument(doc); if (VERBOSE && i % (numDocs / 10) == 0) { System.out.println(i + " of " + numDocs + "..."); } } w.forceMerge(1); w.close(); if (VERBOSE) { boolean found = false; for (String file : dir.listAll()) { if (file.endsWith(".fdt")) { final long fileLength = dir.fileLength(file); if (fileLength >= 1L << 32) { found = true; } System.out.println("File length of " + file + " : " + fileLength); } } if (!found) { System.out.println("No .fdt file larger than 4GB, test bug?"); } } DirectoryReader rd = DirectoryReader.open(dir); Document sd = rd.document(numDocs - 1); assertNotNull(sd); assertEquals(1, sd.getFields().size()); BytesRef valueRef = sd.getBinaryValue("fld"); assertNotNull(valueRef); 
assertEquals(new BytesRef(value), valueRef); rd.close(); dir.close(); }
// @Absurd @Ignore takes ~20GB-30GB of space and 10 minutes. // with some codecs needs more heap space as well. @Ignore("Very slow. Enable manually by removing @Ignore.") public void test() throws Exception { BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes1")); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.add(field); final int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) { // trick blockPF's little optimization tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.addDocument(doc); } w.forceMerge(1); w.close(); DirectoryReader oneThousand = DirectoryReader.open(dir); IndexReader subReaders[] = new IndexReader[1000]; Arrays.fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes2")); if (dir2 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.addIndexes(mr); w2.forceMerge(1); w2.close(); 
oneThousand.close(); DirectoryReader oneMillion = DirectoryReader.open(dir2); subReaders = new IndexReader[2000]; Arrays.fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes3")); if (dir3 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.addIndexes(mr); w3.forceMerge(1); w3.close(); oneMillion.close(); dir.close(); dir2.close(); dir3.close(); }
public void runTest(String testName) throws Exception { failed.set(false); addCount.set(0); delCount.set(0); packCount.set(0); final long t0 = System.currentTimeMillis(); Random random = new Random(random().nextLong()); final LineFileDocs docs = new LineFileDocs(random, true); final Path tempDir = createTempDir(testName); dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (dir instanceof BaseDirectoryWrapper) { ((BaseDirectoryWrapper) dir) .setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(random()); analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false); conf.setInfoStream(new FailOnNonBulkMergesInfoStream()); if (conf.getMergePolicy() instanceof MockRandomMergePolicy) { ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false); } if (LuceneTestCase.TEST_NIGHTLY) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.getMergePolicy(); if (mp instanceof TieredMergePolicy) { ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.); } else if (mp instanceof LogByteSizeMergePolicy) { ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.); } else if (mp instanceof LogMergePolicy) { ((LogMergePolicy) mp).setMaxMergeDocs(100000); } } conf.setMergedSegmentWarmer( new IndexWriter.IndexReaderWarmer() { @Override public void warm(LeafReader reader) throws IOException { if (VERBOSE) { System.out.println("TEST: now warm merged reader=" + reader); } warmed.put(((SegmentReader) reader).core, Boolean.TRUE); final int maxDoc = reader.maxDoc(); final Bits liveDocs = reader.getLiveDocs(); int sum = 0; final int inc = Math.max(1, maxDoc / 50); for (int docID = 0; docID < maxDoc; docID += inc) { if (liveDocs == null || liveDocs.get(docID)) { final 
StoredDocument doc = reader.document(docID); sum += doc.getFields().size(); } } IndexSearcher searcher = newSearcher(reader); sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits; if (VERBOSE) { System.out.println("TEST: warm visited " + sum + " fields"); } } }); if (VERBOSE) { conf.setInfoStream( new PrintStreamInfoStream(System.out) { @Override public void message(String component, String message) { if ("TP".equals(component)) { return; // ignore test points! } super.message(component, message); } }); } writer = new IndexWriter(dir, conf); TestUtil.reduceOpenFiles(writer); final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName)); doAfterWriter(es); final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4); final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER; final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>()); final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>()); final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>()); final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000; final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (VERBOSE) { System.out.println( "TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]"); } // Let index build up a bit Thread.sleep(100); doSearching(es, stopTime); if (VERBOSE) { System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]"); } for (int thread = 0; thread < indexThreads.length; thread++) { indexThreads[thread].join(); } if (VERBOSE) { System.out.println( "TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount); } final IndexSearcher s = getFinalSearcher(); if (VERBOSE) { 
System.out.println("TEST: finalSearcher=" + s); } assertFalse(failed.get()); boolean doFail = false; // Verify: make sure delIDs are in fact deleted: for (String id : delIDs) { final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1); if (hits.totalHits != 0) { System.out.println( "doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: for (String id : delPackIDs) { final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1); if (hits.totalHits != 0) { System.out.println( "packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: for (SubDocs subDocs : allSubDocs) { TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20); if (!subDocs.deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.totalHits != subDocs.subIDs.size()) { System.out.println( "packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; for (ScoreDoc scoreDoc : hits.scoreDocs) { final int docID = scoreDoc.doc; if (lastDocID != -1) { assertEquals(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; final StoredDocument doc = s.doc(docID); assertEquals(subDocs.packID, doc.get("packID")); } lastDocID = startDocID - 1; for (String subID : subDocs.subIDs) { hits = s.search(new TermQuery(new Term("docid", subID)), 1); assertEquals(1, hits.totalHits); final int docID = hits.scoreDocs[0].doc; if (lastDocID != -1) { assertEquals(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: for (String subID : subDocs.subIDs) { assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: final int endID = Integer.parseInt(docs.nextDoc().get("docid")); docs.close(); for (int id = 0; id < endID; id++) { String stringID = "" + id; if (!delIDs.contains(stringID)) { final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1); if (hits.totalHits != 1) { System.out.println( "doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs); doFail = true; } } } assertFalse(doFail); assertEquals( "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs()); releaseSearcher(s); writer.commit(); assertEquals( "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs()); doClose(); try { writer.commit(); } finally { writer.close(); } // Cannot close until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! if (es != null) { es.shutdown(); es.awaitTermination(1, TimeUnit.SECONDS); } TestUtil.checkIndex(dir); dir.close(); IOUtils.rm(tempDir); if (VERBOSE) { System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]"); } }