// Tests whether the DocumentWriter correctly enables the
// omitTermFreqAndPositions bit in the FieldInfo
public void testOmitTermFreqAndPositions() throws Exception {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
  Document d = new Document();

  // this field will have Tf
  Field f1 = newField("f1", "This field has term freqs", normalType);
  d.add(f1);

  // this field will NOT have Tf
  Field f2 = newField("f2", "This field has NO Tf in all docs", omitType);
  d.add(f2);

  writer.addDocument(d);
  writer.forceMerge(1);
  // now we add another document which has term freq for field f2 and not for f1
  // and verify that the SegmentMerger keeps things consistent
  d = new Document();

  // Reverse
  f1 = newField("f1", "This field has term freqs", omitType);
  d.add(f1);

  f2 = newField("f2", "This field has NO Tf in all docs", normalType);
  d.add(f2);

  writer.addDocument(d);

  // force merge
  writer.forceMerge(1);
  // flush
  writer.close();

  SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
  FieldInfos fi = reader.getFieldInfos();
  assertEquals(
      "OmitTermFreqAndPositions field bit should be set.",
      IndexOptions.DOCS_ONLY,
      fi.fieldInfo("f1").getIndexOptions());
  assertEquals(
      "OmitTermFreqAndPositions field bit should be set.",
      IndexOptions.DOCS_ONLY,
      fi.fieldInfo("f2").getIndexOptions());

  reader.close();
  ram.close();
}
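The normalType and omitType field types used above (and in later omit-Tf snippets) are not shown; a minimal sketch of how they are presumably declared, assuming both are unstored text fields and omitType indexes document IDs only:

// Assumed declarations for the two field types used by the omit-Tf tests:
// normalType keeps full postings, omitType drops term freqs and positions.
static final FieldType normalType = new FieldType(TextField.TYPE_NOT_STORED);
static final FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED);

static {
  omitType.setIndexOptions(IndexOptions.DOCS_ONLY);
}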
public void testSimpleSkip() throws IOException {
  Directory dir = new CountingRAMDirectory(new RAMDirectory());
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
              .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
              .setMergePolicy(newLogMergePolicy()));
  Term term = new Term("test", "a");
  for (int i = 0; i < 5000; i++) {
    Document d1 = new Document();
    d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
    writer.addDocument(d1);
  }
  writer.commit();
  writer.forceMerge(1);
  writer.close();

  AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

  for (int i = 0; i < 2; i++) {
    counter = 0;
    DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
    checkSkipTo(tp, 14, 185); // no skips
    checkSkipTo(tp, 17, 190); // one skip on level 0
    checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

    // this test would fail if we had only one skip level,
    // because then more bytes would be read from the freqStream
    checkSkipTo(tp, 4800, 250); // one skip on level 2
  }
}
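The counter field and checkSkipTo helper are not shown above; a simplified sketch, assuming CountingRAMDirectory increments counter for every byte read from the underlying directory:

// Simplified sketch of the assumed helper: advance to the target doc and
// verify the skip lists kept the number of bytes read below maxCounter.
int counter; // incremented by CountingRAMDirectory on every byte read

void checkSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) throws IOException {
  tp.advance(target);
  if (maxCounter < counter) {
    fail("Too many bytes read: " + counter + " vs " + maxCounter);
  }
  assertEquals("Wrong document after advance(" + target + ")", target, tp.docID());
}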
// LUCENE-1262 public void testExceptions() throws Throwable { Path indexDir = createTempDir("testfieldswriterexceptions"); Directory fsDir = newFSDirectory(indexDir); FaultyFSDirectory dir = new FaultyFSDirectory(fsDir); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, iwc); for (int i = 0; i < 2; i++) writer.addDocument(testDoc); writer.forceMerge(1); writer.close(); IndexReader reader = DirectoryReader.open(dir); dir.startFailing(); boolean exc = false; for (int i = 0; i < 2; i++) { try { reader.document(i); } catch (IOException ioe) { // expected exc = true; } try { reader.document(i); } catch (IOException ioe) { // expected exc = true; } } assertTrue(exc); reader.close(); dir.close(); }
/** * Tests that index merging (specifically addIndexes(Directory...)) doesn't change the index order * of documents. */ public void testLucene() throws IOException { int num = 100; Directory indexA = newDirectory(); Directory indexB = newDirectory(); fillIndex(random(), indexA, 0, num); boolean fail = verifyIndex(indexA, 0); if (fail) { fail("Index a is invalid"); } fillIndex(random(), indexB, num, num); fail = verifyIndex(indexB, num); if (fail) { fail("Index b is invalid"); } Directory merged = newDirectory(); IndexWriter writer = new IndexWriter( merged, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(2))); writer.addIndexes(indexA, indexB); writer.forceMerge(1); writer.close(); fail = verifyIndex(merged, 0); assertFalse("The merged index is invalid", fail); indexA.close(); indexB.close(); merged.close(); }
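fillIndex and verifyIndex are assumed helpers; a plausible sketch in which every document stores its global sequence number in a "count" field (the field name and writer settings are assumptions):

// Hypothetical helpers assumed by testLucene(): each document stores its
// absolute position, so document order can be checked after addIndexes.
private void fillIndex(Random random, Directory dir, int start, int numDocs) throws IOException {
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random))
              .setOpenMode(OpenMode.CREATE)
              .setMaxBufferedDocs(2)
              .setMergePolicy(newLogMergePolicy(2)));
  for (int i = start; i < (start + numDocs); i++) {
    Document temp = new Document();
    temp.add(newStringField("count", "" + i, Field.Store.YES));
    writer.addDocument(temp);
  }
  writer.close();
}

// Returns true ("fail") if any stored doc is out of order.
private boolean verifyIndex(Directory directory, int startAt) throws IOException {
  boolean fail = false;
  IndexReader reader = DirectoryReader.open(directory);
  int max = reader.maxDoc();
  for (int i = 0; i < max; i++) {
    Document temp = reader.document(i);
    // compare the index doc number to the value it should be
    if (!temp.getField("count").stringValue().equals((i + startAt) + "")) {
      fail = true;
    }
  }
  reader.close();
  return fail;
}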
// Verifies no *.nrm exists when all fields omit norms: public void testNoNrmFile() throws Throwable { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer) .setMaxBufferedDocs(3) .setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setNoCFSRatio(0.0); Document d = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setOmitNorms(true); Field f1 = newField("f1", "This field has no norms", customType); d.add(f1); for (int i = 0; i < 30; i++) { writer.addDocument(d); } writer.commit(); assertNoNrm(ram); // force merge writer.forceMerge(1); // flush writer.close(); assertNoNrm(ram); ram.close(); }
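assertNoNrm is not shown above; presumably it just scans the directory listing for norms files. The exact extensions checked are an assumption and depend on the codec in use:

// Assumed helper: fail if any norms-related file is present in the directory.
private void assertNoNrm(Directory dir) throws Throwable {
  final String[] files = dir.listAll();
  for (String file : files) {
    // .nrm is the classic norms file; .len is written by some per-field formats
    assertFalse(file.endsWith(".nrm") || file.endsWith(".len"));
  }
}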
// LUCENE-3870 public void testLengthPrefixAcrossTwoPages() throws Exception { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); Document doc = new Document(); byte[] bytes = new byte[32764]; BytesRef b = new BytesRef(); b.bytes = bytes; b.length = bytes.length; doc.add(new SortedDocValuesField("field", b)); w.addDocument(doc); bytes[0] = 1; w.addDocument(doc); w.forceMerge(1); DirectoryReader r = w.getReader(); BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field"); BytesRef bytes1 = new BytesRef(); s.get(0, bytes1); assertEquals(bytes.length, bytes1.length); bytes[0] = 0; assertEquals(b, bytes1); s.get(1, bytes1); assertEquals(bytes.length, bytes1.length); bytes[0] = 1; assertEquals(b, bytes1); r.close(); w.close(); d.close(); }
/* * Test a deletion policy that keeps last N commits. */ public void testKeepLastNDeletionPolicy() throws IOException { final int N = 5; for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); for (int j = 0; j < N + 1; j++) { IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int i = 0; i < 17; i++) { addDoc(writer); } writer.forceMerge(1); writer.close(); } assertTrue(policy.numDelete > 0); assertEquals(N + 1, policy.numOnInit); assertEquals(N + 1, policy.numOnCommit); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.open(dir); reader.close(); if (i == N) { fail("should have failed on commits prior to last " + N); } } catch (IOException e) { if (i != N) { throw e; } } if (i < N) { dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.close(); } }
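The addDoc helper used by this and the other deletion-policy tests below is not included; presumably it adds one minimal document, for example:

// Hypothetical helper assumed by the deletion-policy tests: adds a single
// trivial document with one indexed "content" field.
private void addDoc(IndexWriter writer) throws IOException {
  Document doc = new Document();
  doc.add(newTextField("content", "aaa", Field.Store.NO));
  writer.addDocument(doc);
}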
public void testForceMergeNotNeeded() throws IOException { try (Directory dir = newDirectory()) { final AtomicBoolean mayMerge = new AtomicBoolean(true); final MergeScheduler mergeScheduler = new SerialMergeScheduler() { @Override public synchronized void merge( IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException { if (mayMerge.get() == false) { MergePolicy.OneMerge merge = writer.getNextMerge(); if (merge != null) { System.out.println( "TEST: we should not need any merging, yet merge policy returned merge " + merge); throw new AssertionError(); } } super.merge(writer, trigger, newMergesFound); } }; MergePolicy mp = mergePolicy(); assumeFalse( "this test cannot tolerate random forceMerges", mp.toString().contains("MockRandomMergePolicy")); mp.setNoCFSRatio(random().nextBoolean() ? 0 : 1); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setMergeScheduler(mergeScheduler); iwc.setMergePolicy(mp); IndexWriter writer = new IndexWriter(dir, iwc); final int numSegments = TestUtil.nextInt(random(), 2, 20); for (int i = 0; i < numSegments; ++i) { final int numDocs = TestUtil.nextInt(random(), 1, 5); for (int j = 0; j < numDocs; ++j) { writer.addDocument(new Document()); } writer.getReader().close(); } for (int i = 5; i >= 0; --i) { final int segmentCount = writer.getSegmentCount(); final int maxNumSegments = i == 0 ? 1 : TestUtil.nextInt(random(), 1, 10); mayMerge.set(segmentCount > maxNumSegments); if (VERBOSE) { System.out.println( "TEST: now forceMerge(maxNumSegments=" + maxNumSegments + ") vs segmentCount=" + segmentCount); } writer.forceMerge(maxNumSegments); } writer.close(); } }
private void runTest(EnumSet<Type> types, TestType type) throws CorruptIndexException, IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); int num_1 = atLeast(200); int num_2 = atLeast(200); int num_3 = atLeast(200); long[] values = new long[num_1 + num_2 + num_3]; index(writer, randomValueType(types, random()), values, 0, num_1); writer.commit(); index(writer, randomValueType(types, random()), values, num_1, num_2); writer.commit(); if (random().nextInt(4) == 0) { // once in a while use addIndexes writer.forceMerge(1); Directory dir_2 = newDirectory(); IndexWriter writer_2 = new IndexWriter( dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); index(writer_2, randomValueType(types, random()), values, num_1 + num_2, num_3); writer_2.commit(); writer_2.close(); if (rarely()) { writer.addIndexes(dir_2); } else { // do a real merge here IndexReader open = maybeWrapReader(IndexReader.open(dir_2)); writer.addIndexes(open); open.close(); } dir_2.close(); } else { index(writer, randomValueType(types, random()), values, num_1 + num_2, num_3); } writer.forceMerge(1); writer.close(); assertValues(type, dir, values); dir.close(); }
// indexes Integer.MAX_VALUE docs with a fixed binary field public void testFixedSorted() throws Exception { BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedSorted")); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); Document doc = new Document(); byte bytes[] = new byte[2]; BytesRef data = new BytesRef(bytes); SortedDocValuesField dvField = new SortedDocValuesField("dv", data); doc.add(dvField); for (int i = 0; i < Integer.MAX_VALUE; i++) { bytes[0] = (byte) (i >> 8); bytes[1] = (byte) i; w.addDocument(doc); if (i % 100000 == 0) { System.out.println("indexed: " + i); System.out.flush(); } } w.forceMerge(1); w.close(); System.out.println("verifying..."); System.out.flush(); DirectoryReader r = DirectoryReader.open(dir); int expectedValue = 0; for (AtomicReaderContext context : r.leaves()) { AtomicReader reader = context.reader(); BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.getSortedDocValues("dv"); for (int i = 0; i < reader.maxDoc(); i++) { bytes[0] = (byte) (expectedValue >> 8); bytes[1] = (byte) expectedValue; dv.get(i, scratch); assertEquals(data, scratch); expectedValue++; } } r.close(); dir.close(); }
// Tests whether the DocumentWriter correctly enables the
// omitNorms bit in the FieldInfo
public void testOmitNorms() throws Exception {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
  Document d = new Document();

  // this field will have norms
  Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
  d.add(f1);

  // this field will NOT have norms
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  Field f2 = newField("f2", "This field has NO norms in all docs", customType);
  d.add(f2);

  writer.addDocument(d);
  writer.forceMerge(1);
  // now we add another document which has norms for field f2 and not for f1
  // and verify that the SegmentMerger keeps things consistent
  d = new Document();

  // Reverse
  d.add(newField("f1", "This field has norms", customType));
  d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

  writer.addDocument(d);

  // force merge
  writer.forceMerge(1);
  // flush
  writer.close();

  SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
  FieldInfos fi = reader.getFieldInfos();
  assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
  assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

  reader.close();
  ram.close();
}
/* Test keeping NO commit points. This is a viable and * useful case eg where you want to build a big index and * you know there are no readers. */ public void testKeepNoneOnInitDeletionPolicy() throws IOException { for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy()) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int i = 0; i < 107; i++) { addDoc(writer); } writer.close(); conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); mp.setNoCFSRatio(1.0); writer = new IndexWriter(dir, conf); policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.forceMerge(1); writer.close(); assertEquals(2, policy.numOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): assertEquals(2, policy.numOnCommit); // Simplistic check: just verify the index is in fact // readable: IndexReader reader = DirectoryReader.open(dir); reader.close(); dir.close(); } }
// Verifies no *.prx exists when all fields omit term positions: public void testNoPrxFile() throws Throwable { Directory ram = newDirectory(); if (ram instanceof MockDirectoryWrapper) { // we verify some files get deleted ((MockDirectoryWrapper) ram).setEnableVirusScanner(false); } Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(analyzer) .setMaxBufferedDocs(3) .setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setNoCFSRatio(0.0); Document d = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); Field f1 = newField("f1", "This field has term freqs", ft); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); writer.commit(); assertNoPrx(ram); // now add some documents with positions, and check there is no prox after optimization d = new Document(); f1 = newTextField("f1", "This field has positions", Field.Store.NO); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); assertNoPrx(ram); ram.close(); }
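assertNoPrx (also used in a later snippet) is assumed to scan the directory for proximity files; the extensions below are an assumption covering both the classic and block postings formats:

// Assumed helper: no positions (proximity) file may exist when every field
// omits positions.
private void assertNoPrx(Directory dir) throws Throwable {
  final String[] files = dir.listAll();
  for (String file : files) {
    assertFalse(file.endsWith(".prx"));
    assertFalse(file.endsWith(".pos"));
  }
}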
// Make sure first adding docs that do not omitNorms for
// field X, then adding docs that do omitNorms for that same
// field, leaves the omitNorms bits set correctly after merging
public void testMixedRAM() throws Exception {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer =
      new IndexWriter(
          ram,
          newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
              .setMaxBufferedDocs(10)
              .setMergePolicy(newLogMergePolicy(2)));
  Document d = new Document();

  // this field will have norms
  Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
  d.add(f1);

  // this field will NOT have norms
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  Field f2 = newField("f2", "This field has NO norms in all docs", customType);
  d.add(f2);

  for (int i = 0; i < 5; i++) {
    writer.addDocument(d);
  }

  for (int i = 0; i < 20; i++) {
    writer.addDocument(d);
  }

  // force merge
  writer.forceMerge(1);

  // flush
  writer.close();

  SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
  FieldInfos fi = reader.getFieldInfos();
  assertTrue("OmitNorms field bit should not be set.", !fi.fieldInfo("f1").omitsNorms());
  assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

  reader.close();
  ram.close();
}
// Make sure first adding docs that do not omitTermFreqAndPositions for
// field X, then adding docs that do omitTermFreqAndPositions for that same
// field, leaves the index options set correctly after merging
public void testMixedRAM() throws Exception {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer =
      new IndexWriter(
          ram,
          newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
              .setMaxBufferedDocs(10)
              .setMergePolicy(newLogMergePolicy(2)));
  Document d = new Document();

  // this field will have Tf
  Field f1 = newField("f1", "This field has term freqs", normalType);
  d.add(f1);

  // this field will NOT have Tf
  Field f2 = newField("f2", "This field has NO Tf in all docs", omitType);
  d.add(f2);

  for (int i = 0; i < 5; i++) {
    writer.addDocument(d);
  }

  for (int i = 0; i < 20; i++) {
    writer.addDocument(d);
  }

  // force merge
  writer.forceMerge(1);

  // flush
  writer.close();

  SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
  FieldInfos fi = reader.getFieldInfos();
  assertEquals(
      "OmitTermFreqAndPositions field bit should not be set.",
      IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
      fi.fieldInfo("f1").getIndexOptions());
  assertEquals(
      "OmitTermFreqAndPositions field bit should be set.",
      IndexOptions.DOCS_ONLY,
      fi.fieldInfo("f2").getIndexOptions());

  reader.close();
  ram.close();
}
public void testWithPendingDeletes3() throws IOException { // main directory Directory dir = newDirectory(); // auxiliary directory Directory aux = newDirectory(); setUpDirs(dir, aux); IndexWriter writer = newWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND)); // Adds 10 docs, then replaces them with another 10 // docs, so 10 pending deletes: for (int i = 0; i < 20; i++) { Document doc = new Document(); doc.add(newStringField("id", "" + (i % 10), Field.Store.NO)); doc.add(newTextField("content", "bbb " + i, Field.Store.NO)); writer.updateDocument(new Term("id", "" + (i % 10)), doc); } // Deletes one of the 10 added docs, leaving 9: PhraseQuery q = new PhraseQuery(); q.add(new Term("content", "bbb")); q.add(new Term("content", "14")); writer.deleteDocuments(q); writer.addIndexes(aux); writer.forceMerge(1); writer.commit(); verifyNumDocs(dir, 1039); verifyTermDocs(dir, new Term("content", "aaa"), 1030); verifyTermDocs(dir, new Term("content", "bbb"), 9); writer.close(); dir.close(); aux.close(); }
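verifyNumDocs and verifyTermDocs are assumed helpers used by several addIndexes tests; a sketch that checks total live doc count and counts the non-deleted documents containing a term via the MultiFields API (one plausible way to do it against this Lucene 4.x-era API):

// Plausible helpers assumed above: check total doc count, and the number of
// live documents that contain a given term.
private void verifyNumDocs(Directory dir, int numDocs) throws IOException {
  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(numDocs, reader.maxDoc());
  assertEquals(numDocs, reader.numDocs());
  reader.close();
}

private void verifyTermDocs(Directory dir, Term term, int numDocs) throws IOException {
  IndexReader reader = DirectoryReader.open(dir);
  DocsEnum docsEnum =
      MultiFields.getTermDocsEnum(
          reader, MultiFields.getLiveDocs(reader), term.field(), term.bytes());
  int count = 0;
  while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    count++;
  }
  assertEquals(numDocs, count);
  reader.close();
}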
// LUCENE-1262 public void testExceptions() throws Throwable { File indexDir = _TestUtil.getTempDir("testfieldswriterexceptions"); try { Directory dir = new FaultyFSDirectory(indexDir); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE)); for (int i = 0; i < 2; i++) writer.addDocument(testDoc); writer.forceMerge(1); writer.close(); IndexReader reader = DirectoryReader.open(dir); FaultyIndexInput.doFail = true; boolean exc = false; for (int i = 0; i < 2; i++) { try { reader.document(i); } catch (IOException ioe) { // expected exc = true; } try { reader.document(i); } catch (IOException ioe) { // expected exc = true; } } assertTrue(exc); reader.close(); dir.close(); } finally { _TestUtil.rmDir(indexDir); } }
// Verifies no *.prx exists when all fields omit term freq: public void testNoPrxFile() throws Throwable { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer) .setMaxBufferedDocs(3) .setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setUseCompoundFile(false); Document d = new Document(); Field f1 = newField("f1", "This field has term freqs", omitType); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); writer.commit(); assertNoPrx(ram); // now add some documents with positions, and check // there is no prox after full merge d = new Document(); f1 = newTextField("f1", "This field has positions", Field.Store.NO); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); assertNoPrx(ram); ram.close(); }
/** Optimize the index database. */
public void optimize() {
  synchronized (lock) {
    if (running) {
      log.warning("Optimize terminated... Someone else is updating / optimizing it!");
      return;
    }
    running = true;
  }
  IndexWriter wrt = null;
  try {
    log.info("Optimizing the index ... ");
    Analyzer analyzer = new StandardAnalyzer(SearchEngine.LUCENE_VERSION);
    IndexWriterConfig conf = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    wrt = new IndexWriter(indexDirectory, conf);
    wrt.forceMerge(1); // this is deprecated and not needed anymore
    log.info("done");
    synchronized (lock) {
      if (dirtyFile.exists() && !dirtyFile.delete()) {
        log.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
      }
      dirty = false;
    }
  } catch (IOException e) {
    log.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
  } finally {
    if (wrt != null) {
      try {
        wrt.close();
      } catch (IOException e) {
        log.log(Level.WARNING, "An error occurred while closing writer", e);
      }
    }
    synchronized (lock) {
      running = false;
    }
  }
}
// LUCENE-1382 public void testCommitUserData() throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(2)); for (int j = 0; j < 17; j++) TestIndexWriter.addDoc(w); w.close(); DirectoryReader r = DirectoryReader.open(dir); // commit(Map) never called for this index assertEquals(0, r.getIndexCommit().getUserData().size()); r.close(); w = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(2)); for (int j = 0; j < 17; j++) TestIndexWriter.addDoc(w); Map<String, String> data = new HashMap<String, String>(); data.put("label", "test1"); w.commit(data); w.close(); r = DirectoryReader.open(dir); assertEquals("test1", r.getIndexCommit().getUserData().get("label")); r.close(); w = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); w.forceMerge(1); w.close(); dir.close(); }
public void test() throws Exception { IndexWriterConfig defaultConfig = new IndexWriterConfig(null); Codec defaultCodec = defaultConfig.getCodec(); if ((new IndexWriterConfig(null)).getCodec() instanceof CompressingCodec) { Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)"); Matcher matcher = regex.matcher(defaultCodec.toString()); assertTrue( "Unexpected CompressingCodec toString() output: " + defaultCodec.toString(), matcher.find()); int maxDocsPerChunk = Integer.parseInt(matcher.group(1)); int blockSize = Integer.parseInt(matcher.group(2)); int product = maxDocsPerChunk * blockSize; assumeTrue( defaultCodec.getName() + " maxDocsPerChunk (" + maxDocsPerChunk + ") * blockSize (" + blockSize + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g", product >= 16); } BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1")); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE) .setCodec(TestUtil.getDefaultCodec())); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.add(field); final int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) { // trick blockPF's little optimization tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.addDocument(doc); } w.forceMerge(1); w.close(); DirectoryReader oneThousand = DirectoryReader.open(dir); DirectoryReader subReaders[] = new DirectoryReader[1000]; Arrays.fill(subReaders, oneThousand); BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2")); if (dir2 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null)); TestUtil.addIndexesSlowly(w2, subReaders); w2.forceMerge(1); w2.close(); oneThousand.close(); DirectoryReader oneMillion = DirectoryReader.open(dir2); subReaders = new DirectoryReader[2000]; Arrays.fill(subReaders, oneMillion); BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3")); if (dir3 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null)); TestUtil.addIndexesSlowly(w3, subReaders); w3.forceMerge(1); w3.close(); oneMillion.close(); dir.close(); dir2.close(); dir3.close(); }
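MyTokenStream (also used in the older variant of this test later in the section) is not shown; a sketch under the assumption that it emits the single term "a" exactly n times per document, which is what lets these tests drive total postings size past 2^31 bytes cheaply:

// Assumed single-term token stream: emits "a" n times per document.
public static final class MyTokenStream extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  int index;
  int n;

  @Override
  public boolean incrementToken() {
    if (index < n) {
      clearAttributes();
      termAtt.setEmpty().append("a");
      index++;
      return true;
    }
    return false;
  }

  @Override
  public void reset() {
    index = 0;
  }
}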
public void testLiveMaxMergeCount() throws Exception { Directory d = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); TieredMergePolicy tmp = new TieredMergePolicy(); tmp.setSegmentsPerTier(1000); tmp.setMaxMergeAtOnce(1000); tmp.setMaxMergeAtOnceExplicit(10); iwc.setMergePolicy(tmp); iwc.setMaxBufferedDocs(2); iwc.setRAMBufferSizeMB(-1); final AtomicInteger maxRunningMergeCount = new AtomicInteger(); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() { final AtomicInteger runningMergeCount = new AtomicInteger(); @Override public void doMerge(MergePolicy.OneMerge merge) throws IOException { int count = runningMergeCount.incrementAndGet(); // evil? synchronized (this) { if (count > maxRunningMergeCount.get()) { maxRunningMergeCount.set(count); } } try { super.doMerge(merge); } finally { runningMergeCount.decrementAndGet(); } } }; cms.setMaxMergesAndThreads(5, 3); iwc.setMergeScheduler(cms); IndexWriter w = new IndexWriter(d, iwc); // Makes 100 segments for (int i = 0; i < 200; i++) { w.addDocument(new Document()); } // No merges should have run so far, because TMP has high segmentsPerTier: assertEquals(0, maxRunningMergeCount.get()); w.forceMerge(1); // At most 5 merge threads should have launched at once: assertTrue("maxRunningMergeCount=" + maxRunningMergeCount, maxRunningMergeCount.get() <= 5); maxRunningMergeCount.set(0); // Makes another 100 segments for (int i = 0; i < 200; i++) { w.addDocument(new Document()); } ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).setMaxMergesAndThreads(1, 1); w.forceMerge(1); // At most 1 merge thread should have launched at once: assertEquals(1, maxRunningMergeCount.get()); w.close(); d.close(); }
// @Absurd @Ignore takes ~20GB-30GB of space and 10 minutes. // with some codecs needs more heap space as well. @Ignore("Very slow. Enable manually by removing @Ignore.") public void test() throws Exception { BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes1")); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.add(field); final int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) { // trick blockPF's little optimization tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.addDocument(doc); } w.forceMerge(1); w.close(); DirectoryReader oneThousand = DirectoryReader.open(dir); IndexReader subReaders[] = new IndexReader[1000]; Arrays.fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes2")); if (dir2 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.addIndexes(mr); w2.forceMerge(1); w2.close(); oneThousand.close(); DirectoryReader oneMillion = DirectoryReader.open(dir2); subReaders = new IndexReader[2000]; Arrays.fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes3")); if (dir3 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.addIndexes(mr); w3.forceMerge(1); w3.close(); oneMillion.close(); dir.close(); dir2.close(); dir3.close(); }
public void testSimpleCase() throws IOException { // main directory Directory dir = newDirectory(); // two auxiliary directories Directory aux = newDirectory(); Directory aux2 = newDirectory(); IndexWriter writer = null; writer = newWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE)); // add 100 documents addDocs(writer, 100); assertEquals(100, writer.maxDoc()); writer.close(); TestUtil.checkIndex(dir); writer = newWriter( aux, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setMergePolicy(newLogMergePolicy(false))); // add 40 documents in separate files addDocs(writer, 40); assertEquals(40, writer.maxDoc()); writer.close(); writer = newWriter( aux2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE)); // add 50 documents in compound files addDocs2(writer, 50); assertEquals(50, writer.maxDoc()); writer.close(); // test doc count before segments are merged writer = newWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND)); assertEquals(100, writer.maxDoc()); writer.addIndexes(aux, aux2); assertEquals(190, writer.maxDoc()); writer.close(); TestUtil.checkIndex(dir); // make sure the old index is correct verifyNumDocs(aux, 40); // make sure the new index is correct verifyNumDocs(dir, 190); // now add another set in. Directory aux3 = newDirectory(); writer = newWriter(aux3, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); // add 40 documents addDocs(writer, 40); assertEquals(40, writer.maxDoc()); writer.close(); // test doc count before segments are merged writer = newWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND)); assertEquals(190, writer.maxDoc()); writer.addIndexes(aux3); assertEquals(230, writer.maxDoc()); writer.close(); // make sure the new index is correct verifyNumDocs(dir, 230); verifyTermDocs(dir, new Term("content", "aaa"), 180); verifyTermDocs(dir, new Term("content", "bbb"), 50); // now fully merge it. writer = newWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND)); writer.forceMerge(1); writer.close(); // make sure the new index is correct verifyNumDocs(dir, 230); verifyTermDocs(dir, new Term("content", "aaa"), 180); verifyTermDocs(dir, new Term("content", "bbb"), 50); // now add a single document Directory aux4 = newDirectory(); writer = newWriter(aux4, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); addDocs2(writer, 1); writer.close(); writer = newWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND)); assertEquals(230, writer.maxDoc()); writer.addIndexes(aux4); assertEquals(231, writer.maxDoc()); writer.close(); verifyNumDocs(dir, 231); verifyTermDocs(dir, new Term("content", "bbb"), 51); dir.close(); aux.close(); aux2.close(); aux3.close(); aux4.close(); }
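addDocs and addDocs2 are assumed helpers; the sketch below indexes "aaa" and "bbb" documents respectively, which is consistent with the verifyTermDocs counts asserted above:

// Hypothetical helpers assumed by testSimpleCase(): addDocs indexes "aaa"
// documents, addDocs2 indexes "bbb" documents.
private void addDocs(IndexWriter writer, int numDocs) throws IOException {
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    writer.addDocument(doc);
  }
}

private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("content", "bbb", Field.Store.NO));
    writer.addDocument(doc);
  }
}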
/* Test: make sure when we run out of disk space or hit random IOExceptions in any of the addIndexes(*) calls that 1) index is not corrupt (searcher can open/search it) and 2) transactional semantics are followed: either all or none of the incoming documents were in fact added. */ public void testAddIndexOnDiskFull() throws IOException { // MemoryCodec, since it uses FST, is not necessarily // "additive", ie if you add up N small FSTs, then merge // them, the merged result can easily be larger than the // sum because the merged FST may use array encoding for // some arcs (which uses more space): final String idFormat = _TestUtil.getPostingsFormat("id"); final String contentFormat = _TestUtil.getPostingsFormat("content"); assumeFalse( "This test cannot run with Memory codec", idFormat.equals("Memory") || contentFormat.equals("Memory")); int START_COUNT = 57; int NUM_DIR = TEST_NIGHTLY ? 50 : 5; int END_COUNT = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5); // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = newDirectory(); IndexWriter writer = new IndexWriter( dirs[i], newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); for (int j = 0; j < 25; j++) { addDocWithIndex(writer, 25 * i + j); } writer.close(); String[] files = dirs[i].listAll(); for (int j = 0; j < files.length; j++) { inputDiskUsage += dirs[i].fileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: MockDirectoryWrapper startDir = newMockDirectory(); IndexWriter writer = new IndexWriter( startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); for (int j = 0; j < START_COUNT; j++) { addDocWithIndex(writer, j); } writer.close(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.open(startDir); assertEquals("first docFreq", 57, reader.docFreq(searchTerm)); IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 57, hits.length); reader.close(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 2000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. 
// String[] files = startDir.listAll(); long diskUsage = startDir.sizeInBytes(); long startDiskUsage = 0; String[] files = startDir.listAll(); for (int i = 0; i < files.length; i++) { startDiskUsage += startDir.fileLength(files[i]); } for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { System.out.println("TEST: iter=" + iter); } // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + _TestUtil.nextInt(random(), 50, 200); int method = iter; boolean success = false; boolean done = false; String methodName; if (0 == method) { methodName = "addIndexes(Directory[]) + forceMerge(1)"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexes(Directory[])"; } while (!done) { if (VERBOSE) { System.out.println("TEST: cycle..."); } // Make a new dir that will enforce disk usage: MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new RAMDirectory(startDir, newIOContext(random()))); writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy(false))); IOException err = null; MergeScheduler ms = writer.getConfig().getMergeScheduler(); for (int x = 0; x < 2; x++) { if (ms instanceof ConcurrentMergeScheduler) // This test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. if (0 == x) ((ConcurrentMergeScheduler) ms).setSuppressExceptions(); else ((ConcurrentMergeScheduler) ms).clearSuppressExceptions(); // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; String testName = null; if (0 == x) { dir.setRandomIOExceptionRateOnOpen(random().nextDouble() * 0.01); thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (VERBOSE) testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; } else { dir.setRandomIOExceptionRateOnOpen(0.0); thisDiskFree = 0; rate = 0.0; if (VERBOSE) testName = "disk full test " + methodName + " with unlimited disk space"; } if (VERBOSE) System.out.println("\ncycle: " + testName); dir.setTrackDiskUsage(true); dir.setMaxSizeInBytes(thisDiskFree); dir.setRandomIOExceptionRate(rate); try { if (0 == method) { if (VERBOSE) { System.out.println("TEST: now addIndexes count=" + dirs.length); } writer.addIndexes(dirs); if (VERBOSE) { System.out.println("TEST: now forceMerge"); } writer.forceMerge(1); } else if (1 == method) { IndexReader readers[] = new IndexReader[dirs.length]; for (int i = 0; i < dirs.length; i++) { readers[i] = DirectoryReader.open(dirs[i]); } try { writer.addIndexes(readers); } finally { for (int i = 0; i < dirs.length; i++) { readers[i].close(); } } } else { writer.addIndexes(dirs); } success = true; if (VERBOSE) { System.out.println(" success!"); } if (0 == x) { done = true; } } catch (IOException e) { success = false; err = e; if (VERBOSE) { System.out.println(" hit IOException: " + e); e.printStackTrace(System.out); } if (1 == x) { e.printStackTrace(System.out); fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done _TestUtil.syncConcurrentMerges(writer); if (VERBOSE) { System.out.println(" now test readers"); } // Finally, verify index is not corrupt, and, if // we 
succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): dir.setRandomIOExceptionRateOnOpen(0.0); try { reader = DirectoryReader.open(dir); } catch (IOException e) { e.printStackTrace(System.out); fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.docFreq(searchTerm); if (success) { if (result != START_COUNT) { fail( testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { err.printStackTrace(System.out); fail( testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = newSearcher(reader); try { hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs; } catch (IOException e) { e.printStackTrace(System.out); fail(testName + ": exception when searching: " + e); } int result2 = hits.length; if (success) { if (result2 != result) { fail( testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { err.printStackTrace(System.out); fail( testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } reader.close(); if (VERBOSE) { System.out.println(" count is " + result); } if (done || result == END_COUNT) { break; } } if (VERBOSE) { System.out.println( " start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.getMaxUsedSizeInBytes()); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: assertTrue( "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.getMaxUsedSizeInBytes() - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes", (dir.getMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage)); } // Make sure we don't hit disk full during close below: dir.setMaxSizeInBytes(0); dir.setRandomIOExceptionRate(0.0); dir.setRandomIOExceptionRateOnOpen(0.0); writer.close(); // Wait for all BG threads to finish else // dir.close() will throw IOException because // there are still open files _TestUtil.syncConcurrentMerges(ms); dir.close(); // Try again with more free space: diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random(), 4000, 8000) : _TestUtil.nextInt(random(), 40000, 80000); } } startDir.close(); for (Directory dir : dirs) dir.close(); }
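addDocWithIndex is an assumed helper for the disk-full test above; a sketch in which each document's "content" value contains "aaa" plus its index (field names and storage flags are assumptions, chosen to match the "content: aaa" searches in the test):

// Hypothetical helper assumed by testAddIndexOnDiskFull().
private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
  Document doc = new Document();
  doc.add(newTextField("content", "aaa " + index, Field.Store.YES));
  doc.add(newTextField("id", "" + index, Field.Store.YES));
  writer.addDocument(doc);
}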
public void testMergeIncompatibleTypes() throws IOException { Directory dir = newDirectory(); IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); writerConfig.setMergePolicy( NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values IndexWriter writer = new IndexWriter(dir, writerConfig); int num_1 = atLeast(200); int num_2 = atLeast(200); long[] values = new long[num_1 + num_2]; index(writer, randomValueType(INTEGERS, random()), values, 0, num_1); writer.commit(); if (random().nextInt(4) == 0) { // once in a while use addIndexes Directory dir_2 = newDirectory(); IndexWriter writer_2 = new IndexWriter( dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); index( writer_2, randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, num_1, num_2); writer_2.commit(); writer_2.close(); if (random().nextBoolean()) { writer.addIndexes(dir_2); } else { // do a real merge here IndexReader open = IndexReader.open(dir_2); writer.addIndexes(open); open.close(); } dir_2.close(); } else { index( writer, randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, num_1, num_2); writer.commit(); } writer.close(); writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); if (writerConfig.getMergePolicy() instanceof NoMergePolicy) { writerConfig.setMergePolicy( newLogMergePolicy()); // make sure we merge to one segment (merge everything together) } writer = new IndexWriter(dir, writerConfig); // now merge writer.forceMerge(1); writer.close(); DirectoryReader reader = DirectoryReader.open(dir); assertEquals(1, reader.getSequentialSubReaders().length); IndexReaderContext topReaderContext = reader.getTopReaderContext(); AtomicReaderContext[] children = topReaderContext.leaves(); DocValues docValues = children[0].reader().docValues("promote"); assertNotNull(docValues); assertValues(TestType.Byte, dir, values); assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.getType()); reader.close(); dir.close(); }
/* Uses KeepAllDeletionPolicy to keep all commits around,
 * then opens a new IndexWriter on a previous commit
 * point. */
public void testOpenPriorSnapshot() throws IOException {
  Directory dir = newDirectory();

  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(new KeepAllDeletionPolicy(dir))
              .setMaxBufferedDocs(2)
              .setMergePolicy(newLogMergePolicy(10)));
  KeepAllDeletionPolicy policy =
      (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
  for (int i = 0; i < 10; i++) {
    addDoc(writer);
    if ((1 + i) % 2 == 0) writer.commit();
  }
  writer.close();

  Collection<IndexCommit> commits = DirectoryReader.listCommits(dir);
  assertEquals(5, commits.size());
  IndexCommit lastCommit = null;
  for (final IndexCommit commit : commits) {
    if (lastCommit == null || commit.getGeneration() > lastCommit.getGeneration())
      lastCommit = commit;
  }
  assertTrue(lastCommit != null);

  // Now add 1 doc and merge
  writer =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy));
  addDoc(writer);
  assertEquals(11, writer.numDocs());
  writer.forceMerge(1);
  writer.close();

  assertEquals(6, DirectoryReader.listCommits(dir).size());

  // Now open writer on the commit just before merge:
  writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(policy)
              .setIndexCommit(lastCommit));
  assertEquals(10, writer.numDocs());

  // Should undo our rollback:
  writer.rollback();

  DirectoryReader r = DirectoryReader.open(dir);
  // Still merged, still 11 docs
  assertEquals(1, r.leaves().size());
  assertEquals(11, r.numDocs());
  r.close();

  writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(policy)
              .setIndexCommit(lastCommit));
  assertEquals(10, writer.numDocs());
  // Commits the rollback:
  writer.close();

  // Now 7 because we made another commit
  assertEquals(7, DirectoryReader.listCommits(dir).size());

  r = DirectoryReader.open(dir);
  // Not fully merged because we rolled it back, and now only
  // 10 docs
  assertTrue(r.leaves().size() > 1);
  assertEquals(10, r.numDocs());
  r.close();

  // Re-merge
  writer =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy));
  writer.forceMerge(1);
  writer.close();

  r = DirectoryReader.open(dir);
  assertEquals(1, r.leaves().size());
  assertEquals(10, r.numDocs());
  r.close();

  // Now open writer on the commit just before merging,
  // but this time keeping only the last commit:
  writer =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
  assertEquals(10, writer.numDocs());

  // Reader still sees fully merged index, because writer
  // opened on the prior commit has not yet committed:
  r = DirectoryReader.open(dir);
  assertEquals(1, r.leaves().size());
  assertEquals(10, r.numDocs());
  r.close();

  writer.close();

  // Now reader sees not-fully-merged index:
  r = DirectoryReader.open(dir);
  assertTrue(r.leaves().size() > 1);
  assertEquals(10, r.numDocs());
  r.close();

  dir.close();
}
/* * Test a silly deletion policy that keeps all commits around. */ public void testKeepAllDeletionPolicy() throws IOException { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { System.out.println("TEST: cycle pass="******"TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.forceMerge(1); writer.close(); } assertEquals(needsMerging ? 2 : 1, policy.numOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): assertEquals(1 + (needsMerging ? 1 : 0), policy.numOnCommit); // Test listCommits Collection<IndexCommit> commits = DirectoryReader.listCommits(dir); // 2 from closing writer assertEquals(1 + (needsMerging ? 1 : 0), commits.size()); // Make sure we can open a reader on each commit: for (final IndexCommit commit : commits) { IndexReader r = DirectoryReader.open(commit); r.close(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.open(dir); reader.close(); dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.listAll().length; writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy)); writer.close(); int postCount = dir.listAll().length; assertTrue(postCount < preCount); } } dir.close(); } }
@Nightly public void test() throws Exception { MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields"))); dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER); IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } final Document doc = new Document(); final FieldType ft = new FieldType(); ft.setIndexed(false); ft.setStored(true); ft.freeze(); final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20); final byte[] value = new byte[valueLength]; for (int i = 0; i < valueLength; ++i) { // random so that even compressing codecs can't compress it value[i] = (byte) random().nextInt(256); } final Field f = new Field("fld", value, ft); doc.add(f); final int numDocs = (int) ((1L << 32) / valueLength + 100); for (int i = 0; i < numDocs; ++i) { w.addDocument(doc); if (VERBOSE && i % (numDocs / 10) == 0) { System.out.println(i + " of " + numDocs + "..."); } } w.forceMerge(1); w.close(); if (VERBOSE) { boolean found = false; for (String file : dir.listAll()) { if (file.endsWith(".fdt")) { final long fileLength = dir.fileLength(file); if (fileLength >= 1L << 32) { found = true; } System.out.println("File length of " + file + " : " + fileLength); } } if (!found) { System.out.println("No .fdt file larger than 4GB, test bug?"); } } DirectoryReader rd = DirectoryReader.open(dir); Document sd = rd.document(numDocs - 1); assertNotNull(sd); assertEquals(1, sd.getFields().size()); BytesRef valueRef = sd.getBinaryValue("fld"); assertNotNull(valueRef); assertEquals(new BytesRef(value), valueRef); rd.close(); dir.close(); }
// Test scores with one field with Term Freqs and one without, otherwise with equal content public void testBasic() throws Exception { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer) .setMaxBufferedDocs(2) .setSimilarity(new SimpleSimilarity()) .setMergePolicy(newLogMergePolicy(2))); StringBuilder sb = new StringBuilder(265); String term = "term"; for (int i = 0; i < 30; i++) { Document d = new Document(); sb.append(term).append(" "); String content = sb.toString(); Field noTf = newField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType); d.add(noTf); Field tf = newField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType); d.add(tf); writer.addDocument(d); // System.out.println(d); } writer.forceMerge(1); // flush writer.close(); /* * Verify the index */ IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); searcher.setSimilarity(new SimpleSimilarity()); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d); PhraseQuery pq = new PhraseQuery(); pq.add(a); pq.add(c); try { searcher.search(pq, 10); fail("did not hit expected exception"); } catch (Exception e) { Throwable cause = e; // If the searcher uses an executor service, the IAE is wrapped into other exceptions while (cause.getCause() != null) { cause = cause.getCause(); } assertTrue("Expected an IAE, got " + cause, cause instanceof IllegalStateException); } searcher.search( q1, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { // System.out.println("Q1: Doc=" + doc + " score=" + score); float score = scorer.score(); assertTrue("got score=" + score, score == 1.0f); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); searcher.search( q2, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { // System.out.println("Q2: Doc=" + doc + " score=" + score); float score = scorer.score(); assertEquals(1.0f + doc, score, 0.00001f); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); searcher.search( q3, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { // System.out.println("Q1: Doc=" + doc + " score=" + score); float score = scorer.score(); assertTrue(score == 1.0f); assertFalse(doc % 2 == 0); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); searcher.search( q4, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { float score = scorer.score(); // System.out.println("Q1: Doc=" + doc + " score=" + score); assertTrue(score == 1.0f); assertTrue(doc % 2 == 0); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.add(q1, Occur.MUST); bq.add(q4, 
Occur.MUST); searcher.search( bq, new CountingHitCollector() { @Override public final void collect(int doc) throws IOException { // System.out.println("BQ: Doc=" + doc + " score=" + score); super.collect(doc); } }); assertEquals(15, CountingHitCollector.getCount()); reader.close(); dir.close(); }
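CountingHitCollector, subclassed throughout testBasic() above, is not shown; a minimal sketch against the old Collector API, assuming it counts collected docs in a static counter that the constructor resets (so the final assertEquals(15, CountingHitCollector.getCount()) reflects only the last search):

// Assumed base collector for testBasic().
public static class CountingHitCollector extends Collector {
  static int count = 0;
  static int sum = 0;
  private int docBase = -1;

  CountingHitCollector() {
    count = 0;
    sum = 0;
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {}

  @Override
  public void collect(int doc) throws IOException {
    count++;
    sum += doc + docBase; // use the doc id so it cannot be optimized away
  }

  public static int getCount() {
    return count;
  }

  public static int getSum() {
    return sum;
  }

  @Override
  public void setNextReader(AtomicReaderContext context) {
    docBase = context.docBase;
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return true;
  }
}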