public void testSimpleSkip() throws IOException {
  Directory dir = new CountingRAMDirectory(new RAMDirectory());
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
          .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
          .setMergePolicy(newLogMergePolicy()));
  Term term = new Term("test", "a");
  for (int i = 0; i < 5000; i++) {
    Document d1 = new Document();
    d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
    writer.addDocument(d1);
  }
  writer.commit();
  writer.forceMerge(1);
  writer.close();

  AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

  for (int i = 0; i < 2; i++) {
    counter = 0;
    DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
    checkSkipTo(tp, 14, 185); // no skips
    checkSkipTo(tp, 17, 190); // one skip on level 0
    checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

    // this test would fail if we had only one skip level,
    // because then more bytes would be read from the freqStream
    checkSkipTo(tp, 4800, 250); // one skip on level 2
  }
}
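// Note on the expected skip counts above (an informal sketch, assuming the
// Lucene40 postings format's default skip interval of 16): level 0 has a skip
// entry roughly every 16 documents, level 1 every 16^2 = 256, and level 2 every
// 16^3 = 4096. Skipping to doc 14 therefore needs no skip entries, doc 17 crosses
// one level-0 entry, doc 287 uses a level-1 entry plus level-0 entries, and doc
// 4800 is far enough to use a level-2 entry. The third argument to checkSkipTo is
// presumably an upper bound on the bytes read, as tracked by CountingRAMDirectory
// through the counter field; neither helper is defined in this excerpt.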
// LUCENE-1382
public void testCommitUserData() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(2));
  for (int j = 0; j < 17; j++) TestIndexWriter.addDoc(w);
  w.close();

  DirectoryReader r = DirectoryReader.open(dir);
  // commit(Map) never called for this index
  assertEquals(0, r.getIndexCommit().getUserData().size());
  r.close();

  w = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(2));
  for (int j = 0; j < 17; j++) TestIndexWriter.addDoc(w);
  Map<String, String> data = new HashMap<String, String>();
  data.put("label", "test1");
  w.commit(data);
  w.close();

  r = DirectoryReader.open(dir);
  assertEquals("test1", r.getIndexCommit().getUserData().get("label"));
  r.close();

  w = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  w.forceMerge(1);
  w.close();

  dir.close();
}
/*
 * Verify that calling forceMerge when writer is open for
 * "commit on close" works correctly both for rollback()
 * and close().
 */
public void testCommitOnCloseForceMerge() throws IOException {
  MockDirectoryWrapper dir = newDirectory();
  // Must disable throwing exc on double-write: this
  // test uses IW.rollback which easily results in
  // writing to same file more than once
  dir.setPreventDoubleWrite(false);
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(10)
          .setMergePolicy(newLogMergePolicy(10)));
  for (int j = 0; j < 17; j++) {
    TestIndexWriter.addDocWithIndex(writer, j);
  }
  writer.close();

  writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setOpenMode(OpenMode.APPEND));
  writer.forceMerge(1);

  // Open a reader before closing (committing) the writer:
  DirectoryReader reader = DirectoryReader.open(dir);

  // Reader should see index as multi-segment at this point:
  assertTrue("Reader incorrectly sees one segment",
      reader.getSequentialSubReaders().size() > 1);
  reader.close();

  // Abort the writer:
  writer.rollback();
  TestIndexWriter.assertNoUnreferencedFiles(dir, "aborted writer after forceMerge");

  // Open a reader after aborting writer:
  reader = DirectoryReader.open(dir);

  // Reader should still see index as multi-segment
  assertTrue("Reader incorrectly sees one segment",
      reader.getSequentialSubReaders().size() > 1);
  reader.close();

  if (VERBOSE) {
    System.out.println("TEST: do real full merge");
  }
  writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setOpenMode(OpenMode.APPEND));
  writer.forceMerge(1);
  writer.close();

  if (VERBOSE) {
    System.out.println("TEST: writer closed");
  }
  TestIndexWriter.assertNoUnreferencedFiles(dir, "aborted writer after forceMerge");

  // Open a reader after closing the writer:
  reader = DirectoryReader.open(dir);

  // Reader should see index as one segment
  assertEquals(
      "Reader incorrectly sees more than one segment",
      1,
      reader.getSequentialSubReaders().size());
  reader.close();
  dir.close();
}
/*
 * Verify that a writer with "commit on close" indeed
 * cleans up the temp segments created after opening
 * that are not referenced by the starting segments
 * file.  We check this by using MockDirectoryWrapper to
 * measure max temp disk space used.
 */
public void testCommitOnCloseDiskUsage() throws IOException {
  // MemoryCodec, since it uses FST, is not necessarily
  // "additive", ie if you add up N small FSTs, then merge
  // them, the merged result can easily be larger than the
  // sum because the merged FST may use array encoding for
  // some arcs (which uses more space):
  final String idFormat = _TestUtil.getPostingsFormat("id");
  final String contentFormat = _TestUtil.getPostingsFormat("content");
  assumeFalse(
      "This test cannot run with Memory codec",
      idFormat.equals("Memory") || contentFormat.equals("Memory"));

  MockDirectoryWrapper dir = newDirectory();
  Analyzer analyzer;
  if (random().nextBoolean()) {
    // no payloads
    analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(
            new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
      }
    };
  } else {
    // fixed length payloads
    final int length = random().nextInt(200);
    analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
        return new TokenStreamComponents(
            tokenizer, new MockFixedLengthPayloadFilter(random(), tokenizer, length));
      }
    };
  }

  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
          .setMaxBufferedDocs(10)
          .setReaderPooling(false)
          .setMergePolicy(newLogMergePolicy(10)));
  for (int j = 0; j < 30; j++) {
    TestIndexWriter.addDocWithIndex(writer, j);
  }
  writer.close();
  dir.resetMaxUsedSizeInBytes();

  dir.setTrackDiskUsage(true);
  long startDiskUsage = dir.getMaxUsedSizeInBytes();
  writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
          .setOpenMode(OpenMode.APPEND)
          .setMaxBufferedDocs(10)
          .setMergeScheduler(new SerialMergeScheduler())
          .setReaderPooling(false)
          .setMergePolicy(newLogMergePolicy(10)));
  for (int j = 0; j < 1470; j++) {
    TestIndexWriter.addDocWithIndex(writer, j);
  }
  long midDiskUsage = dir.getMaxUsedSizeInBytes();
  dir.resetMaxUsedSizeInBytes();
  writer.forceMerge(1);
  writer.close();

  DirectoryReader.open(dir).close();

  long endDiskUsage = dir.getMaxUsedSizeInBytes();

  // Ending index is 50X as large as starting index; due
  // to 3X disk usage normally we allow 150X max
  // transient usage.  If something is wrong w/ deleter
  // and it doesn't delete intermediate segments then it
  // will exceed this 150X:
  // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + "; end " + endDiskUsage);
  assertTrue(
      "writer used too much space while adding documents: mid="
          + midDiskUsage
          + " start="
          + startDiskUsage
          + " end="
          + endDiskUsage
          + " max="
          + (startDiskUsage * 150),
      midDiskUsage < 150 * startDiskUsage);
  assertTrue(
      "writer used too much space after close: endDiskUsage="
          + endDiskUsage
          + " startDiskUsage="
          + startDiskUsage
          + " max="
          + (startDiskUsage * 150),
      endDiskUsage < 150 * startDiskUsage);
  dir.close();
}
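// Rough sketch of where the 150X bound in the test above comes from: the starting
// index holds 30 documents and the final index holds 30 + 1470 = 1500 of the same
// kind of document, so the final index is roughly 50X the starting size. A forced
// merge can presumably need on the order of 3X the final index size transiently
// (source segments plus the newly written merged segment before the old files are
// deleted), giving the 50 * 3 = 150X allowance relative to startDiskUsage. The
// exact multiplier is an assumption; the test only asserts the 150X ceiling.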