public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);

  doc = new Document();
  doc.add(new TextField("dv", "some text", Field.Store.NO));
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);

  DirectoryReader r = writer.getReader();
  writer.close();

  AtomicReader subR = r.leaves().get(0).reader();
  assertEquals(2, subR.numDocs());

  Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
  assertTrue(bits.get(0));
  assertTrue(bits.get(1));
  r.close();
  dir.close();
}
// LUCENE-3870
public void testLengthPrefixAcrossTwoPages() throws Exception {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();
  byte[] bytes = new byte[32764];
  BytesRef b = new BytesRef();
  b.bytes = bytes;
  b.length = bytes.length;
  doc.add(new SortedDocValuesField("field", b));
  w.addDocument(doc);
  bytes[0] = 1;
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = w.getReader();
  BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field");

  BytesRef bytes1 = new BytesRef();
  s.get(0, bytes1);
  assertEquals(bytes.length, bytes1.length);
  bytes[0] = 0;
  assertEquals(b, bytes1);

  s.get(1, bytes1);
  assertEquals(bytes.length, bytes1.length);
  bytes[0] = 1;
  assertEquals(b, bytes1);
  r.close();
  w.close();
  d.close();
}
public void testDocValuesUnstored() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwconfig);
  for (int i = 0; i < 50; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", i));
    doc.add(new TextField("docId", "" + i, Field.Store.YES));
    writer.addDocument(doc);
  }
  DirectoryReader r = writer.getReader();
  SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
  FieldInfos fi = slow.getFieldInfos();
  FieldInfo dvInfo = fi.fieldInfo("dv");
  assertTrue(dvInfo.hasDocValues());
  NumericDocValues dv = slow.getNumericDocValues("dv");
  for (int i = 0; i < 50; i++) {
    assertEquals(i, dv.get(i));
    StoredDocument d = slow.document(i);
    // cannot use d.get("dv") due to another bug!
    assertNull(d.getField("dv"));
    assertEquals(Integer.toString(i), d.get("docId"));
  }
  slow.close();
  writer.close();
  dir.close();
}
private final IndexReader doOpenFromWriter(boolean openReadOnly, IndexCommit commit) throws CorruptIndexException, IOException {
  assert readOnly;

  if (!openReadOnly) {
    throw new IllegalArgumentException("a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
  }

  if (commit != null) {
    throw new IllegalArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
  }

  if (writer.nrtIsCurrent(segmentInfos)) {
    return null;
  }

  IndexReader reader = writer.getReader(applyAllDeletes);

  // If in fact no changes took place, return null:
  if (reader.getVersion() == segmentInfos.getVersion()) {
    reader.decRef();
    return null;
  }

  reader.readerFinishedListeners = readerFinishedListeners;
  return reader;
}
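// A hedged usage sketch, not code from this class: doOpenFromWriter() returns
// null when the writer reports the index is unchanged, and the public reopen
// contract in this era of Lucene turns that into "hand back the same reader".
// The helper name refreshNRT and the pattern below are illustrative assumptions.
private IndexReader refreshNRT(IndexReader reader) throws IOException {
  IndexReader newReader = reader.reopen(); // NRT readers route through doOpenFromWriter
  if (newReader != reader) {
    // the index changed: a fresh reader came back, so release the old one
    reader.close();
  }
  return newReader;
}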
// LUCENE-1727: make sure doc fields are stored in order
public void testStoredFieldsOrder() throws Throwable {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();

  FieldType customType = new FieldType();
  customType.setStored(true);
  doc.add(newField("zzz", "a b c", customType));
  doc.add(newField("aaa", "a b c", customType));
  doc.add(newField("zzz", "1 2 3", customType));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  Document doc2 = r.document(0);
  Iterator<IndexableField> it = doc2.getFields().iterator();
  assertTrue(it.hasNext());
  Field f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "aaa");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "1 2 3");
  assertFalse(it.hasNext());
  r.close();
  w.close();
  d.close();
}
public void testForceMergeNotNeeded() throws IOException {
  try (Directory dir = newDirectory()) {
    final AtomicBoolean mayMerge = new AtomicBoolean(true);
    final MergeScheduler mergeScheduler = new SerialMergeScheduler() {
      @Override
      public synchronized void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException {
        if (mayMerge.get() == false) {
          MergePolicy.OneMerge merge = writer.getNextMerge();
          if (merge != null) {
            System.out.println("TEST: we should not need any merging, yet merge policy returned merge " + merge);
            throw new AssertionError();
          }
        }
        super.merge(writer, trigger, newMergesFound);
      }
    };
    MergePolicy mp = mergePolicy();
    assumeFalse("this test cannot tolerate random forceMerges", mp.toString().contains("MockRandomMergePolicy"));
    mp.setNoCFSRatio(random().nextBoolean() ? 0 : 1);
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergeScheduler(mergeScheduler);
    iwc.setMergePolicy(mp);

    IndexWriter writer = new IndexWriter(dir, iwc);
    final int numSegments = TestUtil.nextInt(random(), 2, 20);
    for (int i = 0; i < numSegments; ++i) {
      final int numDocs = TestUtil.nextInt(random(), 1, 5);
      for (int j = 0; j < numDocs; ++j) {
        writer.addDocument(new Document());
      }
      writer.getReader().close();
    }
    for (int i = 5; i >= 0; --i) {
      final int segmentCount = writer.getSegmentCount();
      final int maxNumSegments = i == 0 ? 1 : TestUtil.nextInt(random(), 1, 10);
      mayMerge.set(segmentCount > maxNumSegments);
      if (VERBOSE) {
        System.out.println("TEST: now forceMerge(maxNumSegments=" + maxNumSegments + ") vs segmentCount=" + segmentCount);
      }
      writer.forceMerge(maxNumSegments);
    }
    writer.close();
  }
}
// test deleteAll() w/ near real-time reader
public void testDeleteAllNRT() throws IOException {
  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setMaxBufferedDocs(2);
  modifier.setMaxBufferedDeleteTerms(2);

  int id = 0;
  int value = 100;

  for (int i = 0; i < 7; i++) {
    addDoc(modifier, ++id, value);
  }
  modifier.commit();

  IndexReader reader = modifier.getReader();
  assertEquals(7, reader.numDocs());
  reader.close();

  addDoc(modifier, ++id, value);
  addDoc(modifier, ++id, value);

  // Delete all
  modifier.deleteAll();

  reader = modifier.getReader();
  assertEquals(0, reader.numDocs());
  reader.close();

  // Roll it back
  modifier.rollback();
  modifier.close();

  // Validate that the docs are still there
  reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  dir.close();
}
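// The addDoc helper called by testDeleteAllNRT is not shown in this excerpt; a
// minimal sketch, assuming the same pre-4.0 Field API as the test above (the
// exact field names and the "aaa" content value are assumptions, not taken
// from this file):
private void addDoc(IndexWriter modifier, int id, int value) throws IOException {
  Document doc = new Document();
  doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
  doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
  modifier.addDocument(doc);
}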
public void testTermDocsEnum() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document d = new Document();
  d.add(newStringField("f", "j", Field.Store.NO));
  w.addDocument(d);
  w.commit();
  w.addDocument(d);
  IndexReader r = w.getReader();
  w.close();
  DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
  assertEquals(0, de.nextDoc());
  assertEquals(1, de.nextDoc());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
  r.close();
  dir.close();
}
public void testSeparateEnums() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document d = new Document();
  d.add(newStringField("f", "j", Field.Store.NO));
  w.addDocument(d);
  w.commit();
  w.addDocument(d);
  IndexReader r = w.getReader();
  w.close();
  DocsEnum d1 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
  DocsEnum d2 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
  assertEquals(0, d1.nextDoc());
  assertEquals(0, d2.nextDoc());
  r.close();
  dir.close();
}
public void testRandom() throws Exception {
  int num = atLeast(2);
  for (int iter = 0; iter < num; iter++) {
    if (VERBOSE) {
      System.out.println("TEST: iter=" + iter);
    }

    Directory dir = newDirectory();

    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    _TestUtil.keepFullyDeletedSegments(w);

    Map<BytesRef,List<Integer>> docs = new HashMap<BytesRef,List<Integer>>();
    Set<Integer> deleted = new HashSet<Integer>();
    List<BytesRef> terms = new ArrayList<BytesRef>();

    int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
    Document doc = new Document();
    Field f = newStringField("field", "", Field.Store.NO);
    doc.add(f);
    Field id = newStringField("id", "", Field.Store.NO);
    doc.add(id);

    boolean onlyUniqueTerms = random().nextBoolean();
    if (VERBOSE) {
      System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
    }
    Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
    for (int i = 0; i < numDocs; i++) {
      if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
        // re-use existing term
        BytesRef term = terms.get(random().nextInt(terms.size()));
        docs.get(term).add(i);
        f.setStringValue(term.utf8ToString());
      } else {
        String s = _TestUtil.randomUnicodeString(random(), 10);
        BytesRef term = new BytesRef(s);
        if (!docs.containsKey(term)) {
          docs.put(term, new ArrayList<Integer>());
        }
        docs.get(term).add(i);
        terms.add(term);
        uniqueTerms.add(term);
        f.setStringValue(s);
      }
      id.setStringValue("" + i);
      w.addDocument(doc);
      if (random().nextInt(4) == 1) {
        w.commit();
      }
      if (i > 0 && random().nextInt(20) == 1) {
        int delID = random().nextInt(i);
        deleted.add(delID);
        w.deleteDocuments(new Term("id", "" + delID));
        if (VERBOSE) {
          System.out.println("TEST: delete " + delID);
        }
      }
    }

    if (VERBOSE) {
      List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
      Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
      System.out.println("TEST: terms in UTF16 order:");
      for (BytesRef b : termsList) {
        System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
        for (int docID : docs.get(b)) {
          if (deleted.contains(docID)) {
            System.out.println("    " + docID + " (deleted)");
          } else {
            System.out.println("    " + docID);
          }
        }
      }
    }

    IndexReader reader = w.getReader();
    w.close();
    if (VERBOSE) {
      System.out.println("TEST: reader=" + reader);
    }

    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int delDoc : deleted) {
      assertFalse(liveDocs.get(delDoc));
    }

    for (int i = 0; i < 100; i++) {
      BytesRef term = terms.get(random().nextInt(terms.size()));
      if (VERBOSE) {
        System.out.println("TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
      }

      DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
      assertNotNull(docsEnum);

      for (int docID : docs.get(term)) {
        if (!deleted.contains(docID)) {
          assertEquals(docID, docsEnum.nextDoc());
        }
      }
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
    }

    reader.close();
    dir.close();
  }
}
@Test
public void testRollingUpdates() throws Exception {
  Random random = new Random(random().nextLong());
  final BaseDirectoryWrapper dir = newDirectory();
  // test checks for no unref'ed files with the IW helper method,
  // which isn't aware of "tried to delete files"
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
  }
  final LineFileDocs docs = new LineFileDocs(random, true);

  //provider.register(new MemoryCodec());
  if (random().nextBoolean()) {
    Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
  }

  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

  final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  final int SIZE = atLeast(20);
  int id = 0;
  IndexReader r = null;
  IndexSearcher s = null;
  final int numUpdates = (int) (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
  if (VERBOSE) {
    System.out.println("TEST: numUpdates=" + numUpdates);
  }
  int updateCount = 0;
  // TODO: sometimes update ids not in order...
  for (int docIter = 0; docIter < numUpdates; docIter++) {
    final Document doc = docs.nextDoc();
    final String myID = Integer.toString(id);
    if (id == SIZE - 1) {
      id = 0;
    } else {
      id++;
    }
    if (VERBOSE) {
      System.out.println("  docIter=" + docIter + " id=" + id);
    }
    ((Field) doc.getField("docid")).setStringValue(myID);

    Term idTerm = new Term("docid", myID);

    final boolean doUpdate;
    if (s != null && updateCount < SIZE) {
      TopDocs hits = s.search(new TermQuery(idTerm), 1);
      assertEquals(1, hits.totalHits);
      doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
      if (VERBOSE) {
        if (doUpdate) {
          System.out.println("  tryDeleteDocument failed");
        } else {
          System.out.println("  tryDeleteDocument succeeded");
        }
      }
    } else {
      doUpdate = true;
      if (VERBOSE) {
        System.out.println("  no searcher: doUpdate=true");
      }
    }

    updateCount++;

    if (doUpdate) {
      if (random().nextBoolean()) {
        w.updateDocument(idTerm, doc);
      } else {
        // It's OK to not be atomic for this test (no separate thread reopening readers):
        w.deleteDocuments(new TermQuery(idTerm));
        w.addDocument(doc);
      }
    } else {
      w.addDocument(doc);
    }

    if (docIter >= SIZE && random().nextInt(50) == 17) {
      if (r != null) {
        r.close();
      }

      final boolean applyDeletions = random().nextBoolean();

      if (VERBOSE) {
        System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
      }

      r = w.getReader(applyDeletions);
      if (applyDeletions) {
        s = newSearcher(r);
      } else {
        s = null;
      }
      assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
      updateCount = 0;
    }
  }

  if (r != null) {
    r.close();
  }

  w.commit();
  assertEquals(SIZE, w.numDocs());

  w.close();

  TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

  docs.close();

  // LUCENE-4455:
  SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
  long totalBytes = 0;
  for (SegmentCommitInfo sipc : infos) {
    totalBytes += sipc.sizeInBytes();
  }
  long totalBytes2 = 0;
  for (String fileName : dir.listAll()) {
    if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
      totalBytes2 += dir.fileLength(fileName);
    }
  }
  assertEquals(totalBytes2, totalBytes);
  dir.close();
}
public void testDeletes1() throws Exception {
  //IndexWriter.debug2 = System.out;
  Directory dir = new MockDirectoryWrapper(new Random(random().nextLong()), new RAMDirectory());
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergeScheduler(new SerialMergeScheduler());
  iwc.setMaxBufferedDocs(5000);
  iwc.setRAMBufferSizeMB(100);
  RangeMergePolicy fsmp = new RangeMergePolicy(false);
  iwc.setMergePolicy(fsmp);
  IndexWriter writer = new IndexWriter(dir, iwc);
  for (int x = 0; x < 5; x++) {
    writer.addDocument(DocHelper.createDocument(x, "1", 2));
    //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }
  //System.out.println("commit1");
  writer.commit();
  assertEquals(1, writer.segmentInfos.size());
  for (int x = 5; x < 10; x++) {
    writer.addDocument(DocHelper.createDocument(x, "2", 2));
    //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }
  //System.out.println("commit2");
  writer.commit();
  assertEquals(2, writer.segmentInfos.size());

  for (int x = 10; x < 15; x++) {
    writer.addDocument(DocHelper.createDocument(x, "3", 2));
    //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }

  writer.deleteDocuments(new Term("id", "1"));

  writer.deleteDocuments(new Term("id", "11"));

  // flushing without applying deletes means
  // there will still be deletes in the segment infos
  writer.flush(false, false);
  assertTrue(writer.bufferedUpdatesStream.any());

  // getting a reader flushes the pending deletes,
  // so there should not be any more after this
  IndexReader r1 = writer.getReader();
  assertFalse(writer.bufferedUpdatesStream.any());
  r1.close();

  // delete id:2 from the first segment, then
  // merge segments 0 and 1,
  // which should apply the delete of id:2
  writer.deleteDocuments(new Term("id", "2"));
  writer.flush(false, false);
  fsmp = (RangeMergePolicy) writer.getConfig().getMergePolicy();
  fsmp.doMerge = true;
  fsmp.start = 0;
  fsmp.length = 2;
  writer.maybeMerge();

  assertEquals(2, writer.segmentInfos.size());

  // id:2 shouldn't exist anymore because
  // it's been applied in the merge and now it's gone
  IndexReader r2 = writer.getReader();
  int[] id2docs = toDocsArray(new Term("id", "2"), null, r2);
  assertTrue(id2docs == null);
  r2.close();

  /*
  // added docs are in the ram buffer
  for (int x = 15; x < 20; x++) {
    writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2));
    System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }
  assertTrue(writer.numRamDocs() > 0);
  // delete from the ram buffer
  writer.deleteDocuments(new Term("id", Integer.toString(13)));

  Term id3 = new Term("id", Integer.toString(3));

  // delete from the 1st segment
  writer.deleteDocuments(id3);

  assertTrue(writer.numRamDocs() > 0);

  //System.out
  //    .println("segdels1:" + writer.docWriter.deletesToString());

  //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

  // we cause a merge to happen
  fsmp.doMerge = true;
  fsmp.start = 0;
  fsmp.length = 2;
  System.out.println("maybeMerge " + writer.segmentInfos);

  SegmentInfo info0 = writer.segmentInfos.info(0);
  SegmentInfo info1 = writer.segmentInfos.info(1);

  writer.maybeMerge();
  System.out.println("maybeMerge after " + writer.segmentInfos);

  // there should be docs in RAM
  assertTrue(writer.numRamDocs() > 0);

  // assert we've merged the 1 and 2 segments
  // and still have a segment leftover == 2
  assertEquals(2, writer.segmentInfos.size());
  assertFalse(segThere(info0, writer.segmentInfos));
  assertFalse(segThere(info1, writer.segmentInfos));

  //System.out.println("segdels2:" + writer.docWriter.deletesToString());

  //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

  IndexReader r = writer.getReader();
  IndexReader r1 = r.getSequentialSubReaders()[0];
  printDelDocs(r1.getLiveDocs());
  int[] docs = toDocsArray(id3, null, r);
  System.out.println("id3 docs:" + Arrays.toString(docs));
  // there shouldn't be any docs for id:3
  assertTrue(docs == null);
  r.close();

  part2(writer, fsmp);
  */
  //System.out.println("segdels2:" + writer.docWriter.segmentDeletes.toString());
  //System.out.println("close");
  writer.close();
  dir.close();
}
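// toDocsArray is referenced by testDeletes1 but not defined in this excerpt; a
// minimal sketch against the 4.x MultiFields API (the signature and the list
// collection below are assumptions). It returns null when the term has no
// postings at all, which is what the assertTrue(id2docs == null) check above
// relies on:
private static int[] toDocsArray(Term term, Bits liveDocs, IndexReader reader) throws IOException {
  DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, term.field(), term.bytes());
  if (docsEnum == null) {
    return null; // term is absent from the index
  }
  List<Integer> docIDs = new ArrayList<Integer>();
  int docID;
  while ((docID = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    docIDs.add(docID);
  }
  int[] result = new int[docIDs.size()];
  for (int i = 0; i < result.length; i++) {
    result[i] = docIDs.get(i);
  }
  return result;
}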