// test the simple case
public void testSimpleCase() throws IOException {
  String[] keywords = {"1", "2"};
  String[] unindexed = {"Netherlands", "Italy"};
  String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
  String[] text = {"Amsterdam", "Venice"};

  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setUseCompoundFile(true);
  modifier.setMaxBufferedDeleteTerms(1);

  for (int i = 0; i < keywords.length; i++) {
    Document doc = new Document();
    doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
    modifier.addDocument(doc);
  }
  modifier.optimize();
  modifier.commit();

  Term term = new Term("city", "Amsterdam");
  int hitCount = getHitCount(dir, term);
  assertEquals(1, hitCount);
  modifier.deleteDocuments(term);
  modifier.commit();
  hitCount = getHitCount(dir, term);
  assertEquals(0, hitCount);

  modifier.close();
  dir.close();
}
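The getHitCount(Directory, Term) helper used above (and again in testRAMDeletes below) is not shown in this section. A minimal sketch, assuming the 3.x read-only searcher API these tests use; the body is illustrative, not the original helper:

private int getHitCount(Directory dir, Term term) throws IOException {
  // open a read-only searcher over the directory and count TermQuery matches
  IndexSearcher searcher = new IndexSearcher(dir, true);
  int hitCount = searcher.search(new TermQuery(term), null, 1000).totalHits;
  searcher.close();
  return hitCount;
}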
// test when delete terms only apply to disk segments
public void testNonRAMDelete() throws IOException {
  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setMaxBufferedDocs(2);
  modifier.setMaxBufferedDeleteTerms(2);

  int id = 0;
  int value = 100;

  for (int i = 0; i < 7; i++) {
    addDoc(modifier, ++id, value);
  }
  modifier.commit();

  assertEquals(0, modifier.getNumBufferedDocuments());
  assertTrue(0 < modifier.getSegmentCount());

  modifier.commit();

  IndexReader reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  modifier.deleteDocuments(new Term("value", String.valueOf(value)));
  modifier.commit();

  reader = IndexReader.open(dir, true);
  assertEquals(0, reader.numDocs());
  reader.close();
  modifier.close();
  dir.close();
}
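Several of these delete tests call an addDoc(IndexWriter, int, int) helper that is not shown. A minimal sketch, assuming a keyword "id" field and a "value" field matching the delete terms used above; the field names follow the assertions, but the body is an assumption:

private void addDoc(IndexWriter modifier, int id, int value) throws IOException {
  Document doc = new Document();
  // an analyzed body field plus the two keyword fields the tests delete/search by
  doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
  doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
  modifier.addDocument(doc);
}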
@Override
public void doWork() throws Throwable {
  IndexWriter writer1 = new IndexWriter(
      dir1,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(3)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(2)));
  ((ConcurrentMergeScheduler) writer1.getConfig().getMergeScheduler()).setSuppressExceptions();

  // Intentionally use different params so flush/merge
  // happen @ different times
  IndexWriter writer2 = new IndexWriter(
      dir2,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(3)));
  ((ConcurrentMergeScheduler) writer2.getConfig().getMergeScheduler()).setSuppressExceptions();

  update(writer1);
  update(writer2);

  TestTransactions.doFail = true;
  try {
    synchronized (lock) {
      try {
        writer1.prepareCommit();
      } catch (Throwable t) {
        writer1.rollback();
        writer2.rollback();
        return;
      }
      try {
        writer2.prepareCommit();
      } catch (Throwable t) {
        writer1.rollback();
        writer2.rollback();
        return;
      }
      writer1.commit();
      writer2.commit();
    }
  } finally {
    TestTransactions.doFail = false;
  }

  writer1.close();
  writer2.close();
}
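The update(IndexWriter) helper called by doWork() is not included here. A plausible sketch, assuming it only buffers some adds and deletes so that each writer holds uncommitted changes before the two-phase commit; the fields and English.intToEnglish (a Lucene test-framework utility) are illustrative assumptions:

void update(IndexWriter writer) throws IOException {
  for (int j = 0; j < 10; j++) {
    Document d = new Document();
    int n = random().nextInt();
    d.add(new StringField("id", Integer.toString(n), Field.Store.YES));
    d.add(new TextField("contents", English.intToEnglish(n), Field.Store.NO));
    writer.addDocument(d);
  }
  // buffer a delete as well, so the prepareCommit covers both kinds of changes
  writer.deleteDocuments(new Term("id", "0"));
}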
// LUCENE-1274: test writer.prepareCommit()
public void testPrepareCommitRollback() throws IOException {
  MockDirectoryWrapper dir = newDirectory();
  dir.setPreventDoubleWrite(false);

  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(5)));
  writer.commit();

  for (int i = 0; i < 23; i++)
    TestIndexWriter.addDoc(writer);

  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(0, reader.numDocs());

  writer.prepareCommit();

  IndexReader reader2 = DirectoryReader.open(dir);
  assertEquals(0, reader2.numDocs());

  writer.rollback();

  IndexReader reader3 = DirectoryReader.openIfChanged(reader);
  assertNull(reader3);
  assertEquals(0, reader.numDocs());
  assertEquals(0, reader2.numDocs());
  reader.close();
  reader2.close();

  writer = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  for (int i = 0; i < 17; i++)
    TestIndexWriter.addDoc(writer);

  reader = DirectoryReader.open(dir);
  assertEquals(0, reader.numDocs());
  reader.close();

  writer.prepareCommit();

  reader = DirectoryReader.open(dir);
  assertEquals(0, reader.numDocs());
  reader.close();

  writer.commit();
  reader = DirectoryReader.open(dir);
  assertEquals(17, reader.numDocs());
  reader.close();
  writer.close();
  dir.close();
}
// test rollback of deleteAll()
public void testDeleteAllRollback() throws IOException {
  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setMaxBufferedDocs(2);
  modifier.setMaxBufferedDeleteTerms(2);

  int id = 0;
  int value = 100;

  for (int i = 0; i < 7; i++) {
    addDoc(modifier, ++id, value);
  }
  modifier.commit();

  addDoc(modifier, ++id, value);

  IndexReader reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  // Delete all
  modifier.deleteAll();

  // Roll it back
  modifier.rollback();
  modifier.close();

  // Validate that the docs are still there
  reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  dir.close();
}
/**
 * {@link PersistentSnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to enable
 * flexible snapshotting.
 *
 * @param primary the {@link IndexDeletionPolicy} that is used on non-snapshotted commits.
 *     Snapshotted commits, by definition, are not deleted until explicitly released via
 *     {@link #release(String)}.
 * @param dir the {@link Directory} which will be used to persist the snapshots information.
 * @param mode specifies whether a new index should be created, deleting all existing snapshots
 *     information (immediately), or open an existing index, initializing the class with the
 *     snapshots information.
 * @param matchVersion specifies the {@link Version} that should be used when opening the
 *     IndexWriter.
 */
public PersistentSnapshotDeletionPolicy(
    IndexDeletionPolicy primary, Directory dir, OpenMode mode, Version matchVersion)
    throws CorruptIndexException, LockObtainFailedException, IOException {
  super(primary, null);

  // Initialize the index writer over the snapshot directory.
  writer = new IndexWriter(dir, new IndexWriterConfig(matchVersion, null).setOpenMode(mode));
  if (mode != OpenMode.APPEND) {
    // IndexWriter no longer creates a first commit on an empty Directory. So
    // if we were asked to CREATE*, call commit() just to be sure. If the
    // index contains information and mode is CREATE_OR_APPEND, it's a no-op.
    writer.commit();
  }

  try {
    // Initializes the snapshots information. This code should basically run
    // only if mode != CREATE, but if it is, it's no harm as we only open the
    // reader once and immediately close it.
    for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
      registerSnapshotInfo(e.getKey(), e.getValue(), null);
    }
  } catch (RuntimeException e) {
    writer.close(); // don't leave any open file handles
    throw e;
  } catch (IOException e) {
    writer.close(); // don't leave any open file handles
    throw e;
  }
}
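For context, a hedged usage sketch of this policy, assuming the String-id snapshot/release API implied by the {@link #release(String)} reference above; snapshotDir, indexDir, analyzer, and matchVersion are placeholder names:

IndexDeletionPolicy primary = new KeepOnlyLastCommitDeletionPolicy();
PersistentSnapshotDeletionPolicy psdp = new PersistentSnapshotDeletionPolicy(
    primary, snapshotDir, OpenMode.CREATE_OR_APPEND, matchVersion);
IndexWriter writer = new IndexWriter(indexDir,
    new IndexWriterConfig(matchVersion, analyzer).setIndexDeletionPolicy(psdp));

IndexCommit commit = psdp.snapshot("backup-1"); // commit now survives new commits
// ... copy commit.getFileNames() to backup storage ...
psdp.release("backup-1"); // allow the primary policy to delete it again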
// LUCENE-2996: tests that addIndexes(IndexReader) applies existing deletes correctly.
public void testExistingDeletes() throws Exception {
  Directory[] dirs = new Directory[2];
  for (int i = 0; i < dirs.length; i++) {
    dirs[i] = newDirectory();
    IndexWriterConfig conf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dirs[i], conf);
    Document doc = new Document();
    doc.add(new StringField("id", "myid", Field.Store.NO));
    writer.addDocument(doc);
    writer.close();
  }

  IndexWriterConfig conf =
      new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dirs[0], conf);

  // Now delete the document
  writer.deleteDocuments(new Term("id", "myid"));
  IndexReader r = DirectoryReader.open(dirs[1]);
  try {
    writer.addIndexes(r);
  } finally {
    r.close();
  }
  writer.commit();
  assertEquals("Documents from the incoming index should not have been deleted",
      1, writer.numDocs());
  writer.close();

  for (Directory dir : dirs) {
    dir.close();
  }
}
public void testSimpleSkip() throws IOException {
  Directory dir = new CountingRAMDirectory(new RAMDirectory());
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
          .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
          .setMergePolicy(newLogMergePolicy()));
  Term term = new Term("test", "a");
  for (int i = 0; i < 5000; i++) {
    Document d1 = new Document();
    d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
    writer.addDocument(d1);
  }
  writer.commit();
  writer.forceMerge(1);
  writer.close();

  AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

  for (int i = 0; i < 2; i++) {
    counter = 0;
    DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
    checkSkipTo(tp, 14, 185); // no skips
    checkSkipTo(tp, 17, 190); // one skip on level 0
    checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

    // this test would fail if we had only one skip level,
    // because then more bytes would be read from the freqStream
    checkSkipTo(tp, 4800, 250); // one skip on level 2
  }
}
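checkSkipTo and the counter field belong to the surrounding test class; counter is assumed to track bytes read through CountingRAMDirectory. A plausible sketch of the assertion checkSkipTo performs, illustrative rather than the original:

public void checkSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) throws IOException {
  tp.advance(target);
  // the whole point of multi-level skip lists: advancing must not read too many bytes
  if (maxCounter < counter) {
    fail("Too many bytes read: " + counter + " vs " + maxCounter);
  }
  assertEquals("Wrong document " + tp.docID() + " after skipTo target " + target,
      target, tp.docID());
  tp.nextPosition(); // consume one position so the enum state stays valid
}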
/**
 * Remove a stale file (uidIter.term().text()) from the index database (and the xref file)
 *
 * @throws java.io.IOException if an error occurs
 */
private void removeFile() throws IOException {
  String path = Util.uid2url(uidIter.term().utf8ToString());

  for (IndexChangedListener listener : listeners) {
    listener.fileRemove(path);
  }
  writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
  writer.prepareCommit();
  writer.commit();

  File xrefFile;
  if (RuntimeEnvironment.getInstance().isCompressXref()) {
    xrefFile = new File(xrefDir, path + ".gz");
  } else {
    xrefFile = new File(xrefDir, path);
  }
  File parent = xrefFile.getParentFile();

  if (!xrefFile.delete() && xrefFile.exists()) {
    log.log(Level.INFO, "Failed to remove obsolete xref-file: {0}",
        xrefFile.getAbsolutePath());
  }

  // Remove the parent directory if it's empty
  if (parent.delete()) {
    log.log(Level.FINE, "Removed empty xref dir:{0}", parent.getAbsolutePath());
  }

  setDirty();
  for (IndexChangedListener listener : listeners) {
    listener.fileRemoved(path);
  }
}
// Verifies no *.nrm exists when all fields omit norms:
public void testNoNrmFile() throws Throwable {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer = new IndexWriter(
      ram,
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
          .setMaxBufferedDocs(3)
          .setMergePolicy(newLogMergePolicy()));
  LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
  lmp.setMergeFactor(2);
  lmp.setNoCFSRatio(0.0);
  Document d = new Document();

  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  Field f1 = newField("f1", "This field has no norms", customType);
  d.add(f1);

  for (int i = 0; i < 30; i++) {
    writer.addDocument(d);
  }

  writer.commit();

  assertNoNrm(ram);

  // force merge
  writer.forceMerge(1);
  // flush
  writer.close();

  assertNoNrm(ram);
  ram.close();
}
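assertNoNrm is assumed to scan the directory listing for leftover norms files. A minimal sketch; the extension check is an assumption (.nrm being the classic norms extension named in the comment above):

private void assertNoNrm(Directory dir) throws Exception {
  for (String file : dir.listAll()) {
    // with omitNorms on every field, no norms file should ever be written
    assertFalse("unexpected norms file: " + file, file.endsWith(".nrm"));
  }
}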
private void runTest(EnumSet<Type> types, TestType type)
    throws CorruptIndexException, IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  int num_1 = atLeast(200);
  int num_2 = atLeast(200);
  int num_3 = atLeast(200);
  long[] values = new long[num_1 + num_2 + num_3];
  index(writer, randomValueType(types, random()), values, 0, num_1);
  writer.commit();

  index(writer, randomValueType(types, random()), values, num_1, num_2);
  writer.commit();

  if (random().nextInt(4) == 0) {
    // once in a while use addIndexes
    writer.forceMerge(1);

    Directory dir_2 = newDirectory();
    IndexWriter writer_2 = new IndexWriter(
        dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    index(writer_2, randomValueType(types, random()), values, num_1 + num_2, num_3);
    writer_2.commit();
    writer_2.close();
    if (rarely()) {
      writer.addIndexes(dir_2);
    } else {
      // do a real merge here
      IndexReader open = maybeWrapReader(IndexReader.open(dir_2));
      writer.addIndexes(open);
      open.close();
    }
    dir_2.close();
  } else {
    index(writer, randomValueType(types, random()), values, num_1 + num_2, num_3);
  }

  writer.forceMerge(1);
  writer.close();
  assertValues(type, dir, values);
  dir.close();
}
// test deleteAll()
public void testDeleteAll() throws IOException {
  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setMaxBufferedDocs(2);
  modifier.setMaxBufferedDeleteTerms(2);

  int id = 0;
  int value = 100;

  for (int i = 0; i < 7; i++) {
    addDoc(modifier, ++id, value);
  }
  modifier.commit();

  IndexReader reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  // Add 1 doc (so we will have something buffered)
  addDoc(modifier, 99, value);

  // Delete all
  modifier.deleteAll();

  // Delete all shouldn't be on disk yet
  reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  // Add a doc and update a doc (after the deleteAll, before the commit)
  addDoc(modifier, 101, value);
  updateDoc(modifier, 102, value);

  // commit the delete all
  modifier.commit();

  // Validate that only the two docs added after deleteAll remain
  reader = IndexReader.open(dir, true);
  assertEquals(2, reader.numDocs());
  reader.close();

  modifier.close();
  dir.close();
}
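updateDoc, the counterpart to the addDoc sketch earlier, is also assumed rather than shown. A minimal sketch using IndexWriter.updateDocument, which atomically deletes any document matching the term and adds the replacement:

private void updateDoc(IndexWriter modifier, int id, int value) throws IOException {
  Document doc = new Document();
  doc.add(new Field("content", "bbb", Field.Store.NO, Field.Index.ANALYZED));
  doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
  // delete-by-term plus add, in one call
  modifier.updateDocument(new Term("id", String.valueOf(id)), doc);
}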
// LUCENE-2593
public void testCorruptionAfterDiskFullDuringMerge() throws IOException {
  MockDirectoryWrapper dir = newDirectory();
  // IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
  //     new MockAnalyzer(random)).setReaderPooling(true));
  IndexWriter w = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
          .setMergeScheduler(new SerialMergeScheduler())
          .setReaderPooling(true)
          .setMergePolicy(newLogMergePolicy(2)));
  _TestUtil.keepFullyDeletedSegments(w);

  ((LogMergePolicy) w.getMergePolicy()).setMergeFactor(2);

  Document doc = new Document();
  doc.add(newField("f", "doctor who", Field.Store.YES, Field.Index.ANALYZED));
  w.addDocument(doc);
  w.commit();

  w.deleteDocuments(new Term("f", "who"));
  w.addDocument(doc);

  // disk fills up!
  FailTwiceDuringMerge ftdm = new FailTwiceDuringMerge();
  ftdm.setDoFail();
  dir.failOn(ftdm);

  try {
    w.commit();
    fail("fake disk full IOExceptions not hit");
  } catch (IOException ioe) {
    // expected
    assertTrue(ftdm.didFail1 || ftdm.didFail2);
  }
  _TestUtil.checkIndex(dir);
  ftdm.clearDoFail();
  w.addDocument(doc);
  w.close();
  dir.close();
}
private SegmentCommitInfo indexDoc(IndexWriter writer, String fileName) throws Exception {
  File file = new File(workDir, fileName);
  Document doc = new Document();
  InputStreamReader is =
      new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8);
  doc.add(new TextField("contents", is));
  writer.addDocument(doc);
  writer.commit();
  is.close();
  return writer.newestSegment();
}
// test that batched delete terms are flushed together
public void testBatchDeletes() throws IOException {
  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setMaxBufferedDocs(2);
  modifier.setMaxBufferedDeleteTerms(2);

  int id = 0;
  int value = 100;

  for (int i = 0; i < 7; i++) {
    addDoc(modifier, ++id, value);
  }
  modifier.commit();

  IndexReader reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  id = 0;
  modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
  modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
  modifier.commit();

  reader = IndexReader.open(dir, true);
  assertEquals(5, reader.numDocs());
  reader.close();

  Term[] terms = new Term[3];
  for (int i = 0; i < terms.length; i++) {
    terms[i] = new Term("id", String.valueOf(++id));
  }
  modifier.deleteDocuments(terms);
  modifier.commit();

  reader = IndexReader.open(dir, true);
  assertEquals(2, reader.numDocs());
  reader.close();

  modifier.close();
  dir.close();
}
public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir)
    throws IOException, InterruptedException {
  Map<String, Document> docs = new HashMap<>();
  IndexWriter w = RandomIndexWriter.mockIndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setOpenMode(OpenMode.CREATE)
          .setRAMBufferSizeMB(0.1)
          .setMaxBufferedDocs(maxBufferedDocs)
          .setMergePolicy(newLogMergePolicy()),
      new YieldTestPoint());
  w.commit();
  LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
  lmp.setNoCFSRatio(0.0);
  lmp.setMergeFactor(mergeFactor);
  /*
   * w.setMaxMergeDocs(Integer.MAX_VALUE);
   * w.setMaxFieldLength(10000);
   * w.setRAMBufferSizeMB(1);
   * w.setMergeFactor(10);
   */

  threads = new IndexingThread[nThreads];
  for (int i = 0; i < threads.length; i++) {
    IndexingThread th = new IndexingThread();
    th.w = w;
    th.base = 1000000 * i;
    th.range = range;
    th.iterations = iterations;
    threads[i] = th;
  }

  for (int i = 0; i < threads.length; i++) {
    threads[i].start();
  }
  for (int i = 0; i < threads.length; i++) {
    threads[i].join();
  }

  // w.forceMerge(1);
  // w.close();

  for (int i = 0; i < threads.length; i++) {
    IndexingThread th = threads[i];
    synchronized (th) {
      docs.putAll(th.docs);
    }
  }

  TestUtil.checkIndex(dir);
  DocsAndWriter dw = new DocsAndWriter();
  dw.docs = docs;
  dw.writer = w;
  return dw;
}
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
  Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
  for (TestFieldSetting field : testDocs[0].fieldSettings) {
    if (field.storedPayloads) {
      mapping.put(field.name, new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
          TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
          filter = new TypeAsPayloadTokenFilter(filter);
          return new TokenStreamComponents(tokenizer, filter);
        }
      });
    }
  }
  PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
      new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

  Directory dir = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
  conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  for (TestDoc doc : testDocs) {
    Document d = new Document();
    d.add(new Field("id", doc.id, StringField.TYPE_STORED));
    for (int i = 0; i < doc.fieldContent.length; i++) {
      FieldType type = new FieldType(TextField.TYPE_STORED);
      TestFieldSetting fieldSetting = doc.fieldSettings[i];

      type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
      type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
      type.setStoreTermVectorPositions(
          fieldSetting.storedPositions || fieldSetting.storedPayloads
              || fieldSetting.storedOffset);
      type.setStoreTermVectors(true);
      type.freeze();
      d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
    }
    writer.updateDocument(new Term("id", doc.id), d);
    writer.commit();
  }
  writer.close();

  return DirectoryReader.open(dir);
}
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in a separate directory. However, further documents have been added since
 * then, and they now need a ranked list of reference objects. So we (i) find all new documents
 * that are missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  int numDocs = reader.numDocs();
  boolean hasDeletions = reader.hasDeletions();
  int countUpdated = 0;

  IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
  perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
  PerFieldAnalyzerWrapper aWrapper =
      new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

  IndexWriter iw = new IndexWriter(
      FSDirectory.open(new File(indexPath)),
      new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  StringBuilder sb = new StringBuilder(256);
  // Needed to check whether the document is deleted.
  Bits liveDocs = MultiFields.getLiveDocs(reader);

  for (int i = 0; i < numDocs; i++) {
    if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
    Document document = reader.document(i);
    if (document.getField("ro-order") == null) { // if the field is not here we create it.
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);
      countUpdated++;
    }

    // progress report
    progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

    // debug: System.out.println("countUpdated = " + countUpdated);
  }
  iw.commit();
  iw.close();
}
public static void main(String[] args) throws IOException {
  Version version = Version.LUCENE_43;

  // Create a Document
  Document document = new Document();
  Field field =
      new TextField("fieldName", "Hello man can you see this in index!", Field.Store.YES);
  field.setBoost(2.0f);
  Field fieldStore = new StringField("fieldName2", "fieldValueOnlyStore", Field.Store.YES);
  //
  // FieldType fieldAllType = new FieldType();
  // fieldAllType.setIndexed(true);
  // fieldAllType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // fieldAllType.setOmitNorms(false);
  // fieldAllType.setStored(true);
  // fieldAllType.setStoreTermVectorOffsets(true);
  // fieldAllType.setStoreTermVectorPayloads(true);
  // fieldAllType.setStoreTermVectorPositions(true);
  // fieldAllType.setStoreTermVectors(true);
  // fieldAllType.setTokenized(true);
  // Field fieldAll = new Field("name", "all things need to store", fieldAllType);

  document.add(field);
  // document.add(new BinaryDocValuesField("name", new BytesRef("hello")));
  document.add(fieldStore);
  // document.add(fieldAll);

  Document doc2 = new Document();
  doc2.add(field);

  // Create a directory to hold the index
  Directory directory = FSDirectory.open(new File("/home/waf/tmp/index"));
  // Directory directory = new RAMDirectory();

  // Configure the index writer
  Analyzer analyzer = new StandardAnalyzer(version);
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(version, analyzer);
  // indexWriterConfig.setCodec(new Lucene40Codec());

  // Initialize the index writer and add the documents to the index
  IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
  indexWriter.addDocument(document);
  indexWriter.addDocument(doc2);
  indexWriter.commit();
  indexWriter.close();

  // Query the index. Note we search "fieldName", which was actually indexed;
  // the original queried "name", a field present only in the commented-out fieldAll.
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  TopDocs result = indexSearcher.search(new TermQuery(new Term("fieldName", "hello")), 10);
  System.out.println(result.totalHits);
  reader.close();
}
// test when delete terms only apply to ram segments
public void testRAMDeletes() throws IOException {
  for (int t = 0; t < 2; t++) {
    Directory dir = new MockRAMDirectory();
    IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
        IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(4);
    modifier.setMaxBufferedDeleteTerms(4);

    int id = 0;
    int value = 100;

    addDoc(modifier, ++id, value);
    if (0 == t)
      modifier.deleteDocuments(new Term("value", String.valueOf(value)));
    else
      modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value))));
    addDoc(modifier, ++id, value);
    if (0 == t) {
      modifier.deleteDocuments(new Term("value", String.valueOf(value)));
      assertEquals(2, modifier.getNumBufferedDeleteTerms());
      assertEquals(1, modifier.getBufferedDeleteTermsSize());
    } else
      modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value))));
    addDoc(modifier, ++id, value);
    assertEquals(0, modifier.getSegmentCount());
    modifier.commit();

    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(1, reader.numDocs());

    int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
    assertEquals(1, hitCount);
    reader.close();
    modifier.close();
    dir.close();
  }
}
public void testNoWaitClose() throws IOException {
  Directory directory = newDirectory();
  Document doc = new Document();
  Field idField = newStringField("id", "", Field.Store.YES);
  doc.add(idField);

  IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(100)));

  for (int iter = 0; iter < 10; iter++) {
    for (int j = 0; j < 201; j++) {
      idField.setStringValue(Integer.toString(iter * 201 + j));
      writer.addDocument(doc);
    }

    int delID = iter * 201;
    for (int j = 0; j < 20; j++) {
      writer.deleteDocuments(new Term("id", Integer.toString(delID)));
      delID += 5;
    }

    // Force a bunch of merge threads to kick off so we
    // stress out aborting them on close:
    ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3);
    writer.addDocument(doc);
    writer.commit();

    writer.close(false);

    IndexReader reader = DirectoryReader.open(directory);
    assertEquals((1 + iter) * 182, reader.numDocs());
    reader.close();

    // Reopen
    writer = new IndexWriter(
        directory,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
            .setOpenMode(OpenMode.APPEND)
            .setMergePolicy(newLogMergePolicy(100)));
  }
  writer.close();
  directory.close();
}
/**
 * Persists all snapshots information. If the given id and segment are not null, it persists
 * their information as well.
 */
private void persistSnapshotInfos(String id, String segment) throws IOException {
  writer.deleteAll();
  Document d = new Document();
  FieldType ft = new FieldType();
  ft.setStored(true);
  d.add(new Field(SNAPSHOTS_ID, "", ft));
  for (Entry<String, String> e : super.getSnapshots().entrySet()) {
    d.add(new Field(e.getKey(), e.getValue(), ft));
  }
  if (id != null) {
    d.add(new Field(id, segment, ft));
  }
  writer.addDocument(d);
  writer.commit();
}
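The readSnapshotsInfo(Directory) method consumed by the constructor earlier is the read-side counterpart of this method. A minimal sketch, assuming the one-document layout written above, where every stored field except the SNAPSHOTS_ID marker is an id-to-segments-file pair:

private Map<String, String> readSnapshotsInfo(Directory dir) throws IOException {
  IndexReader r = DirectoryReader.open(dir);
  Map<String, String> snapshots = new HashMap<String, String>();
  try {
    if (r.numDocs() > 0) {
      Document doc = r.document(0); // the single live snapshots document
      for (IndexableField f : doc.getFields()) {
        if (!SNAPSHOTS_ID.equals(f.name())) {
          snapshots.put(f.name(), f.stringValue());
        }
      }
    }
  } finally {
    r.close();
  }
  return snapshots;
}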
// LUCENE-1274
public void testPrepareCommitNoChanges() throws IOException {
  Directory dir = newDirectory();

  IndexWriter writer = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  writer.prepareCommit();
  writer.commit();
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(0, reader.numDocs());
  reader.close();
  dir.close();
}
public void testTypeChangeAfterCommitAndDeleteAll() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);
  writer.commit();
  writer.deleteAll();
  doc = new Document();
  doc.add(new SortedDocValuesField("dv", new BytesRef("foo")));
  writer.addDocument(doc);
  writer.close();
  dir.close();
}
@Override
public void run() {
  try {
    long ramSize = 0;
    while (pendingDocs.decrementAndGet() > -1) {
      Document doc = docs.nextDoc();
      writer.addDocument(doc);
      long newRamSize = writer.ramSizeInBytes();
      if (newRamSize != ramSize) {
        ramSize = newRamSize;
      }
      if (doRandomCommit) {
        if (rarely()) {
          writer.commit();
        }
      }
    }
    writer.commit();
  } catch (Throwable ex) {
    System.out.println("FAILED exc:");
    ex.printStackTrace(System.out);
    throw new RuntimeException(ex);
  }
}
// LUCENE-1044: test writer.commit() when autoCommit=false
public void testForceCommit() throws IOException {
  Directory dir = newDirectory();

  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(5)));
  writer.commit();

  for (int i = 0; i < 23; i++)
    TestIndexWriter.addDoc(writer);

  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(0, reader.numDocs());
  writer.commit();
  DirectoryReader reader2 = DirectoryReader.openIfChanged(reader);
  assertNotNull(reader2);
  assertEquals(0, reader.numDocs());
  assertEquals(23, reader2.numDocs());
  reader.close();

  for (int i = 0; i < 17; i++)
    TestIndexWriter.addDoc(writer);
  assertEquals(23, reader2.numDocs());
  reader2.close();
  reader = DirectoryReader.open(dir);
  assertEquals(23, reader.numDocs());
  reader.close();
  writer.commit();

  reader = DirectoryReader.open(dir);
  assertEquals(40, reader.numDocs());
  reader.close();
  writer.close();
  dir.close();
}
// Test that deletes committed after a merge started and
// before it finishes, are correctly merged back:
public void testDeleteMerging() throws IOException {
  Directory directory = newDirectory();

  LogDocMergePolicy mp = new LogDocMergePolicy();
  // Force degenerate merging so we can get a mix of
  // merging of segments with and without deletes at the
  // start:
  mp.setMinMergeDocs(1000);
  IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMergePolicy(mp));

  Document doc = new Document();
  Field idField = newStringField("id", "", Field.Store.YES);
  doc.add(idField);
  for (int i = 0; i < 10; i++) {
    if (VERBOSE) {
      System.out.println("\nTEST: cycle");
    }
    for (int j = 0; j < 100; j++) {
      idField.setStringValue(Integer.toString(i * 100 + j));
      writer.addDocument(doc);
    }

    int delID = i;
    while (delID < 100 * (1 + i)) {
      if (VERBOSE) {
        System.out.println("TEST: del " + delID);
      }
      writer.deleteDocuments(new Term("id", "" + delID));
      delID += 10;
    }

    writer.commit();
  }

  writer.close();
  IndexReader reader = DirectoryReader.open(directory);
  // Verify that we did not lose any deletes...
  assertEquals(450, reader.numDocs());
  reader.close();
  directory.close();
}
/**
 * Writes the document to the directory using the analyzer and the similarity score; returns
 * the SegmentInfo describing the new segment
 */
public static SegmentCommitInfo writeDoc(
    Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
    throws IOException {
  IndexWriter writer = new IndexWriter(
      dir,
      new IndexWriterConfig(/* LuceneTestCase.newIndexWriterConfig(random, */
              TEST_VERSION_CURRENT, analyzer)
          .setSimilarity(
              similarity == null ? IndexSearcher.getDefaultSimilarity() : similarity));
  // writer.setNoCFSRatio(0.0);
  writer.addDocument(doc);
  writer.commit();
  SegmentCommitInfo info = writer.newestSegment();
  writer.close();
  return info;
}
// Verifies no *.prx exists when all fields omit term positions:
public void testNoPrxFile() throws Throwable {
  Directory ram = newDirectory();
  if (ram instanceof MockDirectoryWrapper) {
    // we verify some files get deleted
    ((MockDirectoryWrapper) ram).setEnableVirusScanner(false);
  }
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer = new IndexWriter(
      ram,
      newIndexWriterConfig(analyzer)
          .setMaxBufferedDocs(3)
          .setMergePolicy(newLogMergePolicy()));
  LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
  lmp.setMergeFactor(2);
  lmp.setNoCFSRatio(0.0);
  Document d = new Document();

  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field f1 = newField("f1", "This field has term freqs", ft);
  d.add(f1);

  for (int i = 0; i < 30; i++)
    writer.addDocument(d);

  writer.commit();

  assertNoPrx(ram);

  // now add some documents with positions, and check there is no prox after optimization
  d = new Document();
  f1 = newTextField("f1", "This field has positions", Field.Store.NO);
  d.add(f1);

  for (int i = 0; i < 30; i++)
    writer.addDocument(d);

  // force merge
  writer.forceMerge(1);
  // flush
  writer.close();

  assertNoPrx(ram);
  ram.close();
}
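assertNoPrx presumably mirrors the assertNoNrm sketch earlier. A minimal sketch; .prx is the classic positions extension named in the comment above, and checking the 4.x codec counterpart .pos as well is an assumption:

private void assertNoPrx(Directory dir) throws Exception {
  for (String file : dir.listAll()) {
    // with positions omitted everywhere, no positions file should be written
    assertFalse("unexpected positions file: " + file,
        file.endsWith(".prx") || file.endsWith(".pos"));
  }
}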
public void testTermDocsEnum() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document d = new Document();
  d.add(newStringField("f", "j", Field.Store.NO));
  w.addDocument(d);
  w.commit();
  w.addDocument(d);
  IndexReader r = w.getReader();
  w.close();
  DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
  assertEquals(0, de.nextDoc());
  assertEquals(1, de.nextDoc());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
  r.close();
  dir.close();
}