@Test
public void testVectorHighlighter() throws Exception {
  Directory dir = new RAMDirectory();
  IndexWriter indexWriter =
      new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

  Document document = new Document();
  document.add(new TextField("_id", "1", Field.Store.YES));
  document.add(
      new Field(
          "content",
          "the big bad dog",
          Field.Store.YES,
          Field.Index.ANALYZED,
          Field.TermVector.WITH_POSITIONS_OFFSETS));
  indexWriter.addDocument(document);

  IndexReader reader = DirectoryReader.open(indexWriter, true);
  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

  assertThat(topDocs.totalHits, equalTo(1));

  XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
  String fragment =
      highlighter.getBestFragment(
          highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
          reader,
          topDocs.scoreDocs[0].doc,
          "content",
          30);
  assertThat(fragment, notNullValue());
  assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}
/**
 * {@link PersistentSnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to enable
 * flexible snapshotting.
 *
 * @param primary the {@link IndexDeletionPolicy} that is used on non-snapshotted commits.
 *     Snapshotted commits, by definition, are not deleted until explicitly released via
 *     {@link #release(String)}.
 * @param dir the {@link Directory} which will be used to persist the snapshots information.
 * @param mode specifies whether a new index should be created, deleting all existing snapshots
 *     information (immediately), or an existing index should be opened, initializing the class
 *     with the snapshots information.
 * @param matchVersion specifies the {@link Version} that should be used when opening the
 *     IndexWriter.
 */
public PersistentSnapshotDeletionPolicy(
    IndexDeletionPolicy primary, Directory dir, OpenMode mode, Version matchVersion)
    throws CorruptIndexException, LockObtainFailedException, IOException {
  super(primary, null);

  // Initialize the index writer over the snapshot directory.
  writer = new IndexWriter(dir, new IndexWriterConfig(matchVersion, null).setOpenMode(mode));
  if (mode != OpenMode.APPEND) {
    // IndexWriter no longer creates a first commit on an empty Directory. So
    // if we were asked to CREATE*, call commit() just to be sure. If the
    // index contains information and mode is CREATE_OR_APPEND, it's a no-op.
    writer.commit();
  }

  try {
    // Initializes the snapshots information. This code should basically run
    // only if mode != CREATE, but if it is, it's no harm as we only open the
    // reader once and immediately close it.
    for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
      registerSnapshotInfo(e.getKey(), e.getValue(), null);
    }
  } catch (RuntimeException e) {
    writer.close(); // don't leave any open file handles
    throw e;
  } catch (IOException e) {
    writer.close(); // don't leave any open file handles
    throw e;
  }
}
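// A minimal usage sketch for the constructor above, assuming the same-era
// Lucene API (String-keyed snapshot()/release() inherited from
// SnapshotDeletionPolicy); indexDir, analyzer, matchVersion, and the snapshot
// path are hypothetical placeholders, not part of the original source.
Directory snapshotDir = FSDirectory.open(new File("/path/to/snapshots"));
PersistentSnapshotDeletionPolicy policy =
    new PersistentSnapshotDeletionPolicy(
        new KeepOnlyLastCommitDeletionPolicy(),
        snapshotDir,
        OpenMode.CREATE_OR_APPEND,
        matchVersion);
IndexWriter writer =
    new IndexWriter(
        indexDir, new IndexWriterConfig(matchVersion, analyzer).setIndexDeletionPolicy(policy));
writer.addDocument(new Document());
writer.commit(); // a commit must exist before it can be snapshotted
IndexCommit commit = policy.snapshot("backup-1"); // persisted; survives JVM restarts
// ... back up the files listed by commit.getFileNames() ...
policy.release("backup-1"); // the commit may now be deleted by the primary policy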
public void testBackToTheFuture() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));

  Document doc = new Document();
  doc.add(newStringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(newStringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  DirectoryReader r1 = DirectoryReader.open(iw, true);
  iw.deleteDocuments(new Term("foo", "baz"));
  DirectoryReader r2 = DirectoryReader.open(iw, true);
  FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo");

  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo");
  assertEquals(2, v.getValueCount());
  v.setDocument(1);
  assertEquals(1, v.nextOrd());

  iw.close();
  r1.close();
  r2.close();
  dir.close();
}
public static void addDocuments(
    String searchEngineId, long companyId, Collection<Document> documents)
    throws SearchException {

  if (isIndexReadOnly() || (documents == null) || documents.isEmpty()) {
    return;
  }

  SearchEngine searchEngine = getSearchEngine(searchEngineId);
  IndexWriter indexWriter = searchEngine.getIndexWriter();

  for (Document document : documents) {
    if (_log.isDebugEnabled()) {
      _log.debug("Add document " + document.toString());
    }

    _searchPermissionChecker.addPermissionFields(companyId, document);
  }

  SearchContext searchContext = new SearchContext();

  searchContext.setCompanyId(companyId);
  searchContext.setSearchEngineId(searchEngineId);

  indexWriter.addDocuments(searchContext, documents);
}
// test when delete terms only apply to disk segments
public void testNonRAMDelete() throws IOException {
  Directory dir = new MockRAMDirectory();
  IndexWriter modifier =
      new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setMaxBufferedDocs(2);
  modifier.setMaxBufferedDeleteTerms(2);

  int id = 0;
  int value = 100;

  for (int i = 0; i < 7; i++) {
    addDoc(modifier, ++id, value);
  }
  modifier.commit();

  assertEquals(0, modifier.getNumBufferedDocuments());
  assertTrue(0 < modifier.getSegmentCount());

  modifier.commit();

  IndexReader reader = IndexReader.open(dir, true);
  assertEquals(7, reader.numDocs());
  reader.close();

  modifier.deleteDocuments(new Term("value", String.valueOf(value)));
  modifier.commit();

  reader = IndexReader.open(dir, true);
  assertEquals(0, reader.numDocs());
  reader.close();

  modifier.close();
  dir.close();
}
// case 3: tail segments, invariants hold, copy, invariants hold
public void testNoMergeAfterCopy() throws IOException {
  // main directory
  Directory dir = newDirectory();
  // auxiliary directory
  Directory aux = newDirectory();

  setUpDirs(dir, aux);

  IndexWriter writer =
      newWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setOpenMode(OpenMode.APPEND)
              .setMaxBufferedDocs(10)
              .setMergePolicy(newLogMergePolicy(4)));

  writer.addIndexes(
      aux, new MockDirectoryWrapper(random(), new RAMDirectory(aux, newIOContext(random()))));
  assertEquals(1060, writer.maxDoc());
  assertEquals(1000, writer.getDocCount(0));
  writer.close();

  // make sure the index is correct
  verifyNumDocs(dir, 1060);
  dir.close();
  aux.close();
}
// LUCENE-1727: make sure doc fields are stored in order
public void testStoredFieldsOrder() throws Throwable {
  Directory d = newDirectory();
  IndexWriter w =
      new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();

  FieldType customType = new FieldType();
  customType.setStored(true);

  doc.add(newField("zzz", "a b c", customType));
  doc.add(newField("aaa", "a b c", customType));
  doc.add(newField("zzz", "1 2 3", customType));
  w.addDocument(doc);

  IndexReader r = w.getReader();
  Document doc2 = r.document(0);
  Iterator<IndexableField> it = doc2.getFields().iterator();
  assertTrue(it.hasNext());
  Field f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "aaa");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "1 2 3");
  assertFalse(it.hasNext());

  r.close();
  w.close();
  d.close();
}
// LUCENE-1130: make sure immediate disk full on creating
// an IndexWriter (hit during DW.ThreadState.init()) is
// OK:
public void testImmediateDiskFull() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setMaxBufferedDocs(2)
              .setMergeScheduler(new ConcurrentMergeScheduler()));
  dir.setMaxSizeInBytes(Math.max(1, dir.getRecomputedActualSizeInBytes()));
  final Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
  try {
    writer.addDocument(doc);
    fail("did not hit disk full");
  } catch (IOException ioe) {
    // expected
  }
  // Without fix for LUCENE-1130: this call will hang:
  try {
    writer.addDocument(doc);
    fail("did not hit disk full");
  } catch (IOException ioe) {
    // expected
  }
  try {
    writer.close(false);
    fail("did not hit disk full");
  } catch (IOException ioe) {
    // expected
  }

  // Make sure once disk space is avail again, we can
  // cleanly close:
  dir.setMaxSizeInBytes(0);
  writer.close(false);
  dir.close();
}
/**
 * Tests that index merging (specifically addIndexes(Directory...)) doesn't change the index
 * order of documents.
 */
public void testLucene() throws IOException {
  int num = 100;

  Directory indexA = newDirectory();
  Directory indexB = newDirectory();

  fillIndex(random(), indexA, 0, num);
  boolean fail = verifyIndex(indexA, 0);
  if (fail) {
    fail("Index a is invalid");
  }

  fillIndex(random(), indexB, num, num);
  fail = verifyIndex(indexB, num);
  if (fail) {
    fail("Index b is invalid");
  }

  Directory merged = newDirectory();

  IndexWriter writer =
      new IndexWriter(
          merged,
          newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(2)));
  writer.addIndexes(indexA, indexB);
  writer.forceMerge(1);
  writer.close();

  fail = verifyIndex(merged, 0);
  assertFalse("The merged index is invalid", fail);

  indexA.close();
  indexB.close();
  merged.close();
}
public void testTotalBytesSize() throws Exception {
  Directory d = newDirectory();
  if (d instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) d).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(5);
  iwc.setMergeScheduler(new TrackingCMS());
  if (TestUtil.getPostingsFormat("id").equals("SimpleText")) {
    // don't run the "id" field through SimpleText; force a binary postings format instead
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
  }
  IndexWriter w = new IndexWriter(d, iwc);
  for (int i = 0; i < 1000; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, Field.Store.NO));
    w.addDocument(doc);

    if (random().nextBoolean()) {
      w.deleteDocuments(new Term("id", "" + random().nextInt(i + 1)));
    }
  }
  assertTrue(((TrackingCMS) w.getConfig().getMergeScheduler()).totMergedBytes != 0);
  w.close();
  d.close();
}
// LUCENE-1219
public void testBinaryFieldOffsetLength() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w =
      new IndexWriter(
          dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  byte[] b = new byte[50];
  for (int i = 0; i < 50; i++) {
    b[i] = (byte) (i + 77);
  }

  Document doc = new Document();
  Field f = new StoredField("binary", b, 10, 17);
  byte[] bx = f.binaryValue().bytes;
  assertTrue(bx != null);
  assertEquals(50, bx.length);
  assertEquals(10, f.binaryValue().offset);
  assertEquals(17, f.binaryValue().length);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader ir = DirectoryReader.open(dir);
  Document doc2 = ir.document(0);
  IndexableField f2 = doc2.getField("binary");
  b = f2.binaryValue().bytes;
  assertTrue(b != null);
  assertEquals(17, b.length); // only the 17-byte slice is stored
  assertEquals(87, b[0]); // original byte at offset 10: 10 + 77
  ir.close();
  dir.close();
}
public static void indexSerial(Random random, Map<String, Document> docs, Directory dir)
    throws IOException {
  IndexWriter w =
      new IndexWriter(
          dir,
          LuceneTestCase.newIndexWriterConfig(
                  random, TEST_VERSION_CURRENT, new MockAnalyzer(random))
              .setMergePolicy(newLogMergePolicy()));

  // index all docs in a single thread
  Iterator<Document> iter = docs.values().iterator();
  while (iter.hasNext()) {
    Document d = iter.next();
    ArrayList<Field> fields = new ArrayList<>();
    fields.addAll(d.getFields());
    // put fields in same order each time
    Collections.sort(fields, fieldNameComparator);

    Document d1 = new Document();
    for (int i = 0; i < fields.size(); i++) {
      d1.add(fields.get(i));
    }
    w.addDocument(d1);
    // System.out.println("indexing "+d1);
  }
  w.close();
}
private static Directory index(Analyzer analyzer, String processingPath) {
  RAMDirectory directory = null;
  IndexWriter indexWriter = null;
  try {
    directory = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    indexWriter = new IndexWriter(directory, iwc);
    File file = new File(processingPath);
    index_h("", file, indexWriter);
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    if (indexWriter != null) {
      try {
        indexWriter.close();
      } catch (CorruptIndexException e1) {
        e1.printStackTrace(); // index is unusable; surface the error
      } catch (IOException e1) {
        e1.printStackTrace(); // close failed; the returned directory may be incomplete
      }
    }
  }
  return directory;
}
public void testUpdateSameDoc() throws Exception {
  final Directory dir = newDirectory();

  final LineFileDocs docs = new LineFileDocs(random());
  for (int r = 0; r < 3; r++) {
    final IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
    final int numUpdates = atLeast(20);
    int numThreads = TestUtil.nextInt(random(), 2, 6);
    IndexingThread[] threads = new IndexingThread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new IndexingThread(docs, w, numUpdates);
      threads[i].start();
    }

    for (int i = 0; i < numThreads; i++) {
      threads[i].join();
    }

    w.close();
  }

  IndexReader open = DirectoryReader.open(dir);
  assertEquals(1, open.numDocs());
  open.close();
  docs.close();
  dir.close();
}
// LUCENE-1262
public void testExceptions() throws Throwable {
  Path indexDir = createTempDir("testfieldswriterexceptions");

  Directory fsDir = newFSDirectory(indexDir);
  FaultyFSDirectory dir = new FaultyFSDirectory(fsDir);
  IndexWriterConfig iwc =
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, iwc);
  for (int i = 0; i < 2; i++) {
    writer.addDocument(testDoc);
  }
  writer.forceMerge(1);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  dir.startFailing();

  boolean exc = false;

  for (int i = 0; i < 2; i++) {
    try {
      reader.document(i);
    } catch (IOException ioe) {
      // expected
      exc = true;
    }
    try {
      reader.document(i);
    } catch (IOException ioe) {
      // expected
      exc = true;
    }
  }
  assertTrue(exc);
  reader.close();
  dir.close();
}
private Directory makeIndex() throws Exception {
  Directory dir = newDirectory();
  try {
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(
                TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setUseCompoundFile(false);

    for (int d = 1; d <= NUM_DOCS; d++) {
      Document doc = new Document();
      for (int f = 1; f <= NUM_FIELDS; f++) {
        doc.add(
            newField(
                "f" + f,
                data[f % data.length] + '#' + data[random.nextInt(data.length)],
                Field.Store.YES,
                Field.Index.ANALYZED));
      }
      writer.addDocument(doc);
    }
    writer.close();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  return dir;
}
public void testDocValuesUnstored() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwconfig);
  for (int i = 0; i < 50; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", i));
    doc.add(new TextField("docId", "" + i, Field.Store.YES));
    writer.addDocument(doc);
  }
  DirectoryReader r = writer.getReader();
  SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
  FieldInfos fi = slow.getFieldInfos();
  FieldInfo dvInfo = fi.fieldInfo("dv");
  assertTrue(dvInfo.hasDocValues());
  NumericDocValues dv = slow.getNumericDocValues("dv");
  for (int i = 0; i < 50; i++) {
    assertEquals(i, dv.get(i));
    StoredDocument d = slow.document(i);
    // cannot use d.get("dv") due to another bug!
    assertNull(d.getField("dv"));
    assertEquals(Integer.toString(i), d.get("docId"));
  }
  slow.close();
  writer.close();
  dir.close();
}
public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);

  doc = new Document();
  doc.add(new TextField("dv", "some text", Field.Store.NO));
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);

  DirectoryReader r = writer.getReader();
  writer.close();

  AtomicReader subR = r.leaves().get(0).reader();
  assertEquals(2, subR.numDocs());

  Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
  assertTrue(bits.get(0));
  assertTrue(bits.get(1));
  r.close();
  dir.close();
}
/**
 * Remove a stale file (uidIter.term().utf8ToString()) from the index database (and the xref
 * file).
 *
 * @throws java.io.IOException if an error occurs
 */
private void removeFile() throws IOException {
  String path = Util.uid2url(uidIter.term().utf8ToString());

  for (IndexChangedListener listener : listeners) {
    listener.fileRemove(path);
  }
  writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
  writer.prepareCommit();
  writer.commit();

  File xrefFile;
  if (RuntimeEnvironment.getInstance().isCompressXref()) {
    xrefFile = new File(xrefDir, path + ".gz");
  } else {
    xrefFile = new File(xrefDir, path);
  }
  File parent = xrefFile.getParentFile();

  if (!xrefFile.delete() && xrefFile.exists()) {
    log.log(
        Level.INFO, "Failed to remove obsolete xref-file: {0}", xrefFile.getAbsolutePath());
  }

  // Remove the parent directory if it's empty
  if (parent.delete()) {
    log.log(Level.FINE, "Removed empty xref dir: {0}", parent.getAbsolutePath());
  }

  setDirty();
  for (IndexChangedListener listener : listeners) {
    listener.fileRemoved(path);
  }
}
public void testSimpleSkip() throws IOException {
  Directory dir = new CountingRAMDirectory(new RAMDirectory());
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
              .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
              .setMergePolicy(newLogMergePolicy()));
  Term term = new Term("test", "a");
  for (int i = 0; i < 5000; i++) {
    Document d1 = new Document();
    d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
    writer.addDocument(d1);
  }
  writer.commit();
  writer.forceMerge(1);
  writer.close();

  AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

  for (int i = 0; i < 2; i++) {
    counter = 0;
    DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
    checkSkipTo(tp, 14, 185); // no skips
    checkSkipTo(tp, 17, 190); // one skip on level 0
    checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

    // this test would fail if we had only one skip level,
    // because then more bytes would be read from the freqStream
    checkSkipTo(tp, 4800, 250); // one skip on level 2
  }
}
public void testNoExtraFiles() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer =
      new IndexWriter(
          directory,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setMaxBufferedDocs(2));

  for (int iter = 0; iter < 7; iter++) {
    if (VERBOSE) {
      System.out.println("TEST: iter=" + iter);
    }

    for (int j = 0; j < 21; j++) {
      Document doc = new Document();
      doc.add(newTextField("content", "a b c", Field.Store.NO));
      writer.addDocument(doc);
    }

    writer.close();
    TestIndexWriter.assertNoUnreferencedFiles(directory, "testNoExtraFiles");

    // Reopen
    writer =
        new IndexWriter(
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND)
                .setMaxBufferedDocs(2));
  }

  writer.close();
  directory.close();
}
private void createIndex(
    IndexWriterConfig config,
    Directory target,
    IndexReader reader,
    Filter preserveFilter,
    boolean negateFilter)
    throws IOException {
  boolean success = false;
  final IndexWriter w = new IndexWriter(target, config);
  try {
    final List<LeafReaderContext> leaves = reader.leaves();
    final IndexReader[] subReaders = new IndexReader[leaves.size()];
    int i = 0;
    for (final LeafReaderContext ctx : leaves) {
      subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveFilter, negateFilter);
    }
    w.addIndexes(subReaders);
    success = true;
  } finally {
    if (success) {
      w.close();
    } else {
      IOUtils.closeWhileHandlingException(w);
    }
  }
}
public void testSizeInBytesCache() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
          .setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.setInfoStream(VERBOSE ? System.out : null);
  Document doc = new Document();
  doc.add(new Field("a", "value", Store.YES, Index.ANALYZED));
  writer.addDocument(doc);
  writer.close();

  SegmentInfos sis = new SegmentInfos();
  sis.read(dir);
  SegmentInfo si = sis.info(0);
  long sizeInBytesNoStore = si.sizeInBytes(false);
  long sizeInBytesWithStore = si.sizeInBytes(true);
  assertTrue(
      "sizeInBytesNoStore=" + sizeInBytesNoStore
          + " sizeInBytesWithStore=" + sizeInBytesWithStore,
      sizeInBytesWithStore > sizeInBytesNoStore);
  dir.close();
}
private final IndexReader doOpenFromWriter(boolean openReadOnly, IndexCommit commit)
    throws CorruptIndexException, IOException {
  assert readOnly;

  if (!openReadOnly) {
    throw new IllegalArgumentException(
        "a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
  }

  if (commit != null) {
    throw new IllegalArgumentException(
        "a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
  }

  if (writer.nrtIsCurrent(segmentInfos)) {
    return null;
  }

  IndexReader reader = writer.getReader(applyAllDeletes);

  // If in fact no changes took place, return null:
  if (reader.getVersion() == segmentInfos.getVersion()) {
    reader.decRef();
    return null;
  }

  reader.readerFinishedListeners = readerFinishedListeners;
  return reader;
}
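// Hedged sketch of the caller's side: how doOpenFromWriter() is typically
// reached through the public near-real-time reopen API of this era
// (Lucene 3.x assumed; writer is a hypothetical open IndexWriter).
IndexReader reader = IndexReader.open(writer, true); // near-real-time reader
// ... the writer indexes or deletes more documents ...
IndexReader newReader = IndexReader.openIfChanged(reader); // routes into doOpenFromWriter
if (newReader != null) { // null means the writer had no new changes
  reader.close();
  reader = newReader;
}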
/*
 * Test a deletion policy that keeps last N commits.
 */
public void testKeepLastNDeletionPolicy() throws IOException {
  final int N = 5;

  for (int pass = 0; pass < 2; pass++) {

    boolean useCompoundFile = (pass % 2) != 0;

    Directory dir = newDirectory();
    if (dir instanceof MockDirectoryWrapper) {
      // test manually deletes files
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
    for (int j = 0; j < N + 1; j++) {
      IndexWriterConfig conf =
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setOpenMode(OpenMode.CREATE)
              .setIndexDeletionPolicy(policy)
              .setMaxBufferedDocs(10);
      MergePolicy mp = conf.getMergePolicy();
      mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
      IndexWriter writer = new IndexWriter(dir, conf);
      policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
      for (int i = 0; i < 17; i++) {
        addDoc(writer);
      }
      writer.forceMerge(1);
      writer.close();
    }

    assertTrue(policy.numDelete > 0);
    assertEquals(N + 1, policy.numOnInit);
    assertEquals(N + 1, policy.numOnCommit);

    // Simplistic check: just verify only the past N segments_N's still
    // exist, and, I can open a reader on each:
    long gen = SegmentInfos.getLastCommitGeneration(dir);
    for (int i = 0; i < N + 1; i++) {
      try {
        IndexReader reader = DirectoryReader.open(dir);
        reader.close();
        if (i == N) {
          fail("should have failed on commits prior to last " + N);
        }
      } catch (IOException e) {
        if (i != N) {
          throw e;
        }
      }
      if (i < N) {
        dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
      }
      gen--;
    }

    dir.close();
  }
}
public void buildIndex(JSONObject indexData) {
  try {
    Directory dir = FSDirectory.open(new File(indexDir));
    IKAnalyzer analyzer = new IKAnalyzer();
    analyzer.setUseSmart(true);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    indexWriter = new IndexWriter(dir, iwc);
    indexWriter.deleteAll();

    JSONArray statusData = indexData.getJSONArray("statusData");
    for (int i = 0; i < statusData.length(); i++) {
      String text = statusData.getString(i);
      Document doc = new Document();
      doc.add(
          new Field(
              "text",
              text,
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.WITH_POSITIONS_OFFSETS));
      indexWriter.addDocument(doc);
    }

    JSONArray userData = indexData.getJSONArray("userData");
    for (int i = 0; i < userData.length(); i++) {
      String text = userData.getString(i);
      Document doc = new Document();
      doc.add(
          new Field(
              "text",
              text,
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.WITH_POSITIONS_OFFSETS));
      indexWriter.addDocument(doc);
    }
    // indexWriter.commit();
    System.out.println("Index is done");
  } catch (IOException e) {
    e.printStackTrace(); // failed to open or write the index
  } catch (JSONException e) {
    e.printStackTrace(); // malformed input JSON
  } finally {
    if (indexWriter != null) { // guard against a failure before the writer was created
      try {
        indexWriter.close();
      } catch (CorruptIndexException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
}
/**
 * Returns true if this single info is already fully merged (has no pending deletes, is in the
 * same dir as the writer, and matches the current compound file setting).
 */
protected final boolean isMerged(SegmentInfos infos, SegmentCommitInfo info) throws IOException {
  IndexWriter w = writer.get();
  assert w != null;
  boolean hasDeletions = w.numDeletedDocs(info) > 0;
  return !hasDeletions
      && !info.info.hasSeparateNorms()
      && info.info.dir == w.getDirectory()
      && useCompoundFile(infos, info) == info.info.getUseCompoundFile();
}
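// Since isMerged() is protected, only MergePolicy subclasses can call it. A
// hedged sketch of a hypothetical subclass using it to short-circuit a forced
// merge when the index is already a single fully-merged segment (same-era
// Lucene 4.x MergePolicy API assumed; the class name is illustrative):
class NoOpWhenMergedPolicy extends LogDocMergePolicy {
  @Override
  public MergeSpecification findForcedMerges(
      SegmentInfos infos, int maxSegmentCount, Map<SegmentCommitInfo, Boolean> segmentsToMerge)
      throws IOException {
    if (infos.size() == 1 && isMerged(infos, infos.info(0))) {
      return null; // already fully merged: nothing to do
    }
    return super.findForcedMerges(infos, maxSegmentCount, segmentsToMerge);
  }
}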
private void crash(final IndexWriter writer) throws IOException {
  final MockDirectoryWrapper dir = (MockDirectoryWrapper) writer.getDirectory();
  ConcurrentMergeScheduler cms =
      (ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler();
  cms.sync();
  dir.crash();
  cms.sync();
  dir.clearCrash();
}
public Map<String, Document> indexRandom(
    int nThreads,
    int iterations,
    int range,
    Directory dir,
    int maxThreadStates,
    boolean doReaderPooling)
    throws IOException, InterruptedException {
  Map<String, Document> docs = new HashMap<>();
  IndexWriter w =
      RandomIndexWriter.mockIndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setOpenMode(OpenMode.CREATE)
              .setRAMBufferSizeMB(0.1)
              .setMaxBufferedDocs(maxBufferedDocs)
              .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates))
              .setReaderPooling(doReaderPooling)
              .setMergePolicy(newLogMergePolicy()),
          new YieldTestPoint());
  LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
  lmp.setNoCFSRatio(0.0);
  lmp.setMergeFactor(mergeFactor);

  threads = new IndexingThread[nThreads];
  for (int i = 0; i < threads.length; i++) {
    IndexingThread th = new IndexingThread();
    th.w = w;
    th.base = 1000000 * i;
    th.range = range;
    th.iterations = iterations;
    threads[i] = th;
  }

  for (int i = 0; i < threads.length; i++) {
    threads[i].start();
  }
  for (int i = 0; i < threads.length; i++) {
    threads[i].join();
  }

  // w.forceMerge(1);
  w.close();

  for (int i = 0; i < threads.length; i++) {
    IndexingThread th = threads[i];
    synchronized (th) {
      docs.putAll(th.docs);
    }
  }

  // System.out.println("TEST: checkindex");
  TestUtil.checkIndex(dir);

  return docs;
}
// indexes Integer.MAX_VALUE docs with a fixed binary field
public void testFixedSorted() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedSorted"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }

  IndexWriter w =
      new IndexWriter(
          dir,
          new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
              .setRAMBufferSizeMB(256.0)
              .setMergeScheduler(new ConcurrentMergeScheduler())
              .setMergePolicy(newLogMergePolicy(false, 10))
              .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

  Document doc = new Document();
  byte[] bytes = new byte[2];
  BytesRef data = new BytesRef(bytes);
  SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
  doc.add(dvField);

  for (int i = 0; i < Integer.MAX_VALUE; i++) {
    bytes[0] = (byte) (i >> 8);
    bytes[1] = (byte) i;
    w.addDocument(doc);
    if (i % 100000 == 0) {
      System.out.println("indexed: " + i);
      System.out.flush();
    }
  }

  w.forceMerge(1);
  w.close();

  System.out.println("verifying...");
  System.out.flush();

  DirectoryReader r = DirectoryReader.open(dir);
  int expectedValue = 0;
  for (AtomicReaderContext context : r.leaves()) {
    AtomicReader reader = context.reader();
    BytesRef scratch = new BytesRef();
    BinaryDocValues dv = reader.getSortedDocValues("dv");
    for (int i = 0; i < reader.maxDoc(); i++) {
      bytes[0] = (byte) (expectedValue >> 8);
      bytes[1] = (byte) expectedValue;
      dv.get(i, scratch);
      assertEquals(data, scratch);
      expectedValue++;
    }
  }

  r.close();
  dir.close();
}