public void merge() {
  IndexWriter writer = null;
  try {
    writer = new IndexWriter(
        directory,
        new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    // Merges the index down to two segments; deleted documents in those segments are purged.
    // Note: as of Lucene 3.5 this is discouraged because it is very expensive;
    // Lucene handles merging on its own as needed.
    writer.forceMerge(2);
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
/**
 * Optimizes the index by force-merging its segments, leaving at most the specified number of
 * segments. This operation may block until all merging completes.
 *
 * @param maxNumSegments The maximum number of segments left in the index after merging finishes.
 * @param doWait {@code true} if the call should block until the operation completes.
 * @throws IOException If Lucene throws IO errors.
 */
@Override
public void forceMerge(int maxNumSegments, boolean doWait) throws IOException {
  Log.info("%s merging index segments to %d", logName, maxNumSegments);
  indexWriter.forceMerge(maxNumSegments, doWait);
  indexWriter.commit();
  Log.info("%s segments merge completed", logName);
}
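For context, a call site for this wrapper might look like the sketch below. The `MergingIndex` interface and `MergeJob` class are hypothetical stand-ins for whatever class hosts the `forceMerge(int, boolean)` method above; only the wrapper's signature is taken from the snippet.

import java.io.IOException;
import java.io.UncheckedIOException;

// Hypothetical stand-in for the class that hosts the forceMerge(int, boolean) wrapper above.
interface MergingIndex {
  void forceMerge(int maxNumSegments, boolean doWait) throws IOException;
}

// Illustrative caller: collapses the index to a single segment and waits for the merge.
public final class MergeJob implements Runnable {
  private final MergingIndex index;

  public MergeJob(MergingIndex index) {
    this.index = index;
  }

  @Override
  public void run() {
    try {
      index.forceMerge(1, true); // blocks until merging and the follow-up commit complete
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }
}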
/** Manually optimizes the index. */
public void merge() {
  IndexWriter writer = null;
  try {
    writer = new IndexWriter(
        directory,
        new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    // Force-merges the index down to two segments and purges deleted documents from them.
    // As of Lucene 3.5 this is discouraged because of the large memory overhead;
    // Lucene handles merging automatically.
    writer.forceMerge(2);
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    if (writer != null)
      try {
        writer.close();
      } catch (CorruptIndexException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      }
  }
}
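Both `merge()` variants above target the long-deprecated Lucene 3.x API (`Version.LUCENE_35`). As a point of comparison, a minimal sketch of the same manual merge against a current Lucene API (5.x or later), assuming the same `directory` field, could look like this (the `IndexMerger` class name is illustrative only):

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

public class IndexMerger {
  private final Directory directory;

  public IndexMerger(Directory directory) {
    this.directory = directory;
  }

  /** Force-merges the index down to at most two segments; usually unnecessary, the merge policy handles this. */
  public void merge() throws IOException {
    // try-with-resources replaces the explicit finally/close blocks used above
    try (IndexWriter writer =
        new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()))) {
      writer.forceMerge(2); // blocks until merging completes
    }
  }
}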
/* * Test per field codec support - adding fields with random codecs */ @Test public void testStressPerFieldCodec() throws IOException { Directory dir = newDirectory(random()); final int docsPerRound = 97; int numRounds = atLeast(1); for (int i = 0; i < numRounds; i++) { int num = TestUtil.nextInt(random(), 30, 60); IndexWriterConfig config = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random())); config.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = newWriter(dir, config); for (int j = 0; j < docsPerRound; j++) { final Document doc = new Document(); for (int k = 0; k < num; k++) { FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setTokenized(random().nextBoolean()); customType.setOmitNorms(random().nextBoolean()); Field field = newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType); doc.add(field); } writer.addDocument(doc); } if (random().nextBoolean()) { writer.forceMerge(1); } writer.commit(); assertEquals((i + 1) * docsPerRound, writer.maxDoc()); writer.close(); } dir.close(); }
public OperationResponse commitAndOptimize() {
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("committing...");
    }
    indexWriter.commit();
    if (logger.isDebugEnabled()) {
      logger.debug("commit finish.");
    }
    if (logger.isDebugEnabled()) {
      logger.debug("optimizing...");
    }
    indexWriter.forceMerge(defaultMergeSize);
    if (logger.isDebugEnabled()) {
      logger.debug("optimize finish.");
    }
    reopenSearcher();
  } catch (IOException e) {
    logger.error("optimize error", e);
    return new OperationResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
  } catch (OutOfMemoryError e) {
    CloseUtil.close(indexWriter);
    logger.error("error of OOM", e);
    return new OperationResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
  }
  return new OperationResponse();
}
private void doRandomForceMerge() throws IOException {
  if (doRandomForceMerge) {
    final int segCount = w.getSegmentCount();
    if (r.nextBoolean() || segCount == 0) {
      // full forceMerge
      if (LuceneTestCase.VERBOSE) {
        System.out.println("RIW: doRandomForceMerge(1)");
      }
      w.forceMerge(1);
    } else {
      // partial forceMerge
      final int limit = _TestUtil.nextInt(r, 1, segCount);
      if (LuceneTestCase.VERBOSE) {
        System.out.println("RIW: doRandomForceMerge(" + limit + ")");
      }
      w.forceMerge(limit);
      assert !doRandomForceMergeAssert || w.getSegmentCount() <= limit
          : "limit=" + limit + " actual=" + w.getSegmentCount();
    }
  }
}
// test using a sparse index (with deleted docs).
@Test
public void testSparseIndex() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));

  for (int d = -20; d <= 20; d++) {
    Document doc = new Document();
    doc.add(new IntField("id_int", d, Field.Store.NO));
    doc.add(newStringField("body", "body", Field.Store.NO));
    writer.addDocument(doc);
  }

  writer.forceMerge(1);

  BytesRef term0 = new BytesRef();
  NumericUtils.intToPrefixCoded(0, 0, term0);
  writer.deleteDocuments(new Term("id_int", term0));
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher search = newSearcher(reader);
  assertTrue(reader.hasDeletions());

  ScoreDoc[] result;
  Query q = new TermQuery(new Term("body", "body"));

  result = search.search(q, FieldCacheRangeFilter.newIntRange("id_int", -20, 20, T, T), 100).scoreDocs;
  assertEquals("find all", 40, result.length);

  result = search.search(q, FieldCacheRangeFilter.newIntRange("id_int", 0, 20, T, T), 100).scoreDocs;
  assertEquals("find all", 20, result.length);

  result = search.search(q, FieldCacheRangeFilter.newIntRange("id_int", -20, 0, T, T), 100).scoreDocs;
  assertEquals("find all", 20, result.length);

  result = search.search(q, FieldCacheRangeFilter.newIntRange("id_int", 10, 20, T, T), 100).scoreDocs;
  assertEquals("find all", 11, result.length);

  result = search.search(q, FieldCacheRangeFilter.newIntRange("id_int", -20, -10, T, T), 100).scoreDocs;
  assertEquals("find all", 11, result.length);

  reader.close();
  dir.close();
}
@Test
public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
  // test openIfChanged() when the taxonomy hasn't really changed, but segments
  // were merged. The NRT reader will be reopened, and ParentArray used to assert
  // that the new reader contains more ordinals than were given from the old
  // TaxReader version
  Directory dir = newDirectory();

  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config)
        throws IOException {
      return iw;
    }
  };

  // add a category so that the following DTR open will cause a flush and
  // a new segment will be created
  writer.addCategory(new FacetLabel("a"));

  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  // merge all the segments so that NRT reader thinks there's a change
  iw.forceMerge(1);

  // now calling openIfChanged should trip on the wrong assert in ParentArray's ctor
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  reader.close();
  writer.close();
  dir.close();
}
public void testNonRootFloorBlocks() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); List<String> terms = new ArrayList<>(); for (int i = 0; i < 36; i++) { Document doc = new Document(); String term = "" + (char) (97 + i); terms.add(term); if (VERBOSE) { System.out.println("i=" + i + " term=" + term); } doc.add(newTextField("field", term, Field.Store.NO)); w.addDocument(doc); } for (int i = 0; i < 128; i++) { Document doc = new Document(); String term = "m" + (char) i; terms.add(term); if (VERBOSE) { System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term)); } doc.add(newStringField("field", term, Field.Store.NO)); w.addDocument(doc); } w.forceMerge(1); IndexReader r = DirectoryReader.open(w, true); TermsEnum te = MultiFields.getTerms(r, "field").iterator(null); BytesRef term; int ord = 0; while ((term = te.next()) != null) { if (VERBOSE) { System.out.println("TEST: " + te.ord() + ": " + term.utf8ToString()); } assertEquals(ord, te.ord()); ord++; } testEnum(te, terms); r.close(); w.close(); dir.close(); }
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
  // test openIfChanged() when all index segments were merged - used to be
  // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
  // this test is not random
  Directory dir = newDirectory();

  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config)
        throws IOException {
      return iw;
    }
  };

  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(1, reader.getSize());
  assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

  // add category and call forceMerge -- this should flush IW and merge segments down to 1
  // in ParentArray.initFromReader, this used to fail assuming there are no parents.
  writer.addCategory(new FacetLabel("1"));
  iw.forceMerge(1);

  // now calling openIfChanged should trip on the bug
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  reader.close();
  writer.close();
  dir.close();
}
/*
 * @see TestCase#setUp()
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new MockAnalyzer(random());

  // Create an index
  dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  for (String docFieldValue : docFieldValues) {
    w.addDocument(getDocumentFromString(docFieldValue));
  }
  w.forceMerge(1);
  w.close();
  reader = DirectoryReader.open(dir);
  searcher = newSearcher(reader);

  // initialize the parser
  builder = new CorePlusExtensionsParser("artist", analyzer);
}
public Lookup buildAnalyzingLookup( final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights) throws IOException { RAMDirectory dir = new RAMDirectory(); FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) { @Override public PostingsFormat postingsFormat() { final PostingsFormat in = super.postingsFormat(); return mapper.postingsFormat(in); } }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.indexAnalyzer()); indexWriterConfig.setCodec(filterCodec); IndexWriter writer = new IndexWriter(dir, indexWriterConfig); for (int i = 0; i < weights.length; i++) { Document doc = new Document(); BytesRef payload = mapper.buildPayload( new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i]))); doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload)); if (randomBoolean()) { writer.commit(); } writer.addDocument(doc); } writer.commit(); writer.forceMerge(1, true); writer.commit(); DirectoryReader reader = DirectoryReader.open(writer, true); assertThat(reader.leaves().size(), equalTo(1)); assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length)); LeafReaderContext atomicReaderContext = reader.leaves().get(0); Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name()); Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms) .getLookup(mapper, new CompletionSuggestionContext(null)); reader.close(); writer.close(); dir.close(); return lookup; }
public void testSeveralNonRootBlocks() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); List<String> terms = new ArrayList<>(); for (int i = 0; i < 30; i++) { for (int j = 0; j < 30; j++) { Document doc = new Document(); String term = "" + (char) (97 + i) + (char) (97 + j); terms.add(term); if (VERBOSE) { System.out.println("term=" + term); } doc.add(newTextField("body", term, Field.Store.NO)); w.addDocument(doc); } } w.forceMerge(1); IndexReader r = DirectoryReader.open(w, true); TermsEnum te = MultiFields.getTerms(r, "body").iterator(null); for (int i = 0; i < 30; i++) { for (int j = 0; j < 30; j++) { String term = "" + (char) (97 + i) + (char) (97 + j); if (VERBOSE) { System.out.println("TEST: check term=" + term); } assertEquals(term, te.next().utf8ToString()); assertEquals(30 * i + j, te.ord()); } } testEnum(te, terms); te.seekExact(0); assertEquals("aa", te.term().utf8ToString()); r.close(); w.close(); dir.close(); }
/*
 * Test that heterogeneous index segments are merged successfully
 */
@Test
public void testMergeUnusedPerFieldCodec() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconf =
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setOpenMode(OpenMode.CREATE)
          .setCodec(new MockCodec());
  IndexWriter writer = newWriter(dir, iwconf);

  addDocs(writer, 10);
  writer.commit();
  addDocs3(writer, 10);
  writer.commit();
  addDocs2(writer, 10);
  writer.commit();
  assertEquals(30, writer.maxDoc());
  TestUtil.checkIndex(dir);
  writer.forceMerge(1);
  assertEquals(30, writer.maxDoc());
  writer.close();
  dir.close();
}
/** Optimize the index database */
public void optimize() {
  synchronized (lock) {
    if (running) {
      LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
      return;
    }
    running = true;
  }
  IndexWriter wrt = null;
  try {
    LOGGER.info("Optimizing the index ... ");
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    wrt = new IndexWriter(indexDirectory, conf);
    wrt.forceMerge(1); // this is deprecated and not needed anymore
    LOGGER.info("done");
    synchronized (lock) {
      if (dirtyFile.exists() && !dirtyFile.delete()) {
        LOGGER.log(
            Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
      }
      dirty = false;
    }
  } catch (IOException e) {
    LOGGER.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
  } finally {
    if (wrt != null) {
      try {
        wrt.close();
      } catch (IOException e) {
        LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
      }
    }
    synchronized (lock) {
      running = false;
    }
  }
}
public void testFloorBlocks() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); for (int i = 0; i < 128; i++) { Document doc = new Document(); String term = "" + (char) i; if (VERBOSE) { System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term)); } doc.add(newStringField("field", term, Field.Store.NO)); w.addDocument(doc); } w.forceMerge(1); IndexReader r = DirectoryReader.open(w, true); TermsEnum te = MultiFields.getTerms(r, "field").iterator(null); if (VERBOSE) { BytesRef term; while ((term = te.next()) != null) { System.out.println(" " + te.ord() + ": " + term.utf8ToString()); } } assertTrue(te.seekExact(new BytesRef("a"))); assertEquals(97, te.ord()); te.seekExact(98); assertEquals(new BytesRef("b"), te.term()); assertTrue(te.seekExact(new BytesRef("z"))); assertEquals(122, te.ord()); r.close(); w.close(); dir.close(); }
@Override public void forceMerge( final boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, final boolean upgrade, final boolean upgradeOnlyAncientSegments) throws EngineException, EngineClosedException, IOException { /* * We do NOT acquire the readlock here since we are waiting on the merges to finish * that's fine since the IW.rollback should stop all the threads and trigger an IOException * causing us to fail the forceMerge * * The way we implement upgrades is a bit hackish in the sense that we set an instance * variable and that this setting will thus apply to the next forced merge that will be run. * This is ok because (1) this is the only place we call forceMerge, (2) we have a single * thread for optimize, and the 'optimizeLock' guarding this code, and (3) ConcurrentMergeScheduler * syncs calls to findForcedMerges. */ assert indexWriter.getConfig().getMergePolicy() instanceof ElasticsearchMergePolicy : "MergePolicy is " + indexWriter.getConfig().getMergePolicy().getClass().getName(); ElasticsearchMergePolicy mp = (ElasticsearchMergePolicy) indexWriter.getConfig().getMergePolicy(); optimizeLock.lock(); try { ensureOpen(); if (upgrade) { logger.info( "starting segment upgrade upgradeOnlyAncientSegments={}", upgradeOnlyAncientSegments); mp.setUpgradeInProgress(true, upgradeOnlyAncientSegments); } store.incRef(); // increment the ref just to ensure nobody closes the store while we optimize try { if (onlyExpungeDeletes) { assert upgrade == false; indexWriter.forceMergeDeletes(true /* blocks and waits for merges*/); } else if (maxNumSegments <= 0) { assert upgrade == false; indexWriter.maybeMerge(); } else { indexWriter.forceMerge(maxNumSegments, true /* blocks and waits for merges*/); } if (flush) { if (tryRenewSyncCommit() == false) { flush(false, true); } } if (upgrade) { logger.info("finished segment upgrade"); } } finally { store.decRef(); } } catch (Throwable t) { maybeFailEngine("force merge", t); throw t; } finally { try { mp.setUpgradeInProgress( false, false); // reset it just to make sure we reset it in a case of an error } finally { optimizeLock.unlock(); } } }
/** * Forces a forceMerge. * * <p>NOTE: this should be avoided in tests unless absolutely necessary, as it will result in less * test coverage. * * @see IndexWriter#forceMerge(int) */ public void forceMerge(int maxSegmentCount) throws IOException { w.forceMerge(maxSegmentCount); }
@Override public void close(TaskAttemptContext context) throws IOException { LOG.debug( "Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards); writeShardNumberFile(context); heartBeater.needHeartBeat(); try { Directory mergedIndex = new HdfsDirectory(workDir, NoLockFactory.INSTANCE, context.getConfiguration()); // TODO: shouldn't we pull the Version from the solrconfig.xml? IndexWriterConfig writerConfig = new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE).setUseCompoundFile(false) // .setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml? // .setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml? ; if (LOG.isDebugEnabled()) { writerConfig.setInfoStream(System.out); } // writerConfig.setRAMBufferSizeMB(100); // improve performance // writerConfig.setMaxThreadStates(1); // disable compound file to improve performance // also see // http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html // also see defaults in SolrIndexConfig MergePolicy mergePolicy = writerConfig.getMergePolicy(); LOG.debug("mergePolicy was: {}", mergePolicy); if (mergePolicy instanceof TieredMergePolicy) { ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0); // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000); // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000); // ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000); } else if (mergePolicy instanceof LogMergePolicy) { ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0); } LOG.info("Using mergePolicy: {}", mergePolicy); IndexWriter writer = new IndexWriter(mergedIndex, writerConfig); Directory[] indexes = new Directory[shards.size()]; for (int i = 0; i < shards.size(); i++) { indexes[i] = new HdfsDirectory(shards.get(i), NoLockFactory.INSTANCE, context.getConfiguration()); } context.setStatus("Logically merging " + shards.size() + " shards into one shard"); LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir); RTimer timer = new RTimer(); writer.addIndexes(indexes); // TODO: avoid intermediate copying of files into dst directory; rename the files into the // dir instead (cp -> rename) // This can improve performance and turns this phase into a true "logical" merge, completing // in constant time. 
// See https://issues.apache.org/jira/browse/LUCENE-4746 timer.stop(); if (LOG.isDebugEnabled()) { context .getCounter( SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()) .increment((long) timer.getTime()); } LOG.info("Logical merge took {}ms", timer.getTime()); int maxSegments = context .getConfiguration() .getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE); context.setStatus( "Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments"); LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments); timer = new RTimer(); if (maxSegments < Integer.MAX_VALUE) { writer.forceMerge(maxSegments); // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data // see // http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html } timer.stop(); if (LOG.isDebugEnabled()) { context .getCounter( SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()) .increment((long) timer.getTime()); } LOG.info( "Optimizing Solr: done forcing tree merge down to {} segments in {}ms", maxSegments, timer.getTime()); timer = new RTimer(); LOG.info("Optimizing Solr: Closing index writer"); writer.close(); LOG.info("Optimizing Solr: Done closing index writer in {}ms", timer.getTime()); context.setStatus("Done"); } finally { heartBeater.cancelHeartBeat(); heartBeater.close(); } }
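The commented-out tuning hints in the method above (merge policy, merge scheduler, RAM buffer) can be made concrete. The sketch below shows how such knobs might be set on an `IndexWriterConfig` with a `TieredMergePolicy`; the class name and the particular values are illustrative assumptions, not taken from any solrconfig.xml:

import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;

public final class MergeTuning {
  // Returns a config tuned for a one-off offline merge; values are illustrative only.
  public static IndexWriterConfig tunedConfig() {
    TieredMergePolicy mergePolicy = new TieredMergePolicy();
    mergePolicy.setNoCFSRatio(0.0);       // never write merged segments as compound files
    mergePolicy.setSegmentsPerTier(10.0); // allow this many segments per tier before merging
    mergePolicy.setMaxMergeAtOnce(10);    // merge at most this many segments in one merge

    IndexWriterConfig config = new IndexWriterConfig(null); // no analyzer needed for addIndexes()
    config.setUseCompoundFile(false);
    config.setRAMBufferSizeMB(100);       // larger RAM buffer to reduce flush frequency
    config.setMergePolicy(mergePolicy);
    return config;
  }
}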
// TODO: not sure this test is that great, we should probably peek inside PerFieldPostingsFormat // or something?! @Test public void testChangeCodecAndMerge() throws IOException { Directory dir = newDirectory(); if (VERBOSE) { System.out.println("TEST: make new index"); } IndexWriterConfig iwconf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setCodec(new MockCodec()); iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // ((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); IndexWriter writer = newWriter(dir, iwconf); addDocs(writer, 10); writer.commit(); assertQuery(new Term("content", "aaa"), dir, 10); if (VERBOSE) { System.out.println("TEST: addDocs3"); } addDocs3(writer, 10); writer.commit(); writer.close(); assertQuery(new Term("content", "ccc"), dir, 10); assertQuery(new Term("content", "aaa"), dir, 10); Codec codec = iwconf.getCodec(); iwconf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setCodec(codec); // ((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0); // ((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwconf.setCodec(new MockCodec2()); // uses standard for field content writer = newWriter(dir, iwconf); // swap in new codec for currently written segments if (VERBOSE) { System.out.println("TEST: add docs w/ Standard codec for content field"); } addDocs2(writer, 10); writer.commit(); codec = iwconf.getCodec(); assertEquals(30, writer.maxDoc()); assertQuery(new Term("content", "bbb"), dir, 10); assertQuery(new Term("content", "ccc"), dir, 10); // // assertQuery(new Term("content", "aaa"), dir, 10); if (VERBOSE) { System.out.println("TEST: add more docs w/ new codec"); } addDocs2(writer, 10); writer.commit(); assertQuery(new Term("content", "ccc"), dir, 10); assertQuery(new Term("content", "bbb"), dir, 20); assertQuery(new Term("content", "aaa"), dir, 10); assertEquals(40, writer.maxDoc()); if (VERBOSE) { System.out.println("TEST: now optimize"); } writer.forceMerge(1); assertEquals(40, writer.maxDoc()); writer.close(); assertQuery(new Term("content", "ccc"), dir, 10); assertQuery(new Term("content", "bbb"), dir, 20); assertQuery(new Term("content", "aaa"), dir, 10); dir.close(); }
// Test using various international locales with accented characters (which // sort differently depending on locale) // // Copied (and slightly modified) from // org.apache.lucene.search.TestSort.testInternationalSort() // // TODO: this test is really fragile. there are already 3 different cases, // depending upon unicode version. public void testCollationKeySort( Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, String usResult, String frResult, String svResult, String dkResult) throws Exception { Directory indexStore = newDirectory(); PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); analyzer.addAnalyzer("US", usAnalyzer); analyzer.addAnalyzer("France", franceAnalyzer); analyzer.addAnalyzer("Sweden", swedenAnalyzer); analyzer.addAnalyzer("Denmark", denmarkAnalyzer); IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); // document data: // the tracer field is used to determine which document was hit String[][] sortData = new String[][] { // tracer contents US France Sweden (sv_SE) Denmark // (da_DK) {"A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche"}, {"B", "y", "HAT", "HAT", "HAT", "HAT"}, {"C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9"}, {"D", "y", "HUT", "HUT", "HUT", "HUT"}, {"E", "x", "peach", "peach", "peach", "peach"}, {"F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T"}, {"G", "x", "sin", "sin", "sin", "sin"}, {"H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T"}, {"I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn"}, {"J", "y", "HOT", "HOT", "HOT", "HOT"}, }; for (int i = 0; i < sortData.length; ++i) { Document doc = new Document(); doc.add(new Field("tracer", sortData[i][0], Field.Store.YES, Field.Index.NO)); doc.add(new Field("contents", sortData[i][1], Field.Store.NO, Field.Index.ANALYZED)); if (sortData[i][2] != null) doc.add(new Field("US", sortData[i][2], Field.Store.NO, Field.Index.ANALYZED)); if (sortData[i][3] != null) doc.add(new Field("France", sortData[i][3], Field.Store.NO, Field.Index.ANALYZED)); if (sortData[i][4] != null) doc.add(new Field("Sweden", sortData[i][4], Field.Store.NO, Field.Index.ANALYZED)); if (sortData[i][5] != null) doc.add(new Field("Denmark", sortData[i][5], Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); } writer.forceMerge(1); writer.close(); IndexReader reader = IndexReader.open(indexStore); IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(); Query queryX = new TermQuery(new Term("contents", "x")); Query queryY = new TermQuery(new Term("contents", "y")); sort.setSort(new SortField("US", SortField.STRING)); assertMatches(searcher, queryY, sort, usResult); sort.setSort(new SortField("France", SortField.STRING)); assertMatches(searcher, queryX, sort, frResult); sort.setSort(new SortField("Sweden", SortField.STRING)); assertMatches(searcher, queryY, sort, svResult); sort.setSort(new SortField("Denmark", SortField.STRING)); assertMatches(searcher, queryY, sort, dkResult); searcher.close(); reader.close(); indexStore.close(); }
// creates 8 fields with different options and does "duels" of fields against each other public void test() throws Exception { Directory dir = newDirectory(); Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new MockTokenizer(reader); if (fieldName.contains("payloadsFixed")) { TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1); return new TokenStreamComponents(tokenizer, filter); } else if (fieldName.contains("payloadsVariable")) { TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer); return new TokenStreamComponents(tokenizer, filter); } else { return new TokenStreamComponents(tokenizer); } } }; IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())); // TODO we could actually add more fields implemented with different PFs // or, just put this test into the usual rotation? RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc.clone()); Document doc = new Document(); FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsOnlyType.setStoreTermVectors(true); docsOnlyType.setIndexOptions(IndexOptions.DOCS_ONLY); FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsAndFreqsType.setStoreTermVectors(true); docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); // turn these on for a cross-check positionsType.setStoreTermVectors(true); positionsType.setStoreTermVectorPositions(true); positionsType.setStoreTermVectorOffsets(true); positionsType.setStoreTermVectorPayloads(true); FieldType offsetsType = new FieldType(positionsType); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field field1 = new Field("field1docs", "", docsOnlyType); Field field2 = new Field("field2freqs", "", docsAndFreqsType); Field field3 = new Field("field3positions", "", positionsType); Field field4 = new Field("field4offsets", "", offsetsType); Field field5 = new Field("field5payloadsFixed", "", positionsType); Field field6 = new Field("field6payloadsVariable", "", positionsType); Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); doc.add(field1); doc.add(field2); doc.add(field3); doc.add(field4); doc.add(field5); doc.add(field6); doc.add(field7); doc.add(field8); for (int i = 0; i < MAXDOC; i++) { String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ') + " " + _TestUtil.randomSimpleString(random()); field1.setStringValue(stringValue); field2.setStringValue(stringValue); field3.setStringValue(stringValue); field4.setStringValue(stringValue); field5.setStringValue(stringValue); field6.setStringValue(stringValue); field7.setStringValue(stringValue); field8.setStringValue(stringValue); iw.addDocument(doc); } iw.close(); verify(dir); _TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge iwc.setOpenMode(OpenMode.APPEND); IndexWriter iw2 = new IndexWriter(dir, iwc.clone()); iw2.forceMerge(1); iw2.close(); verify(dir); dir.close(); }