/** Test that core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  iwc.setMaxBufferedDocs(100);
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  IndexWriter iw = new IndexWriter(dir, iwc);

  // add two docs, id:0 and id:1
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  doc.add(idField);
  idField.setStringValue("0");
  iw.addDocument(doc);
  idField.setStringValue("1");
  iw.addDocument(doc);

  // open reader
  ShardId shardId = new ShardId("fake", "_na_", 1);
  DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
  assertEquals(2, ir.numDocs());
  assertEquals(1, ir.leaves().size());

  // delete id:0 and reopen
  iw.deleteDocuments(new Term("id", "0"));
  DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

  // we should have the same cache key as before
  assertEquals(1, ir2.numDocs());
  assertEquals(1, ir2.leaves().size());
  assertSame(
      ir.leaves().get(0).reader().getCoreCacheKey(),
      ir2.leaves().get(0).reader().getCoreCacheKey());
  IOUtils.close(ir, ir2, iw, dir);
}
/**
 * Creates an index from the given documents.
 *
 * @param documentList the documents to index
 * @param path a file path used to locate the index directory
 * @throws IOException if the index cannot be written
 */
public static void createIndex(List<Document> documentList, String path) throws IOException {
  // Create a directory named "index" under the grandparent of the given path
  File file = new File(path);
  String pathAll = file.getParentFile().getParentFile().toString() + "\\index";
  File indexDir = new File(pathAll);
  // Open the index directory
  Directory directory = FSDirectory.open(indexDir);
  // Create an analyzer
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
  // Create the index writer configuration
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
  LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
  // Set how often segments are merged as documents are added:
  // smaller values slow indexing down,
  // larger values speed it up; values > 10 suit batch indexing
  mergePolicy.setMergeFactor(50);
  // Set the maximum number of documents per merged segment:
  // smaller values favor incremental indexing speed,
  // larger values suit batch indexing and faster searches
  mergePolicy.setMaxMergeDocs(5000);
  // Use the compound index file format, merging multiple segment files
  mergePolicy.setUseCompoundFile(true);
  indexWriterConfig.setMergePolicy(mergePolicy);
  // Set the index open mode
  indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  // Create the index writer
  IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
  for (Document document : documentList) {
    // Add the document to the index
    indexWriter.addDocument(document);
  }
  // Commit the index to disk and close the writer
  indexWriter.close();
}
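A minimal caller sketch for the method above, assuming the Lucene 3.6 Field API and the usual imports; the field name, contents, and path are illustrative only:

// Build a one-document list and index it; the index lands in "<grandparent of path>\index"
List<Document> docs = new ArrayList<Document>();
Document doc = new Document();
doc.add(new Field("title", "Hello Lucene", Field.Store.YES, Field.Index.ANALYZED));
docs.add(doc);
createIndex(docs, "C:\\data\\project\\subdir\\notes.txt");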
private void _initIndexWriter() {
  try {
    Analyzer analyzer =
        new LimitTokenCountAnalyzer(
            LuceneHelperUtil.getAnalyzer(), PropsValues.LUCENE_ANALYZER_MAX_TOKENS);
    IndexWriterConfig indexWriterConfig =
        new IndexWriterConfig(LuceneHelperUtil.getVersion(), analyzer);
    indexWriterConfig.setIndexDeletionPolicy(_dumpIndexDeletionPolicy);
    indexWriterConfig.setMergePolicy(_getMergePolicy());
    indexWriterConfig.setMergeScheduler(_getMergeScheduler());
    indexWriterConfig.setRAMBufferSizeMB(PropsValues.LUCENE_BUFFER_SIZE);
    _indexWriter = new IndexWriter(getLuceneDir(), indexWriterConfig);
    if (!IndexReader.indexExists(getLuceneDir())) {
      // Workaround for LUCENE-2386
      if (_log.isDebugEnabled()) {
        _log.debug("Creating missing index");
      }
      _indexWriter.commit();
    }
  } catch (Exception e) {
    _log.error("Initializing Lucene writer failed for " + _companyId, e);
  }
}
private IndexWriter createWriter(boolean create) throws IOException {
  try {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(
        create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(deletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
      verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Throwable ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    MergePolicy mergePolicy = config().getMergePolicy();
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
    iwc.setMergePolicy(mergePolicy);
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    return new IndexWriter(store.directory(), iwc);
  } catch (LockObtainFailedException ex) {
    logger.warn("could not lock IndexWriter", ex);
    throw ex;
  }
}
@BeforeClass
public static void beforeClassCountingFacetsAggregatorTest() throws Exception {
  indexDir = newDirectory();
  taxoDir = newDirectory();

  // create an index which has:
  // 1. Segment with no categories, but matching results
  // 2. Segment w/ categories, but no results
  // 3. Segment w/ categories and results
  // 4. Segment w/ categories, but only some results
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges, so we can control the index segments
  IndexWriter indexWriter = new IndexWriter(indexDir, conf);
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

  allExpectedCounts = newCounts();
  termExpectedCounts = newCounts();

  // segment w/ no categories
  indexDocsNoFacets(indexWriter);

  // segment w/ categories, no content
  indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);

  // segment w/ categories and content
  indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);

  // segment w/ categories and some content
  indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);

  IOUtils.close(indexWriter, taxoWriter);
}
/**
 * Builds a new {@code RowDirectory} using the specified directory path and analyzer.
 *
 * @param keyspace The keyspace name.
 * @param table The table name.
 * @param name The index name.
 * @param path The path of the directory where the Lucene files will be stored.
 * @param ramBufferMB The index writer buffer size in MB.
 * @param maxMergeMB NRTCachingDirectory max merge size in MB.
 * @param maxCachedMB NRTCachingDirectory max cached MB.
 * @param analyzer The default {@link Analyzer}.
 * @param refreshSeconds The index readers refresh time in seconds. Writes are not visible until
 *     this time has passed.
 * @param refreshCallback A runnable to be run on index refresh.
 * @throws IOException If Lucene throws IO errors.
 */
public LuceneIndex(
    String keyspace,
    String table,
    String name,
    Path path,
    Integer ramBufferMB,
    Integer maxMergeMB,
    Integer maxCachedMB,
    Analyzer analyzer,
    Double refreshSeconds,
    Runnable refreshCallback)
    throws IOException {
  this.path = path;
  this.refreshCallback = refreshCallback;
  this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

  // Open or create directory
  FSDirectory fsDirectory = FSDirectory.open(path);
  directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

  // Setup index writer
  IndexWriterConfig config = new IndexWriterConfig(analyzer);
  config.setRAMBufferSizeMB(ramBufferMB);
  config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  config.setUseCompoundFile(true);
  config.setMergePolicy(new TieredMergePolicy());
  indexWriter = new IndexWriter(directory, config);

  // Setup NRT search
  SearcherFactory searcherFactory =
      new SearcherFactory() {
        public IndexSearcher newSearcher(IndexReader reader) throws IOException {
          LuceneIndex.this.refreshCallBack();
          IndexSearcher searcher = new IndexSearcher(reader);
          searcher.setSimilarity(new NoIDFSimilarity());
          return searcher;
        }
      };
  TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
  searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
  searcherReopener =
      new ControlledRealTimeReopenThread<>(
          trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds);
  searcherReopener.start(); // Start the refresher thread

  // Register JMX MBean
  try {
    objectName =
        new ObjectName(
            String.format(
                "com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
                keyspace, table, name));
    ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
  } catch (MBeanException | OperationsException e) {
    Log.error(e, "Error while registering MBean");
  }
}
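The constructor above starts a reopener thread and registers an MBean, but the rest of the class is not shown; the following close() is a hypothetical sketch of the matching teardown order (stop the NRT reopener, close the searcher manager, writer and directory, then unregister the MBean):

public void close() throws IOException {
  searcherReopener.close(); // stop the NRT reopen loop (hypothetical teardown order)
  searcherManager.close();
  indexWriter.close();
  directory.close();
  try {
    ManagementFactory.getPlatformMBeanServer().unregisterMBean(objectName);
  } catch (JMException e) {
    Log.error(e, "Error while unregistering MBean");
  }
}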
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) throws IOException {
  LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
  logDocMergePolicy.setNoCFSRatio(0.0); // make sure we use plain files
  conf.setMergePolicy(logDocMergePolicy);
  final IndexWriter writer = new IndexWriter(dir, conf);
  return writer;
}
public void open() throws IOException {
  if (_closed) {
    IndexWriterConfig idxWriterConfig =
        new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34));
    idxWriterConfig.setMergePolicy(new ZoieMergePolicy());
    idxWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    _idxWriter = new IndexWriter(_dir, idxWriterConfig);
    updateReader();
    _closed = false;
  }
}
public static IndexWriter openWriter(Directory directory, int maxMergeDocs, boolean useSerialMerger)
    throws CorruptIndexException, LockObtainFailedException, IOException {
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
  if (useSerialMerger) {
    indexWriterConfig.setMergeScheduler(mergeScheduler);
  }
  LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
  mergePolicy.setMaxMergeDocs(maxMergeDocs);
  indexWriterConfig.setMergePolicy(mergePolicy);
  return new IndexWriter(directory, indexWriterConfig);
}
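A minimal caller sketch; the path, document, and maxMergeDocs value are illustrative, and the static analyzer and mergeScheduler fields are assumed to be initialized elsewhere in the enclosing class:

Directory dir = FSDirectory.open(new File("/tmp/example-index"));
// cap merged segments at 10000 docs and use the serial merge scheduler
IndexWriter writer = openWriter(dir, 10000, true);
try {
  writer.addDocument(new Document());
  writer.commit();
} finally {
  writer.close();
}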
/** Override this to customize index settings, e.g. which codec to use. */
protected IndexWriterConfig getIndexWriterConfig(
    Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
  IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
  iwc.setCodec(new Lucene46Codec());
  iwc.setOpenMode(openMode);

  // This way all merged segments will be sorted at
  // merge time, allowing for per-segment early termination
  // when those segments are searched:
  iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));

  return iwc;
}
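The SORT constant is defined outside this snippet. In Lucene's AnalyzingInfixSuggester, which this method closely resembles, segments are sorted by a numeric weight field; a plausible definition, assuming a long-valued field named "weight":

private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG));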
public void testNoMatchSize() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Field none = new Field("none", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  doc.add(none);

  String firstValue = "This is a test. Just a test highlighting from postings. Feel free to ignore.";
  body.setStringValue(firstValue);
  none.setStringValue(firstValue);
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  Query query = new TermQuery(new Term("none", "highlighting"));
  IndexSearcher searcher = newSearcher(ir);
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertThat(topDocs.totalHits, equalTo(1));
  int docId = topDocs.scoreDocs[0].doc;

  CustomPassageFormatter passageFormatter =
      new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());

  CustomPostingsHighlighter highlighter =
      new CustomPostingsHighlighter(null, passageFormatter, firstValue, false);
  Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
  assertThat(snippets.length, equalTo(0));

  highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, true);
  snippets = highlighter.highlightField("body", query, searcher, docId, 5);
  assertThat(snippets.length, equalTo(1));
  assertThat(snippets[0].getText(), equalTo("This is a test."));

  ir.close();
  dir.close();
}
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec =
      new Lucene60Codec() {
        PostingsFormat postingsFormat = new Completion50PostingsFormat();

        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
          if (suggestFields.contains(field)) {
            return postingsFormat;
          }
          return super.getPostingsFormatForField(field);
        }
      };
  iwc.setCodec(filterCodec);
  return iwc;
}
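A sketch of how this config might be used in a test, assuming Lucene's suggest module is on the classpath; the directory, field name, and weight are illustrative:

Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = iwcWithSuggestField(analyzer, Collections.singleton("suggest_field"));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
// SuggestField(name, value, weight) gets the completion postings format installed above
doc.add(new SuggestField("suggest_field", "suggestion", 4));
writer.addDocument(doc);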
/*
 * LUCENE-3528 - NRTManager hangs in certain situations
 */
public void testThreadStarvationNoDeleteNRTReader() throws IOException, InterruptedException {
  IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  Directory d = newDirectory();
  final CountDownLatch latch = new CountDownLatch(1);
  final CountDownLatch signal = new CountDownLatch(1);

  LatchedIndexWriter _writer = new LatchedIndexWriter(d, conf, latch, signal);
  final TrackingIndexWriter writer = new TrackingIndexWriter(_writer);
  final SearcherManager manager = new SearcherManager(_writer, false, null);
  Document doc = new Document();
  doc.add(newTextField("test", "test", Field.Store.YES));
  writer.addDocument(doc);
  manager.maybeRefresh();
  Thread t =
      new Thread() {
        @Override
        public void run() {
          try {
            signal.await();
            manager.maybeRefresh();
            writer.deleteDocuments(new TermQuery(new Term("foo", "barista")));
            manager.maybeRefresh(); // kick off another reopen so we inc. the internal gen
          } catch (Exception e) {
            e.printStackTrace();
          } finally {
            latch.countDown(); // let the add below finish
          }
        }
      };
  t.start();
  _writer.waitAfterUpdate = true; // wait in addDocument to let some reopens go through

  final long lastGen =
      writer.updateDocument(
          new Term("foo", "bar"),
          doc); // once this returns the doc is already reflected in the last reopen

  assertFalse(manager.isSearcherCurrent()); // false since there is a delete in the queue

  IndexSearcher searcher = manager.acquire();
  try {
    assertEquals(2, searcher.getIndexReader().numDocs());
  } finally {
    manager.release(searcher);
  }
  final ControlledRealTimeReopenThread<IndexSearcher> thread =
      new ControlledRealTimeReopenThread<>(writer, manager, 0.01, 0.01);
  thread.start(); // start reopening
  if (VERBOSE) {
    System.out.println("waiting now for generation " + lastGen);
  }

  final AtomicBoolean finished = new AtomicBoolean(false);
  Thread waiter =
      new Thread() {
        @Override
        public void run() {
          try {
            thread.waitForGeneration(lastGen);
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(ie);
          }
          finished.set(true);
        }
      };
  waiter.start();
  manager.maybeRefresh();
  waiter.join(1000);
  if (!finished.get()) {
    waiter.interrupt();
    fail("thread deadlocked on waitForGeneration");
  }
  thread.close();
  thread.join();
  IOUtils.close(manager, _writer, d);
}
public void testResetRootDocId() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc);

  List<Document> documents = new ArrayList<>();

  // 1 segment with 1 root document and 3 nested sub docs
  Document document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  indexWriter.commit();

  documents.clear();
  // 1 segment with:
  // 1 document, with 1 nested subdoc
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  documents.clear();
  // and 1 document, with 1 nested subdoc
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);

  indexWriter.commit();
  indexWriter.close();

  IndexService indexService = createIndex("test");
  DirectoryReader directoryReader = DirectoryReader.open(directory);
  directoryReader =
      ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
  IndexSearcher searcher = new IndexSearcher(directoryReader);

  indexService
      .mapperService()
      .merge(
          "test",
          new CompressedXContent(
              PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested")
                  .string()),
          MapperService.MergeReason.MAPPING_UPDATE,
          false);
  SearchContext searchContext = createSearchContext(indexService);
  AggregationContext context = new AggregationContext(searchContext);

  AggregatorFactories.Builder builder = AggregatorFactories.builder();
  NestedAggregatorBuilder factory = new NestedAggregatorBuilder("test", "nested_field");
  builder.addAggregator(factory);
  AggregatorFactories factories = builder.build(context, null);
  searchContext.aggregations(new SearchContextAggregations(factories));
  Aggregator[] aggs = factories.createTopLevelAggregators();
  BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
  collector.preCollection();
  // A regular search always excludes nested docs, so we use NonNestedDocsFilter.INSTANCE here
  // (otherwise MatchAllDocsQuery would be sufficient).
  // We exclude the root doc with uid type#2; this triggers the bug if we don't reset the root doc
  // when we process a new segment, because root doc type#3 and root doc type#1 have the same
  // segment docid.
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(Queries.newNonNestedFilter(), Occur.MUST);
  bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
  searcher.search(new ConstantScoreQuery(bq.build()), collector);
  collector.postCollection();

  Nested nested = (Nested) aggs[0].buildAggregation(0);
  // The bug manifests as 6 docs being returned: when currentRootDoc isn't reset, the previous
  // child docs from the first segment are emitted as hits.
  assertThat(nested.getDocCount(), equalTo(4L));

  directoryReader.close();
  directory.close();
}
@Test
public void testRecoveryDiff() throws IOException, InterruptedException {
  int numDocs = 2 + random().nextInt(100);
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(
        new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(
        new TextField(
            "body",
            TestUtil.randomRealisticUnicodeString(random()),
            random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(
        new SortedDocValuesField(
            "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
    docs.add(doc);
  }
  long seed = random().nextLong();
  Store.MetadataSnapshot first;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc =
        new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setMaxThreadStates(1);
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(random, directoryService),
            new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    first = store.getMetadata();
    assertDeleteContent(store, directoryService);
    store.close();
  }
  long time = new Date().getTime();
  while (time == new Date().getTime()) {
    Thread.sleep(10); // bump the time
  }
  Store.MetadataSnapshot second;
  Store store;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc =
        new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setMaxThreadStates(1);
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(random, directoryService),
            new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    second = store.getMetadata();
  }
  Store.RecoveryDiff diff = first.recoveryDiff(second);
  assertThat(first.size(), equalTo(second.size()));
  for (StoreFileMetaData md : first) {
    assertThat(second.get(md.name()), notNullValue());
    // si files are different - containing timestamps etc
    assertThat(second.get(md.name()).isSame(md), equalTo(false));
  }
  assertThat(diff.different.size(), equalTo(first.size()));
  assertThat(
      diff.identical.size(),
      equalTo(0)); // in lucene 5 nothing is identical - we use random ids in file headers
  assertThat(diff.missing, empty());

  // check the self diff
  Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
  assertThat(selfDiff.identical.size(), equalTo(first.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());

  // lets add some deletes
  Random random = new Random(seed);
  IndexWriterConfig iwc =
      new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(random.nextBoolean());
  iwc.setMaxThreadStates(1);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  IndexWriter writer = new IndexWriter(store.directory(), iwc);
  writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
  writer.commit();
  writer.close();
  Store.MetadataSnapshot metadata = store.getMetadata();
  StoreFileMetaData delFile = null;
  for (StoreFileMetaData md : metadata) {
    if (md.name().endsWith(".liv")) {
      delFile = md;
      break;
    }
  }
  Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
  if (delFile != null) {
    assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(2));
  } else {
    // an entire segment must be missing (single doc segment got dropped)
    assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different
  }

  // check the self diff
  selfDiff = metadata.recoveryDiff(metadata);
  assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());

  // add a new commit
  iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files
  iwc.setMaxThreadStates(1);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  writer = new IndexWriter(store.directory(), iwc);
  writer.addDocument(docs.get(0));
  writer.close();

  Store.MetadataSnapshot newCommitMetaData = store.getMetadata();
  Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
  if (delFile != null) {
    assertThat(
        newCommitDiff.identical.size(),
        equalTo(newCommitMetaData.size() - 5)); // segments_N, del file, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different
    assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv"));
    assertThat(newCommitDiff.missing.size(), equalTo(4)); // segments_N, cfs, cfe, si for the new segment
  } else {
    assertThat(
        newCommitDiff.identical.size(),
        equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.different.size(), equalTo(0));
    assertThat(
        newCommitDiff.missing.size(),
        equalTo(4)); // an entire segment must be missing (single doc segment got dropped)
                     // plus the commit is different
  }

  store.deleteContent();
  IOUtils.close(store);
}
public void testCustomPostingsHighlighter() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

  // good position but only one match
  final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue(firstValue);

  // two matches, not the best snippet due to its length though
  final String secondValue =
      "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
  Field body2 = new Field("body", "", offsetsType);
  doc.add(body2);
  body2.setStringValue(secondValue);

  // two matches and short, will be scored highest
  final String thirdValue = "This is highlighting the third short highlighting value.";
  Field body3 = new Field("body", "", offsetsType);
  doc.add(body3);
  body3.setStringValue(thirdValue);

  // one match, same as first but at the end, will be scored lower due to its position
  final String fourthValue = "Just a test4 highlighting from postings highlighter.";
  Field body4 = new Field("body", "", offsetsType);
  doc.add(body4);
  body4.setStringValue(fourthValue);

  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
  String secondHlValue =
      "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
  String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
  String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";

  IndexSearcher searcher = newSearcher(ir);
  Query query = new TermQuery(new Term("body", "highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertThat(topDocs.totalHits, equalTo(1));
  int docId = topDocs.scoreDocs[0].doc;

  String fieldValue =
      firstValue
          + HighlightUtils.PARAGRAPH_SEPARATOR
          + secondValue
          + HighlightUtils.PARAGRAPH_SEPARATOR
          + thirdValue
          + HighlightUtils.PARAGRAPH_SEPARATOR
          + fourthValue;

  CustomPostingsHighlighter highlighter =
      new CustomPostingsHighlighter(
          null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
  Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);

  assertThat(snippets.length, equalTo(4));
  assertThat(snippets[0].getText(), equalTo(firstHlValue));
  assertThat(snippets[1].getText(), equalTo(secondHlValue));
  assertThat(snippets[2].getText(), equalTo(thirdHlValue));
  assertThat(snippets[3].getText(), equalTo(fourthHlValue));

  ir.close();
  dir.close();
}