public void testDocValuesUnstored() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwconfig);
  for (int i = 0; i < 50; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", i));
    doc.add(new TextField("docId", "" + i, Field.Store.YES));
    writer.addDocument(doc);
  }
  DirectoryReader r = writer.getReader();
  SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
  FieldInfos fi = slow.getFieldInfos();
  FieldInfo dvInfo = fi.fieldInfo("dv");
  assertTrue(dvInfo.hasDocValues());
  NumericDocValues dv = slow.getNumericDocValues("dv");
  for (int i = 0; i < 50; i++) {
    assertEquals(i, dv.get(i));
    StoredDocument d = slow.document(i);
    // cannot use d.get("dv") due to another bug!
    assertNull(d.getField("dv"));
    assertEquals(Integer.toString(i), d.get("docId"));
  }
  slow.close();
  writer.close();
  dir.close();
}
public void testEmptyDocs() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  // make sure that the fact that documents might be empty is not a problem
  final Document emptyDoc = new Document();
  final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
  for (int i = 0; i < numDocs; ++i) {
    iw.addDocument(emptyDoc);
  }
  iw.commit();
  final DirectoryReader rd = DirectoryReader.open(dir);
  for (int i = 0; i < numDocs; ++i) {
    final Document doc = rd.document(i);
    assertNotNull(doc);
    assertTrue(doc.getFields().isEmpty());
  }
  rd.close();

  iw.close();
  dir.close();
}
public void testNumerics() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new NumericDocValuesField("numbers", 0);
  doc.add(field);

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    field.setLongValue(random().nextLong());
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
  NumericDocValues single = merged.getNumericDocValues("numbers");
  for (int i = 0; i < numDocs; i++) {
    assertEquals(single.get(i), multi.get(i));
  }
  ir.close();
  ir2.close();
  dir.close();
}
/** Generate a spelling suggestion for the definitions stored in defs */
public void createSpellingSuggestions() {
  IndexReader indexReader = null;
  SpellChecker checker;
  try {
    log.info("Generating spelling suggestion index ... ");
    indexReader = DirectoryReader.open(indexDirectory);
    checker = new SpellChecker(spellDirectory);
    // TODO: this only indexes the "defs" dictionary; possible bug?
    Analyzer analyzer = AnalyzerGuru.getAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false);
    log.info("done");
  } catch (IOException e) {
    log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e);
  } finally {
    if (indexReader != null) {
      try {
        indexReader.close();
      } catch (IOException e) {
        log.log(Level.WARNING, "An error occurred while closing reader", e);
      }
    }
    if (spellDirectory != null) {
      spellDirectory.close();
    }
  }
}
public void testTotalBytesSize() throws Exception {
  Directory d = newDirectory();
  if (d instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) d).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(5);
  iwc.setMergeScheduler(new TrackingCMS());
  if (TestUtil.getPostingsFormat("id").equals("SimpleText")) {
    // no SimpleText here: swap in a real postings format so the test stays fast
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
  }
  IndexWriter w = new IndexWriter(d, iwc);
  for (int i = 0; i < 1000; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, Field.Store.NO));
    w.addDocument(doc);

    if (random().nextBoolean()) {
      w.deleteDocuments(new Term("id", "" + random().nextInt(i + 1)));
    }
  }
  assertTrue(((TrackingCMS) w.getConfig().getMergeScheduler()).totMergedBytes != 0);
  w.close();
  d.close();
}
/*
 * Test a deletion policy that keeps the last N commits.
 */
public void testKeepLastNDeletionPolicy() throws IOException {
  final int N = 5;

  for (int pass = 0; pass < 2; pass++) {
    boolean useCompoundFile = (pass % 2) != 0;

    Directory dir = newDirectory();
    if (dir instanceof MockDirectoryWrapper) {
      // test manually deletes files
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
    for (int j = 0; j < N + 1; j++) {
      IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
          .setOpenMode(OpenMode.CREATE)
          .setIndexDeletionPolicy(policy)
          .setMaxBufferedDocs(10);
      MergePolicy mp = conf.getMergePolicy();
      mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
      IndexWriter writer = new IndexWriter(dir, conf);
      policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
      for (int i = 0; i < 17; i++) {
        addDoc(writer);
      }
      writer.forceMerge(1);
      writer.close();
    }

    assertTrue(policy.numDelete > 0);
    assertEquals(N + 1, policy.numOnInit);
    assertEquals(N + 1, policy.numOnCommit);

    // Simplistic check: just verify that only the past N segments_N files still
    // exist, and that a reader can be opened on each:
    long gen = SegmentInfos.getLastCommitGeneration(dir);
    for (int i = 0; i < N + 1; i++) {
      try {
        IndexReader reader = DirectoryReader.open(dir);
        reader.close();
        if (i == N) {
          fail("should have failed on commits prior to last " + N);
        }
      } catch (IOException e) {
        if (i != N) {
          throw e;
        }
      }
      if (i < N) {
        dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
      }
      gen--;
    }

    dir.close();
  }
}
public void testForceMergeNotNeeded() throws IOException {
  try (Directory dir = newDirectory()) {
    final AtomicBoolean mayMerge = new AtomicBoolean(true);
    final MergeScheduler mergeScheduler = new SerialMergeScheduler() {
      @Override
      public synchronized void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound)
          throws IOException {
        if (mayMerge.get() == false) {
          MergePolicy.OneMerge merge = writer.getNextMerge();
          if (merge != null) {
            System.out.println("TEST: we should not need any merging, yet merge policy returned merge " + merge);
            throw new AssertionError();
          }
        }
        super.merge(writer, trigger, newMergesFound);
      }
    };
    MergePolicy mp = mergePolicy();
    assumeFalse("this test cannot tolerate random forceMerges",
        mp.toString().contains("MockRandomMergePolicy"));
    mp.setNoCFSRatio(random().nextBoolean() ? 0 : 1);

    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergeScheduler(mergeScheduler);
    iwc.setMergePolicy(mp);

    IndexWriter writer = new IndexWriter(dir, iwc);
    final int numSegments = TestUtil.nextInt(random(), 2, 20);
    for (int i = 0; i < numSegments; ++i) {
      final int numDocs = TestUtil.nextInt(random(), 1, 5);
      for (int j = 0; j < numDocs; ++j) {
        writer.addDocument(new Document());
      }
      writer.getReader().close();
    }
    for (int i = 5; i >= 0; --i) {
      final int segmentCount = writer.getSegmentCount();
      final int maxNumSegments = i == 0 ? 1 : TestUtil.nextInt(random(), 1, 10);
      mayMerge.set(segmentCount > maxNumSegments);
      if (VERBOSE) {
        System.out.println("TEST: now forceMerge(maxNumSegments=" + maxNumSegments
            + ") vs segmentCount=" + segmentCount);
      }
      writer.forceMerge(maxNumSegments);
    }
    writer.close();
  }
}
IndexWriter getIndexWriter(Directory luceneDir)
    throws CorruptIndexException, LockObtainFailedException, IOException, ProviderException {
  IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, getLuceneAnalyzer());
  writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  IndexWriter writer = new IndexWriter(luceneDir, writerConfig);
  // writer.setInfoStream( System.out );
  return writer;
}
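A minimal caller sketch for getIndexWriter above, using Lucene 3.6-era field APIs to match Version.LUCENE_36; the index path, the "content" field name, and the pageText value are illustrative assumptions, not part of the original code:

// Hypothetical caller (sketch): open a directory, index one document, and always close the writer.
Directory luceneDir = FSDirectory.open(new File("/path/to/index")); // path is an assumption
IndexWriter writer = getIndexWriter(luceneDir);
try {
  String pageText = "example page text"; // hypothetical content
  Document doc = new Document();
  // Lucene 3.6-style field constructor
  doc.add(new Field("content", pageText, Field.Store.NO, Field.Index.ANALYZED));
  writer.addDocument(doc);
  writer.commit();
} finally {
  writer.close();
  luceneDir.close();
}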
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS)
    throws Exception {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random);
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  final MergePolicy mp = conf.getMergePolicy();
  mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
  IndexWriter writer = new IndexWriter(directory, conf);
  if (VERBOSE) {
    System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
  }

  for (int j = 0; j < MAX_DOCS; j++) {
    Document d = new Document();
    d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
    d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES));
    writer.addDocument(d);
  }
  writer.close();

  // try a search without OR
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = newSearcher(reader);

  Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
  out.println("Query: " + query.toString(PRIORITY_FIELD));
  if (VERBOSE) {
    System.out.println("TEST: search query=" + query);
  }

  final Sort sort = new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT));

  ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
  printHits(out, hits, searcher);
  checkHits(hits, MAX_DOCS, searcher);

  // try a new search with OR
  searcher = newSearcher(reader);
  hits = null;

  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
  booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
  out.println("Query: " + booleanQuery.toString(PRIORITY_FIELD));

  hits = searcher.search(booleanQuery, null, MAX_DOCS, sort).scoreDocs;
  printHits(out, hits, searcher);
  checkHits(hits, MAX_DOCS, searcher);

  reader.close();
  directory.close();
}
public void testReadSkip() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  FieldType ft = new FieldType();
  ft.setStored(true);
  ft.freeze();

  final String string = _TestUtil.randomSimpleString(random(), 50);
  final byte[] bytes = string.getBytes("UTF-8");
  final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
  final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
  final float f = random().nextFloat();
  final double d = random().nextDouble();

  List<Field> fields = Arrays.asList(
      new Field("bytes", bytes, ft),
      new Field("string", string, ft),
      new LongField("long", l, Store.YES),
      new IntField("int", i, Store.YES),
      new FloatField("float", f, Store.YES),
      new DoubleField("double", d, Store.YES));

  for (int k = 0; k < 100; ++k) {
    Document doc = new Document();
    for (Field fld : fields) {
      doc.add(fld);
    }
    iw.w.addDocument(doc);
  }
  iw.commit();

  final DirectoryReader reader = DirectoryReader.open(dir);
  final int docID = random().nextInt(100);
  for (Field fld : fields) {
    String fldName = fld.name();
    final Document sDoc = reader.document(docID, Collections.singleton(fldName));
    final IndexableField sField = sDoc.getField(fldName);
    if (Field.class.equals(fld.getClass())) {
      assertEquals(fld.binaryValue(), sField.binaryValue());
      assertEquals(fld.stringValue(), sField.stringValue());
    } else {
      assertEquals(fld.numericValue(), sField.numericValue());
    }
  }
  reader.close();
  iw.close();
  dir.close();
}
public void testSortedNumeric() throws Exception {
  Directory dir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(new SortedNumericDocValuesField("nums",
          TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums");
  SortedNumericDocValues single = merged.getSortedNumericDocValues("nums");
  if (multi == null) {
    assertNull(single);
  } else {
    // check values
    for (int i = 0; i < numDocs; i++) {
      single.setDocument(i);
      ArrayList<Long> expectedList = new ArrayList<>();
      for (int j = 0; j < single.count(); j++) {
        expectedList.add(single.valueAt(j));
      }

      multi.setDocument(i);
      assertEquals(expectedList.size(), multi.count());
      for (int j = 0; j < single.count(); j++) {
        assertEquals(expectedList.get(j).longValue(), multi.valueAt(j));
      }
    }
  }
  ir.close();
  ir2.close();
  dir.close();
}
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
  Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
  for (TestFieldSetting field : testDocs[0].fieldSettings) {
    if (field.storedPayloads) {
      mapping.put(field.name, new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
          TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
          filter = new TypeAsPayloadTokenFilter(filter);
          return new TokenStreamComponents(tokenizer, filter);
        }
      });
    }
  }
  PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
      new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

  Directory dir = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
  conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  for (TestDoc doc : testDocs) {
    Document d = new Document();
    d.add(new Field("id", doc.id, StringField.TYPE_STORED));
    for (int i = 0; i < doc.fieldContent.length; i++) {
      FieldType type = new FieldType(TextField.TYPE_STORED);
      TestFieldSetting fieldSetting = doc.fieldSettings[i];

      type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
      type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
      type.setStoreTermVectorPositions(
          fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
      type.setStoreTermVectors(true);
      type.freeze();
      d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
    }
    writer.updateDocument(new Term("id", doc.id), d);
    writer.commit();
  }
  writer.close();

  return DirectoryReader.open(dir);
}
public void testTypeChangeAfterOpenCreate() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);
  writer.close();

  // reopen with OpenMode.CREATE: the old index is wiped, so a different
  // docvalues type for "dv" is now legal
  conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  writer = new IndexWriter(dir, conf);
  doc = new Document();
  doc.add(new SortedDocValuesField("dv", new BytesRef("foo")));
  writer.addDocument(doc);
  writer.close();
  dir.close();
}
/*
 * Simple test that ensures we get the expected exceptions.
 */
public void testAddIndexMissingCodec() throws IOException {
  BaseDirectoryWrapper toAdd = newDirectory();
  // Disable checkIndex, else we get an exception because
  // of the unregistered codec:
  toAdd.setCheckIndexOnClose(false);
  {
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setCodec(new UnRegisteredCodec());
    IndexWriter w = new IndexWriter(toAdd, conf);
    Document doc = new Document();
    FieldType customType = new FieldType();
    customType.setIndexed(true);
    doc.add(newField("foo", "bar", customType));
    w.addDocument(doc);
    w.close();
  }

  {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setCodec(TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1 + random().nextInt(20))));
    IndexWriter w = new IndexWriter(dir, conf);
    try {
      w.addIndexes(toAdd);
      fail("no such codec");
    } catch (IllegalArgumentException ex) {
      // expected
    }
    w.close();
    IndexReader open = DirectoryReader.open(dir);
    assertEquals(0, open.numDocs());
    open.close();
    dir.close();
  }

  try {
    DirectoryReader.open(toAdd);
    fail("no such codec");
  } catch (IllegalArgumentException ex) {
    // expected
  }
  toAdd.close();
}
public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // leave "numbers" missing from roughly a quarter of docs so the field is
    // actually sparse (nextInt(4) >= 0 would always be true)
    if (random().nextInt(4) > 0) {
      doc.add(new NumericDocValuesField("numbers", random().nextLong()));
    }
    doc.add(new NumericDocValuesField("numbersAlways", random().nextLong()));
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  Bits multi = MultiDocValues.getDocsWithField(ir, "numbers");
  Bits single = merged.getDocsWithField("numbers");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.length(), multi.length());
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
  }

  multi = MultiDocValues.getDocsWithField(ir, "numbersAlways");
  single = merged.getDocsWithField("numbersAlways");
  assertEquals(single.length(), multi.length());
  for (int i = 0; i < numDocs; i++) {
    assertEquals(single.get(i), multi.get(i));
  }
  ir.close();
  ir2.close();
  dir.close();
}
@BeforeClass
public static void beforeClass() throws Exception {
  fieldInfos = new FieldInfos.Builder();
  DocHelper.setupDoc(testDoc);
  for (IndexableField field : testDoc) {
    fieldInfos.addOrUpdate(field.name(), field.fieldType());
  }
  dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy());
  ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.addDocument(testDoc);
  writer.close();
  FaultyIndexInput.doFail = false;
}
/*
 * Test keeping NO commit points. This is a viable and
 * useful case, e.g. where you want to build a big index and
 * you know there are no readers.
 */
public void testKeepNoneOnInitDeletionPolicy() throws IOException {
  for (int pass = 0; pass < 2; pass++) {
    boolean useCompoundFile = (pass % 2) != 0;

    Directory dir = newDirectory();

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
        .setOpenMode(OpenMode.CREATE)
        .setIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy())
        .setMaxBufferedDocs(10);
    MergePolicy mp = conf.getMergePolicy();
    mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
    IndexWriter writer = new IndexWriter(dir, conf);
    KeepNoneOnInitDeletionPolicy policy =
        (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
    for (int i = 0; i < 107; i++) {
      addDoc(writer);
    }
    writer.close();

    conf = newIndexWriterConfig(new MockAnalyzer(random()))
        .setOpenMode(OpenMode.APPEND)
        .setIndexDeletionPolicy(policy);
    mp = conf.getMergePolicy();
    mp.setNoCFSRatio(1.0);
    writer = new IndexWriter(dir, conf);
    policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
    writer.forceMerge(1);
    writer.close();

    assertEquals(2, policy.numOnInit);
    // If we are not auto committing then there should
    // be exactly 2 commits (one per close above):
    assertEquals(2, policy.numOnCommit);

    // Simplistic check: just verify the index is in fact
    // readable:
    IndexReader reader = DirectoryReader.open(dir);
    reader.close();

    dir.close();
  }
}
public void testSorted() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new SortedDocValuesField("bytes", new BytesRef());
  doc.add(field);

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random()));
    field.setBytesValue(ref);
    if (random().nextInt(7) == 0) {
      iw.addDocument(new Document());
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
  SortedDocValues single = merged.getSortedDocValues("bytes");
  assertEquals(single.getValueCount(), multi.getValueCount());
  for (int i = 0; i < numDocs; i++) {
    // check ord
    assertEquals(single.getOrd(i), multi.getOrd(i));
    // check value
    final BytesRef expected = BytesRef.deepCopyOf(single.get(i));
    final BytesRef actual = multi.get(i);
    assertEquals(expected, actual);
  }
  ir.close();
  ir2.close();
  dir.close();
}
// Add inconsistent document after reopening IW w/ create
public void testMixedTypesAfterReopenCreate() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();
  doc.add(new NumericDocValuesField("foo", 0));
  w.addDocument(doc);
  w.close();

  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  w = new IndexWriter(dir, iwc);
  doc = new Document();
  doc.add(new SortedDocValuesField("foo", new BytesRef("hello")));
  w.addDocument(doc);
  w.close();
  dir.close();
}
public void testAddNumericTwice() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());

  Directory directory = newDirectory();
  // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter iwriter = new IndexWriter(directory, iwc);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 1));
  doc.add(new NumericDocValuesField("dv", 2));
  try {
    iwriter.addDocument(doc);
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException expected) {
    // expected
  }

  iwriter.close();
  directory.close();
}
// LUCENE-1013
public void testSetMaxMergeDocs() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergeScheduler(new MyMergeScheduler())
      .setMaxBufferedDocs(2)
      .setMergePolicy(newLogMergePolicy());
  LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
  lmp.setMaxMergeDocs(20);
  lmp.setMergeFactor(2);
  IndexWriter iw = new IndexWriter(dir, conf);
  Document document = new Document();

  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);

  document.add(newField("tvtest", "a b c", customType));
  for (int i = 0; i < 177; i++) {
    iw.addDocument(document);
  }
  iw.close();
  dir.close();
}
/** Make sure an open IndexWriter on an incoming Directory causes a LockObtainFailedException */
public void testLocksBlock() throws Exception {
  Directory src = newDirectory();
  RandomIndexWriter w1 = new RandomIndexWriter(random(), src);
  w1.addDocument(new Document());
  w1.commit();

  Directory dest = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setWriteLockTimeout(1);
  RandomIndexWriter w2 = new RandomIndexWriter(random(), dest, iwc);

  try {
    w2.addIndexes(src);
    fail("did not hit expected exception");
  } catch (LockObtainFailedException lofe) {
    // expected
  }

  IOUtils.close(w1, w2, src, dest);
}
@BeforeClass
public static void beforeClass() throws Exception {
  testDoc = new Document();
  fieldInfos = new FieldInfos.Builder();
  DocHelper.setupDoc(testDoc);
  for (IndexableField field : testDoc.getFields()) {
    FieldInfo fieldInfo = fieldInfos.getOrAdd(field.name());
    IndexableFieldType ift = field.fieldType();
    fieldInfo.setIndexOptions(ift.indexOptions());
    if (ift.omitNorms()) {
      fieldInfo.setOmitsNorms();
    }
    fieldInfo.setDocValuesType(ift.docValuesType());
  }
  dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy());
  conf.getMergePolicy().setNoCFSRatio(0.0);
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.addDocument(testDoc);
  writer.close();
}
public void testTooLargeSortedBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());

  Directory directory = newDirectory();
  // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter iwriter = new IndexWriter(directory, iwc);
  Document doc = new Document();
  byte[] bytes = new byte[100000];
  BytesRef b = new BytesRef(bytes);
  random().nextBytes(bytes);
  doc.add(new SortedDocValuesField("dv", b));
  try {
    iwriter.addDocument(doc);
    fail("did not get expected exception");
  } catch (IllegalArgumentException expected) {
    // expected
  }
  iwriter.close();
  directory.close();
}
/** Optimize the index database */
public void optimize() {
  synchronized (lock) {
    if (running) {
      log.warning("Optimize terminated... Someone else is updating / optimizing it!");
      return;
    }
    running = true;
  }
  IndexWriter wrt = null;
  try {
    log.info("Optimizing the index ... ");
    Analyzer analyzer = new StandardAnalyzer(SearchEngine.LUCENE_VERSION);
    IndexWriterConfig conf = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);

    wrt = new IndexWriter(indexDirectory, conf);
    wrt.forceMerge(1); // this is deprecated and not needed anymore
    log.info("done");
    synchronized (lock) {
      if (dirtyFile.exists() && !dirtyFile.delete()) {
        log.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
      }
      dirty = false;
    }
  } catch (IOException e) {
    log.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
  } finally {
    if (wrt != null) {
      try {
        wrt.close();
      } catch (IOException e) {
        log.log(Level.WARNING, "An error occurred while closing writer", e);
      }
    }
    synchronized (lock) {
      running = false;
    }
  }
}
/**
 * Index a picture.
 *
 * @param source the raw image bytes to index
 * @param picture_id the UUID identifying the picture
 * @param conf the IndexWriterConfig to use; a default is created when null
 * @throws IOException if reading the image or writing the index fails
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf)
    throws IOException {
  ByteArrayInputStream in = new ByteArrayInputStream(source);
  BufferedImage image = ImageIO.read(in);

  // Create a Lucene IndexWriter config when none was provided
  log.debug("No IndexWriterConfig provided? " + (conf == null));
  if (conf == null) {
    conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
        new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  }

  luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(),
      DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
  luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(),
      DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
  luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(),
      DocumentBuilderFactory.getColorLayoutBuilder(), conf);
  luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(),
      DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
  luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(),
      DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
  luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(),
      DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);
}
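A hedged usage sketch for index(...) above: passing a null config exercises the default-config branch. The file path is purely illustrative, and java.nio.file.Files/Paths are assumed to be available:

// Hypothetical caller (sketch): load image bytes from disk and index them under a fresh UUID.
byte[] source = Files.readAllBytes(Paths.get("/tmp/picture.jpg")); // path is an assumption
UUID pictureId = UUID.randomUUID();
index(source, pictureId, null); // null conf: the method builds a CREATE_OR_APPEND default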
// LUCENE-2790: tests that the non CFS files were deleted by addIndexes
public void testNonCFSLeftovers() throws Exception {
  Directory[] dirs = new Directory[2];
  for (int i = 0; i < dirs.length; i++) {
    dirs[i] = new RAMDirectory();
    IndexWriter w = new IndexWriter(dirs[i],
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document d = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    d.add(new Field("c", "v", customType));
    w.addDocument(d);
    w.close();
  }

  IndexReader[] readers = new IndexReader[] {
      DirectoryReader.open(dirs[0]), DirectoryReader.open(dirs[1])};

  Directory dir = new MockDirectoryWrapper(random(), new RAMDirectory());
  IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy(true));
  MergePolicy lmp = conf.getMergePolicy();
  // Force creation of CFS:
  lmp.setNoCFSRatio(1.0);
  lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
  IndexWriter w3 = new IndexWriter(dir, conf);
  w3.addIndexes(readers);
  w3.close();
  // we should now see segments_X,
  // segments.gen, _Y.cfs, _Y.cfe, _Z.si
  assertEquals("Only one compound segment should exist, but got: " + Arrays.toString(dir.listAll()),
      5, dir.listAll().length);
  dir.close();
}
public void testStallControl() throws InterruptedException, IOException {
  int[] numThreads = new int[] {4 + random().nextInt(8), 1};
  final int numDocumentsToIndex = 50 + random().nextInt(50);
  for (int i = 0; i < numThreads.length; i++) {
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    MockDirectoryWrapper dir = newMockDirectory();
    // mock a very slow harddisk sometimes here so that flushing is very slow
    dir.setThrottling(MockDirectoryWrapper.Throttling.SOMETIMES);
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
    iwc.setFlushPolicy(flushPolicy);

    DocumentsWriterPerThreadPool threadPool =
        new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2);
    iwc.setIndexerThreadPool(threadPool);
    // with such a small ram buffer we should be stalled quite quickly
    iwc.setRAMBufferSizeMB(0.25);
    IndexWriter writer = new IndexWriter(dir, iwc);
    IndexThread[] threads = new IndexThread[numThreads[i]];
    for (int x = 0; x < threads.length; x++) {
      threads[x] = new IndexThread(numDocs, numThreads[i], writer, lineDocFile, false);
      threads[x].start();
    }

    for (int x = 0; x < threads.length; x++) {
      threads[x].join();
    }
    DocumentsWriter docsWriter = writer.getDocsWriter();
    assertNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
    assertEquals("all flushes must be due", 0, flushControl.flushBytes());
    assertEquals(numDocumentsToIndex, writer.numDocs());
    assertEquals(numDocumentsToIndex, writer.maxDoc());
    if (numThreads[i] == 1) {
      assertFalse("single thread must not block numThreads: " + numThreads[i],
          docsWriter.flushControl.stallControl.hasBlocked());
    }
    if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
      assertTrue(docsWriter.flushControl.stallControl.wasStalled());
    }
    assertActiveBytesAfter(flushControl);
    writer.close(true);
    dir.close();
  }
}
/** create a RandomIndexWriter with the provided config */
public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
  // TODO: this should be solved in a different way; Random should not be shared (!).
  this.r = new Random(r.nextLong());
  w = mockIndexWriter(dir, c, r);
  flushAt = _TestUtil.nextInt(r, 10, 1000);
  codec = w.getConfig().getCodec();
  if (LuceneTestCase.VERBOSE) {
    System.out.println("RIW dir=" + dir + " config=" + w.getConfig());
    System.out.println("codec default=" + codec.getName());
  }

  // Make sure we sometimes test indices that don't get
  // any forced merges:
  doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean();
}
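A small sketch of how this constructor might be used inside a LuceneTestCase-style test; the fixed seed and the single-document body are illustrative assumptions:

// Hypothetical test body (sketch): drive RandomIndexWriter with an explicit config.
Random random = new Random(42L); // fixed seed is an assumption, for reproducibility
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
RandomIndexWriter riw = new RandomIndexWriter(random, dir, conf);
riw.addDocument(new Document());
DirectoryReader reader = riw.getReader();
assertEquals(1, reader.numDocs());
reader.close();
riw.close();
dir.close();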
void buildGeoIndexWriter() throws CorruptIndexException, LockObtainFailedException, IOException {
  geoComparator = new CartesianGeoRecordComparator();
  geoRecordSerializer = new CartesianGeoRecordSerializer();

  directory = new RAMDirectory();
  config = new IndexWriterConfig(Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT));
  config.setMergePolicy(new MergeOnOptimizeOnly());

  geoConfig = getGeoSearchConfig();
  geoConfig.addFieldBitMask(LOCATION_FIELD, LOCATION_BIT_MASK);
  geoConfig.addFieldBitMask(IMAGE_LOCATION_FIELD, IMAGE_LOCATION_BIT_MASK);

  writer = new GeoIndexWriter(directory, config, geoConfig);
}