public void testNumerics() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new NumericDocValuesField("numbers", 0);
  doc.add(field);

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    field.setLongValue(random().nextLong());
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
  NumericDocValues single = merged.getNumericDocValues("numbers");
  for (int i = 0; i < numDocs; i++) {
    assertEquals(single.get(i), multi.get(i));
  }
  ir.close();
  ir2.close();
  dir.close();
}

@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(),
          directory,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setMergePolicy(newLogMergePolicy()));

  // Add a series of docs with filterable fields: url, text and date
  addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
  addDoc(writer, "http://lucene.apache.org", "New release pending", "20040102");
  addDoc(writer, "http://lucene.apache.org", "Lucene 1.9 out now", "20050101");
  addDoc(writer, "http://www.bar.com", "Local man bites dog", "20040101");
  addDoc(writer, "http://www.bar.com", "Dog bites local man", "20040102");
  addDoc(writer, "http://www.bar.com", "Dog uses Lucene", "20050101");
  addDoc(writer, "http://lucene.apache.org", "Lucene 2.0 out", "20050101");
  addDoc(writer, "http://lucene.apache.org", "Oops. Lucene 2.1 out", "20050102");

  // Until we fix LUCENE-2348, the index must
  // have only 1 segment:
  writer.forceMerge(1);

  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
}

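/*
 * The addDoc helper used above is not shown in this section. The following is a
 * minimal sketch inferred from the call sites (writer, url, text, date); the
 * field names and field types chosen here are assumptions, not the original
 * implementation.
 */
private void addDoc(RandomIndexWriter writer, String url, String text, String date)
    throws IOException {
  Document doc = new Document();
  // url and date are exact-match filter fields, so untokenized fields are assumed
  doc.add(newStringField("url", url, Field.Store.YES));
  doc.add(newTextField("text", text, Field.Store.YES));
  doc.add(newStringField("date", date, Field.Store.YES));
  writer.addDocument(doc);
}
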
/** make sure we downgrade positions and payloads correctly */
public void testMixing() throws Exception {
  // no positions
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    if (i < 19 && random().nextBoolean()) {
      for (int j = 0; j < 50; j++) {
        doc.add(new TextField("foo", "i have positions", Field.Store.NO));
      }
    } else {
      for (int j = 0; j < 50; j++) {
        doc.add(new Field("foo", "i have no positions", ft));
      }
    }
    iw.addDocument(doc);
    iw.commit();
  }

  if (random().nextBoolean()) {
    iw.forceMerge(1);
  }

  DirectoryReader ir = iw.getReader();
  FieldInfos fis = MultiFields.getMergedFieldInfos(ir);
  assertEquals(IndexOptions.DOCS_AND_FREQS, fis.fieldInfo("foo").getIndexOptions());
  assertFalse(fis.fieldInfo("foo").hasPayloads());
  iw.close();
  ir.close();
  dir.close(); // checkindex
}

public void testSeekCeilNotFound() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // Get empty string in there!
  doc.add(newStringField("field", "", Field.Store.NO));
  w.addDocument(doc);

  for (int i = 0; i < 36; i++) {
    doc = new Document();
    String term = "" + (char) (97 + i);
    String term2 = "a" + (char) (97 + i);
    doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
    w.addDocument(doc);
  }

  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(new byte[] {0x22})));
  assertEquals("a", te.term().utf8ToString());
  assertEquals(1L, te.ord());
  r.close();
  w.close();
  dir.close();
}

public void testDifferentTypedDocValuesField2() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  // Index doc values are single-valued so we should not
  // be able to add same field more than once:
  Field f = new NumericDocValuesField("field", 17);
  doc.add(f);
  doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
  try {
    w.addDocument(doc);
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }

  doc = new Document();
  doc.add(f);
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = w.getReader();
  assertEquals(17, getOnlySegmentReader(r).getNumericDocValues("field").get(0));
  r.close();
  w.close();
  d.close();
}

public void testMultiValuedDocValuesField() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  Field f = new NumericDocValuesField("field", 17);
  // Index doc values are single-valued so we should not
  // be able to add same field more than once:
  doc.add(f);
  doc.add(f);
  try {
    w.addDocument(doc);
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }

  doc = new Document();
  doc.add(f);
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = w.getReader();
  w.close();
  assertEquals(17, FieldCache.DEFAULT.getInts(getOnlySegmentReader(r), "field", false).get(0));
  r.close();
  d.close();
}

public void testThreeBlocks() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "m" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "mo" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  if (VERBOSE) {
    while (te.next() != null) {
      System.out.println("TERM: " + te.ord() + " " + te.term().utf8ToString());
    }
  }
  assertTrue(te.seekExact(new BytesRef("mo")));
  assertEquals(27, te.ord());

  te.seekExact(90);
  assertEquals(new BytesRef("s"), te.term());

  testEnum(te, terms);

  r.close();
  w.close();
  dir.close();
}

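/*
 * testEnum is a helper defined elsewhere in the test class. A minimal sketch is
 * given here, assuming it performs the same ord/term round-trip verification that
 * testTwoBlocks (below) inlines: seek to every ord and check the term, then do
 * random seeks by ord and by term. The body is inferred from that inline code,
 * not the original implementation.
 */
private void testEnum(TermsEnum te, List<String> terms) throws IOException {
  Collections.sort(terms);
  // seek by ord, descending, and verify ord and term agree
  for (int i = terms.size() - 1; i >= 0; i--) {
    te.seekExact(i);
    assertEquals(i, te.ord());
    assertEquals(terms.get(i), te.term().utf8ToString());
  }
  // random seeks, alternating between ord-based and term-based
  int iters = atLeast(1000);
  for (int iter = 0; iter < iters; iter++) {
    int ord = random().nextInt(terms.size());
    BytesRef term = new BytesRef(terms.get(ord));
    if (random().nextBoolean()) {
      te.seekExact(ord);
    } else {
      te.seekExact(term);
    }
    assertEquals(ord, te.ord());
    assertEquals(term, te.term());
  }
}
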
public void testSortedNumeric() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(
          new SortedNumericDocValuesField(
              "nums", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums");
  SortedNumericDocValues single = merged.getSortedNumericDocValues("nums");
  if (multi == null) {
    assertNull(single);
  } else {
    // check values
    for (int i = 0; i < numDocs; i++) {
      single.setDocument(i);
      ArrayList<Long> expectedList = new ArrayList<>();
      for (int j = 0; j < single.count(); j++) {
        expectedList.add(single.valueAt(j));
      }

      multi.setDocument(i);
      assertEquals(expectedList.size(), multi.count());
      for (int j = 0; j < single.count(); j++) {
        assertEquals(expectedList.get(j).longValue(), multi.valueAt(j));
      }
    }
  }
  ir.close();
  ir2.close();
  dir.close();
}

/**
 * Indexes at least 1 document with f1, and at least 1 document with f2.
 * Returns the norms for "field".
 */
NumericDocValues getNorms(String field, Field f1, Field f2) throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMergePolicy(newLogMergePolicy());
  RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);

  // add f1
  Document d = new Document();
  d.add(f1);
  riw.addDocument(d);

  // add f2
  d = new Document();
  d.add(f2);
  riw.addDocument(d);

  // add a mix of f1's and f2's
  int numExtraDocs = TestUtil.nextInt(random(), 1, 1000);
  for (int i = 0; i < numExtraDocs; i++) {
    d = new Document();
    d.add(random().nextBoolean() ? f1 : f2);
    riw.addDocument(d);
  }

  IndexReader ir1 = riw.getReader();
  // todo: generalize
  NumericDocValues norms1 = MultiDocValues.getNormValues(ir1, field);

  // fully merge and validate MultiNorms against single segment.
  riw.forceMerge(1);
  DirectoryReader ir2 = riw.getReader();
  NumericDocValues norms2 = getOnlySegmentReader(ir2).getNormValues(field);

  if (norms1 == null) {
    assertNull(norms2);
  } else {
    for (int docID = 0; docID < ir1.maxDoc(); docID++) {
      assertEquals(norms1.get(docID), norms2.get(docID));
    }
  }
  ir1.close();
  ir2.close();
  riw.close();
  dir.close();
  return norms1;
}

public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // "numbers" is only sometimes present, unlike "numbersAlways"
    if (random().nextInt(4) > 0) {
      doc.add(new NumericDocValuesField("numbers", random().nextLong()));
    }
    doc.add(new NumericDocValuesField("numbersAlways", random().nextLong()));
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  Bits multi = MultiDocValues.getDocsWithField(ir, "numbers");
  Bits single = merged.getDocsWithField("numbers");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.length(), multi.length());
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
  }

  multi = MultiDocValues.getDocsWithField(ir, "numbersAlways");
  single = merged.getDocsWithField("numbersAlways");
  assertEquals(single.length(), multi.length());
  for (int i = 0; i < numDocs; i++) {
    assertEquals(single.get(i), multi.get(i));
  }
  ir.close();
  ir2.close();
  dir.close();
}

public void testSimple() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    iw.addDocument(doc());
  }
  IndexReader ir = iw.getReader();
  verifyCount(ir);
  ir.close();

  iw.forceMerge(1);
  ir = iw.getReader();
  verifyCount(ir);
  ir.close();
  iw.close();
  dir.close();
}

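/*
 * doc() and verifyCount() are helpers defined elsewhere in the test class and are
 * not shown in this section. The sketch below is a guess at their intent, hedged
 * accordingly: doc() produces a small random document, and verifyCount() checks
 * the Terms.getDocCount() statistic of every field against the number of
 * documents actually visited through the postings. Neither body is the original
 * implementation.
 */
private Document doc() {
  Document doc = new Document();
  doc.add(newTextField("field", TestUtil.randomSimpleString(random()), Field.Store.NO));
  return doc;
}

private void verifyCount(IndexReader ir) throws IOException {
  Fields fields = MultiFields.getFields(ir);
  if (fields == null) {
    return;
  }
  for (String field : fields) {
    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }
    // visit every document of every term and compare against the stored statistic
    FixedBitSet visited = new FixedBitSet(ir.maxDoc());
    TermsEnum te = terms.iterator(null);
    while (te.next() != null) {
      DocsEnum de = te.docs(null, null);
      int docID;
      while ((docID = de.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        visited.set(docID);
      }
    }
    assertEquals(visited.cardinality(), terms.getDocCount());
  }
}
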
public void testSorted() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new SortedDocValuesField("bytes", new BytesRef());
  doc.add(field);

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random()));
    field.setBytesValue(ref);
    if (random().nextInt(7) == 0) {
      iw.addDocument(new Document());
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
  SortedDocValues single = merged.getSortedDocValues("bytes");
  assertEquals(single.getValueCount(), multi.getValueCount());
  for (int i = 0; i < numDocs; i++) {
    // check ord
    assertEquals(single.getOrd(i), multi.getOrd(i));
    // check value
    final BytesRef expected = BytesRef.deepCopyOf(single.get(i));
    final BytesRef actual = multi.get(i);
    assertEquals(expected, actual);
  }
  ir.close();
  ir2.close();
  dir.close();
}

public void testAddIndexes() throws IOException {
  Directory d1 = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d1);
  Document doc = new Document();
  doc.add(newStringField("id", "1", Field.Store.YES));
  doc.add(new NumericDocValuesField("dv", 1));
  w.addDocument(doc);
  IndexReader r1 = w.getReader();
  w.close();

  Directory d2 = newDirectory();
  w = new RandomIndexWriter(random(), d2);
  doc = new Document();
  doc.add(newStringField("id", "2", Field.Store.YES));
  doc.add(new NumericDocValuesField("dv", 2));
  w.addDocument(doc);
  IndexReader r2 = w.getReader();
  w.close();

  Directory d3 = newDirectory();
  w = new RandomIndexWriter(random(), d3);
  w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2));
  r1.close();
  d1.close();
  r2.close();
  d2.close();

  w.forceMerge(1);
  DirectoryReader r3 = w.getReader();
  w.close();
  AtomicReader sr = getOnlySegmentReader(r3);
  assertEquals(2, sr.numDocs());
  NumericDocValues docValues = sr.getNumericDocValues("dv");
  assertNotNull(docValues);
  r3.close();
  d3.close();
}

public void testTwoBlocks() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "m" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  if (VERBOSE) {
    System.out.println("TEST: now forceMerge");
  }
  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  assertTrue(te.seekExact(new BytesRef("mo")));
  assertEquals(27, te.ord());

  te.seekExact(54);
  assertEquals(new BytesRef("s"), te.term());

  Collections.sort(terms);
  for (int i = terms.size() - 1; i >= 0; i--) {
    te.seekExact(i);
    assertEquals(i, te.ord());
    assertEquals(terms.get(i), te.term().utf8ToString());
  }

  int iters = atLeast(1000);
  for (int iter = 0; iter < iters; iter++) {
    int ord = random().nextInt(terms.size());
    BytesRef term = new BytesRef(terms.get(ord));
    if (random().nextBoolean()) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter + " seek to ord=" + ord + " of " + terms.size());
      }
      te.seekExact(ord);
    } else {
      if (VERBOSE) {
        System.out.println(
            "TEST: iter=" + iter + " seek to term=" + terms.get(ord) + " ord=" + ord + " of "
                + terms.size());
      }
      te.seekExact(term);
    }
    assertEquals(ord, te.ord());
    assertEquals(term, te.term());
  }
  r.close();
  w.close();
  dir.close();
}

public void testSortedTermsEnum() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("field", "hello", Field.Store.NO));
  iwriter.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("field", "world", Field.Store.NO));
  iwriter.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("field", "beer", Field.Store.NO));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  AtomicReader ar = getOnlySegmentReader(ireader);
  SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field");
  assertEquals(3, dv.getValueCount());

  TermsEnum termsEnum = dv.termsEnum();

  // next()
  assertEquals("beer", termsEnum.next().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals("world", termsEnum.next().utf8ToString());
  assertEquals(2, termsEnum.ord());

  // seekCeil()
  assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

  // seekExact()
  assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
  assertEquals("world", termsEnum.term().utf8ToString());
  assertEquals(2, termsEnum.ord());
  assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));

  // seek(ord)
  termsEnum.seekExact(0);
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  termsEnum.seekExact(1);
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  termsEnum.seekExact(2);
  assertEquals("world", termsEnum.term().utf8ToString());
  assertEquals(2, termsEnum.ord());
  ireader.close();
  directory.close();
}

public void testRandomStoredFields() throws IOException {
  Directory dir = newDirectory();
  Random rand = random();
  RandomIndexWriter w =
      new RandomIndexWriter(
          rand,
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20)));
  // w.w.setNoCFSRatio(0.0);
  final int docCount = atLeast(200);
  final int fieldCount = _TestUtil.nextInt(rand, 1, 5);

  final List<Integer> fieldIDs = new ArrayList<Integer>();

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  Field idField = newField("id", "", customType);

  for (int i = 0; i < fieldCount; i++) {
    fieldIDs.add(i);
  }

  final Map<String, Document> docs = new HashMap<String, Document>();

  if (VERBOSE) {
    System.out.println("TEST: build index docCount=" + docCount);
  }

  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  for (int i = 0; i < docCount; i++) {
    Document doc = new Document();
    doc.add(idField);
    final String id = "" + i;
    idField.setStringValue(id);
    docs.put(id, doc);
    if (VERBOSE) {
      System.out.println("TEST: add doc id=" + id);
    }

    for (int field : fieldIDs) {
      final String s;
      if (rand.nextInt(4) != 3) {
        s = _TestUtil.randomUnicodeString(rand, 1000);
        doc.add(newField("f" + field, s, customType2));
      } else {
        s = null;
      }
    }
    w.addDocument(doc);
    if (rand.nextInt(50) == 17) {
      // mixup binding of field name -> Number every so often;
      // seed the shuffle with the test's Random so runs stay reproducible
      Collections.shuffle(fieldIDs, rand);
    }

    if (rand.nextInt(5) == 3 && i > 0) {
      final String delID = "" + rand.nextInt(i);
      if (VERBOSE) {
        System.out.println("TEST: delete doc id=" + delID);
      }
      w.deleteDocuments(new Term("id", delID));
      docs.remove(delID);
    }
  }

  if (VERBOSE) {
    System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
  }
  if (docs.size() > 0) {
    String[] idsList = docs.keySet().toArray(new String[docs.size()]);

    for (int x = 0; x < 2; x++) {
      IndexReader r = w.getReader();
      IndexSearcher s = newSearcher(r);

      if (VERBOSE) {
        System.out.println("TEST: cycle x=" + x + " r=" + r);
      }

      int num = atLeast(1000);
      for (int iter = 0; iter < num; iter++) {
        String testID = idsList[rand.nextInt(idsList.length)];
        if (VERBOSE) {
          System.out.println("TEST: test id=" + testID);
        }
        TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
        assertEquals(1, hits.totalHits);
        Document doc = r.document(hits.scoreDocs[0].doc);
        Document docExp = docs.get(testID);
        for (int i = 0; i < fieldCount; i++) {
          assertEquals(
              "doc " + testID + ", field f" + i + " is wrong",
              docExp.get("f" + i),
              doc.get("f" + i));
        }
      }
      r.close();
      w.forceMerge(1);
    }
  }
  w.close();
  dir.close();
}

@Nightly
public void testBigDocuments() throws IOException {
  // "big" as "much bigger than the chunk size"
  // for this test we force a FS dir
  // we can't just use newFSDirectory, because this test doesn't really index anything.
  // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
  Directory dir =
      new MockDirectoryWrapper(
          random(), new MMapDirectory(_TestUtil.getTempDir("testBigDocuments")));
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
  }

  final Document emptyDoc = new Document(); // emptyDoc
  final Document bigDoc1 = new Document(); // lot of small fields
  final Document bigDoc2 = new Document(); // 1 very big field

  final Field idField = new StringField("id", "", Store.NO);
  emptyDoc.add(idField);
  bigDoc1.add(idField);
  bigDoc2.add(idField);

  final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
  onlyStored.setIndexed(false);

  final Field smallField =
      new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
  final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
  for (int i = 0; i < numFields; ++i) {
    bigDoc1.add(smallField);
  }

  final Field bigField =
      new Field(
          "fld",
          randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2),
          onlyStored);
  bigDoc2.add(bigField);

  final int numDocs = atLeast(5);
  final Document[] docs = new Document[numDocs];
  for (int i = 0; i < numDocs; ++i) {
    docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
  }
  for (int i = 0; i < numDocs; ++i) {
    idField.setStringValue("" + i);
    iw.addDocument(docs[i]);
    if (random().nextInt(numDocs) == 0) {
      iw.commit();
    }
  }
  iw.commit();
  iw.forceMerge(1); // look at what happens when big docs are merged

  final DirectoryReader rd = DirectoryReader.open(dir);
  final IndexSearcher searcher = new IndexSearcher(rd);
  for (int i = 0; i < numDocs; ++i) {
    final Query query = new TermQuery(new Term("id", "" + i));
    final TopDocs topDocs = searcher.search(query, 1);
    assertEquals("" + i, 1, topDocs.totalHits);
    final Document doc = rd.document(topDocs.scoreDocs[0].doc);
    assertNotNull(doc);
    final IndexableField[] fieldValues = doc.getFields("fld");
    assertEquals(docs[i].getFields("fld").length, fieldValues.length);
    if (fieldValues.length > 0) {
      assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
    }
  }
  rd.close();
  iw.close();
  dir.close();
}

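/*
 * randomByteArray is a helper defined elsewhere in the test class, used both above
 * and in testWriteReadMerge below. A minimal sketch is given here, assuming it
 * fills an array of the requested length with values drawn from [0, max): "max"
 * controls how compressible the data is (max=2 yields highly compressible content,
 * max=256 yields essentially random bytes). The body is inferred from the call
 * sites, not the original code.
 */
private byte[] randomByteArray(int length, int max) {
  final byte[] result = new byte[length];
  for (int i = 0; i < length; ++i) {
    result[i] = (byte) random().nextInt(max);
  }
  return result;
}
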
public void testWriteReadMerge() throws IOException {
  // get another codec, other than the default: so we are merging segments across different codecs
  final Codec otherCodec;
  if ("SimpleText".equals(Codec.getDefault().getName())) {
    otherCodec = new Lucene46Codec();
  } else {
    otherCodec = new SimpleTextCodec();
  }
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());

  final int docCount = atLeast(200);
  final byte[][][] data = new byte[docCount][][];
  for (int i = 0; i < docCount; ++i) {
    final int fieldCount =
        rarely()
            ? RandomInts.randomIntBetween(random(), 1, 500)
            : RandomInts.randomIntBetween(random(), 1, 5);
    data[i] = new byte[fieldCount][];
    for (int j = 0; j < fieldCount; ++j) {
      final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
      final int max = rarely() ? 256 : 2;
      data[i][j] = randomByteArray(length, max);
    }
  }

  final FieldType type = new FieldType(StringField.TYPE_STORED);
  type.setIndexed(false);
  type.freeze();
  IntField id = new IntField("id", 0, Store.YES);
  for (int i = 0; i < data.length; ++i) {
    Document doc = new Document();
    doc.add(id);
    id.setIntValue(i);
    for (int j = 0; j < data[i].length; ++j) {
      Field f = new Field("bytes" + j, data[i][j], type);
      doc.add(f);
    }
    iw.w.addDocument(doc);
    if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
      iw.w.close();
      // test merging against a non-compressing codec
      if (iwConf.getCodec() == otherCodec) {
        iwConf.setCodec(Codec.getDefault());
      } else {
        iwConf.setCodec(otherCodec);
      }
      iw = new RandomIndexWriter(random(), dir, iwConf.clone());
    }
  }

  for (int i = 0; i < 10; ++i) {
    final int min = random().nextInt(data.length);
    final int max = min + random().nextInt(20);
    iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
  }

  iw.forceMerge(2); // force merges with deletions

  iw.commit();

  final DirectoryReader ir = DirectoryReader.open(dir);
  assertTrue(ir.numDocs() > 0);
  int numDocs = 0;
  for (int i = 0; i < ir.maxDoc(); ++i) {
    final Document doc = ir.document(i);
    if (doc == null) {
      continue;
    }
    ++numDocs;
    final int docId = doc.getField("id").numericValue().intValue();
    assertEquals(data[docId].length + 1, doc.getFields().size());
    for (int j = 0; j < data[docId].length; ++j) {
      final byte[] arr = data[docId][j];
      final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
      final byte[] arr2 =
          Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
      assertArrayEquals(arr, arr2);
    }
  }
  assertTrue(ir.numDocs() <= numDocs);
  ir.close();

  iw.deleteAll();
  iw.commit();
  iw.forceMerge(1);

  iw.close();
  dir.close();
}

// tries to make more dups than testSortedSet
public void testSortedSetWithDups() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(
          new SortedSetDocValuesField(
              "bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
  SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.getValueCount(), multi.getValueCount());
    // check values
    for (long i = 0; i < single.getValueCount(); i++) {
      final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
      final BytesRef actual = multi.lookupOrd(i);
      assertEquals(expected, actual);
    }
    // check ord list
    for (int i = 0; i < numDocs; i++) {
      single.setDocument(i);
      ArrayList<Long> expectedList = new ArrayList<>();
      long ord;
      while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        expectedList.add(ord);
      }

      multi.setDocument(i);
      int upto = 0;
      while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        assertEquals(expectedList.get(upto).longValue(), ord);
        upto++;
      }
      assertEquals(expectedList.size(), upto);
    }
  }
  ir.close();
  ir2.close();
  dir.close();
}