public void testDocValuesUnstored() throws IOException { Directory dir = newDirectory(); IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwconfig.setMergePolicy(newLogMergePolicy()); IndexWriter writer = new IndexWriter(dir, iwconfig); for (int i = 0; i < 50; i++) { Document doc = new Document(); doc.add(new NumericDocValuesField("dv", i)); doc.add(new TextField("docId", "" + i, Field.Store.YES)); writer.addDocument(doc); } DirectoryReader r = writer.getReader(); SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r); FieldInfos fi = slow.getFieldInfos(); FieldInfo dvInfo = fi.fieldInfo("dv"); assertTrue(dvInfo.hasDocValues()); NumericDocValues dv = slow.getNumericDocValues("dv"); for (int i = 0; i < 50; i++) { assertEquals(i, dv.get(i)); StoredDocument d = slow.document(i); // cannot use d.get("dv") due to another bug! assertNull(d.getField("dv")); assertEquals(Integer.toString(i), d.get("docId")); } slow.close(); writer.close(); dir.close(); }
public void testIgnoreStoredFields() throws IOException { Directory dir1 = getDir1(random()); Directory dir2 = getDir2(random()); LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)); LeafReader ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2)); // with overlapping ParallelLeafReader pr = new ParallelLeafReader(false, new LeafReader[] {ir1, ir2}, new LeafReader[] {ir1}); assertEquals("v1", pr.document(0).get("f1")); assertEquals("v1", pr.document(0).get("f2")); assertNull(pr.document(0).get("f3")); assertNull(pr.document(0).get("f4")); // check that fields are there assertNotNull(pr.terms("f1")); assertNotNull(pr.terms("f2")); assertNotNull(pr.terms("f3")); assertNotNull(pr.terms("f4")); pr.close(); // no stored fields at all pr = new ParallelLeafReader(false, new LeafReader[] {ir2}, new LeafReader[0]); assertNull(pr.document(0).get("f1")); assertNull(pr.document(0).get("f2")); assertNull(pr.document(0).get("f3")); assertNull(pr.document(0).get("f4")); // check that fields are there assertNull(pr.terms("f1")); assertNull(pr.terms("f2")); assertNotNull(pr.terms("f3")); assertNotNull(pr.terms("f4")); pr.close(); // without overlapping pr = new ParallelLeafReader(true, new LeafReader[] {ir2}, new LeafReader[] {ir1}); assertEquals("v1", pr.document(0).get("f1")); assertEquals("v1", pr.document(0).get("f2")); assertNull(pr.document(0).get("f3")); assertNull(pr.document(0).get("f4")); // check that fields are there assertNull(pr.terms("f1")); assertNull(pr.terms("f2")); assertNotNull(pr.terms("f3")); assertNotNull(pr.terms("f4")); pr.close(); // no main readers try { new ParallelLeafReader(true, new LeafReader[0], new LeafReader[] {ir1}); fail("didn't get expected exception: need a non-empty main-reader array"); } catch (IllegalArgumentException iae) { // pass } dir1.close(); dir2.close(); }
// Fields 1 & 2 in one index, 3 & 4 in other, with ParallelReader: private IndexSearcher parallel(Random random) throws IOException { dir1 = getDir1(random); dir2 = getDir2(random); ParallelLeafReader pr = new ParallelLeafReader( SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)), SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2))); TestUtil.checkReader(pr); return newSearcher(pr); }
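// Note: getDir1/getDir2 are helpers from the surrounding test class and are not shown in these
// excerpts. The sketch below is a hypothetical reconstruction, consistent with the assertions in
// the tests above (two documents per index; fields f1/f2 in one directory, f3/f4 in the other,
// with values "v1" and "v2"); it assumes the usual LuceneTestCase utilities (newDirectory,
// newIndexWriterConfig, newTextField).
private Directory getDir1(Random random) throws IOException {
  Directory dir1 = newDirectory();
  IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig(new MockAnalyzer(random)));
  Document d1 = new Document();
  d1.add(newTextField("f1", "v1", Field.Store.YES));
  d1.add(newTextField("f2", "v1", Field.Store.YES));
  w1.addDocument(d1);
  Document d2 = new Document();
  d2.add(newTextField("f1", "v2", Field.Store.YES));
  d2.add(newTextField("f2", "v2", Field.Store.YES));
  w1.addDocument(d2);
  w1.close();
  return dir1;
}
private Directory getDir2(Random random) throws IOException {
  Directory dir2 = newDirectory();
  IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig(new MockAnalyzer(random)));
  Document d3 = new Document();
  d3.add(newTextField("f3", "v1", Field.Store.YES));
  d3.add(newTextField("f4", "v1", Field.Store.YES));
  w2.addDocument(d3);
  Document d4 = new Document();
  d4.add(newTextField("f3", "v2", Field.Store.YES));
  d4.add(newTextField("f4", "v2", Field.Store.YES));
  w2.addDocument(d4);
  w2.close();
  return dir2;
}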
public void testFieldNames() throws Exception { Directory dir1 = getDir1(random()); Directory dir2 = getDir2(random()); ParallelLeafReader pr = new ParallelLeafReader( SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)), SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2))); FieldInfos fieldInfos = pr.getFieldInfos(); assertEquals(4, fieldInfos.size()); assertNotNull(fieldInfos.fieldInfo("f1")); assertNotNull(fieldInfos.fieldInfo("f2")); assertNotNull(fieldInfos.fieldInfo("f3")); assertNotNull(fieldInfos.fieldInfo("f4")); pr.close(); dir1.close(); dir2.close(); }
protected LeafReaderContext refreshReader() throws Exception { if (readerContext != null) { readerContext.reader().close(); } LeafReader reader = SlowCompositeReaderWrapper.wrap(topLevelReader = DirectoryReader.open(writer, true)); readerContext = reader.getContext(); return readerContext; }
public void testRefCounts1() throws IOException { Directory dir1 = getDir1(random()); Directory dir2 = getDir2(random()); LeafReader ir1, ir2; // close subreaders: ParallelLeafReader will not change refCounts, but will close the subreaders on its own close ParallelLeafReader pr = new ParallelLeafReader( ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)), ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2))); // check RefCounts assertEquals(1, ir1.getRefCount()); assertEquals(1, ir2.getRefCount()); pr.close(); assertEquals(0, ir1.getRefCount()); assertEquals(0, ir2.getRefCount()); dir1.close(); dir2.close(); }
public void testRefCounts2() throws IOException { Directory dir1 = getDir1(random()); Directory dir2 = getDir2(random()); LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)); LeafReader ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2)); // don't close subreaders, so ParallelReader will increment refcounts ParallelLeafReader pr = new ParallelLeafReader(false, ir1, ir2); // check RefCounts assertEquals(2, ir1.getRefCount()); assertEquals(2, ir2.getRefCount()); pr.close(); assertEquals(1, ir1.getRefCount()); assertEquals(1, ir2.getRefCount()); ir1.close(); ir2.close(); assertEquals(0, ir1.getRefCount()); assertEquals(0, ir2.getRefCount()); dir1.close(); dir2.close(); }
public void testIncompatibleIndexes() throws IOException { // two documents: Directory dir1 = getDir1(random()); // one document only: Directory dir2 = newDirectory(); IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig(new MockAnalyzer(random()))); Document d3 = new Document(); d3.add(newTextField("f3", "v1", Field.Store.YES)); w2.addDocument(d3); w2.close(); LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)); LeafReader ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2)); try { new ParallelLeafReader(ir1, ir2); fail("didn't get expected exception: indexes don't have same number of documents"); } catch (IllegalArgumentException e) { // expected exception } try { new ParallelLeafReader( random().nextBoolean(), new LeafReader[] {ir1, ir2}, new LeafReader[] {ir1, ir2}); fail("didn't get expected exception: indexes don't have same number of documents"); } catch (IllegalArgumentException e) { // expected exception } // check RefCounts assertEquals(1, ir1.getRefCount()); assertEquals(1, ir2.getRefCount()); ir1.close(); ir2.close(); dir1.close(); dir2.close(); }
public void testAddIndexes() throws IOException { Directory d1 = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d1); Document doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); doc.add(new NumericDocValuesField("dv", 1)); w.addDocument(doc); IndexReader r1 = w.getReader(); w.close(); Directory d2 = newDirectory(); w = new RandomIndexWriter(random(), d2); doc = new Document(); doc.add(newStringField("id", "2", Field.Store.YES)); doc.add(new NumericDocValuesField("dv", 2)); w.addDocument(doc); IndexReader r2 = w.getReader(); w.close(); Directory d3 = newDirectory(); w = new RandomIndexWriter(random(), d3); w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2)); r1.close(); d1.close(); r2.close(); d2.close(); w.forceMerge(1); DirectoryReader r3 = w.getReader(); w.close(); AtomicReader sr = getOnlySegmentReader(r3); assertEquals(2, sr.numDocs()); NumericDocValues docValues = sr.getNumericDocValues("dv"); assertNotNull(docValues); r3.close(); d3.close(); }
public void testSimple() throws Exception { Directory dir = newDirectory(); final RandomIndexWriter w = new RandomIndexWriter( random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMergePolicy(newLogMergePolicy())); Document doc = new Document(); Field field = newTextField("field", "", Field.Store.NO); doc.add(field); field.setStringValue("a b c"); w.addDocument(doc); field.setStringValue("d e f"); w.addDocument(doc); field.setStringValue("a f"); w.addDocument(doc); final IndexReader r = w.getReader(); w.close(); final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r); final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field"); SortedSetDocValues iter = dto.iterator(ar); iter.setDocument(0); assertEquals(0, iter.nextOrd()); assertEquals(1, iter.nextOrd()); assertEquals(2, iter.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); iter.setDocument(1); assertEquals(3, iter.nextOrd()); assertEquals(4, iter.nextOrd()); assertEquals(5, iter.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); iter.setDocument(2); assertEquals(0, iter.nextOrd()); assertEquals(5, iter.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); r.close(); dir.close(); }
public void testCloseInnerReader() throws Exception { Directory dir1 = getDir1(random()); LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)); // with overlapping ParallelLeafReader pr = new ParallelLeafReader(true, new LeafReader[] {ir1}, new LeafReader[] {ir1}); ir1.close(); try { pr.document(0); fail("ParallelLeafReader should already be closed because the inner reader was closed!"); } catch (AlreadyClosedException e) { // pass } // noop: pr.close(); dir1.close(); }
@Before public void setup() throws Exception { super.setUp(); // setup field mappers strMapper = new StringFieldMapper.Builder("str_value") .build(new Mapper.BuilderContext(null, new ContentPath(1))); lngMapper = new LongFieldMapper.Builder("lng_value") .build(new Mapper.BuilderContext(null, new ContentPath(1))); dblMapper = new DoubleFieldMapper.Builder("dbl_value") .build(new Mapper.BuilderContext(null, new ContentPath(1))); // create index and fielddata service ifdService = new IndexFieldDataService(new Index("test"), new DummyCircuitBreakerService()); MapperService mapperService = MapperTestUtils.newMapperService( ifdService.index(), ImmutableSettings.Builder.EMPTY_SETTINGS); ifdService.setIndexService(new StubIndexService(mapperService)); writer = new IndexWriter( new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION))); int numDocs = 10; for (int i = 0; i < numDocs; i++) { Document d = new Document(); d.add(new StringField(strMapper.names().indexName(), "str" + i, Field.Store.NO)); d.add(new LongField(lngMapper.names().indexName(), i, Field.Store.NO)); d.add(new DoubleField(dblMapper.names().indexName(), Double.valueOf(i), Field.Store.NO)); writer.addDocument(d); } reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true)); }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); final Random r = random(); final IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r)) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB( scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc); int numUniqueChildValues = scaledRandomIntBetween(100, 2000); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet(); int childDocId = 0; int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues); ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds = new ObjectObjectOpenHashMap<>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); boolean filterMe = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("delete", "me", Field.Store.NO)); } if (filterMe) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("filter", "me", Field.Store.NO)); } indexWriter.addDocument(document); int numChildDocs = scaledRandomIntBetween(0, 100); for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableMap<String, FloatArrayList> parentIdToChildScores; if (childValueToParentIds.containsKey(childValue)) { parentIdToChildScores = childValueToParentIds.lget(); } else { childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>()); } if (!markParentAsDeleted && !filterMe) { FloatArrayList childScores = parentIdToChildScores.get(parent); if (childScores == null) { parentIdToChildScores.put(parent, childScores = new FloatArrayList()); } childScores.add(1f); } } } } // Delete docs that are marked to be deleted. 
indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.commit(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); int max = numUniqueChildValues / 4; for (int i = 0; i < max; i++) { // Simulate a parent update if (random().nextBoolean()) { final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size(); int numberOfUpdates = RandomInts.randomIntBetween( random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5)); for (int j = 0; j < numberOfUpdates; j++) { int parentId; do { parentId = random().nextInt(numParentDocs); } while (filteredOrDeletedDocs.contains(parentId)); String parentUid = Uid.createUid("parent", Integer.toString(parentId)); indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid)); Document document = new Document(); document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); indexWriter.addDocument(document); } indexReader.close(); indexReader = DirectoryReader.open(indexWriter.w, true); searcher = new IndexSearcher(indexReader); engineSearcher = new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); } String childValue = childValues[random().nextInt(numUniqueChildValues)]; int shortCircuitParentDocSet = random().nextInt(numParentDocs); ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)]; // leave min/max set to 0 half the time int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110); int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110); QueryBuilder queryBuilder = hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue))) .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH)) .minChildren(minChildren) .maxChildren(maxChildren) .setShortCircuitCutoff(shortCircuitParentDocSet); // Using a FQ, will invoke / test the Scorer#advance(..) 
// and also let the Weight#scorer not get live docs as acceptedDocs queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me"))); Query query = parseQuery(queryBuilder); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); int numHits = 1 + random().nextInt(25); TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits); searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector)); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits); if (childValueToParentIds.containsKey(childValue)) { LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader); final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()]; Terms terms = slowLeafReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) { int count = entry.getValue().elementsCount; if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey())); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); scores[docsEnum.docID()] = new FloatArrayList(entry.getValue()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } MockScorer mockScorer = new MockScorer(scoreType); final LeafCollector leafCollector = expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext()); leafCollector.setScorer(mockScorer); for (int doc = expectedResult.nextSetBit(0); doc < slowLeafReader.maxDoc(); doc = doc + 1 >= expectedResult.length() ? DocIdSetIterator.NO_MORE_DOCS : expectedResult.nextSetBit(doc + 1)) { mockScorer.scores = scores[doc]; leafCollector.collect(doc); } } assertBitSet(actualResult, expectedResult, searcher); assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs()); } indexWriter.close(); indexReader.close(); directory.close(); }
public TestReader(IndexReader reader) throws IOException { super(SlowCompositeReaderWrapper.wrap(reader)); }
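// The examples in this collection share one core pattern: SlowCompositeReaderWrapper.wrap(...)
// flattens a multi-segment DirectoryReader into a single leaf view so that per-segment APIs
// (doc values, terms, live docs) can be used against the whole index. Below is a minimal sketch
// of that pattern, assuming a Lucene 4.x-era API (AtomicReader) and the usual
// org.apache.lucene.index / org.apache.lucene.store imports; the "dv" field and the method name
// are illustrative only.
static void printMergedDocValues(Directory dir) throws IOException {
  DirectoryReader composite = DirectoryReader.open(dir);
  // The wrapper presents all segments as one (slow) merged leaf.
  AtomicReader merged = SlowCompositeReaderWrapper.wrap(composite);
  try {
    NumericDocValues dv = merged.getNumericDocValues("dv");
    if (dv != null) {
      for (int docId = 0; docId < merged.maxDoc(); docId++) {
        System.out.println(docId + " -> " + dv.get(docId));
      }
    }
  } finally {
    merged.close(); // closing the wrapper also closes the wrapped reader (as in testDocValuesUnstored above)
  }
}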
public SparseInstances readIndex(String indexPath, String destFile, int threshold) throws Exception { if (indexPath == null || destFile == null) { System.out.println("error: indexPath or destFile is null\n"); return null; } DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms(reviewKey); int capacity = (int) terms.size(); HashMap<String, Integer> wordDict = new HashMap<>(capacity); capacity = capacity > 65535 ? 65535 : capacity; SparseInstances instData = new SparseInstances(capacity, reader.numDocs()); TermsEnum termsEnum = terms.iterator(); int index = 0; BytesRef term = null; String strTerm = null; while ((term = termsEnum.next()) != null) { strTerm = term.utf8ToString(); if (termsEnum.totalTermFreq() < threshold) { continue; } if (strTerm.isEmpty()) { continue; } if (wordDict.get(strTerm) != null) { continue; } wordDict.put(strTerm, index); instData.addAttribute(strTerm); index++; } int numAtt = instData.numAttributes(); int numInst = instData.numInstances(); Integer attIndex = null; String id = null; int termIndex = 0; for (int docIndex = 0; docIndex < numInst; docIndex++) { id = reader.document(docIndex).getField(idKey).stringValue(); Terms docTerms = reader.getTermVector(docIndex, reviewKey); if (docTerms == null) { continue; } int[] indices = new int[(int) docTerms.size()]; double[] attValues = new double[(int) docTerms.size()]; termsEnum = docTerms.iterator(); termIndex = 0; while ((term = termsEnum.next()) != null) { strTerm = term.utf8ToString(); attIndex = wordDict.get(strTerm); if (attIndex == null) { continue; } indices[termIndex] = attIndex.intValue(); attValues[termIndex] = termsEnum.totalTermFreq(); termIndex++; } ESparseInstance instance = new ESparseInstance(id, 1.0, attValues, indices, numAtt); instData.addInstance(instance); } return instData; }
public void testRandom() throws Exception { Directory dir = newDirectory(); final int NUM_TERMS = atLeast(20); final Set<BytesRef> terms = new HashSet<BytesRef>(); while (terms.size() < NUM_TERMS) { final String s = _TestUtil.randomRealisticUnicodeString(random()); // final String s = _TestUtil.randomSimpleString(random); if (s.length() > 0) { terms.add(new BytesRef(s)); } } final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); Arrays.sort(termsArray); final int NUM_DOCS = atLeast(100); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); // Sometimes swap in codec that impls ord(): if (random().nextInt(10) == 7) { // Make sure terms index has ords: Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds")); conf.setCodec(codec); } final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf); final int[][] idToOrds = new int[NUM_DOCS][]; final Set<Integer> ordsForDocSet = new HashSet<Integer>(); for (int id = 0; id < NUM_DOCS; id++) { Document doc = new Document(); doc.add(new IntField("id", id, Field.Store.YES)); final int termCount = _TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER); while (ordsForDocSet.size() < termCount) { ordsForDocSet.add(random().nextInt(termsArray.length)); } final int[] ordsForDoc = new int[termCount]; int upto = 0; if (VERBOSE) { System.out.println("TEST: doc id=" + id); } for (int ord : ordsForDocSet) { ordsForDoc[upto++] = ord; Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO); if (VERBOSE) { System.out.println(" f=" + termsArray[ord].utf8ToString()); } doc.add(field); } ordsForDocSet.clear(); Arrays.sort(ordsForDoc); idToOrds[id] = ordsForDoc; w.addDocument(doc); } final DirectoryReader r = w.getReader(); w.close(); if (VERBOSE) { System.out.println("TEST: reader=" + r); } for (AtomicReaderContext ctx : r.leaves()) { if (VERBOSE) { System.out.println("\nTEST: sub=" + ctx.reader()); } verify(ctx.reader(), idToOrds, termsArray, null); } // Also test top-level reader: its enum does not support // ord, so this forces the OrdWrapper to run: if (VERBOSE) { System.out.println("TEST: top reader"); } AtomicReader slowR = SlowCompositeReaderWrapper.wrap(r); verify(slowR, idToOrds, termsArray, null); FieldCache.DEFAULT.purge(slowR); r.close(); dir.close(); }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); final Random r = random(); final IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r)) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB( scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc); int numUniqueChildValues = scaledRandomIntBetween(100, 2000); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet(); int childDocId = 0; int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues); ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds = new ObjectObjectOpenHashMap<>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); boolean filterMe = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("delete", "me", Field.Store.NO)); } if (filterMe) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("filter", "me", Field.Store.NO)); } indexWriter.addDocument(document); final int numChildDocs = scaledRandomIntBetween(0, 100); for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableSet<String> parentIds; if (childValueToParentIds.containsKey(childValue)) { parentIds = childValueToParentIds.lget(); } else { childValueToParentIds.put(childValue, parentIds = new TreeSet<>()); } if (!markParentAsDeleted && !filterMe) { parentIds.add(parent); } } } } // Delete docs that are marked to be deleted. 
indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.commit(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); int max = numUniqueChildValues / 4; for (int i = 0; i < max; i++) { // Simulate a parent update if (random().nextBoolean()) { final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size(); int numberOfUpdates = scaledRandomIntBetween(0, numberOfUpdatableParents); for (int j = 0; j < numberOfUpdates; j++) { int parentId; do { parentId = random().nextInt(numParentDocs); } while (filteredOrDeletedDocs.contains(parentId)); String parentUid = Uid.createUid("parent", Integer.toString(parentId)); indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid)); Document document = new Document(); document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); indexWriter.addDocument(document); } indexReader.close(); indexReader = DirectoryReader.open(indexWriter.w, true); searcher = new IndexSearcher(indexReader); engineSearcher = new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); } String childValue = childValues[random().nextInt(numUniqueChildValues)]; int shortCircuitParentDocSet = random().nextInt(numParentDocs); QueryBuilder queryBuilder; if (random().nextBoolean()) { queryBuilder = hasChildQuery("child", termQuery("field1", childValue)) .setShortCircuitCutoff(shortCircuitParentDocSet); } else { queryBuilder = constantScoreQuery( hasChildFilter("child", termQuery("field1", childValue)) .setShortCircuitCutoff(shortCircuitParentDocSet)); } // Using a FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not // get live docs as acceptedDocs queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me"))); Query query = parseQuery(queryBuilder); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); searcher.search(query, collector); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); if (childValueToParentIds.containsKey(childValue)) { LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader); Terms terms = slowLeafReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableSet<String> parentIds = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); PostingsEnum docsEnum = null; for (String id : parentIds) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", id)); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.postings(slowLeafReader.getLiveDocs(), docsEnum, PostingsEnum.NONE); expectedResult.set(docsEnum.nextDoc()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } assertBitSet(actualResult, expectedResult, searcher); } indexWriter.close(); indexReader.close(); directory.close(); }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 10000 : 1000); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet(); int childDocId = 0; int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000); ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds = new ObjectObjectOpenHashMap<String, NavigableSet<String>>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); boolean filterMe = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("delete", "me", Field.Store.NO)); } if (filterMe) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("filter", "me", Field.Store.NO)); } indexWriter.addDocument(document); int numChildDocs; if (rarely()) { numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25); } else { numChildDocs = random().nextInt(TEST_NIGHTLY ? 40 : 10); } for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableSet<String> parentIds; if (childValueToParentIds.containsKey(childValue)) { parentIds = childValueToParentIds.lget(); } else { childValueToParentIds.put(childValue, parentIds = new TreeSet<String>()); } if (!markParentAsDeleted && !filterMe) { parentIds.add(parent); } } } } // Delete docs that are marked to be deleted. indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.commit(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.SimpleSearcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent")); Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me"))); int max = numUniqueChildValues / 4; for (int i = 0; i < max; i++) { // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals // with the fact // that deletes are applied at the top level when filters are cached. 
Filter parentFilter; if (random().nextBoolean()) { parentFilter = SearchContext.current().filterCache().cache(rawParentFilter); } else { parentFilter = rawParentFilter; } // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer // not get live docs as acceptedDocs Filter filterMe; if (random().nextBoolean()) { filterMe = SearchContext.current().filterCache().cache(rawFilterMe); } else { filterMe = rawFilterMe; } // Simulate a parent update if (random().nextBoolean()) { int numberOfUpdates = 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5); for (int j = 0; j < numberOfUpdates; j++) { int parentId; do { parentId = random().nextInt(numParentDocs); } while (filteredOrDeletedDocs.contains(parentId)); String parentUid = Uid.createUid("parent", Integer.toString(parentId)); indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid)); Document document = new Document(); document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); indexWriter.addDocument(document); } indexReader.close(); indexReader = DirectoryReader.open(indexWriter.w, true); searcher = new IndexSearcher(indexReader); engineSearcher = new Engine.SimpleSearcher( ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); } String childValue = childValues[random().nextInt(numUniqueChildValues)]; TermQuery childQuery = new TermQuery(new Term("field1", childValue)); int shortCircuitParentDocSet = random().nextInt(numParentDocs); Filter nonNestedDocsFilter = random().nextBoolean() ? NonNestedDocsFilter.INSTANCE : null; Query query; if (random().nextBoolean()) { // Usage in HasChildQueryParser query = new ChildrenConstantScoreQuery( childQuery, "parent", "child", parentFilter, shortCircuitParentDocSet, nonNestedDocsFilter); } else { // Usage in HasChildFilterParser query = new XConstantScoreQuery( new CustomQueryWrappingFilter( new ChildrenConstantScoreQuery( childQuery, "parent", "child", parentFilter, shortCircuitParentDocSet, nonNestedDocsFilter))); } query = new XFilteredQuery(query, filterMe); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); searcher.search(query, collector); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); if (childValueToParentIds.containsKey(childValue)) { AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader); Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableSet<String> parentIds = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; for (String id : parentIds) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", id)); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } assertBitSet(actualResult, expectedResult, searcher); } indexWriter.close(); indexReader.close(); directory.close(); }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } int childDocId = 0; int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000); ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds = new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25); for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableMap<String, FloatArrayList> parentIdToChildScores; if (childValueToParentIds.containsKey(childValue)) { parentIdToChildScores = childValueToParentIds.lget(); } else { childValueToParentIds.put( childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>()); } if (!markParentAsDeleted) { FloatArrayList childScores = parentIdToChildScores.get(parent); if (childScores == null) { parentIdToChildScores.put(parent, childScores = new FloatArrayList()); } childScores.add(1f); } } } } // Delete docs that are marked to be deleted. 
indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.close(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent")); for (String childValue : childValues) { Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue))); int shortCircuitParentDocSet = random().nextInt(numParentDocs); ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)]; Query query = new ChildrenQuery( "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); int numHits = 1 + random().nextInt(25); TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false); searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector)); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); MockScorer mockScorer = new MockScorer(scoreType); TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false); expectedTopDocsCollector.setScorer(mockScorer); if (childValueToParentIds.containsKey(childValue)) { AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader); Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey())); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); mockScorer.scores = entry.getValue(); expectedTopDocsCollector.collect(docsEnum.docID()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } assertBitSet(actualResult, expectedResult, searcher); assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs()); } indexReader.close(); directory.close(); }