public synchronized ShapeFieldCache<T> getCache(LeafReader reader) throws IOException { ShapeFieldCache<T> idx = sidx.get(reader); if (idx != null) { return idx; } long startTime = System.currentTimeMillis(); log.fine("Building Cache [" + reader.maxDoc() + "]"); idx = new ShapeFieldCache<>(reader.maxDoc(), defaultSize); int count = 0; DocsEnum docs = null; Terms terms = reader.terms(shapeField); TermsEnum te = null; if (terms != null) { /* cache miss: walk every term in the shape field, decode it back into a shape, and record it for each document containing that term */ te = terms.iterator(te); BytesRef term = te.next(); while (term != null) { T shape = readShape(term); if (shape != null) { docs = te.docs(null, docs, DocsEnum.FLAG_NONE); int docid = docs.nextDoc(); while (docid != DocIdSetIterator.NO_MORE_DOCS) { idx.add(docid, shape); docid = docs.nextDoc(); count++; } } term = te.next(); } } sidx.put(reader, idx); long elapsed = System.currentTimeMillis() - startTime; log.fine("Cached: [" + count + " in " + elapsed + "ms] " + idx); return idx; }
public void testCodec() throws Exception { Directory dir = new AppendingRAMDirectory(new RAMDirectory()); IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()); cfg.setCodecProvider(new AppendingCodecProvider()); ((LogMergePolicy) cfg.getMergePolicy()).setUseCompoundFile(false); ((LogMergePolicy) cfg.getMergePolicy()).setUseCompoundDocStore(false); IndexWriter writer = new IndexWriter(dir, cfg); Document doc = new Document(); doc.add(new Field("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); writer.commit(); writer.addDocument(doc); writer.optimize(); writer.close(); IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider()); assertEquals(2, reader.numDocs()); doc = reader.document(0); assertEquals(text, doc.get("f")); Fields fields = MultiFields.getFields(reader); Terms terms = fields.terms("f"); assertNotNull(terms); TermsEnum te = terms.iterator(); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("quick"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("brown"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("fox"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("jumped"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("over"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("lazy"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("dog"))); assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("the"))); DocsEnum de = te.docs(null, null); assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS); assertEquals(2, de.freq()); assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS); assertTrue(de.advance(2) == DocsEnum.NO_MORE_DOCS); reader.close(); }
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException { final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves(); final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()]; final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()]; /* one reusable TermsEnum/DocsEnum pair per segment */ for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) { termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null); } int[] results = new int[ids.length]; for (int i = 0; i < results.length; i++) { results[i] = -1; } /* -1 marks ids that were not found; probe each segment in order and rebase the segment-local doc ID with 'base' */ for (int idx = 0; idx < ids.length; idx++) { int base = 0; final BytesRef id = new BytesRef(ids[idx]); for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) { final AtomicReader sub = subReaders.get(subIDX).reader(); final TermsEnum termsEnum = termsEnums[subIDX]; if (termsEnum.seekExact(id, false)) { final DocsEnum docs = docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX], 0); if (docs != null) { final int docID = docs.nextDoc(); if (docID != DocIdSetIterator.NO_MORE_DOCS) { results[idx] = base + docID; break; } } } base += sub.maxDoc(); } } return results; }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 10000 : 1000); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet(); int childDocId = 0; int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000); ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds = new ObjectObjectOpenHashMap<String, NavigableSet<String>>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); boolean filterMe = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("delete", "me", Field.Store.NO)); } if (filterMe) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("filter", "me", Field.Store.NO)); } indexWriter.addDocument(document); int numChildDocs; if (rarely()) { numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25); } else { numChildDocs = random().nextInt(TEST_NIGHTLY ? 40 : 10); } for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableSet<String> parentIds; if (childValueToParentIds.containsKey(childValue)) { parentIds = childValueToParentIds.lget(); } else { childValueToParentIds.put(childValue, parentIds = new TreeSet<String>()); } if (!markParentAsDeleted && !filterMe) { parentIds.add(parent); } } } } // Delete docs that are marked to be deleted. indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.commit(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.SimpleSearcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent")); Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me"))); int max = numUniqueChildValues / 4; for (int i = 0; i < max; i++) { // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals // with the fact // that deletes are applied at the top level when filters are cached. 
Filter parentFilter; if (random().nextBoolean()) { parentFilter = SearchContext.current().filterCache().cache(rawParentFilter); } else { parentFilter = rawParentFilter; } // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer // not get live docs as acceptedDocs Filter filterMe; if (random().nextBoolean()) { filterMe = SearchContext.current().filterCache().cache(rawFilterMe); } else { filterMe = rawFilterMe; } // Simulate a parent update if (random().nextBoolean()) { int numberOfUpdates = 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5); for (int j = 0; j < numberOfUpdates; j++) { int parentId; do { parentId = random().nextInt(numParentDocs); } while (filteredOrDeletedDocs.contains(parentId)); String parentUid = Uid.createUid("parent", Integer.toString(parentId)); indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid)); Document document = new Document(); document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); indexWriter.addDocument(document); } indexReader.close(); indexReader = DirectoryReader.open(indexWriter.w, true); searcher = new IndexSearcher(indexReader); engineSearcher = new Engine.SimpleSearcher( ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); } String childValue = childValues[random().nextInt(numUniqueChildValues)]; TermQuery childQuery = new TermQuery(new Term("field1", childValue)); int shortCircuitParentDocSet = random().nextInt(numParentDocs); Filter nonNestedDocsFilter = random().nextBoolean() ? NonNestedDocsFilter.INSTANCE : null; Query query; if (random().nextBoolean()) { // Usage in HasChildQueryParser query = new ChildrenConstantScoreQuery( childQuery, "parent", "child", parentFilter, shortCircuitParentDocSet, nonNestedDocsFilter); } else { // Usage in HasChildFilterParser query = new XConstantScoreQuery( new CustomQueryWrappingFilter( new ChildrenConstantScoreQuery( childQuery, "parent", "child", parentFilter, shortCircuitParentDocSet, nonNestedDocsFilter))); } query = new XFilteredQuery(query, filterMe); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); searcher.search(query, collector); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); if (childValueToParentIds.containsKey(childValue)) { AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader); Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableSet<String> parentIds = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; for (String id : parentIds) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", id)); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } assertBitSet(actualResult, expectedResult, searcher); } indexWriter.close(); indexReader.close(); directory.close(); }
@Override public DoubleArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception { AtomicReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); if (terms == null) { return DoubleArrayAtomicFieldData.EMPTY; } // TODO: how can we guess the number of terms? numerics end up creating more terms per value... final TDoubleArrayList values = new TDoubleArrayList(); ArrayList<int[]> ordinals = new ArrayList<int[]>(); int[] idx = new int[reader.maxDoc()]; ordinals.add(new int[reader.maxDoc()]); values.add(0); // values[0] is a dummy: ordinal 0 means the document has no value int termOrd = 1; // current term number TermsEnum termsEnum = terms.iterator(null); try { DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { values.add(FieldCache.NUMERIC_UTILS_DOUBLE_PARSER.parseDouble(term)); docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { int[] ordinal; if (idx[docId] >= ordinals.size()) { ordinal = new int[reader.maxDoc()]; ordinals.add(ordinal); } else { ordinal = ordinals.get(idx[docId]); } ordinal[docId] = termOrd; idx[docId]++; } termOrd++; } } catch (RuntimeException e) { if (e.getClass().getName().endsWith("StopFillCacheException")) { // all is well, in case numeric parsers are used. } else { throw e; } } if (ordinals.size() == 1) { int[] nativeOrdinals = ordinals.get(0); FixedBitSet set = new FixedBitSet(reader.maxDoc()); double[] sValues = new double[reader.maxDoc()]; boolean allHaveValue = true; for (int i = 0; i < nativeOrdinals.length; i++) { int nativeOrdinal = nativeOrdinals[i]; if (nativeOrdinal == 0) { allHaveValue = false; } else { set.set(i); sValues[i] = values.get(nativeOrdinal); } } if (allHaveValue) { return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc()); } else { return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set); } } else { int[][] nativeOrdinals = new int[ordinals.size()][]; for (int i = 0; i < nativeOrdinals.length; i++) { nativeOrdinals[i] = ordinals.get(i); } return new DoubleArrayAtomicFieldData.WithOrdinals( values.toArray(new double[values.size()]), reader.maxDoc(), Ordinals.Factories.createFromFlatOrdinals( nativeOrdinals, termOrd, fieldDataType.getSettings())); } }
@Override public void execute(String[] args, PrintStream out) throws Exception { String field = null; String termVal = null; try { field = args[0]; } catch (Exception e) { field = null; } if (field != null) { String[] parts = field.split(":"); if (parts.length > 1) { field = parts[0]; termVal = parts[1]; } } if (field == null || termVal == null) { out.println("usage: field:term"); out.flush(); return; } IndexReader reader = ctx.getIndexReader(); List<AtomicReaderContext> leaves = reader.leaves(); int docBase = 0; int numPerPage = 20; for (AtomicReaderContext leaf : leaves) { AtomicReader atomicReader = leaf.reader(); Terms terms = atomicReader.terms(field); if (terms == null) { continue; } boolean hasPositions = terms.hasPositions(); if (terms != null && termVal != null) { TermsEnum te = terms.iterator(null); int count = 0; if (te.seekExact(new BytesRef(termVal), true)) { if (hasPositions) { DocsAndPositionsEnum iter = te.docsAndPositions(atomicReader.getLiveDocs(), null); int docid; while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { count++; out.print("docid: " + (docid + docBase) + ", freq: " + iter.freq() + ", "); for (int i = 0; i < iter.freq(); ++i) { out.print("pos " + i + ": " + iter.nextPosition()); BytesRef payload = iter.getPayload(); if (payload != null) { out.print(",payload: " + payload); } out.print(";"); } out.println(); if (ctx.isInteractiveMode()) { if (count % numPerPage == 0) { out.println("Ctrl-D to break"); int ch = System.in.read(); if (ch == -1) { out.flush(); return; } } } } } else { DocsEnum iter = te.docs(atomicReader.getLiveDocs(), null); int docid; while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { count++; out.println("docid: " + (docid + docBase)); if (ctx.isInteractiveMode()) { if (count % numPerPage == 0) { out.println("Ctrl-D to break"); int ch = System.in.read(); if (ch == -1) { out.flush(); return; } } } } } } } docBase += atomicReader.maxDoc(); } }
protected void fillFloatValues(FloatValues vals, IndexReader reader, String field) throws IOException { if (parser == null) { try { parser = FieldCache.DEFAULT_FLOAT_PARSER; fillFloatValues(vals, reader, field); return; } catch (NumberFormatException ne) { vals.parserHashCode = null; // wipe the previous one parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER; fillFloatValues(vals, reader, field); return; } } setParserAndResetCounts(vals, parser); Terms terms = MultiFields.getTerms(reader, field); int maxDoc = reader.maxDoc(); vals.values = null; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet(maxDoc) : null; DocsEnum docs = null; try { while (true) { final BytesRef term = termsEnum.next(); if (term == null) { break; } final float termval = parser.parseFloat(term); docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (vals.values == null) { vals.values = new float[maxDoc]; } vals.values[docID] = termval; vals.numDocs++; if (validBits != null) { validBits.set(docID); } } vals.numTerms++; } } catch (FieldCache.StopFillCacheException stop) { } if (vals.valid == null) { vals.valid = checkMatchAllBits(validBits, vals.numDocs, maxDoc); } } if (vals.values == null) { vals.values = new float[maxDoc]; } if (vals.valid == null && vals.numDocs < 1) { vals.valid = new Bits.MatchNoBits(maxDoc); } }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); final Random r = random(); final IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r)) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB( scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc); int numUniqueChildValues = scaledRandomIntBetween(100, 2000); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet(); int childDocId = 0; int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues); ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds = new ObjectObjectOpenHashMap<>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); boolean filterMe = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("delete", "me", Field.Store.NO)); } if (filterMe) { filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("filter", "me", Field.Store.NO)); } indexWriter.addDocument(document); int numChildDocs = scaledRandomIntBetween(0, 100); for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableMap<String, FloatArrayList> parentIdToChildScores; if (childValueToParentIds.containsKey(childValue)) { parentIdToChildScores = childValueToParentIds.lget(); } else { childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>()); } if (!markParentAsDeleted && !filterMe) { FloatArrayList childScores = parentIdToChildScores.get(parent); if (childScores == null) { parentIdToChildScores.put(parent, childScores = new FloatArrayList()); } childScores.add(1f); } } } } // Delete docs that are marked to be deleted. 
indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.commit(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); int max = numUniqueChildValues / 4; for (int i = 0; i < max; i++) { // Simulate a parent update if (random().nextBoolean()) { final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size(); int numberOfUpdates = RandomInts.randomIntBetween( random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5)); for (int j = 0; j < numberOfUpdates; j++) { int parentId; do { parentId = random().nextInt(numParentDocs); } while (filteredOrDeletedDocs.contains(parentId)); String parentUid = Uid.createUid("parent", Integer.toString(parentId)); indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid)); Document document = new Document(); document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); indexWriter.addDocument(document); } indexReader.close(); indexReader = DirectoryReader.open(indexWriter.w, true); searcher = new IndexSearcher(indexReader); engineSearcher = new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); } String childValue = childValues[random().nextInt(numUniqueChildValues)]; int shortCircuitParentDocSet = random().nextInt(numParentDocs); ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)]; // leave min/max set to 0 half the time int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110); int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110); QueryBuilder queryBuilder = hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue))) .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH)) .minChildren(minChildren) .maxChildren(maxChildren) .setShortCircuitCutoff(shortCircuitParentDocSet); // Using a FQ, will invoke / test the Scorer#advance(..) 
and also let the Weight#scorer not // get live docs as acceptedDocs queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me"))); Query query = parseQuery(queryBuilder); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); int numHits = 1 + random().nextInt(25); TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits); searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector)); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits); if (childValueToParentIds.containsKey(childValue)) { LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader); final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()]; Terms terms = slowLeafReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) { int count = entry.getValue().elementsCount; if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey())); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); scores[docsEnum.docID()] = new FloatArrayList(entry.getValue()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } MockScorer mockScorer = new MockScorer(scoreType); final LeafCollector leafCollector = expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext()); leafCollector.setScorer(mockScorer); for (int doc = expectedResult.nextSetBit(0); doc < slowLeafReader.maxDoc(); doc = doc + 1 >= expectedResult.length() ? DocIdSetIterator.NO_MORE_DOCS : expectedResult.nextSetBit(doc + 1)) { mockScorer.scores = scores[doc]; leafCollector.collect(doc); } } assertBitSet(actualResult, expectedResult, searcher); assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs()); } indexWriter.close(); indexReader.close(); directory.close(); }
private IndexIterationContext createContext( int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean scoreDocsInOrder) throws IOException { IndexIterationContext context = new IndexIterationContext(); int numRandomValues = nDocs / 2; context.randomUniqueValues = new String[numRandomValues]; Set<String> trackSet = new HashSet<String>(); context.randomFrom = new boolean[numRandomValues]; for (int i = 0; i < numRandomValues; i++) { String uniqueRandomValue; do { uniqueRandomValue = _TestUtil.randomRealisticUnicodeString(random()); // uniqueRandomValue = _TestUtil.randomSimpleString(random); } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue)); // Generate unique values and empty strings aren't allowed. trackSet.add(uniqueRandomValue); context.randomFrom[i] = random().nextBoolean(); context.randomUniqueValues[i] = uniqueRandomValue; } RandomDoc[] docs = new RandomDoc[nDocs]; for (int i = 0; i < nDocs; i++) { String id = Integer.toString(i); int randomI = random().nextInt(context.randomUniqueValues.length); String value = context.randomUniqueValues[randomI]; Document document = new Document(); document.add(newTextField(random(), "id", id, Field.Store.NO)); document.add(newTextField(random(), "value", value, Field.Store.NO)); boolean from = context.randomFrom[randomI]; int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1; docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); for (int j = 0; j < numberOfLinkValues; j++) { String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)]; docs[i].linkValues.add(linkValue); if (from) { if (!context.fromDocuments.containsKey(linkValue)) { context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>()); } if (!context.randomValueFromDocs.containsKey(value)) { context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>()); } context.fromDocuments.get(linkValue).add(docs[i]); context.randomValueFromDocs.get(value).add(docs[i]); document.add(newTextField(random(), "from", linkValue, Field.Store.NO)); } else { if (!context.toDocuments.containsKey(linkValue)) { context.toDocuments.put(linkValue, new ArrayList<RandomDoc>()); } if (!context.randomValueToDocs.containsKey(value)) { context.randomValueToDocs.put(value, new ArrayList<RandomDoc>()); } context.toDocuments.get(linkValue).add(docs[i]); context.randomValueToDocs.get(value).add(docs[i]); document.add(newTextField(random(), "to", linkValue, Field.Store.NO)); } } final RandomIndexWriter w; if (from) { w = fromWriter; } else { w = toWriter; } w.addDocument(document); if (random().nextInt(10) == 4) { w.commit(); } if (VERBOSE) { System.out.println("Added document[" + docs[i].id + "]: " + document); } } // Pre-compute all possible hits for all unique random values. On top of this also compute all // possible score for // any ScoreMode. 
IndexSearcher fromSearcher = newSearcher(fromWriter.getReader()); IndexSearcher toSearcher = newSearcher(toWriter.getReader()); for (int i = 0; i < context.randomUniqueValues.length; i++) { String uniqueRandomValue = context.randomUniqueValues[i]; final String fromField; final String toField; final Map<String, Map<Integer, JoinScore>> queryVals; if (context.randomFrom[i]) { fromField = "from"; toField = "to"; queryVals = context.fromHitsToJoinScore; } else { fromField = "to"; toField = "from"; queryVals = context.toHitsToJoinScore; } final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<BytesRef, JoinScore>(); if (multipleValuesPerDocument) { fromSearcher.search( new TermQuery(new Term("value", uniqueRandomValue)), new Collector() { private Scorer scorer; private SortedSetDocValues docTermOrds; final BytesRef joinValue = new BytesRef(); @Override public void collect(int doc) throws IOException { docTermOrds.setDocument(doc); long ord; while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { docTermOrds.lookupOrd(ord, joinValue); JoinScore joinScore = joinValueToJoinScores.get(joinValue); if (joinScore == null) { joinValueToJoinScores.put( BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore()); } joinScore.addScore(scorer.score()); } } @Override public void setNextReader(AtomicReaderContext context) throws IOException { docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField); } @Override public void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public boolean acceptsDocsOutOfOrder() { return false; } }); } else { fromSearcher.search( new TermQuery(new Term("value", uniqueRandomValue)), new Collector() { private Scorer scorer; private BinaryDocValues terms; private Bits docsWithField; private final BytesRef spare = new BytesRef(); @Override public void collect(int doc) throws IOException { terms.get(doc, spare); BytesRef joinValue = spare; if (joinValue.length == 0 && !docsWithField.get(doc)) { return; } JoinScore joinScore = joinValueToJoinScores.get(joinValue); if (joinScore == null) { joinValueToJoinScores.put( BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore()); } joinScore.addScore(scorer.score()); } @Override public void setNextReader(AtomicReaderContext context) throws IOException { terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true); docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField); } @Override public void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public boolean acceptsDocsOutOfOrder() { return false; } }); } final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>(); if (multipleValuesPerDocument) { if (scoreDocsInOrder) { AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader()); Terms terms = slowCompositeReader.terms(toField); if (terms != null) { DocsEnum docsEnum = null; TermsEnum termsEnum = null; SortedSet<BytesRef> joinValues = new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator()); joinValues.addAll(joinValueToJoinScores.keySet()); for (BytesRef joinValue : joinValues) { termsEnum = terms.iterator(termsEnum); if (termsEnum.seekExact(joinValue)) { docsEnum = termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); JoinScore joinScore = joinValueToJoinScores.get(joinValue); for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) { // First encountered join value determines the score. 
// Something to keep in mind for many-to-many relations. if (!docToJoinScore.containsKey(doc)) { docToJoinScore.put(doc, joinScore); } } } } } } else { toSearcher.search( new MatchAllDocsQuery(), new Collector() { private SortedSetDocValues docTermOrds; private final BytesRef scratch = new BytesRef(); private int docBase; @Override public void collect(int doc) throws IOException { docTermOrds.setDocument(doc); long ord; while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { docTermOrds.lookupOrd(ord, scratch); JoinScore joinScore = joinValueToJoinScores.get(scratch); if (joinScore == null) { continue; } Integer basedDoc = docBase + doc; // First encountered join value determines the score. // Something to keep in mind for many-to-many relations. if (!docToJoinScore.containsKey(basedDoc)) { docToJoinScore.put(basedDoc, joinScore); } } } @Override public void setNextReader(AtomicReaderContext context) throws IOException { docBase = context.docBase; docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField); } @Override public boolean acceptsDocsOutOfOrder() { return false; } @Override public void setScorer(Scorer scorer) {} }); } } else { toSearcher.search( new MatchAllDocsQuery(), new Collector() { private BinaryDocValues terms; private int docBase; private final BytesRef spare = new BytesRef(); @Override public void collect(int doc) { terms.get(doc, spare); JoinScore joinScore = joinValueToJoinScores.get(spare); if (joinScore == null) { return; } docToJoinScore.put(docBase + doc, joinScore); } @Override public void setNextReader(AtomicReaderContext context) throws IOException { terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false); docBase = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { return false; } @Override public void setScorer(Scorer scorer) {} }); } queryVals.put(uniqueRandomValue, docToJoinScore); } fromSearcher.getIndexReader().close(); toSearcher.getIndexReader().close(); return context; }
/** * checks the terms enum sequentially if deep is false, it does a 'shallow' test that doesnt go * down to the docsenums */ public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep) throws Exception { BytesRef term; Bits randomBits = new RandomBits(MAXDOC, random().nextDouble(), random()); DocsAndPositionsEnum leftPositions = null; DocsAndPositionsEnum rightPositions = null; DocsEnum leftDocs = null; DocsEnum rightDocs = null; while ((term = leftTermsEnum.next()) != null) { assertEquals(term, rightTermsEnum.next()); assertTermStats(leftTermsEnum, rightTermsEnum); if (deep) { // with payloads + off assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions)); assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions)); // with payloads only assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions( null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.docsAndPositions( null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions( randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.docsAndPositions( randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions( null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.docsAndPositions( null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions( randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.docsAndPositions( randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); // with offsets only assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions( null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.docsAndPositions( null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions( randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.docsAndPositions( randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions( null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.docsAndPositions( null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions( randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.docsAndPositions( randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); // with positions only 
assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE)); assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE)); assertPositionsSkipping( leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE)); // with freqs: assertDocsEnum( leftDocs = leftTermsEnum.docs(null, leftDocs), rightDocs = rightTermsEnum.docs(null, rightDocs)); assertDocsEnum( leftDocs = leftTermsEnum.docs(randomBits, leftDocs), rightDocs = rightTermsEnum.docs(randomBits, rightDocs)); // w/o freqs: assertDocsEnum( leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE)); assertDocsEnum( leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE)); // with freqs: assertDocsSkipping( leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.docs(null, leftDocs), rightDocs = rightTermsEnum.docs(null, rightDocs)); assertDocsSkipping( leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.docs(randomBits, leftDocs), rightDocs = rightTermsEnum.docs(randomBits, rightDocs)); // w/o freqs: assertDocsSkipping( leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE)); assertDocsSkipping( leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE)); } } assertNull(rightTermsEnum.next()); }
@Test public void testRandom() throws Exception { Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600); String[] childValues = new String[numUniqueChildValues]; for (int i = 0; i < numUniqueChildValues; i++) { childValues[i] = Integer.toString(i); } int childDocId = 0; int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000); ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds = new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add( new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25); for (int i = 0; i < numChildDocs; i++) { boolean markChildAsDeleted = rarely(); String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); document.add( new StringField( UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add( new StringField( ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); if (markChildAsDeleted) { document.add(new StringField("delete", "me", Field.Store.NO)); } indexWriter.addDocument(document); if (!markChildAsDeleted) { NavigableMap<String, FloatArrayList> parentIdToChildScores; if (childValueToParentIds.containsKey(childValue)) { parentIdToChildScores = childValueToParentIds.lget(); } else { childValueToParentIds.put( childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>()); } if (!markParentAsDeleted) { FloatArrayList childScores = parentIdToChildScores.get(parent); if (childScores == null) { parentIdToChildScores.put(parent, childScores = new FloatArrayList()); } childScores.add(1f); } } } } // Delete docs that are marked to be deleted. 
indexWriter.deleteDocuments(new Term("delete", "me")); indexWriter.close(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); Engine.Searcher engineSearcher = new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher); ((TestSearchContext) SearchContext.current()) .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent")); for (String childValue : childValues) { Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue))); int shortCircuitParentDocSet = random().nextInt(numParentDocs); ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)]; Query query = new ChildrenQuery( "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); int numHits = 1 + random().nextInt(25); TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false); searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector)); FixedBitSet actualResult = collector.getResult(); FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); MockScorer mockScorer = new MockScorer(scoreType); TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false); expectedTopDocsCollector.setScorer(mockScorer); if (childValueToParentIds.containsKey(childValue)) { AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader); Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget(); TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey())); if (seekStatus == TermsEnum.SeekStatus.FOUND) { docsEnum = termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); mockScorer.scores = entry.getValue(); expectedTopDocsCollector.collect(docsEnum.docID()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } } } } assertBitSet(actualResult, expectedResult, searcher); assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs()); } indexReader.close(); directory.close(); }