@Test
public void testNoTerms() throws Exception {
  FieldDataTermsFilter hFilterBytes =
      FieldDataTermsFilter.newBytes(getFieldData(strMapper), new ObjectOpenHashSet<BytesRef>());
  FieldDataTermsFilter hFilterLongs =
      FieldDataTermsFilter.newLongs(getFieldData(lngMapper), new LongOpenHashSet());
  FieldDataTermsFilter hFilterDoubles =
      FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), new DoubleOpenHashSet());
  assertNull(hFilterBytes.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
  assertNull(hFilterLongs.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
  assertNull(hFilterDoubles.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
}
@Test
public void testBytes() throws Exception {
  List<Integer> docs = Arrays.asList(1, 5, 7);

  ObjectOpenHashSet<BytesRef> hTerms = new ObjectOpenHashSet<BytesRef>();
  List<BytesRef> cTerms = new ArrayList<BytesRef>(docs.size());
  for (int i = 0; i < docs.size(); i++) {
    BytesRef term = new BytesRef("str" + docs.get(i));
    hTerms.add(term);
    cTerms.add(term);
  }

  FieldDataTermsFilter hFilter = FieldDataTermsFilter.newBytes(getFieldData(strMapper), hTerms);

  int size = reader.maxDoc();
  FixedBitSet result = new FixedBitSet(size);

  result.clear(0, size);
  assertThat(result.cardinality(), equalTo(0));
  result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
  assertThat(result.cardinality(), equalTo(docs.size()));
  for (int i = 0; i < reader.maxDoc(); i++) {
    assertThat(result.get(i), equalTo(docs.contains(i)));
  }

  // filter from mapper
  result.clear(0, size);
  assertThat(result.cardinality(), equalTo(0));
  result.or(
      strMapper
          .termsFilter(ifdService, cTerms, null)
          .getDocIdSet(reader.getContext(), reader.getLiveDocs())
          .iterator());
  assertThat(result.cardinality(), equalTo(docs.size()));
  for (int i = 0; i < reader.maxDoc(); i++) {
    assertThat(result.get(i), equalTo(docs.contains(i)));
  }

  result.clear(0, size);
  assertThat(result.cardinality(), equalTo(0));

  // filter on the long field using BytesRef terms
  // should not match any docs
  hFilter = FieldDataTermsFilter.newBytes(getFieldData(lngMapper), hTerms);
  result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
  assertThat(result.cardinality(), equalTo(0));

  // filter on the double field using BytesRef terms
  // should not match any docs
  hFilter = FieldDataTermsFilter.newBytes(getFieldData(dblMapper), hTerms);
  result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
  assertThat(result.cardinality(), equalTo(0));
}
@Test
public void testDoubles() throws Exception {
  List<Integer> docs = Arrays.asList(1, 5, 7);

  DoubleOpenHashSet hTerms = new DoubleOpenHashSet();
  List<Double> cTerms = new ArrayList<Double>(docs.size());
  for (int i = 0; i < docs.size(); i++) {
    double term = Double.valueOf(docs.get(i));
    hTerms.add(term);
    cTerms.add(term);
  }

  FieldDataTermsFilter hFilter = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), hTerms);

  int size = reader.maxDoc();
  FixedBitSet result = new FixedBitSet(size);

  result.clear(0, size);
  assertThat(result.cardinality(), equalTo(0));
  result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
  assertThat(result.cardinality(), equalTo(docs.size()));
  for (int i = 0; i < reader.maxDoc(); i++) {
    assertThat(result.get(i), equalTo(docs.contains(i)));
  }

  // filter from mapper
  result.clear(0, size);
  assertThat(result.cardinality(), equalTo(0));
  result.or(
      dblMapper
          .termsFilter(ifdService, cTerms, null)
          .getDocIdSet(reader.getContext(), reader.getLiveDocs())
          .iterator());
  assertThat(result.cardinality(), equalTo(docs.size()));
  for (int i = 0; i < reader.maxDoc(); i++) {
    assertThat(result.get(i), equalTo(docs.contains(i)));
  }

  hFilter = FieldDataTermsFilter.newDoubles(getFieldData(lngMapper), hTerms);
  assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
}
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids)
    throws IOException {
  final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves();
  final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
  final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()];
  for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
    termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null);
  }

  int[] results = new int[ids.length];
  for (int i = 0; i < results.length; i++) {
    results[i] = -1;
  }

  for (int idx = 0; idx < ids.length; idx++) {
    int base = 0;
    final BytesRef id = new BytesRef(ids[idx]);
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
      final AtomicReader sub = subReaders.get(subIDX).reader();
      final TermsEnum termsEnum = termsEnums[subIDX];
      if (termsEnum.seekExact(id, false)) {
        final DocsEnum docs =
            docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX], 0);
        if (docs != null) {
          final int docID = docs.nextDoc();
          if (docID != DocIdSetIterator.NO_MORE_DOCS) {
            results[idx] = base + docID;
            break;
          }
        }
      }
      base += sub.maxDoc();
    }
  }
  return results;
}
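// A minimal usage sketch for the lookup helper above (illustration only, not from the
// original source): it assumes `directory` holds an index whose documents carry a
// single-valued, indexed "id" field. Ids that are not found, or whose only postings are
// deleted, come back as -1.
protected void printDocIdsForIds(Directory directory, String... ids) throws IOException {
  IndexReader reader = DirectoryReader.open(directory);
  try {
    IndexSearcher searcher = new IndexSearcher(reader);
    int[] docIds = lookupDocIdByPK(searcher, ids);
    for (int i = 0; i < ids.length; i++) {
      System.out.println("id=" + ids[i] + " -> docID=" + docIds[i]);
    }
  } finally {
    reader.close();
  }
}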
private List<Document> lookupDocs(Term term, final LoadFieldCallback lfc) throws IOException {
  final List<Document> documents = new ArrayList<Document>();
  final TermFilter tf = new TermFilter(term);
  try {
    for (AtomicReaderContext arc : searcher.getIndexReader().leaves()) {
      AtomicReader ar = arc.reader();
      Bits liveDocs = ar.getLiveDocs();
      DocIdSet docSet = tf.getDocIdSet(arc, liveDocs);
      if (docSet != null) {
        DocIdSetIterator disi = docSet.iterator();
        if (disi != null) {
          int docId;
          while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            DocumentStoredFieldVisitor fv =
                new DocumentStoredFieldVisitor() {
                  @Override
                  public StoredFieldVisitor.Status needsField(FieldInfo fieldInfo)
                      throws IOException {
                    if (lfc == null || lfc.loadField(fieldInfo.name)) {
                      return StoredFieldVisitor.Status.YES;
                    }
                    return StoredFieldVisitor.Status.NO;
                  }
                };
            ar.document(docId, fv);
            documents.add(fv.getDocument());
          }
        }
      }
    }
  } catch (IOException io) {
    throw new IndexException(io);
  }
  return documents;
}
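// Hypothetical caller for the helper above (not part of the original code): fetch the stored
// documents matching id:42 while loading only the "title" stored field. It assumes
// LoadFieldCallback declares a single loadField(String) method, which is how it is used in
// needsField(...) above.
private List<Document> lookupTitles() throws IOException {
  return lookupDocs(
      new Term("id", "42"),
      new LoadFieldCallback() {
        @Override
        public boolean loadField(String fieldName) {
          return "title".equals(fieldName);
        }
      });
}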
/**
 * Merges the sortedset docvalues from <code>toMerge</code>.
 *
 * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges
 * ordinals and values and filters deleted documents.
 */
public void mergeSortedSetField(
    FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge)
    throws IOException {
  mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount());

  final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
  final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);

  // step 1: iterate thru each sub and mark terms still in use
  TermsEnum liveTerms[] = new TermsEnum[dvs.length];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < liveTerms.length; sub++) {
    AtomicReader reader = readers[sub];
    SortedSetDocValues dv = dvs[sub];
    Bits liveDocs = reader.getLiveDocs();
    if (liveDocs == null) {
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs.get(i)) {
          dv.setDocument(i);
          long ord;
          while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedSetField(
      fieldInfo,
      // ord -> value
      new Iterable<BytesRef>() {
        @Override
        public Iterator<BytesRef> iterator() {
          return new Iterator<BytesRef>() {
            long currentOrd;

            @Override
            public boolean hasNext() {
              return currentOrd < map.getValueCount();
            }

            @Override
            public BytesRef next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              int segmentNumber = map.getFirstSegmentNumber(currentOrd);
              long segmentOrd = map.getFirstSegmentOrd(currentOrd);
              final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
              currentOrd++;
              return term;
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }
          };
        }
      },
      // doc -> ord count
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new Iterator<Number>() {
            int readerUpto = -1;
            int docIDUpto;
            int nextValue;
            AtomicReader currentReader;
            Bits currentLiveDocs;
            boolean nextIsSet;

            @Override
            public boolean hasNext() {
              return nextIsSet || setNext();
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }

            @Override
            public Number next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              assert nextIsSet;
              nextIsSet = false;
              // TODO make a mutable number
              return nextValue;
            }

            private boolean setNext() {
              while (true) {
                if (readerUpto == readers.length) {
                  return false;
                }
                if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                  readerUpto++;
                  if (readerUpto < readers.length) {
                    currentReader = readers[readerUpto];
                    currentLiveDocs = currentReader.getLiveDocs();
                  }
                  docIDUpto = 0;
                  continue;
                }
                if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                  nextIsSet = true;
                  SortedSetDocValues dv = dvs[readerUpto];
                  dv.setDocument(docIDUpto);
                  nextValue = 0;
                  while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                    nextValue++;
                  }
                  docIDUpto++;
                  return true;
                }
                docIDUpto++;
              }
            }
          };
        }
      },
      // ords
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new Iterator<Number>() {
            int readerUpto = -1;
            int docIDUpto;
            long nextValue;
            AtomicReader currentReader;
            Bits currentLiveDocs;
            LongValues currentMap;
            boolean nextIsSet;
            long ords[] = new long[8];
            int ordUpto;
            int ordLength;

            @Override
            public boolean hasNext() {
              return nextIsSet || setNext();
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }

            @Override
            public Number next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              assert nextIsSet;
              nextIsSet = false;
              // TODO make a mutable number
              return nextValue;
            }

            private boolean setNext() {
              while (true) {
                if (readerUpto == readers.length) {
                  return false;
                }
                if (ordUpto < ordLength) {
                  nextValue = ords[ordUpto];
                  ordUpto++;
                  nextIsSet = true;
                  return true;
                }
                if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                  readerUpto++;
                  if (readerUpto < readers.length) {
                    currentReader = readers[readerUpto];
                    currentLiveDocs = currentReader.getLiveDocs();
                    currentMap = map.getGlobalOrds(readerUpto);
                  }
                  docIDUpto = 0;
                  continue;
                }
                if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                  assert docIDUpto < currentReader.maxDoc();
                  SortedSetDocValues dv = dvs[readerUpto];
                  dv.setDocument(docIDUpto);
                  ordUpto = ordLength = 0;
                  long ord;
                  while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                    if (ordLength == ords.length) {
                      ords = ArrayUtil.grow(ords, ordLength + 1);
                    }
                    ords[ordLength] = currentMap.get(ord);
                    ordLength++;
                  }
                  docIDUpto++;
                  continue;
                }
                docIDUpto++;
              }
            }
          };
        }
      });
}
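// Illustration only (hypothetical helper, not part of the merge code): how the OrdinalMap
// built in step 2 resolves a merged (global) ordinal back to a term. It relies on the same
// calls the "ord -> value" Iterable above uses: getFirstSegmentNumber/getFirstSegmentOrd pick
// the first segment containing the term, and that segment's doc values supply the bytes.
private static BytesRef lookupGlobalOrd(OrdinalMap map, SortedSetDocValues[] dvs, long globalOrd) {
  int segmentNumber = map.getFirstSegmentNumber(globalOrd);
  long segmentOrd = map.getFirstSegmentOrd(globalOrd);
  return dvs[segmentNumber].lookupOrd(segmentOrd);
}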
@Test
public void testRandom() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 10000 : 1000);
  String[] childValues = new String[numUniqueChildValues];
  for (int i = 0; i < numUniqueChildValues; i++) {
    childValues[i] = Integer.toString(i);
  }

  IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();
  int childDocId = 0;
  int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
  ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds =
      new ObjectObjectOpenHashMap<String, NavigableSet<String>>();
  for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
    boolean markParentAsDeleted = rarely();
    boolean filterMe = rarely();
    String parent = Integer.toString(parentDocId);
    Document document = new Document();
    document.add(
        new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
    document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
    if (markParentAsDeleted) {
      filteredOrDeletedDocs.add(parentDocId);
      document.add(new StringField("delete", "me", Field.Store.NO));
    }
    if (filterMe) {
      filteredOrDeletedDocs.add(parentDocId);
      document.add(new StringField("filter", "me", Field.Store.NO));
    }
    indexWriter.addDocument(document);

    int numChildDocs;
    if (rarely()) {
      numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
    } else {
      numChildDocs = random().nextInt(TEST_NIGHTLY ? 40 : 10);
    }
    for (int i = 0; i < numChildDocs; i++) {
      boolean markChildAsDeleted = rarely();
      String childValue = childValues[random().nextInt(childValues.length)];

      document = new Document();
      document.add(
          new StringField(
              UidFieldMapper.NAME,
              Uid.createUid("child", Integer.toString(childDocId)),
              Field.Store.NO));
      document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
      document.add(
          new StringField(
              ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
      document.add(new StringField("field1", childValue, Field.Store.NO));
      if (markChildAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      if (!markChildAsDeleted) {
        NavigableSet<String> parentIds;
        if (childValueToParentIds.containsKey(childValue)) {
          parentIds = childValueToParentIds.lget();
        } else {
          childValueToParentIds.put(childValue, parentIds = new TreeSet<String>());
        }
        if (!markParentAsDeleted && !filterMe) {
          parentIds.add(parent);
        }
      }
    }
  }

  // Delete docs that are marked to be deleted.
  indexWriter.deleteDocuments(new Term("delete", "me"));

  indexWriter.commit();
  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(indexReader);
  Engine.Searcher engineSearcher =
      new Engine.SimpleSearcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
  ((TestSearchContext) SearchContext.current())
      .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

  Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
  Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me")));
  int max = numUniqueChildValues / 4;
  for (int i = 0; i < max; i++) {
    // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals
    // with the fact that deletes are applied at the top level when filters are cached.
    Filter parentFilter;
    if (random().nextBoolean()) {
      parentFilter = SearchContext.current().filterCache().cache(rawParentFilter);
    } else {
      parentFilter = rawParentFilter;
    }

    // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the
    // Weight#scorer not get live docs as acceptedDocs
    Filter filterMe;
    if (random().nextBoolean()) {
      filterMe = SearchContext.current().filterCache().cache(rawFilterMe);
    } else {
      filterMe = rawFilterMe;
    }

    // Simulate a parent update
    if (random().nextBoolean()) {
      int numberOfUpdates = 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5);
      for (int j = 0; j < numberOfUpdates; j++) {
        int parentId;
        do {
          parentId = random().nextInt(numParentDocs);
        } while (filteredOrDeletedDocs.contains(parentId));

        String parentUid = Uid.createUid("parent", Integer.toString(parentId));
        indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

        Document document = new Document();
        document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
        document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
        indexWriter.addDocument(document);
      }

      indexReader.close();
      indexReader = DirectoryReader.open(indexWriter.w, true);
      searcher = new IndexSearcher(indexReader);
      engineSearcher =
          new Engine.SimpleSearcher(
              ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
      ((TestSearchContext) SearchContext.current())
          .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
    }

    String childValue = childValues[random().nextInt(numUniqueChildValues)];
    TermQuery childQuery = new TermQuery(new Term("field1", childValue));
    int shortCircuitParentDocSet = random().nextInt(numParentDocs);
    Filter nonNestedDocsFilter = random().nextBoolean() ? NonNestedDocsFilter.INSTANCE : null;
    Query query;
    if (random().nextBoolean()) {
      // Usage in HasChildQueryParser
      query =
          new ChildrenConstantScoreQuery(
              childQuery,
              "parent",
              "child",
              parentFilter,
              shortCircuitParentDocSet,
              nonNestedDocsFilter);
    } else {
      // Usage in HasChildFilterParser
      query =
          new XConstantScoreQuery(
              new CustomQueryWrappingFilter(
                  new ChildrenConstantScoreQuery(
                      childQuery,
                      "parent",
                      "child",
                      parentFilter,
                      shortCircuitParentDocSet,
                      nonNestedDocsFilter)));
    }
    query = new XFilteredQuery(query, filterMe);
    BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
    searcher.search(query, collector);
    FixedBitSet actualResult = collector.getResult();

    FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
    if (childValueToParentIds.containsKey(childValue)) {
      AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
      Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
      if (terms != null) {
        NavigableSet<String> parentIds = childValueToParentIds.lget();
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        for (String id : parentIds) {
          TermsEnum.SeekStatus seekStatus =
              termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
          if (seekStatus == TermsEnum.SeekStatus.FOUND) {
            docsEnum =
                termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
            expectedResult.set(docsEnum.nextDoc());
          } else if (seekStatus == TermsEnum.SeekStatus.END) {
            break;
          }
        }
      }
    }

    assertBitSet(actualResult, expectedResult, searcher);
  }

  indexWriter.close();
  indexReader.close();
  directory.close();
}
public SpansFiltered(Spans spans, Filter filter, AtomicReader reader) throws IOException {
  this(spans, filter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
}
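// Hypothetical usage of the constructor above (illustration only): restrict the spans of a
// SpanTermQuery to documents accepted by a TermFilter. The field and term names are made up,
// and `reader` is assumed to be the AtomicReader the spans are built against.
Spans buildFilteredSpans(AtomicReader reader) throws IOException {
  SpanTermQuery spanQuery = new SpanTermQuery(new Term("body", "lucene"));
  Spans spans =
      spanQuery.getSpans(
          reader.getContext(), reader.getLiveDocs(), new HashMap<Term, TermContext>());
  return new SpansFiltered(spans, new TermFilter(new Term("type", "article")), reader);
}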
@Override
public DoubleArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
  AtomicReader reader = context.reader();

  Terms terms = reader.terms(getFieldNames().indexName());
  if (terms == null) {
    return DoubleArrayAtomicFieldData.EMPTY;
  }

  // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
  final TDoubleArrayList values = new TDoubleArrayList();
  ArrayList<int[]> ordinals = new ArrayList<int[]>();
  int[] idx = new int[reader.maxDoc()];
  ordinals.add(new int[reader.maxDoc()]);

  values.add(0); // ordinal 0 is reserved for documents that have no value
  int termOrd = 1; // current term number

  TermsEnum termsEnum = terms.iterator(null);
  try {
    DocsEnum docsEnum = null;
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      values.add(FieldCache.NUMERIC_UTILS_DOUBLE_PARSER.parseDouble(term));
      docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
      for (int docId = docsEnum.nextDoc();
          docId != DocsEnum.NO_MORE_DOCS;
          docId = docsEnum.nextDoc()) {
        int[] ordinal;
        if (idx[docId] >= ordinals.size()) {
          ordinal = new int[reader.maxDoc()];
          ordinals.add(ordinal);
        } else {
          ordinal = ordinals.get(idx[docId]);
        }
        ordinal[docId] = termOrd;
        idx[docId]++;
      }
      termOrd++;
    }
  } catch (RuntimeException e) {
    if (e.getClass().getName().endsWith("StopFillCacheException")) {
      // all is well, in case numeric parsers are used.
    } else {
      throw e;
    }
  }

  if (ordinals.size() == 1) {
    int[] nativeOrdinals = ordinals.get(0);
    FixedBitSet set = new FixedBitSet(reader.maxDoc());
    double[] sValues = new double[reader.maxDoc()];
    boolean allHaveValue = true;
    for (int i = 0; i < nativeOrdinals.length; i++) {
      int nativeOrdinal = nativeOrdinals[i];
      if (nativeOrdinal == 0) {
        allHaveValue = false;
      } else {
        set.set(i);
        sValues[i] = values.get(nativeOrdinal);
      }
    }
    if (allHaveValue) {
      return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
    } else {
      return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
    }
  } else {
    int[][] nativeOrdinals = new int[ordinals.size()][];
    for (int i = 0; i < nativeOrdinals.length; i++) {
      nativeOrdinals[i] = ordinals.get(i);
    }
    return new DoubleArrayAtomicFieldData.WithOrdinals(
        values.toArray(new double[values.size()]),
        reader.maxDoc(),
        Ordinals.Factories.createFromFlatOrdinals(
            nativeOrdinals, termOrd, fieldDataType.getSettings()));
  }
}
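// Illustration only (hypothetical helper, not in the original class): how the flat ordinal
// arrays built above are laid out. ordinals.get(k)[docId] holds the (k+1)-th term ordinal
// assigned to docId, ordinal 0 means "no further value", and values.get(ordinal) is the parsed
// double for that ordinal.
private static double[] valuesForDoc(int docId, ArrayList<int[]> ordinals, TDoubleArrayList values) {
  TDoubleArrayList docValues = new TDoubleArrayList();
  for (int k = 0; k < ordinals.size(); k++) {
    int ord = ordinals.get(k)[docId];
    if (ord == 0) {
      break; // no more values for this document
    }
    docValues.add(values.get(ord));
  }
  return docValues.toArray(new double[docValues.size()]);
}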
@Override
public void execute(String[] args, PrintStream out) throws Exception {
  String field = null;
  String termVal = null;
  try {
    field = args[0];
  } catch (Exception e) {
    field = null;
  }

  if (field != null) {
    String[] parts = field.split(":");
    if (parts.length > 1) {
      field = parts[0];
      termVal = parts[1];
    }
  }

  if (field == null || termVal == null) {
    out.println("usage: field:term");
    out.flush();
    return;
  }

  IndexReader reader = ctx.getIndexReader();
  List<AtomicReaderContext> leaves = reader.leaves();
  int docBase = 0;
  int numPerPage = 20;

  for (AtomicReaderContext leaf : leaves) {
    AtomicReader atomicReader = leaf.reader();
    Terms terms = atomicReader.terms(field);
    if (terms == null) {
      continue;
    }
    boolean hasPositions = terms.hasPositions();

    if (terms != null && termVal != null) {
      TermsEnum te = terms.iterator(null);
      int count = 0;
      if (te.seekExact(new BytesRef(termVal), true)) {
        if (hasPositions) {
          DocsAndPositionsEnum iter = te.docsAndPositions(atomicReader.getLiveDocs(), null);
          int docid;
          while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
            out.print("docid: " + (docid + docBase) + ", freq: " + iter.freq() + ", ");
            for (int i = 0; i < iter.freq(); ++i) {
              out.print("pos " + i + ": " + iter.nextPosition());
              BytesRef payload = iter.getPayload();
              if (payload != null) {
                out.print(",payload: " + payload);
              }
              out.print(";");
            }
            out.println();
            if (ctx.isInteractiveMode()) {
              if (count % numPerPage == 0) {
                out.println("Ctrl-D to break");
                int ch = System.in.read();
                if (ch == -1) {
                  out.flush();
                  return;
                }
              }
            }
          }
        } else {
          DocsEnum iter = te.docs(atomicReader.getLiveDocs(), null);
          int docid;
          while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
            out.println("docid: " + (docid + docBase));
            if (ctx.isInteractiveMode()) {
              if (count % numPerPage == 0) {
                out.println("Ctrl-D to break");
                int ch = System.in.read();
                if (ch == -1) {
                  out.flush();
                  return;
                }
              }
            }
          }
        }
      }
    }
    docBase += atomicReader.maxDoc();
  }
}
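// Hypothetical invocation of the command above (the `termCommand` instance name is made up):
// walks the postings for the term "lucene" in field "body", printing doc ids, frequencies and
// positions when the field was indexed with positions.
termCommand.execute(new String[] {"body:lucene"}, System.out);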
int resolveParentDocuments(
    TopDocs topDocs,
    SearchContext context,
    Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) {
  int parentHitsResolved = 0;
  Recycler.V<ObjectObjectOpenHashMap<Object, Recycler.V<IntObjectOpenHashMap<ParentDoc>>>>
      parentDocsPerReader =
          cacheRecycler.hashMap(context.searcher().getIndexReader().leaves().size());
  for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    int readerIndex =
        ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
    AtomicReaderContext subContext =
        context.searcher().getIndexReader().leaves().get(readerIndex);
    int subDoc = scoreDoc.doc - subContext.docBase;

    // find the parent id
    HashedBytesArray parentId =
        context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc);
    if (parentId == null) {
      // no parent found
      continue;
    }
    // now go over and find the parent doc Id and reader tuple
    for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
      AtomicReader indexReader = atomicReaderContext.reader();
      int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
      Bits liveDocs = indexReader.getLiveDocs();
      if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) {
        // we found a match, add it and break
        Recycler.V<IntObjectOpenHashMap<ParentDoc>> readerParentDocs =
            parentDocsPerReader.v().get(indexReader.getCoreCacheKey());
        if (readerParentDocs == null) {
          readerParentDocs = cacheRecycler.intObjectMap(indexReader.maxDoc());
          parentDocsPerReader.v().put(indexReader.getCoreCacheKey(), readerParentDocs);
        }

        ParentDoc parentDoc = readerParentDocs.v().get(parentDocId);
        if (parentDoc == null) {
          parentHitsResolved++; // we have a hit on a parent
          parentDoc = new ParentDoc();
          parentDoc.docId = parentDocId;
          parentDoc.count = 1;
          parentDoc.maxScore = scoreDoc.score;
          parentDoc.sumScores = scoreDoc.score;
          readerParentDocs.v().put(parentDocId, parentDoc);
        } else {
          parentDoc.count++;
          parentDoc.sumScores += scoreDoc.score;
          if (scoreDoc.score > parentDoc.maxScore) {
            parentDoc.maxScore = scoreDoc.score;
          }
        }
      }
    }
  }

  boolean[] states = parentDocsPerReader.v().allocated;
  Object[] keys = parentDocsPerReader.v().keys;
  Object[] values = parentDocsPerReader.v().values;
  for (int i = 0; i < states.length; i++) {
    if (states[i]) {
      Recycler.V<IntObjectOpenHashMap<ParentDoc>> value =
          (Recycler.V<IntObjectOpenHashMap<ParentDoc>>) values[i];
      ParentDoc[] _parentDocs = value.v().values().toArray(ParentDoc.class);
      Arrays.sort(_parentDocs, PARENT_DOC_COMP);
      parentDocs.v().put(keys[i], _parentDocs);
      Releasables.release(value);
    }
  }
  Releasables.release(parentDocsPerReader);
  return parentHitsResolved;
}
private IndexIterationContext createContext(
    int nDocs,
    RandomIndexWriter fromWriter,
    RandomIndexWriter toWriter,
    boolean multipleValuesPerDocument,
    boolean scoreDocsInOrder)
    throws IOException {
  IndexIterationContext context = new IndexIterationContext();
  int numRandomValues = nDocs / 2;
  context.randomUniqueValues = new String[numRandomValues];
  Set<String> trackSet = new HashSet<String>();
  context.randomFrom = new boolean[numRandomValues];
  for (int i = 0; i < numRandomValues; i++) {
    String uniqueRandomValue;
    do {
      uniqueRandomValue = _TestUtil.randomRealisticUnicodeString(random());
      // uniqueRandomValue = _TestUtil.randomSimpleString(random);
    } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
    // Generate unique values and empty strings aren't allowed.
    trackSet.add(uniqueRandomValue);
    context.randomFrom[i] = random().nextBoolean();
    context.randomUniqueValues[i] = uniqueRandomValue;
  }

  RandomDoc[] docs = new RandomDoc[nDocs];
  for (int i = 0; i < nDocs; i++) {
    String id = Integer.toString(i);
    int randomI = random().nextInt(context.randomUniqueValues.length);
    String value = context.randomUniqueValues[randomI];
    Document document = new Document();
    document.add(newTextField(random(), "id", id, Field.Store.NO));
    document.add(newTextField(random(), "value", value, Field.Store.NO));

    boolean from = context.randomFrom[randomI];
    int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
    docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
    for (int j = 0; j < numberOfLinkValues; j++) {
      String linkValue =
          context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
      docs[i].linkValues.add(linkValue);
      if (from) {
        if (!context.fromDocuments.containsKey(linkValue)) {
          context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
        }
        if (!context.randomValueFromDocs.containsKey(value)) {
          context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
        }

        context.fromDocuments.get(linkValue).add(docs[i]);
        context.randomValueFromDocs.get(value).add(docs[i]);
        document.add(newTextField(random(), "from", linkValue, Field.Store.NO));
      } else {
        if (!context.toDocuments.containsKey(linkValue)) {
          context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
        }
        if (!context.randomValueToDocs.containsKey(value)) {
          context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
        }

        context.toDocuments.get(linkValue).add(docs[i]);
        context.randomValueToDocs.get(value).add(docs[i]);
        document.add(newTextField(random(), "to", linkValue, Field.Store.NO));
      }
    }

    final RandomIndexWriter w;
    if (from) {
      w = fromWriter;
    } else {
      w = toWriter;
    }

    w.addDocument(document);
    if (random().nextInt(10) == 4) {
      w.commit();
    }
    if (VERBOSE) {
      System.out.println("Added document[" + docs[i].id + "]: " + document);
    }
  }

  // Pre-compute all possible hits for all unique random values. On top of this also compute all
  // possible score for any ScoreMode.
  IndexSearcher fromSearcher = newSearcher(fromWriter.getReader());
  IndexSearcher toSearcher = newSearcher(toWriter.getReader());
  for (int i = 0; i < context.randomUniqueValues.length; i++) {
    String uniqueRandomValue = context.randomUniqueValues[i];
    final String fromField;
    final String toField;
    final Map<String, Map<Integer, JoinScore>> queryVals;
    if (context.randomFrom[i]) {
      fromField = "from";
      toField = "to";
      queryVals = context.fromHitsToJoinScore;
    } else {
      fromField = "to";
      toField = "from";
      queryVals = context.toHitsToJoinScore;
    }
    final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<BytesRef, JoinScore>();
    if (multipleValuesPerDocument) {
      fromSearcher.search(
          new TermQuery(new Term("value", uniqueRandomValue)),
          new Collector() {
            private Scorer scorer;
            private SortedSetDocValues docTermOrds;
            final BytesRef joinValue = new BytesRef();

            @Override
            public void collect(int doc) throws IOException {
              docTermOrds.setDocument(doc);
              long ord;
              while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                docTermOrds.lookupOrd(ord, joinValue);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                if (joinScore == null) {
                  joinValueToJoinScores.put(
                      BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                }
                joinScore.addScore(scorer.score());
              }
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
            }

            @Override
            public void setScorer(Scorer scorer) {
              this.scorer = scorer;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return false;
            }
          });
    } else {
      fromSearcher.search(
          new TermQuery(new Term("value", uniqueRandomValue)),
          new Collector() {
            private Scorer scorer;
            private BinaryDocValues terms;
            private Bits docsWithField;
            private final BytesRef spare = new BytesRef();

            @Override
            public void collect(int doc) throws IOException {
              terms.get(doc, spare);
              BytesRef joinValue = spare;
              if (joinValue.length == 0 && !docsWithField.get(doc)) {
                return;
              }

              JoinScore joinScore = joinValueToJoinScores.get(joinValue);
              if (joinScore == null) {
                joinValueToJoinScores.put(
                    BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
              }
              joinScore.addScore(scorer.score());
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
              docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
            }

            @Override
            public void setScorer(Scorer scorer) {
              this.scorer = scorer;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return false;
            }
          });
    }

    final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
    if (multipleValuesPerDocument) {
      if (scoreDocsInOrder) {
        AtomicReader slowCompositeReader =
            SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
        Terms terms = slowCompositeReader.terms(toField);
        if (terms != null) {
          DocsEnum docsEnum = null;
          TermsEnum termsEnum = null;
          SortedSet<BytesRef> joinValues =
              new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
          joinValues.addAll(joinValueToJoinScores.keySet());
          for (BytesRef joinValue : joinValues) {
            termsEnum = terms.iterator(termsEnum);
            if (termsEnum.seekExact(joinValue)) {
              docsEnum =
                  termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              JoinScore joinScore = joinValueToJoinScores.get(joinValue);

              for (int doc = docsEnum.nextDoc();
                  doc != DocIdSetIterator.NO_MORE_DOCS;
                  doc = docsEnum.nextDoc()) {
                // First encountered join value determines the score.
                // Something to keep in mind for many-to-many relations.
                if (!docToJoinScore.containsKey(doc)) {
                  docToJoinScore.put(doc, joinScore);
                }
              }
            }
          }
        }
      } else {
        toSearcher.search(
            new MatchAllDocsQuery(),
            new Collector() {
              private SortedSetDocValues docTermOrds;
              private final BytesRef scratch = new BytesRef();
              private int docBase;

              @Override
              public void collect(int doc) throws IOException {
                docTermOrds.setDocument(doc);
                long ord;
                while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                  docTermOrds.lookupOrd(ord, scratch);
                  JoinScore joinScore = joinValueToJoinScores.get(scratch);
                  if (joinScore == null) {
                    continue;
                  }
                  Integer basedDoc = docBase + doc;
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(basedDoc)) {
                    docToJoinScore.put(basedDoc, joinScore);
                  }
                }
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                docBase = context.docBase;
                docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }

              @Override
              public void setScorer(Scorer scorer) {}
            });
      }
    } else {
      toSearcher.search(
          new MatchAllDocsQuery(),
          new Collector() {
            private BinaryDocValues terms;
            private int docBase;
            private final BytesRef spare = new BytesRef();

            @Override
            public void collect(int doc) {
              terms.get(doc, spare);
              JoinScore joinScore = joinValueToJoinScores.get(spare);
              if (joinScore == null) {
                return;
              }
              docToJoinScore.put(docBase + doc, joinScore);
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
              docBase = context.docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return false;
            }

            @Override
            public void setScorer(Scorer scorer) {}
          });
    }
    queryVals.put(uniqueRandomValue, docToJoinScore);
  }

  fromSearcher.getIndexReader().close();
  toSearcher.getIndexReader().close();

  return context;
}
@Test
public void testRandom() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600);
  String[] childValues = new String[numUniqueChildValues];
  for (int i = 0; i < numUniqueChildValues; i++) {
    childValues[i] = Integer.toString(i);
  }

  int childDocId = 0;
  int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
  ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
      new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>();
  for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
    boolean markParentAsDeleted = rarely();
    String parent = Integer.toString(parentDocId);
    Document document = new Document();
    document.add(
        new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
    document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
    if (markParentAsDeleted) {
      document.add(new StringField("delete", "me", Field.Store.NO));
    }
    indexWriter.addDocument(document);

    int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
    for (int i = 0; i < numChildDocs; i++) {
      boolean markChildAsDeleted = rarely();
      String childValue = childValues[random().nextInt(childValues.length)];

      document = new Document();
      document.add(
          new StringField(
              UidFieldMapper.NAME,
              Uid.createUid("child", Integer.toString(childDocId)),
              Field.Store.NO));
      document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
      document.add(
          new StringField(
              ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
      document.add(new StringField("field1", childValue, Field.Store.NO));
      if (markChildAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      if (!markChildAsDeleted) {
        NavigableMap<String, FloatArrayList> parentIdToChildScores;
        if (childValueToParentIds.containsKey(childValue)) {
          parentIdToChildScores = childValueToParentIds.lget();
        } else {
          childValueToParentIds.put(
              childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>());
        }
        if (!markParentAsDeleted) {
          FloatArrayList childScores = parentIdToChildScores.get(parent);
          if (childScores == null) {
            parentIdToChildScores.put(parent, childScores = new FloatArrayList());
          }
          childScores.add(1f);
        }
      }
    }
  }

  // Delete docs that are marked to be deleted.
  indexWriter.deleteDocuments(new Term("delete", "me"));

  indexWriter.close();
  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(indexReader);
  Engine.Searcher engineSearcher =
      new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
  ((TestSearchContext) SearchContext.current())
      .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

  TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
  for (String childValue : childValues) {
    Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue)));
    int shortCircuitParentDocSet = random().nextInt(numParentDocs);
    ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
    Query query =
        new ChildrenQuery(
            "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet);
    BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
    int numHits = 1 + random().nextInt(25);
    TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false);
    searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
    FixedBitSet actualResult = collector.getResult();

    FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
    MockScorer mockScorer = new MockScorer(scoreType);
    TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
    expectedTopDocsCollector.setScorer(mockScorer);
    if (childValueToParentIds.containsKey(childValue)) {
      AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
      Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
      if (terms != null) {
        NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
          TermsEnum.SeekStatus seekStatus =
              termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
          if (seekStatus == TermsEnum.SeekStatus.FOUND) {
            docsEnum =
                termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
            expectedResult.set(docsEnum.nextDoc());
            mockScorer.scores = entry.getValue();
            expectedTopDocsCollector.collect(docsEnum.docID());
          } else if (seekStatus == TermsEnum.SeekStatus.END) {
            break;
          }
        }
      }
    }

    assertBitSet(actualResult, expectedResult, searcher);
    assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
  }

  indexReader.close();
  directory.close();
}