@Test
public void testNestedChildrenFilter() throws Exception {
  int numParentDocs = scaledRandomIntBetween(0, 32);
  int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  for (int i = 0; i < numParentDocs; i++) {
    int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
    List<Document> docs = new ArrayList<>(numChildDocs + 1);
    for (int j = 0; j < numChildDocs; j++) {
      Document childDoc = new Document();
      childDoc.add(new StringField("type", "child", Field.Store.NO));
      docs.add(childDoc);
    }
    Document parentDoc = new Document();
    parentDoc.add(new StringField("type", "parent", Field.Store.NO));
    parentDoc.add(new IntField("num_child_docs", numChildDocs, Field.Store.YES));
    docs.add(parentDoc);
    writer.addDocuments(docs);
  }
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = new IndexSearcher(reader);
  FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
  BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
      new QueryWrapperFilter(new TermQuery(new Term("type", "parent"))));
  Filter childFilter = new QueryWrapperFilter(new TermQuery(new Term("type", "child")));
  int checkedParents = 0;
  for (LeafReaderContext leaf : reader.leaves()) {
    DocIdSetIterator parents = parentFilter.getDocIdSet(leaf).iterator();
    for (int parentDoc = parents.nextDoc();
         parentDoc != DocIdSetIterator.NO_MORE_DOCS;
         parentDoc = parents.nextDoc()) {
      int expectedChildDocs =
          leaf.reader().document(parentDoc).getField("num_child_docs").numericValue().intValue();
      hitContext.reset(null, leaf, parentDoc, searcher);
      NestedChildrenFilter nestedChildrenFilter =
          new NestedChildrenFilter(parentFilter, childFilter, hitContext);
      TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
      searcher.search(new ConstantScoreQuery(nestedChildrenFilter), totalHitCountCollector);
      assertThat(totalHitCountCollector.getTotalHits(), equalTo(expectedChildDocs));
      checkedParents++;
    }
  }
  assertThat(checkedParents, equalTo(numParentDocs));
  reader.close();
  dir.close();
}
public void testResetRootDocId() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc);
  List<Document> documents = new ArrayList<>();

  // 1 segment with 1 root document and 3 nested sub docs
  Document document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  indexWriter.commit();
  documents.clear();

  // 1 segment with:
  // 1 document, with 1 nested subdoc
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  documents.clear();
  // and 1 document, with 1 nested subdoc
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  indexWriter.commit();
  indexWriter.close();

  IndexService indexService = createIndex("test");
  DirectoryReader directoryReader = DirectoryReader.open(directory);
  directoryReader =
      ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
  IndexSearcher searcher = new IndexSearcher(directoryReader);

  indexService.mapperService().merge(
      "test",
      new CompressedXContent(
          PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string()),
      MapperService.MergeReason.MAPPING_UPDATE,
      false);
  SearchContext searchContext = createSearchContext(indexService);
  AggregationContext context = new AggregationContext(searchContext);

  AggregatorFactories.Builder builder = AggregatorFactories.builder();
  NestedAggregatorBuilder factory = new NestedAggregatorBuilder("test", "nested_field");
  builder.addAggregator(factory);
  AggregatorFactories factories = builder.build(context, null);
  searchContext.aggregations(new SearchContextAggregations(factories));
  Aggregator[] aggs = factories.createTopLevelAggregators();
  BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
  collector.preCollection();
  // A regular search always excludes nested docs, so we use NonNestedDocsFilter.INSTANCE here
  // (otherwise MatchAllDocsQuery would be sufficient).
  // We exclude the root doc with uid type#2; this triggers the bug if the root doc isn't reset
  // when a new segment is processed, because root doc type#3 and root doc type#1 have the same
  // segment docid.
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(Queries.newNonNestedFilter(), Occur.MUST);
  bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
  searcher.search(new ConstantScoreQuery(bq.build()), collector);
  collector.postCollection();

  Nested nested = (Nested) aggs[0].buildAggregation(0);
  // The bug manifests as 6 docs being returned: when currentRootDoc isn't reset, the previous
  // child docs from the first segment are emitted as hits again.
  assertThat(nested.getDocCount(), equalTo(4L));

  directoryReader.close();
  directory.close();
}
// Tests some very basic usages...
public void testBasic() throws Exception {
  final String groupField = "author";

  FieldType customType = new FieldType();
  customType.setStored(true);

  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(
      random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
  boolean canUseIDV = true;
  List<Document> documents = new ArrayList<>();
  // 0
  Document doc = new Document();
  addGroupField(doc, groupField, "author1", canUseIDV);
  doc.add(new TextField("content", "random text", Field.Store.YES));
  doc.add(new Field("id", "1", customType));
  documents.add(doc);
  // 1
  doc = new Document();
  addGroupField(doc, groupField, "author1", canUseIDV);
  doc.add(new TextField("content", "some more random text", Field.Store.YES));
  doc.add(new Field("id", "2", customType));
  documents.add(doc);
  // 2
  doc = new Document();
  addGroupField(doc, groupField, "author1", canUseIDV);
  doc.add(new TextField("content", "some more random textual data", Field.Store.YES));
  doc.add(new Field("id", "3", customType));
  doc.add(new StringField("groupend", "x", Field.Store.NO));
  documents.add(doc);
  w.addDocuments(documents);
  documents.clear();
  // 3
  doc = new Document();
  addGroupField(doc, groupField, "author2", canUseIDV);
  doc.add(new TextField("content", "some random text", Field.Store.YES));
  doc.add(new Field("id", "4", customType));
  doc.add(new StringField("groupend", "x", Field.Store.NO));
  w.addDocument(doc);
  // 4
  doc = new Document();
  addGroupField(doc, groupField, "author3", canUseIDV);
  doc.add(new TextField("content", "some more random text", Field.Store.YES));
  doc.add(new Field("id", "5", customType));
  documents.add(doc);
  // 5
  doc = new Document();
  addGroupField(doc, groupField, "author3", canUseIDV);
  doc.add(new TextField("content", "random", Field.Store.YES));
  doc.add(new Field("id", "6", customType));
  doc.add(new StringField("groupend", "x", Field.Store.NO));
  documents.add(doc);
  w.addDocuments(documents);
  documents.clear();
  // 6 -- no author field
  doc = new Document();
  doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
  doc.add(new Field("id", "6", customType));
  doc.add(new StringField("groupend", "x", Field.Store.NO));
  w.addDocument(doc);

  IndexSearcher indexSearcher = newSearcher(w.getReader());
  w.close();

  Sort groupSort = Sort.RELEVANCE;
  GroupingSearch groupingSearch = createRandomGroupingSearch(groupField, groupSort, 5, canUseIDV);

  TopGroups<?> groups =
      groupingSearch.search(indexSearcher, new TermQuery(new Term("content", "random")), 0, 10);

  assertEquals(7, groups.totalHitCount);
  assertEquals(7, groups.totalGroupedHitCount);
  assertEquals(4, groups.groups.length);

  // relevance order: 5, 0, 3, 4, 1, 2, 6
  // the later a document is added the higher this docId value
  GroupDocs<?> group = groups.groups[0];
  compareGroupValue("author3", group);
  assertEquals(2, group.scoreDocs.length);
  assertEquals(5, group.scoreDocs[0].doc);
  assertEquals(4, group.scoreDocs[1].doc);
  assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);

  group = groups.groups[1];
  compareGroupValue("author1", group);
  assertEquals(3, group.scoreDocs.length);
  assertEquals(0, group.scoreDocs[0].doc);
  assertEquals(1, group.scoreDocs[1].doc);
  assertEquals(2, group.scoreDocs[2].doc);
  assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);
  assertTrue(group.scoreDocs[1].score > group.scoreDocs[2].score);

  group = groups.groups[2];
  compareGroupValue("author2", group);
  assertEquals(1, group.scoreDocs.length);
  assertEquals(3, group.scoreDocs[0].doc);

  group = groups.groups[3];
  compareGroupValue(null, group);
  assertEquals(1, group.scoreDocs.length);
  assertEquals(6, group.scoreDocs[0].doc);

  Filter lastDocInBlock = new QueryWrapperFilter(new TermQuery(new Term("groupend", "x")));
  groupingSearch = new GroupingSearch(lastDocInBlock);
  groups =
      groupingSearch.search(indexSearcher, new TermQuery(new Term("content", "random")), 0, 10);

  assertEquals(7, groups.totalHitCount);
  assertEquals(7, groups.totalGroupedHitCount);
  assertEquals(4, groups.totalGroupCount.longValue());
  assertEquals(4, groups.groups.length);

  indexSearcher.getIndexReader().close();
  dir.close();
}
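// The helpers used by testBasic (addGroupField, compareGroupValue, createRandomGroupingSearch)
// are not shown in this excerpt. Below is a minimal sketch of what they might look like, inferred
// only from how they are called above; the exact signatures, the doc-values choice, and the
// grouping configuration are assumptions, not the verbatim test code.
private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
  // Index the group value as text; optionally also as doc values so grouping can use them.
  doc.add(new TextField(groupField, value, Field.Store.YES));
  if (canUseIDV) {
    doc.add(new SortedDocValuesField(groupField, new BytesRef(value)));
  }
}

private void compareGroupValue(String expected, GroupDocs<?> group) {
  // Group values come back as BytesRef for term grouping, or null for the "no author" group.
  if (expected == null) {
    assertNull(group.groupValue);
  } else {
    assertEquals(new BytesRef(expected), group.groupValue);
  }
}

private GroupingSearch createRandomGroupingSearch(
    String groupField, Sort groupSort, int docsPerGroup, boolean canUseIDV) {
  // Sketch: group on the field itself; a randomized variant could pick other grouping sources.
  GroupingSearch groupingSearch = new GroupingSearch(groupField);
  groupingSearch.setGroupSort(groupSort);
  groupingSearch.setGroupDocsLimit(docsPerGroup);
  return groupingSearch;
}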
public void testRandomDiscreteMultiValueHighlighting() throws Exception {
  String[] randomValues = new String[3 + random().nextInt(10 * RANDOM_MULTIPLIER)];
  for (int i = 0; i < randomValues.length; i++) {
    String randomValue;
    do {
      randomValue = _TestUtil.randomSimpleString(random());
    } while ("".equals(randomValue));
    randomValues[i] = randomValue;
  }

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(
      random(), dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMergePolicy(newLogMergePolicy()));
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorOffsets(true);
  customType.setStoreTermVectorPositions(true);

  int numDocs = randomValues.length * 5;
  int numFields = 2 + random().nextInt(5);
  int numTerms = 2 + random().nextInt(3);
  List<Doc> docs = new ArrayList<Doc>(numDocs);
  List<Document> documents = new ArrayList<Document>(numDocs);
  Map<String, Set<Integer>> valueToDocId = new HashMap<String, Set<Integer>>();
  for (int i = 0; i < numDocs; i++) {
    Document document = new Document();
    String[][] fields = new String[numFields][numTerms];
    for (int j = 0; j < numFields; j++) {
      String[] fieldValues = new String[numTerms];
      fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
      StringBuilder builder = new StringBuilder(fieldValues[0]);
      for (int k = 1; k < numTerms; k++) {
        fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
        builder.append(' ').append(fieldValues[k]);
      }
      document.add(new Field(F, builder.toString(), customType));
      fields[j] = fieldValues;
    }
    docs.add(new Doc(fields));
    documents.add(document);
  }
  writer.addDocuments(documents);
  writer.close();
  IndexReader reader = DirectoryReader.open(dir);

  try {
    int highlightIters = 1 + random().nextInt(120 * RANDOM_MULTIPLIER);
    for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++) {
      String queryTerm = randomValues[random().nextInt(randomValues.length)];
      int randomHit = valueToDocId.get(queryTerm).iterator().next();
      List<StringBuilder> builders = new ArrayList<StringBuilder>();
      for (String[] fieldValues : docs.get(randomHit).fieldValues) {
        StringBuilder builder = new StringBuilder();
        boolean hit = false;
        for (int i = 0; i < fieldValues.length; i++) {
          if (queryTerm.equals(fieldValues[i])) {
            builder.append("<b>").append(queryTerm).append("</b>");
            hit = true;
          } else {
            builder.append(fieldValues[i]);
          }
          if (i != fieldValues.length - 1) {
            builder.append(' ');
          }
        }
        if (hit) {
          builders.add(builder);
        }
      }

      FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
      FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);
      FieldPhraseList fpl = new FieldPhraseList(stack, fq);
      SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
      FieldFragList ffl = sflb.createFieldFragList(fpl, 300);

      SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
      sfb.setDiscreteMultiValueHighlighting(true);
      String[] actualFragments = sfb.createFragments(reader, randomHit, F, ffl, numFields);
      assertEquals(builders.size(), actualFragments.length);
      for (int i = 0; i < actualFragments.length; i++) {
        assertEquals(builders.get(i).toString(), actualFragments[i]);
      }
    }
  } finally {
    reader.close();
    dir.close();
  }
}
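// The getRandomValue helper and the Doc holder referenced above are not part of this excerpt.
// A minimal sketch of the assumed shapes follows: getRandomValue picks one of the pre-generated
// values and records which doc ids contain it, and Doc simply keeps the per-field values for the
// later verification pass. Names and signatures are assumptions based on how they are called.
private String getRandomValue(
    String[] randomValues, Map<String, Set<Integer>> valueToDocId, int docId) {
  String value = randomValues[random().nextInt(randomValues.length)];
  Set<Integer> docIds = valueToDocId.get(value);
  if (docIds == null) {
    docIds = new HashSet<Integer>();
    valueToDocId.put(value, docIds);
  }
  docIds.add(docId);
  return value;
}

private static class Doc {
  final String[][] fieldValues;

  private Doc(String[][] fieldValues) {
    this.fieldValues = fieldValues;
  }
}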