@Test public void testHugeLabel() throws Exception { Directory indexDir = newDirectory(), taxoDir = newDirectory(); IndexWriter indexWriter = new IndexWriter( indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter( taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetFields facetFields = new FacetFields(taxoWriter); // Add one huge label: String bigs = null; int ordinal = -1; CategoryPath cp = null; while (true) { int len = CategoryPath.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = _TestUtil.randomSimpleString(random(), len, len); cp = new CategoryPath("dim", bigs); ordinal = taxoWriter.addCategory(cp); Document doc = new Document(); facetFields.addFields(doc, Collections.singletonList(cp)); indexWriter.addDocument(doc); break; } // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { String s = _TestUtil.randomSimpleString(random(), 1, 10); taxoWriter.addCategory(new CategoryPath("dim", s)); Document doc = new Document(); facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dim", s))); indexWriter.addDocument(doc); } // when too large components were allowed to be added, this resulted in a new added category assertEquals(ordinal, taxoWriter.addCategory(cp)); IOUtils.close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.open(indexDir); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT); ddq.add(cp); assertEquals(1, searcher.search(ddq, 10).totalHits); IOUtils.close(indexReader, taxoReader); IOUtils.close(indexDir, taxoDir); }
public void testSortedSetDocValuesAccumulator() throws Exception { assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); // Use a custom delim char to make sure the impls // respect it: final char delim = ':'; FacetIndexingParams fip = new FacetIndexingParams() { @Override public char getFacetDelimChar() { return delim; } }; SortedSetDocValuesFacetFields dvFields = new SortedSetDocValuesFacetFields(fip); Document doc = new Document(); // Mixup order we add these paths, to verify tie-break // order is by label (unicode sort) and has nothing to // do w/ order we added them: List<CategoryPath> paths = new ArrayList<CategoryPath>(); paths.add(new CategoryPath("a", "foo")); paths.add(new CategoryPath("a", "bar")); paths.add(new CategoryPath("a", "zoo")); Collections.shuffle(paths, random()); paths.add(new CategoryPath("b", "baz")); paths.add(new CategoryPath("b" + FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR, "bazfoo")); dvFields.addFields(doc, paths); writer.addDocument(doc); if (random().nextBoolean()) { writer.commit(); } doc = new Document(); dvFields.addFields(doc, Collections.singletonList(new CategoryPath("a", "foo"))); writer.addDocument(doc); // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); writer.close(); List<FacetRequest> requests = new ArrayList<FacetRequest>(); requests.add(new CountFacetRequest(new CategoryPath("a"), 10)); requests.add(new CountFacetRequest(new CategoryPath("b"), 10)); requests.add( new CountFacetRequest( new CategoryPath("b" + FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR), 10)); final boolean doDimCount = random().nextBoolean(); CategoryListParams clp = new CategoryListParams() { @Override public OrdinalPolicy getOrdinalPolicy(String dimension) { return doDimCount ? OrdinalPolicy.NO_PARENTS : OrdinalPolicy.ALL_BUT_DIMENSION; } }; FacetSearchParams fsp = new FacetSearchParams(new FacetIndexingParams(clp), requests); // Per-top-reader state: SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(fip, searcher.getIndexReader()); // SortedSetDocValuesCollector c = new SortedSetDocValuesCollector(state); // SortedSetDocValuesCollectorMergeBySeg c = new SortedSetDocValuesCollectorMergeBySeg(state); FacetsCollector c = FacetsCollector.create(new SortedSetDocValuesAccumulator(fsp, state)); searcher.search(new MatchAllDocsQuery(), c); // List<FacetResult> results = c.getFacetResults(requests); List<FacetResult> results = c.getFacetResults(); assertEquals(3, results.size()); int dimCount = doDimCount ? 4 : 0; assertEquals( "a (" + dimCount + ")\n foo (2)\n bar (1)\n zoo (1)\n", FacetTestUtils.toSimpleString(results.get(0))); dimCount = doDimCount ? 1 : 0; assertEquals( "b (" + dimCount + ")\n baz (1)\n", FacetTestUtils.toSimpleString(results.get(1))); dimCount = doDimCount ? 1 : 0; assertEquals( "b" + FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR + " (" + dimCount + ")\n bazfoo (1)\n", FacetTestUtils.toSimpleString(results.get(2))); // DrillDown: DrillDownQuery q = new DrillDownQuery(fip); q.add(new CategoryPath("a", "foo")); q.add(new CategoryPath("b", "baz")); TopDocs hits = searcher.search(q, 1); assertEquals(1, hits.totalHits); q = new DrillDownQuery(fip); q.add(new CategoryPath("a")); hits = searcher.search(q, 1); assertEquals(2, hits.totalHits); searcher.getIndexReader().close(); dir.close(); }