@Test
  public void testHugeLabel() throws Exception {
    // Scenario: a category with a label close to the maximum path length is added first,
    // followed by a few short ones. Re-adding the long category must yield the ordinal it was
    // originally given, and a drill-down on it must match exactly one document.
    final Directory indexDir = newDirectory();
    final Directory taxoDir = newDirectory();
    final IndexWriter indexWriter =
        new IndexWriter(
            indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    final DirectoryTaxonomyWriter taxoWriter =
        new DirectoryTaxonomyWriter(
            taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
    final FacetFields facetFields = new FacetFields(taxoWriter);

    // One document carrying the huge label; four characters are held back for the dimension
    // and separator.
    final int hugeLength = CategoryPath.MAX_CATEGORY_PATH_LENGTH - 4;
    final String hugeLabel = _TestUtil.randomSimpleString(random(), hugeLength, hugeLength);
    final CategoryPath hugePath = new CategoryPath("dim", hugeLabel);
    final int hugeOrdinal = taxoWriter.addCategory(hugePath);
    final Document hugeDoc = new Document();
    facetFields.addFields(hugeDoc, Collections.singletonList(hugePath));
    indexWriter.addDocument(hugeDoc);

    // Three short labels, intended to force the taxonomy writer cache to re-hash.
    for (int remaining = 3; remaining > 0; remaining--) {
      final CategoryPath tinyPath =
          new CategoryPath("dim", _TestUtil.randomSimpleString(random(), 1, 10));
      taxoWriter.addCategory(tinyPath);
      final Document tinyDoc = new Document();
      facetFields.addFields(tinyDoc, Collections.singletonList(tinyPath));
      indexWriter.addDocument(tinyDoc);
    }

    // Back when over-long components were accepted, this call created a brand new category
    // instead of returning the existing ordinal.
    assertEquals(hugeOrdinal, taxoWriter.addCategory(hugePath));

    IOUtils.close(indexWriter, taxoWriter);

    final DirectoryReader indexReader = DirectoryReader.open(indexDir);
    final TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    final IndexSearcher searcher = new IndexSearcher(indexReader);
    final DrillDownQuery drillDown = new DrillDownQuery(FacetIndexingParams.DEFAULT);
    drillDown.add(hugePath);
    assertEquals(1, searcher.search(drillDown, 10).totalHits);

    IOUtils.close(indexReader, taxoReader);

    IOUtils.close(indexDir, taxoDir);
  }
Ejemplo n.º 2
0
  /**
   * Indexes two documents with SortedSet doc-values facet fields under a custom delimiter
   * character, then checks the counts reported for three requested dimensions and the hit totals
   * of two drill-down queries.
   */
  public void testSortedSetDocValuesAccumulator() throws Exception {
    assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet());
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // Index with a non-default delimiter so the implementations are shown to honour it.
    final char customDelim = ':';
    final FacetIndexingParams fip =
        new FacetIndexingParams() {
          @Override
          public char getFacetDelimChar() {
            return customDelim;
          }
        };
    final SortedSetDocValuesFacetFields dvFields = new SortedSetDocValuesFacetFields(fip);

    // Dimension name that ends with the default delimiter character.
    final String delimDim = "b" + FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR;

    // The "a" paths are shuffled before indexing: the expected tie-break order is by label
    // (unicode sort) and must not depend on insertion order.
    final List<CategoryPath> firstDocPaths = new ArrayList<CategoryPath>();
    firstDocPaths.add(new CategoryPath("a", "foo"));
    firstDocPaths.add(new CategoryPath("a", "bar"));
    firstDocPaths.add(new CategoryPath("a", "zoo"));
    Collections.shuffle(firstDocPaths, random());
    firstDocPaths.add(new CategoryPath("b", "baz"));
    firstDocPaths.add(new CategoryPath(delimDim, "bazfoo"));

    final Document firstDoc = new Document();
    dvFields.addFields(firstDoc, firstDocPaths);
    writer.addDocument(firstDoc);

    // Randomly commit between the two documents.
    final boolean commitBetweenDocs = random().nextBoolean();
    if (commitBetweenDocs) {
      writer.commit();
    }

    final Document secondDoc = new Document();
    dvFields.addFields(secondDoc, Collections.singletonList(new CategoryPath("a", "foo")));
    writer.addDocument(secondDoc);

    // NRT open: the reader comes from the writer, which is closed right after.
    final IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();

    final List<FacetRequest> requests = new ArrayList<FacetRequest>();
    requests.add(new CountFacetRequest(new CategoryPath("a"), 10));
    requests.add(new CountFacetRequest(new CategoryPath("b"), 10));
    requests.add(new CountFacetRequest(new CategoryPath(delimDim), 10));

    final boolean doDimCount = random().nextBoolean();

    // The ordinal policy is picked at random; the expected dimension-level counts below
    // follow the same flag.
    final CategoryListParams clp =
        new CategoryListParams() {
          @Override
          public OrdinalPolicy getOrdinalPolicy(String dimension) {
            if (doDimCount) {
              return OrdinalPolicy.NO_PARENTS;
            }
            return OrdinalPolicy.ALL_BUT_DIMENSION;
          }
        };

    final FacetSearchParams fsp = new FacetSearchParams(new FacetIndexingParams(clp), requests);

    // Per-top-reader state:
    final SortedSetDocValuesReaderState state =
        new SortedSetDocValuesReaderState(fip, searcher.getIndexReader());

    final SortedSetDocValuesAccumulator accumulator = new SortedSetDocValuesAccumulator(fsp, state);
    final FacetsCollector collector = FacetsCollector.create(accumulator);
    searcher.search(new MatchAllDocsQuery(), collector);

    final List<FacetResult> results = collector.getFacetResults();
    assertEquals(3, results.size());

    final int aDimCount = doDimCount ? 4 : 0;
    final int bDimCount = doDimCount ? 1 : 0;

    final String expectedA = "a (" + aDimCount + ")\n  foo (2)\n  bar (1)\n  zoo (1)\n";
    assertEquals(expectedA, FacetTestUtils.toSimpleString(results.get(0)));

    final String expectedB = "b (" + bDimCount + ")\n  baz (1)\n";
    assertEquals(expectedB, FacetTestUtils.toSimpleString(results.get(1)));

    final String expectedDelimB = delimDim + " (" + bDimCount + ")\n  bazfoo (1)\n";
    assertEquals(expectedDelimB, FacetTestUtils.toSimpleString(results.get(2)));

    // DrillDown: two category paths added to one query.
    final DrillDownQuery fooAndBaz = new DrillDownQuery(fip);
    fooAndBaz.add(new CategoryPath("a", "foo"));
    fooAndBaz.add(new CategoryPath("b", "baz"));
    final TopDocs fooAndBazHits = searcher.search(fooAndBaz, 1);
    assertEquals(1, fooAndBazHits.totalHits);

    // DrillDown: dimension only.
    final DrillDownQuery anyA = new DrillDownQuery(fip);
    anyA.add(new CategoryPath("a"));
    final TopDocs anyAHits = searcher.search(anyA, 1);
    assertEquals(2, anyAHits.totalHits);

    searcher.getIndexReader().close();
    dir.close();
  }