예제 #1
0
  @Override
  public void setUp() throws Exception {
    super.setUp();
    numIterations = atLeast(50);
    dir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(
                    TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
                .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));
    Document doc = new Document();
    Field field = newStringField("field", "", Field.Store.YES);
    doc.add(field);
    terms = new TreeSet<BytesRef>();

    int num = atLeast(200);
    for (int i = 0; i < num; i++) {
      String s = _TestUtil.randomUnicodeString(random());
      field.setStringValue(s);
      terms.add(new BytesRef(s));
      writer.addDocument(doc);
    }

    termsAutomaton = BasicAutomata.makeStringUnion(terms);

    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
  }
예제 #2
0
  /** tests intersect: TODO start at a random term! */
  public void testIntersect() throws Exception {
    for (int i = 0; i < numIterations; i++) {
      String reg = AutomatonTestUtil.randomRegexp(random());
      Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
      CompiledAutomaton ca =
          new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton), false);
      TermsEnum te = MultiFields.getTerms(reader, "field").intersect(ca, null);
      Automaton expected = BasicOperations.intersection(termsAutomaton, automaton);
      TreeSet<BytesRef> found = new TreeSet<BytesRef>();
      while (te.next() != null) {
        found.add(BytesRef.deepCopyOf(te.term()));
      }

      Automaton actual = BasicAutomata.makeStringUnion(found);
      assertTrue(BasicOperations.sameLanguage(expected, actual));
    }
  }
예제 #3
0
  /** tests a pre-intersected automaton against the original */
  public void testFiniteVersusInfinite() throws Exception {
    for (int i = 0; i < numIterations; i++) {
      String reg = AutomatonTestUtil.randomRegexp(random());
      Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
      final List<BytesRef> matchedTerms = new ArrayList<BytesRef>();
      for (BytesRef t : terms) {
        if (BasicOperations.run(automaton, t.utf8ToString())) {
          matchedTerms.add(t);
        }
      }

      Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms);
      // System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + "
      // states, sigma=" + alternate.getStartPoints().length);
      // AutomatonTestUtil.minimizeSimple(alternate);
      // System.out.println("minmize done");
      AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
      AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
      CheckHits.checkEqual(
          a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs);
    }
  }