/**
 * Compares two {@link Terms} instances. If either side is null, both must be null. Otherwise the
 * comparison is delegated to {@code assertTermsStatistics}, {@code assertTermsEnum} and {@code
 * assertTermsSeeking}; when {@code deep} is true, intersections with a few random automata are
 * compared as well.
 *
 * @param leftTerms first terms instance, may be null
 * @param rightTerms second terms instance, may be null
 * @param deep whether to additionally compare random automaton intersections
 */
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
  if (leftTerms == null || rightTerms == null) {
    // One side is missing: that is only acceptable when the other side is missing too.
    assertNull(leftTerms);
    assertNull(rightTerms);
    return;
  }
  assertTermsStatistics(leftTerms, rightTerms);

  // NOTE: hasOffsets/hasPositions/hasPayloads are deliberately not asserted because they are
  // allowed to be different.

  assertTermsEnum(leftTerms.iterator(null), rightTerms.iterator(null), true);
  assertTermsSeeking(leftTerms, rightTerms);

  if (!deep) {
    return;
  }

  final int rounds = atLeast(3);
  for (int round = 0; round < rounds; round++) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    final CompiledAutomaton compiled =
        new CompiledAutomaton(new RegExp(regexp, RegExp.NONE).toAutomaton());
    if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
      // Only automata compiled to the NORMAL type are intersected here.
      continue;
    }
    // TODO: test start term too
    assertTermsEnum(
        leftTerms.intersect(compiled, null), rightTerms.intersect(compiled, null), rarely());
  }
}
Ejemplo n.º 2
0
 /**
  * Checks, for a batch of random automata, that the minimized automaton accepts the same language
  * as the non-minimal one (dead states removed, then determinized).
  */
 public void testBasic() {
   for (int remaining = atLeast(200); remaining > 0; remaining--) {
     final Automaton original = AutomatonTestUtil.randomAutomaton(random());
     // Non-minimal reference form.
     final Automaton pruned = Operations.removeDeadStates(original);
     final Automaton determinized = Operations.determinize(pruned);
     // Minimal form of the very same input.
     final Automaton minimized = MinimizationOperations.minimize(original);
     assertTrue(Operations.sameLanguage(determinized, minimized));
   }
 }
Ejemplo n.º 3
0
 /** Runs {@code assertSame} over a batch of random regular expressions. */
 public void testRegexps() throws Exception {
   // The generated regexps are awful, which is good for testing. With the PreFlex codec the
   // test can be very slow, though, so fewer iterations are used there.
   final int total;
   if (CodecProvider.getDefaultCodec().equals("PreFlex")) {
     total = 100 * RANDOM_MULTIPLIER;
   } else {
     total = 1000 * RANDOM_MULTIPLIER;
   }
   for (int done = 0; done < total; done++) {
     final String regexp = AutomatonTestUtil.randomRegexp(random).toString();
     assertSame(regexp);
   }
 }
Ejemplo n.º 4
0
 /**
  * Runs {@code assertSame} over {@code atLeast(1000)} random regular expressions, printing each
  * one first when {@code VERBOSE} is set.
  */
 public void testRegexps() throws Exception {
   for (int remaining = atLeast(1000); remaining > 0; remaining--) {
     final String regexp = AutomatonTestUtil.randomRegexp(random());
     if (VERBOSE) {
       System.out.println("TEST: regexp=" + regexp);
     }
     assertSame(regexp);
   }
 }
  /**
   * Returns a random NFA/DFA for testing. Two automata are parsed from random regexps, each one is
   * complemented on a coin flip, and the pair is then combined by a randomly chosen operation
   * (concatenate, union, intersection or minus).
   *
   * @param random source of randomness; the order of the calls made on it is kept stable
   * @return the combined automaton
   */
  public static Automaton randomAutomaton(Random random) {
    // First operand, optionally complemented.
    Automaton first =
        new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
    if (random.nextBoolean()) {
      first = BasicOperations.complement(first);
    }

    // Second operand, optionally complemented.
    Automaton second =
        new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
    if (random.nextBoolean()) {
      second = BasicOperations.complement(second);
    }

    // Pick one of the four ways of combining them.
    final int operation = random.nextInt(4);
    if (operation == 0) {
      return BasicOperations.concatenate(first, second);
    }
    if (operation == 1) {
      return BasicOperations.union(first, second);
    }
    if (operation == 2) {
      return BasicOperations.intersection(first, second);
    }
    return BasicOperations.minus(first, second);
  }
Ejemplo n.º 6
0
  /**
   * Compares the result of {@code MinimizationOperations.minimize} with the output of the slower,
   * simple {@code AutomatonTestUtil.minimizeSimple}. Besides accepting the same language, both
   * automata must have the same number of states and the same total number of transitions.
   */
  public void testAgainstBrzozowski() {
    for (int remaining = atLeast(200); remaining > 0; remaining--) {
      final Automaton simple =
          AutomatonTestUtil.minimizeSimple(AutomatonTestUtil.randomAutomaton(random()));
      final Automaton minimized = MinimizationOperations.minimize(simple);

      assertTrue(Operations.sameLanguage(simple, minimized));
      assertEquals(simple.getNumStates(), minimized.getNumStates());

      // State counts were just asserted equal, so one pass can total both sides.
      final int stateCount = simple.getNumStates();
      int simpleTransitions = 0;
      int minimizedTransitions = 0;
      for (int state = 0; state < stateCount; state++) {
        simpleTransitions += simple.getNumTransitions(state);
        minimizedTransitions += minimized.getNumTransitions(state);
      }

      assertEquals(simpleTransitions, minimizedTransitions);
    }
  }
Ejemplo n.º 7
0
  /**
   * Tests intersect: the terms enumerated by {@code intersect} for a random regexp automaton must
   * form the same language as the intersection of {@code termsAutomaton} with that automaton.
   * TODO start at a random term!
   */
  public void testIntersect() throws Exception {
    for (int iter = 0; iter < numIterations; iter++) {
      final Automaton regexpAutomaton =
          new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton();
      final boolean finite = SpecialOperations.isFinite(regexpAutomaton);
      final CompiledAutomaton compiled = new CompiledAutomaton(regexpAutomaton, finite, false);

      final TermsEnum matches = MultiFields.getTerms(reader, "field").intersect(compiled, null);
      final Automaton expected = BasicOperations.intersection(termsAutomaton, regexpAutomaton);

      // Each term is deep-copied before being stored (presumably the enum may reuse its
      // BytesRef between calls - confirm against the TermsEnum contract).
      final TreeSet<BytesRef> collected = new TreeSet<BytesRef>();
      while (matches.next() != null) {
        collected.add(BytesRef.deepCopyOf(matches.term()));
      }

      final Automaton actual = BasicAutomata.makeStringUnion(collected);
      assertTrue(BasicOperations.sameLanguage(expected, actual));
    }
  }
Ejemplo n.º 8
0
 /**
  * Blasts some random strings through differently configured tokenizers. Each iteration builds a
  * {@code MockTokenizer} from a random automaton, a random lowercase flag and a random limit in
  * [0, 500], and runs {@code checkRandomData} against it. More iterations are requested when
  * {@code TEST_NIGHTLY} is set.
  */
 public void testRandomRegexps() throws Exception {
   int iters = TEST_NIGHTLY ? atLeast(30) : atLeast(1);
   for (int i = 0; i < iters; i++) {
     final CharacterRunAutomaton dfa =
         new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
     final boolean lowercase = random().nextBoolean();
     final int limit = TestUtil.nextInt(random(), 0, 500);
     Analyzer a =
         new Analyzer() {
           @Override
           protected TokenStreamComponents createComponents(String fieldName) {
             Tokenizer t = new MockTokenizer(dfa, lowercase, limit);
             return new TokenStreamComponents(t, t);
           }
         };
     try {
       checkRandomData(random(), a, 100);
     } finally {
       // Close the analyzer even when checkRandomData fails, so a failing iteration does not
       // leak it.
       a.close();
     }
   }
 }
Ejemplo n.º 9
0
  /**
   * Tests a pre-intersected automaton against the original: a query on a random regexp automaton
   * must return the same hits as a query on the string-union automaton of exactly those entries
   * of {@code terms} that the regexp automaton accepts.
   */
  public void testFiniteVersusInfinite() throws Exception {
    for (int iter = 0; iter < numIterations; iter++) {
      final Automaton original =
          new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton();

      // Keep only the terms that the original automaton accepts.
      final List<BytesRef> accepted = new ArrayList<BytesRef>();
      for (BytesRef candidate : terms) {
        if (!BasicOperations.run(original, candidate.utf8ToString())) {
          continue;
        }
        accepted.add(candidate);
      }

      // Pre-intersected alternative built from exactly those terms.
      final Automaton preIntersected = BasicAutomata.makeStringUnion(accepted);

      final AutomatonQuery originalQuery = new AutomatonQuery(new Term("field", ""), original);
      final AutomatonQuery alternateQuery =
          new AutomatonQuery(new Term("field", ""), preIntersected);
      CheckHits.checkEqual(
          originalQuery,
          searcher.search(originalQuery, 25).scoreDocs,
          searcher.search(alternateQuery, 25).scoreDocs);
    }
  }
Ejemplo n.º 10
0
  /**
   * Seeks to every term accepted by some automata: for each random regexp automaton, the entries
   * of {@code terms} are visited in shuffled order and every accepted one must be found, either
   * via {@code seekExact} or via {@code seekCeil}, chosen at random.
   */
  public void testSeeking() throws Exception {
    for (int iter = 0; iter < numIterations; iter++) {
      final Automaton acceptor =
          new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton();
      final TermsEnum termsEnum = MultiFields.getTerms(reader, "field").iterator(null);
      final ArrayList<BytesRef> shuffled = new ArrayList<BytesRef>(terms);
      Collections.shuffle(shuffled, random());

      for (BytesRef target : shuffled) {
        if (!BasicOperations.run(acceptor, target.utf8ToString())) {
          // Not accepted by the automaton: nothing to seek to.
          continue;
        }
        final boolean exact = random().nextBoolean();
        if (exact) {
          // seek exact
          assertTrue(termsEnum.seekExact(target, random().nextBoolean()));
        } else {
          // seek ceil: must land exactly on the requested term
          assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(target, random().nextBoolean()));
          assertEquals(target, termsEnum.term());
        }
      }
    }
  }
Ejemplo n.º 11
0
 /**
  * Runs {@code assertAutomaton} on the automata of {@code 250 * RANDOM_MULTIPLIER} random regular
  * expressions.
  */
 public void testRandomRegexes() throws Exception {
   final int total = 250 * RANDOM_MULTIPLIER;
   int done = 0;
   while (done < total) {
     assertAutomaton(AutomatonTestUtil.randomRegexp(random).toAutomaton());
     done++;
   }
 }