public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception { if (leftTerms == null || rightTerms == null) { assertNull(leftTerms); assertNull(rightTerms); return; } assertTermsStatistics(leftTerms, rightTerms); // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be // different TermsEnum leftTermsEnum = leftTerms.iterator(null); TermsEnum rightTermsEnum = rightTerms.iterator(null); assertTermsEnum(leftTermsEnum, rightTermsEnum, true); assertTermsSeeking(leftTerms, rightTerms); if (deep) { int numIntersections = atLeast(3); for (int i = 0; i < numIntersections; i++) { String re = AutomatonTestUtil.randomRegexp(random()); CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton()); if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { // TODO: test start term too TermsEnum leftIntersection = leftTerms.intersect(automaton, null); TermsEnum rightIntersection = rightTerms.intersect(automaton, null); assertTermsEnum(leftIntersection, rightIntersection, rarely()); } } } }
/**
 * For random automata, checks that the minimized automaton accepts the same language as the
 * determinized, dead-state-free (non-minimal) version of the same input.
 */
public void testBasic() {
  final int rounds = atLeast(200);
  int round = 0;
  while (round < rounds) {
    final Automaton input = AutomatonTestUtil.randomAutomaton(random());
    final Automaton nonMinimal = Operations.determinize(Operations.removeDeadStates(input));
    final Automaton minimal = MinimizationOperations.minimize(input);
    assertTrue(Operations.sameLanguage(nonMinimal, minimal));
    round++;
  }
}
/** test a bunch of random regular expressions */ public void testRegexps() throws Exception { // we generate aweful regexps: good for testing. // but for preflex codec, the test can be very slow, so use less iterations. String codec = CodecProvider.getDefaultCodec(); int num = codec.equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { String reg = AutomatonTestUtil.randomRegexp(random).toString(); assertSame(reg); } }
/**
 * Runs {@code assertSame} over at least 1000 randomly generated regular expressions, printing
 * each one first when {@code VERBOSE} is set.
 *
 * @throws Exception if a check throws
 */
public void testRegexps() throws Exception {
  final int iterations = atLeast(1000);
  int round = 0;
  while (round < iterations) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    if (VERBOSE) {
      System.out.println("TEST: regexp=" + regexp);
    }
    assertSame(regexp);
    round++;
  }
}
/** return a random NFA/DFA for testing */ public static Automaton randomAutomaton(Random random) { // get two random Automata from regexps Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(); if (random.nextBoolean()) a1 = BasicOperations.complement(a1); Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(); if (random.nextBoolean()) a2 = BasicOperations.complement(a2); // combine them in random ways switch (random.nextInt(4)) { case 0: return BasicOperations.concatenate(a1, a2); case 1: return BasicOperations.union(a1, a2); case 2: return BasicOperations.intersection(a1, a2); default: return BasicOperations.minus(a1, a2); } }
/**
 * Compares the output of {@code MinimizationOperations.minimize} against an automaton that was
 * first minimized with the slower, simple implementation. Besides accepting the same language,
 * both automata must have the same number of states and the same total number of transitions.
 */
public void testAgainstBrzozowski() {
  final int rounds = atLeast(200);
  for (int round = 0; round < rounds; round++) {
    final Automaton reference =
        AutomatonTestUtil.minimizeSimple(AutomatonTestUtil.randomAutomaton(random()));
    final Automaton minimized = MinimizationOperations.minimize(reference);

    assertTrue(Operations.sameLanguage(reference, minimized));
    assertEquals(reference.getNumStates(), minimized.getNumStates());

    // state counts are equal at this point, so one pass can total the transitions of both
    final int stateCount = reference.getNumStates();
    int referenceTransitions = 0;
    int minimizedTransitions = 0;
    for (int state = 0; state < stateCount; state++) {
      referenceTransitions += reference.getNumTransitions(state);
      minimizedTransitions += minimized.getNumTransitions(state);
    }
    assertEquals(referenceTransitions, minimizedTransitions);
  }
}
/**
 * Tests {@code Terms.intersect}: for each random regular expression, the terms enumerated by the
 * intersected enum must form the same language as the intersection of the terms automaton with
 * the regexp automaton.
 *
 * <p>TODO: start at a random term!
 *
 * @throws Exception if reading the terms throws
 */
public void testIntersect() throws Exception {
  int round = 0;
  while (round < numIterations) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    final Automaton regexpAutomaton = new RegExp(regexp, RegExp.NONE).toAutomaton();
    final CompiledAutomaton compiled =
        new CompiledAutomaton(regexpAutomaton, SpecialOperations.isFinite(regexpAutomaton), false);
    final TermsEnum intersected = MultiFields.getTerms(reader, "field").intersect(compiled, null);
    final Automaton expected = BasicOperations.intersection(termsAutomaton, regexpAutomaton);

    // collect a private copy of every term the intersected enum returns
    final TreeSet<BytesRef> seen = new TreeSet<BytesRef>();
    while (intersected.next() != null) {
      seen.add(BytesRef.deepCopyOf(intersected.term()));
    }

    final Automaton actual = BasicAutomata.makeStringUnion(seen);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
    round++;
  }
}
/**
 * Blasts some random strings through differently configured tokenizers.
 *
 * <p>Each iteration builds a {@code MockTokenizer} around a random automaton with a random
 * lowercase flag and a random limit in the range 0..500, then feeds random data through an
 * analyzer that wraps it. Nightly runs use at least 30 iterations, other runs at least one.
 *
 * @throws Exception if the random-data check throws
 */
public void testRandomRegexps() throws Exception {
  int iters = TEST_NIGHTLY ? atLeast(30) : atLeast(1);
  for (int i = 0; i < iters; i++) {
    final CharacterRunAutomaton dfa =
        new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
    final boolean lowercase = random().nextBoolean();
    final int limit = TestUtil.nextInt(random(), 0, 500);
    // try-with-resources so the analyzer is closed even when checkRandomData fails;
    // previously a failing check skipped close() and leaked the analyzer
    try (Analyzer a =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(dfa, lowercase, limit);
            return new TokenStreamComponents(t, t);
          }
        }) {
      checkRandomData(random(), a, 100);
    }
  }
}
/** tests a pre-intersected automaton against the original */ public void testFiniteVersusInfinite() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); final List<BytesRef> matchedTerms = new ArrayList<BytesRef>(); for (BytesRef t : terms) { if (BasicOperations.run(automaton, t.utf8ToString())) { matchedTerms.add(t); } } Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms); // System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " // states, sigma=" + alternate.getStartPoints().length); // AutomatonTestUtil.minimizeSimple(alternate); // System.out.println("minmize done"); AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton); AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate); CheckHits.checkEqual( a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs); } }
/** seeks to every term accepted by some automata */ public void testSeeking() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null); ArrayList<BytesRef> unsortedTerms = new ArrayList<BytesRef>(terms); Collections.shuffle(unsortedTerms, random()); for (BytesRef term : unsortedTerms) { if (BasicOperations.run(automaton, term.utf8ToString())) { // term is accepted if (random().nextBoolean()) { // seek exact assertTrue(te.seekExact(term, random().nextBoolean())); } else { // seek ceil assertEquals(SeekStatus.FOUND, te.seekCeil(term, random().nextBoolean())); assertEquals(term, te.term()); } } } } }
/**
 * Runs {@code assertAutomaton} on the automata of {@code 250 * RANDOM_MULTIPLIER} random regular
 * expressions.
 *
 * @throws Exception if a check throws
 */
public void testRandomRegexes() throws Exception {
  final int iterations = 250 * RANDOM_MULTIPLIER;
  int round = 0;
  while (round < iterations) {
    assertAutomaton(AutomatonTestUtil.randomRegexp(random).toAutomaton());
    round++;
  }
}