public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception { if (leftTerms == null || rightTerms == null) { assertNull(leftTerms); assertNull(rightTerms); return; } assertTermsStatistics(leftTerms, rightTerms); // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be // different TermsEnum leftTermsEnum = leftTerms.iterator(null); TermsEnum rightTermsEnum = rightTerms.iterator(null); assertTermsEnum(leftTermsEnum, rightTermsEnum, true); assertTermsSeeking(leftTerms, rightTerms); if (deep) { int numIntersections = atLeast(3); for (int i = 0; i < numIntersections; i++) { String re = AutomatonTestUtil.randomRegexp(random()); CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton()); if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { // TODO: test start term too TermsEnum leftIntersection = leftTerms.intersect(automaton, null); TermsEnum rightIntersection = rightTerms.intersect(automaton, null); assertTermsEnum(leftIntersection, rightIntersection, rarely()); } } } }
/** test a bunch of random regular expressions */ public void testRegexps() throws Exception { // we generate aweful regexps: good for testing. // but for preflex codec, the test can be very slow, so use less iterations. String codec = CodecProvider.getDefaultCodec(); int num = codec.equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { String reg = AutomatonTestUtil.randomRegexp(random).toString(); assertSame(reg); } }
/**
 * Feeds at least 1000 randomly generated regular expressions to {@code assertSame}, printing
 * each one first when {@code VERBOSE} is enabled.
 */
public void testRegexps() throws Exception {
  for (int remaining = atLeast(1000); remaining > 0; remaining--) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    if (VERBOSE) {
      System.out.println("TEST: regexp=" + regexp);
    }
    assertSame(regexp);
  }
}
/** return a random NFA/DFA for testing */ public static Automaton randomAutomaton(Random random) { // get two random Automata from regexps Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(); if (random.nextBoolean()) a1 = BasicOperations.complement(a1); Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(); if (random.nextBoolean()) a2 = BasicOperations.complement(a2); // combine them in random ways switch (random.nextInt(4)) { case 0: return BasicOperations.concatenate(a1, a2); case 1: return BasicOperations.union(a1, a2); case 2: return BasicOperations.intersection(a1, a2); default: return BasicOperations.minus(a1, a2); } }
/**
 * Tests {@code Terms.intersect} with random regexps: the terms enumerated by the intersection
 * must describe the same language as the intersection of {@code termsAutomaton} with the regexp
 * automaton.
 *
 * <p>TODO: start at a random term!
 */
public void testIntersect() throws Exception {
  for (int iter = 0; iter < numIterations; iter++) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    final Automaton regexpAutomaton = new RegExp(regexp, RegExp.NONE).toAutomaton();
    final CompiledAutomaton compiled =
        new CompiledAutomaton(regexpAutomaton, SpecialOperations.isFinite(regexpAutomaton), false);
    final TermsEnum matches = MultiFields.getTerms(reader, "field").intersect(compiled, null);
    final Automaton expected = BasicOperations.intersection(termsAutomaton, regexpAutomaton);

    // Collect private copies of every enumerated term, kept in sorted order by the TreeSet.
    final TreeSet<BytesRef> seen = new TreeSet<BytesRef>();
    for (BytesRef current = matches.next(); current != null; current = matches.next()) {
      seen.add(BytesRef.deepCopyOf(matches.term()));
    }

    assertTrue(BasicOperations.sameLanguage(expected, BasicAutomata.makeStringUnion(seen)));
  }
}
/** tests a pre-intersected automaton against the original */ public void testFiniteVersusInfinite() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); final List<BytesRef> matchedTerms = new ArrayList<BytesRef>(); for (BytesRef t : terms) { if (BasicOperations.run(automaton, t.utf8ToString())) { matchedTerms.add(t); } } Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms); // System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " // states, sigma=" + alternate.getStartPoints().length); // AutomatonTestUtil.minimizeSimple(alternate); // System.out.println("minmize done"); AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton); AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate); CheckHits.checkEqual( a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs); } }
/** seeks to every term accepted by some automata */ public void testSeeking() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null); ArrayList<BytesRef> unsortedTerms = new ArrayList<BytesRef>(terms); Collections.shuffle(unsortedTerms, random()); for (BytesRef term : unsortedTerms) { if (BasicOperations.run(automaton, term.utf8ToString())) { // term is accepted if (random().nextBoolean()) { // seek exact assertTrue(te.seekExact(term, random().nextBoolean())); } else { // seek ceil assertEquals(SeekStatus.FOUND, te.seekCeil(term, random().nextBoolean())); assertEquals(term, te.term()); } } } } }
/**
 * Runs {@code assertAutomaton} on automata built from random regular expressions, {@code 250 *
 * RANDOM_MULTIPLIER} times.
 */
public void testRandomRegexes() throws Exception {
  final int iterations = 250 * RANDOM_MULTIPLIER;
  for (int remaining = iterations; remaining > 0; remaining--) {
    final Automaton automaton = AutomatonTestUtil.randomRegexp(random).toAutomaton();
    assertAutomaton(automaton);
  }
}