/**
 * Builds an automaton whose language is the intersection of the languages of <code>a1</code> and
 * <code>a2</code>. Never modifies the input automata languages.
 *
 * <p>Shortcuts: if either input is a singleton, the other automaton is simply run on that single
 * string; if both arguments are the same object, a clone (if required) of it is returned.
 * Otherwise a product automaton is built over pairs of states reachable from the two initial
 * states, emitting one transition for every overlapping pair of outgoing ranges.
 *
 * <p>Complexity: quadratic in number of states.
 *
 * @param a1 first automaton
 * @param a2 second automaton
 * @return an automaton accepting the strings accepted by both inputs
 */
public static Automaton intersection(Automaton a1, Automaton a2) {
  if (a1.isSingleton()) {
    return BasicOperations.run(a2, a1.singleton)
        ? a1.cloneIfRequired()
        : BasicAutomata.makeEmpty();
  }
  if (a2.isSingleton()) {
    return BasicOperations.run(a1, a2.singleton)
        ? a2.cloneIfRequired()
        : BasicAutomata.makeEmpty();
  }
  if (a1 == a2) {
    return a1.cloneIfRequired();
  }

  Transition[][] sorted1 = a1.getSortedTransitions();
  Transition[][] sorted2 = a2.getSortedTransitions();
  Automaton product = new Automaton();
  LinkedList<StatePair> pending = new LinkedList<StatePair>();
  // Maps a (s1, s2) pair to the canonical pair object that carries the product state.
  HashMap<StatePair, StatePair> known = new HashMap<StatePair, StatePair>();

  StatePair start = new StatePair(product.initial, a1.initial, a2.initial);
  pending.add(start);
  known.put(start, start);

  while (!pending.isEmpty()) {
    StatePair current = pending.removeFirst();
    // A product state accepts only when both component states accept.
    current.s.accept = current.s1.accept && current.s2.accept;

    Transition[] out1 = sorted1[current.s1.number];
    Transition[] out2 = sorted2[current.s2.number];

    // Index of the first right-hand transition that can still overlap; it only moves forward
    // because both transition arrays are sorted.
    int firstCandidate = 0;
    for (Transition left : out1) {
      while (firstCandidate < out2.length && out2[firstCandidate].max < left.min) {
        firstCandidate++;
      }
      for (int j = firstCandidate; j < out2.length && left.max >= out2[j].min; j++) {
        Transition right = out2[j];
        if (right.max < left.min) {
          continue; // no overlap with this right-hand range
        }
        StatePair key = new StatePair(left.to, right.to);
        StatePair target = known.get(key);
        if (target == null) {
          // First time this pair is reached: give it a fresh product state and queue it.
          key.s = new State();
          pending.add(key);
          known.put(key, key);
          target = key;
        }
        int min = Math.max(left.min, right.min);
        int max = Math.min(left.max, right.max);
        current.s.addTransition(new Transition(min, max, target.s));
      }
    }
  }

  product.deterministic = a1.deterministic && a2.deterministic;
  product.removeDeadTransitions();
  product.checkMinimizeAlways();
  return product;
}
/**
 * Returns a (deterministic) automaton that accepts the intersection of the language of <code>a1
 * </code> and the complement of the language of <code>a2</code>. As a side-effect, the automata
 * may be determinized, if not already deterministic.
 *
 * <p>Shortcuts taken before the general case: an empty <code>a1</code>, or <code>a1</code> and
 * <code>a2</code> being the same object, yields the empty automaton; an empty <code>a2</code>
 * yields a clone (if required) of <code>a1</code>; a singleton <code>a1</code> is decided by
 * running <code>a2</code> on its single string.
 *
 * <p>Complexity: quadratic in number of states (if already deterministic).
 *
 * @param a1 automaton whose language is kept
 * @param a2 automaton whose language is removed
 * @return an automaton for the difference of the two languages
 */
public static Automaton minus(Automaton a1, Automaton a2) {
  if (BasicOperations.isEmpty(a1) || a1 == a2) {
    return BasicAutomata.makeEmpty();
  }
  if (BasicOperations.isEmpty(a2)) {
    return a1.cloneIfRequired();
  }
  if (!a1.isSingleton()) {
    // General case: intersect with the complement of a2.
    return intersection(a1, a2.complement());
  }
  boolean removedByA2 = BasicOperations.run(a2, a1.singleton);
  return removedByA2 ? BasicAutomata.makeEmpty() : a1.cloneIfRequired();
}
/**
 * Tests whether the language of <code>a1</code> is a subset of the language of <code>a2</code>.
 * As a side-effect, <code>a2</code> is determinized if not already marked as deterministic.
 *
 * <p>Identical arguments and singleton <code>a1</code> are answered directly. Otherwise pairs of
 * states are explored starting from the two initial states; the check fails as soon as a pair is
 * found where <code>a1</code>'s state accepts and <code>a2</code>'s does not, or where part of an
 * outgoing range of <code>a1</code>'s state is not covered by the outgoing ranges of
 * <code>a2</code>'s state.
 *
 * <p>NOTE(review): an uncovered range of <code>a1</code> fails the check regardless of where that
 * transition leads; confirm whether callers are expected to pass an <code>a1</code> without
 * dead states.
 *
 * <p>Complexity: quadratic in number of states.
 *
 * @param a1 candidate subset automaton
 * @param a2 candidate superset automaton
 * @return true if the exploration finds no violation
 */
public static boolean subsetOf(Automaton a1, Automaton a2) {
  if (a1 == a2) {
    return true;
  }
  if (a1.isSingleton()) {
    return a2.isSingleton()
        ? a1.singleton.equals(a2.singleton)
        : BasicOperations.run(a2, a1.singleton);
  }

  a2.determinize();
  Transition[][] sorted1 = a1.getSortedTransitions();
  Transition[][] sorted2 = a2.getSortedTransitions();
  LinkedList<StatePair> pending = new LinkedList<StatePair>();
  HashSet<StatePair> seen = new HashSet<StatePair>();

  StatePair start = new StatePair(a1.initial, a2.initial);
  pending.add(start);
  seen.add(start);

  while (!pending.isEmpty()) {
    StatePair current = pending.removeFirst();
    if (current.s1.accept && !current.s2.accept) {
      return false;
    }

    Transition[] out1 = sorted1[current.s1.number];
    Transition[] out2 = sorted2[current.s2.number];

    // Index of the first right-hand transition that can still overlap; it only moves forward
    // because both transition arrays are sorted.
    int firstCandidate = 0;
    for (Transition left : out1) {
      while (firstCandidate < out2.length && out2[firstCandidate].max < left.min) {
        firstCandidate++;
      }

      // [uncoveredFrom, uncoveredTo] is the part of the left range not yet matched on the right.
      int uncoveredFrom = left.min;
      int uncoveredTo = left.max;
      for (int j = firstCandidate; j < out2.length && left.max >= out2[j].min; j++) {
        Transition right = out2[j];
        if (right.min > uncoveredFrom) {
          return false; // gap before this right-hand range
        }
        if (right.max < Character.MAX_CODE_POINT) {
          uncoveredFrom = right.max + 1;
        } else {
          // Right-hand range reaches the top code point: mark the remainder as empty without
          // overflowing past MAX_CODE_POINT.
          uncoveredFrom = Character.MAX_CODE_POINT;
          uncoveredTo = Character.MIN_CODE_POINT;
        }
        StatePair next = new StatePair(left.to, right.to);
        if (seen.add(next)) {
          pending.add(next);
        }
      }
      if (uncoveredFrom <= uncoveredTo) {
        return false; // tail of the left range is not covered
      }
    }
  }
  return true;
}
/** tests a pre-intersected automaton against the original */ public void testFiniteVersusInfinite() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); final List<BytesRef> matchedTerms = new ArrayList<BytesRef>(); for (BytesRef t : terms) { if (BasicOperations.run(automaton, t.utf8ToString())) { matchedTerms.add(t); } } Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms); // System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " // states, sigma=" + alternate.getStartPoints().length); // AutomatonTestUtil.minimizeSimple(alternate); // System.out.println("minmize done"); AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton); AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate); CheckHits.checkEqual( a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs); } }
/** seeks to every term accepted by some automata */ public void testSeeking() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null); ArrayList<BytesRef> unsortedTerms = new ArrayList<BytesRef>(terms); Collections.shuffle(unsortedTerms, random()); for (BytesRef term : unsortedTerms) { if (BasicOperations.run(automaton, term.utf8ToString())) { // term is accepted if (random().nextBoolean()) { // seek exact assertTrue(te.seekExact(term, random().nextBoolean())); } else { // seek ceil assertEquals(SeekStatus.FOUND, te.seekCeil(term, random().nextBoolean())); assertEquals(term, te.term()); } } } } }