/** * Returns an automaton that accepts the concatenation of the languages of the given automata. * * <p>Complexity: linear in number of states. */ public static Automaton concatenate(Automaton a1, Automaton a2) { if (a1.isSingleton() && a2.isSingleton()) return BasicAutomata.makeString(a1.singleton + a2.singleton); if (isEmpty(a1) || isEmpty(a2)) return BasicAutomata.makeEmpty(); // adding epsilon transitions with the NFA concatenation algorithm // in this case always produces a resulting DFA, preventing expensive // redundant determinize() calls for this common case. boolean deterministic = a1.isSingleton() && a2.isDeterministic(); if (a1 == a2) { a1 = a1.cloneExpanded(); a2 = a2.cloneExpanded(); } else { a1 = a1.cloneExpandedIfRequired(); a2 = a2.cloneExpandedIfRequired(); } for (State s : a1.getAcceptStates()) { s.accept = false; s.addEpsilon(a2.initial); } a1.deterministic = deterministic; // a1.clearHashCode(); a1.clearNumberedStates(); a1.checkMinimizeAlways(); return a1; }
/** Checks that INITIAL moves to RUNNING via run(), and RUNNING moves to ACCEPTED via accept(). */
@Test
public void acceptViaRunning() {
  final State afterRun = State.INITIAL.run();
  assertEquals(State.RUNNING, afterRun);

  final State afterAccept = afterRun.accept();
  assertEquals(State.ACCEPTED, afterAccept);
}
/**
 * Produces a (deterministic) automaton accepting exactly the strings that the given automaton
 * rejects.
 *
 * <p>Complexity: linear in number of states (if already deterministic).
 *
 * @param a the automaton to complement
 * @return the complement automaton
 */
public static Automaton complement(Automaton a) {
  final Automaton result = a.cloneExpandedIfRequired();
  result.determinize();
  result.totalize();

  // Flip the acceptance flag of every numbered state.
  for (State state : result.getNumberedStates()) {
    state.accept = !state.accept;
  }

  result.removeDeadTransitions();
  return result;
}
/**
 * Creates a new (deterministic) automaton that accepts all strings.
 *
 * <p>The automaton has a single accepting state with one self-loop whose bounds are both
 * {@code null} (presumably meaning "unbounded" in this variant; confirm against
 * {@code Transition}).
 *
 * @return the new automaton
 */
public static Automaton makeAnyString() {
  final Automaton anyString = new Automaton();
  final State loop = new State();

  loop.accept = true;
  anyString.initial = loop;
  loop.transitions.add(new Transition(null, null, loop));

  anyString.deterministic = true;
  return anyString;
}
/**
 * Creates a new (deterministic) automaton that accepts all strings.
 *
 * <p>The automaton has a single accepting state that loops to itself on the whole
 * {@code Character.MIN_VALUE..Character.MAX_VALUE} range.
 *
 * @return the new automaton
 */
public static DefaultAutomaton makeAnyString() {
  final DefaultAutomaton anyString = new DefaultAutomaton();
  final State loop = new State();

  loop.accept = true;
  anyString.initial = loop;
  loop.transitions.add(new Transition(Character.MIN_VALUE, Character.MAX_VALUE, loop));

  anyString.deterministic = true;
  return anyString;
}
/**
 * Minimizes the given automaton using Huffman's algorithm.
 *
 * <p>The automaton is determinized and totalized first. A table of state pairs is then filled in
 * to record which pairs are distinguishable, every state receives an equivalence-class number,
 * one fresh state is created per class, and the automaton's initial state is redirected to the
 * fresh states. The automaton is modified in place.
 *
 * @param a the automaton to minimize
 */
public static void minimizeHuffman(Automaton a) {
  a.determinize();
  a.totalize();
  Set<State> ss = a.getStates();
  Transition[][] transitions = new Transition[ss.size()][];
  State[] states = ss.toArray(new State[ss.size()]);
  // mark[n1][n2] is set (for n1 < n2 in this method) once the two states are known to differ
  boolean[][] mark = new boolean[states.length][states.length];
  // one list per state, each sized to states.length by initialize(...); filled by addTriggers
  // and consumed by markPair
  ArrayList<ArrayList<HashSet<IntPair>>> triggers = new ArrayList<ArrayList<HashSet<IntPair>>>();
  for (int n1 = 0; n1 < states.length; n1++) {
    ArrayList<HashSet<IntPair>> v = new ArrayList<HashSet<IntPair>>();
    initialize(v, states.length);
    triggers.add(v);
  }
  // initialize marks based on acceptance status and find transition arrays
  for (int n1 = 0; n1 < states.length; n1++) {
    // number each state with its index in the states array
    states[n1].number = n1;
    transitions[n1] = states[n1].getSortedTransitionArray(false);
    for (int n2 = n1 + 1; n2 < states.length; n2++)
      if (states[n1].accept != states[n2].accept) mark[n1][n2] = true;
  }
  // for all pairs, see if states agree
  for (int n1 = 0; n1 < states.length; n1++)
    for (int n2 = n1 + 1; n2 < states.length; n2++)
      if (!mark[n1][n2]) {
        if (statesAgree(transitions, mark, n1, n2)) addTriggers(transitions, triggers, n1, n2);
        else markPair(mark, triggers, n1, n2);
      }
  // assign equivalence class numbers to states
  int numclasses = 0;
  // -1 means "no class assigned yet"
  for (int n = 0; n < states.length; n++) states[n].number = -1;
  for (int n1 = 0; n1 < states.length; n1++)
    if (states[n1].number == -1) {
      // n1 opens a new class; every later state not marked against n1 joins it
      states[n1].number = numclasses;
      for (int n2 = n1 + 1; n2 < states.length; n2++)
        if (!mark[n1][n2]) states[n2].number = numclasses;
      numclasses++;
    }
  // make a new state for each equivalence class
  State[] newstates = new State[numclasses];
  for (int n = 0; n < numclasses; n++) newstates[n] = new State();
  // select a class representative for each class and find the new initial
  // state
  for (int n = 0; n < states.length; n++) {
    // the new state's number temporarily stores the index of an old state in its class
    // (the last such index wins)
    newstates[states[n].number].number = n;
    if (states[n] == a.initial) a.initial = newstates[states[n].number];
  }
  // build transitions and set acceptance
  for (int n = 0; n < numclasses; n++) {
    State s = newstates[n];
    // copy acceptance and outgoing transitions from the representative old state, retargeting
    // each transition to the new state of the destination's class
    s.accept = states[s.number].accept;
    for (Transition t : states[s.number].transitions)
      s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number]));
  }
  a.removeDeadTransitions();
}
/**
 * Returns a new (deterministic) automaton that accepts a single char whose value is in the given
 * interval (including both end points).
 *
 * <p>When both bounds are non-null and equal, the result is delegated to {@code makeChar}. When
 * both bounds are non-null and {@code min > max}, the returned automaton has no transitions. A
 * {@code null} bound is passed through to the {@code Transition} unchanged.
 *
 * @param min lower end point of the interval, or {@code null}
 * @param max upper end point of the interval, or {@code null}
 * @return the new automaton
 */
public static Automaton makeCharRange(Character min, Character max) {
  final boolean bothBounded = min != null && max != null;
  // Compare by value: == on two boxed Characters compares references, which only coincides
  // with value equality for instances that come from the boxing cache.
  if (bothBounded && min.equals(max)) {
    return makeChar(min);
  }
  Automaton a = new Automaton();
  State s1 = new State();
  State s2 = new State();
  a.initial = s1;
  s2.accept = true;
  // With two concrete bounds the transition is only added for a non-empty interval.
  if (!bothBounded || min <= max) {
    s1.transitions.add(new Transition(min, max, s2));
  }
  a.deterministic = true;
  return a;
}
/**
 * Creates a new (deterministic) automaton that accepts exactly one char lying in the closed
 * interval {@code [min, max]}.
 *
 * <p>Equal bounds are delegated to {@code makeChar}; if {@code min > max} the automaton gets no
 * transition at all.
 *
 * @param min lower end point (inclusive)
 * @param max upper end point (inclusive)
 * @return the new automaton
 */
public static DefaultAutomaton makeCharRange(char min, char max) {
  if (min == max) {
    return makeChar(min);
  }

  final DefaultAutomaton range = new DefaultAutomaton();
  final State start = new State();
  final State end = new State();

  range.initial = start;
  end.accept = true;
  if (max >= min) {
    start.transitions.add(new Transition(min, max, end));
  }

  range.deterministic = true;
  return range;
}
/** * Returns an automaton that accepts the union of the empty string and the language of the given * automaton. * * <p>Complexity: linear in number of states. */ public static Automaton optional(Automaton a) { a = a.cloneExpandedIfRequired(); State s = new State(); s.addEpsilon(a.initial); s.accept = true; a.initial = s; a.deterministic = false; // a.clearHashCode(); a.clearNumberedStates(); a.checkMinimizeAlways(); return a; }
/**
 * Creates a new (deterministic) automaton that accepts one character taken from the given set.
 *
 * <p>A one-character set is delegated to {@code makeChar}. Otherwise one transition per
 * character of {@code set} is added and the automaton is passed through {@code reduce()}.
 *
 * @param set the characters to accept
 * @return the new automaton
 */
public static Automaton makeCharSet(String set) {
  if (set.length() == 1) {
    return makeChar(set.charAt(0));
  }

  final Automaton charSet = new Automaton();
  final State start = new State();
  final State end = new State();

  charSet.initial = start;
  end.accept = true;
  for (char member : set.toCharArray()) {
    start.transitions.add(new Transition(member, end));
  }

  charSet.deterministic = true;
  charSet.reduce();
  return charSet;
}
/** * Returns an automaton that accepts the Kleene star (zero or more concatenated repetitions) of * the language of the given automaton. Never modifies the input automaton language. * * <p>Complexity: linear in number of states. */ public static Automaton repeat(Automaton a) { a = a.cloneExpanded(); State s = new State(); s.accept = true; s.addEpsilon(a.initial); for (State p : a.getAcceptStates()) p.addEpsilon(s); a.initial = s; a.deterministic = false; // a.clearHashCode(); a.clearNumberedStates(); a.checkMinimizeAlways(); return a; }
/**
 * Constructs deterministic automaton that matches strings that contain the given substring.
 *
 * <p>A chain of {@code s.length() + 1} states is built, starting at the automaton's existing
 * initial state. {@code states[i]} moves to {@code states[i + 1]} on {@code s.charAt(i)}; the
 * last state accepts and loops to itself on every char. Each non-final state additionally gets
 * fallback transitions to earlier chain states, and all remaining chars lead back to
 * {@code states[0]}.
 *
 * @param s the substring to search for
 * @return the new automaton
 */
public static DefaultAutomaton makeStringMatcher(String s) {
  DefaultAutomaton a = new DefaultAutomaton();
  State[] states = new State[s.length() + 1];
  states[0] = a.initial;
  for (int i = 0; i < s.length(); i++) states[i + 1] = new State();
  // final state: accepting, and stays accepting on any further input
  State f = states[s.length()];
  f.accept = true;
  f.transitions.add(new Transition(Character.MIN_VALUE, Character.MAX_VALUE, f));
  for (int i = 0; i < s.length(); i++) {
    // chars that already have an outgoing transition from states[i]
    Set<Character> done = new HashSet<Character>();
    char c = s.charAt(i);
    // forward edge: the next expected char advances along the chain
    states[i].transitions.add(new Transition(c, states[i + 1]));
    done.add(c);
    // fallback edges, trying the longest candidate first: on char d = s[j-1], go to states[j]
    // when the first j-1 chars of s equal the j-1 chars of s ending just before index i
    for (int j = i; j >= 1; j--) {
      char d = s.charAt(j - 1);
      if (!done.contains(d) && s.substring(0, j - 1).equals(s.substring(i - j + 1, i))) {
        states[i].transitions.add(new Transition(d, states[j]));
        done.add(d);
      }
    }
    // sorted array of the chars handled above
    char[] da = new char[done.size()];
    int h = 0;
    for (char w : done) da[h++] = w;
    Arrays.sort(da);
    // cover every gap between handled chars with a range transition back to states[0];
    // from/to are ints so they can step past Character.MAX_VALUE and end the loop
    int from = Character.MIN_VALUE;
    int k = 0;
    while (from <= Character.MAX_VALUE) {
      // skip over handled chars sitting exactly at the start of the gap
      while (k < da.length && da[k] == from) {
        k++;
        from++;
      }
      if (from <= Character.MAX_VALUE) {
        int to = Character.MAX_VALUE;
        if (k < da.length) {
          // gap ends just before the next handled char, which is consumed here
          to = da[k] - 1;
          k++;
        }
        states[i].transitions.add(new Transition((char) from, (char) to, states[0]));
        // to + 1 is the handled char just consumed (or past MAX_VALUE), so resume at to + 2
        from = to + 2;
      }
    }
  }
  a.deterministic = true;
  return a;
}
/**
 * Translates a utf32 automaton into an equivalent utf8 automaton. The input does not have to be
 * deterministic. The result is in general not deterministic either, so callers that need a
 * deterministic automaton must determinize it themselves.
 *
 * @param utf32 the automaton to convert
 * @return the utf8 automaton
 */
public Automaton convert(Automaton utf32) {
  if (utf32.isSingleton()) {
    utf32 = utf32.cloneExpanded();
  }

  // Indexed by utf32 state number; holds the matching utf8 state once one has been created.
  final State[] utf8ByUtf32Number = new State[utf32.getNumberedStates().length];
  final List<State> stack = new ArrayList<State>();

  final State utf32Start = utf32.getInitialState();
  stack.add(utf32Start);

  final Automaton utf8 = new Automaton();
  utf8.setDeterministic(false);
  final State utf8Start = utf8.getInitialState();

  // Reset the utf8 state bookkeeping and register the utf8 initial state as state 0.
  utf8States = new State[5];
  utf8StateCount = 0;
  utf8Start.number = utf8StateCount;
  utf8States[utf8StateCount] = utf8Start;
  utf8StateCount++;

  utf8Start.setAccept(utf32Start.isAccept());
  utf8ByUtf32Number[utf32Start.number] = utf8Start;

  // Walk the utf32 automaton, taking the most recently added state first.
  while (!stack.isEmpty()) {
    final State source32 = stack.remove(stack.size() - 1);
    final State source8 = utf8ByUtf32Number[source32.number];

    for (int i = 0; i < source32.numTransitions; i++) {
      final Transition edge = source32.transitionsArray[i];
      final State target32 = edge.to;

      State target8 = utf8ByUtf32Number[target32.number];
      if (target8 == null) {
        // First visit of this destination: create its utf8 twin and schedule it.
        target8 = newUTF8State();
        target8.accept = target32.accept;
        utf8ByUtf32Number[target32.number] = target8;
        stack.add(target32);
      }
      convertOneEdge(source8, target8, edge.min, edge.max);
    }
  }

  utf8.setNumberedStates(utf8States, utf8StateCount);
  return utf8;
}
/** * Returns an automaton that accepts the concatenation of the languages of the given automata. * * <p>Complexity: linear in total number of states. */ public static Automaton concatenate(List<Automaton> l) { if (l.isEmpty()) return BasicAutomata.makeEmptyString(); boolean all_singleton = true; for (Automaton a : l) if (!a.isSingleton()) { all_singleton = false; break; } if (all_singleton) { StringBuilder b = new StringBuilder(); for (Automaton a : l) b.append(a.singleton); return BasicAutomata.makeString(b.toString()); } else { for (Automaton a : l) if (BasicOperations.isEmpty(a)) return BasicAutomata.makeEmpty(); Set<Integer> ids = new HashSet<Integer>(); for (Automaton a : l) ids.add(System.identityHashCode(a)); boolean has_aliases = ids.size() != l.size(); Automaton b = l.get(0); if (has_aliases) b = b.cloneExpanded(); else b = b.cloneExpandedIfRequired(); Set<State> ac = b.getAcceptStates(); boolean first = true; for (Automaton a : l) if (first) first = false; else { if (a.isEmptyString()) continue; Automaton aa = a; if (has_aliases) aa = aa.cloneExpanded(); else aa = aa.cloneExpandedIfRequired(); Set<State> ns = aa.getAcceptStates(); for (State s : ac) { s.accept = false; s.addEpsilon(aa.initial); if (s.accept) ns.add(s); } ac = ns; } b.deterministic = false; // b.clearHashCode(); b.clearNumberedStates(); b.checkMinimizeAlways(); return b; } }
/** * Simple, original brics implementation of determinize() Determinizes the given automaton using * the given set of initial states. */ public static void determinizeSimple(Automaton a, Set<State> initialset) { int[] points = a.getStartPoints(); // subset construction Map<Set<State>, Set<State>> sets = new HashMap<Set<State>, Set<State>>(); LinkedList<Set<State>> worklist = new LinkedList<Set<State>>(); Map<Set<State>, State> newstate = new HashMap<Set<State>, State>(); sets.put(initialset, initialset); worklist.add(initialset); a.initial = new State(); newstate.put(initialset, a.initial); while (worklist.size() > 0) { Set<State> s = worklist.removeFirst(); State r = newstate.get(s); for (State q : s) if (q.accept) { r.accept = true; break; } for (int n = 0; n < points.length; n++) { Set<State> p = new HashSet<State>(); for (State q : s) for (Transition t : q.getTransitions()) if (t.min <= points[n] && points[n] <= t.max) p.add(t.to); if (!sets.containsKey(p)) { sets.put(p, p); worklist.add(p); newstate.put(p, new State()); } State q = newstate.get(p); int min = points[n]; int max; if (n + 1 < points.length) max = points[n + 1] - 1; else max = Character.MAX_CODE_POINT; r.addTransition(new Transition(min, max, q)); } } a.deterministic = true; a.clearNumberedStates(); a.removeDeadTransitions(); }
/**
 * Minimizes the given automaton using Hopcroft's algorithm.
 *
 * <p>The automaton is determinized and (unless the early exit below applies) totalized. States
 * are split into an accepting and a non-accepting block, the blocks are refined until the
 * pending work list is empty, and one new state is then created per block. The automaton is
 * modified in place.
 *
 * @param a the automaton to minimize
 */
public static void minimizeHopcroft(Automaton a) {
  a.determinize();
  // early exit: the initial state has exactly one transition, a self-loop spanning
  // Transition.MIN_VALUE..Transition.MAX_VALUE; the automaton is left as it is
  Set<Transition> tr = a.initial.getTransitions();
  if (tr.size() == 1) {
    Transition t = tr.iterator().next();
    if (t.to == a.initial && t.min == Transition.MIN_VALUE && t.max == Transition.MAX_VALUE)
      return;
  }
  a.totalize();
  // make arrays for numbered states and effective alphabet
  Set<State> ss = a.getStates();
  State[] states = new State[ss.size()];
  int number = 0;
  for (State q : ss) {
    states[number] = q;
    q.number = number++;
  }
  int[] sigma = a.getStartPoints();
  // initialize data structures
  // reverse.get(p).get(x): states that step to state p on sigma[x]
  ArrayList<ArrayList<LinkedList<State>>> reverse = new ArrayList<ArrayList<LinkedList<State>>>();
  for (int q = 0; q < states.length; q++) {
    ArrayList<LinkedList<State>> v = new ArrayList<LinkedList<State>>();
    initialize(v, sigma.length);
    reverse.add(v);
  }
  boolean[][] reverse_nonempty = new boolean[states.length][sigma.length];
  // partition.get(j): members of block j
  ArrayList<LinkedList<State>> partition = new ArrayList<LinkedList<State>>();
  initialize(partition, states.length);
  // block[n]: index of the block currently holding state number n
  int[] block = new int[states.length];
  // active[j][x]: states of block j with a non-empty reverse list on sigma[x];
  // active2[n][x]: the list node of state n inside such a list, if any
  StateList[][] active = new StateList[states.length][sigma.length];
  StateListNode[][] active2 = new StateListNode[states.length][sigma.length];
  // pending work items (block, symbol index); pending2[x][j] flags membership
  LinkedList<IntPair> pending = new LinkedList<IntPair>();
  boolean[][] pending2 = new boolean[sigma.length][states.length];
  // scratch structures for one refinement round, each with a boolean membership flag array
  ArrayList<State> split = new ArrayList<State>();
  boolean[] split2 = new boolean[states.length];
  ArrayList<Integer> refine = new ArrayList<Integer>();
  boolean[] refine2 = new boolean[states.length];
  ArrayList<ArrayList<State>> splitblock = new ArrayList<ArrayList<State>>();
  initialize(splitblock, states.length);
  for (int q = 0; q < states.length; q++) {
    splitblock.set(q, new ArrayList<State>());
    partition.set(q, new LinkedList<State>());
    for (int x = 0; x < sigma.length; x++) {
      reverse.get(q).set(x, new LinkedList<State>());
      active[q][x] = new StateList();
    }
  }
  // find initial partition and reverse edges
  for (int q = 0; q < states.length; q++) {
    State qq = states[q];
    // block 0 holds the accepting states, block 1 the non-accepting ones
    int j;
    if (qq.accept) j = 0;
    else j = 1;
    partition.get(j).add(qq);
    block[qq.number] = j;
    for (int x = 0; x < sigma.length; x++) {
      int y = sigma[x];
      State p = qq.step(y);
      reverse.get(p.number).get(x).add(qq);
      reverse_nonempty[p.number][x] = true;
    }
  }
  // initialize active sets
  for (int j = 0; j <= 1; j++)
    for (int x = 0; x < sigma.length; x++)
      for (State qq : partition.get(j))
        if (reverse_nonempty[qq.number][x]) active2[qq.number][x] = active[j][x].add(qq);
  // initialize pending
  for (int x = 0; x < sigma.length; x++) {
    // for each symbol, queue whichever of the two initial blocks has the smaller active list
    int a0 = active[0][x].size;
    int a1 = active[1][x].size;
    int j;
    if (a0 <= a1) j = 0;
    else j = 1;
    pending.add(new IntPair(j, x));
    pending2[x][j] = true;
  }
  // process pending until fixed point
  // k is the index of the next unused block
  int k = 2;
  while (!pending.isEmpty()) {
    IntPair ip = pending.removeFirst();
    int p = ip.n1;
    int x = ip.n2;
    pending2[x][p] = false;
    // find states that need to be split off their blocks
    for (StateListNode m = active[p][x].first; m != null; m = m.next)
      for (State s : reverse.get(m.q.number).get(x))
        if (!split2[s.number]) {
          split2[s.number] = true;
          split.add(s);
          int j = block[s.number];
          splitblock.get(j).add(s);
          if (!refine2[j]) {
            refine2[j] = true;
            refine.add(j);
          }
        }
    // refine blocks
    for (int j : refine) {
      // only split when some, but not all, members of block j were selected
      if (splitblock.get(j).size() < partition.get(j).size()) {
        LinkedList<State> b1 = partition.get(j);
        LinkedList<State> b2 = partition.get(k);
        for (State s : splitblock.get(j)) {
          // move s from block j to the new block k, including its active-list entries
          b1.remove(s);
          b2.add(s);
          block[s.number] = k;
          for (int c = 0; c < sigma.length; c++) {
            StateListNode sn = active2[s.number][c];
            if (sn != null && sn.sl == active[j][c]) {
              sn.remove();
              active2[s.number][c] = active[k][c].add(s);
            }
          }
        }
        // update pending
        for (int c = 0; c < sigma.length; c++) {
          int aj = active[j][c].size;
          int ak = active[k][c].size;
          // queue block j when it is not queued yet, is non-empty and no larger than block k;
          // otherwise queue the new block k
          if (!pending2[c][j] && 0 < aj && aj <= ak) {
            pending2[c][j] = true;
            pending.add(new IntPair(j, c));
          } else {
            pending2[c][k] = true;
            pending.add(new IntPair(k, c));
          }
        }
        k++;
      }
      // reset the per-round scratch state for block j
      for (State s : splitblock.get(j)) split2[s.number] = false;
      refine2[j] = false;
      splitblock.get(j).clear();
    }
    split.clear();
    refine.clear();
  }
  // make a new state for each equivalence class, set initial state
  State[] newstates = new State[k];
  for (int n = 0; n < newstates.length; n++) {
    State s = new State();
    newstates[n] = s;
    for (State q : partition.get(n)) {
      if (q == a.initial) a.initial = s;
      s.accept = q.accept;
      s.number = q.number; // select representative
      // from here on the old state's number is the index of its block
      q.number = n;
    }
  }
  // build transitions and set acceptance
  for (int n = 0; n < newstates.length; n++) {
    State s = newstates[n];
    // s.number still indexes the representative old state chosen above
    s.accept = states[s.number].accept;
    for (Transition t : states[s.number].transitions)
      s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number]));
  }
  a.removeDeadTransitions();
}
/**
 * Determinizes the given automaton.
 *
 * <p>Returns immediately if the automaton is already flagged deterministic or is a singleton.
 * Otherwise a subset construction is run: each new state stands for a set of old state numbers,
 * and for every such set the outgoing transitions of its members are swept in point order to
 * produce non-overlapping transitions to other sets. The automaton is modified in place.
 *
 * <p>Worst case complexity: exponential in number of states.
 *
 * @param a the automaton to determinize
 */
static void determinize(Automaton a) {
  if (a.deterministic || a.isSingleton()) {
    return;
  }
  final State[] allStates = a.getNumberedStates();
  // subset construction
  // remember the old initial state's properties before a.initial is replaced
  final boolean initAccept = a.initial.accept;
  final int initNumber = a.initial.number;
  a.initial = new State();
  SortedIntSet.FrozenIntSet initialset = new SortedIntSet.FrozenIntSet(initNumber, a.initial);
  LinkedList<SortedIntSet.FrozenIntSet> worklist = new LinkedList<SortedIntSet.FrozenIntSet>();
  Map<SortedIntSet.FrozenIntSet, State> newstate = new HashMap<SortedIntSet.FrozenIntSet, State>();
  worklist.add(initialset);
  a.initial.accept = initAccept;
  newstate.put(initialset, a.initial);
  // growable array of the new states, in creation order; newStateUpto is the fill count
  int newStateUpto = 0;
  State[] newStatesArray = new State[5];
  newStatesArray[newStateUpto] = a.initial;
  a.initial.number = newStateUpto;
  newStateUpto++;
  // like Set<Integer,PointTransitions>
  final PointTransitionSet points = new PointTransitionSet();
  // like SortedMap<Integer,Integer>
  final SortedIntSet statesSet = new SortedIntSet(5);
  while (worklist.size() > 0) {
    SortedIntSet.FrozenIntSet s = worklist.removeFirst();
    // Collate all outgoing transitions by min/1+max:
    for (int i = 0; i < s.values.length; i++) {
      final State s0 = allStates[s.values[i]];
      for (int j = 0; j < s0.numTransitions; j++) {
        points.add(s0.transitionsArray[j]);
      }
    }
    if (points.count == 0) {
      // No outgoing transitions -- skip it
      continue;
    }
    points.sort();
    // start of the interval currently being swept, and the number of currently open
    // transitions whose destination accepts
    int lastPoint = -1;
    int accCount = 0;
    // the new state that stands for the set s
    final State r = s.state;
    for (int i = 0; i < points.count; i++) {
      final int point = points.points[i].point;
      if (statesSet.upto > 0) {
        // some transitions are open over [lastPoint, point - 1]: emit one transition to the
        // state standing for the current destination set, creating that state if needed
        assert lastPoint != -1;
        statesSet.computeHash();
        State q = newstate.get(statesSet);
        if (q == null) {
          q = new State();
          final SortedIntSet.FrozenIntSet p = statesSet.freeze(q);
          worklist.add(p);
          // grow the new-state array when it is full
          if (newStateUpto == newStatesArray.length) {
            final State[] newArray =
                new State[ArrayUtil.oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
            System.arraycopy(newStatesArray, 0, newArray, 0, newStateUpto);
            newStatesArray = newArray;
          }
          newStatesArray[newStateUpto] = q;
          q.number = newStateUpto;
          newStateUpto++;
          q.accept = accCount > 0;
          newstate.put(p, q);
        } else {
          assert (accCount > 0 ? true : false) == q.accept
              : "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet;
        }
        r.addTransition(new Transition(lastPoint, point - 1, q));
      }
      // process transitions that end on this point
      // (closes an overlapping interval)
      Transition[] transitions = points.points[i].ends.transitions;
      int limit = points.points[i].ends.count;
      for (int j = 0; j < limit; j++) {
        final Transition t = transitions[j];
        // NOTE(review): boxing to Integer looks unnecessary here -- confirm against the
        // signature of SortedIntSet.decr
        final Integer num = t.to.number;
        statesSet.decr(num);
        accCount -= t.to.accept ? 1 : 0;
      }
      // clear the per-point list so the structure can be reused for the next set
      points.points[i].ends.count = 0;
      // process transitions that start on this point
      // (opens a new interval)
      transitions = points.points[i].starts.transitions;
      limit = points.points[i].starts.count;
      for (int j = 0; j < limit; j++) {
        final Transition t = transitions[j];
        final Integer num = t.to.number;
        statesSet.incr(num);
        accCount += t.to.accept ? 1 : 0;
      }
      lastPoint = point;
      points.points[i].starts.count = 0;
    }
    points.reset();
    assert statesSet.upto == 0 : "upto=" + statesSet.upto;
  }
  a.deterministic = true;
  a.setNumberedStates(newStatesArray, newStateUpto);
}
/** Checks that calling accept() directly on INITIAL yields ACCEPTED. */
@Test
public void acceptImmediately() {
  final State afterAccept = State.INITIAL.accept();
  assertEquals(State.ACCEPTED, afterAccept);
}
/** * Construct an NFA that will recognize a string matching this regular expression and then * transition to a state that will accept with the given integer code. */ public State toNFA(int accept) { State s = new State(); // Build a new state s.accept = accept; // with specified accept code s.trans = new Transition[0]; // and no outgoing transitions. return this.toNFA(s); // Generate recognizer. }