/**
 * Builds a fresh deterministic automaton whose language is every string.
 *
 * <p>The automaton consists of one accepting state that loops back to itself on the whole
 * {@code char} range.
 *
 * @return the newly created automaton
 */
public static DefaultAutomaton makeAnyString() {
  DefaultAutomaton everything = new DefaultAutomaton();
  State loop = new State();
  everything.initial = loop;
  everything.deterministic = true;
  loop.accept = true;
  // Self-loop over every char value keeps the automaton in the accepting state forever.
  Transition selfLoop = new Transition(Character.MIN_VALUE, Character.MAX_VALUE, loop);
  loop.transitions.add(selfLoop);
  return everything;
}
/**
 * Constructs sub-automaton corresponding to decimal numbers of value at most x.substring(n) and
 * length x.substring(n).length().
 *
 * @param x digit string giving the upper bound
 * @param n index in {@code x} of the next digit to handle
 * @return the start state of the constructed sub-automaton
 */
private static State atMost(String x, int n) {
  State s = new State();
  if (x.length() == n) {
    // All digits consumed: this state ends the number.
    s.setAccept(true);
  } else {
    char c = x.charAt(n);
    // Same digit as the bound: the remaining digits must still respect the bound.
    // (The index is advanced as a plain int; a char cast here would truncate it to 16 bits.)
    s.addTransition(new Transition(c, atMost(x, n + 1)));
    if (c > '0') {
      // Any strictly smaller digit: the remaining digits only need the right length.
      s.addTransition(new Transition('0', (char) (c - 1), anyOfRightLength(x, n + 1)));
    }
  }
  return s;
}
/** Minimizes the given automaton using Huffman's algorithm. */ public static void minimizeHuffman(LinkedAutomaton a) { determinize(a); a.totalize(); Set<State> ss = a.getStates(); Transition[][] transitions = new Transition[ss.size()][]; State[] states = ss.toArray(new State[ss.size()]); boolean[][] mark = new boolean[states.length][states.length]; ArrayList<ArrayList<HashSet<IntPair>>> triggers = new ArrayList<ArrayList<HashSet<IntPair>>>(); for (int n1 = 0; n1 < states.length; n1++) { ArrayList<HashSet<IntPair>> v = new ArrayList<HashSet<IntPair>>(); initialize(v, states.length); triggers.add(v); } // initialize marks based on acceptance status and find transition arrays for (int n1 = 0; n1 < states.length; n1++) { states[n1].number = n1; transitions[n1] = states[n1].getSortedTransitionArray(false); for (int n2 = n1 + 1; n2 < states.length; n2++) if (states[n1].accept != states[n2].accept) mark[n1][n2] = true; } // for all pairs, see if states agree for (int n1 = 0; n1 < states.length; n1++) for (int n2 = n1 + 1; n2 < states.length; n2++) if (!mark[n1][n2]) { if (statesAgree(transitions, mark, n1, n2)) addTriggers(transitions, triggers, n1, n2); else markPair(mark, triggers, n1, n2); } // assign equivalence class numbers to states int numclasses = 0; for (int n = 0; n < states.length; n++) states[n].number = -1; for (int n1 = 0; n1 < states.length; n1++) if (states[n1].number == -1) { states[n1].number = numclasses; for (int n2 = n1 + 1; n2 < states.length; n2++) if (!mark[n1][n2]) states[n2].number = numclasses; numclasses++; } // make a new state for each equivalence class State[] newstates = new State[numclasses]; for (int n = 0; n < numclasses; n++) newstates[n] = new State(); // select a class representative for each class and find the new initial // state for (int n = 0; n < states.length; n++) { newstates[states[n].number].number = n; if (states[n] == a.initial) a.initial = newstates[states[n].number]; } // build transitions and set acceptance for (int n 
= 0; n < numclasses; n++) { State s = newstates[n]; s.accept = states[s.number].accept; for (Transition t : states[s.number].transitions) s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number])); } a.removeDeadTransitions(); }
/**
 * Builds a fresh deterministic automaton accepting exactly one char drawn from the closed interval
 * {@code [min, max]}.
 *
 * <p>When both bounds are equal the work is delegated to {@code makeChar}. When {@code min} is
 * greater than {@code max} the initial state gets no outgoing transition.
 *
 * @param min lower bound of the interval (inclusive)
 * @param max upper bound of the interval (inclusive)
 * @return the newly created automaton
 */
public static DefaultAutomaton makeCharRange(char min, char max) {
  if (min == max) {
    return makeChar(min);
  }
  DefaultAutomaton range = new DefaultAutomaton();
  State start = new State();
  State end = new State();
  range.initial = start;
  range.deterministic = true;
  end.accept = true;
  // The bounds differ at this point, so "min <= max" is the same as "min < max".
  if (min < max) {
    start.transitions.add(new Transition(min, max, end));
  }
  return range;
}
/**
 * Builds the sub-automaton for decimal numbers whose value is at least x.substring(n) and whose
 * length is x.substring(n).length().
 *
 * @param x digit string giving the lower bound
 * @param n index in {@code x} of the next digit to handle
 * @param initials collection that receives each non-final state created while {@code zeros} is
 *     true
 * @param zeros true while every digit of {@code x} handled so far on this path was {@code '0'}
 * @return the start state of the constructed sub-automaton
 */
private static State atLeast(String x, int n, Collection<State> initials, boolean zeros) {
  State state = new State();
  if (x.length() == n) {
    state.setAccept(true);
    return state;
  }
  if (zeros) {
    initials.add(state);
  }
  char digit = x.charAt(n);
  // Same digit as the bound: the rest must still be at least the rest of the bound.
  boolean stillZeros = zeros && digit == '0';
  State sameDigit = atLeast(x, n + 1, initials, stillZeros);
  state.addTransition(new Transition(digit, sameDigit));
  // Any strictly larger digit: the rest only needs the right length.
  if (digit < '9') {
    char nextDigit = (char) (digit + 1);
    state.addTransition(new Transition(nextDigit, '9', anyOfRightLength(x, n + 1)));
  }
  return state;
}
/**
 * Builds a fresh deterministic automaton accepting exactly one character taken from the given
 * set.
 *
 * <p>A one-character set is delegated to {@code makeChar}. Otherwise one transition per character
 * of {@code set} is added from the initial state to a single accepting state, and the automaton
 * is then passed through {@code reduce()}.
 *
 * @param set string whose characters form the accepted set
 * @return the newly created automaton
 */
public static DefaultAutomaton makeCharSet(String set) {
  if (set.length() == 1) {
    return makeChar(set.charAt(0));
  }
  DefaultAutomaton chooser = new DefaultAutomaton();
  State start = new State();
  State end = new State();
  chooser.initial = start;
  end.accept = true;
  for (char member : set.toCharArray()) {
    start.transitions.add(new Transition(member, end));
  }
  chooser.deterministic = true;
  chooser.reduce();
  return chooser;
}
/**
 * Builds a deterministic automaton matching every string that contains {@code s} as a substring.
 *
 * <p>State {@code i} stands for "the first {@code i} characters of {@code s} have just been
 * read". The last state is accepting and loops on every char.
 *
 * @param s the substring to look for
 * @return the newly created automaton
 */
public static DefaultAutomaton makeStringMatcher(String s) {
  DefaultAutomaton matcher = new DefaultAutomaton();
  final int length = s.length();
  State[] states = new State[length + 1];
  states[0] = matcher.initial;
  for (int pos = 1; pos <= length; pos++) {
    states[pos] = new State();
  }

  // Once the whole pattern has been seen, stay in the accepting state on any input.
  State accepting = states[length];
  accepting.accept = true;
  accepting.transitions.add(
      new Transition(Character.MIN_VALUE, Character.MAX_VALUE, accepting));

  for (int pos = 0; pos < length; pos++) {
    State current = states[pos];
    Set<Character> handled = new HashSet<Character>();

    // Forward edge on the expected pattern character.
    char expected = s.charAt(pos);
    current.transitions.add(new Transition(expected, states[pos + 1]));
    handled.add(expected);

    // Back edges, longest target first: on char s[target-1] go to state `target` when the
    // first target-1 pattern chars equal the target-1 chars read immediately before.
    for (int target = pos; target >= 1; target--) {
      char fallback = s.charAt(target - 1);
      if (handled.contains(fallback)) {
        continue;
      }
      if (s.regionMatches(0, s, pos - target + 1, target - 1)) {
        current.transitions.add(new Transition(fallback, states[target]));
        handled.add(fallback);
      }
    }

    // Every char without an edge so far leads back to state 0; emit those as maximal ranges
    // between the sorted handled chars.
    char[] sorted = new char[handled.size()];
    int fill = 0;
    for (char ch : handled) {
      sorted[fill++] = ch;
    }
    Arrays.sort(sorted);
    int low = Character.MIN_VALUE;
    int next = 0;
    while (low <= Character.MAX_VALUE) {
      // Skip over handled chars that sit exactly at the current lower bound.
      while (next < sorted.length && sorted[next] == low) {
        next++;
        low++;
      }
      if (low > Character.MAX_VALUE) {
        break;
      }
      int high = Character.MAX_VALUE;
      if (next < sorted.length) {
        high = sorted[next] - 1;
        next++;
      }
      current.transitions.add(new Transition((char) low, (char) high, states[0]));
      // high + 1 is a handled char (or past the end), so resume right after it.
      low = high + 2;
    }
  }
  matcher.deterministic = true;
  return matcher;
}
/** * Constructs sub-automaton corresponding to decimal numbers of value between x.substring(n) and * y.substring(n) and of length x.substring(n).length() (which must be equal to * y.substring(n).length()). */ private static State between( String x, String y, int n, Collection<State> initials, boolean zeros) { State s = new State(); if (x.length() == n) s.setAccept(true); else { if (zeros) initials.add(s); char cx = x.charAt(n); char cy = y.charAt(n); if (cx == cy) s.addTransition(new Transition(cx, between(x, y, n + 1, initials, zeros && cx == '0'))); else { // cx<cy s.addTransition(new Transition(cx, atLeast(x, n + 1, initials, zeros && cx == '0'))); s.addTransition(new Transition(cy, atMost(y, n + 1))); if (cx + 1 < cy) s.addTransition( new Transition((char) (cx + 1), (char) (cy - 1), anyOfRightLength(x, n + 1))); } } return s; }
/**
 * Minimizes the given automaton using Hopcroft's algorithm.
 *
 * <p>Steps, in order: the automaton is determinized via {@code determinize(a)}; if its initial
 * state then has exactly one transition and that transition is a self-loop covering {@code
 * Character.MIN_VALUE..Character.MAX_VALUE}, the method returns immediately. Otherwise the
 * automaton is totalized, its states are split into two initial blocks, the blocks are refined
 * until the worklist of pending (block, symbol) pairs is empty, and the automaton is rebuilt with
 * one fresh state per block. Finally {@code a.removeDeadTransitions()} is called.
 *
 * <p>The method overwrites the {@code number} field of the states it visits and replaces {@code
 * a.initial}.
 *
 * @param a the automaton to minimize; modified in place
 */
public static void minimizeHopcroft(LinkedAutomaton a) {
  determinize(a);
  // Shortcut: a single full-range self-loop on the initial state needs no further work.
  Set<Transition> tr = a.initial.getTransitions();
  if (tr.size() == 1) {
    Transition t = tr.iterator().next();
    if (t.to == a.initial && t.min == Character.MIN_VALUE && t.max == Character.MAX_VALUE) return;
  }
  a.totalize();
  // make arrays for numbered states and effective alphabet
  Set<State> ss = a.getStates();
  State[] states = new State[ss.size()];
  int number = 0;
  for (State q : ss) {
    states[number] = q;
    q.number = number++;
  }
  char[] sigma = a.getStartPoints();
  // initialize data structures
  // reverse.get(p).get(x): states whose step on sigma[x] leads to the state numbered p.
  ArrayList<ArrayList<LinkedList<State>>> reverse = new ArrayList<ArrayList<LinkedList<State>>>();
  for (int q = 0; q < states.length; q++) {
    ArrayList<LinkedList<State>> v = new ArrayList<LinkedList<State>>();
    initialize(v, sigma.length);
    reverse.add(v);
  }
  // reverse_nonempty[p][x]: set once something has been added to reverse.get(p).get(x).
  boolean[][] reverse_nonempty = new boolean[states.length][sigma.length];
  // partition.get(j): states currently in block j; block[q]: block index of state number q.
  ArrayList<LinkedList<State>> partition = new ArrayList<LinkedList<State>>();
  initialize(partition, states.length);
  int[] block = new int[states.length];
  // active[j][x]: states of block j that have incoming edges on sigma[x];
  // active2[q][x]: the list node holding state q in such a list (null if it is in none).
  StateList[][] active = new StateList[states.length][sigma.length];
  StateListNode[][] active2 = new StateListNode[states.length][sigma.length];
  // pending: worklist of (block, symbol index) pairs; pending2[x][j] is set when (j, x) is
  // queued and cleared when it is dequeued.
  LinkedList<IntPair> pending = new LinkedList<IntPair>();
  boolean[][] pending2 = new boolean[sigma.length][states.length];
  // split/split2: states touched while processing the current pair (list + membership flag).
  // Within this method the split list is only appended to and cleared.
  ArrayList<State> split = new ArrayList<State>();
  boolean[] split2 = new boolean[states.length];
  // refine/refine2: blocks containing at least one touched state (list + membership flag).
  ArrayList<Integer> refine = new ArrayList<Integer>();
  boolean[] refine2 = new boolean[states.length];
  // splitblock.get(j): the touched states that currently belong to block j.
  ArrayList<ArrayList<State>> splitblock = new ArrayList<ArrayList<State>>();
  initialize(splitblock, states.length);
  for (int q = 0; q < states.length; q++) {
    splitblock.set(q, new ArrayList<State>());
    partition.set(q, new LinkedList<State>());
    for (int x = 0; x < sigma.length; x++) {
      reverse.get(q).set(x, new LinkedList<State>());
      active[q][x] = new StateList();
    }
  }
  // find initial partition and reverse edges
  for (int q = 0; q < states.length; q++) {
    State qq = states[q];
    int j;
    // NOTE(review): block 0 is chosen when accept is non-null; whether that means "accepting"
    // depends on the type of State.accept, which is not visible here -- confirm.
    if (qq.accept != null) j = 0;
    else j = 1;
    partition.get(j).add(qq);
    block[qq.number] = j;
    for (int x = 0; x < sigma.length; x++) {
      char y = sigma[x];
      State p = qq.step(y);
      reverse.get(p.number).get(x).add(qq);
      reverse_nonempty[p.number][x] = true;
    }
  }
  // initialize active sets
  for (int j = 0; j <= 1; j++)
    for (int x = 0; x < sigma.length; x++)
      for (State qq : partition.get(j))
        if (reverse_nonempty[qq.number][x]) active2[qq.number][x] = active[j][x].add(qq);
  // initialize pending
  // For each symbol, queue whichever of the two initial blocks has the shorter active list
  // (block 0 on a tie).
  for (int x = 0; x < sigma.length; x++) {
    int a0 = active[0][x].size;
    int a1 = active[1][x].size;
    int j;
    if (a0 <= a1) j = 0;
    else j = 1;
    pending.add(new IntPair(j, x));
    pending2[x][j] = true;
  }
  // process pending until fixed point
  // k is the index of the next unused block; blocks 0 and 1 already exist.
  int k = 2;
  while (!pending.isEmpty()) {
    IntPair ip = pending.removeFirst();
    int p = ip.n1; // block index
    int x = ip.n2; // symbol index into sigma
    pending2[x][p] = false;
    // find states that need to be split off their blocks
    // (every state that steps on sigma[x] into an active state of block p)
    for (StateListNode m = active[p][x].first; m != null; m = m.next)
      for (State s : reverse.get(m.q.number).get(x))
        if (!split2[s.number]) {
          split2[s.number] = true;
          split.add(s);
          int j = block[s.number];
          splitblock.get(j).add(s);
          if (!refine2[j]) {
            refine2[j] = true;
            refine.add(j);
          }
        }
    // refine blocks
    for (int j : refine) {
      // Split only when the touched states are strictly fewer than the states of block j.
      if (splitblock.get(j).size() < partition.get(j).size()) {
        LinkedList<State> b1 = partition.get(j);
        LinkedList<State> b2 = partition.get(k);
        // Move the touched states from block j into the new block k, together with their
        // entries in the active lists.
        for (State s : splitblock.get(j)) {
          b1.remove(s);
          b2.add(s);
          block[s.number] = k;
          for (int c = 0; c < sigma.length; c++) {
            StateListNode sn = active2[s.number][c];
            if (sn != null && sn.sl == active[j][c]) {
              sn.remove();
              active2[s.number][c] = active[k][c].add(s);
            }
          }
        }
        // update pending
        // Per symbol: queue (j, c) if it is not already flagged and its active list is
        // non-empty and no longer than block k's; otherwise queue (k, c).
        for (int c = 0; c < sigma.length; c++) {
          int aj = active[j][c].size;
          int ak = active[k][c].size;
          if (!pending2[c][j] && 0 < aj && aj <= ak) {
            pending2[c][j] = true;
            pending.add(new IntPair(j, c));
          } else {
            pending2[c][k] = true;
            pending.add(new IntPair(k, c));
          }
        }
        k++;
      }
      // Reset the per-round bookkeeping for block j.
      for (State s : splitblock.get(j)) split2[s.number] = false;
      refine2[j] = false;
      splitblock.get(j).clear();
    }
    split.clear();
    refine.clear();
  }
  // make a new state for each equivalence class, set initial state
  // From here on the number fields are repurposed: a new state's number is the old index of a
  // member of its block, and each old state's number becomes its block index.
  State[] newstates = new State[k];
  for (int n = 0; n < newstates.length; n++) {
    State s = new State();
    newstates[n] = s;
    for (State q : partition.get(n)) {
      if (q == a.initial) a.initial = s;
      s.accept = q.accept;
      s.number = q.number; // select representative
      q.number = n;
    }
  }
  // build transitions and set acceptance
  // Transitions are copied from each representative; t.to.number is now the target's block.
  for (int n = 0; n < newstates.length; n++) {
    State s = newstates[n];
    s.accept = states[s.number].accept;
    for (Transition t : states[s.number].transitions)
      s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number]));
  }
  a.removeDeadTransitions();
}
/**
 * Builds the sub-automaton for decimal numbers of length x.substring(n).length(), with no
 * constraint on their value.
 *
 * @param x digit string whose remaining length determines how many digits are accepted
 * @param n index in {@code x} from which the remaining length is counted
 * @return the start state of the constructed sub-automaton
 */
private static State anyOfRightLength(String x, int n) {
  State state = new State();
  if (x.length() == n) {
    // No digits left to read: this state ends the number.
    state.setAccept(true);
    return state;
  }
  // Accept any digit here, then require the right length for the remainder.
  State rest = anyOfRightLength(x, n + 1);
  state.addTransition(new Transition('0', '9', rest));
  return state;
}