/**
 * Creates a new state, stores it in the next free slot of {@code utf8States} (growing that array
 * first when it is full) and numbers the state with the slot index.
 *
 * @return the newly created and registered state
 */
private State newUTF8State() {
  final State created = new State();
  final int slot = utf8StateCount;
  if (slot == utf8States.length) {
    // Array is full: move the existing entries into a larger one.
    final State[] grown =
        new State[ArrayUtil.oversize(slot + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
    System.arraycopy(utf8States, 0, grown, 0, slot);
    utf8States = grown;
  }
  created.number = slot;
  utf8States[slot] = created;
  utf8StateCount = slot + 1;
  return created;
}
/**
 * Produces a utf8 automaton equivalent to the supplied utf32 automaton. The input does not have to
 * be deterministic. The result is generally not deterministic either, so callers that need a
 * deterministic automaton must determinize it themselves.
 *
 * @param utf32 the utf32 automaton to convert; a singleton automaton is expanded first
 * @return the newly built utf8 automaton
 */
public Automaton convert(Automaton utf32) {
  if (utf32.isSingleton()) {
    utf32 = utf32.cloneExpanded();
  }

  // Indexed by utf32 state number; an entry stays null until its utf8 counterpart exists.
  final State[] utf8ByUtf32Number = new State[utf32.getNumberedStates().length];
  final List<State> stack = new ArrayList<State>();
  final State utf32Start = utf32.getInitialState();
  stack.add(utf32Start);

  final Automaton utf8 = new Automaton();
  utf8.setDeterministic(false);
  final State utf8Start = utf8.getInitialState();

  // Register the utf8 initial state as state number 0.
  utf8States = new State[5];
  utf8StateCount = 0;
  utf8States[utf8StateCount] = utf8Start;
  utf8Start.number = utf8StateCount;
  utf8StateCount++;

  utf8Start.setAccept(utf32Start.isAccept());
  utf8ByUtf32Number[utf32Start.number] = utf8Start;

  // Walk every utf32 state once, last-in first-out.
  while (!stack.isEmpty()) {
    final State source32 = stack.remove(stack.size() - 1);
    final State source8 = utf8ByUtf32Number[source32.number];
    for (int edge = 0; edge < source32.numTransitions; edge++) {
      final Transition transition = source32.transitionsArray[edge];
      final State target32 = transition.to;
      State target8 = utf8ByUtf32Number[target32.number];
      if (target8 == null) {
        // First visit of this utf32 target: create its utf8 twin and schedule it.
        target8 = newUTF8State();
        target8.accept = target32.accept;
        utf8ByUtf32Number[target32.number] = target8;
        stack.add(target32);
      }
      convertOneEdge(source8, target8, transition.min, transition.max);
    }
  }

  utf8.setNumberedStates(utf8States, utf8StateCount);
  return utf8;
}
/**
 * Minimizes the given automaton using Hopcroft's algorithm.
 *
 * <p>The automaton is modified in place. It is first passed through {@code determinize()} and
 * {@code totalize()}. Its states are then split into blocks, starting from an accepting block (0)
 * and a non-accepting block (1), and the blocks are refined until the pending work list is empty.
 * Finally one new state is created per block, {@code a.initial} is pointed at the new state of the
 * block that holds the old initial state, and {@code removeDeadTransitions()} is called.
 *
 * <p>If the initial state's only transition is a self-loop spanning
 * {@code Transition.MIN_VALUE..Transition.MAX_VALUE}, the method returns right after
 * determinization without further changes.
 *
 * @param a the automaton to minimize
 */
public static void minimizeHopcroft(Automaton a) {
  a.determinize();
  Set<Transition> tr = a.initial.getTransitions();
  // Early exit: a single full-range self-loop on the initial state is left as is.
  if (tr.size() == 1) {
    Transition t = tr.iterator().next();
    if (t.to == a.initial && t.min == Transition.MIN_VALUE && t.max == Transition.MAX_VALUE) return;
  }
  a.totalize();
  // make arrays for numbered states and effective alphabet
  Set<State> ss = a.getStates();
  State[] states = new State[ss.size()];
  int number = 0;
  // Renumber the states so that states[q.number] == q for every state q.
  for (State q : ss) {
    states[number] = q;
    q.number = number++;
  }
  // sigma[x] is the symbol that is handed to State.step() for alphabet index x.
  int[] sigma = a.getStartPoints();
  // initialize data structures
  // reverse.get(p).get(x): the states qq for which qq.step(sigma[x]) is state number p.
  ArrayList<ArrayList<LinkedList<State>>> reverse = new ArrayList<ArrayList<LinkedList<State>>>();
  for (int q = 0; q < states.length; q++) {
    ArrayList<LinkedList<State>> v = new ArrayList<LinkedList<State>>();
    // presumably pads v to sigma.length entries so that set(x, ...) below is legal -- confirm
    // against initialize()
    initialize(v, sigma.length);
    reverse.add(v);
  }
  // reverse_nonempty[p][x]: set once anything has been added to reverse.get(p).get(x).
  boolean[][] reverse_nonempty = new boolean[states.length][sigma.length];
  // partition.get(j): the states currently in block j; block[q]: block index of state number q.
  ArrayList<LinkedList<State>> partition = new ArrayList<LinkedList<State>>();
  initialize(partition, states.length);
  int[] block = new int[states.length];
  // active[j][x]: list of the states of block j that have reverse_nonempty set for x;
  // active2[q][x]: the node returned when state number q was added to such a list.
  StateList[][] active = new StateList[states.length][sigma.length];
  StateListNode[][] active2 = new StateListNode[states.length][sigma.length];
  // pending: work list of (block, alphabet index) pairs; pending2[x][j] flags pairs queued.
  LinkedList<IntPair> pending = new LinkedList<IntPair>();
  boolean[][] pending2 = new boolean[sigma.length][states.length];
  // split/split2: states marked during the current round. Note that split is only filled and
  // cleared in this method; it is never read.
  ArrayList<State> split = new ArrayList<State>();
  boolean[] split2 = new boolean[states.length];
  // refine/refine2: blocks that own at least one marked state in the current round.
  ArrayList<Integer> refine = new ArrayList<Integer>();
  boolean[] refine2 = new boolean[states.length];
  // splitblock.get(j): the marked states of block j in the current round.
  ArrayList<ArrayList<State>> splitblock = new ArrayList<ArrayList<State>>();
  initialize(splitblock, states.length);
  for (int q = 0; q < states.length; q++) {
    splitblock.set(q, new ArrayList<State>());
    partition.set(q, new LinkedList<State>());
    for (int x = 0; x < sigma.length; x++) {
      reverse.get(q).set(x, new LinkedList<State>());
      active[q][x] = new StateList();
    }
  }
  // find initial partition and reverse edges
  for (int q = 0; q < states.length; q++) {
    State qq = states[q];
    int j;
    // Accepting states start in block 0, all others in block 1.
    if (qq.accept) j = 0;
    else j = 1;
    partition.get(j).add(qq);
    block[qq.number] = j;
    for (int x = 0; x < sigma.length; x++) {
      int y = sigma[x];
      State p = qq.step(y);
      reverse.get(p.number).get(x).add(qq);
      reverse_nonempty[p.number][x] = true;
    }
  }
  // initialize active sets
  // NOTE(review): blocks 0 and 1 are indexed unconditionally here and below; with fewer than two
  // states active[1][x] would be out of range -- confirm totalize() guarantees two states.
  for (int j = 0; j <= 1; j++)
    for (int x = 0; x < sigma.length; x++)
      for (State qq : partition.get(j))
        if (reverse_nonempty[qq.number][x]) active2[qq.number][x] = active[j][x].add(qq);
  // initialize pending
  // For each alphabet index, queue block 0 unless its active list is larger than block 1's.
  for (int x = 0; x < sigma.length; x++) {
    int a0 = active[0][x].size;
    int a1 = active[1][x].size;
    int j;
    if (a0 <= a1) j = 0;
    else j = 1;
    pending.add(new IntPair(j, x));
    pending2[x][j] = true;
  }
  // process pending until fixed point
  // k is the index of the next unused block; blocks 0 and 1 are already taken.
  int k = 2;
  while (!pending.isEmpty()) {
    IntPair ip = pending.removeFirst();
    int p = ip.n1;
    int x = ip.n2;
    pending2[x][p] = false;
    // find states that need to be split off their blocks
    // Marks every state that steps on sigma[x] into a state on block p's active list for x.
    for (StateListNode m = active[p][x].first; m != null; m = m.next)
      for (State s : reverse.get(m.q.number).get(x))
        if (!split2[s.number]) {
          split2[s.number] = true;
          split.add(s);
          int j = block[s.number];
          splitblock.get(j).add(s);
          if (!refine2[j]) {
            refine2[j] = true;
            refine.add(j);
          }
        }
    // refine blocks
    for (int j : refine) {
      // Split only when some, but not all, states of block j were marked.
      if (splitblock.get(j).size() < partition.get(j).size()) {
        LinkedList<State> b1 = partition.get(j);
        LinkedList<State> b2 = partition.get(k);
        // Move the marked states from block j to the new block k, together with their active
        // list nodes.
        for (State s : splitblock.get(j)) {
          b1.remove(s);
          b2.add(s);
          block[s.number] = k;
          for (int c = 0; c < sigma.length; c++) {
            StateListNode sn = active2[s.number][c];
            if (sn != null && sn.sl == active[j][c]) {
              sn.remove();
              active2[s.number][c] = active[k][c].add(s);
            }
          }
        }
        // update pending
        // Queue (j, c) when it is not already queued and block j's active list for c is
        // non-empty and no larger than block k's; in every other case queue (k, c).
        for (int c = 0; c < sigma.length; c++) {
          int aj = active[j][c].size;
          int ak = active[k][c].size;
          if (!pending2[c][j] && 0 < aj && aj <= ak) {
            pending2[c][j] = true;
            pending.add(new IntPair(j, c));
          } else {
            pending2[c][k] = true;
            pending.add(new IntPair(k, c));
          }
        }
        k++;
      }
      // Reset the per-round marks of block j, whether or not it was split.
      for (State s : splitblock.get(j)) split2[s.number] = false;
      refine2[j] = false;
      splitblock.get(j).clear();
    }
    split.clear();
    refine.clear();
  }
  // make a new state for each equivalence class, set initial state
  State[] newstates = new State[k];
  for (int n = 0; n < newstates.length; n++) {
    State s = new State();
    newstates[n] = s;
    // After this loop s.number holds the old index of the last state visited in block n, and
    // every old state's number holds its block index n.
    // NOTE(review): for a block without members s.number keeps whatever new State() assigned.
    for (State q : partition.get(n)) {
      if (q == a.initial) a.initial = s;
      s.accept = q.accept;
      s.number = q.number; // select representative
      q.number = n;
    }
  }
  // build transitions and set acceptance
  for (int n = 0; n < newstates.length; n++) {
    State s = newstates[n];
    s.accept = states[s.number].accept;
    // t.to.number is the block index by now, so it selects the new state of the target's block.
    for (Transition t : states[s.number].transitions)
      s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number]));
  }
  a.removeDeadTransitions();
}
/** * Determinizes the given automaton. * * <p>Worst case complexity: exponential in number of states. */ static void determinize(Automaton a) { if (a.deterministic || a.isSingleton()) { return; } final State[] allStates = a.getNumberedStates(); // subset construction final boolean initAccept = a.initial.accept; final int initNumber = a.initial.number; a.initial = new State(); SortedIntSet.FrozenIntSet initialset = new SortedIntSet.FrozenIntSet(initNumber, a.initial); LinkedList<SortedIntSet.FrozenIntSet> worklist = new LinkedList<SortedIntSet.FrozenIntSet>(); Map<SortedIntSet.FrozenIntSet, State> newstate = new HashMap<SortedIntSet.FrozenIntSet, State>(); worklist.add(initialset); a.initial.accept = initAccept; newstate.put(initialset, a.initial); int newStateUpto = 0; State[] newStatesArray = new State[5]; newStatesArray[newStateUpto] = a.initial; a.initial.number = newStateUpto; newStateUpto++; // like Set<Integer,PointTransitions> final PointTransitionSet points = new PointTransitionSet(); // like SortedMap<Integer,Integer> final SortedIntSet statesSet = new SortedIntSet(5); while (worklist.size() > 0) { SortedIntSet.FrozenIntSet s = worklist.removeFirst(); // Collate all outgoing transitions by min/1+max: for (int i = 0; i < s.values.length; i++) { final State s0 = allStates[s.values[i]]; for (int j = 0; j < s0.numTransitions; j++) { points.add(s0.transitionsArray[j]); } } if (points.count == 0) { // No outgoing transitions -- skip it continue; } points.sort(); int lastPoint = -1; int accCount = 0; final State r = s.state; for (int i = 0; i < points.count; i++) { final int point = points.points[i].point; if (statesSet.upto > 0) { assert lastPoint != -1; statesSet.computeHash(); State q = newstate.get(statesSet); if (q == null) { q = new State(); final SortedIntSet.FrozenIntSet p = statesSet.freeze(q); worklist.add(p); if (newStateUpto == newStatesArray.length) { final State[] newArray = new State [ArrayUtil.oversize(1 + newStateUpto, 
RamUsageEstimator.NUM_BYTES_OBJ_REF)]; System.arraycopy(newStatesArray, 0, newArray, 0, newStateUpto); newStatesArray = newArray; } newStatesArray[newStateUpto] = q; q.number = newStateUpto; newStateUpto++; q.accept = accCount > 0; newstate.put(p, q); } else { assert (accCount > 0 ? true : false) == q.accept : "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet; } r.addTransition(new Transition(lastPoint, point - 1, q)); } // process transitions that end on this point // (closes an overlapping interval) Transition[] transitions = points.points[i].ends.transitions; int limit = points.points[i].ends.count; for (int j = 0; j < limit; j++) { final Transition t = transitions[j]; final Integer num = t.to.number; statesSet.decr(num); accCount -= t.to.accept ? 1 : 0; } points.points[i].ends.count = 0; // process transitions that start on this point // (opens a new interval) transitions = points.points[i].starts.transitions; limit = points.points[i].starts.count; for (int j = 0; j < limit; j++) { final Transition t = transitions[j]; final Integer num = t.to.number; statesSet.incr(num); accCount += t.to.accept ? 1 : 0; } lastPoint = point; points.points[i].starts.count = 0; } points.reset(); assert statesSet.upto == 0 : "upto=" + statesSet.upto; } a.deterministic = true; a.setNumberedStates(newStatesArray, newStateUpto); }