/**
  * Builds an automaton whose language is every word of {@code a1} followed by a word of {@code
  * a2}.
  *
  * <p>Complexity: linear in number of states.
  */
 public static Automaton concatenate(Automaton a1, Automaton a2) {
   final boolean bothSingletons = a1.isSingleton() && a2.isSingleton();
   if (bothSingletons) {
     // Two single-string automata concatenate to the joined string.
     return BasicAutomata.makeString(a1.singleton + a2.singleton);
   }
   if (isEmpty(a1) || isEmpty(a2)) {
     return BasicAutomata.makeEmpty();
   }
   // Linking a singleton to a deterministic automaton with the NFA concatenation
   // algorithm always yields a DFA, which spares callers an expensive, redundant
   // determinize() for this common case. Evaluated before the operands are cloned.
   final boolean resultIsDeterministic = a1.isSingleton() && a2.isDeterministic();
   // When both arguments are the same object, each side gets its own expanded clone.
   final boolean sameInstance = a1 == a2;
   final Automaton left = sameInstance ? a1.cloneExpanded() : a1.cloneExpandedIfRequired();
   final Automaton right = sameInstance ? a2.cloneExpanded() : a2.cloneExpandedIfRequired();
   // Every accept state of the left operand stops accepting and continues into the right one.
   for (State leftAccept : left.getAcceptStates()) {
     leftAccept.accept = false;
     leftAccept.addEpsilon(right.initial);
   }
   left.deterministic = resultIsDeterministic;
   left.clearNumberedStates();
   left.checkMinimizeAlways();
   return left;
 }
Beispiel #2
0
 @Test
 public void acceptViaRunning() {
   // INITIAL --run()--> RUNNING
   final State running = State.INITIAL.run();
   assertEquals(State.RUNNING, running);
   // RUNNING --accept()--> ACCEPTED
   final State accepted = running.accept();
   assertEquals(State.ACCEPTED, accepted);
 }
 /**
  * Builds a (deterministic) automaton whose language is the complement of the language of the
  * given automaton.
  *
  * <p>Complexity: linear in number of states (if already deterministic).
  */
 public static Automaton complement(Automaton a) {
   final Automaton flipped = a.cloneExpandedIfRequired();
   flipped.determinize();
   flipped.totalize();
   // Swap the roles of accepting and non-accepting states.
   for (State state : flipped.getNumberedStates()) {
     state.accept = !state.accept;
   }
   flipped.removeDeadTransitions();
   return flipped;
 }
Beispiel #4
0
 /** Creates a new (deterministic) automaton that accepts all strings. */
 public static Automaton makeAnyString() {
   final Automaton anyString = new Automaton();
   final State loop = new State();
   // One accepting state that loops back to itself; the transition is built with null bounds.
   loop.accept = true;
   loop.transitions.add(new Transition(null, null, loop));
   anyString.initial = loop;
   anyString.deterministic = true;
   return anyString;
 }
 /** Creates a new (deterministic) automaton that accepts all strings. */
 public static DefaultAutomaton makeAnyString() {
   final DefaultAutomaton anyString = new DefaultAutomaton();
   final State loop = new State();
   // One accepting state that loops back to itself on every char value.
   loop.accept = true;
   loop.transitions.add(new Transition(Character.MIN_VALUE, Character.MAX_VALUE, loop));
   anyString.initial = loop;
   anyString.deterministic = true;
   return anyString;
 }
 /**
  * Minimizes the given automaton using Huffman's algorithm.
  *
  * <p>The automaton is first determinized and totalized. A pairwise table then records which
  * pairs of states have been marked; states whose pair is still unmarked at the end are merged
  * into a single new state. The automaton is modified in place: {@code a.initial} is replaced by
  * the new state of its class and dead transitions are removed at the end.
  *
  * <p>The mark table holds one {@code boolean} per ordered pair of states, so its size is
  * quadratic in the number of states.
  *
  * @param a the automaton to minimize (modified in place)
  */
 public static void minimizeHuffman(Automaton a) {
   a.determinize();
   a.totalize();
   Set<State> ss = a.getStates();
   Transition[][] transitions = new Transition[ss.size()][];
   State[] states = ss.toArray(new State[ss.size()]);
   // mark[n1][n2] (only n1 < n2 is used): the pair has been marked as not mergeable
   boolean[][] mark = new boolean[states.length][states.length];
   // one list per state, each prepared by initialize(...) with the number of states;
   // the lists are consumed by addTriggers/markPair below
   ArrayList<ArrayList<HashSet<IntPair>>> triggers = new ArrayList<ArrayList<HashSet<IntPair>>>();
   for (int n1 = 0; n1 < states.length; n1++) {
     ArrayList<HashSet<IntPair>> v = new ArrayList<HashSet<IntPair>>();
     initialize(v, states.length);
     triggers.add(v);
   }
   // initialize marks based on acceptance status and find transition arrays
   for (int n1 = 0; n1 < states.length; n1++) {
     // number each state with its index into states[]
     states[n1].number = n1;
     transitions[n1] = states[n1].getSortedTransitionArray(false);
     for (int n2 = n1 + 1; n2 < states.length; n2++)
       if (states[n1].accept != states[n2].accept) mark[n1][n2] = true;
   }
   // for all pairs, see if states agree
   for (int n1 = 0; n1 < states.length; n1++)
     for (int n2 = n1 + 1; n2 < states.length; n2++)
       if (!mark[n1][n2]) {
         // agreeing pairs register triggers; disagreeing pairs are marked
         if (statesAgree(transitions, mark, n1, n2)) addTriggers(transitions, triggers, n1, n2);
         else markPair(mark, triggers, n1, n2);
       }
   // assign equivalence class numbers to states
   int numclasses = 0;
   // -1 means "no class assigned yet"
   for (int n = 0; n < states.length; n++) states[n].number = -1;
   for (int n1 = 0; n1 < states.length; n1++)
     if (states[n1].number == -1) {
       states[n1].number = numclasses;
       // every later state whose pair with n1 is unmarked joins n1's class
       for (int n2 = n1 + 1; n2 < states.length; n2++)
         if (!mark[n1][n2]) states[n2].number = numclasses;
       numclasses++;
     }
   // make a new state for each equivalence class
   State[] newstates = new State[numclasses];
   for (int n = 0; n < numclasses; n++) newstates[n] = new State();
   // select a class representative for each class and find the new initial
   // state
   for (int n = 0; n < states.length; n++) {
     // the new state's number temporarily holds the index of an old state of its class;
     // the last old state visited for a class wins
     newstates[states[n].number].number = n;
     if (states[n] == a.initial) a.initial = newstates[states[n].number];
   }
   // build transitions and set acceptance
   for (int n = 0; n < numclasses; n++) {
     State s = newstates[n];
     // copy acceptance and transitions from the representative; targets are redirected to
     // the new state of the target's class (old states' numbers are class numbers here)
     s.accept = states[s.number].accept;
     for (Transition t : states[s.number].transitions)
       s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number]));
   }
   a.removeDeadTransitions();
 }
Beispiel #7
0
 /**
  * Returns a new (deterministic) automaton that accepts a single char whose value is in the given
  * interval (including both end points).
  *
  * <p>When both bounds are non-null and equal, the result is delegated to {@code makeChar}. When
  * both bounds are non-null and {@code min > max}, no transition is added and the automaton
  * accepts nothing. A {@code null} bound is passed to the {@code Transition} unchanged;
  * presumably it denotes an open end of the interval (confirm against {@code Transition}).
  *
  * @param min lower end point of the interval, or {@code null}
  * @param max upper end point of the interval, or {@code null}
  * @return a new automaton for the interval
  */
 public static Automaton makeCharRange(Character min, Character max) {
   // Boxed values must be compared with equals(): == compares object identity and is only
   // guaranteed to succeed for values inside the boxing cache.
   if (min != null && max != null && min.equals(max)) {
     return makeChar(min);
   }
   Automaton a = new Automaton();
   State s1 = new State();
   State s2 = new State();
   a.initial = s1;
   s2.accept = true;
   if (min == null || max == null || min <= max) {
     s1.transitions.add(new Transition(min, max, s2));
   }
   a.deterministic = true;
   return a;
 }
 /**
  * Creates a new (deterministic) automaton accepting exactly one char taken from the closed
  * interval {@code [min, max]}. An interval with {@code min > max} yields an automaton without
  * transitions.
  */
 public static DefaultAutomaton makeCharRange(char min, char max) {
   if (min == max) {
     return makeChar(min);
   }
   final DefaultAutomaton range = new DefaultAutomaton();
   final State start = new State();
   final State end = new State();
   range.initial = start;
   end.accept = true;
   if (max >= min) {
     start.transitions.add(new Transition(min, max, end));
   }
   range.deterministic = true;
   return range;
 }
 /**
  * Builds an automaton that accepts the empty string in addition to the language of the given
  * automaton.
  *
  * <p>Complexity: linear in number of states.
  */
 public static Automaton optional(Automaton a) {
   final Automaton result = a.cloneExpandedIfRequired();
   // New accepting entry state that continues into the old initial state.
   final State entry = new State();
   entry.addEpsilon(result.initial);
   entry.accept = true;
   result.initial = entry;
   result.deterministic = false;
   result.clearNumberedStates();
   result.checkMinimizeAlways();
   return result;
 }
Beispiel #10
0
 /** Creates a new (deterministic) automaton that accepts a single character in the given set. */
 public static Automaton makeCharSet(String set) {
   if (set.length() == 1) {
     return makeChar(set.charAt(0));
   }
   final Automaton result = new Automaton();
   final State start = new State();
   final State end = new State();
   result.initial = start;
   end.accept = true;
   // One transition per character of the set, in string order.
   for (char member : set.toCharArray()) {
     start.transitions.add(new Transition(member, end));
   }
   result.deterministic = true;
   result.reduce();
   return result;
 }
 /**
  * Builds an automaton for the Kleene star (zero or more concatenated repetitions) of the
  * language of the given automaton. Works on an expanded clone, so the input automaton's language
  * is never modified.
  *
  * <p>Complexity: linear in number of states.
  */
 public static Automaton repeat(Automaton a) {
   final Automaton result = a.cloneExpanded();
   // New accepting entry state: accepts the empty string and continues into the old initial state.
   final State hub = new State();
   hub.accept = true;
   hub.addEpsilon(result.initial);
   // Every accept state may start another repetition through the hub.
   for (State acceptState : result.getAcceptStates()) {
     acceptState.addEpsilon(hub);
   }
   result.initial = hub;
   result.deterministic = false;
   result.clearNumberedStates();
   result.checkMinimizeAlways();
   return result;
 }
 /**
  * Constructs deterministic automaton that matches strings that contain the given substring.
  *
  * <p>One state is used per matched prefix length of {@code s} (state {@code i} means the first
  * {@code i} chars of {@code s} have just been read). The last state is accepting and loops on
  * every char. For every other state, each char value gets exactly one outgoing transition: the
  * next char of {@code s} advances, selected chars fall back to an earlier prefix state, and all
  * remaining chars return to state 0.
  *
  * @param s the substring to search for
  * @return a new automaton marked as deterministic
  */
 public static DefaultAutomaton makeStringMatcher(String s) {
   DefaultAutomaton a = new DefaultAutomaton();
   State[] states = new State[s.length() + 1];
   states[0] = a.initial;
   for (int i = 0; i < s.length(); i++) states[i + 1] = new State();
   // final state: whole substring seen; stay here on any further input
   State f = states[s.length()];
   f.accept = true;
   f.transitions.add(new Transition(Character.MIN_VALUE, Character.MAX_VALUE, f));
   for (int i = 0; i < s.length(); i++) {
     // chars that already have a transition out of states[i]
     Set<Character> done = new HashSet<Character>();
     char c = s.charAt(i);
     // the expected char advances to the next prefix state
     states[i].transitions.add(new Transition(c, states[i + 1]));
     done.add(c);
     // fallbacks, longest candidate first: reading d = s[j-1] leads to state j when the
     // first j-1 chars of s equal the j-1 chars read just before position i
     for (int j = i; j >= 1; j--) {
       char d = s.charAt(j - 1);
       if (!done.contains(d) && s.substring(0, j - 1).equals(s.substring(i - j + 1, i))) {
         states[i].transitions.add(new Transition(d, states[j]));
         done.add(d);
       }
     }
     // sorted array of the chars handled above
     char[] da = new char[done.size()];
     int h = 0;
     for (char w : done) da[h++] = w;
     Arrays.sort(da);
     // cover every gap between handled chars with a range transition back to states[0];
     // from/to are ints so they can step past Character.MAX_VALUE and end the loop
     int from = Character.MIN_VALUE;
     int k = 0;
     while (from <= Character.MAX_VALUE) {
       // skip handled chars sitting exactly at the start of the gap
       while (k < da.length && da[k] == from) {
         k++;
         from++;
       }
       if (from <= Character.MAX_VALUE) {
         // the gap ends just before the next handled char, or at the largest char value
         int to = Character.MAX_VALUE;
         if (k < da.length) {
           to = da[k] - 1;
           k++;
         }
         states[i].transitions.add(new Transition((char) from, (char) to, states[0]));
         // to + 1 is the handled char just consumed (or past the end), so resume after it
         from = to + 2;
       }
     }
   }
   a.deterministic = true;
   return a;
 }
  /**
   * Translates a utf32 automaton into an equivalent utf8 one. The input does not have to be
   * deterministic, and the result is in general not deterministic either, so determinize it
   * afterwards if that is needed.
   */
  public Automaton convert(Automaton utf32) {
    if (utf32.isSingleton()) {
      utf32 = utf32.cloneExpanded();
    }

    // utf32 state number -> corresponding utf8 state (null until first reached)
    final State[] utf32ToUtf8 = new State[utf32.getNumberedStates().length];
    final List<State> stack = new ArrayList<State>();
    final State utf32Start = utf32.getInitialState();
    stack.add(utf32Start);

    final Automaton utf8 = new Automaton();
    utf8.setDeterministic(false);
    final State utf8Start = utf8.getInitialState();

    // The utf8 initial state occupies slot 0 of the numbered-state array.
    utf8States = new State[5];
    utf8Start.number = 0;
    utf8States[0] = utf8Start;
    utf8StateCount = 1;

    utf8Start.setAccept(utf32Start.isAccept());
    utf32ToUtf8[utf32Start.number] = utf8Start;

    // Visit every reachable utf32 state once, converting its outgoing edges.
    while (!stack.isEmpty()) {
      final State source32 = stack.remove(stack.size() - 1);
      final State source8 = utf32ToUtf8[source32.number];
      for (int i = 0; i < source32.numTransitions; i++) {
        final Transition edge = source32.transitionsArray[i];
        final State target32 = edge.to;
        State target8 = utf32ToUtf8[target32.number];
        if (target8 == null) {
          // First time this utf32 state is reached: create its utf8 twin and schedule it.
          target8 = newUTF8State();
          target8.accept = target32.accept;
          utf32ToUtf8[target32.number] = target8;
          stack.add(target32);
        }
        convertOneEdge(source8, target8, edge.min, edge.max);
      }
    }

    utf8.setNumberedStates(utf8States, utf8StateCount);
    return utf8;
  }
 /**
  * Builds an automaton that accepts the concatenation, in list order, of the languages of the
  * given automata. An empty list yields the automaton for the empty string.
  *
  * <p>Complexity: linear in total number of states.
  */
 public static Automaton concatenate(List<Automaton> l) {
   if (l.isEmpty()) {
     return BasicAutomata.makeEmptyString();
   }
   boolean onlySingletons = true;
   for (Automaton a : l) {
     if (!a.isSingleton()) {
       onlySingletons = false;
       break;
     }
   }
   if (onlySingletons) {
     // All operands are single strings: join them directly.
     final StringBuilder joined = new StringBuilder();
     for (Automaton a : l) {
       joined.append(a.singleton);
     }
     return BasicAutomata.makeString(joined.toString());
   }
   for (Automaton a : l) {
     if (BasicOperations.isEmpty(a)) {
       return BasicAutomata.makeEmpty();
     }
   }
   // Fewer distinct identity hash codes than list entries is treated as "the same automaton
   // occurs more than once", in which case every operand is cloned unconditionally.
   final Set<Integer> identities = new HashSet<Integer>();
   for (Automaton a : l) {
     identities.add(System.identityHashCode(a));
   }
   final boolean aliased = identities.size() != l.size();
   final Automaton head = l.get(0);
   final Automaton result = aliased ? head.cloneExpanded() : head.cloneExpandedIfRequired();
   // Accept states that still have to be linked to the next operand.
   Set<State> openEnds = result.getAcceptStates();
   for (Automaton next : l.subList(1, l.size())) {
     if (next.isEmptyString()) {
       continue;
     }
     final Automaton operand = aliased ? next.cloneExpanded() : next.cloneExpandedIfRequired();
     final Set<State> operandAccepts = operand.getAcceptStates();
     for (State end : openEnds) {
       end.accept = false;
       end.addEpsilon(operand.initial);
       // A state that is accepting again after the link stays an open end.
       if (end.accept) {
         operandAccepts.add(end);
       }
     }
     openEnds = operandAccepts;
   }
   result.deterministic = false;
   result.clearNumberedStates();
   result.checkMinimizeAlways();
   return result;
 }
 /**
  * Simple, original brics implementation of determinize(): runs the subset construction on the
  * given automaton, starting from the given set of initial states. The automaton is modified in
  * place.
  */
 public static void determinizeSimple(Automaton a, Set<State> initialset) {
   final int[] points = a.getStartPoints();
   // subset construction
   final Map<Set<State>, Set<State>> seen = new HashMap<Set<State>, Set<State>>();
   final LinkedList<Set<State>> queue = new LinkedList<Set<State>>();
   final Map<Set<State>, State> subsetToState = new HashMap<Set<State>, State>();
   seen.put(initialset, initialset);
   queue.add(initialset);
   a.initial = new State();
   subsetToState.put(initialset, a.initial);
   while (!queue.isEmpty()) {
     final Set<State> subset = queue.removeFirst();
     final State source = subsetToState.get(subset);
     // A subset state accepts as soon as one of its members accepts.
     for (State member : subset) {
       if (member.accept) {
         source.accept = true;
         break;
       }
     }
     for (int n = 0; n < points.length; n++) {
       final int point = points[n];
       // All states reachable from the subset on this start point.
       final Set<State> targets = new HashSet<State>();
       for (State member : subset) {
         for (Transition t : member.getTransitions()) {
           if (t.min <= point && point <= t.max) {
             targets.add(t.to);
           }
         }
       }
       if (!seen.containsKey(targets)) {
         seen.put(targets, targets);
         queue.add(targets);
         subsetToState.put(targets, new State());
       }
       final State target = subsetToState.get(targets);
       // The interval runs up to just before the next start point, or to the last code point.
       final int max = (n + 1 < points.length) ? points[n + 1] - 1 : Character.MAX_CODE_POINT;
       source.addTransition(new Transition(point, max, target));
     }
   }
   a.deterministic = true;
   a.clearNumberedStates();
   a.removeDeadTransitions();
 }
 /**
  * Minimizes the given automaton using Hopcroft's algorithm.
  *
  * <p>The automaton is determinized and totalized, its states are split into blocks by
  * partition refinement over the start points returned by {@code getStartPoints()}, and one new
  * state is then built per block. The automaton is modified in place: {@code a.initial} is
  * replaced and dead transitions are removed at the end. If, after determinization, the initial
  * state has exactly one transition that loops to itself over the whole {@code Transition}
  * value range, the method returns early without further changes.
  *
  * @param a the automaton to minimize (modified in place)
  */
 public static void minimizeHopcroft(Automaton a) {
   a.determinize();
   Set<Transition> tr = a.initial.getTransitions();
   // shortcut: a single full-range self-loop on the initial state is left as is
   if (tr.size() == 1) {
     Transition t = tr.iterator().next();
     if (t.to == a.initial && t.min == Transition.MIN_VALUE && t.max == Transition.MAX_VALUE)
       return;
   }
   a.totalize();
   // make arrays for numbered states and effective alphabet
   Set<State> ss = a.getStates();
   State[] states = new State[ss.size()];
   int number = 0;
   for (State q : ss) {
     states[number] = q;
     q.number = number++;
   }
   int[] sigma = a.getStartPoints();
   // initialize data structures
   // reverse.get(p).get(x): states that step to state p on symbol index x
   ArrayList<ArrayList<LinkedList<State>>> reverse = new ArrayList<ArrayList<LinkedList<State>>>();
   for (int q = 0; q < states.length; q++) {
     ArrayList<LinkedList<State>> v = new ArrayList<LinkedList<State>>();
     initialize(v, sigma.length);
     reverse.add(v);
   }
   // reverse_nonempty[p][x]: reverse.get(p).get(x) has at least one entry
   boolean[][] reverse_nonempty = new boolean[states.length][sigma.length];
   // partition.get(j): states currently in block j
   ArrayList<LinkedList<State>> partition = new ArrayList<LinkedList<State>>();
   initialize(partition, states.length);
   // block[q]: index of the block that state q currently belongs to
   int[] block = new int[states.length];
   // active[j][x]: states of block j with a non-empty reverse list on x;
   // active2[q][x]: the list node of state q in its active list (null if it has none)
   StateList[][] active = new StateList[states.length][sigma.length];
   StateListNode[][] active2 = new StateListNode[states.length][sigma.length];
   // pending work list of (block, symbol index) pairs; pending2[x][j] mirrors membership
   LinkedList<IntPair> pending = new LinkedList<IntPair>();
   boolean[][] pending2 = new boolean[sigma.length][states.length];
   // per-round scratch: states to split off, blocks to refine, and their membership flags
   ArrayList<State> split = new ArrayList<State>();
   boolean[] split2 = new boolean[states.length];
   ArrayList<Integer> refine = new ArrayList<Integer>();
   boolean[] refine2 = new boolean[states.length];
   ArrayList<ArrayList<State>> splitblock = new ArrayList<ArrayList<State>>();
   initialize(splitblock, states.length);
   for (int q = 0; q < states.length; q++) {
     splitblock.set(q, new ArrayList<State>());
     partition.set(q, new LinkedList<State>());
     for (int x = 0; x < sigma.length; x++) {
       reverse.get(q).set(x, new LinkedList<State>());
       active[q][x] = new StateList();
     }
   }
   // find initial partition and reverse edges
   for (int q = 0; q < states.length; q++) {
     State qq = states[q];
     // block 0 holds the accepting states, block 1 the non-accepting ones
     int j;
     if (qq.accept) j = 0;
     else j = 1;
     partition.get(j).add(qq);
     block[qq.number] = j;
     for (int x = 0; x < sigma.length; x++) {
       int y = sigma[x];
       State p = qq.step(y);
       reverse.get(p.number).get(x).add(qq);
       reverse_nonempty[p.number][x] = true;
     }
   }
   // initialize active sets
   for (int j = 0; j <= 1; j++)
     for (int x = 0; x < sigma.length; x++)
       for (State qq : partition.get(j))
         if (reverse_nonempty[qq.number][x]) active2[qq.number][x] = active[j][x].add(qq);
   // initialize pending
   for (int x = 0; x < sigma.length; x++) {
     // for each symbol, queue whichever of the two initial blocks has the smaller active list
     int a0 = active[0][x].size;
     int a1 = active[1][x].size;
     int j;
     if (a0 <= a1) j = 0;
     else j = 1;
     pending.add(new IntPair(j, x));
     pending2[x][j] = true;
   }
   // process pending until fixed point
   // k is the index of the next unused block (blocks 0 and 1 exist already)
   int k = 2;
   while (!pending.isEmpty()) {
     IntPair ip = pending.removeFirst();
     int p = ip.n1;
     int x = ip.n2;
     pending2[x][p] = false;
     // find states that need to be split off their blocks
     for (StateListNode m = active[p][x].first; m != null; m = m.next)
       for (State s : reverse.get(m.q.number).get(x))
         if (!split2[s.number]) {
           split2[s.number] = true;
           split.add(s);
           int j = block[s.number];
           splitblock.get(j).add(s);
           if (!refine2[j]) {
             refine2[j] = true;
             refine.add(j);
           }
         }
     // refine blocks
     for (int j : refine) {
       // only split when a proper subset of block j was selected
       if (splitblock.get(j).size() < partition.get(j).size()) {
         LinkedList<State> b1 = partition.get(j);
         LinkedList<State> b2 = partition.get(k);
         // move the selected states from block j into the new block k
         for (State s : splitblock.get(j)) {
           b1.remove(s);
           b2.add(s);
           block[s.number] = k;
           for (int c = 0; c < sigma.length; c++) {
             StateListNode sn = active2[s.number][c];
             if (sn != null && sn.sl == active[j][c]) {
               sn.remove();
               active2[s.number][c] = active[k][c].add(s);
             }
           }
         }
         // update pending
         for (int c = 0; c < sigma.length; c++) {
           int aj = active[j][c].size;
           int ak = active[k][c].size;
           // queue block j when it is not already pending and has the smaller non-empty
           // active list; otherwise queue the new block k
           if (!pending2[c][j] && 0 < aj && aj <= ak) {
             pending2[c][j] = true;
             pending.add(new IntPair(j, c));
           } else {
             pending2[c][k] = true;
             pending.add(new IntPair(k, c));
           }
         }
         k++;
       }
       // reset the per-round scratch state for block j
       for (State s : splitblock.get(j)) split2[s.number] = false;
       refine2[j] = false;
       splitblock.get(j).clear();
     }
     split.clear();
     refine.clear();
   }
   // make a new state for each equivalence class, set initial state
   State[] newstates = new State[k];
   for (int n = 0; n < newstates.length; n++) {
     State s = new State();
     newstates[n] = s;
     for (State q : partition.get(n)) {
       if (q == a.initial) a.initial = s;
       s.accept = q.accept;
       // the new state keeps the old index of a block member; the old state's number is
       // overwritten with its block index so transition targets can be remapped below
       s.number = q.number; // select representative
       q.number = n;
     }
   }
   // build transitions and set acceptance
   for (int n = 0; n < newstates.length; n++) {
     State s = newstates[n];
     s.accept = states[s.number].accept;
     // copy the representative's transitions, redirected to the new state of the target's block
     for (Transition t : states[s.number].transitions)
       s.transitions.add(new Transition(t.min, t.max, newstates[t.to.number]));
   }
   a.removeDeadTransitions();
 }
  /**
   * Determinizes the given automaton.
   *
   * <p>Returns immediately when the automaton is already flagged deterministic or is a
   * singleton. Otherwise a subset construction is run: each new state stands for a set of old
   * state numbers, and the outgoing intervals of a set are found by sweeping over the sorted
   * start and end points of all transitions leaving its members. The automaton is modified in
   * place and receives a new initial state and a new numbered-state array.
   *
   * <p>Worst case complexity: exponential in number of states.
   *
   * @param a the automaton to determinize (modified in place)
   */
  static void determinize(Automaton a) {
    if (a.deterministic || a.isSingleton()) {
      return;
    }

    // old states, indexed by state number
    final State[] allStates = a.getNumberedStates();

    // subset construction
    final boolean initAccept = a.initial.accept;
    final int initNumber = a.initial.number;
    a.initial = new State();
    // the start set holds only the old initial state's number and maps to the new initial state
    SortedIntSet.FrozenIntSet initialset = new SortedIntSet.FrozenIntSet(initNumber, a.initial);

    // sets whose outgoing transitions still have to be computed
    LinkedList<SortedIntSet.FrozenIntSet> worklist = new LinkedList<SortedIntSet.FrozenIntSet>();
    // already known sets and the new state created for each
    Map<SortedIntSet.FrozenIntSet, State> newstate =
        new HashMap<SortedIntSet.FrozenIntSet, State>();

    worklist.add(initialset);

    a.initial.accept = initAccept;
    newstate.put(initialset, a.initial);

    // new states in creation order; the array starts at 5 slots and is grown on demand below
    int newStateUpto = 0;
    State[] newStatesArray = new State[5];
    newStatesArray[newStateUpto] = a.initial;
    a.initial.number = newStateUpto;
    newStateUpto++;

    // like Set<Integer,PointTransitions>
    final PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    final SortedIntSet statesSet = new SortedIntSet(5);

    while (worklist.size() > 0) {
      SortedIntSet.FrozenIntSet s = worklist.removeFirst();

      // Collate all outgoing transitions by min/1+max:
      for (int i = 0; i < s.values.length; i++) {
        final State s0 = allStates[s.values[i]];
        for (int j = 0; j < s0.numTransitions; j++) {
          points.add(s0.transitionsArray[j]);
        }
      }

      if (points.count == 0) {
        // No outgoing transitions -- skip it
        continue;
      }

      points.sort();

      // start of the interval currently being swept; -1 until the first point is processed
      int lastPoint = -1;
      // number of transitions in the current interval whose target is accepting
      int accCount = 0;

      // the new state whose outgoing transitions are being built
      final State r = s.state;
      for (int i = 0; i < points.count; i++) {

        final int point = points.points[i].point;

        // a non-empty target set means [lastPoint, point - 1] needs a transition out of r
        if (statesSet.upto > 0) {
          assert lastPoint != -1;

          statesSet.computeHash();

          // look up the new state for the current target set, creating it on first sight
          State q = newstate.get(statesSet);
          if (q == null) {
            q = new State();
            final SortedIntSet.FrozenIntSet p = statesSet.freeze(q);
            worklist.add(p);
            // grow the new-state array when it is full
            if (newStateUpto == newStatesArray.length) {
              final State[] newArray =
                  new State
                      [ArrayUtil.oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
              System.arraycopy(newStatesArray, 0, newArray, 0, newStateUpto);
              newStatesArray = newArray;
            }
            newStatesArray[newStateUpto] = q;
            q.number = newStateUpto;
            newStateUpto++;
            // the new state accepts when at least one target in the set accepts
            q.accept = accCount > 0;
            newstate.put(p, q);
          } else {
            assert (accCount > 0 ? true : false) == q.accept
                : "accCount="
                    + accCount
                    + " vs existing accept="
                    + q.accept
                    + " states="
                    + statesSet;
          }

          r.addTransition(new Transition(lastPoint, point - 1, q));
        }

        // process transitions that end on this point
        // (closes an overlapping interval)
        Transition[] transitions = points.points[i].ends.transitions;
        int limit = points.points[i].ends.count;
        for (int j = 0; j < limit; j++) {
          final Transition t = transitions[j];
          final Integer num = t.to.number;
          statesSet.decr(num);
          accCount -= t.to.accept ? 1 : 0;
        }
        // empty the list so the point entry can be reused
        points.points[i].ends.count = 0;

        // process transitions that start on this point
        // (opens a new interval)
        transitions = points.points[i].starts.transitions;
        limit = points.points[i].starts.count;
        for (int j = 0; j < limit; j++) {
          final Transition t = transitions[j];
          final Integer num = t.to.number;
          statesSet.incr(num);
          accCount += t.to.accept ? 1 : 0;
        }
        lastPoint = point;
        points.points[i].starts.count = 0;
      }
      points.reset();
      // every opened interval must have been closed by the end of the sweep
      assert statesSet.upto == 0 : "upto=" + statesSet.upto;
    }
    a.deterministic = true;
    a.setNumberedStates(newStatesArray, newStateUpto);
  }
Beispiel #18
0
 @Test
 public void acceptImmediately() {
   // INITIAL --accept()--> ACCEPTED, without passing through RUNNING
   final State accepted = State.INITIAL.accept();
   assertEquals(State.ACCEPTED, accepted);
 }
Beispiel #19
0
 /**
  * Builds an NFA that recognizes a string matching this regular expression and then moves to a
  * state that accepts with the given integer code.
  */
 public State toNFA(int accept) {
   final State terminal = new State(); // fresh end state
   terminal.trans = new Transition[0]; // it has no outgoing transitions
   terminal.accept = accept; // and carries the requested accept code
   return this.toNFA(terminal); // build the recognizer leading into it
 }