/**
 * Cross-checks the character-based and byte-based run automata built from the same
 * {@link Automaton}: for {@code 1000 * RANDOM_MULTIPLIER} rounds, a string is generated and
 * both automata must return the same result for it (the byte automaton is fed the string's
 * UTF-8 encoding).
 *
 * @param automaton the automaton from which both run automata are constructed
 * @throws Exception if building a string from generated code points fails, or on encoding errors
 */
private void assertAutomaton(Automaton automaton) throws Exception {
    final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(automaton);
    final ByteRunAutomaton byteRunner = new ByteRunAutomaton(automaton);
    final AutomatonTestUtil.RandomAcceptedStrings acceptedSource =
        new AutomatonTestUtil.RandomAcceptedStrings(automaton);

    final int rounds = 1000 * RANDOM_MULTIPLIER;
    for (int round = 0; round < rounds; round++) {
      final String candidate;
      if (!random.nextBoolean()) {
        // string taken from the accepted-strings generator: will be accepted
        final int[] cps = acceptedSource.getRandomAcceptedString(random);
        try {
          candidate = UnicodeUtil.newString(cps, 0, cps.length);
        } catch (Exception failure) {
          // dump the offending code points (hex) before propagating the failure
          System.out.println(cps.length + " codepoints:");
          for (int cp : cps) {
            System.out.println("  " + Integer.toHexString(cp));
          }
          throw failure;
        }
      } else {
        // arbitrary unicode string: likely not accepted
        candidate = _TestUtil.randomUnicodeString(random);
      }

      final byte[] utf8 = candidate.getBytes("UTF-8");
      final boolean charVerdict = charRunner.run(candidate);
      final boolean byteVerdict = byteRunner.run(utf8, 0, utf8.length);
      assertEquals(charVerdict, byteVerdict);
    }
  }
 /**
  * Smoke test: converts a small regular expression to an automaton, checks that the automaton
  * reports itself as deterministic, and runs three sample strings through a
  * {@link CharacterRunAutomaton} built from it.
  */
 public void testSmoke() {
   final Automaton automaton = new RegExp("a(b+|c+)d").toAutomaton();
   assertTrue(automaton.isDeterministic());

   final CharacterRunAutomaton matcher = new CharacterRunAutomaton(automaton);
   // two inputs expected to match, one expected not to
   assertTrue(matcher.run("abbbbbd"));
   assertTrue(matcher.run("acd"));
   assertFalse(matcher.run("ad"));
 }
  /**
   * Regression-style case: a fixed input string must be accepted both by the character-based
   * run automaton and, after UTF-8 encoding, by the byte-based run automaton built from the
   * same regular expression.
   *
   * @throws Exception on encoding errors
   */
  public void testSpecialCase2() throws Exception {
    final String text =
        "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
    final Automaton dfa = new RegExp(".+\u0775").toAutomaton();
    final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(dfa);
    final ByteRunAutomaton byteRunner = new ByteRunAutomaton(dfa);

    // character-level check comes first
    assertTrue(charRunner.run(text));

    // NOTE(review): an earlier remark flagged the byte-level assertion as the failing one;
    // confirm whether that still applies.
    final byte[] utf8 = text.getBytes("UTF-8");
    assertTrue(byteRunner.run(utf8, 0, utf8.length));
  }
  /**
   * Another fixed-input case: the sample string must be accepted by the character-based run
   * automaton and, once encoded as UTF-8, by the byte-based run automaton derived from the
   * same regular expression.
   *
   * @throws Exception on encoding errors
   */
  public void testSpecialCase3() throws Exception {
    final String text =
        "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
    final Automaton dfa = new RegExp("(\\鯺)*(.)*\\Ӕ").toAutomaton();
    final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(dfa);
    final ByteRunAutomaton byteRunner = new ByteRunAutomaton(dfa);

    // character-level acceptance
    assertTrue(charRunner.run(text));

    // byte-level acceptance over the UTF-8 form of the same string
    final byte[] utf8 = text.getBytes("UTF-8");
    assertTrue(byteRunner.run(utf8, 0, utf8.length));
  }
  /**
   * Empty-input case: both the character-based and the byte-based run automaton built from
   * the same regular expression must have an accepting initial state and must accept empty
   * input.
   */
  public void testSpecialCase() {
    final Automaton dfa = new RegExp(".?").toAutomaton();
    final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(dfa);
    final ByteRunAutomaton byteRunner = new ByteRunAutomaton(dfa);

    // character automaton: initial state accepts, and so do the empty string / empty char range
    assertTrue(charRunner.isAccept(charRunner.getInitialState()));
    assertTrue(charRunner.run(""));
    final char[] noChars = new char[0];
    assertTrue(charRunner.run(noChars, 0, 0));

    // byte automaton: initial state accepts, and so does an empty byte range
    assertTrue(byteRunner.isAccept(byteRunner.getInitialState()));
    final byte[] noBytes = new byte[0];
    assertTrue(byteRunner.run(noBytes, 0, 0));
  }