private void assertAutomaton(Automaton automaton) throws Exception { CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton); ByteRunAutomaton bra = new ByteRunAutomaton(automaton); final AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton); int num = 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { final String string; if (random.nextBoolean()) { // likely not accepted string = _TestUtil.randomUnicodeString(random); } else { // will be accepted int[] codepoints = ras.getRandomAcceptedString(random); try { string = UnicodeUtil.newString(codepoints, 0, codepoints.length); } catch (Exception e) { System.out.println(codepoints.length + " codepoints:"); for (int j = 0; j < codepoints.length; j++) { System.out.println(" " + Integer.toHexString(codepoints[j])); } throw e; } } byte bytes[] = string.getBytes("UTF-8"); assertEquals(cra.run(string), bra.run(bytes, 0, bytes.length)); } }
/**
 * Basic sanity check for regular expressions: compiles {@code a(b+|c+)d},
 * verifies the resulting automaton reports itself as deterministic, and checks
 * two matching inputs and one non-matching input.
 */
public void testSmoke() {
  final Automaton automaton = new RegExp("a(b+|c+)d").toAutomaton();
  assertTrue(automaton.isDeterministic());

  final CharacterRunAutomaton matcher = new CharacterRunAutomaton(automaton);
  assertTrue(matcher.run("abbbbbd"));
  assertTrue(matcher.run("acd"));
  // the group requires at least one 'b' or 'c' between 'a' and 'd'
  assertFalse(matcher.run("ad"));
}
public void testSpecialCase2() throws Exception { RegExp re = new RegExp(".+\u0775"); String input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775"; Automaton automaton = re.toAutomaton(); CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton); ByteRunAutomaton bra = new ByteRunAutomaton(automaton); assertTrue(cra.run(input)); byte[] bytes = input.getBytes("UTF-8"); assertTrue(bra.run(bytes, 0, bytes.length)); // this one fails! }
/**
 * Fixed-input case for a pattern with two starred groups followed by an
 * escaped literal: the input must be accepted both by the character run
 * automaton and, as UTF-8 bytes, by the byte run automaton.
 *
 * @throws Exception if UTF-8 encoding of the input fails
 */
public void testSpecialCase3() throws Exception {
  final String input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
  final Automaton automaton = new RegExp("(\\鯺)*(.)*\\Ӕ").toAutomaton();

  final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byteRunner = new ByteRunAutomaton(automaton);

  final boolean acceptedAsChars = charRunner.run(input);
  assertTrue(acceptedAsChars);

  final byte[] utf8 = input.getBytes("UTF-8");
  final boolean acceptedAsBytes = byteRunner.run(utf8, 0, utf8.length);
  assertTrue(acceptedAsBytes);
}
public void testSpecialCase() { RegExp re = new RegExp(".?"); Automaton automaton = re.toAutomaton(); CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton); ByteRunAutomaton bra = new ByteRunAutomaton(automaton); // make sure character dfa accepts empty string assertTrue(cra.isAccept(cra.getInitialState())); assertTrue(cra.run("")); assertTrue(cra.run(new char[0], 0, 0)); // make sure byte dfa accepts empty string assertTrue(bra.isAccept(bra.getInitialState())); assertTrue(bra.run(new byte[0], 0, 0)); }