public void testMultiCharEsc2() throws EXIException {
    // The \S escape must be reported as covering the entire set of XML characters.
    final EXIRegularExpression upperS = new EXIRegularExpression("\\S");
    final boolean coversAll = upperS.isEntireSetOfXMLCharacters();

    assertTrue(coversAll);
  }
  public void testNonUnicode3_1_0() throws Exception {
    // A pattern made of the single character U+FFFF must yield a restricted
    // set holding exactly one code point.
    final EXIRegularExpression singleChar = new EXIRegularExpression("\uFFFF");
    assertFalse(singleChar.isEntireSetOfXMLCharacters());

    final Set<Integer> cps = singleChar.getCodePoints();
    final int count = cps.size();
    assertTrue("CP Size=" + count, count == 1);
  }
 public void testHuge1() throws Exception {
   // \p{L} is expected to be reported as the entire set of XML characters.
   // NOTE: an earlier, disabled variant of this test instead checked that
   // getCodePoints() returned more than 45000 entries.
   final String letterCategory = "\\p{L}";
   final EXIRegularExpression letters = new EXIRegularExpression(letterCategory);
   assertTrue(letters.isEntireSetOfXMLCharacters());
 }
  public void testProdNumType() throws Exception {
    // Pattern under test: \d{3}-[A-Z]{2}|\d{7}
    final String prodNum = "\\d{3}-[A-Z]{2}|\\d{7}";
    final EXIRegularExpression prodNumExpr = new EXIRegularExpression(prodNum);

    // The pattern uses multi-character escapes; it is expected to be
    // reported as the entire set of XML characters.
    final boolean coversAll = prodNumExpr.isEntireSetOfXMLCharacters();
    assertTrue(coversAll);
  }
  public void testComplEscape2() throws EXIException {
    // The complement escape \P{Zs} must be reported as the entire set of XML characters.
    final EXIRegularExpression complement = new EXIRegularExpression("\\P{Zs}");
    final boolean coversAll = complement.isEntireSetOfXMLCharacters();

    assertTrue(coversAll);
  }
 public void testPattern22() throws EXIException {
   // Pattern taken from OpenOffice:
   //   ([$]?([^\. ']+|'[^']+'))?\.[$]?[A-Z]+[$]?[0-9]+
   // It is expected to be reported as the entire set of XML characters.
   final EXIRegularExpression openOffice =
       new EXIRegularExpression("([$]?([^\\. ']+|'[^']+'))?\\.[$]?[A-Z]+[$]?[0-9]+");
   final boolean coversAll = openOffice.isEntireSetOfXMLCharacters();
   assertTrue(coversAll);
 }
  public void testPattern17() throws EXIException {
    // Complement escape \P{L} embedded between literal characters; the
    // expression is expected to be reported as the entire set of XML characters.
    final EXIRegularExpression embedded = new EXIRegularExpression("abc\\P{L}def");
    final boolean coversAll = embedded.isEntireSetOfXMLCharacters();

    assertTrue(coversAll);
  }
  public void testBasicLatin() throws Exception {
    // Block escape \p{IsBasicLatin} is expected to be reported as the
    // entire set of XML characters.
    final String basicLatinBlock = "\\p{IsBasicLatin}";
    final EXIRegularExpression block = new EXIRegularExpression(basicLatinBlock);

    final boolean coversAll = block.isEntireSetOfXMLCharacters();
    assertTrue(coversAll);
  }
  public void testPattern15() throws EXIException {
    // Category escape \p{Nd} (number, decimal digit) is expected to be
    // reported as the entire set of XML characters.
    final EXIRegularExpression decimalDigits = new EXIRegularExpression("\\p{Nd}");
    final boolean coversAll = decimalDigits.isEntireSetOfXMLCharacters();

    assertTrue(coversAll);
  }
  public void testPattern16() throws EXIException {
    // The escapes \i and \c (the latter repeated with '*') are expected to be
    // reported as the entire set of XML characters.
    final EXIRegularExpression nameLike = new EXIRegularExpression("\\i\\c*");
    final boolean coversAll = nameLike.isEntireSetOfXMLCharacters();

    assertTrue(coversAll);
  }
  public void testCategoryEscape3() throws EXIException {
    // \p{IsBasicLatin} must be reported as the entire set of XML characters.
    final EXIRegularExpression basicLatin = new EXIRegularExpression("\\p{IsBasicLatin}");
    final boolean coversAll = basicLatin.isEntireSetOfXMLCharacters();

    assertTrue(coversAll);
  }
 public void testPattern30() throws EXIException {
   // Schema source <xs:pattern value="\\c"/> denotes a literal '\' followed by 'c'.
   // It is unrelated to the multi-character escape sequence \c, so a restricted
   // set of exactly two code points is expected.
   final String backslashThenC = "\\\\c";
   final EXIRegularExpression literal = new EXIRegularExpression(backslashThenC);
   assertFalse(literal.isEntireSetOfXMLCharacters());
   final int distinct = literal.getCodePoints().size();
   assertTrue(distinct == 2);
 }
  public void testSubtraction3() throws Exception {
    // Character class subtraction: [\p{Ll}\p{Lu}-[\p{IsBasicLatin}]] matches every
    // lowercase and uppercase Unicode letter except the ASCII letters.
    final String lettersMinusAscii = "[\\p{Ll}\\p{Lu}-[\\p{IsBasicLatin}]]";
    final EXIRegularExpression subtraction = new EXIRegularExpression(lettersMinusAscii);

    // Category escapes and a block escape are involved; the expression is
    // expected to be reported as the entire set of XML characters.
    final boolean coversAll = subtraction.isEntireSetOfXMLCharacters();
    assertTrue(coversAll);
  }
 @Test
 public void testRangeSpecialChars() throws Exception {
   // The range 03D7 .. 03DA differs between Unicode data versions:
   //   UnicodeData-3.1.0 lists 2 characters in it,
   //   UnicodeData-3.2.0 lists 4 characters (03D8 and 03D9 were added).
   // The test expects all four code points.
   final EXIRegularExpression range = new EXIRegularExpression("[\u03D7-\u03DA]");
   assertFalse(range.isEntireSetOfXMLCharacters());
   final int rangeSize = range.getCodePoints().size();
   assertTrue(rangeSize == 4);
 }
  public void testPattern24() throws EXIException, IOException {
    // Pattern under test:
    //   --0\d --0\d --0\d --0\d --\d1 --0\d --0\d
    final EXIRegularExpression dashedDigits =
        new EXIRegularExpression("--0\\d --0\\d --0\\d --0\\d --\\d1 --0\\d --0\\d");

    // The pattern uses multi-character escapes; it is expected to be
    // reported as the entire set of XML characters.
    final boolean coversAll = dashedDigits.isEntireSetOfXMLCharacters();
    assertTrue(coversAll);
  }
  public void testPattern21() throws EXIException {
    // Partial pattern taken from OpenOffice:
    //   ([$]?([^\. ']+|'[^']+'))
    final EXIRegularExpression partial = new EXIRegularExpression("([$]?([^\\. ']+|'[^']+'))");

    // TODO: the intended character set of this pattern is still unclear;
    // for now it is expected to be reported as the entire set.
    final boolean coversAll = partial.isEntireSetOfXMLCharacters();
    assertTrue(coversAll);
  }
  public void testMultiCharEsc1() throws EXIException {
    // \s must yield a restricted set of exactly four code points.
    final EXIRegularExpression lowerS = new EXIRegularExpression("\\s");
    assertFalse(lowerS.isEntireSetOfXMLCharacters());

    final Set<Integer> cps = lowerS.getCodePoints();
    final int count = cps.size();
    assertTrue("CP Size=" + count, count == 4);
  }
  public void testSSN() throws Exception {
    // Pattern built from the digits 0-9 and '-': eleven distinct code points.
    final String ssn = "[0-9]{3}-[0-9]{2}-[0-9]{4}";
    final EXIRegularExpression ssnExpr = new EXIRegularExpression(ssn);
    final int distinct = ssnExpr.getCodePoints().size();
    assertTrue(distinct == 11);

    // Spot-check the code <-> code point mapping and the coding length.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(ssnExpr.getCodePoints());
    assertTrue(charSet.getCode('5') == 6);
    assertTrue(charSet.getCodePoint(3) == '2');
    assertTrue(charSet.getCodingLength() == 4);
  }
  public void testAorBorCorMinus() throws Exception {
    // Alternation of four single characters: A, B, C and '-'.
    final EXIRegularExpression alternatives = new EXIRegularExpression("A|B|C|-");
    final int distinct = alternatives.getCodePoints().size();
    assertTrue(distinct == 4);

    // '-' is expected at index 0 and 'C' at code 3.
    final RestrictedCharacterSet charSet =
        new CodePointCharacterSet(alternatives.getCodePoints());
    assertTrue(charSet.getCode('C') == 3);
    assertTrue(charSet.getCodePoint(0) == '-');
    assertTrue(charSet.getCodingLength() == 3);
  }
  public void testMaleFemale() throws Exception {
    // "male|female" uses five distinct letters: a, e, f, l, m.
    final String genders = "male|female";
    final EXIRegularExpression genderExpr = new EXIRegularExpression(genders);
    final int distinct = genderExpr.getCodePoints().size();
    assertTrue(distinct == 5);

    // 'a' is expected at code 0 and 'l' at index 3.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(genderExpr.getCodePoints());
    assertTrue(charSet.getCode('a') == 0);
    assertTrue(charSet.getCodePoint(3) == 'l');
    assertTrue(charSet.getCodingLength() == 3);
  }
  public void testPattern10() throws EXIException {
    // Eight digits, each restricted to the range 2-4.
    final EXIRegularExpression digits2to4 = new EXIRegularExpression("[2-4]{8}");

    // '2' is expected at index 0 and '4' at code 2.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(digits2to4.getCodePoints());
    assertTrue(charSet.getCodePoint(0) == '2');
    assertTrue(charSet.getCode('4') == 2);
    assertTrue(charSet.getCodingLength() == 2);
  }
  public void testPattern11() throws EXIException {
    // 111 digits, each restricted to the range 3-9.
    final EXIRegularExpression digits3to9 = new EXIRegularExpression("[3-9]{111}");

    // '4' is expected at index 1, and index 1 must map back to '4'.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(digits3to9.getCodePoints());
    assertTrue(charSet.getCodePoint(1) == '4');
    assertTrue(charSet.getCode('4') == 1);
    assertTrue(charSet.getCodingLength() == 3);
  }
  public void testRange2() throws Exception {
    // Three overlapping uppercase ranges; together they span 26 code points.
    final String ranges = "[A-Z][B-Z][C-Z]";
    final EXIRegularExpression upperRanges = new EXIRegularExpression(ranges);
    final int distinct = upperRanges.getCodePoints().size();
    assertTrue(distinct == 26);

    // Check the first, a middle and the last letter, then the coding length.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(upperRanges.getCodePoints());
    assertTrue(charSet.getCode('A') == 0);
    assertTrue(charSet.getCode('L') == 11);
    assertTrue(charSet.getCode('Z') == 25);
    assertTrue(charSet.getCodingLength() == 5);
  }
  public void testRange7() throws EXIException {
    // Exactly one of the letters x, y or z (e.g. "x", "y", "z").
    final EXIRegularExpression xyz = new EXIRegularExpression("[xyz]");

    // The three letters are expected at indices 0, 1 and 2 respectively.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(xyz.getCodePoints());
    assertTrue(charSet.getCodePoint(0) == 'x');
    assertTrue(charSet.getCodePoint(1) == 'y');
    assertTrue(charSet.getCodePoint(2) == 'z');
    assertTrue(charSet.getCodingLength() == 2);
  }
 public void testPattern26() throws EXIException {
   // Character class holding A, B, C and the non-BMP code point 0x10FFF
   // (which would be written "&#x10FFF;" in XML), i.e. "[ABC&#x10FFF;]{1}".
   final StringBuilder pattern = new StringBuilder("[ABC");
   pattern.appendCodePoint(0x10FFF);
   pattern.append("]{1}");

   final EXIRegularExpression nonBmp = new EXIRegularExpression(pattern.toString());
   // NOTE: a disabled alternative expectation was a restricted set of the
   // three code points A, B and C; currently the entire set is expected.
   assertTrue(nonBmp.isEntireSetOfXMLCharacters());
 }
  public void testRange8() throws EXIException {
    // Zero or more occurrences of a lowercase letter from a to z.
    final EXIRegularExpression lowercase = new EXIRegularExpression("([a-z])*");

    // 'a' at index 0, 'c' at code 2 and 'z' at index 25 are expected.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(lowercase.getCodePoints());
    assertTrue(charSet.getCodePoint(0) == 'a');
    assertTrue(charSet.getCode('c') == 2);
    assertTrue(charSet.getCodePoint(25) == 'z');
    assertTrue(charSet.getCodingLength() == 5);
  }
  public void testPattern13() throws EXIException {
    // Restricted character set derived from the \s escape.
    final EXIRegularExpression lowerS = new EXIRegularExpression("\\s");

    // Tab is expected at index 0 and space at code 3; a character outside
    // the set ('?') must map to Constants.NOT_FOUND.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(lowerS.getCodePoints());
    assertTrue(charSet.getCodePoint(0) == '\t');
    assertTrue(charSet.getCode(' ') == 3);
    assertTrue(charSet.getCode('?') == Constants.NOT_FOUND);
    assertTrue(charSet.getCodingLength() == 3);
  }
  public void testSubtraction2() throws Exception {
    // Nested class subtraction that matches any character of the string 0123789.
    final String nested = "[0-9-[0-6-[0-3]]]";
    final EXIRegularExpression subtraction = new EXIRegularExpression(nested);
    final Set<Integer> cps = subtraction.getCodePoints();
    final int distinct = cps.size();
    assertTrue(distinct == 7);

    // '7' is expected at code 4 and '8' at index 5.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(subtraction.getCodePoints());
    assertTrue(charSet.getCode('7') == 4);
    assertTrue(charSet.getCodePoint(5) == '8');
    assertTrue(charSet.getCodingLength() == 3);
  }
  public void testRange4() throws EXIException {
    // Five consecutive digit classes (e.g. "12345"): ten distinct code points.
    final EXIRegularExpression fiveDigits =
        new EXIRegularExpression("[0-9][0-9][0-9][0-9][0-9]");
    final int distinct = fiveDigits.getCodePoints().size();
    assertTrue(distinct == 10);

    // Each checked digit is expected to map to the code of the same value.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(fiveDigits.getCodePoints());
    assertTrue(charSet.getCode('0') == 0);
    assertTrue(charSet.getCode('5') == 5);
    assertTrue(charSet.getCode('9') == 9);
    assertTrue(charSet.getCodingLength() == 4);
  }
  public void testPattern18() throws EXIException {
    // Pattern of the language type: ([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})*
    final EXIRegularExpression language =
        new EXIRegularExpression("([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})*");

    // '-' is expected at index 0, '0' at index 1 and '8' at code 9; a
    // character outside the set ('?') must map to Constants.NOT_FOUND.
    final RestrictedCharacterSet charSet = new CodePointCharacterSet(language.getCodePoints());
    assertTrue(charSet.getCodePoint(0) == '-');
    assertTrue(charSet.getCodePoint(1) == '0');
    assertTrue(charSet.getCode('8') == 9);
    assertTrue(charSet.getCode('?') == Constants.NOT_FOUND);
    assertTrue(charSet.getCodingLength() == 6);
  }