public void testSSN() throws Exception {
    // Social-security-number shape: 3 digits, dash, 2 digits, dash, 4 digits.
    final String pattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    // Ten digits plus the dash.
    assertTrue(expression.getCodePoints().size() == 11);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Expected values correspond to ascending code point order: '-' first, then '0'..'9'.
    assertTrue(charset.getCode('5') == 6);
    assertTrue(charset.getCodePoint(3) == '2');
    assertTrue(charset.getCodingLength() == 4);
  }
  public void testAorBorCorMinus() throws Exception {
    // Alternation of four single-character branches.
    final String pattern = "A|B|C|-";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    assertTrue(expression.getCodePoints().size() == 4);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // '-' is expected at code 0, ahead of the letters; 'C' is expected at the last code.
    assertTrue(charset.getCode('C') == 3);
    assertTrue(charset.getCodePoint(0) == '-');
    assertTrue(charset.getCodingLength() == 3);
  }
  public void testMaleFemale() throws Exception {
    final String pattern = "male|female";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    // Distinct characters across both words: a, e, f, l, m.
    assertTrue(expression.getCodePoints().size() == 5);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Expected layout is alphabetical: a=0, e=1, f=2, l=3, m=4.
    assertTrue(charset.getCode('a') == 0);
    assertTrue(charset.getCodePoint(3) == 'l');
    assertTrue(charset.getCodingLength() == 3);
  }
  public void testRange2() throws Exception {
    // Three overlapping upper-case ranges; their union is expected to be A..Z.
    final String pattern = "[A-Z][B-Z][C-Z]";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    assertTrue(expression.getCodePoints().size() == 26);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // First, a middle and the last letter of the alphabet.
    assertTrue(charset.getCode('A') == 0);
    assertTrue(charset.getCode('L') == 11);
    assertTrue(charset.getCode('Z') == 25);
    assertTrue(charset.getCodingLength() == 5);
  }
  public void testSubtraction2() throws Exception {
    // Nested subtraction: digits minus (0-6 minus 0-3), i.e. any character
    // in the string 0123789.
    final String pattern = "[0-9-[0-6-[0-3]]]";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    Set<Integer> points = expression.getCodePoints();
    assertTrue(points.size() == 7);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Expected order 0,1,2,3,7,8,9 puts '7' at code 4 and '8' at code 5.
    assertTrue(charset.getCode('7') == 4);
    assertTrue(charset.getCodePoint(5) == '8');
    assertTrue(charset.getCodingLength() == 3);
  }
  public void testRange4() throws EXIException {
    // Five digit classes in a row, e.g. "12345"; only ten distinct characters.
    EXIRegularExpression expression = new EXIRegularExpression("[0-9][0-9][0-9][0-9][0-9]");
    assertTrue(expression.getCodePoints().size() == 10);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Each digit is expected to map onto its own numeric value.
    assertTrue(charset.getCode('0') == 0);
    assertTrue(charset.getCode('5') == 5);
    assertTrue(charset.getCode('9') == 9);
    assertTrue(charset.getCodingLength() == 4);
  }
  // consonants only: lowercase letters with the vowels (a, e, i, o, u) left out
  public void testRange10() throws Exception {
    // Five ranges that skip a, e, i, o and u.
    final String pattern = "[b-df-hj-np-tv-z]";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    Set<Integer> points = expression.getCodePoints();
    assertTrue(points.size() == 21);
    // 'a' lies outside every range, 'b' opens the first one.
    assertFalse(points.contains(codePoint('a')));
    assertTrue(points.contains(codePoint('b')));

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    assertTrue(charset.getCode('b') == 0);
    assertTrue(charset.getCodePoint(2) == 'd');
    assertTrue(charset.getCodingLength() == 5);
  }
  public void testRange3() throws Exception {
    // Three identical upper-case classes, e.g. "ABC".
    EXIRegularExpression expression = new EXIRegularExpression("[A-Z][A-Z][A-Z]");
    assertTrue(expression.getCodePoints().size() == 26);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Character -> code lookups.
    assertTrue(charset.getCode('A') == 0);
    assertTrue(charset.getCode('L') == 11);
    assertTrue(charset.getCode('Z') == 25);
    // Code -> character lookup.
    assertTrue(charset.getCodePoint(1) == 'B');
    assertTrue(charset.getCodingLength() == 5);
  }
 public void testPattern30() throws EXIException {
   // Schema source: <xs:pattern value="\\c"/>, i.e. an escaped backslash
   // followed by the letter 'c'. This is NOT the multi-character escape \c.
   final String pattern = "\\\\c";
   EXIRegularExpression expression = new EXIRegularExpression(pattern);
   assertFalse(expression.isEntireSetOfXMLCharacters());
   // Exactly the backslash and 'c'.
   assertTrue(expression.getCodePoints().size() == 2);
 }
  // consonants (lowercase letters minus the vowels); same character set as
  // "[b-df-hj-np-tv-z]", but expressed with character class subtraction
  public void testSubtraction1() throws Exception {
    // a..z with the five vowels subtracted.
    final String pattern = "[a-z-[aeiuo]]";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    Set<Integer> points = expression.getCodePoints();
    assertTrue(points.size() == 21);

    // Letters that survive the subtraction ...
    assertTrue(points.contains(codePoint('b')));
    assertTrue(points.contains(codePoint('d')));

    // ... and letters that are removed by it.
    assertFalse(points.contains(codePoint('e')));
    assertFalse(points.contains(codePoint('u')));

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Expected order starts b,c,d,f,g,h,j: 'd' at code 2, 'j' at code 6.
    assertTrue(charset.getCode('d') == 2);
    assertTrue(charset.getCodePoint(6) == 'j');
    assertTrue(charset.getCodingLength() == 5);
  }
  public void testNonUnicode3_1_0() throws Exception {
    // The pattern is the single character U+FFFF.
    final String pattern = "\uFFFF";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);

    assertFalse(expression.isEntireSetOfXMLCharacters());
    Set<Integer> points = expression.getCodePoints();
    // The actual size is put into the message to ease diagnosis on failure.
    assertTrue("CP Size=" + points.size(), points.size() == 1);
  }
 @Test
 public void testRangeSpecialChars() throws Exception {
   // Range U+03D7 .. U+03DA:
   //   UnicodeData-3.1.0 lists 2 characters in it,
   //   UnicodeData-3.2.0 lists 4 (03D8 and 03D9 in addition).
   final String pattern = "[\u03D7-\u03DA]";
   EXIRegularExpression expression = new EXIRegularExpression(pattern);
   assertFalse(expression.isEntireSetOfXMLCharacters());
   // All four code points of the range are expected.
   assertTrue(expression.getCodePoints().size() == 4);
 }
  public void testMultiCharEsc1() throws EXIException {
    // Multi-character escape \s.
    EXIRegularExpression expression = new EXIRegularExpression("\\s");

    assertFalse(expression.isEntireSetOfXMLCharacters());
    Set<Integer> points = expression.getCodePoints();
    // Four code points are expected; the actual size goes into the failure message.
    assertTrue("CP Size=" + points.size(), points.size() == 4);
  }
  public void testPattern10() throws EXIException {
    // Eight characters, each restricted to the digits 2, 3 or 4.
    EXIRegularExpression expression = new EXIRegularExpression("[2-4]{8}");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // '2' is expected at the first code, '4' at the last.
    assertTrue(charset.getCodePoint(0) == '2');
    assertTrue(charset.getCode('4') == 2);
    assertTrue(charset.getCodingLength() == 2);
  }
  public void testPattern11() throws EXIException {
    // 111 characters, each restricted to the digits 3..9.
    EXIRegularExpression expression = new EXIRegularExpression("[3-9]{111}");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Both lookup directions are checked for '4' <-> code 1.
    assertTrue(charset.getCodePoint(1) == '4');
    assertTrue(charset.getCode('4') == 1);
    assertTrue(charset.getCodingLength() == 3);
  }
  public void testRange8() throws EXIException {
    // Zero or more lower-case letters from a to z (grouped, starred).
    EXIRegularExpression expression = new EXIRegularExpression("([a-z])*");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Both ends of the alphabet plus one letter in between.
    assertTrue(charset.getCodePoint(0) == 'a');
    assertTrue(charset.getCode('c') == 2);
    assertTrue(charset.getCodePoint(25) == 'z');
    assertTrue(charset.getCodingLength() == 5);
  }
  public void testRange7() throws EXIException {
    // Exactly one of the letters x, y or z, e.g. "x", "y", "z".
    EXIRegularExpression expression = new EXIRegularExpression("[xyz]");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Every code of the set is checked.
    assertTrue(charset.getCodePoint(0) == 'x');
    assertTrue(charset.getCodePoint(1) == 'y');
    assertTrue(charset.getCodePoint(2) == 'z');
    assertTrue(charset.getCodingLength() == 2);
  }
  public void testPattern13() throws EXIException {
    // Multi-character escape \s used as the whole pattern.
    EXIRegularExpression expression = new EXIRegularExpression("\\s");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Tab is expected at the first code, the blank at code 3.
    assertTrue(charset.getCodePoint(0) == '\t');
    assertTrue(charset.getCode(' ') == 3);
    // A character outside the set must be reported as not found.
    assertTrue(charset.getCode('?') == Constants.NOT_FOUND);
    assertTrue(charset.getCodingLength() == 3);
  }
  public void testPattern18() throws EXIException {
    // Pattern for "language": ([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})*
    EXIRegularExpression expression =
        new EXIRegularExpression("([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})*");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // The dash is expected in front of the digits.
    assertTrue(charset.getCodePoint(0) == '-');
    assertTrue(charset.getCodePoint(1) == '0');
    assertTrue(charset.getCode('8') == 9);
    // A character outside the set must be reported as not found.
    assertTrue(charset.getCode('?') == Constants.NOT_FOUND);
    assertTrue(charset.getCodingLength() == 6);
  }
  public void testRange5() throws EXIException {
    // Three letters, each lower- or upper-case, e.g. "aXy".
    EXIRegularExpression expression = new EXIRegularExpression("[a-zA-Z][a-zA-Z][a-zA-Z]");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());

    // Digits are not part of the set.
    assertTrue(charset.getCode('0') == Constants.NOT_FOUND);
    // Upper-case letters are expected before lower-case ones.
    assertTrue(charset.getCode('A') == 0);
    assertTrue(charset.getCode('a') == 26);
    assertTrue(charset.getCodingLength() == 6);
  }
  public void testSubtraction4() throws EXIException {
    // Nested subtraction [A-Z-[C-X-[M-N]]]* leaves: A, B, M, N, Y, Z
    EXIRegularExpression expression = new EXIRegularExpression("[A-Z-[C-X-[M-N]]]*");
    Set<Integer> points = expression.getCodePoints();

    assertTrue(points.size() == 6);

    // Every remaining letter is checked individually.
    assertTrue(points.contains(codePoint('A')));
    assertTrue(points.contains(codePoint('B')));
    assertTrue(points.contains(codePoint('M')));
    assertTrue(points.contains(codePoint('N')));
    assertTrue(points.contains(codePoint('Y')));
    assertTrue(points.contains(codePoint('Z')));

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Expected order A,B,M,N,Y,Z: 'M' at code 2, 'Z' at code 5.
    assertTrue(charset.getCode('M') == 2);
    assertTrue(charset.getCodePoint(5) == 'Z');
    assertTrue(charset.getCodingLength() == 3);
  }
  public void testRange9() throws EXIException {
    // Alternating lower/upper-case pairs: "sToP" is validated by this
    // pattern, "Stop", "STOP" and "stop" are not.
    EXIRegularExpression expression = new EXIRegularExpression("([a-z][A-Z])+");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Upper-case letters are expected at codes 0..25, lower-case ones after them.
    assertTrue(charset.getCodePoint(0) == 'A');
    assertTrue(charset.getCodePoint(25) == 'Z');
    assertTrue(charset.getCode('c') == 28);
    assertTrue(charset.getCodingLength() == 6);
  }
  public void testRange11() throws Exception {
    // Matching strings: 1z, 2z, pz, rz
    // Non-matching strings: cz,dz, 0sz
    final String pattern = "[0-9pqr]z";
    EXIRegularExpression expression = new EXIRegularExpression(pattern);
    Set<Integer> points = expression.getCodePoints();
    // Ten digits plus the four letters p, q, r and z.
    assertTrue(points.size() == (10 + 4));
    assertTrue(points.contains(codePoint('0')));
    assertTrue(points.contains(codePoint('9')));
    assertTrue(points.contains(codePoint('p')));
    assertTrue(points.contains(codePoint('q')));
    assertTrue(points.contains(codePoint('r')));
    assertTrue(points.contains(codePoint('z')));

    // A letter that appears nowhere in the pattern.
    assertFalse(points.contains(codePoint('h')));

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Digits are expected first, 'z' at the last code.
    assertTrue(charset.getCode('0') == 0);
    assertTrue(charset.getCodePoint(3) == '3');
    assertTrue(charset.getCodePoint(13) == 'z');
    assertTrue(charset.getCodingLength() == 4);
  }
  public void testPattern19() throws EXIException {
    // Social Security Number v1, matches e.g. 078-05-1120
    EXIRegularExpression expression = new EXIRegularExpression("[0-9]{3}-[0-9]{2}-[0-9]{4}");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // The dash is expected ahead of the digits, shifting every digit code by one.
    assertTrue(charset.getCodePoint(0) == '-');
    assertTrue(charset.getCodePoint(1) == '0');
    assertTrue(charset.getCode('8') == 9);
    // A character outside the set must be reported as not found.
    assertTrue(charset.getCode('?') == Constants.NOT_FOUND);
    assertTrue(charset.getCodingLength() == 4);
  }
  public void testPattern9() throws EXIException {
    // Restricted "password": exactly eight characters in a row, each a
    // lower- or upper-case letter from a to z or a digit from 0 to 9.
    EXIRegularExpression expression = new EXIRegularExpression("[a-zA-Z0-9]{8}");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Expected order: digits, then upper-case, then lower-case letters.
    assertTrue(charset.getCodePoint(0) == '0');
    assertTrue(charset.getCodePoint(25) == 'P');
    assertTrue(charset.getCode('c') == 38);
    assertTrue(charset.getCodingLength() == 6);
  }
  public void testSubtraction5() throws EXIException {
    // Single-level subtraction [A-Z-[C-X]] leaves: A, B, Y, Z
    EXIRegularExpression expression = new EXIRegularExpression("[A-Z-[C-X]]");

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    // Letters in front of the removed C..X block.
    assertTrue(charset.getCodePoint(0) == 'A');
    assertTrue(charset.getCodePoint(1) == 'B');
    assertTrue(charset.size() == 4);
    // Letters behind the removed block.
    assertTrue(charset.getCodePoint(2) == 'Y');
    assertTrue(charset.getCodePoint(3) == 'Z');
    // A character outside the set must be reported as not found.
    assertTrue(charset.getCode('?') == Constants.NOT_FOUND);
    assertTrue(charset.getCodingLength() == 3);
  }
 public void testPattern36() throws EXIException {
   // Schema source: <xs:pattern value="[0-9]{3}-[0-9]{2}-[0-9]{4}"/>
   final String pattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";
   EXIRegularExpression expression = new EXIRegularExpression(pattern);
   assertFalse(expression.isEntireSetOfXMLCharacters());
   // Ten digits plus the dash.
   assertTrue(expression.getCodePoints().size() == 11);
 }
 public void testPattern26_() throws EXIException {
   // One occurrence of a character out of A, B and C.
   final String pattern = "[ABC]{1}";
   EXIRegularExpression expression = new EXIRegularExpression(pattern);
   assertFalse(expression.isEntireSetOfXMLCharacters());
   assertTrue(expression.getCodePoints().size() == 3);
 }
 public void testPattern33() throws EXIException {
   // Schema source: <xs:pattern value="T529H72b7opQKe3MedjI8"/>
   final String pattern = "T529H72b7opQKe3MedjI8";
   EXIRegularExpression expression = new EXIRegularExpression(pattern);
   assertFalse(expression.isEntireSetOfXMLCharacters());
   // The literal has 21 characters, 18 of them distinct ('2', '7' and 'e' repeat).
   assertTrue(expression.getCodePoints().size() == 18);
 }
 public void testPattern32() throws EXIException {
   // Schema source: <xs:pattern value="\?"/>, an escaped question mark,
   // i.e. the literal '?' only.
   final String pattern = "\\?";
   EXIRegularExpression expression = new EXIRegularExpression(pattern);
   assertFalse(expression.isEntireSetOfXMLCharacters());
   assertTrue(expression.getCodePoints().size() == 1);
 }