/**
 * Social-security-number pattern: the character set is the hyphen plus the
 * ten digits. The expected codes below place '-' before the digits.
 */
public void testSSN() throws Exception {
    final String regex = "[0-9]{3}-[0-9]{2}-[0-9]{4}";
    EXIRegularExpression expr = new EXIRegularExpression(regex);

    int size = expr.getCodePoints().size();
    assertTrue(size == 11);

    RestrictedCharacterSet charSet = new CodePointCharacterSet(expr.getCodePoints());
    assertTrue(charSet.getCode('5') == 6);
    assertTrue(charSet.getCodePoint(3) == '2');
    assertTrue(charSet.getCodingLength() == 4);
}
/**
 * Alternation of four single characters: 'A', 'B', 'C' and '-'.
 * The hyphen is expected at code 0, ahead of the letters.
 */
public void testAorBorCorMinus() throws Exception {
    final String regex = "A|B|C|-";
    EXIRegularExpression expr = new EXIRegularExpression(regex);

    int size = expr.getCodePoints().size();
    assertTrue(size == 4);

    RestrictedCharacterSet charSet = new CodePointCharacterSet(expr.getCodePoints());
    assertTrue(charSet.getCode('C') == 3);
    assertTrue(charSet.getCodePoint(0) == '-');
    assertTrue(charSet.getCodingLength() == 3);
}
public void testMaleFemale() throws Exception { EXIRegularExpression re = new EXIRegularExpression("male|female"); // aeflm assertTrue(re.getCodePoints().size() == 5); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('a') == 0); assertTrue(rcs.getCodePoint(3) == 'l'); assertTrue(rcs.getCodingLength() == 3); }
/**
 * Three overlapping upper-case ranges (A-Z, B-Z, C-Z); their union is
 * expected to be the 26 letters A..Z.
 */
public void testRange2() throws Exception {
    final String regex = "[A-Z][B-Z][C-Z]";
    EXIRegularExpression expr = new EXIRegularExpression(regex);

    int size = expr.getCodePoints().size();
    assertTrue(size == 26);

    RestrictedCharacterSet charSet = new CodePointCharacterSet(expr.getCodePoints());
    assertTrue(charSet.getCode('A') == 0);
    assertTrue(charSet.getCode('L') == 11);
    assertTrue(charSet.getCode('Z') == 25);
    assertTrue(charSet.getCodingLength() == 5);
}
public void testSubtraction2() throws Exception { // matches any character in the string 0123789 EXIRegularExpression re = new EXIRegularExpression("[0-9-[0-6-[0-3]]]"); Set<Integer> codePoints = re.getCodePoints(); assertTrue(codePoints.size() == 7); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('7') == 4); assertTrue(rcs.getCodePoint(5) == '8'); assertTrue(rcs.getCodingLength() == 3); }
public void testRange4() throws EXIException { String regex = "[0-9][0-9][0-9][0-9][0-9]"; // e.g. "12345" EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.getCodePoints().size() == 10); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('0') == 0); assertTrue(rcs.getCode('5') == 5); assertTrue(rcs.getCode('9') == 9); assertTrue(rcs.getCodingLength() == 4); }
// vowel public void testRange10() throws Exception { EXIRegularExpression re = new EXIRegularExpression("[b-df-hj-np-tv-z]"); Set<Integer> codePoints = re.getCodePoints(); assertTrue(codePoints.size() == 21); assertFalse(codePoints.contains(codePoint('a'))); assertTrue(codePoints.contains(codePoint('b'))); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('b') == 0); assertTrue(rcs.getCodePoint(2) == 'd'); assertTrue(rcs.getCodingLength() == 5); }
public void testRange3() throws Exception { String regex = "[A-Z][A-Z][A-Z]"; // e.g. "ABC" EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.getCodePoints().size() == 26); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('A') == 0); assertTrue(rcs.getCode('L') == 11); assertTrue(rcs.getCode('Z') == 25); assertTrue(rcs.getCodePoint(1) == 'B'); assertTrue(rcs.getCodingLength() == 5); }
public void testPattern30() throws EXIException { // <xs:pattern value="\\c"/> --> means '\' followed by 'c' // Note: Nothing to do with Multiple Character Escape Sequences \c EXIRegularExpression re = new EXIRegularExpression("\\\\c"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 2); }
// vowel, identical to "[b-df-hj-np-tv-z]" without subtraction public void testSubtraction1() throws Exception { EXIRegularExpression re = new EXIRegularExpression("[a-z-[aeiuo]]"); Set<Integer> codePoints = re.getCodePoints(); assertTrue(codePoints.size() == 21); assertTrue(codePoints.contains(codePoint('b'))); assertTrue(codePoints.contains(codePoint('d'))); assertFalse(codePoints.contains(codePoint('e'))); assertFalse(codePoints.contains(codePoint('u'))); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('d') == 2); assertTrue(rcs.getCodePoint(6) == 'j'); assertTrue(rcs.getCodingLength() == 5); }
/**
 * Pattern consisting of the single character U+FFFF; exactly one code
 * point is expected and the set must not be reported as the entire XML
 * character set.
 */
public void testNonUnicode3_1_0() throws Exception {
    final String regex = "\uFFFF";
    EXIRegularExpression expr = new EXIRegularExpression(regex);

    assertFalse(expr.isEntireSetOfXMLCharacters());

    Set<Integer> points = expr.getCodePoints();
    int count = points.size();
    assertTrue("CP Size=" + count, count == 1);
}
@Test public void testRangeSpecialChars() throws Exception { // UnicodeData-3.1.0: 03D7 .. 03DA (2 chars) // UnicodeData-3.2.0: 03D7 .. 03DA (4 chars, 03D8 & 03D9) EXIRegularExpression re = new EXIRegularExpression("[\u03D7-\u03DA]"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 4); }
/**
 * Multi-character escape for whitespace; four code points are expected.
 */
public void testMultiCharEsc1() throws EXIException {
    EXIRegularExpression expr = new EXIRegularExpression("\\s");

    assertFalse(expr.isEntireSetOfXMLCharacters());

    Set<Integer> points = expr.getCodePoints();
    int count = points.size();
    assertTrue("CP Size=" + count, count == 4);
}
public void testPattern10() throws EXIException { // 8 restricted digits String regex = "[2-4]{8}"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '2'); assertTrue(rcs.getCode('4') == 2); assertTrue(rcs.getCodingLength() == 2); }
public void testPattern11() throws EXIException { // 111 restricted digits String regex = "[3-9]{111}"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(1) == '4'); assertTrue(rcs.getCode('4') == 1); assertTrue(rcs.getCodingLength() == 3); }
public void testRange8() throws EXIException { // zero or more occurrences of lowercase letters from a to z: String regex = "([a-z])*"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == 'a'); assertTrue(rcs.getCode('c') == 2); assertTrue(rcs.getCodePoint(25) == 'z'); assertTrue(rcs.getCodingLength() == 5); }
public void testRange7() throws EXIException { // ONE of the following letters: x, y, OR z: String regex = "[xyz]"; // e.g. "x" "y" "z" EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == 'x'); assertTrue(rcs.getCodePoint(1) == 'y'); assertTrue(rcs.getCodePoint(2) == 'z'); assertTrue(rcs.getCodingLength() == 2); }
public void testPattern13() throws EXIException { // String regex = "\\s"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '\t'); assertTrue(rcs.getCode(' ') == 3); assertTrue(rcs.getCode('?') == Constants.NOT_FOUND); assertTrue(rcs.getCodingLength() == 3); }
public void testPattern18() throws EXIException { // language --> ([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})* String regex = "([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})*"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '-'); assertTrue(rcs.getCodePoint(1) == '0'); assertTrue(rcs.getCode('8') == 9); assertTrue(rcs.getCode('?') == Constants.NOT_FOUND); assertTrue(rcs.getCodingLength() == 6); }
public void testRange5() throws EXIException { // THREE of the LOWERCASE OR UPPERCASE letters from a to z String regex = "[a-zA-Z][a-zA-Z][a-zA-Z]"; // e.g. "aXy" EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('0') == Constants.NOT_FOUND); assertTrue(rcs.getCode('A') == 0); assertTrue(rcs.getCode('a') == 26); assertTrue(rcs.getCodingLength() == 6); }
public void testSubtraction4() throws EXIException { // [A-Z-[C-X-[M-N]]]* // means: A,B,M,N,Y,Z String regex = "[A-Z-[C-X-[M-N]]]*"; EXIRegularExpression re = new EXIRegularExpression(regex); Set<Integer> codePoints = re.getCodePoints(); assertTrue(codePoints.size() == 6); assertTrue(codePoints.contains(codePoint('A'))); assertTrue(codePoints.contains(codePoint('B'))); assertTrue(codePoints.contains(codePoint('M'))); assertTrue(codePoints.contains(codePoint('N'))); assertTrue(codePoints.contains(codePoint('Y'))); assertTrue(codePoints.contains(codePoint('Z'))); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('M') == 2); assertTrue(rcs.getCodePoint(5) == 'Z'); assertTrue(rcs.getCodingLength() == 3); }
public void testRange9() throws EXIException { // For example, "sToP" will be validated by this pattern, but not "Stop" // or "STOP" or "stop": String regex = "([a-z][A-Z])+"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == 'A'); assertTrue(rcs.getCodePoint(25) == 'Z'); assertTrue(rcs.getCode('c') == 28); assertTrue(rcs.getCodingLength() == 6); }
public void testRange11() throws Exception { // Matching strings: 1z, 2z, pz, rz // Non-matching strings: cz,dz, 0sz EXIRegularExpression re = new EXIRegularExpression("[0-9pqr]z"); Set<Integer> codePoints = re.getCodePoints(); assertTrue(codePoints.size() == (10 + 4)); assertTrue(codePoints.contains(codePoint('0'))); assertTrue(codePoints.contains(codePoint('9'))); assertTrue(codePoints.contains(codePoint('p'))); assertTrue(codePoints.contains(codePoint('q'))); assertTrue(codePoints.contains(codePoint('r'))); assertTrue(codePoints.contains(codePoint('z'))); assertFalse(codePoints.contains(codePoint('h'))); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('0') == 0); assertTrue(rcs.getCodePoint(3) == '3'); assertTrue(rcs.getCodePoint(13) == 'z'); assertTrue(rcs.getCodingLength() == 4); }
public void testPattern19() throws EXIException { // Social Security Number v1 // Matches: 078-05-1120 String regex = "[0-9]{3}-[0-9]{2}-[0-9]{4}"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '-'); assertTrue(rcs.getCodePoint(1) == '0'); assertTrue(rcs.getCode('8') == 9); assertTrue(rcs.getCode('?') == Constants.NOT_FOUND); assertTrue(rcs.getCodingLength() == 4); }
public void testPattern9() throws EXIException { // "password" with a restriction. There must be exactly eight characters // in a row and those characters must be lowercase or uppercase letters // from a to z, or a number from 0 to 9: String regex = "[a-zA-Z0-9]{8}"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '0'); assertTrue(rcs.getCodePoint(25) == 'P'); assertTrue(rcs.getCode('c') == 38); assertTrue(rcs.getCodingLength() == 6); }
public void testSubtraction5() throws EXIException { // [A-Z-[C-X]] // means: A, B, Y, Z String regex = "[A-Z-[C-X]]"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == 'A'); assertTrue(rcs.getCodePoint(1) == 'B'); assertTrue(rcs.size() == 4); assertTrue(rcs.getCodePoint(2) == 'Y'); assertTrue(rcs.getCodePoint(3) == 'Z'); assertTrue(rcs.getCode('?') == Constants.NOT_FOUND); assertTrue(rcs.getCodingLength() == 3); }
public void testPattern36() throws EXIException { // <xs:pattern value="[0-9]{3}-[0-9]{2}-[0-9]{4}"/> EXIRegularExpression re = new EXIRegularExpression("[0-9]{3}-[0-9]{2}-[0-9]{4}"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 11); }
public void testPattern26_() throws EXIException { EXIRegularExpression re = new EXIRegularExpression("[ABC]{1}"); assertTrue(!re.isEntireSetOfXMLCharacters()); // A, B, and C assertTrue(re.getCodePoints().size() == 3); }
public void testPattern33() throws EXIException { // <xs:pattern value="T529H72b7opQKe3MedjI8"/> EXIRegularExpression re = new EXIRegularExpression("T529H72b7opQKe3MedjI8"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 18); }
public void testPattern32() throws EXIException { // <xs:pattern value="\?"/> --> means '?' only EXIRegularExpression re = new EXIRegularExpression("\\?"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 1); }