/** Asserts that the multi-character escape \S is reported as the entire set of XML characters. */
public void testMultiCharEsc2() throws EXIException {
    EXIRegularExpression expression = new EXIRegularExpression("\\S");
    assertTrue(expression.isEntireSetOfXMLCharacters());
}
/**
 * Builds an expression from the single character U+FFFF and asserts that it is not reported as
 * the entire set of XML characters and that exactly one code point is returned.
 */
public void testNonUnicode3_1_0() throws Exception {
    EXIRegularExpression expression = new EXIRegularExpression("\uFFFF");
    assertFalse(expression.isEntireSetOfXMLCharacters());

    Set<Integer> points = expression.getCodePoints();
    int count = points.size();
    // The message carries the actual size so a failure shows what was produced.
    assertTrue("CP Size=" + count, count == 1);
}
public void testHuge1() throws Exception { EXIRegularExpression re = new EXIRegularExpression("\\p{L}"); assertTrue(re.isEntireSetOfXMLCharacters()); // Set<Integer> codePoints = re.getCodePoints(); // assertTrue(codePoints.size() > 45000); // assertTrue(re.isEntireSetOfXMLCharacters()); }
public void testProdNumType() throws Exception { // \d{3}-[A-Z]{2}|\d{7} EXIRegularExpression re = new EXIRegularExpression("\\d{3}-[A-Z]{2}|\\d{7}"); // multi characters ... assertTrue(re.isEntireSetOfXMLCharacters()); }
/** Asserts that the complement escape \P{Zs} is reported as the entire set of XML characters. */
public void testComplEscape2() throws EXIException {
    EXIRegularExpression expression = new EXIRegularExpression("\\P{Zs}");
    assertTrue(expression.isEntireSetOfXMLCharacters());
}
public void testPattern22() throws EXIException { // Pattern in OpenOffice // ([$]?([^\. ']+|'[^']+'))?\.[$]?[A-Z]+[$]?[0-9]+ String regex = "([$]?([^\\. ']+|'[^']+'))?\\.[$]?[A-Z]+[$]?[0-9]+"; EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.isEntireSetOfXMLCharacters()); }
public void testPattern17() throws EXIException { // complexEsc \P{ L } String regex = "abc\\P{L}def"; EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.isEntireSetOfXMLCharacters()); }
public void testBasicLatin() throws Exception { // \p{IsBasicLatin} EXIRegularExpression re = new EXIRegularExpression("\\p{IsBasicLatin}"); // block ... assertTrue(re.isEntireSetOfXMLCharacters()); }
public void testPattern15() throws EXIException { // Number decimal digit String regex = "\\p{Nd}"; EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.isEntireSetOfXMLCharacters()); }
public void testPattern16() throws EXIException { // String regex = "\\i\\c*"; EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.isEntireSetOfXMLCharacters()); }
/** Asserts that \p{IsBasicLatin} is reported as the entire set of XML characters. */
public void testCategoryEscape3() throws EXIException {
    EXIRegularExpression expression = new EXIRegularExpression("\\p{IsBasicLatin}");
    assertTrue(expression.isEntireSetOfXMLCharacters());
}
public void testPattern30() throws EXIException { // <xs:pattern value="\\c"/> --> means '\' followed by 'c' // Note: Nothing to do with Multiple Character Escape Sequences \c EXIRegularExpression re = new EXIRegularExpression("\\\\c"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 2); }
public void testSubtraction3() throws Exception { // [\p{Ll}\p{Lu}-[\p{IsBasicLatin}]] matches all uppercase and lowercase // Unicode letters, except any ASCII letters EXIRegularExpression re = new EXIRegularExpression("[\\p{Ll}\\p{Lu}-[\\p{IsBasicLatin}]]"); // catEsc, block & ... assertTrue(re.isEntireSetOfXMLCharacters()); }
@Test public void testRangeSpecialChars() throws Exception { // UnicodeData-3.1.0: 03D7 .. 03DA (2 chars) // UnicodeData-3.2.0: 03D7 .. 03DA (4 chars, 03D8 & 03D9) EXIRegularExpression re = new EXIRegularExpression("[\u03D7-\u03DA]"); assertFalse(re.isEntireSetOfXMLCharacters()); assertTrue(re.getCodePoints().size() == 4); }
public void testPattern24() throws EXIException, IOException { // --0\d --0\d --0\d --0\d --\d1 --0\d --0\d String regex = "--0\\d --0\\d --0\\d --0\\d --\\d1 --0\\d --0\\d"; EXIRegularExpression re = new EXIRegularExpression(regex); // multi characters ... assertTrue(re.isEntireSetOfXMLCharacters()); }
public void testPattern21() throws EXIException { // Partial Pattern in OpenOffice // ([$]?([^\. ']+|'[^']+')) String regex = "([$]?([^\\. ']+|'[^']+'))"; EXIRegularExpression re = new EXIRegularExpression(regex); // TODO.. what is it ? assertTrue(re.isEntireSetOfXMLCharacters()); }
/**
 * Asserts that the multi-character escape \s is a restricted set for which four code points
 * are returned.
 */
public void testMultiCharEsc1() throws EXIException {
    EXIRegularExpression expression = new EXIRegularExpression("\\s");
    assertFalse(expression.isEntireSetOfXMLCharacters());

    Set<Integer> points = expression.getCodePoints();
    int count = points.size();
    // The message carries the actual size so a failure shows what was produced.
    assertTrue("CP Size=" + count, count == 4);
}
/**
 * Checks the code point set and the derived restricted character set for a social-security
 * style pattern: 11 code points, with the asserted code, code point and coding length values.
 */
public void testSSN() throws Exception {
    String ssnPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";
    EXIRegularExpression expression = new EXIRegularExpression(ssnPattern);
    assertTrue(expression.getCodePoints().size() == 11);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    assertTrue(charset.getCode('5') == 6);
    assertTrue(charset.getCodePoint(3) == '2');
    assertTrue(charset.getCodingLength() == 4);
}
/**
 * Checks the code point set and the derived restricted character set for an alternation of
 * the four single characters A, B, C and '-'.
 */
public void testAorBorCorMinus() throws Exception {
    String alternatives = "A|B|C|-";
    EXIRegularExpression expression = new EXIRegularExpression(alternatives);
    assertTrue(expression.getCodePoints().size() == 4);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    assertTrue(charset.getCode('C') == 3);
    assertTrue(charset.getCodePoint(0) == '-');
    assertTrue(charset.getCodingLength() == 3);
}
public void testMaleFemale() throws Exception { EXIRegularExpression re = new EXIRegularExpression("male|female"); // aeflm assertTrue(re.getCodePoints().size() == 5); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('a') == 0); assertTrue(rcs.getCodePoint(3) == 'l'); assertTrue(rcs.getCodingLength() == 3); }
public void testPattern10() throws EXIException { // 8 restricted digits String regex = "[2-4]{8}"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '2'); assertTrue(rcs.getCode('4') == 2); assertTrue(rcs.getCodingLength() == 2); }
public void testPattern11() throws EXIException { // 111 restricted digits String regex = "[3-9]{111}"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(1) == '4'); assertTrue(rcs.getCode('4') == 1); assertTrue(rcs.getCodingLength() == 3); }
/**
 * Checks the code point set and the derived restricted character set for three consecutive,
 * overlapping upper-case ranges.
 */
public void testRange2() throws Exception {
    String overlappingRanges = "[A-Z][B-Z][C-Z]";
    EXIRegularExpression expression = new EXIRegularExpression(overlappingRanges);
    assertTrue(expression.getCodePoints().size() == 26);

    RestrictedCharacterSet charset = new CodePointCharacterSet(expression.getCodePoints());
    assertTrue(charset.getCode('A') == 0);
    assertTrue(charset.getCode('L') == 11);
    assertTrue(charset.getCode('Z') == 25);
    assertTrue(charset.getCodingLength() == 5);
}
public void testRange7() throws EXIException { // ONE of the following letters: x, y, OR z: String regex = "[xyz]"; // e.g. "x" "y" "z" EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == 'x'); assertTrue(rcs.getCodePoint(1) == 'y'); assertTrue(rcs.getCodePoint(2) == 'z'); assertTrue(rcs.getCodingLength() == 2); }
public void testPattern26() throws EXIException { // "𐿿" NON BMP String test = new StringBuilder().append("[ABC").appendCodePoint(0x10FFF).append("]{1}").toString(); // EXIRegularExpression re = new // EXIRegularExpression("[ABC𐿿]{1}"); EXIRegularExpression re = new EXIRegularExpression(test); assertTrue(re.isEntireSetOfXMLCharacters()); // // A, B, and C ? // assertTrue(re.getCodePoints().size() == 3); }
public void testRange8() throws EXIException { // zero or more occurrences of lowercase letters from a to z: String regex = "([a-z])*"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == 'a'); assertTrue(rcs.getCode('c') == 2); assertTrue(rcs.getCodePoint(25) == 'z'); assertTrue(rcs.getCodingLength() == 5); }
public void testPattern13() throws EXIException { // String regex = "\\s"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '\t'); assertTrue(rcs.getCode(' ') == 3); assertTrue(rcs.getCode('?') == Constants.NOT_FOUND); assertTrue(rcs.getCodingLength() == 3); }
public void testSubtraction2() throws Exception { // matches any character in the string 0123789 EXIRegularExpression re = new EXIRegularExpression("[0-9-[0-6-[0-3]]]"); Set<Integer> codePoints = re.getCodePoints(); assertTrue(codePoints.size() == 7); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('7') == 4); assertTrue(rcs.getCodePoint(5) == '8'); assertTrue(rcs.getCodingLength() == 3); }
public void testRange4() throws EXIException { String regex = "[0-9][0-9][0-9][0-9][0-9]"; // e.g. "12345" EXIRegularExpression re = new EXIRegularExpression(regex); assertTrue(re.getCodePoints().size() == 10); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCode('0') == 0); assertTrue(rcs.getCode('5') == 5); assertTrue(rcs.getCode('9') == 9); assertTrue(rcs.getCodingLength() == 4); }
public void testPattern18() throws EXIException { // language --> ([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})* String regex = "([a-zA-Z]{1,8})(-[a-zA-Z0-9]{1,8})*"; EXIRegularExpression re = new EXIRegularExpression(regex); RestrictedCharacterSet rcs = new CodePointCharacterSet(re.getCodePoints()); assertTrue(rcs.getCodePoint(0) == '-'); assertTrue(rcs.getCodePoint(1) == '0'); assertTrue(rcs.getCode('8') == 9); assertTrue(rcs.getCode('?') == Constants.NOT_FOUND); assertTrue(rcs.getCodingLength() == 6); }