/**
 * Appends the case variants of a character-class member to the member list, but only
 * when case-blind matching is switched on.
 *
 * <p>The variants are looked up for the value returned by {@code lower.getSingleChar()};
 * each one is appended to {@code members} as a new {@code SingleChar}. When
 * {@code caseBlind} is false the method does nothing.
 *
 * @param lower the member whose case variants are wanted
 * @param members the list that receives one {@code SingleChar} per variant
 */
private void addCaseVariant(CharClass lower, List members) {
    if (!caseBlind) {
        return;
    }
    final int[] alternatives = CaseVariants.getCaseVariants(lower.getSingleChar());
    for (final int alternative : alternatives) {
        members.add(new SingleChar(alternative));
    }
}
private CharClass parseCharClassExpr() throws RegexSyntaxException { boolean compl; if (curChar == '^') { advance(); compl = true; } else { compl = false; } final List members = new ArrayList(10); // boolean firstOrLast = true; do { final CharClass lower = parseCharClassEscOrXmlChar(); members.add(lower); if (curChar == ']' || eos) { addCaseVariant(lower, members); break; } // firstOrLast = isLastInGroup(); if (curChar == '-') { final char next = regExp.charAt(pos); if (next == '[') { // hyphen denotes subtraction addCaseVariant(lower, members); advance(); break; } else if (next == ']') { // hyphen denotes a regular character - no need to do anything addCaseVariant(lower, members); } else { // hyphen denotes a character range advance(); final CharClass upper = parseCharClassEscOrXmlChar(); if (lower.getSingleChar() < 0 || upper.getSingleChar() < 0) { throw makeException("the ends of a range must be single characters"); } if (lower.getSingleChar() > upper.getSingleChar()) { throw makeException("invalid range (start > end)"); } if (lower instanceof SingleChar && lower.getSingleChar() == '-' && !((SingleChar) lower).isEscaped) { throw makeException("range cannot start with unescaped hyphen"); } if (upper instanceof SingleChar && upper.getSingleChar() == '-' && !((SingleChar) upper).isEscaped) { throw makeException("range cannot end with unescaped hyphen"); } members.set( members.size() - 1, new CharRange(lower.getSingleChar(), upper.getSingleChar())); if (caseBlind) { // Special-case A-Z and a-z if (lower.getSingleChar() == 'a' && upper.getSingleChar() == 'z') { members.add(new CharRange('A', 'Z')); for (int v = 0; v < CaseVariants.ROMAN_VARIANTS.length; v++) { members.add(new SingleChar(CaseVariants.ROMAN_VARIANTS[v])); } } else if (lower.getSingleChar() == 'A' && upper.getSingleChar() == 'Z') { members.add(new CharRange('a', 'z')); for (int v = 0; v < CaseVariants.ROMAN_VARIANTS.length; v++) { members.add(new SingleChar(CaseVariants.ROMAN_VARIANTS[v])); } } else { 
for (int k = lower.getSingleChar(); k <= upper.getSingleChar(); k++) { final int[] variants = CaseVariants.getCaseVariants(k); for (int v = 0; v < variants.length; v++) { members.add(new SingleChar(variants[v])); } } } } // look for a subtraction if (curChar == '-' && regExp.charAt(pos) == '[') { advance(); // expect('['); break; } } } else { addCaseVariant(lower, members); } } while (curChar != ']'); if (eos) { expect(']'); } CharClass result; if (members.size() == 1) { result = (CharClass) members.get(0); } else { result = new Union(members); } if (compl) { result = new Complement(result); } if (curChar == '[') { advance(); result = new Subtraction(result, parseCharClassExpr()); expect(']'); } inCharClassExpr = false; advance(); return result; }
protected boolean translateAtom() throws RegexSyntaxException { switch (curChar) { case RegexData.EOS: if (!eos) { break; } // else fall through case '?': case '*': case '+': case ')': case '{': case '}': case '|': case ']': return false; case '(': copyCurChar(); final int thisCapture = ++currentCapture; translateRegExp(); expect(')'); captures.add(thisCapture); copyCurChar(); return true; case '\\': advance(); parseEsc().output(result); return true; case '[': inCharClassExpr = true; advance(); parseCharClassExpr().output(result); return true; case '.': if (isXPath) { // under XPath, "." has the same meaning as in JDK 1.5 break; } else { // under XMLSchema, "." means anything except \n or \r, which is different from the // XPath/JDK rule DOT_SCHEMA.output(result); advance(); return true; } case '$': case '^': if (isXPath) { copyCurChar(); return true; } result.append('\\'); break; default: if (caseBlind) { final int thisChar = absorbSurrogatePair(); final int[] variants = CaseVariants.getCaseVariants(thisChar); if (variants.length > 0) { final CharClass[] chars = new CharClass[variants.length + 1]; chars[0] = new SingleChar(thisChar); for (int i = 0; i < variants.length; i++) { chars[i + 1] = new SingleChar(variants[i]); } final Union union = new Union(chars); union.output(result); advance(); return true; } // else fall through } // else fall through } copyCurChar(); return true; }