public static void main(String[] args) { char a[] = { 'a', '5', '?', 'A', ' ', '$', 'жа' }; for (int i = 0; i < a.length; i++) { if (Character.isDigit(a[i])) System.out.println(a[i] + " is a digit."); if (Character.isLetter(a[i])) System.out.println(a[i] + " is a letter."); if (Character.isWhitespace(a[i])) System.out.println(a[i] + " is whitespace."); if (Character.isUpperCase(a[i])) System.out.println(a[i] + " is uppercase."); if (Character.isLowerCase(a[i])) System.out.println(a[i] + " is lowercase."); if (Character.isJavaIdentifierPart(a[i])) System.out.println(a[i] + " may be part of java Identifier part."); if (Character.isJavaIdentifierStart(a[i])) System.out.println(a[i] + " may be part of java Identifier Start."); if (Character.isUnicodeIdentifierPart(a[i])) System.out.println(a[i] + " may be part of a Unicode identifier ."); if (Character.isUnicodeIdentifierStart(a[i])) System.out .println(a[i] + " may be the first character in a Unicode identifier."); } }
/**
 * Reads whatever may legally appear where an operand is expected, starting at the current
 * position and skipping any leading whitespace first.
 *
 * @return one of:
 *     <UL>
 *       <LI>a {@link BigDecimal} value;
 *       <LI>the {@link String} name of a variable;
 *       <LI>{@link Tokeniser#START_NEW_EXPRESSION} when an opening parenthesis is found;
 *       <LI>or an {@link Operator} when a unary operator precedes the operand.
 *     </UL>
 *
 * @throws RuntimeException if the end of the string is reached, or the next character cannot
 *     begin an operand.
 */
Object getOperand() {
  final int end = this.string.length();

  /* Step over whitespace; 'current' tracks the character most recently examined. */
  char current = 0;
  while (this.position < end) {
    current = this.string.charAt(this.position);
    if (!Character.isWhitespace(current)) {
      break;
    }
    this.position++;
  }
  if (this.position == end) {
    throw new RuntimeException("operand expected but end of string found");
  }

  /* Single-character prefixes: sub-expression start and the unary signs. */
  switch (current) {
    case '(':
      this.position++;
      return START_NEW_EXPRESSION;
    case '-':
      this.position++;
      return Operator.NEG;
    case '+':
      this.position++;
      return Operator.PLUS;
    default:
      break;
  }

  /* Numeric literal: the position is left untouched for getBigDecimal(). */
  if (current == '.' || Character.isDigit(current)) {
    return getBigDecimal();
  }

  if (!Character.isUnicodeIdentifierStart(current)) {
    throw new RuntimeException("operand expected but '" + current + "' found");
  }

  /* Identifier: consume the start character and every following identifier-part character. */
  final int nameStart = this.position;
  this.position++;
  while (this.position < end
      && Character.isUnicodeIdentifierPart(this.string.charAt(this.position))) {
    this.position++;
  }
  final String identifier = this.string.substring(nameStart, this.position);

  /* Some identifiers are keyword unary operators rather than variable names. */
  if ("abs".equals(identifier)) {
    return Operator.ABS;
  }
  if ("int".equals(identifier)) {
    return Operator.INT;
  }
  if ("ln".equals(identifier)) {
    return Operator.LN;
  }
  return identifier;
}
private void validateUserId(String uid) { if (!User.userIdAvailable(uid)) { addFieldError("username", getText("error.uid.unavailable", new String[] {uid})); return; } int n = uid.length(); if (n > UID_MAX_LENGTH) { addFieldError("username", getText("error.string.toolong", new String[] {uid})); return; } char[] cs = uid.toCharArray(); for (int i = 0; i < n; i++) { char c = cs[i]; // White space cannot be part of a valid uid! if (Character.isWhitespace(c)) addFieldError( "username", "You chose '" + uid + "' as your username. Character '" + c + "' at position " + i + " cannot be used in a user name!"); // No special characters allowed! if (((i == 0) && !Character.isUnicodeIdentifierStart(c)) || ((i > 0) && !Character.isUnicodeIdentifierPart(c) && (c != '_'))) { addFieldError( "username", "You chose '" + uid + " as your username. Character '" + c + "' at position " + i + " cannot be used in a user name! Start with a letter and use letters, numbers, or _ elsewhere!"); } } }
Operator getOperator(char endOfExpressionChar) { /* Use any pushed back operator. */ if (this.pushedBackOperator != null) { Operator operator = this.pushedBackOperator; this.pushedBackOperator = null; return operator; } /* Skip whitespace */ final int len = this.string.length(); char ch = 0; while (this.position < len && Character.isWhitespace(ch = this.string.charAt(this.position))) { this.position++; } if (this.position == len) { if (endOfExpressionChar == 0) { return Operator.END; } else { throw new RuntimeException("missing " + endOfExpressionChar); } } this.position++; if (ch == endOfExpressionChar) { return Operator.END; } switch (ch) { case '+': { return Operator.ADD; } case '-': { return Operator.SUB; } case '/': { return Operator.DIV; } case '%': { return Operator.REMAINDER; } case '*': { return Operator.MUL; } case '?': { return Operator.TERNARY; } case '>': { if (this.position < len && this.string.charAt(this.position) == '=') { this.position++; return Operator.GE; } return Operator.GT; } case '<': { if (this.position < len) { switch (this.string.charAt(this.position)) { case '=': this.position++; return Operator.LE; case '>': this.position++; return Operator.NE; } } return Operator.LT; } case '=': { if (this.position < len && this.string.charAt(this.position) == '=') { this.position++; return Operator.EQ; } throw new RuntimeException("use == for equality at position " + this.position); } case '!': { if (this.position < len && this.string.charAt(this.position) == '=') { this.position++; return Operator.NE; } throw new RuntimeException("use != or <> for inequality at position " + this.position); } case '&': { if (this.position < len && this.string.charAt(this.position) == '&') { this.position++; return Operator.AND; } throw new RuntimeException("use && for AND at position " + this.position); } case '|': { if (this.position < len && this.string.charAt(this.position) == '|') { this.position++; return Operator.OR; } throw new RuntimeException("use || for OR 
at position " + this.position); } default: { /* Is this an identifier name for an operator function? */ if (Character.isUnicodeIdentifierStart(ch)) { int start = this.position - 1; while (this.position < len && Character.isUnicodeIdentifierPart(this.string.charAt(this.position))) { this.position++; } String name = this.string.substring(start, this.position); if (name.equals("pow")) { return Operator.POW; } if (name.equals("spt")) { return Operator.SPT; } } throw new RuntimeException( "operator expected at position " + this.position + " instead of '" + ch + "'"); } } }
/**
 * Computes the next token.
 *
 * <p>Dispatches on the current character {@code c} and, where a token may be longer than one
 * character, on the following value {@code d}. The method loops instead of returning for
 * whitespace other than {@code '\n'} and for comments, so those never produce a token here.
 * Any character that matches no rule is returned as a {@code tOTHER_CHARACTER} token.
 *
 * @return the next token; a {@code tEND_OF_INPUT} token when {@code c} is {@code END_OF_INPUT}.
 * @throws OffsetLimitReachedException declared by this method; not thrown directly in this body
 *     (presumably propagated from the helper methods it calls - confirm).
 */
private Token fetchToken() throws OffsetLimitReachedException {
  while (true) {
    // Offset at which the token being built begins.
    final int start = fOffset;
    // Character under inspection, captured before advancing.
    final int c = fCharPhase3;
    // Value returned by advancing once; used below as one character of lookahead.
    final int d = nextCharPhase3();
    switch (c) {
      case END_OF_INPUT:
        return newToken(IToken.tEND_OF_INPUT, start);
      case '\n':
        // A newline ends any include directive and is itself returned as a token.
        fInsideIncludeDirective = false;
        return newToken(Lexer.tNEWLINE, start);
      case ' ':
      case '\t':
      case 0xb: // vertical tab
      case '\f':
      case '\r':
        // Other whitespace is skipped: restart the loop at the next character.
        continue;
      case 'L':
        // 'L' may prefix a raw string (LR"), a string (L") or a character literal (L').
        switch (d) {
          case 'R':
            if (fOptions.fSupportRawStringLiterals) {
              // presumably mark/restore save and rewind the reader state so that the extra
              // lookahead can be undone when no '"' follows - confirm against their definitions
              markPhase3();
              if (nextCharPhase3() == '"') {
                nextCharPhase3();
                return rawStringLiteral(start, 3, IToken.tLSTRING);
              }
              restorePhase3();
            }
            break;
          case '"':
            nextCharPhase3();
            return stringLiteral(start, 2, IToken.tLSTRING);
          case '\'':
            nextCharPhase3();
            return charLiteral(start, IToken.tLCHAR);
        }
        // No literal prefix matched: 'L' starts an ordinary identifier.
        return identifier(start, 1);
      case 'u':
      case 'U':
        // With UTF literal support, 'u' selects the UTF16 token types and 'U' the UTF32 ones;
        // 'u8' (lower-case only) yields plain tSTRING tokens.
        if (fOptions.fSupportUTFLiterals) {
          switch (d) {
            case 'R':
              if (fOptions.fSupportRawStringLiterals) {
                markPhase3();
                if (nextCharPhase3() == '"') {
                  nextCharPhase3();
                  return rawStringLiteral(
                      start, 3, c == 'u' ? IToken.tUTF16STRING : IToken.tUTF32STRING);
                }
                restorePhase3();
              }
              break;
            case '"':
              nextCharPhase3();
              return stringLiteral(
                  start, 2, c == 'u' ? IToken.tUTF16STRING : IToken.tUTF32STRING);
            case '\'':
              nextCharPhase3();
              return charLiteral(start, c == 'u' ? IToken.tUTF16CHAR : IToken.tUTF32CHAR);
            case '8':
              if (c == 'u') {
                markPhase3();
                switch (nextCharPhase3()) {
                  case 'R':
                    // The second advance only happens when raw strings are enabled
                    // (short-circuit &&); on failure the restore below rewinds.
                    if (fOptions.fSupportRawStringLiterals && nextCharPhase3() == '"') {
                      nextCharPhase3();
                      return rawStringLiteral(start, 4, IToken.tSTRING);
                    }
                    break;
                  case '"':
                    nextCharPhase3();
                    return stringLiteral(start, 3, IToken.tSTRING);
                }
                restorePhase3();
              }
              break;
          }
        }
        return identifier(start, 1);
      case 'R':
        if (fOptions.fSupportRawStringLiterals && d == '"') {
          nextCharPhase3();
          return rawStringLiteral(start, 2, IToken.tSTRING);
        }
        return identifier(start, 1);
      case '"':
        // Inside an include directive a quote starts a header name, not a string literal.
        if (fInsideIncludeDirective) {
          return headerName(start, true);
        }
        return stringLiteral(start, 1, IToken.tSTRING);
      case '\'':
        return charLiteral(start, IToken.tCHAR);
      // ASCII identifier start characters. 'u', 'U', 'L' and 'R' are absent from this list
      // because they have dedicated cases above.
      case 'a':
      case 'b':
      case 'c':
      case 'd':
      case 'e':
      case 'f':
      case 'g':
      case 'h':
      case 'i':
      case 'j':
      case 'k':
      case 'l':
      case 'm':
      case 'n':
      case 'o':
      case 'p':
      case 'q':
      case 'r':
      case 's':
      case 't':
      case 'v':
      case 'w':
      case 'x':
      case 'y':
      case 'z':
      case 'A':
      case 'B':
      case 'C':
      case 'D':
      case 'E':
      case 'F':
      case 'G':
      case 'H':
      case 'I':
      case 'J':
      case 'K':
      case 'M':
      case 'N':
      case 'O':
      case 'P':
      case 'Q':
      case 'S':
      case 'T':
      case 'V':
      case 'W':
      case 'X':
      case 'Y':
      case 'Z':
      case '_':
        return identifier(start, 1);
      case '$':
        // Only an identifier start when the option allows it; otherwise tOTHER_CHARACTER.
        if (fOptions.fSupportDollarInIdentifiers) {
          return identifier(start, 1);
        }
        break;
      case '@':
        // Only an identifier start when the option allows it; otherwise tOTHER_CHARACTER.
        if (fOptions.fSupportAtSignInIdentifiers) {
          return identifier(start, 1);
        }
        break;
      case '\\':
        // A backslash followed by 'u' or 'U' begins an identifier
        // (presumably a universal character name - confirm).
        switch (d) {
          case 'u':
          case 'U':
            nextCharPhase3();
            return identifier(start, 2);
        }
        return newToken(tOTHER_CHARACTER, start, 1);
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return number(start, 1, false);
      case '.':
        switch (d) {
          // A dot directly followed by a digit starts a number.
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
            nextCharPhase3();
            return number(start, 2, true);
          case '.':
            // "..." is an ellipsis; for ".." the lookahead is undone and a single dot results.
            markPhase3();
            if (nextCharPhase3() == '.') {
              nextCharPhase3();
              return newToken(IToken.tELLIPSIS, start);
            }
            restorePhase3();
            break;
          case '*':
            nextCharPhase3();
            return newToken(IToken.tDOTSTAR, start);
        }
        return newToken(IToken.tDOT, start);
      case '#':
        if (d == '#') {
          nextCharPhase3();
          return newToken(IToken.tPOUNDPOUND, start);
        }
        return newToken(IToken.tPOUND, start);
      case '{':
        return newToken(IToken.tLBRACE, start);
      case '}':
        return newToken(IToken.tRBRACE, start);
      case '[':
        return newToken(IToken.tLBRACKET, start);
      case ']':
        return newToken(IToken.tRBRACKET, start);
      case '(':
        return newToken(IToken.tLPAREN, start);
      case ')':
        return newToken(IToken.tRPAREN, start);
      case ';':
        return newToken(IToken.tSEMI, start);
      case ':':
        switch (d) {
          case ':':
            nextCharPhase3();
            return newToken(IToken.tCOLONCOLON, start);
          case '>':
            // ":>" is the digraph spelling of ']'.
            nextCharPhase3();
            return newDigraphToken(IToken.tRBRACKET, start);
        }
        return newToken(IToken.tCOLON, start);
      case '?':
        return newToken(IToken.tQUESTION, start);
      case '+':
        switch (d) {
          case '+':
            nextCharPhase3();
            return newToken(IToken.tINCR, start);
          case '=':
            nextCharPhase3();
            return newToken(IToken.tPLUSASSIGN, start);
        }
        return newToken(IToken.tPLUS, start);
      case '-':
        switch (d) {
          case '>':
            // "->*" versus "->".
            int e = nextCharPhase3();
            if (e == '*') {
              nextCharPhase3();
              return newToken(IToken.tARROWSTAR, start);
            }
            return newToken(IToken.tARROW, start);
          case '-':
            nextCharPhase3();
            return newToken(IToken.tDECR, start);
          case '=':
            nextCharPhase3();
            return newToken(IToken.tMINUSASSIGN, start);
        }
        return newToken(IToken.tMINUS, start);
      case '*':
        if (d == '=') {
          nextCharPhase3();
          return newToken(IToken.tSTARASSIGN, start);
        }
        return newToken(IToken.tSTAR, start);
      case '/':
        switch (d) {
          case '=':
            nextCharPhase3();
            return newToken(IToken.tDIVASSIGN, start);
          case '/':
            // Line comment: handled by lineComment, then scanning resumes without a token.
            nextCharPhase3();
            lineComment(start);
            continue;
          case '*':
            // Block comment: handled by blockComment, then scanning resumes without a token.
            blockComment(start, '*');
            continue;
          case '%':
            // "/%" opens a block comment only when the option is enabled; otherwise '/' is tDIV.
            if (fOptions.fSupportSlashPercentComments) {
              blockComment(start, '%');
              continue;
            }
            break;
        }
        return newToken(IToken.tDIV, start);
      case '%':
        switch (d) {
          case '=':
            nextCharPhase3();
            return newToken(IToken.tMODASSIGN, start);
          case '>':
            // "%>" is the digraph spelling of '}'.
            nextCharPhase3();
            return newDigraphToken(IToken.tRBRACE, start);
          case ':':
            // "%:" is the digraph spelling of '#'; "%:%:" is the digraph spelling of "##".
            final int e = nextCharPhase3();
            if (e == '%') {
              markPhase3();
              if (nextCharPhase3() == ':') {
                nextCharPhase3();
                return newDigraphToken(IToken.tPOUNDPOUND, start);
              }
              restorePhase3();
            }
            return newDigraphToken(IToken.tPOUND, start);
        }
        return newToken(IToken.tMOD, start);
      case '^':
        if (d == '=') {
          nextCharPhase3();
          return newToken(IToken.tXORASSIGN, start);
        }
        return newToken(IToken.tXOR, start);
      case '&':
        switch (d) {
          case '&':
            nextCharPhase3();
            return newToken(IToken.tAND, start);
          case '=':
            nextCharPhase3();
            return newToken(IToken.tAMPERASSIGN, start);
        }
        return newToken(IToken.tAMPER, start);
      case '|':
        switch (d) {
          case '|':
            nextCharPhase3();
            return newToken(IToken.tOR, start);
          case '=':
            nextCharPhase3();
            return newToken(IToken.tBITORASSIGN, start);
        }
        return newToken(IToken.tBITOR, start);
      case '~':
        return newToken(IToken.tBITCOMPLEMENT, start);
      case '!':
        if (d == '=') {
          nextCharPhase3();
          return newToken(IToken.tNOTEQUAL, start);
        }
        return newToken(IToken.tNOT, start);
      case '=':
        if (d == '=') {
          nextCharPhase3();
          return newToken(IToken.tEQUAL, start);
        }
        return newToken(IToken.tASSIGN, start);
      case '<':
        // Inside an include directive '<' starts a header name, not an operator.
        if (fInsideIncludeDirective) {
          return headerName(start, false);
        }
        switch (d) {
          case '=':
            nextCharPhase3();
            return newToken(IToken.tLTEQUAL, start);
          case '<':
            // "<<=" versus "<<".
            final int e = nextCharPhase3();
            if (e == '=') {
              nextCharPhase3();
              return newToken(IToken.tSHIFTLASSIGN, start);
            }
            return newToken(IToken.tSHIFTL, start);
          case '?':
            // "<?" is a minimum operator token, only when the option is enabled.
            if (fOptions.fSupportMinAndMax) {
              nextCharPhase3();
              return newToken(IGCCToken.tMIN, start);
            }
            break;
          case ':': // 2.5-3
            // "<:" is the digraph spelling of '['. The exception is "<::": it is only read as
            // the digraph when a further ':' or '>' follows; otherwise the lookahead is undone
            // and a plain '<' is returned.
            markPhase3();
            if (nextCharPhase3() != ':') {
              return newDigraphToken(IToken.tLBRACKET, start);
            }
            switch (nextCharPhase3()) {
              case ':':
              case '>':
                restorePhase3();
                nextCharPhase3();
                return newDigraphToken(IToken.tLBRACKET, start);
            }
            restorePhase3();
            break;
          case '%':
            // "<%" is the digraph spelling of '{'.
            nextCharPhase3();
            return newDigraphToken(IToken.tLBRACE, start);
        }
        return newToken(IToken.tLT, start);
      case '>':
        switch (d) {
          case '=':
            nextCharPhase3();
            return newToken(IToken.tGTEQUAL, start);
          case '>':
            // ">>=" versus ">>".
            final int e = nextCharPhase3();
            if (e == '=') {
              nextCharPhase3();
              return newToken(IToken.tSHIFTRASSIGN, start);
            }
            return newToken(IToken.tSHIFTR, start);
          case '?':
            // ">?" is a maximum operator token, only when the option is enabled.
            if (fOptions.fSupportMinAndMax) {
              nextCharPhase3();
              return newToken(IGCCToken.tMAX, start);
            }
            break;
        }
        return newToken(IToken.tGT, start);
      case ',':
        return newToken(IToken.tCOMMA, start);
      default:
        // in case we have some other letter to start an identifier
        if (Character.isUnicodeIdentifierStart((char) c)) {
          return identifier(start, 1);
        }
        break;
    }
    // handles for instance @
    // Reached by every case above that ended in 'break' without recognising a token.
    return newToken(tOTHER_CHARACTER, start, 1);
  }
}
/**
 * Builds a parser that yields a single character accepted by
 * {@link Character#isUnicodeIdentifierStart(char)}.
 *
 * @param missing the error to use when the stream holds no character to examine.
 * @param sat the error to use when the character examined is not a Unicode identifier start
 *     character.
 * @return a parser producing a Unicode identifier start character.
 * @see Character#isUnicodeIdentifierStart(char)
 */
public static <E> Parser<Stream<Character>, Character, E> unicodeIdentiferStart(
    final F0<E> missing, final F<Character, E> sat) {
  return StreamParser.satisfy(missing, sat, Character::isUnicodeIdentifierStart);
}