/**
 * Tries to consume one name-start unit at {@code start}: either a single character accepted by
 * {@link CssLexer#isNmStart} or a backslash escape.
 *
 * @param cp the buffer being scanned.
 * @param start offset of the candidate character.
 * @return the offset just past the consumed unit, or -1 if {@code start} is at the limit or the
 *     character is neither a name-start character nor a backslash.
 * @throws ParseException propagated from {@code parseEscapeBody} when a backslash is followed by
 *     something it rejects.
 */
private static int parseNmStart(CharProducer cp, int start) throws ParseException {
  if (start != cp.getLimit()) {
    char first = cp.getBuffer()[start];
    if (CssLexer.isNmStart(first)) {
      return start + 1;
    } else if (first == '\\') {
      // The escape body begins right after the backslash.
      return parseEscapeBody(cp, start + 1);
    }
  }
  return -1;
}
private static int parseNmChar(CharProducer cp, int start) throws ParseException { // nmchar [_a-z0-9-]|{nonascii}|{escape} int end = parseNmStart(cp, start); if (end >= 0) { return end; } if (start != cp.getLimit()) { char ch = cp.getBuffer()[start]; if ((ch >= '0' && ch <= '9') || ch == '-') { return start + 1; } } return -1; }
private int parseUri(CharProducer cp, int start) throws ParseException { // url ([!#$%&*-~]|{nonascii}|{escape})* char[] buf = cp.getBuffer(); int limit = cp.getLimit(); int end = start; while (end < limit) { if (isUriChar(buf[end])) { ++end; } else if (buf[end] == '\\') { end = parseEscapeBody(cp, end + 1); } else { break; } } return end; }
/**
 * Checks whether the buffer content at {@code start} equals {@code match}, ignoring the case of
 * ASCII letters in the buffer.
 *
 * <p>Only buffer characters are case-folded: an upper-case ASCII letter {@code A}-{@code Z} in
 * the buffer also matches its lower-case form in {@code match}. Letters in {@code match} are
 * compared as given, so callers should pass lower-case patterns.
 *
 * @param cp the buffer being scanned.
 * @param start offset of the first character to compare.
 * @param match the text to look for.
 * @return the offset just past the matched text, or -1 if fewer than {@code match.length()}
 *     characters remain before the limit or some character differs.
 */
private static int parseMatch(CharProducer cp, int start, String match) {
  int len = match.length();
  int limit = cp.getLimit();
  if (limit - start < len) {
    return -1;
  }
  char[] buf = cp.getBuffer();
  for (int i = 0; i < len; ++i) {
    char chB = buf[start + i];
    char chM = match.charAt(i);
    if (chB == chM) {
      continue;
    }
    // Setting bit 0x20 lower-cases an ASCII upper-case letter.  The range check must include
    // 'Z' itself; otherwise "Z" in the input could never match "z" in the pattern.
    boolean isAsciiUpper = chB >= 'A' && chB <= 'Z';
    if (!(isAsciiUpper && (chB | 0x20) == chM)) {
      return -1;
    }
  }
  return start + len;
}
private static int parseString(CharProducer cp, int start) throws ParseException { int limit = cp.getLimit(); if (start == limit) { return -1; } char[] buf = cp.getBuffer(); char ch = buf[start]; if (ch != '\'' && ch != '"') { return -1; } // {string} STRING // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" // string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\' // string {string1}|{string2} char delim = ch; int end = start + 1; while (end < limit) { ch = buf[end]; ++end; // escape {unicode}|\\[^\r\n\f0-9a-f] // nl \n|\r\n|\r|\f if (delim == ch) { return end; } else if (ch == '\\') { if (end < limit && isLineBreak(ch = buf[end])) { ++end; if (ch == '\r' && end < limit && buf[end] == '\n') { ++end; } } else { end = parseEscapeBody(cp, end); } } else if (isLineBreak(ch)) { throw new ParseException( new Message( MessageType.MALFORMED_STRING, cp.filePositionForOffsets(end - 1, end - 1), MessagePart.Factory.valueOf("" + ch))); } } throw new ParseException( new Message(MessageType.UNTERMINATED_STRING_TOKEN, cp.filePositionForOffsets(start, end))); }
private static int parseEscapeBody(CharProducer cp, int start) throws ParseException { // unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])? // escape {unicode}|\\[^\r\n\f0-9a-f] int limit = cp.getLimit(); char[] buf = cp.getBuffer(); if (start == limit) { throw new ParseException( new Message( MessageType.EXPECTED_TOKEN, cp.filePositionForOffsets(start, start), MessagePart.Factory.valueOf("<hex-digit>"), MessagePart.Factory.valueOf("<end-of-input>"))); } char ch = buf[start]; if (CssLexer.isHexChar(ch)) { int end = start + 1; for (int i = 5; --i >= 0; ++end) { if (end == limit) { break; } ch = buf[end]; if (!CssLexer.isHexChar(ch)) { break; } } if (end < limit && CssLexer.isSpaceChar(ch = buf[end])) { ++end; if ('\r' == ch && end < limit && '\n' == buf[end]) { ++end; } } return end; } else if (isLineBreak(ch)) { throw new ParseException( new Message( MessageType.UNRECOGNIZED_ESCAPE, cp.filePositionForOffsets(start, start), MessagePart.Factory.valueOf(String.valueOf(ch)))); } else { return start + 1; } }
/**
 * Consumes a run of one or more ASCII decimal digits beginning at {@code start}.
 *
 * @param cp the buffer being scanned.
 * @param start offset of the expected first digit.
 * @return the offset just past the last digit, or -1 if {@code start} is at the limit or the
 *     character there is not a digit.
 */
private static int parseInt(CharProducer cp, int start) {
  final int limit = cp.getLimit();
  if (start == limit) {
    return -1;
  }
  final char[] chars = cp.getBuffer();
  char first = chars[start];
  if (first < '0' || first > '9') {
    return -1;
  }
  int pos = start + 1;
  while (pos != limit && chars[pos] >= '0' && chars[pos] <= '9') {
    pos++;
  }
  return pos;
}
private static int parseRange(CharProducer cp, int start) throws ParseException { // range \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}\ // (\?{0,3}|{h}(\?{0,2}|{h}(\??|{h}))))) // This method also handles {h}{1,6}-{h}{1,6} char[] buf = cp.getBuffer(); int limit = cp.getLimit(); int end = start; int len = 6; boolean isRange = end < limit && buf[end] == '?'; if (isRange) { while (end < limit && '?' == buf[end] && --len >= 0) { ++end; } } while (end < limit && CssLexer.isHexChar(buf[end]) && --len >= 0) { ++end; } if (!isRange) { if (end == limit || '-' != buf[end]) { throw new ParseException( new Message( MessageType.EXPECTED_TOKEN, cp.filePositionForOffsets(end, end), MessagePart.Factory.valueOf("-"), toMessagePart(cp, end))); } ++end; len = 6; while (end < limit && '?' == buf[end] && --len >= 0) { ++end; } while (end < limit && CssLexer.isHexChar(buf[end]) && --len >= 0) { ++end; } } return end != start ? end : -1; }
/** * Only handles the case where num does not start with a dot since it is hard to distinguish a "." * token from a number token with 1 char lookahead. */ private static int parseNum(CharProducer cp, int start) throws ParseException { // num [0-9]+|[0-9]*"."[0-9]+ int end = parseInt(cp, start); assert end >= 0; int limit = cp.getLimit(); char[] buf = cp.getBuffer(); if (end < limit && '.' == buf[end]) { ++end; char ch; // By CSS rules, 0. is an invalid number. if (end == limit || (ch = buf[end]) < '0' || ch > '9') { throw new ParseException( new Message( MessageType.MALFORMED_NUMBER, cp.filePositionForOffsets(start, end), MessagePart.Factory.valueOf(cp.toString(start, end)))); } return parseInt(cp, end); } return end; }
/**
 * Builds a message part describing what was found at {@code offset}, for use in error messages.
 *
 * @param cp the buffer being scanned.
 * @param offset the position to describe.
 * @return a part holding {@code "<end-of-input>"} when {@code offset} equals the limit,
 *     otherwise a part holding the single character at {@code offset}.
 */
private static MessagePart toMessagePart(CharProducer cp, int offset) {
  String found;
  if (offset == cp.getLimit()) {
    found = "<end-of-input>";
  } else {
    found = "" + cp.getBuffer()[offset];
  }
  return MessagePart.Factory.valueOf(found);
}
/**
 * Lexes the next token from {@code cp} and stores it in {@code pending}.
 *
 * <p>Does nothing if a token is already pending or if {@code cp} is empty. Otherwise the first
 * character at the current offset selects which kind of token is scanned; the resulting text,
 * type and file position are wrapped in a {@code Token}, and {@code cp} is advanced past it.
 * Line comments ({@code //}) are tokenized as comments but also reported on {@code mq} as
 * {@code INVALID_CSS_COMMENT}.
 *
 * @throws ParseException on an unterminated block comment, an unterminated substitution, a
 *     malformed {@code url(...)}, or any error raised by the helper parsers it calls.
 */
private void produce() throws ParseException {
  // A previously produced token has not been consumed yet.
  if (null != pending) { return; }
  if (cp.isEmpty()) { return; }

  char[] buf = cp.getBuffer();
  final int start = cp.getOffset();
  int limit = cp.getLimit();
  // end always points just past the text consumed so far; every token has at least one char.
  int end = start + 1;

  CssTokenType type;
  char ch = buf[start];
  int identEnd;
  if (CssLexer.isSpaceChar(ch)) {
    // [ \t\r\n\f]+        S
    end = parseWhitespace(buf, end, limit);
    type = CssTokenType.SPACE;
  } else if (ch == '/') {
    if (end < limit && buf[end] == '*') {
      // \/\*[^*]*\*+([^/*][^*]*\*+)*\/    /* ignore comments */
      // State 0 consumes the opening '*' so that "/*/" is not treated as a complete comment.
      int state = 0;  // 0 - start, 1 - in comment, 2 - saw '*', 3 - done
      do {
        if (end == limit) { break; }
        ch = buf[end];
        switch (state) {
          case 0: state = 1; break;
          case 1: if (ch == '*') { state = 2; } break;
          case 2:
            if (ch == '/') {
              state = 3;
            } else if (ch != '*') {
              state = 1;
            }
            break;
        }
        ++end;
      } while (state != 3);
      // The loop exited because input ran out, not because the comment closed.
      if (state != 3) {
        throw new ParseException(
            new Message(
                MessageType.UNTERMINATED_COMMENT_TOKEN,
                cp.filePositionForOffsets(start, end)));
      }
      type = CssTokenType.COMMENT;
    } else if (end < limit && buf[end] == '/') {
      do {
        if (++end == limit) { break; }
        ch = buf[end];
        // Line comment does not contain the newline character that ends it
        // since we don't want to break \r\n sequences across two tokens,
        // and for consistency with JavaScript conventions which exclude the
        // newline from the line comment token.
        if (ch == '\r' || ch == '\n') { break; }
      } while (true);
      type = CssTokenType.COMMENT;
      // Line comments are tokenized but reported through the message queue.
      FilePosition commentPos = cp.filePositionForOffsets(start, end);
      mq.addMessage(MessageType.INVALID_CSS_COMMENT, commentPos);
    } else {
      // .    *yytext
      type = CssTokenType.PUNCTUATION;
    }
  } else if ('~' == ch || '|' == ch) {
    if (end < limit && '=' == buf[end]) {
      // "~="          INCLUDES
      // "|="          DASHMATCH
      ++end;
    } else {
      // .        *yytext
    }
    type = CssTokenType.PUNCTUATION;
  } else if (ch == '\'' || ch == '"') {
    end = parseString(cp, start);
    type = CssTokenType.STRING;
  } else if (ch == '@') {
    identEnd = parseIdent(cp, end);
    if (identEnd != -1) {
      // "@import"       IMPORT_SYM
      // "@page"         PAGE_SYM
      // "@media"        MEDIA_SYM
      // "@font-face"    FONT_FACE_SYM
      // "@charset "     CHARSET_SYM
      // "@"{ident}      ATKEYWORD
      type = CssTokenType.SYMBOL;
      end = identEnd;
      // In http://www.w3.org/TR/CSS21/grammar.html, the CHARSET_SYM is
      // allowed to match only "@charset "
      // 8 is the length of "@charset"; on a match the trailing space joins the token.
      if ((end - start) == 8 && parseMatch(cp, start, "@charset ") > 0) {
        ++end;
      }
    } else {
      // .        *yytext
      type = CssTokenType.PUNCTUATION;
    }
  } else if (ch == '!') {
    // "!{w}important" IMPORTANT_SYM
    // handled by token joining at a later pass

    // .        *yytext
    type = CssTokenType.PUNCTUATION;
  } else if (ch == '#') {
    int nameEnd = parseName(cp, end);
    if (nameEnd >= 0) {
      // "#"{name}       HASH
      type = CssTokenType.HASH;
      end = nameEnd;
    } else {
      // .        *yytext
      type = CssTokenType.PUNCTUATION;
    }
  } else if (ch == '<' || ch == '-') {
    // "<!--"          CDO
    // "-->"           CDC
    // Without the full marker, the single '<' or '-' is emitted as punctuation on its own.
    int tailEnd = parseMatch(cp, end, ch == '<' ? "!--" : "->");
    if (tailEnd >= 0) { end = tailEnd; }
    type = CssTokenType.PUNCTUATION;
  } else if ((ch >= '0' && ch <= '9') || '.' == ch) {
    // {num}em         EMS
    // {num}ex         EXS
    // {num}px         LENGTH
    // {num}cm         LENGTH
    // {num}mm         LENGTH
    // {num}in         LENGTH
    // {num}pt         LENGTH
    // {num}pc         LENGTH
    // {num}deg        ANGLE
    // {num}rad        ANGLE
    // {num}grad       ANGLE
    // {num}ms         TIME
    // {num}s          TIME
    // {num}Hz         FREQ
    // {num}kHz        FREQ
    // {num}{ident}    DIMEN
    // {num}%          PERCENTAGE
    // {num}           NUMBER
    boolean isNum;
    if ('.' == ch) {
      // A leading dot is a number only if digits follow it immediately.
      int numEnd = parseInt(cp, end);
      isNum = numEnd >= 0;
      if (isNum) { end = numEnd; }
    } else {
      isNum = true;
      end = parseNum(cp, start);
    }

    if (isNum) {
      // An identifier suffix (unit) or a '%' sign is folded into the same token.
      identEnd = parseIdent(cp, end);
      if (identEnd >= 0) {
        end = identEnd;
      } else if (end < limit && '%' == buf[end]) {
        ++end;
      }
      type = CssTokenType.QUANTITY;
    } else {
      // lone .
      // .        *yytext
      type = CssTokenType.PUNCTUATION;
    }
  } else if ((identEnd = parseIdent(cp, start)) >= 0) {
    end = identEnd;
    // Only an upper-case 'U' immediately followed by '+' starts a unicode range here.
    if (end - start == 1 && 'U' == ch && end < limit && '+' == buf[end]) {
      // U\+{range}      UNICODERANGE
      // U\+{h}{1,6}-{h}{1,6}    UNICODERANGE
      // range         \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}\
      //               (\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))

      type = CssTokenType.UNICODE_RANGE;
      ++end;
      end = parseRange(cp, end);
    } else if (end < limit && '(' == buf[end]) {
      ++end;
      // 4 is the length of "url("; parseMatch makes the comparison case-insensitive.
      if (end - start == 4 && parseMatch(cp, start, "url(") >= 0) {
        // "url("{w}{string}{w}")" URI
        // "url("{w}{url}{w}")"    URI
        end = parseWhitespace(buf, end, limit);
        // Prefer a quoted string; fall back to an unquoted URL only when no quote is present.
        int stringEnd = parseString(cp, end);
        int uriEnd = stringEnd < 0 ? parseUri(cp, end) : -1;
        if (stringEnd < 0 && uriEnd < 0) {
          throw new ParseException(
              new Message(
                  MessageType.EXPECTED_TOKEN,
                  cp.filePositionForOffsets(end, end),
                  MessagePart.Factory.valueOf("{url}"),
                  toMessagePart(cp, end)));
        }
        end = stringEnd >= 0 ? stringEnd : uriEnd;
        end = parseWhitespace(buf, end, limit);
        if (end == limit || ')' != buf[end]) {
          throw new ParseException(
              new Message(
                  MessageType.EXPECTED_TOKEN,
                  cp.filePositionForOffsets(end, end),
                  MessagePart.Factory.valueOf(")"),
                  toMessagePart(cp, end)));
        }
        ++end;
        type = CssTokenType.URI;
      } else {
        // {ident}"("      FUNCTION
        type = CssTokenType.FUNCTION;
      }
    } else {
      // {ident}         IDENT
      type = CssTokenType.IDENT;
    }
  } else if (ch == '$' && allowSubstitutions) {
    // ${<javascript tokens>}

    // NOTE(review): when '$' is the last character (end == limit) this condition is false, so
    // control falls into the scanning branch, the loop breaks at once, and
    // UNTERMINATED_STRING_TOKEN is thrown rather than emitting PUNCTUATION -- confirm intended.
    if (end < limit && buf[end] != '{') {
      type = CssTokenType.PUNCTUATION;
    } else {
      // 0 - non string
      // 1 - quoted string
      // 2 - saw \ in string
      // 3 - saw the closing brace that balances the opening one
      int state = 0;
      // number of brace-delimited blocks entered and not exited
      int nOpen = 0;
      // quote character of the string currently being skipped
      char delim = 0;
      do {
        if (end == limit) { break; }
        ch = buf[end];
        switch (state) {
          case 0:
            if (ch == '"' || ch == '\'') {
              delim = ch;
              state = 1;
            } else if (ch == '{') {
              ++nOpen;
            } else if (ch == '}') {
              if (--nOpen == 0) {
                state = 3;
              }
            }
            break;
          case 1:
            if (ch == delim) {
              state = 0;
            } else if (ch == '\\') {
              state = 2;
            }
            break;
          case 2:
            // The escaped character is skipped, whatever it is.
            state = 1;
            break;
        }
        ++end;
      } while (state != 3);
      // Input ran out before the braces balanced.
      if (state != 3) {
        throw new ParseException(
            new Message(
                MessageType.UNTERMINATED_STRING_TOKEN,
                cp.filePositionForOffsets(start, end)));
      }

      // As with numbers, a trailing identifier or '%' is folded into the token.
      identEnd = parseIdent(cp, end);
      if (identEnd >= 0) {
        end = identEnd;
      } else if (end != limit && '%' == buf[end]) {
        ++end;
      }

      type = CssTokenType.SUBSTITUTION;
    }
  } else {
    // .        *yytext
    type = CssTokenType.PUNCTUATION;
  }
  assert end > start;
  pending = Token.instance(cp.toString(start, end), type, cp.filePositionForOffsets(start, end));
  cp.consumeTo(end);
}