/**
 * The global unescape method, as per ECMA-262 15.1.2.5.
 *
 * <p>Converts the first argument to a string and decodes every well-formed
 * {@code %XX} (two hex digits) or {@code %uXXXX} (four hex digits) sequence
 * into the single character with that code. A {@code '%'} that is not
 * followed by enough characters, or whose digits are rejected by
 * {@code Kit.xDigitToInt}, is copied through unchanged and scanning resumes
 * with the character right after it.
 *
 * @param args the call arguments; only index 0 is read
 * @return the decoded string, or the converted input itself when it
 *     contains no {@code '%'}
 */
private Object js_unescape(Object[] args) {
    String text = ScriptRuntime.toString(args, 0);
    int firstPercent = text.indexOf('%');
    if (firstPercent < 0) {
        // No escape can be present, so there is nothing to rewrite.
        return text;
    }

    // Decode in place: the output is never longer than the input, so the
    // write cursor can never overtake the read cursor.
    char[] chars = text.toCharArray();
    int length = chars.length;
    int writePos = firstPercent;
    int readPos = firstPercent;
    while (readPos != length) {
        char current = chars[readPos++];
        if (current == '%' && readPos != length) {
            boolean unicodeForm = chars[readPos] == 'u';
            int digitsBegin = unicodeForm ? readPos + 1 : readPos;
            int digitsEnd = unicodeForm ? readPos + 5 : readPos + 2;
            if (digitsEnd <= length) {
                int value = 0;
                for (int i = digitsBegin; i < digitsEnd; i++) {
                    value = Kit.xDigitToInt(chars[i], value);
                }
                // A negative accumulator marks a rejected digit (presumably
                // a non-hex character); in that case keep the literal '%'.
                if (value >= 0) {
                    current = (char) value;
                    readPos = digitsEnd;
                }
            }
        }
        chars[writePos++] = current;
    }
    return new String(chars, 0, writePos);
}
/**
 * Scans the next token from the input and returns its {@code Token} type code.
 *
 * <p>Leading whitespace (as decided by {@code isJSSpace}) is skipped; a
 * {@code '\n'} is reported as {@code Token.EOL} and end of input as
 * {@code Token.EOF}. The scanner then tries, in this order: the XML attribute
 * marker {@code '@'}, an identifier or keyword, a numeric literal, a string
 * literal, and finally punctuators and comments.
 *
 * <p>Fields written by the code below:
 * <ul>
 *   <li>{@code tokenBeg} / {@code tokenEnd} - set to a one-character span once
 *       whitespace is skipped, and adjusted on some paths (comments,
 *       unterminated strings);</li>
 *   <li>{@code string} - the interned text for {@code Token.NAME} and
 *       {@code Token.STRING}, the literal text for {@code Token.NUMBER};</li>
 *   <li>{@code number} - the numeric value for {@code Token.NUMBER};</li>
 *   <li>{@code commentType} - the kind of comment for {@code Token.COMMENT};</li>
 *   <li>{@code dirtyLine}, {@code isOctal}, {@code quoteChar},
 *       {@code stringBufferTop} - scanner bookkeeping.</li>
 * </ul>
 *
 * <p>Lexical errors are reported through {@code parser.addError} and yield
 * {@code Token.ERROR}, except for an unterminated block comment, which reports
 * the error but still returns {@code Token.COMMENT}.
 *
 * @return the type code of the scanned token
 * @throws IOException declared by this method; presumably raised by the
 *     character-reading helpers (e.g. {@code getChar}), which are not shown
 *     here - confirm against their definitions
 */
final int getToken() throws IOException {
    int c;

    // NOTE(review): no statement visible in this method targets the `retry`
    // label, and every path through the loop body returns, so as written the
    // outer loop body executes at most once per call.
    retry:
    for (; ; ) {
        // Eat whitespace, possibly sensitive to newlines.
        for (; ; ) {
            c = getChar();
            if (c == EOF_CHAR) {
                tokenBeg = cursor - 1;
                tokenEnd = cursor;
                return Token.EOF;
            } else if (c == '\n') {
                dirtyLine = false;
                tokenBeg = cursor - 1;
                tokenEnd = cursor;
                return Token.EOL;
            } else if (!isJSSpace(c)) {
                // A leading '-' does not mark the line dirty yet, so that the
                // '-' case below can still recognize "-->" at line start.
                if (c != '-') {
                    dirtyLine = true;
                }
                break;
            }
        }

        // Assume the token will be 1 char - fixed up below.
        tokenBeg = cursor - 1;
        tokenEnd = cursor;

        if (c == '@') return Token.XMLATTR;

        // identifier/keyword/instanceof?
        // watch out for starting with a <backslash>
        boolean identifierStart;
        boolean isUnicodeEscapeStart = false;
        if (c == '\\') {
            c = getChar();
            if (c == 'u') {
                identifierStart = true;
                isUnicodeEscapeStart = true;
                stringBufferTop = 0;
            } else {
                // Not a <backslash>u escape: push the char back and continue
                // as if only the backslash had been read.
                identifierStart = false;
                ungetChar(c);
                c = '\\';
            }
        } else {
            identifierStart = Character.isJavaIdentifierStart((char) c);
            if (identifierStart) {
                stringBufferTop = 0;
                addToString(c);
            }
        }

        if (identifierStart) {
            // Remembers whether any <backslash>uXXXX escape occurred; such
            // identifiers skip the keyword lookup below.
            boolean containsEscape = isUnicodeEscapeStart;
            for (; ; ) {
                if (isUnicodeEscapeStart) {
                    // strictly speaking we should probably push-back
                    // all the bad characters if the <backslash>uXXXX
                    // sequence is malformed. But since there isn't a
                    // correct context(is there?) for a bad Unicode
                    // escape sequence in an identifier, we can report
                    // an error here.
                    int escapeVal = 0;
                    for (int i = 0; i != 4; ++i) {
                        c = getChar();
                        escapeVal = Kit.xDigitToInt(c, escapeVal);
                        // Next check takes care about c < 0 and bad escape
                        if (escapeVal < 0) {
                            break;
                        }
                    }
                    if (escapeVal < 0) {
                        parser.addError("msg.invalid.escape");
                        return Token.ERROR;
                    }
                    addToString(escapeVal);
                    isUnicodeEscapeStart = false;
                } else {
                    c = getChar();
                    if (c == '\\') {
                        c = getChar();
                        if (c == 'u') {
                            isUnicodeEscapeStart = true;
                            containsEscape = true;
                        } else {
                            parser.addError("msg.illegal.character");
                            return Token.ERROR;
                        }
                    } else {
                        if (c == EOF_CHAR
                                || c == BYTE_ORDER_MARK
                                || !Character.isJavaIdentifierPart((char) c)) {
                            break;
                        }
                        addToString(c);
                    }
                }
            }
            // The char that ended the identifier belongs to the next token.
            ungetChar(c);

            String str = getStringFromBuffer();
            if (!containsEscape) {
                // OPT we shouldn't have to make a string (object!) to
                // check if it's a keyword.

                // Return the corresponding token if it's a keyword
                int result = stringToKeyword(str);
                // Token.EOF is the value checked here for "not a keyword".
                if (result != Token.EOF) {
                    if ((result == Token.LET || result == Token.YIELD)
                            && parser.compilerEnv.getLanguageVersion() < Context.VERSION_1_7) {
                        // LET and YIELD are tokens only in 1.7 and later
                        string = result == Token.LET ? "let" : "yield";
                        result = Token.NAME;
                    }
                    // A RESERVED word falls through to the NAME path below only
                    // when the environment allows reserved words as identifiers.
                    if (result != Token.RESERVED) {
                        return result;
                    } else if (!parser.compilerEnv.isReservedKeywordAsIdentifier()) {
                        return result;
                    }
                }
            }
            this.string = (String) allStrings.intern(str);
            return Token.NAME;
        }

        // is it a number?
        if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
            isOctal = false;
            stringBufferTop = 0;
            int base = 10;

            // A leading '0' selects hex ("0x"/"0X") or octal (followed by a
            // digit); otherwise it is kept as the digit zero of a decimal.
            if (c == '0') {
                c = getChar();
                if (c == 'x' || c == 'X') {
                    base = 16;
                    c = getChar();
                } else if (isDigit(c)) {
                    base = 8;
                    isOctal = true;
                } else {
                    addToString('0');
                }
            }

            if (base == 16) {
                // Accumulate while xDigitToInt accepts the char (presumably
                // i.e. while it is a hex digit).
                while (0 <= Kit.xDigitToInt(c, 0)) {
                    addToString(c);
                    c = getChar();
                }
            } else {
                while ('0' <= c && c <= '9') {
                    /*
                     * We permit 08 and 09 as decimal numbers, which
                     * makes our behavior a superset of the ECMA
                     * numeric grammar.  We might not always be so
                     * permissive, so we warn about it.
                     */
                    if (base == 8 && c >= '8') {
                        parser.addWarning("msg.bad.octal.literal", c == '8' ? "8" : "9");
                        base = 10;
                    }
                    addToString(c);
                    c = getChar();
                }
            }

            boolean isInteger = true;

            // Fraction and/or exponent part; only considered for decimals.
            if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
                isInteger = false;
                if (c == '.') {
                    do {
                        addToString(c);
                        c = getChar();
                    } while (isDigit(c));
                }
                if (c == 'e' || c == 'E') {
                    addToString(c);
                    c = getChar();
                    if (c == '+' || c == '-') {
                        addToString(c);
                        c = getChar();
                    }
                    if (!isDigit(c)) {
                        parser.addError("msg.missing.exponent");
                        return Token.ERROR;
                    }
                    do {
                        addToString(c);
                        c = getChar();
                    } while (isDigit(c));
                }
            }
            ungetChar(c);
            String numString = getStringFromBuffer();
            this.string = numString;

            double dval;
            if (base == 10 && !isInteger) {
                try {
                    // Use Java conversion to number from string...
                    dval = Double.valueOf(numString).doubleValue();
                } catch (NumberFormatException ex) {
                    parser.addError("msg.caught.nfe");
                    return Token.ERROR;
                }
            } else {
                dval = ScriptRuntime.stringToNumber(numString, 0, base);
            }

            this.number = dval;
            return Token.NUMBER;
        }

        // is it a string?
        if (c == '"' || c == '\'') {
            // We attempt to accumulate a string the fast way, by
            // building it directly out of the reader.  But if there
            // are any escaped characters in the string, we revert to
            // building it out of a StringBuffer.
            quoteChar = c;
            stringBufferTop = 0;

            c = getChar();
            strLoop:
            while (c != quoteChar) {
                if (c == '\n' || c == EOF_CHAR) {
                    ungetChar(c);
                    tokenEnd = cursor;
                    parser.addError("msg.unterminated.string.lit");
                    return Token.ERROR;
                }

                if (c == '\\') {
                    // We've hit an escaped character
                    int escapeVal;

                    c = getChar();
                    switch (c) {
                        case 'b': c = '\b'; break;
                        case 'f': c = '\f'; break;
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;

                        // \v a late addition to the ECMA spec,
                        // it is not in Java, so use 0xb
                        case 'v': c = 0xb; break;

                        case 'u':
                            // Get 4 hex digits; if the u escape is not
                            // followed by 4 hex digits, use 'u' + the
                            // literal character sequence that follows.
                            int escapeStart = stringBufferTop;
                            addToString('u');
                            escapeVal = 0;
                            for (int i = 0; i != 4; ++i) {
                                c = getChar();
                                escapeVal = Kit.xDigitToInt(c, escapeVal);
                                if (escapeVal < 0) {
                                    // Bad digit: keep what was buffered and let
                                    // the outer loop handle the offending char.
                                    continue strLoop;
                                }
                                addToString(c);
                            }
                            // prepare for replace of stored 'u' sequence
                            // by escape value
                            stringBufferTop = escapeStart;
                            c = escapeVal;
                            break;
                        case 'x':
                            // Get 2 hex digits, defaulting to 'x'+literal
                            // sequence, as above.
                            c = getChar();
                            escapeVal = Kit.xDigitToInt(c, 0);
                            if (escapeVal < 0) {
                                addToString('x');
                                continue strLoop;
                            } else {
                                int c1 = c;
                                c = getChar();
                                escapeVal = Kit.xDigitToInt(c, escapeVal);
                                if (escapeVal < 0) {
                                    addToString('x');
                                    addToString(c1);
                                    continue strLoop;
                                } else {
                                    // got 2 hex digits
                                    c = escapeVal;
                                }
                            }
                            break;

                        case '\n':
                            // Remove line terminator after escape to follow
                            // SpiderMonkey and C/C++
                            c = getChar();
                            continue strLoop;

                        default:
                            // Up to three octal digits; any other escaped char
                            // is taken literally (c is left unchanged).
                            if ('0' <= c && c < '8') {
                                int val = c - '0';
                                c = getChar();
                                if ('0' <= c && c < '8') {
                                    val = 8 * val + c - '0';
                                    c = getChar();
                                    if ('0' <= c && c < '8' && val <= 037) {
                                        // c is 3rd char of octal sequence only
                                        // if the resulting val <= 0377
                                        val = 8 * val + c - '0';
                                        c = getChar();
                                    }
                                }
                                ungetChar(c);
                                c = val;
                            }
                    }
                }
                addToString(c);
                c = getChar();
            }

            String str = getStringFromBuffer();
            this.string = (String) allStrings.intern(str);
            return Token.STRING;
        }

        // Everything else: punctuators, operators and comments.
        switch (c) {
            case ';':
                return Token.SEMI;
            case '[':
                return Token.LB;
            case ']':
                return Token.RB;
            case '{':
                return Token.LC;
            case '}':
                return Token.RC;
            case '(':
                return Token.LP;
            case ')':
                return Token.RP;
            case ',':
                return Token.COMMA;
            case '?':
                return Token.HOOK;
            case ':':
                if (matchChar(':')) {
                    return Token.COLONCOLON;
                } else {
                    return Token.COLON;
                }
            case '.':
                if (matchChar('.')) {
                    return Token.DOTDOT;
                } else if (matchChar('(')) {
                    return Token.DOTQUERY;
                } else {
                    return Token.DOT;
                }

            case '|':
                if (matchChar('|')) {
                    return Token.OR;
                } else if (matchChar('=')) {
                    return Token.ASSIGN_BITOR;
                } else {
                    return Token.BITOR;
                }

            case '^':
                if (matchChar('=')) {
                    return Token.ASSIGN_BITXOR;
                } else {
                    return Token.BITXOR;
                }

            case '&':
                if (matchChar('&')) {
                    return Token.AND;
                } else if (matchChar('=')) {
                    return Token.ASSIGN_BITAND;
                } else {
                    return Token.BITAND;
                }

            case '=':
                if (matchChar('=')) {
                    if (matchChar('=')) {
                        return Token.SHEQ;
                    } else {
                        return Token.EQ;
                    }
                } else {
                    return Token.ASSIGN;
                }

            case '!':
                if (matchChar('=')) {
                    if (matchChar('=')) {
                        return Token.SHNE;
                    } else {
                        return Token.NE;
                    }
                } else {
                    return Token.NOT;
                }

            case '<':
                /* NB:treat HTML begin-comment as comment-till-eol */
                if (matchChar('!')) {
                    if (matchChar('-')) {
                        if (matchChar('-')) {
                            tokenBeg = cursor - 4;
                            skipLine();
                            commentType = Token.CommentType.HTML;
                            return Token.COMMENT;
                        }
                        ungetCharIgnoreLineEnd('-');
                    }
                    ungetCharIgnoreLineEnd('!');
                }
                if (matchChar('<')) {
                    if (matchChar('=')) {
                        return Token.ASSIGN_LSH;
                    } else {
                        return Token.LSH;
                    }
                } else {
                    if (matchChar('=')) {
                        return Token.LE;
                    } else {
                        return Token.LT;
                    }
                }

            case '>':
                if (matchChar('>')) {
                    if (matchChar('>')) {
                        if (matchChar('=')) {
                            return Token.ASSIGN_URSH;
                        } else {
                            return Token.URSH;
                        }
                    } else {
                        if (matchChar('=')) {
                            return Token.ASSIGN_RSH;
                        } else {
                            return Token.RSH;
                        }
                    }
                } else {
                    if (matchChar('=')) {
                        return Token.GE;
                    } else {
                        return Token.GT;
                    }
                }

            case '*':
                if (matchChar('=')) {
                    return Token.ASSIGN_MUL;
                } else {
                    return Token.MUL;
                }

            case '/':
                markCommentStart();
                // is it a // comment?
                if (matchChar('/')) {
                    tokenBeg = cursor - 2;
                    skipLine();
                    commentType = Token.CommentType.LINE;
                    return Token.COMMENT;
                }
                // is it a /* or /** comment?
                if (matchChar('*')) {
                    // True exactly when the previously read char was '*', so
                    // that a following '/' closes the comment.
                    boolean lookForSlash = false;
                    tokenBeg = cursor - 2;
                    if (matchChar('*')) {
                        lookForSlash = true;
                        commentType = Token.CommentType.JSDOC;
                    } else {
                        commentType = Token.CommentType.BLOCK_COMMENT;
                    }
                    for (; ; ) {
                        c = getChar();
                        if (c == EOF_CHAR) {
                            tokenEnd = cursor - 1;
                            parser.addError("msg.unterminated.comment");
                            // The error is reported, but the token type is
                            // still COMMENT rather than ERROR.
                            return Token.COMMENT;
                        } else if (c == '*') {
                            lookForSlash = true;
                        } else if (c == '/') {
                            if (lookForSlash) {
                                tokenEnd = cursor;
                                return Token.COMMENT;
                            }
                        } else {
                            lookForSlash = false;
                            tokenEnd = cursor;
                        }
                    }
                }

                if (matchChar('=')) {
                    return Token.ASSIGN_DIV;
                } else {
                    return Token.DIV;
                }

            case '%':
                if (matchChar('=')) {
                    return Token.ASSIGN_MOD;
                } else {
                    return Token.MOD;
                }

            case '~':
                return Token.BITNOT;

            case '+':
                if (matchChar('=')) {
                    return Token.ASSIGN_ADD;
                } else if (matchChar('+')) {
                    return Token.INC;
                } else {
                    return Token.ADD;
                }

            case '-':
                // c is reused here to hold the token code to return.
                if (matchChar('=')) {
                    c = Token.ASSIGN_SUB;
                } else if (matchChar('-')) {
                    if (!dirtyLine) {
                        // treat HTML end-comment after possible whitespace
                        // after line start as comment-until-eol
                        if (matchChar('>')) {
                            markCommentStart("--");
                            skipLine();
                            commentType = Token.CommentType.HTML;
                            return Token.COMMENT;
                        }
                    }
                    c = Token.DEC;
                } else {
                    c = Token.SUB;
                }
                // Deferred from the whitespace loop, which skips this for '-'.
                dirtyLine = true;
                return c;

            default:
                parser.addError("msg.illegal.character");
                return Token.ERROR;
        }
    }
}