/** * <strong>[11.8.5] Regular Expression Literals</strong> * * <pre> * RegularExpressionLiteral :: * / RegularExpressionBody / RegularExpressionFlags * RegularExpressionBody :: * RegularExpressionFirstChar RegularExpressionChars * RegularExpressionChars :: * [empty] * RegularExpressionChars RegularExpressionChar * RegularExpressionFirstChar :: * RegularExpressionNonTerminator but not one of * or \ or / or [ * RegularExpressionBackslashSequence * RegularExpressionClass * RegularExpressionChar :: * RegularExpressionNonTerminator but not one of \ or / or [ * RegularExpressionBackslashSequence * RegularExpressionClass * RegularExpressionBackslashSequence :: * \ RegularExpressionNonTerminator * RegularExpressionNonTerminator :: * SourceCharacter but not LineTerminator * RegularExpressionClass :: * [ RegularExpressionClassChars ] * RegularExpressionClassChars :: * [empty] * RegularExpressionClassChars RegularExpressionClassChar * RegularExpressionClassChar :: * RegularExpressionNonTerminator but not one of ] or \ * RegularExpressionBackslashSequence * RegularExpressionFlags :: * [empty] * RegularExpressionFlags IdentifierPart * </pre> */ public String[] readRegularExpression(Token start) { assert start == Token.DIV || start == Token.ASSIGN_DIV; assert next == null : "regular expression in lookahead"; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; StringBuffer buffer = buffer(); if (start == Token.ASSIGN_DIV) { buffer.add('='); } else { int c = input.peek(0); if (c == '/' || c == '*') { throw error(Messages.Key.InvalidRegExpLiteral); } } boolean inClass = false; for (; ; ) { int c = input.get(); if (c == '\\') { // escape sequence buffer.add(c); c = input.get(); } else if (c == '[') { inClass = true; } else if (c == ']') { inClass = false; } else if (c == '/' && !inClass) { break; } if (c == EOF || isLineTerminator(c)) { throw error(Messages.Key.UnterminatedRegExpLiteral); } buffer.add(c); } String regexp = buffer.toString(); buffer.clear(); for (; ; ) { int c = input.get(); if (!isIdentifierPart(c)) { if (c == '\\' && match('u')) { readUnicode(); throw error(Messages.Key.UnicodeEscapeInRegExpFlags); } input.unget(c); break; } buffer.add(c); } String flags = buffer.toString(); return new String[] {regexp, flags}; }
/** * <strong>[11.8.6] Template Literal Lexical Components</strong> * * <pre> * Template :: * NoSubstitutionTemplate * TemplateHead * NoSubstitutionTemplate :: * ` TemplateCharacters<sub>opt</sub>` * TemplateHead :: * ` TemplateCharacters<sub>opt</sub>${ * TemplateSubstitutionTail :: * TemplateMiddle * TemplateTail * TemplateMiddle :: * } TemplateCharacters<sub>opt</sub>${ * TemplateTail :: * } TemplateCharacters<sub>opt</sub>` * TemplateCharacters :: * TemplateCharacter TemplateCharacters<sub>opt</sub> * TemplateCharacter :: * SourceCharacter but not one of ` or \ or $ * $ [LA ∉ { ] * \ EscapeSequence * LineContinuation * </pre> */ public String[] readTemplateLiteral(Token start) { assert start == Token.TEMPLATE || start == Token.RC; assert currentToken() == start; assert next == null : "template literal in lookahead"; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; StringBuilder raw = new StringBuilder(); StringBuffer buffer = buffer(); int pos = input.position(); for (; ; ) { int c = input.get(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedTemplateLiteral); } if (c == '`') { current = Token.TEMPLATE; raw.append(input.range(pos, input.position() - 1)); return new String[] {buffer.toString(), raw.toString()}; } if (c == '$' && match('{')) { current = Token.LC; raw.append(input.range(pos, input.position() - 2)); return new String[] {buffer.toString(), raw.toString()}; } if (c != '\\') { if (isLineTerminator(c)) { // line terminator sequence if (c == '\r') { // normalise \r and \r\n to \n raw.append(input.range(pos, input.position() - 1)).append('\n'); match('\n'); pos = input.position(); c = '\n'; } buffer.add(c); incrementLine(); continue; } // TODO: add substring range buffer.add(c); continue; } c = input.get(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedTemplateLiteral); } // EscapeSequence if (isLineTerminator(c)) { // line continuation if (c == '\r') { // normalise \r and \r\n to \n raw.append(input.range(pos, input.position() - 1)).append('\n'); match('\n'); pos = input.position(); } incrementLine(); continue; } switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\u000B'; break; case '0': if (isDecimalDigit(input.peek(0))) { throw error(Messages.Key.InvalidNULLEscape); } c = '\0'; break; case 'x': c = (hexDigit(input.get()) << 4) | hexDigit(input.get()); if (c < 0) { throw error(Messages.Key.InvalidHexEscape); } break; case 'u': c = readUnicode(); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': throw error(Messages.Key.StrictModeOctalEscapeSequence); case '"': case '\'': case '\\': default: // fall-through } buffer.addCodepoint(c); } }
/** * <strong>[11.5] Token</strong> * * <pre> * Token :: * IdentifierName * Punctuator * NumericLiteral * StringLiteral * Template * </pre> */ private Token scanToken() { TokenStreamInput input = this.input; int c; for (; ; ) { c = input.get(); if (c == TokenStreamInput.EOF) { return Token.EOF; } else if (c <= 0x20) { if (c == 0x09 || c == 0x0B || c == 0x0C || c == 0x20) { // skip over whitespace continue; } if (c == '\n') { incrementLineAndUpdate(); continue; } if (c == '\r') { match('\n'); incrementLineAndUpdate(); continue; } } else if (c >= 0xA0) { if (isWhitespace(c)) { // skip over whitespace continue; } if (isLineTerminator(c)) { incrementLineAndUpdate(); continue; } } break; } updateSourcePosition(); if (DEBUG) System.out.printf("scanToken() -> %c\n", (char) c); switch (c) { case '\'': case '"': return readString(c); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return readNumberLiteral(c); case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '$': case '_': return readIdentifier(c); case '{': return Token.LC; case '}': return Token.RC; case '(': return Token.LP; case ')': return Token.RP; case '[': return Token.LB; case ']': return Token.RB; case '.': switch (input.peek(0)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return readNumberLiteral(c); case '.': if (input.peek(1) == '.') { mustMatch('.'); mustMatch('.'); return Token.TRIPLE_DOT; } } return Token.DOT; case ';': return Token.SEMI; case ',': return Token.COMMA; case '~': return Token.BITNOT; case '?': return Token.HOOK; case ':': return Token.COLON; case '<': if (match('<')) { if (match('=')) { return Token.ASSIGN_SHL; } else { return Token.SHL; } } else if (match('=')) { return Token.LE; } else if (input.peek(0) == '!' && input.peek(1) == '-' && input.peek(2) == '-' && parser.isEnabled(CompatibilityOption.HTMLComments)) { // html start-comment mustMatch('!'); mustMatch('-'); mustMatch('-'); readSingleComment(); return Token.COMMENT; } else { return Token.LT; } case '>': if (match('>')) { if (match('>')) { if (match('=')) { return Token.ASSIGN_USHR; } else { return Token.USHR; } } else if (match('=')) { return Token.ASSIGN_SHR; } else { return Token.SHR; } } else if (match('=')) { return Token.GE; } else { return Token.GT; } case '=': if (match('=')) { if (match('=')) { return Token.SHEQ; } else { return Token.EQ; } } else if (match('>')) { return Token.ARROW; } else { return Token.ASSIGN; } case '!': if (match('=')) { if (match('=')) { return Token.SHNE; } else { return Token.NE; } } else { return Token.NOT; } case '+': if (match('+')) { return Token.INC; } else if (match('=')) { return Token.ASSIGN_ADD; } else { return Token.ADD; } case '-': if (match('-')) { if (input.peek(0) == '>' && hasLineTerminator && parser.isEnabled(CompatibilityOption.HTMLComments)) { // html end-comment at line start mustMatch('>'); readSingleComment(); return Token.COMMENT; } return Token.DEC; } else if (match('=')) { return Token.ASSIGN_SUB; } else { return Token.SUB; } case '*': if (match('=')) { return Token.ASSIGN_MUL; } else { return Token.MUL; } case '%': if (match('=')) { return Token.ASSIGN_MOD; } else { return Token.MOD; } case '/': if (match('=')) { return Token.ASSIGN_DIV; } else if (match('/')) { readSingleComment(); return Token.COMMENT; } else if (match('*')) { readMultiComment(); return Token.COMMENT; } else { return Token.DIV; } case '&': if (match('&')) { return Token.AND; } else if (match('=')) { return Token.ASSIGN_BITAND; } else { return Token.BITAND; } case '|': if (match('|')) { return Token.OR; } else if (match('=')) { return Token.ASSIGN_BITOR; } else { return Token.BITOR; } case '^': if (match('=')) { return Token.ASSIGN_BITXOR; } else { return Token.BITXOR; } case '`': return Token.TEMPLATE; } if (c == '\\') { mustMatch('u'); c = readUnicode(); } if (isIdentifierStart(c)) { return readIdentifier(c); } return Token.ERROR; }
/** * <strong>[11.8.4] String Literals</strong> * * <pre> * StringLiteral :: * " DoubleStringCharacters<sub>opt</sub> " * ' SingleStringCharacters<sub>opt</sub> ' * DoubleStringCharacters :: * DoubleStringCharacter DoubleStringCharacters<sub>opt</sub> * SingleStringCharacters :: * SingleStringCharacter SingleStringCharacters<sub>opt</sub> * DoubleStringCharacter :: * SourceCharacter but not one of " or \ or LineTerminator * \ EscapeSequence * LineContinuation * SingleStringCharacter :: * SourceCharacter but not one of ' or \ or LineTerminator * \ EscapeSequence * LineContinuation * LineContinuation :: * \ LineTerminatorSequence * EscapeSequence :: * CharacterEscapeSequence * 0 [lookahead ∉ DecimalDigit] * HexEscapeSequence * UnicodeEscapeSequence * CharacterEscapeSequence :: * SingleEscapeCharacter * NonEscapeCharacter * SingleEscapeCharacter :: one of * ' " \ b f n r t v * NonEscapeCharacter :: * SourceCharacter but not one of EscapeCharacter or LineTerminator * EscapeCharacter :: * SingleEscapeCharacter * DecimalDigit * x * u * HexEscapeSequence :: * x HexDigit HexDigit * UnicodeEscapeSequence :: * u HexDigit HexDigit HexDigit HexDigit * u{ HexDigits } * </pre> * * <strong>[B.1.2] String Literals</strong> * * <pre> * EscapeSequence :: * CharacterEscapeSequence * OctalEscapeSequence * HexEscapeSequence * UnicodeEscapeSequence * </pre> */ private Token readString(int quoteChar) { assert quoteChar == '"' || quoteChar == '\''; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; int start = input.position(); StringBuffer buffer = this.buffer(); hasEscape = false; for (; ; ) { int c = input.get(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedStringLiteral); } if (c == quoteChar) { buffer.add(input.range(start, input.position() - 1)); break; } if (isLineTerminator(c)) { throw error(Messages.Key.UnterminatedStringLiteral); } if (c != '\\') { continue; } buffer.add(input.range(start, input.position() - 1)); hasEscape = true; c = input.get(); if (isLineTerminator(c)) { // line continuation if (c == '\r' && match('\n')) { // \r\n sequence } incrementLine(); start = input.position(); continue; } // escape sequences switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\u000B'; break; case 'x': c = (hexDigit(input.get()) << 4) | hexDigit(input.get()); if (c < 0) { throw error(Messages.Key.InvalidHexEscape); } break; case 'u': c = readUnicode(); break; case '0': if (isDecimalDigit(input.peek(0))) { if (!parser.isEnabled(CompatibilityOption.OctalEscapeSequence)) { throw error(Messages.Key.InvalidNULLEscape); } c = readOctalEscape(c); } else { c = '\0'; } break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': if (!parser.isEnabled(CompatibilityOption.OctalEscapeSequence)) { throw error(Messages.Key.StrictModeOctalEscapeSequence); } c = readOctalEscape(c); break; case '8': case '9': // FIXME: spec bug - undefined behaviour for \8 and \9 if (!parser.isEnabled(CompatibilityOption.OctalEscapeSequence)) { throw error(Messages.Key.StrictModeOctalEscapeSequence); } // fall-through case '"': case '\'': case '\\': default: // fall-through } buffer.addCodepoint(c); start = input.position(); } return Token.STRING; }