/**
 * <strong>[11.6] Identifier Names and Identifiers</strong>
 * <p>
 * Scans the remainder of an identifier whose first code point {@code c} has already been
 * consumed. Unicode escapes ({@code \}{@code u...}) are decoded and must yield a valid
 * identifier part. The first character that does not belong to the identifier is pushed back.
 *
 * <pre>
 * Identifier ::
 *     IdentifierName but not ReservedWord
 * IdentifierName ::
 *     IdentifierStart
 *     IdentifierName IdentifierPart
 * </pre>
 *
 * @param c the already consumed identifier start code point
 * @return the token returned by {@code readReservedWord} if it is non-null, otherwise
 *         {@link Token#NAME}
 */
private Token readIdentifier(int c) {
    assert isIdentifierStart(c);

    final TokenStreamInput in = this.input;
    final StringBuffer name = this.buffer();
    name.addCodepoint(c);
    while (true) {
        c = in.get();
        if (isIdentifierPart(c)) {
            // plain identifier character
            name.add(c);
            continue;
        }
        if (c != '\\') {
            // end of identifier: give the character back to the input
            in.unget(c);
            break;
        }
        // escaped identifier character
        mustMatch('u');
        c = readUnicode();
        if (!isIdentifierPart(c)) {
            throw error(Messages.Key.InvalidUnicodeEscapedIdentifierPart);
        }
        name.addCodepoint(c);
    }

    Token reserved = readReservedWord(name);
    return reserved != null ? reserved : Token.NAME;
}
/**
 * <strong>[11.8.3] Numeric Literals</strong>
 * <p>
 * Reads the digits of an octal integer literal into the shared buffer and converts them with
 * {@code parseOctal}. The character following the digits is pushed back.
 *
 * <pre>
 * OctalIntegerLiteral ::
 *     0o OctalDigit
 *     0O OctalDigit
 *     OctalIntegerLiteral OctalDigit
 * </pre>
 *
 * @return the numeric value of the literal
 */
private double readOctalIntegerLiteral() {
    final TokenStreamInput in = this.input;
    final StringBuffer digits = this.buffer();

    int c = in.get();
    while (isOctalDigit(c)) {
        digits.add(c);
        c = in.get();
    }
    // a literal must not be directly followed by a decimal digit or an identifier start
    if (isDecimalDigitOrIdentifierStart(c)) {
        throw error(Messages.Key.InvalidOctalIntegerLiteral);
    }
    in.unget(c);

    // at least one octal digit is required
    if (digits.length == 0) {
        throw error(Messages.Key.InvalidOctalIntegerLiteral);
    }
    return parseOctal(digits.cbuf, digits.length);
}
/** * <strong>[B.1.1] Numeric Literals</strong> * * <pre> * LegacyOctalIntegerLiteral :: * 0 OctalDigit * LegacyOctalIntegerLiteral OctalDigit * </pre> */ private double readLegacyOctalIntegerLiteral() { TokenStreamInput input = this.input; StringBuffer buffer = this.buffer(); int c; while (isOctalDigit(c = input.get())) { buffer.add(c); } if (c == '8' || c == '9') { // invalid octal integer literal -> treat as decimal literal, no strict-mode error // FIXME: spec bug? undefined behaviour - SM reports a strict-mode error in this case return readDecimalLiteral(c, false); } parser.reportStrictModeSyntaxError(Messages.Key.StrictModeOctalIntegerLiteral); if (isDecimalDigitOrIdentifierStart(c)) { throw error(Messages.Key.InvalidOctalIntegerLiteral); } input.unget(c); if (buffer.length == 0) { throw error(Messages.Key.InvalidOctalIntegerLiteral); } return parseOctal(buffer.cbuf, buffer.length); }
/** * <strong>[11.8.5] Regular Expression Literals</strong> * * <pre> * RegularExpressionLiteral :: * / RegularExpressionBody / RegularExpressionFlags * RegularExpressionBody :: * RegularExpressionFirstChar RegularExpressionChars * RegularExpressionChars :: * [empty] * RegularExpressionChars RegularExpressionChar * RegularExpressionFirstChar :: * RegularExpressionNonTerminator but not one of * or \ or / or [ * RegularExpressionBackslashSequence * RegularExpressionClass * RegularExpressionChar :: * RegularExpressionNonTerminator but not one of \ or / or [ * RegularExpressionBackslashSequence * RegularExpressionClass * RegularExpressionBackslashSequence :: * \ RegularExpressionNonTerminator * RegularExpressionNonTerminator :: * SourceCharacter but not LineTerminator * RegularExpressionClass :: * [ RegularExpressionClassChars ] * RegularExpressionClassChars :: * [empty] * RegularExpressionClassChars RegularExpressionClassChar * RegularExpressionClassChar :: * RegularExpressionNonTerminator but not one of ] or \ * RegularExpressionBackslashSequence * RegularExpressionFlags :: * [empty] * RegularExpressionFlags IdentifierPart * </pre> */ public String[] readRegularExpression(Token start) { assert start == Token.DIV || start == Token.ASSIGN_DIV; assert next == null : "regular expression in lookahead"; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; StringBuffer buffer = buffer(); if (start == Token.ASSIGN_DIV) { buffer.add('='); } else { int c = input.peek(0); if (c == '/' || c == '*') { throw error(Messages.Key.InvalidRegExpLiteral); } } boolean inClass = false; for (; ; ) { int c = input.get(); if (c == '\\') { // escape sequence buffer.add(c); c = input.get(); } else if (c == '[') { inClass = true; } else if (c == ']') { inClass = false; } else if (c == '/' && !inClass) { break; } if (c == EOF || isLineTerminator(c)) { throw error(Messages.Key.UnterminatedRegExpLiteral); } buffer.add(c); } String regexp = buffer.toString(); buffer.clear(); 
for (; ; ) { int c = input.get(); if (!isIdentifierPart(c)) { if (c == '\\' && match('u')) { readUnicode(); throw error(Messages.Key.UnicodeEscapeInRegExpFlags); } input.unget(c); break; } buffer.add(c); } String flags = buffer.toString(); return new String[] {regexp, flags}; }
/**
 * Reads a decimal literal (integer part, optional fraction, optional exponent) whose first
 * character {@code c} has already been consumed, and converts the collected characters with
 * {@code parseDecimal}. The character following the literal is pushed back.
 *
 * @param c the already consumed first character, either {@code '.'} or a decimal digit
 * @param reset if {@code true} the buffer is obtained from {@code buffer()}, otherwise the
 *        current {@code buffer} field is reused so previously collected digits are kept
 * @return the numeric value of the literal
 */
private double readDecimalLiteral(int c, boolean reset) {
    assert c == '.' || isDecimalDigit(c);

    final TokenStreamInput in = this.input;
    final StringBuffer text = reset ? this.buffer() : this.buffer;

    // integer part
    if (c == '0') {
        text.add(c);
        c = in.get();
    } else if (c != '.') {
        text.add(c);
        c = in.get();
        while (isDecimalDigit(c)) {
            text.add(c);
            c = in.get();
        }
    }

    // fraction part
    if (c == '.') {
        text.add(c);
        c = in.get();
        while (isDecimalDigit(c)) {
            text.add(c);
            c = in.get();
        }
    }

    // exponent part
    if (c == 'e' || c == 'E') {
        text.add(c);
        c = in.get();
        if (c == '+' || c == '-') {
            text.add(c);
            c = in.get();
        }
        // at least one exponent digit is required
        if (!isDecimalDigit(c)) {
            throw error(Messages.Key.InvalidNumberLiteral);
        }
        text.add(c);
        c = in.get();
        while (isDecimalDigit(c)) {
            text.add(c);
            c = in.get();
        }
    }

    // a literal must not be directly followed by a decimal digit or an identifier start
    if (isDecimalDigitOrIdentifierStart(c)) {
        throw error(Messages.Key.InvalidNumberLiteral);
    }
    in.unget(c);

    return parseDecimal(text.cbuf, text.length);
}
/** * <strong>[11.8.6] Template Literal Lexical Components</strong> * * <pre> * Template :: * NoSubstitutionTemplate * TemplateHead * NoSubstitutionTemplate :: * ` TemplateCharacters<sub>opt</sub>` * TemplateHead :: * ` TemplateCharacters<sub>opt</sub>${ * TemplateSubstitutionTail :: * TemplateMiddle * TemplateTail * TemplateMiddle :: * } TemplateCharacters<sub>opt</sub>${ * TemplateTail :: * } TemplateCharacters<sub>opt</sub>` * TemplateCharacters :: * TemplateCharacter TemplateCharacters<sub>opt</sub> * TemplateCharacter :: * SourceCharacter but not one of ` or \ or $ * $ [LA ∉ { ] * \ EscapeSequence * LineContinuation * </pre> */ public String[] readTemplateLiteral(Token start) { assert start == Token.TEMPLATE || start == Token.RC; assert currentToken() == start; assert next == null : "template literal in lookahead"; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; StringBuilder raw = new StringBuilder(); StringBuffer buffer = buffer(); int pos = input.position(); for (; ; ) { int c = input.get(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedTemplateLiteral); } if (c == '`') { current = Token.TEMPLATE; raw.append(input.range(pos, input.position() - 1)); return new String[] {buffer.toString(), raw.toString()}; } if (c == '$' && match('{')) { current = Token.LC; raw.append(input.range(pos, input.position() - 2)); return new String[] {buffer.toString(), raw.toString()}; } if (c != '\\') { if (isLineTerminator(c)) { // line terminator sequence if (c == '\r') { // normalise \r and \r\n to \n raw.append(input.range(pos, input.position() - 1)).append('\n'); match('\n'); pos = input.position(); c = '\n'; } buffer.add(c); incrementLine(); continue; } // TODO: add substring range buffer.add(c); continue; } c = input.get(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedTemplateLiteral); } // EscapeSequence if (isLineTerminator(c)) { // line continuation if (c == '\r') { // normalise \r and \r\n to \n 
raw.append(input.range(pos, input.position() - 1)).append('\n'); match('\n'); pos = input.position(); } incrementLine(); continue; } switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\u000B'; break; case '0': if (isDecimalDigit(input.peek(0))) { throw error(Messages.Key.InvalidNULLEscape); } c = '\0'; break; case 'x': c = (hexDigit(input.get()) << 4) | hexDigit(input.get()); if (c < 0) { throw error(Messages.Key.InvalidHexEscape); } break; case 'u': c = readUnicode(); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': throw error(Messages.Key.StrictModeOctalEscapeSequence); case '"': case '\'': case '\\': default: // fall-through } buffer.addCodepoint(c); } }
public TokenStream initialise() { // set internal state to default values this.hasLineTerminator = true; this.hasCurrentLineTerminator = true; this.position = input.position(); this.line = parser.getSourceLine(); this.linestart = input.position(); this.current = scanTokenNoComment(); this.sourcePosition = nextSourcePosition; this.nextposition = input.position(); this.next = null; return this; }
public void reset(long position, long lineinfo) { // reset character stream input.reset((int) position); // reset internal state this.hasLineTerminator = false; this.hasCurrentLineTerminator = true; this.position = input.position(); this.current = scanTokenNoComment(); this.sourcePosition = nextSourcePosition; this.nextposition = input.position(); this.next = null; // reset line state last, effectively ignoring any changes from scanTokenNoComment() this.line = (int) (lineinfo >>> 32); this.linestart = (int) lineinfo; }
/** * <strong>[11.4] Comments</strong> * * <pre> * SingleLineComment :: * // SingleLineCommentChars<sub>opt</sub> * SingleLineCommentChars :: * SingleLineCommentChar SingleLineCommentChars<sub>opt</sub> * SingleLineCommentChar :: * SourceCharacter but not LineTerminator * </pre> */ private Token readSingleComment() { final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; for (; ; ) { int c = input.get(); if (c == EOF) { break; } if (isLineTerminator(c)) { // EOL is not part of the single-line comment! input.unget(c); break; } } return Token.COMMENT; }
/**
 * Advances to the next token and makes it the current token.
 * <p>
 * If a lookahead token is already buffered it is used; otherwise a new token is scanned.
 * The cached string value and the lookahead are cleared afterwards.
 *
 * @return the new current token
 */
public Token nextToken() {
    if (next == null) {
        // nothing buffered: scan the token now
        hasLineTerminator = false;
        nextposition = input.position();
        next = scanTokenNoComment();
    }

    // promote the lookahead state to the current state
    final Token token = next;
    current = token;
    sourcePosition = nextSourcePosition;
    position = nextposition;
    hasCurrentLineTerminator = hasLineTerminator;
    string = null;

    // clear the lookahead state
    next = null;
    nextposition = input.position();
    hasLineTerminator = false;

    return token;
}
/** * <strong>[11.4] Comments</strong> * * <pre> * MultiLineComment :: * /* MultiLineCommentChars<sub>opt</sub> */ * MultiLineCommentChars :: * MultiLineNotAsteriskChar MultiLineCommentChars<sub>opt</sub> * PostAsteriskCommentChars<sub>opt</sub> * PostAsteriskCommentChars :: * MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars<sub>opt</sub> * PostAsteriskCommentChars<sub>opt</sub> * MultiLineNotAsteriskChar :: * SourceCharacter but not * * MultiLineNotForwardSlashOrAsteriskChar :: * SourceCharacter but not one of / or * * </pre> */ private Token readMultiComment() { final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; loop: for (; ; ) { int c = input.get(); while (c == '*') { if ((c = input.get()) == '/') break loop; } if (isLineTerminator(c)) { if (c == '\r') { match('\n'); } incrementLineAndUpdate(); } if (c == EOF) { throw eofError(Messages.Key.UnterminatedComment); } } return Token.COMMENT; }
/**
 * Returns the token following the current token without advancing the stream.
 * <p>
 * The lookahead token is scanned at most once and buffered in {@code next}. If the current
 * token is a name or string, its string value is saved via {@code getString()} before
 * scanning.
 *
 * @return the lookahead token
 */
public Token peekToken() {
    assert current != Token.DIV && current != Token.ASSIGN_DIV;

    if (next != null) {
        // lookahead already available
        return next;
    }
    if (current == Token.NAME || current == Token.STRING) {
        string = getString();
    }
    hasLineTerminator = false;
    nextposition = input.position();
    next = scanTokenNoComment();
    return next;
}
/** * <strong>[B.1.2] String Literals</strong> * * <pre> * OctalEscapeSequence :: * OctalDigit [lookahead ∉ DecimalDigit] * ZeroToThree OctalDigit [lookahead ∉ DecimalDigit] * FourToSeven OctalDigit * ZeroToThree OctalDigit OctalDigit * ZeroToThree :: one of * 0 1 2 3 * FourToSeven :: one of * 4 5 6 7 * </pre> */ private int readOctalEscape(int c) { parser.reportStrictModeSyntaxError(Messages.Key.StrictModeOctalEscapeSequence); int d = (c - '0'); c = input.get(); if (c < '0' || c > '7') { // FIXME: spec bug? behaviour for non-octal decimal digits? input.unget(c); } else { d = d * 8 + (c - '0'); if (d <= 037) { c = input.get(); if (c < '0' || c > '7') { // FIXME: spec bug? behaviour for non-octal decimal digits? input.unget(c); } else { d = d * 8 + (c - '0'); } } } return d; }
/**
 * <strong>[11.8.4] String Literals</strong>
 * <p>
 * Reads the part of a unicode escape sequence that follows the {@code u}: either exactly
 * four hex digits or a brace-delimited sequence of hex digits.
 *
 * <pre>
 * UnicodeEscapeSequence ::
 *     u HexDigit HexDigit HexDigit HexDigit
 *     u{ HexDigits }
 * </pre>
 *
 * @return the decoded code point, in the range {@code 0} to {@code 0x10FFFF}
 */
private int readUnicode() {
    final TokenStreamInput in = this.input;
    int c = in.get();
    if (c != '{') {
        // fixed-width form: four hex digits
        c = (hexDigit(c) << 12) | (hexDigit(in.get()) << 8) | (hexDigit(in.get()) << 4)
                | hexDigit(in.get());
    } else {
        // braced form: accumulate hex digits until '}' or until the value becomes invalid
        int codePoint = 0;
        c = in.get();
        for (;;) {
            codePoint = (codePoint << 4) | hexDigit(c);
            if (codePoint < 0 || codePoint > 0x10FFFF) {
                break;
            }
            c = in.get();
            if (c == '}') {
                break;
            }
        }
        c = (c == '}') ? codePoint : -1;
    }
    if (c < 0 || c > 0x10FFFF) {
        throw error(Messages.Key.InvalidUnicodeEscape);
    }
    return c;
}
/**
 * <strong>[11.8.3] Numeric Literals</strong>
 * <p>
 * Dispatches to the reader for the literal kind selected by the prefix and stores the
 * result in {@code number}.
 *
 * <pre>
 * NumericLiteral ::
 *     DecimalLiteral
 *     BinaryIntegerLiteral
 *     OctalIntegerLiteral
 *     HexIntegerLiteral
 * </pre>
 *
 * @param c the already consumed first character of the literal
 * @return always {@link Token#NUMBER}
 */
private Token readNumberLiteral(int c) {
    if (c != '0') {
        number = readDecimalLiteral(c);
        return Token.NUMBER;
    }

    // leading zero: inspect the next character for a radix prefix
    final int d = input.get();
    switch (d) {
    case 'x':
    case 'X':
        number = readHexIntegerLiteral();
        break;
    case 'b':
    case 'B':
        number = readBinaryIntegerLiteral();
        break;
    case 'o':
    case 'O':
        number = readOctalIntegerLiteral();
        break;
    default: {
        final boolean legacyOctal = isDecimalDigit(d)
                && parser.isEnabled(CompatibilityOption.LegacyOctalIntegerLiteral);
        // not a prefix character: the specialised reader re-reads it
        input.unget(d);
        if (legacyOctal) {
            number = readLegacyOctalIntegerLiteral();
        } else {
            number = readDecimalLiteral(c);
        }
        break;
    }
    }
    return Token.NUMBER;
}
/**
 * <strong>[11.5] Token</strong>
 * <p>
 * Scans and returns the next raw token. Leading whitespace and line terminators are skipped
 * (line terminators update the line state through {@code incrementLineAndUpdate()}).
 * Comments are not skipped here: they are returned as {@link Token#COMMENT}. Returns
 * {@link Token#EOF} at end of input and {@link Token#ERROR} for a character which starts no
 * token.
 *
 * <pre>
 * Token ::
 *     IdentifierName
 *     Punctuator
 *     NumericLiteral
 *     StringLiteral
 *     Template
 * </pre>
 *
 * @return the scanned token
 */
private Token scanToken() {
    TokenStreamInput input = this.input;

    // skip whitespace and line terminators; on exit 'c' is the first character of the token
    int c;
    for (; ; ) {
        c = input.get();
        if (c == TokenStreamInput.EOF) {
            return Token.EOF;
        } else if (c <= 0x20) {
            // low range: TAB, VT, FF, SPACE, LF and CR are handled inline
            if (c == 0x09 || c == 0x0B || c == 0x0C || c == 0x20) {
                // skip over whitespace
                continue;
            }
            if (c == '\n') {
                incrementLineAndUpdate();
                continue;
            }
            if (c == '\r') {
                // consume the '\n' of a \r\n pair so it counts as a single line terminator
                match('\n');
                incrementLineAndUpdate();
                continue;
            }
        } else if (c >= 0xA0) {
            // high range: delegate to the general whitespace/line terminator predicates
            if (isWhitespace(c)) {
                // skip over whitespace
                continue;
            }
            if (isLineTerminator(c)) {
                incrementLineAndUpdate();
                continue;
            }
        }
        break;
    }
    // record the source position; note that the first character has already been consumed
    updateSourcePosition();

    if (DEBUG) System.out.printf("scanToken() -> %c\n", (char) c);

    // dispatch on the first character; multi-character punctuators are resolved by
    // consuming further characters with match(), so the order of the checks matters
    switch (c) {
        case '\'':
        case '"':
            return readString(c);
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return readNumberLiteral(c);
        case 'A':
        case 'B':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
        case 'G':
        case 'H':
        case 'I':
        case 'J':
        case 'K':
        case 'L':
        case 'M':
        case 'N':
        case 'O':
        case 'P':
        case 'Q':
        case 'R':
        case 'S':
        case 'T':
        case 'U':
        case 'V':
        case 'W':
        case 'X':
        case 'Y':
        case 'Z':
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'h':
        case 'i':
        case 'j':
        case 'k':
        case 'l':
        case 'm':
        case 'n':
        case 'o':
        case 'p':
        case 'q':
        case 'r':
        case 's':
        case 't':
        case 'u':
        case 'v':
        case 'w':
        case 'x':
        case 'y':
        case 'z':
        case '$':
        case '_':
            return readIdentifier(c);
        case '{':
            return Token.LC;
        case '}':
            return Token.RC;
        case '(':
            return Token.LP;
        case ')':
            return Token.RP;
        case '[':
            return Token.LB;
        case ']':
            return Token.RB;
        case '.':
            // '.' followed by a digit starts a number, "..." is a single token, else plain dot
            switch (input.peek(0)) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    return readNumberLiteral(c);
                case '.':
                    if (input.peek(1) == '.') {
                        mustMatch('.');
                        mustMatch('.');
                        return Token.TRIPLE_DOT;
                    }
            }
            return Token.DOT;
        case ';':
            return Token.SEMI;
        case ',':
            return Token.COMMA;
        case '~':
            return Token.BITNOT;
        case '?':
            return Token.HOOK;
        case ':':
            return Token.COLON;
        case '<':
            if (match('<')) {
                if (match('=')) {
                    return Token.ASSIGN_SHL;
                } else {
                    return Token.SHL;
                }
            } else if (match('=')) {
                return Token.LE;
            } else if (input.peek(0) == '!' && input.peek(1) == '-' && input.peek(2) == '-' && parser.isEnabled(CompatibilityOption.HTMLComments)) {
                // html start-comment
                mustMatch('!');
                mustMatch('-');
                mustMatch('-');
                readSingleComment();
                return Token.COMMENT;
            } else {
                return Token.LT;
            }
        case '>':
            if (match('>')) {
                if (match('>')) {
                    if (match('=')) {
                        return Token.ASSIGN_USHR;
                    } else {
                        return Token.USHR;
                    }
                } else if (match('=')) {
                    return Token.ASSIGN_SHR;
                } else {
                    return Token.SHR;
                }
            } else if (match('=')) {
                return Token.GE;
            } else {
                return Token.GT;
            }
        case '=':
            if (match('=')) {
                if (match('=')) {
                    return Token.SHEQ;
                } else {
                    return Token.EQ;
                }
            } else if (match('>')) {
                return Token.ARROW;
            } else {
                return Token.ASSIGN;
            }
        case '!':
            if (match('=')) {
                if (match('=')) {
                    return Token.SHNE;
                } else {
                    return Token.NE;
                }
            } else {
                return Token.NOT;
            }
        case '+':
            if (match('+')) {
                return Token.INC;
            } else if (match('=')) {
                return Token.ASSIGN_ADD;
            } else {
                return Token.ADD;
            }
        case '-':
            if (match('-')) {
                // "-->" is only a comment when a line terminator was seen before this token
                if (input.peek(0) == '>' && hasLineTerminator && parser.isEnabled(CompatibilityOption.HTMLComments)) {
                    // html end-comment at line start
                    mustMatch('>');
                    readSingleComment();
                    return Token.COMMENT;
                }
                return Token.DEC;
            } else if (match('=')) {
                return Token.ASSIGN_SUB;
            } else {
                return Token.SUB;
            }
        case '*':
            if (match('=')) {
                return Token.ASSIGN_MUL;
            } else {
                return Token.MUL;
            }
        case '%':
            if (match('=')) {
                return Token.ASSIGN_MOD;
            } else {
                return Token.MOD;
            }
        case '/':
            // division operators and comments; regular expressions are not recognised here
            if (match('=')) {
                return Token.ASSIGN_DIV;
            } else if (match('/')) {
                readSingleComment();
                return Token.COMMENT;
            } else if (match('*')) {
                readMultiComment();
                return Token.COMMENT;
            } else {
                return Token.DIV;
            }
        case '&':
            if (match('&')) {
                return Token.AND;
            } else if (match('=')) {
                return Token.ASSIGN_BITAND;
            } else {
                return Token.BITAND;
            }
        case '|':
            if (match('|')) {
                return Token.OR;
            } else if (match('=')) {
                return Token.ASSIGN_BITOR;
            } else {
                return Token.BITOR;
            }
        case '^':
            if (match('=')) {
                return Token.ASSIGN_BITXOR;
            } else {
                return Token.BITXOR;
            }
        case '`':
            return Token.TEMPLATE;
    }

    // not handled by the switch: an identifier starting with a unicode escape or with a
    // character outside the ASCII letters listed above
    if (c == '\\') {
        mustMatch('u');
        c = readUnicode();
    }
    if (isIdentifierStart(c)) {
        return readIdentifier(c);
    }

    return Token.ERROR;
}
/**
 * Advances the line counter, marks the current input position as the start of the new line
 * and records that a line terminator has been seen.
 */
private void incrementLineAndUpdate() {
    hasLineTerminator = true;
    linestart = input.position();
    line++;
}
/**
 * Returns the current column, computed as the offset of the current input position from
 * the start of the current line.
 *
 * @return the current input position minus the line start position
 */
public int getColumn() {
    final int currentPosition = input.position();
    return currentPosition - linestart;
}
/**
 * Consumes the next input character and requires it to be {@code c}.
 *
 * @param c the required character; it is also passed as the argument of the
 *        {@code UnexpectedCharacter} error raised on mismatch
 */
private void mustMatch(char c) {
    final int actual = input.get();
    if (actual == c) {
        return;
    }
    throw error(Messages.Key.UnexpectedCharacter, String.valueOf(c));
}
/**
 * Delegates to {@code input.match(c)}.
 *
 * @param c the character to match
 * @return the result of {@code input.match(c)}
 */
private boolean match(char c) {
    final boolean matched = input.match(c);
    return matched;
}
/**
 * Stores the packed source position of the token being scanned in
 * {@code nextSourcePosition}: the column (input position relative to the line start) is
 * placed in the upper 32 bits and combined with the line number.
 */
private void updateSourcePosition() {
    final int column = input.position() - linestart;
    final long packedColumn = ((long) column) << 32;
    nextSourcePosition = packedColumn | line;
}
/**
 * Returns the input text for the given range by delegating to {@code input.range(from, to)}.
 *
 * @param from the start of the range, passed through unchanged
 * @param to the end of the range, passed through unchanged
 * @return the string returned by the underlying input
 */
public String range(int from, int to) {
    final String text = input.range(from, to);
    return text;
}
/**
 * Advances the line counter and marks the current input position as the start of the new
 * line. Unlike {@code incrementLineAndUpdate()}, the line-terminator flag is left untouched.
 */
private void incrementLine() {
    linestart = input.position();
    line++;
}
/** * <strong>[11.8.4] String Literals</strong> * * <pre> * StringLiteral :: * " DoubleStringCharacters<sub>opt</sub> " * ' SingleStringCharacters<sub>opt</sub> ' * DoubleStringCharacters :: * DoubleStringCharacter DoubleStringCharacters<sub>opt</sub> * SingleStringCharacters :: * SingleStringCharacter SingleStringCharacters<sub>opt</sub> * DoubleStringCharacter :: * SourceCharacter but not one of " or \ or LineTerminator * \ EscapeSequence * LineContinuation * SingleStringCharacter :: * SourceCharacter but not one of ' or \ or LineTerminator * \ EscapeSequence * LineContinuation * LineContinuation :: * \ LineTerminatorSequence * EscapeSequence :: * CharacterEscapeSequence * 0 [lookahead ∉ DecimalDigit] * HexEscapeSequence * UnicodeEscapeSequence * CharacterEscapeSequence :: * SingleEscapeCharacter * NonEscapeCharacter * SingleEscapeCharacter :: one of * ' " \ b f n r t v * NonEscapeCharacter :: * SourceCharacter but not one of EscapeCharacter or LineTerminator * EscapeCharacter :: * SingleEscapeCharacter * DecimalDigit * x * u * HexEscapeSequence :: * x HexDigit HexDigit * UnicodeEscapeSequence :: * u HexDigit HexDigit HexDigit HexDigit * u{ HexDigits } * </pre> * * <strong>[B.1.2] String Literals</strong> * * <pre> * EscapeSequence :: * CharacterEscapeSequence * OctalEscapeSequence * HexEscapeSequence * UnicodeEscapeSequence * </pre> */ private Token readString(int quoteChar) { assert quoteChar == '"' || quoteChar == '\''; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; int start = input.position(); StringBuffer buffer = this.buffer(); hasEscape = false; for (; ; ) { int c = input.get(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedStringLiteral); } if (c == quoteChar) { buffer.add(input.range(start, input.position() - 1)); break; } if (isLineTerminator(c)) { throw error(Messages.Key.UnterminatedStringLiteral); } if (c != '\\') { continue; } buffer.add(input.range(start, input.position() - 1)); hasEscape = true; c = 
input.get(); if (isLineTerminator(c)) { // line continuation if (c == '\r' && match('\n')) { // \r\n sequence } incrementLine(); start = input.position(); continue; } // escape sequences switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\u000B'; break; case 'x': c = (hexDigit(input.get()) << 4) | hexDigit(input.get()); if (c < 0) { throw error(Messages.Key.InvalidHexEscape); } break; case 'u': c = readUnicode(); break; case '0': if (isDecimalDigit(input.peek(0))) { if (!parser.isEnabled(CompatibilityOption.OctalEscapeSequence)) { throw error(Messages.Key.InvalidNULLEscape); } c = readOctalEscape(c); } else { c = '\0'; } break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': if (!parser.isEnabled(CompatibilityOption.OctalEscapeSequence)) { throw error(Messages.Key.StrictModeOctalEscapeSequence); } c = readOctalEscape(c); break; case '8': case '9': // FIXME: spec bug - undefined behaviour for \8 and \9 if (!parser.isEnabled(CompatibilityOption.OctalEscapeSequence)) { throw error(Messages.Key.StrictModeOctalEscapeSequence); } // fall-through case '"': case '\'': case '\\': default: // fall-through } buffer.addCodepoint(c); start = input.position(); } return Token.STRING; }