示例#1
0
 /**
  * Implements the global {@code unescape} function (ECMA-262 15.1.2.5).
  *
  * <p>The first argument is converted to a string via {@code ScriptRuntime.toString}. Each
  * {@code %XX} (two hex digits) or {@code %uXXXX} (four hex digits) sequence is replaced by the
  * character with that code. A sequence that is truncated, or for which {@code
  * Kit.xDigitToInt} yields a negative accumulator, is left in place and copied literally.
  *
  * @param args the call arguments; only index 0 is consulted
  * @return the decoded string, or the converted input itself when it contains no {@code '%'}
  */
 private Object js_unescape(Object[] args) {
   String input = ScriptRuntime.toString(args, 0);
   int firstPercent = input.indexOf('%');
   if (firstPercent < 0) {
     // Nothing to decode; hand back the converted argument untouched.
     return input;
   }

   char[] chars = input.toCharArray();
   int length = chars.length;
   // Characters before the first '%' are already where they belong, so both cursors start
   // there and the array is compacted in place.
   int writePos = firstPercent;
   int readPos = firstPercent;
   while (readPos < length) {
     char current = chars[readPos++];
     if (current == '%' && readPos < length) {
       boolean longForm = chars[readPos] == 'u';
       int digitsFrom = longForm ? readPos + 1 : readPos;
       int digitsTo = longForm ? readPos + 5 : readPos + 2;
       if (digitsTo <= length) {
         int code = 0;
         for (int i = digitsFrom; i < digitsTo; i++) {
           code = Kit.xDigitToInt(chars[i], code);
         }
         if (code >= 0) {
           // Valid escape: emit the decoded character and skip past its digits.
           current = (char) code;
           readPos = digitsTo;
         }
       }
     }
     chars[writePos++] = current;
   }
   return new String(chars, 0, writePos);
 }
示例#2
0
  /**
   * Scans the next token from the input and returns its {@code Token} type code.
   *
   * <p>Characters for which {@code isJSSpace} is true are skipped first. A newline produces
   * {@code Token.EOL} and end of input produces {@code Token.EOF}. Otherwise the first
   * significant character is classified, in this order:
   *
   * <ol>
   *   <li>{@code @} yields {@code Token.XMLATTR};
   *   <li>an identifier start (or a backslash followed by {@code u}) begins an identifier, which
   *       is returned as a keyword token or as {@code Token.NAME};
   *   <li>a digit, or a {@code .} followed by a digit, begins a numeric literal
   *       ({@code Token.NUMBER});
   *   <li>a single or double quote begins a string literal ({@code Token.STRING});
   *   <li>anything else is handled by the punctuator switch, which also recognizes line, block,
   *       JSDoc and HTML-style comments ({@code Token.COMMENT}).
   * </ol>
   *
   * <p>Fields written by this method: {@code tokenBeg} and {@code tokenEnd} (initially set to
   * cover one character; adjusted in the comment branches and on an unterminated string),
   * {@code string} (identifier, string or numeric literal text), {@code number} (numeric value),
   * {@code isOctal}, {@code quoteChar}, {@code commentType}, {@code dirtyLine} and
   * {@code stringBufferTop}. Problems are reported through {@code parser.addError} and
   * {@code parser.addWarning}; on an error the method returns {@code Token.ERROR}, except for an
   * unterminated block comment, which is still returned as {@code Token.COMMENT}.
   *
   * @return the type code of the scanned token
   * @throws IOException declared by the signature; presumably raised by the character-reading
   *     helpers, whose bodies are not visible here
   */
  final int getToken() throws IOException {
    int c;

    // NOTE(review): no `continue retry` or `break retry` appears in this method, so the label
    // looks unused; every path through the loop body returns.
    retry:
    for (; ; ) {
      // Eat whitespace, possibly sensitive to newlines.
      for (; ; ) {
        c = getChar();
        if (c == EOF_CHAR) {
          tokenBeg = cursor - 1;
          tokenEnd = cursor;
          return Token.EOF;
        } else if (c == '\n') {
          dirtyLine = false;
          tokenBeg = cursor - 1;
          tokenEnd = cursor;
          return Token.EOL;
        } else if (!isJSSpace(c)) {
          // A leading '-' does not mark the line dirty, so that the '-' case below can still
          // treat "-->" at the start of a line as an HTML end-comment.
          if (c != '-') {
            dirtyLine = true;
          }
          break;
        }
      }

      // Assume the token will be 1 char - fixed up below.
      tokenBeg = cursor - 1;
      tokenEnd = cursor;

      if (c == '@') return Token.XMLATTR;

      // identifier/keyword/instanceof?
      // watch out for starting with a <backslash>
      boolean identifierStart;
      boolean isUnicodeEscapeStart = false;
      if (c == '\\') {
        c = getChar();
        if (c == 'u') {
          identifierStart = true;
          isUnicodeEscapeStart = true;
          stringBufferTop = 0;
        } else {
          // Not an escape: push the lookahead back and continue with the backslash itself,
          // which ends up in the default branch of the punctuator switch.
          identifierStart = false;
          ungetChar(c);
          c = '\\';
        }
      } else {
        identifierStart = Character.isJavaIdentifierStart((char) c);
        if (identifierStart) {
          stringBufferTop = 0;
          addToString(c);
        }
      }

      if (identifierStart) {
        // Remembers whether any escape occurred; escaped identifiers skip the keyword lookup.
        boolean containsEscape = isUnicodeEscapeStart;
        for (; ; ) {
          if (isUnicodeEscapeStart) {
            // strictly speaking we should probably push-back
            // all the bad characters if the <backslash>uXXXX
            // sequence is malformed. But since there isn't a
            // correct context(is there?) for a bad Unicode
            // escape sequence in an identifier, we can report
            // an error here.
            int escapeVal = 0;
            for (int i = 0; i != 4; ++i) {
              c = getChar();
              escapeVal = Kit.xDigitToInt(c, escapeVal);
              // Next check takes care about c < 0 and bad escape
              if (escapeVal < 0) {
                break;
              }
            }
            if (escapeVal < 0) {
              parser.addError("msg.invalid.escape");
              return Token.ERROR;
            }
            addToString(escapeVal);
            isUnicodeEscapeStart = false;
          } else {
            c = getChar();
            if (c == '\\') {
              c = getChar();
              if (c == 'u') {
                isUnicodeEscapeStart = true;
                containsEscape = true;
              } else {
                parser.addError("msg.illegal.character");
                return Token.ERROR;
              }
            } else {
              if (c == EOF_CHAR
                  || c == BYTE_ORDER_MARK
                  || !Character.isJavaIdentifierPart((char) c)) {
                break;
              }
              addToString(c);
            }
          }
        }
        // The character that terminated the identifier belongs to the next token.
        ungetChar(c);

        String str = getStringFromBuffer();
        if (!containsEscape) {
          // OPT we shouldn't have to make a string (object!) to
          // check if it's a keyword.

          // Return the corresponding token if it's a keyword
          int result = stringToKeyword(str);
          if (result != Token.EOF) {
            if ((result == Token.LET || result == Token.YIELD)
                && parser.compilerEnv.getLanguageVersion() < Context.VERSION_1_7) {
              // LET and YIELD are tokens only in 1.7 and later
              string = result == Token.LET ? "let" : "yield";
              result = Token.NAME;
            }
            // A reserved word is returned as RESERVED unless the environment allows reserved
            // keywords as identifiers, in which case it falls through and becomes a NAME.
            if (result != Token.RESERVED) {
              return result;
            } else if (!parser.compilerEnv.isReservedKeywordAsIdentifier()) {
              return result;
            }
          }
        }
        this.string = (String) allStrings.intern(str);
        return Token.NAME;
      }

      // is it a number?
      if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
        isOctal = false;
        stringBufferTop = 0;
        int base = 10;

        // A leading '0' selects hex ("0x"/"0X") or octal (followed by a digit); in both cases
        // the prefix is not stored in the buffer. A lone '0' is stored as a decimal digit.
        if (c == '0') {
          c = getChar();
          if (c == 'x' || c == 'X') {
            base = 16;
            c = getChar();
          } else if (isDigit(c)) {
            base = 8;
            isOctal = true;
          } else {
            addToString('0');
          }
        }

        if (base == 16) {
          while (0 <= Kit.xDigitToInt(c, 0)) {
            addToString(c);
            c = getChar();
          }
        } else {
          while ('0' <= c && c <= '9') {
            /*
             * We permit 08 and 09 as decimal numbers, which
             * makes our behavior a superset of the ECMA
             * numeric grammar.  We might not always be so
             * permissive, so we warn about it.
             */
            if (base == 8 && c >= '8') {
              parser.addWarning("msg.bad.octal.literal", c == '8' ? "8" : "9");
              base = 10;
            }
            addToString(c);
            c = getChar();
          }
        }

        boolean isInteger = true;

        // Fraction and exponent parts are only accepted for base-10 literals.
        if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
          isInteger = false;
          if (c == '.') {
            do {
              addToString(c);
              c = getChar();
            } while (isDigit(c));
          }
          if (c == 'e' || c == 'E') {
            addToString(c);
            c = getChar();
            if (c == '+' || c == '-') {
              addToString(c);
              c = getChar();
            }
            if (!isDigit(c)) {
              parser.addError("msg.missing.exponent");
              return Token.ERROR;
            }
            do {
              addToString(c);
              c = getChar();
            } while (isDigit(c));
          }
        }
        // The first character after the literal belongs to the next token.
        ungetChar(c);
        String numString = getStringFromBuffer();
        this.string = numString;

        double dval;
        if (base == 10 && !isInteger) {
          try {
            // Use Java conversion to number from string...
            dval = Double.valueOf(numString).doubleValue();
          } catch (NumberFormatException ex) {
            parser.addError("msg.caught.nfe");
            return Token.ERROR;
          }
        } else {
          dval = ScriptRuntime.stringToNumber(numString, 0, base);
        }

        this.number = dval;
        return Token.NUMBER;
      }

      // is it a string?
      if (c == '"' || c == '\'') {
        // We attempt to accumulate a string the fast way, by
        // building it directly out of the reader.  But if there
        // are any escaped characters in the string, we revert to
        // building it out of a StringBuffer.

        quoteChar = c;
        stringBufferTop = 0;

        c = getChar();
        // Invariant at the top of each iteration: c is the next unprocessed character.
        // `continue strLoop` therefore re-examines c without appending it first.
        strLoop:
        while (c != quoteChar) {
          if (c == '\n' || c == EOF_CHAR) {
            ungetChar(c);
            tokenEnd = cursor;
            parser.addError("msg.unterminated.string.lit");
            return Token.ERROR;
          }

          if (c == '\\') {
            // We've hit an escaped character
            int escapeVal;

            c = getChar();
            switch (c) {
              case 'b':
                c = '\b';
                break;
              case 'f':
                c = '\f';
                break;
              case 'n':
                c = '\n';
                break;
              case 'r':
                c = '\r';
                break;
              case 't':
                c = '\t';
                break;

                // \v a late addition to the ECMA spec,
                // it is not in Java, so use 0xb
              case 'v':
                c = 0xb;
                break;

              case 'u':
                // Get 4 hex digits; if the u escape is not
                // followed by 4 hex digits, use 'u' + the
                // literal character sequence that follows.
                int escapeStart = stringBufferTop;
                addToString('u');
                escapeVal = 0;
                for (int i = 0; i != 4; ++i) {
                  c = getChar();
                  escapeVal = Kit.xDigitToInt(c, escapeVal);
                  if (escapeVal < 0) {
                    // Bad digit: keep the 'u' and the digits already buffered, and let the
                    // loop handle the offending character normally.
                    continue strLoop;
                  }
                  addToString(c);
                }
                // prepare for replace of stored 'u' sequence
                // by escape value
                stringBufferTop = escapeStart;
                c = escapeVal;
                break;
              case 'x':
                // Get 2 hex digits, defaulting to 'x'+literal
                // sequence, as above.
                c = getChar();
                escapeVal = Kit.xDigitToInt(c, 0);
                if (escapeVal < 0) {
                  addToString('x');
                  continue strLoop;
                } else {
                  int c1 = c;
                  c = getChar();
                  escapeVal = Kit.xDigitToInt(c, escapeVal);
                  if (escapeVal < 0) {
                    addToString('x');
                    addToString(c1);
                    continue strLoop;
                  } else {
                    // got 2 hex digits
                    c = escapeVal;
                  }
                }
                break;

              case '\n':
                // Remove line terminator after escape to follow
                // SpiderMonkey and C/C++
                c = getChar();
                continue strLoop;

              default:
                // Up to three octal digits form a numeric escape. Any other character is
                // left unchanged and appended below, so the backslash is simply dropped.
                if ('0' <= c && c < '8') {
                  int val = c - '0';
                  c = getChar();
                  if ('0' <= c && c < '8') {
                    val = 8 * val + c - '0';
                    c = getChar();
                    if ('0' <= c && c < '8' && val <= 037) {
                      // c is 3rd char of octal sequence only
                      // if the resulting val <= 0377
                      val = 8 * val + c - '0';
                      c = getChar();
                    }
                  }
                  ungetChar(c);
                  c = val;
                }
            }
          }
          addToString(c);
          c = getChar();
        }

        String str = getStringFromBuffer();
        this.string = (String) allStrings.intern(str);
        return Token.STRING;
      }

      // Punctuators, operators and comments. matchChar is used throughout as a conditional
      // one-character lookahead.
      switch (c) {
        case ';':
          return Token.SEMI;
        case '[':
          return Token.LB;
        case ']':
          return Token.RB;
        case '{':
          return Token.LC;
        case '}':
          return Token.RC;
        case '(':
          return Token.LP;
        case ')':
          return Token.RP;
        case ',':
          return Token.COMMA;
        case '?':
          return Token.HOOK;
        case ':':
          if (matchChar(':')) {
            return Token.COLONCOLON;
          } else {
            return Token.COLON;
          }
        case '.':
          if (matchChar('.')) {
            return Token.DOTDOT;
          } else if (matchChar('(')) {
            return Token.DOTQUERY;
          } else {
            return Token.DOT;
          }

        case '|':
          if (matchChar('|')) {
            return Token.OR;
          } else if (matchChar('=')) {
            return Token.ASSIGN_BITOR;
          } else {
            return Token.BITOR;
          }

        case '^':
          if (matchChar('=')) {
            return Token.ASSIGN_BITXOR;
          } else {
            return Token.BITXOR;
          }

        case '&':
          if (matchChar('&')) {
            return Token.AND;
          } else if (matchChar('=')) {
            return Token.ASSIGN_BITAND;
          } else {
            return Token.BITAND;
          }

        case '=':
          if (matchChar('=')) {
            if (matchChar('=')) {
              return Token.SHEQ;
            } else {
              return Token.EQ;
            }
          } else {
            return Token.ASSIGN;
          }

        case '!':
          if (matchChar('=')) {
            if (matchChar('=')) {
              return Token.SHNE;
            } else {
              return Token.NE;
            }
          } else {
            return Token.NOT;
          }

        case '<':
          /* NB:treat HTML begin-comment as comment-till-eol */
          if (matchChar('!')) {
            if (matchChar('-')) {
              if (matchChar('-')) {
                tokenBeg = cursor - 4;
                skipLine();
                commentType = Token.CommentType.HTML;
                return Token.COMMENT;
              }
              ungetCharIgnoreLineEnd('-');
            }
            ungetCharIgnoreLineEnd('!');
          }
          if (matchChar('<')) {
            if (matchChar('=')) {
              return Token.ASSIGN_LSH;
            } else {
              return Token.LSH;
            }
          } else {
            if (matchChar('=')) {
              return Token.LE;
            } else {
              return Token.LT;
            }
          }

        case '>':
          if (matchChar('>')) {
            if (matchChar('>')) {
              if (matchChar('=')) {
                return Token.ASSIGN_URSH;
              } else {
                return Token.URSH;
              }
            } else {
              if (matchChar('=')) {
                return Token.ASSIGN_RSH;
              } else {
                return Token.RSH;
              }
            }
          } else {
            if (matchChar('=')) {
              return Token.GE;
            } else {
              return Token.GT;
            }
          }

        case '*':
          if (matchChar('=')) {
            return Token.ASSIGN_MUL;
          } else {
            return Token.MUL;
          }

        case '/':
          markCommentStart();
          // is it a // comment?
          if (matchChar('/')) {
            tokenBeg = cursor - 2;
            skipLine();
            commentType = Token.CommentType.LINE;
            return Token.COMMENT;
          }
          // is it a /* or /** comment?
          if (matchChar('*')) {
            // True when the previous character was '*', i.e. a following '/' closes the
            // comment. It starts out true for "/**" so that "/**/" is a complete comment.
            boolean lookForSlash = false;
            tokenBeg = cursor - 2;
            if (matchChar('*')) {
              lookForSlash = true;
              commentType = Token.CommentType.JSDOC;
            } else {
              commentType = Token.CommentType.BLOCK_COMMENT;
            }
            for (; ; ) {
              c = getChar();
              if (c == EOF_CHAR) {
                tokenEnd = cursor - 1;
                parser.addError("msg.unterminated.comment");
                return Token.COMMENT;
              } else if (c == '*') {
                lookForSlash = true;
              } else if (c == '/') {
                if (lookForSlash) {
                  tokenEnd = cursor;
                  return Token.COMMENT;
                }
              } else {
                lookForSlash = false;
                tokenEnd = cursor;
              }
            }
          }

          if (matchChar('=')) {
            return Token.ASSIGN_DIV;
          } else {
            return Token.DIV;
          }

        case '%':
          if (matchChar('=')) {
            return Token.ASSIGN_MOD;
          } else {
            return Token.MOD;
          }

        case '~':
          return Token.BITNOT;

        case '+':
          if (matchChar('=')) {
            return Token.ASSIGN_ADD;
          } else if (matchChar('+')) {
            return Token.INC;
          } else {
            return Token.ADD;
          }

        case '-':
          // In this case c is reused to hold the token code to return, because dirtyLine
          // must be set after the "-->" check and before returning.
          if (matchChar('=')) {
            c = Token.ASSIGN_SUB;
          } else if (matchChar('-')) {
            if (!dirtyLine) {
              // treat HTML end-comment after possible whitespace
              // after line start as comment-until-eol
              if (matchChar('>')) {
                markCommentStart("--");
                skipLine();
                commentType = Token.CommentType.HTML;
                return Token.COMMENT;
              }
            }
            c = Token.DEC;
          } else {
            c = Token.SUB;
          }
          dirtyLine = true;
          return c;

        default:
          parser.addError("msg.illegal.character");
          return Token.ERROR;
      }
    }
  }