Java TokenClassification Examples, TokenClassification Java Examples

Example #1

0

Show file

File: BufferingRenderer.java Project: fenglang0203/caja

 /**
  * Produces a single-line block-comment form of a comment token.
  *
  * <p>A line comment has its leading two characters replaced by a block
  * comment opener and gets a block comment closer appended; any other token
  * is used as is. Every character that {@code JsLexer.isJsLineSeparator}
  * accepts is then replaced by a space, and every {@code *}{@code /} pair
  * other than the one ending the token is broken up by overwriting its
  * slash with a space.
  *
  * @param token the comment token to rewrite
  * @return the rewritten comment text
  */
 private static String removeLinebreaksFromComment(String token) {
   String blockForm = TokenClassification.isLineComment(token)
       ? "/*" + token.substring(2) + "*/"
       : token;
   StringBuilder comment = new StringBuilder(blockForm);
   int length = comment.length();

   // Section 5.1.2 treats a MultiLineComment differently when it contains a
   // line-terminator char, so blank out every one of them.
   for (int pos = 0; pos < length; ++pos) {
     if (JsLexer.isJsLineSeparator(comment.charAt(pos))) {
       comment.setCharAt(pos, ' ');
     }
   }

   // Wrapping a line comment in block delimiters can make an embedded closer
   // lexically significant.  Starting the search three characters from the
   // end skips the real closer; each hit is defused by blanking its slash,
   // and the search resumes just to the left of that hit.
   int closer = comment.lastIndexOf("*/", length - 3);
   while (closer >= 0) {
     comment.setCharAt(closer + 1, ' ');
     closer = comment.lastIndexOf("*/", closer - 1);
   }
   return comment.toString();
 }

Example #2

0

Show file

File: Spacer.java Project: fenglang0203/caja

  /**
   * Emits a single token, deciding from its classification and from the
   * previously emitted token whether whitespace is written around it.
   *
   * <p>Tokens for which {@code TokenClassification.classify} returns
   * {@code null} are dropped. LINEBREAK, SPACE and COMMENT tokens are handled
   * up front and return early without updating {@code lastClass} or
   * {@code lastToken}. For every other token a {@code spaceBefore} /
   * {@code spaceAfter} pair is computed in three passes: class-based
   * adjacency rules, rules for OTHER tokens, and single-character
   * punctuation rules. Later passes overwrite the decisions of earlier ones,
   * so the order of the passes matters.
   *
   * @param text the token to render
   */
  void processToken(String text) {
    TokenClassification tClass = TokenClassification.classify(text);
    // No classification: skip the token without touching any state.
    if (tClass == null) {
      return;
    }
    switch (tClass) {
      case LINEBREAK:
        // Allow external code to force line-breaks.
        // This allows us to create a composite-renderer that renders
        // original source code next to translated source code.
        emit("\n");
        return;
      case SPACE:
        // Only record the request; the next comment or token consults the flag.
        pendingSpace = true;
        return;
      case COMMENT:
        // When the mark reports a different line than the last one recorded,
        // start the comment on a fresh line; otherwise separate it with a
        // space if one was requested or if the previous token was "/"
        // (presumably so the slash cannot fuse with the comment opener).
        if (mark != null && lastLine != mark.startLineNo()) {
          newline();
          lastLine = mark.startLineNo();
        } else if ("/".equals(lastToken) || pendingSpace) {
          space();
        }
        pendingSpace = false;
        emit(text);
        // A line comment is followed by a newline; any other comment leaves
        // a pending space request for whatever comes next.
        if (text.startsWith("//")) {
          newline();
          pendingSpace = false;
        } else {
          pendingSpace = true;
        }
        return;
      default:
        break;
    }

    // Start from the pending request; the rules below may override it.
    boolean spaceBefore = pendingSpace;
    pendingSpace = false;
    boolean spaceAfter = false;

    // Determine which pairs of tokens cannot be adjacent and put a space
    // between them.
    if (tClass == lastClass) {
      // Adjacent punctuation, strings, and words require space.
      // Numbers and words are both of type OTHER.
      // This decision may be revisited in the following to prevent
      // excessive space inside parentheses.
      spaceBefore = !"(".equals(lastToken);
    } else if (lastClass == TokenClassification.REGEX) {
      if (tClass == TokenClassification.OTHER || "/".equals(text)) {
        // Make sure words don't run into regex flags, and that / operator
        // does not combine with end of regex to make a line comment.
        spaceBefore = true;
      }
    } else if (tClass == TokenClassification.REGEX && "/".equals(lastToken)) {
      // Allowing these two tokens to run together could introduce a line
      // comment.
      spaceBefore = true;
    } else if (tClass == TokenClassification.OTHER
        && Character.isDigit(text.charAt(0))
        && ".".equals(lastToken)) {
      // Following a dot operator with a number is illegal syntactically, but
      // this renderer should not allow any lexical confusion.
      spaceBefore = true;
    }

    if (tClass == TokenClassification.OTHER) {
      // An OTHER token directly after a closing curly bracket is always
      // separated from it.
      if ("}".equals(lastToken)) {
        spaceBefore = true;
      }
      if (isKeyword(text.toString())) {
        // Put a space between if and other keywords and the parenthesis.
        spaceAfter = true;
      }
    }

    // If this token is an open bracket, we want to indent, but not before
    // writing the token to avoid over-indenting the open bracket.
    // Only single-character tokens are examined here; each case below may
    // overwrite the spaceBefore/spaceAfter values chosen above.
    if (text.length() == 1) {
      char ch0 = text.charAt(0);
      switch (ch0) {
        case '{':
          if (lastClass == TokenClassification.PUNCTUATION) {
            if (":".equals(lastToken)) { // See JSON test.
              spaceBefore = true;
            } else if (!(")".equals(lastToken) || "=".equals(lastToken))) {
              // If starting a block following a parenthesized condition, or
              // an object literal assigned.
              spaceBefore = !("(".equals(lastToken) || "[".equals(lastToken));
            }
          }
          spaceAfter = true;
          break;
        case '[':
          if (")".equals(lastToken)) {
            spaceBefore = false;
          }
          spaceAfter = true;
          break;
        case '(':
          if (")".equals(lastToken)) { // Calling a parenthesized value.
            spaceBefore = false;
          }
          break;
        case '}':
          // No space inside an empty pair of curly brackets.
          spaceBefore = !"{".equals(lastToken);
          spaceAfter = true;
          break;
        case ')':
          spaceBefore = false;
          spaceAfter = true;
          break;
        case ']':
          spaceBefore = !"}".equals(lastToken);
          spaceAfter = true;
          break;
        case ',':
          spaceBefore = false;
          spaceAfter = true;
          break;
        case ';':
          spaceBefore = false;
          spaceAfter = true;
          break;
        case ':':
          spaceBefore = ":".equals(lastToken); // Since :: is a token in ES4
          spaceAfter = true;
          break;
        case '=':
          spaceBefore = true;
          spaceAfter = true;
          break;
        case '.':
          // Separate the dot from a preceding number or a preceding dot;
          // otherwise it is written flush against the previous token.
          spaceBefore =
              lastToken != null
                  && (TokenClassification.isNumber(lastToken) || ".".equals(lastToken));
          spaceAfter = false;
          break;
      }
    }

    // Write any whitespace before the token.
    if (spaceBefore) {
      space();
    }

    // Actually write the token.
    emit(text);

    // The trailing space is not written now; it becomes the pending request
    // seen by the next call.
    pendingSpace = spaceAfter;

    // Remember this token for the adjacency decisions of the next call.
    lastClass = tClass;
    lastToken = text;
    if (mark != null) {
      lastLine = mark.startLineNo();
    }
  }

Example #3

0

Show file

File: BufferingRenderer.java Project: fenglang0203/caja

  /**
   * Writes every buffered token to {@code out}, deciding what whitespace goes
   * between tokens and rewriting comments wherever a line break is not
   * allowed, and then forwards the end-of-input signal to {@code out}.
   *
   * @throws NullPointerException if out raises an IOException and ioExceptionHandler is null.
   */
  public final void noMoreTokens() {
    JsTokenAdjacencyChecker adjacency = new JsTokenAdjacencyChecker();
    List<String> tokens = splitTokens(pending);
    pending.clear();

    String prevCodeToken = null;  // Most recent token written that is not a comment.
    String queuedSpace = null;    // Whitespace waiting to be written before the next token.
    boolean atStart = true;       // True until the first token has been written.

    int count = tokens.size();
    for (int idx = 0; idx < count; ++idx) {
      String current = tokens.get(idx);

      // Whitespace tokens are never written directly; they are only queued.
      boolean isWhitespace = current.charAt(0) == '\n' || " ".equals(current);
      if (isWhitespace) {
        queuedSpace = current;
        continue;
      }

      if (TokenClassification.isComment(current)) {
        // A line comment needs a newline to end it, and ES3 and ES5 say a
        // multi-line block comment counts as a newline for semicolon
        // insertion.  Either would break a restricted production if no break
        // is allowed at this point, so look ahead to the next token that is
        // neither whitespace nor a comment and, if a break between the
        // surrounding tokens is not allowed, strip the line breaks from the
        // comment.  Implementations disagree on whether a comment acts as a
        // newline, but the rewrite is safe either way.
        String following = null;
        for (int look = idx + 1; look < count; ++look) {
          String candidate = tokens.get(look);
          boolean skippable;
          switch (TokenClassification.classify(candidate)) {
            case SPACE:
            case LINEBREAK:
            case COMMENT:
              skippable = true;
              break;
            default:
              skippable = false;
              break;
          }
          if (!skippable) {
            following = candidate;
            break;
          }
        }
        if (!JsRenderUtil.canBreakBetween(prevCodeToken, following)) {
          current = removeLinebreaksFromComment(current);
          // Whatever whitespace was queued, it must not contain a newline now.
          queuedSpace = (queuedSpace == null) ? null : " ";
        }
      }

      boolean mustSeparate = adjacency.needSpaceBefore(current);
      if (mustSeparate && queuedSpace == null) {
        queuedSpace = " ";
      }

      if (queuedSpace != null) {
        boolean startsWithNewline = queuedSpace.charAt(0) == '\n';
        if (startsWithNewline) {
          if (JsRenderUtil.canBreakBetween(prevCodeToken, current)) {
            // Drop the leading newline when nothing has been written yet.
            if (atStart) {
              queuedSpace = queuedSpace.substring(1);
            }
          } else {
            // A break is not allowed here; downgrade it to a plain space.
            queuedSpace = " ";
          }
        }
        out.append(queuedSpace);
        queuedSpace = null;
      }

      out.append(current);
      atStart = false;
      if (!TokenClassification.isComment(current)) {
        prevCodeToken = current;
      }
    }
    out.noMoreTokens();
  }