Example #1
0
 /**
  * Creates a compiler over the given IHTML trees, stylesheets, and script placeholders.
  *
  * <p>The root and stylesheet lists are copied into new lists, and each script placeholder is
  * indexed by the placeholder ID of its source.
  *
  * @param ihtmlRoots roots of trees to process and the baseURI used to resolve URIs in those
  *     nodes.
  * @param validatedStylesheets CSS style-sheets that have had unsafe constructs removed and had
  *     rules rewritten.
  * @param placeholderScripts placeholder IDs per unsanitized JS programs. Scripts are extracted
  *     early on and turned into separate jobs, so that cached results can be used for scripts
  *     even when the non-script details of the containing HTML page change.
  * @param cssSchema specifies how STYLE attributes are rewritten.
  * @param htmlSchema specifies how elements and attributes are handled.
  * @param meta specifies how URLs and other attributes are rewritten.
  * @param mc message context; stored on this instance for later use.
  * @param mq receives messages about invalid attribute values.
  */
 public TemplateCompiler(
     List<? extends IhtmlRoot> ihtmlRoots,
     List<? extends ValidatedStylesheet> validatedStylesheets,
     List<? extends ScriptPlaceholder> placeholderScripts,
     CssSchema cssSchema,
     HtmlSchema htmlSchema,
     PluginMeta meta,
     MessageContext mc,
     MessageQueue mq) {
   // Plain reference hand-offs first.
   this.htmlSchema = htmlSchema;
   this.meta = meta;
   this.mc = mc;
   this.mq = mq;

   // Private copies of the caller's lists.
   this.ihtmlRoots = Lists.newArrayList(ihtmlRoots);
   this.validatedStylesheets = Lists.newArrayList(validatedStylesheets);

   // Index every extracted script by the ID of the placeholder that stands in for it.
   for (ScriptPlaceholder placeholder : placeholderScripts) {
     scriptsPerPlaceholder.put(placeholder.source.placeholderId, placeholder);
   }

   // The attribute rewriter is built last, from the schemas and the message queue.
   HtmlAttributeRewriter rewriter =
       new HtmlAttributeRewriter(meta, cssSchema, htmlSchema, embeddedContent, mq);
   this.aRewriter = rewriter;
 }
Example #2
0
/**
 * Base class for JavaScript token renderers that buffers every token and mark it receives and
 * only writes output once the token stream is complete, so that spacing and line-breaks can be
 * chosen without tripping over JavaScript's syntactic quirks.
 *
 * @author [email protected]
 */
abstract class BufferingRenderer implements TokenConsumer {
  /** Tokens ({@code String}) and marks ({@code FilePosition}) in the order they arrived. */
  private final List<Object> pending = Lists.newArrayList();
  /** Sink for the rendered text. */
  private final Concatenator out;

  /** @param out receives the rendered text. */
  BufferingRenderer(Concatenator out) {
    this.out = out;
  }

  /**
   * Flushes everything buffered so far: splits the pending items into output tokens, decides
   * which whitespace to emit between them, writes the result to {@code out}, and finally signals
   * {@code out} that the stream is finished.
   *
   * @throws NullPointerException if out raises an IOException and ioExceptionHandler is null.
   */
  public final void noMoreTokens() {
    JsTokenAdjacencyChecker adjChecker = new JsTokenAdjacencyChecker();

    List<String> tokens = splitTokens(pending);
    pending.clear();

    String prevNonComment = null; // last non-comment token written
    String queuedSpace = null; // whitespace waiting to be written before the next token
    boolean wroteOutput = false;

    final int count = tokens.size();
    for (int idx = 0; idx < count; ++idx) {
      String current = tokens.get(idx);

      // Whitespace is never written directly; it is held back until the next real token.
      if (current.charAt(0) == '\n' || " ".equals(current)) {
        queuedSpace = current;
        continue;
      }

      if (TokenClassification.isComment(current)) {
        // A line comment needs a newline to terminate it, and ES3/ES5 treat a multi-line block
        // comment as a newline for semicolon insertion.  If a newline is not allowed between
        // the surrounding non-comment tokens (a restricted production), rewrite the comment so
        // that it contains no line-breaks at all.  Engines are inconsistent about the block
        // comment rule, but the rewrite is safe either way.
        String following = nextNonTrivialToken(tokens, idx);
        if (!JsRenderUtil.canBreakBetween(prevNonComment, following)) {
          current = removeLinebreaksFromComment(current);
          if (queuedSpace != null) {
            queuedSpace = " ";
          }
        }
      }

      // The checker is consulted for every emitted token, in emission order.
      boolean spaceRequired = adjChecker.needSpaceBefore(current);
      if (spaceRequired && queuedSpace == null) {
        queuedSpace = " ";
      }

      if (queuedSpace != null) {
        boolean isLinebreak = queuedSpace.charAt(0) == '\n';
        if (isLinebreak) {
          if (JsRenderUtil.canBreakBetween(prevNonComment, current)) {
            if (!wroteOutput) {
              // Nothing has been written yet: drop the leading newline character.
              queuedSpace = queuedSpace.substring(1);
            }
          } else {
            // A line-break is not permitted here; degrade it to a single space.
            queuedSpace = " ";
          }
        }
        out.append(queuedSpace);
        queuedSpace = null;
      }

      out.append(current);
      wroteOutput = true;
      if (!TokenClassification.isComment(current)) {
        prevNonComment = current;
      }
    }
    out.noMoreTokens();
  }

  /**
   * Buffers a token for later rendering; the empty string is dropped.
   *
   * <p>The text may be a line-break or a comment token. Implementations are free to ignore
   * comment tokens, but it is up to the client to make sure that comments are well-formed, do not
   * contain code (e.g. conditional compilation code), and do not violate any containment
   * requirements, such as not containing the string {@code </script>}.
   */
  public final void consume(String text) {
    if (!"".equals(text)) {
      pending.add(text);
    }
  }

  /**
   * Buffers a file position mark, unless it is {@code null} or its source equals
   * {@code InputSource.UNKNOWN}.
   */
  public final void mark(@Nullable FilePosition mark) {
    if (mark == null || InputSource.UNKNOWN.equals(mark.source())) {
      return;
    }
    pending.add(mark);
  }

  /**
   * Finds the first token after position {@code after} that is not classified as a space, a
   * line-break, or a comment.
   *
   * @return that token, or {@code null} if only such tokens (or nothing) remain.
   */
  private static String nextNonTrivialToken(List<String> tokens, int after) {
    for (int j = after + 1, n = tokens.size(); j < n; ++j) {
      String candidate = tokens.get(j);
      switch (TokenClassification.classify(candidate)) {
        case SPACE:
        case LINEBREAK:
        case COMMENT:
          break; // keep scanning
        default:
          return candidate;
      }
    }
    return null;
  }

  /**
   * Rewrites a comment token so that it contains no line separators: a line comment is turned
   * into a block comment, every line separator character becomes a space, and any comment
   * terminator before the final one is broken up.
   */
  private static String removeLinebreaksFromComment(String token) {
    String blockForm =
        TokenClassification.isLineComment(token) ? "/*" + token.substring(2) + "*/" : token;
    StringBuilder buf = new StringBuilder(blockForm);

    // Section 5.1.2 hinges on whether a MultiLineComment contains a
    // line-terminator char, so make sure it does not.
    for (int pos = 0, len = buf.length(); pos < len; ++pos) {
      if (JsLexer.isJsLineSeparator(buf.charAt(pos))) {
        buf.setCharAt(pos, ' ');
      }
    }

    // Turning a line comment into a MultiLineComment must not let a "*/" that
    // was inside the line comment become lexically significant.  Only look
    // at positions before the real terminator, which occupies the last two chars.
    final int lastStart = buf.length() - 3;
    int hit = buf.lastIndexOf("*/", lastStart);
    while (hit >= 0) {
      buf.setCharAt(hit + 1, ' ');
      hit = buf.lastIndexOf("*/", lastStart);
    }
    return buf.toString();
  }

  /**
   * Produces the list of output tokens: non-whitespace tokens, space tokens ({@code " "}) and
   * newline tokens ({@code '\n'} followed by any number of spaces).
   *
   * @param tokens a heterogeneous list containing {@code String} tokens and {@code FilePosition}
   *     marks.
   * @return the strings in tokens in order with newline and space tokens inserted as appropriate.
   */
  abstract List<String> splitTokens(List<Object> tokens);
}