示例#1
0
/**
 * A scoped table of identifiers, built so that using an identifier in one scope never masks a use
 * of an identifier in an enclosing scope.
 *
 * <p>Javascript is not a block scoped language, but IHTML constructs are block scoped, so we
 * alpha-rename all variables to prevent collisions.
 *
 * @param <NAME> a type that can work as a hashtable key
 * @param <BINDING> type of data associated with a particular name. This can be any object related
 *     to the result of resolving the name.
 * @author [email protected]
 */
public final class NameContext<NAME, BINDING> {
  /** Declarations made directly in this context, keyed by original name, in declaration order. */
  private final Map<NAME, VarInfo<NAME, BINDING>> vars = Maps.newLinkedHashMap();
  /** Source of replacement names; shared with every child context. */
  private final Iterator<String> nameGenerator;
  /** The enclosing context, or null for a root context. */
  private final NameContext<NAME, BINDING> parent;

  /**
   * Creates a context with no parent context.
   *
   * @param nameGenerator an infinite iterator that returns safe identifiers and that never returns
   *     the same String twice. Typically, a {@link com.google.caja.util.SafeIdentifierMaker}.
   */
  public NameContext(Iterator<String> nameGenerator) {
    this(null, nameGenerator);
  }

  private NameContext(NameContext<NAME, BINDING> parent, Iterator<String> nameGenerator) {
    this.nameGenerator = nameGenerator;
    this.parent = parent;
  }

  /**
   * Produces a context that has the same name generator and which has this context as its parent.
   */
  public NameContext<NAME, BINDING> makeChildContext() {
    return new NameContext<NAME, BINDING>(this, nameGenerator);
  }

  /**
   * The context that is used to resolve original names that have not been declared in this context,
   * or null if no such context.
   */
  public NameContext<NAME, BINDING> getParentContext() {
    return parent;
  }

  /** The name generator used to generate names for new declarations. */
  public Iterator<String> getNameGenerator() {
    return nameGenerator;
  }

  /** The set of vars declared in this context, not including any in ancestor contexts. */
  public Iterable<VarInfo<NAME, BINDING>> vars() {
    Map<NAME, VarInfo<NAME, BINDING>> readOnly = Collections.unmodifiableMap(vars);
    return readOnly.values();
  }

  /**
   * Introduce a new declaration which will mask any declaration with the same name in the {@link
   * #getParentContext} context.
   *
   * @param origName the name as it appears in the original source.
   * @param declSite the position of the declaration being introduced.
   * @return the record for the new declaration, carrying a freshly generated name.
   * @throws RedeclarationException if {@code origName} was already declared in this context.
   */
  public VarInfo<NAME, BINDING> declare(NAME origName, FilePosition declSite)
      throws RedeclarationException {
    VarInfo<NAME, BINDING> existing = vars.get(origName);
    if (existing != null) {
      FilePosition firstDeclSite = existing.declaredAt;
      throw new RedeclarationException(
          new Message(
              RewriterMessageType.CANNOT_REDECLARE_VAR,
              declSite,
              MessagePart.Factory.valueOf(origName.toString()),
              firstDeclSite));
    }
    // A generated name is only consumed once we know the declaration is legal.
    VarInfo<NAME, BINDING> fresh =
        new VarInfo<NAME, BINDING>(origName, nameGenerator.next(), declSite);
    vars.put(origName, fresh);
    return fresh;
  }

  /**
   * Find a declaration with the given original name, looking in ancestor contexts if {@code
   * declare(originalName, ...)} was never called on this context.
   *
   * @return the innermost matching declaration, or null if no context in the chain declares it.
   */
  public VarInfo<NAME, BINDING> lookup(NAME originalName) {
    NameContext<NAME, BINDING> scope = this;
    while (scope != null) {
      VarInfo<NAME, BINDING> found = scope.vars.get(originalName);
      if (found != null) {
        return found;
      }
      scope = scope.parent;
    }
    return null;
  }

  /** Details about a single declaration: its original name, its new name, and where it was made. */
  public static final class VarInfo<NAME, BINDING> {
    public final NAME origName;
    public final String newName;
    public final FilePosition declaredAt;
    private BINDING binding;

    private VarInfo(NAME origName, String newName, FilePosition declaredAt) {
      assert origName != null;
      this.origName = origName;
      this.newName = newName;
      this.declaredAt = declaredAt;
    }

    /** Associates the given data with this declaration, replacing any earlier binding. */
    public void bind(BINDING binding) {
      this.binding = binding;
    }

    /** The data last passed to {@link #bind}, or null if {@code bind} has not been called. */
    public BINDING getBinding() {
      return binding;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder("(");
      sb.append(getClass().getSimpleName()).append(" ").append(origName).append(")");
      return sb.toString();
    }
  }

  /** Raised by {@link #declare} when a name is declared twice in the same context. */
  public static class RedeclarationException extends CajaException {
    public RedeclarationException(Message m) {
      this(m, null);
    }

    public RedeclarationException(Message m, Throwable th) {
      super(m, th);
    }
  }
}
示例#2
0
/**
 * Compiles an HTML document to a chunk of safe static HTML, and a bit of javascript which attaches
 * event handlers and other dynamic attributes, and executes inline scripts.
 *
 * <p>Requires that CSS be rewritten, that inline scripts have been replaced with {@link Placeholder
 * placeholders}, and that the output JS be run through the CajitaRewriter.
 *
 * @author [email protected]
 */
public class TemplateCompiler {
  /**
   * Matches a run of end-of-line characters followed by optional indentation. Compiled once
   * because {@link #isWhitespaceOnlyTextNode} is consulted for every child of every document
   * fragment.
   */
  private static final java.util.regex.Pattern EOL_WITH_INDENT =
      java.util.regex.Pattern.compile("[\r\n]+[ \t]*");

  private final List<IhtmlRoot> ihtmlRoots;
  private final List<ValidatedStylesheet> validatedStylesheets;
  private final HtmlSchema htmlSchema;
  private final PluginMeta meta;
  private final MessageContext mc;
  private final MessageQueue mq;
  private final HtmlAttributeRewriter aRewriter;

  /**
   * Maps {@link Node}s to JS parse trees.
   *
   * <ul>
   *   <li>If the value is {@code null}, then the literal value in the original parse tree may be
   *       used.
   *   <li>If the node is an attribute, then the value is an expression that returns a (key, value)
   *       pair.
   *   <li>If the node is a text node inside a script block, then the value is an {@link
   *       UncajoledModule}.
   *   <li>Otherwise, the value is a JavaScript expression which evaluates to the dynamic text
   *       value.
   * </ul>
   */
  private final Map<Node, ParseTreeNode> scriptsPerNode = Maps.newIdentityHashMap();

  /**
   * Maps placeholder IDs to JS programs.
   *
   * <p>We extract scripts early on and turn them into separate jobs, so that we can use cached
   * results for scripts even when the non-script details of the containing HTML page changes.
   */
  private final Map<String, ScriptPlaceholder> scriptsPerPlaceholder = Maps.newHashMap();

  /**
   * Embedded content found in attribute values, keyed by the attribute node it came from. Filled
   * in by {@link #inspect()} and shared with the attribute rewriter, which receives this same map
   * instance at construction time.
   */
  private final Map<Attr, EmbeddedContent> embeddedContent = Maps.newIdentityHashMap();

  /**
   * @param ihtmlRoots roots of trees to process and the baseURI used to resolve URIs in those
   *     nodes.
   * @param validatedStylesheets CSS style-sheets that have had unsafe constructs removed and had
   *     rules rewritten.
   * @param placeholderScripts placeholder IDs per unsanitized JS programs. We extract scripts early
   *     on and turn them into separate jobs, so that we can use cached results for scripts even
   *     when the non-script details of the containing HTML page changes.
   * @param cssSchema specifies how STYLE attributes are rewritten.
   * @param htmlSchema specifies how elements and attributes are handled.
   * @param meta specifies how URLs and other attributes are rewritten.
   * @param mc message context handed to the embedded-content finder and to the safe HTML maker.
   * @param mq receives messages about invalid attribute values.
   */
  public TemplateCompiler(
      List<? extends IhtmlRoot> ihtmlRoots,
      List<? extends ValidatedStylesheet> validatedStylesheets,
      List<? extends ScriptPlaceholder> placeholderScripts,
      CssSchema cssSchema,
      HtmlSchema htmlSchema,
      PluginMeta meta,
      MessageContext mc,
      MessageQueue mq) {
    // Defensive copies: later changes to the caller's lists do not affect this compiler.
    this.ihtmlRoots = Lists.newArrayList(ihtmlRoots);
    this.validatedStylesheets = Lists.newArrayList(validatedStylesheets);
    for (ScriptPlaceholder ph : placeholderScripts) {
      scriptsPerPlaceholder.put(ph.source.placeholderId, ph);
    }
    this.htmlSchema = htmlSchema;
    this.meta = meta;
    this.mc = mc;
    this.mq = mq;
    this.aRewriter = new HtmlAttributeRewriter(meta, cssSchema, htmlSchema, embeddedContent, mq);
  }

  /**
   * Examines the HTML document and writes messages about problematic portions to the message queue
   * passed to the constructor. Does nothing if the queue already holds a message at
   * {@link MessageLevel#FATAL_ERROR}.
   */
  private void inspect() {
    if (!mq.hasMessageAtLevel(MessageLevel.FATAL_ERROR)) {
      for (IhtmlRoot ihtmlRoot : ihtmlRoots) {
        HtmlEmbeddedContentFinder finder =
            new HtmlEmbeddedContentFinder(htmlSchema, ihtmlRoot.baseUri, mq, mc);
        // Only content whose source is an attribute is recorded; the map is keyed by Attr.
        for (EmbeddedContent c : finder.findEmbeddedContent(ihtmlRoot.root)) {
          Node src = c.getSource();
          if (src instanceof Attr) {
            embeddedContent.put((Attr) src, c);
          }
        }
        // Each root is inspected as though it were contained in a DIV element.
        inspect(ihtmlRoot.source, ihtmlRoot.root, ElKey.forHtmlElement("div"));
      }
    }
  }

  /**
   * Dispatches on the DOM node type to the matching inspector. Node types other than element,
   * text, CDATA section and document fragment are ignored.
   *
   * @param source the job the node came from; forwarded to the attribute rewriter.
   * @param n the node to inspect.
   * @param containingHtmlElement key of the HTML element considered to contain {@code n}.
   */
  private void inspect(JobEnvelope source, Node n, ElKey containingHtmlElement) {
    switch (n.getNodeType()) {
      case Node.ELEMENT_NODE:
        inspectElement(source, (Element) n, containingHtmlElement);
        break;
      case Node.TEXT_NODE:
      case Node.CDATA_SECTION_NODE:
        inspectText((Text) n, containingHtmlElement);
        break;
      case Node.DOCUMENT_FRAGMENT_NODE:
        inspectFragment(source, (DocumentFragment) n, containingHtmlElement);
        break;
      default:
        // Since they don't show in the scriptsPerNode map, they won't appear in
        // any output trees.
        break;
    }
  }

  /**
   * Inspects the children of an element, then its schema-declared attributes, and finally records
   * the element itself in {@link #scriptsPerNode} when the schema allows it.
   *
   * @param source the job the element came from; forwarded to the attribute rewriter.
   * @param el the element to inspect. Missing required attributes may be added to it.
   * @param containingHtmlElement the name of the HTML element containing el. If the HTML element is
   *     contained inside a template construct then this name may differ from el's immediate parent.
   */
  private void inspectElement(JobEnvelope source, Element el, ElKey containingHtmlElement) {
    ElKey elKey = ElKey.forElement(el);

    // Recurse early so that ihtml:dynamic elements have been parsed before we
    // process the attributes element list.
    for (Node child : Nodes.childrenOf(el)) {
      inspect(source, child, elKey);
    }

    // For each attribute allowed on this element type, ensure that
    // (1) If it is not specified, and its default value is not allowed, then
    //     it is added with a known safe value.
    // (2) Its value is rewritten as appropriate.
    // We don't have to worry about disallowed attributes since those will
    // not be present in scriptsPerNode.  The TemplateSanitizer should have
    // stripped those out.  The TemplateSanitizer should also have stripped out
    // disallowed elements.
    if (!htmlSchema.isElementAllowed(elKey)) {
      return;
    }

    HTML.Element elInfo = htmlSchema.lookupElement(elKey);
    List<HTML.Attribute> attrs = elInfo.getAttributes();
    if (attrs != null) {
      for (HTML.Attribute a : attrs) {
        AttribKey attrKey = a.getKey();
        if (!htmlSchema.isAttributeAllowed(attrKey)) {
          continue;
        }
        Attr attr = null;
        String aUri = attrKey.ns.uri;
        String aName = attrKey.localName;
        Attr unsafe = el.getAttributeNodeNS(aUri, aName);
        if (unsafe != null && a.getValueCriterion().accept(unsafe.getValue())) {
          // The attribute is present and its value passes the schema's criterion.
          attr = unsafe;
        } else if ((a.getDefaultValue() != null
                && !a.getValueCriterion().accept(a.getDefaultValue()))
            || !a.isOptional()) {
          // The attribute is absent or has a rejected value, and either its default value is
          // itself rejected or the attribute is mandatory: substitute a known safe value.
          attr = el.getOwnerDocument().createAttributeNS(aUri, aName);
          String safeValue;
          if (a.getType() == HTML.Attribute.Type.URI) {
            safeValue = "" + Nodes.getFilePositionFor(el).source().getUri();
          } else {
            safeValue = a.getSafeValue();
          }
          if (safeValue == null) {
            mq.addMessage(
                IhtmlMessageType.MISSING_ATTRIB, Nodes.getFilePositionFor(el), elKey, attrKey);
            continue;
          }
          attr.setNodeValue(safeValue);
          el.setAttributeNodeNS(attr);
        }
        if (attr != null) {
          inspectHtmlAttribute(source, attr, a);
        }
      }
    }
    scriptsPerNode.put(el, null);
  }

  /**
   * Records a text node in {@link #scriptsPerNode} with a null value, unless the containing
   * element is not allowed by the schema.
   */
  private void inspectText(Text t, ElKey containingHtmlElement) {
    if (!htmlSchema.isElementAllowed(containingHtmlElement)) {
      return;
    }
    scriptsPerNode.put(t, null);
  }

  /**
   * Records a document fragment in {@link #scriptsPerNode} and inspects each of its children,
   * skipping insignificant whitespace-only text nodes.
   */
  private void inspectFragment(
      JobEnvelope source, DocumentFragment f, ElKey containingHtmlElement) {
    scriptsPerNode.put(f, null);
    for (Node child : Nodes.childrenOf(f)) {
      // We know that top level text nodes in a document fragment
      // are not significant if they are just newlines and indentation.
      // This decreases output size significantly.
      if (isWhitespaceOnlyTextNode(child)) {
        continue;
      }
      inspect(source, child, containingHtmlElement);
    }
  }

  /**
   * Tests whether a node is a text node consisting solely of line breaks, each optionally followed
   * by spaces or tabs. An empty text node also qualifies.
   *
   * @param child the node to test.
   * @return true if {@code child} is a {@link Node#TEXT_NODE} whose value is empty once every
   *     run of line breaks plus trailing indentation has been removed.
   */
  private static boolean isWhitespaceOnlyTextNode(Node child) {
    // This leaves whitespace without a leading EOL character intact.
    // TODO(ihab.awad): Investigate why this is the right criterion to use.
    return child.getNodeType() == Node.TEXT_NODE // excludes CDATA sections
        && "".equals(EOL_WITH_INDENT.matcher(child.getNodeValue()).replaceAll(""));
  }

  /**
   * For an HTML attribute, decides whether the value is valid according to the schema and if it is
   * valid, sets a value into {@link #scriptsPerNode}. The expression is null if the current value
   * is fine, or a StringLiteral if it can be statically rewritten.
   *
   * <p>An attribute that is a placeholder ID referring to a known script placeholder is recorded
   * as-is, without going through the attribute rewriter.
   */
  private void inspectHtmlAttribute(JobEnvelope source, Attr attr, HTML.Attribute info) {
    if (Placeholder.ID_ATTR.is(attr) && scriptsPerPlaceholder.containsKey(attr.getValue())) {
      scriptsPerNode.put(attr, null);
    } else {
      HtmlAttributeRewriter.SanitizedAttr r =
          aRewriter.sanitizeStringValue(HtmlAttributeRewriter.fromAttr(attr, info, source));
      if (r.isSafe) {
        scriptsPerNode.put(attr, r.result);
      }
      // Otherwise the SanitizeHtmlStage should have emitted a warning.
    }
  }

  /**
   * Builds a tree of only the safe HTML parts ignoring IHTML elements. If there are embedded script
   * elements, then these will be removed, and nodes may have synthetic IDs added so that the
   * generated code can split them into the elements present when each script is executed.
   *
   * <p>On introspection, the code will find that the output DOM is missing the SCRIPT elements
   * originally on the page. We consider this a known observable fact of our transformation. If we
   * wish to hide that as well, we could change {@link SafeHtmlMaker} to include empty SCRIPT nodes.
   * However, that would make the output larger -- and, anyway, the text content of these nodes
   * would *still* not be identical to the original.
   *
   * @param doc a DOM {@link Document} object to be used as a factory for DOM nodes; it is not
   *     processed or transformed in any way.
   * @return the safe HTML chunks paired with the safe JS chunks, as produced by
   *     {@link SafeHtmlMaker}.
   */
  public Pair<List<SafeHtmlChunk>, List<SafeJsChunk>> getSafeHtml(Document doc) {
    // Inspect the document.
    inspect();

    // Compile CSS to HTML when appropriate or to JS where not.
    // It always ends up at the top either way.
    List<SafeStylesheet> css = new SafeCssMaker(validatedStylesheets, doc).make();

    // Emit safe HTML with JS which attaches dynamic attributes.
    SafeHtmlMaker htmlMaker =
        new SafeHtmlMaker(
            meta,
            mc,
            doc,
            scriptsPerNode,
            scriptsPerPlaceholder,
            ihtmlRoots,
            aRewriter.getHandlers());
    return htmlMaker.make(css);
  }
}