コード例 #1
0
  /**
   * Builds the {@link CleanerProperties} that are handed to HTML Cleaner for a cleaning run.
   *
   * @param configuration the cleaning configuration; when it defines the {@link
   *     HTMLCleanerConfiguration#NAMESPACES_AWARE} parameter, that value decides whether the
   *     cleaner is namespace aware
   * @return the properties to be used for cleaning
   */
  private CleanerProperties getDefaultCleanerProperties(HTMLCleanerConfiguration configuration) {
    CleanerProperties properties = new CleanerProperties();

    // Ask HTML Cleaner to omit tags it does not know.
    properties.setOmitUnknownTags(true);

    // Do not use the compact (self-closing) notation that HTML Cleaner applies by default:
    // - the expanded notation is valid XHTML too, so compacting is extra work for nothing
    // - expanded elements also render fine in browsers that only support HTML.
    properties.setUseEmptyElementTags(false);

    // Script and style content gets wrapped in CDATA blocks.
    properties.setUseCdataForScriptAndStyle(true);

    // Namespace awareness is enabled unless the caller provided a NAMESPACES_AWARE parameter, in
    // which case the caller's value wins.
    String requestedAwareness =
        configuration.getParameters().get(HTMLCleanerConfiguration.NAMESPACES_AWARE);
    properties.setNamespacesAware(
        requestedAwareness == null || Boolean.parseBoolean(requestedAwareness));

    return properties;
  }
コード例 #2
0
  /**
   * Checks that, with the "restricted" parameter set to "true", script and style elements come out
   * of the cleaner as pre elements.
   */
  @Test
  public void restrictedHtml() throws ComponentLookupException {
    // Start from the default configuration and switch on the restricted mode.
    HTMLCleanerConfiguration restrictedConfiguration =
        this.mocker.getComponentUnderTest().getDefaultConfiguration();
    Map<String, String> restrictedParameters =
        new HashMap<String, String>(restrictedConfiguration.getParameters());
    restrictedParameters.put("restricted", "true");
    restrictedConfiguration.setParameters(restrictedParameters);

    // A script element must be turned into a pre element.
    String scriptInput = "<script>alert(\"foo\")</script>";
    String expectedScriptOutput = HEADER_FULL + "<pre>alert(\"foo\")</pre>" + FOOTER;
    String cleanedScript =
        HTMLUtils.toString(
            this.mocker
                .getComponentUnderTest()
                .clean(new StringReader(scriptInput), restrictedConfiguration));
    Assert.assertEquals(expectedScriptOutput, cleanedScript);

    // Same for a style element.
    String styleInput = "<style>p {color:white;}</style>";
    String expectedStyleOutput = HEADER_FULL + "<pre>p {color:white;}</pre>" + FOOTER;
    String cleanedStyle =
        HTMLUtils.toString(
            this.mocker
                .getComponentUnderTest()
                .clean(new StringReader(styleInput), restrictedConfiguration));
    Assert.assertEquals(expectedStyleOutput, cleanedStyle);
  }
コード例 #3
0
  /**
   * Builds the tag transformations applied on top of the base transformations done by HTML
   * Cleaner.
   *
   * @param configuration the cleaner configuration; when its {@link
   *     HTMLCleanerConfiguration#RESTRICTED} parameter equals "true" (ignoring case), script and
   *     style tags are additionally transformed into pre tags
   * @return the default cleaning transformations to perform on tags
   */
  private CleanerTransformations getDefaultCleanerTransformations(
      HTMLCleanerConfiguration configuration) {
    CleanerTransformations transformations = new CleanerTransformations();

    // Plain renames, as {source tag, destination tag} pairs; source attributes are not preserved.
    String[][] tagRenames = {
      {HTMLConstants.TAG_B, HTMLConstants.TAG_STRONG},
      {HTMLConstants.TAG_I, HTMLConstants.TAG_EM},
      {HTMLConstants.TAG_U, HTMLConstants.TAG_INS},
      {HTMLConstants.TAG_S, HTMLConstants.TAG_DEL},
      {HTMLConstants.TAG_STRIKE, HTMLConstants.TAG_DEL}
    };
    for (String[] rename : tagRenames) {
      transformations.addTransformation(new TagTransformation(rename[0], rename[1], false));
    }

    // The center tag becomes a paragraph carrying a centering style attribute.
    TagTransformation centerToParagraph =
        new TagTransformation(HTMLConstants.TAG_CENTER, HTMLConstants.TAG_P, false);
    centerToParagraph.addAttributeTransformation(
        HTMLConstants.ATTRIBUTE_STYLE, "text-align:center");
    transformations.addTransformation(centerToParagraph);

    // In restricted mode, script and style tags are replaced by pre tags.
    boolean restrictedMode =
        Boolean.parseBoolean(
            configuration.getParameters().get(HTMLCleanerConfiguration.RESTRICTED));
    if (restrictedMode) {
      transformations.addTransformation(
          new TagTransformation(HTMLConstants.TAG_SCRIPT, HTMLConstants.TAG_PRE, false));
      transformations.addTransformation(
          new TagTransformation(HTMLConstants.TAG_STYLE, HTMLConstants.TAG_PRE, false));
    }

    return transformations;
  }
コード例 #4
0
  /**
   * Cleans the given HTML content into a W3C {@link Document} declared with the XHTML 1.0 Strict
   * doctype, then runs every filter of the configuration on that document.
   *
   * @param originalHtmlContent the HTML content to clean
   * @param configuration the configuration providing the cleaning parameters and the filters
   * @return the cleaned and filtered document
   * @throws RuntimeException if HTML Cleaner fails to clean the content or if the W3C document
   *     cannot be created
   */
  @Override
  public Document clean(Reader originalHtmlContent, HTMLCleanerConfiguration configuration) {
    Document result;

    // Note: instantiating an HtmlCleaner object is cheap, so there is no need to cache an
    // instance of it, especially since a fresh instance makes this extra safe with regards to
    // multithreading (even though HTML Cleaner is already supposed to be thread safe).
    CleanerProperties properties = getDefaultCleanerProperties(configuration);
    HtmlCleaner htmlCleaner = new HtmlCleaner(properties);
    htmlCleaner.setTransformations(getDefaultCleanerTransformations(configuration));

    TagNode rootNode;
    try {
      rootNode = htmlCleaner.clean(originalHtmlContent);
    } catch (Exception e) {
      // This shouldn't happen since we're not doing any IO... I consider this a flaw in the
      // design of HTML Cleaner.
      throw new RuntimeException("Unhandled error when cleaning HTML", e);
    }

    // Serialize the TagNode into a W3C DOM. Ideally, setting a DoctypeToken (html, PUBLIC,
    // "-//W3C//DTD XHTML 1.0 Strict//EN", xhtml1-strict.dtd) on the node and calling
    // new DomSerializer(properties, false).createDOM(node) would be enough, but SF's HTML Cleaner
    // seems to omit the DocType declaration while serializing. See
    // https://sourceforge.net/tracker/index.php?func=detail&aid=2062318&group_id=183053&atid=903696
    // As a workaround, the node is serialized into a temporary W3C document, a new W3C document
    // is created with the proper DocType declaration, and the root node is moved from the
    // temporary document to the new one.
    try {
      // Since there's a bug in SF's HTML Cleaner in that it doesn't recognize CDATA blocks, we
      // need to turn off character escaping (hence the false value passed) and do the escaping
      // in XMLUtils.toString(). Note that this can cause problems for code not serializing the
      // W3C DOM to a String since it won't have the characters escaped. See
      // https://sourceforge.net/tracker/index.php?func=detail&aid=2691888&group_id=183053&atid=903696
      Document temporaryDocument = new XWikiDOMSerializer(properties, false).createDOM(rootNode);

      DOMImplementation domImplementation =
          DocumentBuilderFactory.newInstance().newDocumentBuilder().getDOMImplementation();
      DocumentType xhtmlStrictDocType =
          domImplementation.createDocumentType(
              QUALIFIED_NAME_HTML,
              "-//W3C//DTD XHTML 1.0 Strict//EN",
              "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
      result = domImplementation.createDocument(null, QUALIFIED_NAME_HTML, xhtmlStrictDocType);

      // Swap the root element of the new document for the cleaned root element.
      result.replaceChild(
          result.adoptNode(temporaryDocument.getDocumentElement()), result.getDocumentElement());
    } catch (ParserConfigurationException ex) {
      throw new RuntimeException("Error while serializing TagNode into w3c dom.", ex);
    }

    // Finally apply the filters, in the order given by the configuration.
    for (HTMLFilter htmlFilter : configuration.getFilters()) {
      htmlFilter.filter(result, configuration.getParameters());
    }

    return result;
  }