/**
 * Test {@link UniqueIdFilter}: cleans markup in which the id {@code x} is used twice, with the
 * {@code uniqueId} filter appended to the default filters, and expects the second occurrence to
 * come out as {@code x0} while the other ids are left as they were.
 */
 @Test
 public void duplicateIds() throws Exception {
   // Input markup: the first and third paragraphs share the same id.
   String html = "<p id=\"x\">1</p><p id=\"xy\">2</p><p id=\"x\">3</p>";
   // Expected body: only the repeated id is changed.
   String expectedBody = "<p id=\"x\">1</p><p id=\"xy\">2</p><p id=\"x0\">3</p>";

   // Start from the default configuration and append the filter under test to its filter list.
   HTMLCleanerConfiguration configuration =
       this.mocker.getComponentUnderTest().getDefaultConfiguration();
   List<HTMLFilter> filterChain = new ArrayList<HTMLFilter>(configuration.getFilters());
   filterChain.add(this.mocker.<HTMLFilter>getInstance(HTMLFilter.class, "uniqueId"));
   configuration.setFilters(filterChain);

   // Clean the input with that configuration and compare the serialized document.
   String expectedPage = HEADER_FULL + expectedBody + FOOTER;
   String cleanedPage =
       HTMLUtils.toString(
           this.mocker.getComponentUnderTest().clean(new StringReader(html), configuration));
   Assert.assertEquals(expectedPage, cleanedPage);
 }
  /**
   * Cleans the given HTML and returns it as a W3C DOM document carrying an XHTML 1.0 Strict
   * DocType, after running every filter of the configuration on it.
   *
   * @param originalHtmlContent the HTML content to clean
   * @param configuration provides the cleaner properties and transformations, the filters to
   *     apply and the parameters passed to each filter
   * @return the cleaned document, once all configured filters have been applied
   */
  @Override
  public Document clean(Reader originalHtmlContent, HTMLCleanerConfiguration configuration) {
    // A fresh HtmlCleaner is built on every call: instantiating one is cheap, so no instance is
    // cached, and this is extra safe with regards to multithreading (even though HTML Cleaner is
    // already supposed to be thread safe).
    CleanerProperties properties = getDefaultCleanerProperties(configuration);
    HtmlCleaner htmlCleaner = new HtmlCleaner(properties);
    htmlCleaner.setTransformations(getDefaultCleanerTransformations(configuration));

    TagNode rootNode;
    try {
      rootNode = htmlCleaner.clean(originalHtmlContent);
    } catch (Exception cleaningFailure) {
      // Not expected to happen since no IO is performed here; having to handle it is considered
      // a flaw in the design of HTML Cleaner.
      throw new RuntimeException("Unhandled error when cleaning HTML", cleaningFailure);
    }

    // Convert the TagNode tree into a W3C DOM.
    //
    // Ideally it would be enough to set a DoctypeToken on the TagNode and let HTML Cleaner's own
    // DomSerializer build the DOM, but SF's HTML Cleaner seems to omit the DocType declaration
    // while serializing. See
    // https://sourceforge.net/tracker/index.php?func=detail&aid=2062318&group_id=183053&atid=903696
    //
    // Workaround: serialize into a temporary document, create a second document that has the
    // proper DocType declaration, and move the root node of the temporary document into it.
    Document document;
    try {
      // SF's HTML Cleaner has a bug in that it doesn't recognize CDATA blocks, so character
      // escaping is turned off here (the false argument) and performed in XMLUtils.toString()
      // instead. Note that this can cause problems for code that does not serialize the W3C DOM
      // to a String, since the characters will not have been escaped. See
      // https://sourceforge.net/tracker/index.php?func=detail&aid=2691888&group_id=183053&atid=903696
      Document temporaryDocument = new XWikiDOMSerializer(properties, false).createDOM(rootNode);

      DOMImplementation implementation =
          DocumentBuilderFactory.newInstance().newDocumentBuilder().getDOMImplementation();
      DocumentType xhtmlStrictType =
          implementation.createDocumentType(
              QUALIFIED_NAME_HTML,
              "-//W3C//DTD XHTML 1.0 Strict//EN",
              "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
      document = implementation.createDocument(null, QUALIFIED_NAME_HTML, xhtmlStrictType);

      // Replace the root element created along with the new document by the cleaned root
      // element, adopted from the temporary document.
      document.replaceChild(
          document.adoptNode(temporaryDocument.getDocumentElement()),
          document.getDocumentElement());
    } catch (ParserConfigurationException serializationFailure) {
      throw new RuntimeException(
          "Error while serializing TagNode into w3c dom.", serializationFailure);
    }

    // Last step: run each configured filter on the document, in the configuration's order.
    for (HTMLFilter htmlFilter : configuration.getFilters()) {
      htmlFilter.filter(document, configuration.getParameters());
    }

    return document;
  }