示例#1
0
  /**
   * Cleans a single HTML file with HtmlCleaner and writes the result back over the same file.
   *
   * <p>The path is normalized, the file is parsed using the transformations in {@code ct}, the
   * first child of the second top-level element is removed, and the tree is converted to a JDOM
   * document that is written through {@code output}. When the write completes without an
   * {@link IOException}, the absolute file is added to {@code results}; otherwise the exception
   * is logged and nothing is added.
   *
   * @param file the file to clean in place
   * @param depth not used by this implementation
   * @param results collection that receives the absolute file after a successful write
   */
  @SuppressWarnings("unchecked")
  protected void handleFile(File file, int depth, Collection results) {
    File f = new File(FilenameUtils.normalize(file.getAbsolutePath()));
    logger.debug(f.getAbsoluteFile());
    try {
      HtmlCleaner cleaner = new HtmlCleaner();
      cleaner.setTransformations(ct);

      CleanerProperties props = cleaner.getProperties();
      props.setAdvancedXmlEscape(false);

      TagNode node = cleaner.clean(f);

      // Second direct child of the root; presumably <body> (the first being <head>).
      TagNode tnBody = node.getAllElements(false)[1];
      List l = tnBody.getChildren();
      if (l != null && !l.isEmpty()) {
        // This is a hack to remove the <?xml in the beginning of body
        tnBody.removeChild(l.get(0));
      }

      Document myJDom = new JDomSerializer(props, true).createJDom(node);

      // NOTE(review): the writer configured here is 'outputter', but the document is written
      // through 'output' below, which is not declared in this method. Confirm which is intended.
      Format format = new OutputFormat();
      format.setEncoding("iso-8859-1");
      XMLWriter outputter = new XMLWriter(format);

      OutputStream os = new FileOutputStream(f);
      try {
        output.setOutputStream(os);
        output.write(myJDom);
      } finally {
        // Always release the file handle, including when the write fails.
        os.close();
      }
      results.add(f.getAbsoluteFile());
    } catch (IOException e) {
      logger.error("", e);
    }
  }
示例#2
0
  /**
   * Cleans a single HTML file, applies the configured attribute substitutions, and writes the
   * result back over the same file.
   *
   * <p>For each index from 1 to {@code anzElements}, the {@code substitute[i]} entry of
   * {@code config} supplies a tag name, an attribute name, a search string and a replacement
   * (passed through {@code subSpecial}). Every element with that tag whose attribute value
   * contains the search string gets the attribute set to the replacement. Elements that do not
   * carry the attribute are left unchanged.
   *
   * <p>The tree is serialized with the ISO-8859-1 charset name and written using that same
   * charset. A failure while writing is logged only, so the file is still added to
   * {@code results} in that case; an {@link IOException} raised earlier (for example while
   * parsing) is logged and nothing is added.
   *
   * @param file the file to clean in place
   * @param depth not used by this implementation
   * @param results collection that receives the absolute file of each processed file
   */
  @SuppressWarnings("unchecked")
  protected void handleFile(File file, int depth, Collection results) {
    File f = new File(FilenameUtils.normalize(file.getAbsolutePath()));
    logger.debug(f.getAbsoluteFile());
    try {
      HtmlCleaner cleaner = new HtmlCleaner();
      cleaner.setTransformations(ct);

      CleanerProperties props = cleaner.getProperties();
      props.setUseEmptyElementTags(false);

      TagNode node = cleaner.clean(f);

      // Second direct child of the root; presumably <body> (the first being <head>).
      TagNode tnBody = node.getAllElements(false)[1];
      List l = tnBody.getChildren();
      if (l != null && !l.isEmpty()) {
        // This is a hack to remove the <?xml in the beginning of body
        tnBody.removeChild(l.get(0));
      }

      for (int i = 1; i <= anzElements; i++) {
        String tag = config.getString("substitute[" + i + "]/@tag");
        String att = config.getString("substitute[" + i + "]/@att");
        String from = config.getString("substitute[" + i + "]/from");
        String to = config.getString("substitute[" + i + "]/to");
        to = subSpecial(to);

        TagNode[] imgs = node.getElementsByName(tag, true);

        for (TagNode tn : imgs) {
          String srcAtt = tn.getAttributeByName(att);
          // An element without the attribute yields null; skip it instead of failing with a
          // NullPointerException and aborting the whole file.
          if (srcAtt != null && srcAtt.indexOf(from) >= 0) {
            tn.addAttribute(att, to);
          }
        }
      }

      BrowserCompactXmlSerializer serializer = new BrowserCompactXmlSerializer(props);

      String charset = "ISO-8859-1";
      String s = serializer.getXmlAsString(node, charset);

      // Write with the same charset that was passed to the serializer. FileWriter would use the
      // platform default, which can disagree with it and corrupt non-ASCII characters.
      Writer fw = null;
      try {
        fw = new java.io.OutputStreamWriter(new java.io.FileOutputStream(f), charset);
        fw.write(s);
      } catch (IOException e) {
        logger.error("", e);
      } finally {
        if (fw != null) {
          try {
            fw.close();
          } catch (IOException e) {
            // close() flushes buffered output, so a failure here can mean lost data.
            logger.error("", e);
          }
        }
      }

      results.add(f.getAbsoluteFile());
    } catch (IOException e) {
      logger.error("", e);
    }
  }