@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); props.setAdvancedXmlEscape(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } Document myJDom = new JDomSerializer(props, true).createJDom(node); // Format format = Format.getRawFormat(); Format format = new OutputFormat(); format.setEncoding("iso-8859-1"); XMLWriter outputter = new XMLWriter(format); OutputStream os = new FileOutputStream(f); // outputter.output(myJDom,os); output.setOutputStream(os); output.write(myJDom); // sbResult.append(outputter.outputString(myJDom)); results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }
@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); // props.setAdvancedXmlEscape(false); props.setUseEmptyElementTags(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } for (int i = 1; i <= anzElements; i++) { String tag = config.getString("substitute[" + i + "]/@tag"); String att = config.getString("substitute[" + i + "]/@att"); String from = config.getString("substitute[" + i + "]/from"); String to = config.getString("substitute[" + i + "]/to"); to = subSpecial(to); TagNode[] imgs = node.getElementsByName(tag, true); for (TagNode tn : imgs) { String srcAtt = tn.getAttributeByName(att); int index = srcAtt.indexOf(from); if (index >= 0) { tn.addAttribute(att, to); } } } BrowserCompactXmlSerializer serializer = new BrowserCompactXmlSerializer(props); // PrettyXmlSerializer serializer = new PrettyXmlSerializer(props); String s = serializer.getXmlAsString(node, "ISO-8859-1"); Writer fw = null; try { fw = new FileWriter(f); fw.write(s); } catch (IOException e) { logger.error("", e); } finally { if (fw != null) try { fw.close(); } catch (IOException e) { } } results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }