/**
 * Parses an XML file into a DOM document and returns that document wrapped as a NodeModel.
 *
 * <p>Processing order after parsing: optional comment removal, optional processing instruction
 * removal, then merging of adjacent text nodes via {@link #mergeAdjacentText(Node)}.
 *
 * @param f the XML file to parse
 * @param removeComments whether to remove all comment nodes (recursively) from the tree before
 *     processing
 * @param removePIs whether to remove all processing instruction nodes (recursively) from the tree
 *     before processing
 * @return the wrapped, post-processed document
 * @throws SAXException if the underlying parser reports a parse error
 * @throws IOException if reading the file fails
 * @throws ParserConfigurationException if a document builder cannot be created
 */
public static NodeModel parse(File f, boolean removeComments, boolean removePIs)
    throws SAXException, IOException, ParserConfigurationException {
  final DocumentBuilder documentBuilder = getDocumentBuilderFactory().newDocumentBuilder();

  // Only install the class-level error handler when one is set; otherwise keep the
  // builder's own default.
  if (errorHandler != null) {
    documentBuilder.setErrorHandler(errorHandler);
  }

  final Document parsedDocument = documentBuilder.parse(f);

  if (removeComments) {
    removeComments(parsedDocument);
  }
  if (removePIs) {
    removePIs(parsedDocument);
  }

  // Runs after the removals above, so text nodes that became siblings once a comment/PI
  // between them was dropped are merged as well.
  mergeAdjacentText(parsedDocument);

  return wrap(parsedDocument);
}
/**
 * Merges adjacent text/CDATA nodes, so that there are no adjacent text/CDATA nodes. Operates
 * recursively on the entire subtree.
 *
 * <p>For every run of two or more sibling text/CDATA nodes, the first node of the run is kept and
 * receives the concatenated data of the whole run; the remaining nodes of the run are removed
 * from the parent. You thus lose information about where CDATA sections started and ended in the
 * doc.
 *
 * @param node the root of the subtree to process; its own children and all descendants of its
 *     non-text children are processed
 * @see #simplify
 */
public static void mergeAdjacentText(Node node) {
  Node child = node.getFirstChild();
  while (child != null) {
    if (child instanceof Text || child instanceof CDATASection) {
      Node next = child.getNextSibling();
      if (next instanceof Text || next instanceof CDATASection) {
        // Absorb the WHOLE run of following text/CDATA siblings, not just the first one;
        // merging only one pair per step would leave runs of three or more partly unmerged.
        StringBuilder merged = new StringBuilder();
        merged.append(child.getNodeValue());
        do {
          merged.append(next.getNodeValue());
          // Fetch the successor before detaching, since the removed node loses its siblings.
          Node following = next.getNextSibling();
          node.removeChild(next);
          next = following;
        } while (next instanceof Text || next instanceof CDATASection);
        ((CharacterData) child).setData(merged.toString());
      }
      // 'next' is now the first sibling that is not text/CDATA (or null at the end).
      child = next;
    } else {
      mergeAdjacentText(child);
      child = child.getNextSibling();
    }
  }
}