/**
 * Evaluates the configured XPath expression ({@code xpathStr}) against the
 * given HTML text and collects every match as a string.
 *
 * <p>A match that is a {@link TagNode} contributes its inner HTML; any other
 * match contributes its {@code toString()} value.
 *
 * @param text the HTML text to clean and query
 * @return the matches in the order returned by the XPath evaluation (possibly
 *         empty), or {@code null} if the cleaner produced no root node
 */
@Override
public List<String> selectList(String text) {
    HtmlCleaner cleaner = new HtmlCleaner();
    TagNode root = cleaner.clean(text);
    if (root == null) {
        return null;
    }

    List<String> matches = new ArrayList<String>();
    try {
        Object[] hits = root.evaluateXPath(xpathStr);
        if (hits == null || hits.length < 1) {
            return matches;
        }
        for (Object hit : hits) {
            String value = (hit instanceof TagNode)
                    ? cleaner.getInnerHtml((TagNode) hit)
                    : hit.toString();
            matches.add(value);
        }
    } catch (XPatherException e) {
        // Best effort: the failure is only printed, and whatever was
        // collected so far is still returned to the caller.
        e.printStackTrace();
    }
    return matches;
}
/**
 * Checks that closing tags with no matching opening tag are dropped when the
 * input is cleaned and serialized without the HTML envelope or the XML
 * declaration.
 *
 * @throws IOException declared by the test signature for the serialization call
 */
public void testRandomCloseTagsRemoved() throws IOException {
    HtmlCleaner htmlCleaner = new HtmlCleaner();

    // Serialize only the cleaned fragment: no XML declaration, no <html>/<body> wrapper.
    CleanerProperties props = htmlCleaner.getProperties();
    props.setOmitHtmlEnvelope(true);
    props.setOmitXmlDeclaration(true);
    SimpleXmlSerializer xmlSerializer = new SimpleXmlSerializer(props);

    // Input carries three stray closing tags and no opening tags at all.
    TagNode root = htmlCleaner.clean("Some</span> text </b></div>");

    String expected = "Some text ";
    String actual = xmlSerializer.getAsString(root);
    assertEquals(expected, actual);
}
/**
 * Creates a tag node whose name is stored lower-cased, and registers the node
 * with the cleaner for pruning when its lower-cased name is in the cleaner's
 * prune-tag set.
 *
 * @param name    the tag name; may be {@code null}, in which case {@code null}
 *                is passed to the superclass and no prune registration occurs
 * @param cleaner the owning cleaner; may be {@code null}, in which case no
 *                prune registration occurs
 */
public TagNode(String name, HtmlCleaner cleaner) {
    // NOTE(review): toLowerCase() without a Locale uses the JVM default locale;
    // confirm this matches how the prune-tag set entries are normalized.
    super(name == null ? null : name.toLowerCase());
    this.cleaner = cleaner;

    if (cleaner == null) {
        return;
    }

    Set<?> tagsToPrune = cleaner.getPruneTagSet();
    if (tagsToPrune == null || name == null) {
        return;
    }

    if (tagsToPrune.contains(name.toLowerCase())) {
        cleaner.addPruneNode(this);
    }
}
/**
 * Evaluates the configured XPath expression ({@code xpathStr}) against the
 * given HTML text and returns the first match as a string.
 *
 * <p>If the first match is a {@link TagNode}, its inner HTML is returned;
 * otherwise its {@code toString()} value is returned.
 *
 * @param text the HTML text to clean and query
 * @return the first match, or {@code null} if the cleaner produced no root
 *         node, nothing matched, or the XPath evaluation failed
 */
@Override
public String select(String text) {
    HtmlCleaner cleaner = new HtmlCleaner();
    TagNode root = cleaner.clean(text);
    if (root == null) {
        return null;
    }

    try {
        Object[] hits = root.evaluateXPath(xpathStr);
        if (hits == null || hits.length == 0) {
            return null;
        }
        Object first = hits[0];
        if (first instanceof TagNode) {
            return cleaner.getInnerHtml((TagNode) first);
        }
        return first.toString();
    } catch (XPatherException e) {
        // Best effort: the failure is only printed and reported as "no match".
        e.printStackTrace();
        return null;
    }
}