public void parseStreaming(InputStream is, OutputStream os, Context context) { // HTML5 may violate XML correctness. It shouldn't, but hey, shit happens. HtmlParser hp = new HtmlParser(XmlViolationPolicy.ALLOW); // true streaming for max awesomeness hp.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL); handler.setOutput(os); handler.setContext(context); hp.setContentHandler(handler); try { hp.parse(new InputSource(is)); handler.end(); } catch (IOException | SAXException ex) { Logger.getLogger(getClass()).error("HTML parsing failed", ex); } }
public Map<String, String> getFragmentParts(InputStream is, Context context) { // HTML5 may violate XML correctness. It shouldn't, but hey, shit happens. HtmlParser hp = new HtmlParser(XmlViolationPolicy.FATAL); // true streaming for max awesomeness hp.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL); handler.setAction(Handler.Action.EXTRACT_INLINE_PARTS); handler.setContext(context); handler.doNotWriteDocumentStart(); hp.setContentHandler(handler); try { hp.parse(new InputSource(is)); } catch (IOException | SAXException ex) { Logger.getLogger(getClass()).error("HTML parsing failed", ex); } return handler.getFragmentParts(); }