Document parse(String input, String baseUri, ParseErrorList errors) { initialiseParse(input, baseUri, errors); runParser(); // System.out.println("number of errors: "+errors.size()); for (int i = 0; i < errors.size(); i++) System.out.println(errors.get(i)); return doc; }
/**
 * Resets this builder so the stored {@code input} can be processed again.
 *
 * <p>Removes every child node of {@code doc.body()}, creates a fresh {@code CharacterReader} over
 * {@code input}, replaces the error list with {@code ParseErrorList.tracking(100)}, creates a new
 * {@code Tokeniser} on the new reader and error list, and finally removes entries from the tail
 * of {@code stack} until at most two remain.
 */
void reset() {
    doc.body().removeChildNodes();

    // Order matters: the tokeniser is constructed from the new reader and the new error list.
    reader = new CharacterReader(input, this);
    this.errors = ParseErrorList.tracking(100);
    tokeniser = new Tokeniser(reader, errors);

    // NOTE(review): the two retained entries are presumably the html and body elements - confirm.
    for (int surplus = stack.size() - 2; surplus > 0; surplus--) {
        stack.pollLast();
    }
}
/**
 * Parses the input with an error list created by {@code ParseErrorList.tracking(100)}.
 *
 * @param input the text to parse
 * @param baseUri base URI forwarded to the three-argument {@code parse}
 * @return the document returned by the three-argument {@code parse}
 */
Document parse(String input, String baseUri) {
    final ParseErrorList trackedErrors = ParseErrorList.tracking(100);
    return parse(input, baseUri, trackedErrors);
}
/**
 * Builds a {@code Document} from HTML text using a new {@code HtmlTreeBuilder} and a
 * {@code ParseErrorList.noTracking()} error list.
 *
 * @param html the HTML text to parse
 * @param baseUri base URI of the document (i.e. the original fetch location), used for resolving
 *     relative URLs
 * @return the parsed Document
 */
public static Document parse(String html, String baseUri) {
    // Declared as TreeBuilder, as in the original, so the same parse overload is selected.
    final TreeBuilder htmlBuilder = new HtmlTreeBuilder();
    return htmlBuilder.parse(html, baseUri, ParseErrorList.noTracking());
}
/**
 * Parses the given HTML with this parser's {@code treeBuilder}.
 *
 * <p>Before parsing, the {@code errors} field is replaced: with
 * {@code ParseErrorList.tracking(maxErrors)} when {@link #isTrackErrors()} returns true,
 * otherwise with {@code ParseErrorList.noTracking()}.
 *
 * @param html the HTML text to parse
 * @param baseUri base URI forwarded to the tree builder
 * @return the document produced by the tree builder
 */
public Document parseInput(String html, String baseUri) {
    if (isTrackErrors()) {
        errors = ParseErrorList.tracking(maxErrors);
    } else {
        errors = ParseErrorList.noTracking();
    }
    return treeBuilder.parse(html, baseUri, errors);
}
/**
 * Unescapes the HTML entities in a string.
 *
 * <p>Wraps the string in a {@code CharacterReader}, builds a {@code Tokeniser} over it with a
 * {@code ParseErrorList.noTracking()} error list, and delegates to
 * {@code Tokeniser.unescapeEntities(boolean)}.
 *
 * @param string the HTML-escaped string
 * @param inAttribute whether the string is to be unescaped in strict mode (as attributes are)
 * @return the unescaped string
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader source = new CharacterReader(string);
    final Tokeniser entityDecoder = new Tokeniser(source, ParseErrorList.noTracking());
    return entityDecoder.unescapeEntities(inAttribute);
}
/**
 * Turns a fragment of XML into a list of nodes, using a new {@code XmlTreeBuilder} and a
 * {@code ParseErrorList.noTracking()} error list.
 *
 * @param fragmentXml the XML fragment to parse
 * @param baseUri base URI of the document (i.e. the original fetch location), used for resolving
 *     relative URLs
 * @return the list of nodes parsed from the input XML
 */
public static List<Node> parseXmlFragment(String fragmentXml, String baseUri) {
    return new XmlTreeBuilder()
            .parseFragment(fragmentXml, baseUri, ParseErrorList.noTracking());
}
/**
 * Turns a fragment of HTML into a list of nodes, using a new {@code HtmlTreeBuilder} and a
 * {@code ParseErrorList.noTracking()} error list.
 *
 * @param fragmentHtml the HTML fragment to parse
 * @param context (optional) the element this fragment is being parsed for (i.e. for inner HTML);
 *     it provides stack context for implicit element creation
 * @param baseUri base URI of the document (i.e. the original fetch location), used for resolving
 *     relative URLs
 * @return the list of nodes parsed from the input HTML; the context element, if supplied, is not
 *     modified
 */
public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) {
    return new HtmlTreeBuilder()
            .parseFragment(fragmentHtml, context, baseUri, ParseErrorList.noTracking());
}
/**
 * Parses the input with an error list created by {@code ParseErrorList.noTracking()}.
 *
 * @param input the text to parse
 * @param baseUri base URI forwarded to the three-argument {@code parse}
 * @return the document returned by the three-argument {@code parse}
 */
Document parse(final String input, final String baseUri) {
    final ParseErrorList untrackedErrors = ParseErrorList.noTracking();
    return parse(input, baseUri, untrackedErrors);
}