/** * Converts (X)HTML response to DOM object Tree. This version cares of charset of response. * * @param unicodeData * @return */ private Document parseResponse(String unicodeData, XPathExtractor extractor) throws UnsupportedEncodingException, IOException, ParserConfigurationException, SAXException, TidyException { // TODO: validate contentType for reasonable types? // NOTE: responseData encoding is server specific // Therefore we do byte -> unicode -> byte conversion // to ensure UTF-8 encoding as required by XPathUtil // convert unicode String -> UTF-8 bytes byte[] utf8data = unicodeData.getBytes("UTF-8"); // $NON-NLS-1$ ByteArrayInputStream in = new ByteArrayInputStream(utf8data); boolean isXML = JOrphanUtils.isXML(utf8data); // this method assumes UTF-8 input data return XPathUtil.makeDocument( in, false, false, extractor.useNameSpace(), extractor.isTolerant(), extractor.isQuiet(), extractor.showWarnings(), extractor.reportErrors(), isXML, extractor.isDownloadDTDs()); }