Пример #1
0
  /**
   * Builds a DOM {@link Document} from an (X)HTML or XML response body.
   *
   * <p>The text is re-encoded as UTF-8 bytes before it is handed to
   * {@code XPathUtil.makeDocument}, so the parser input is UTF-8 no matter which charset the
   * server originally used for the response.
   *
   * @param unicodeData response body as a Java string
   * @param extractor source of the parser settings passed on to {@code XPathUtil.makeDocument}
   *     (namespace use, tolerant/quiet mode, warning and error reporting, DTD download)
   * @return the document returned by {@code XPathUtil.makeDocument}
   * @throws UnsupportedEncodingException if the {@code "UTF-8"} charset name cannot be resolved
   */
  private Document parseResponse(String unicodeData, XPathExtractor extractor)
      throws UnsupportedEncodingException, IOException, ParserConfigurationException, SAXException,
          TidyException {
    // TODO: validate contentType for reasonable types?

    // The response arrives here as a String; turn it back into bytes with a known encoding
    // (UTF-8), because the document builder below works on a UTF-8 byte stream.
    final byte[] encodedBody = unicodeData.getBytes("UTF-8"); // $NON-NLS-1$

    // Sniff the encoded bytes to decide whether the payload is XML.
    final boolean xmlPayload = JOrphanUtils.isXML(encodedBody);

    final ByteArrayInputStream bodyStream = new ByteArrayInputStream(encodedBody);

    // The two leading boolean flags are fixed to false; every other option is taken from the
    // extractor configuration.
    // NOTE(review): confirm the meaning of the two hard-coded flags against
    // XPathUtil.makeDocument.
    return XPathUtil.makeDocument(
        bodyStream,
        false,
        false,
        extractor.useNameSpace(),
        extractor.isTolerant(),
        extractor.isQuiet(),
        extractor.showWarnings(),
        extractor.reportErrors(),
        xmlPayload,
        extractor.isDownloadDTDs());
  }