/**
 * Builds one {@link Comment} for every element carrying the {@code product-comment} class
 * beneath {@code commentsElement}.
 *
 * <p>Parsing is best-effort: author, date and text are extracted independently, and a field
 * whose markup is missing or malformed falls back to a default (empty author, {@code null}
 * date, empty text) instead of aborting the whole run. Author names are capped at 100
 * characters and comment texts at 5000 characters.
 *
 * @param commentsElement element whose descendants are searched for comment nodes
 * @param book book assigned to every returned comment via {@code setBook}
 * @return the parsed comments in encounter order; empty when no comment node is found
 */
private List<Comment> parseComments(Element commentsElement, Book book) {
    final int maxAuthorLength = 100;
    final int maxTextLength = 5000;
    // "MM" is month-of-year. The lowercase "mm" means minute-of-hour, which would leave the
    // month unparsed and silently place every comment in January.
    final SimpleDateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss");

    List<Comment> comments = new ArrayList<Comment>();
    for (Element element : commentsElement.getElementsByClass("product-comment")) {
      Comment comment = new Comment();

      // Author: title attribute of the first link inside the avatar container.
      try {
        String author =
            element
                .getElementsByClass("comment-user-avatar")
                .get(0)
                .getElementsByTag("a")
                .get(0)
                .attr("title");
        comment.setAuthor(
            (author.length() <= maxAuthorLength) ? author : author.substring(0, maxAuthorLength));
      } catch (Exception ex) {
        // Deliberate fallback: avatar container or link is absent for this comment.
        comment.setAuthor("");
      }

      // Date: text of the first "date" node inside the comment footer.
      try {
        String rawDate =
            element
                .getElementsByClass("comment-footer")
                .get(0)
                .getElementsByClass("date")
                .get(0)
                .text()
                .trim();
        comment.setDate(new Date(dateFormat.parse(rawDate).getTime()));
      } catch (Exception ex) {
        // Deliberate fallback: footer/date node is absent or the text is not a valid date.
        comment.setDate(null);
      }

      // Text: prefer the full comment, then the plain comment text, then the short preview.
      try {
        Elements textNodes = element.getElementsByAttributeValueContaining("id", "fullcomment");
        if (textNodes.isEmpty()) {
          textNodes = element.getElementsByClass("comment-text");
        }
        if (textNodes.isEmpty()) {
          textNodes = element.getElementsByAttributeValueContaining("id", "shortcomment");
        }
        String text = textNodes.get(0).text();
        comment.setComment(
            (text.length() <= maxTextLength) ? text : text.substring(0, maxTextLength));
      } catch (Exception ex) {
        // Deliberate fallback: none of the three known text containers is present.
        comment.setComment("");
      }

      comment.setBook(book);
      comments.add(comment);
    }
    return comments;
  }
Example #2
0
  /**
   * Fetches the page at {@code url} and converts every {@code <table>} whose {@code class}
   * attribute is exactly {@code wikitable} into a {@link CaoImg}.
   *
   * <p>For each matching table the following values are read:
   *
   * <ul>
   *   <li>name: the {@code title} attribute of the first element in the table that has one,
   *       or an empty string when none does;
   *   <li>img: the {@code src} attribute of the first {@code <img>} in the table, or an empty
   *       string when there is no image;
   *   <li>otherName / intro: texts of the elements whose {@code style} contains {@code
   *       font-size}. With a single such element it is the intro; with two or more the first
   *       is the other name and the second the intro; with none both stay {@code null}.
   * </ul>
   *
   * <p>A table that lacks some of these pieces yields a partially filled entry rather than
   * aborting the whole page.
   *
   * @param url address of the page to download
   * @return one entry per matching table, in encounter order; empty when nothing matches
   * @throws Exception whatever {@code HttpUtils.getString} or the HTML parsing propagates
   */
  public static List<CaoImg> getImgData(String url) throws Exception {
    String response = HttpUtils.getString(url);
    Document page = Jsoup.parse(response);

    List<CaoImg> caoImgs = new ArrayList<CaoImg>();

    for (Element table : page.getElementsByTag("table")) {
      // <table class="wikitable"
      // style="width: 22em; position: absolute; top: 0px; left: 0px;">
      if (!"wikitable".equals(table.attr("class"))) {
        continue;
      }

      // Elements.attr returns "" when the selection is empty, so a table without a title
      // (or without an image) produces an empty value instead of an IndexOutOfBoundsException.
      String title = table.getElementsByAttribute("title").attr("title");
      String src = table.getElementsByTag("img").attr("src");

      Elements styleElements = table.getElementsByAttributeValueContaining("style", "font-size");
      String otherName = null;
      String intro = null;
      if (styleElements.size() == 1) {
        intro = styleElements.get(0).text();
      } else if (styleElements.size() >= 2) {
        otherName = styleElements.get(0).text();
        intro = styleElements.get(1).text();
      }

      CaoImg caoImg = new CaoImg();
      caoImg.setName(title);
      caoImg.setImg(src);
      caoImg.setOtherName(otherName);
      caoImg.setIntro(intro);

      caoImgs.add(caoImg);
    }

    return caoImgs;
  }