@Override public MeiziTopicMData decode(String html) { MeiziTopicMData resulTopicM = new MeiziTopicMData(); Document document = Jsoup.parse(html); // get title string resulTopicM.title = document.select("div.row div.span9 h4").html().trim(); // get doubanPosterUrl and doubanTopicurl Elements urlElements = document.select("div.row div.span6 div.content-meta a"); if (urlElements.size() >= 1) { resulTopicM.doubanPosterUrl = urlElements.get(0).attr("href"); } if (urlElements.size() >= 2) { resulTopicM.doubanTopicUrl = urlElements.get(1).attr("href"); } // get post time String dateString = document.select("div.row div.span6").html(); String startString = "title=\"最后更新时间\"></span> "; int startPos = dateString.indexOf(startString) + startString.length(); String endString = "<span class=\"icon-arrow-right"; int endPos = dateString.indexOf(endString); dateString = dateString.substring(startPos, endPos).trim(); try { resulTopicM.date = MeiziDateUtil.getDate(dateString); } catch (ParseException e) { } // get the main content, pictures and messages Elements contentElements = document.select("div.row div.span6 div.content").first().children(); for (int i = 0; i < contentElements.size(); i++) { Element element = contentElements.get(i); String tagName = element.tagName(); if (tagName.equals("p")) { // in case incorrect syntax Elements pImgElements = element.select("img"); if (pImgElements.size() != 0) { for (int j = 0; j < pImgElements.size(); j++) { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.IMAGE; item.imgUrl = pImgElements.get(j).attr("src"); if (!TextUtils.isEmpty(item.imgUrl)) { resulTopicM.content.add(item); } } } else { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.MSG; String pContent = contentElements.get(i).html(); item.msg = Html.fromHtml(pContent).toString().trim(); if (!TextUtils.isEmpty(item.msg)) { resulTopicM.content.add(item); } } } else if (tagName.equals("div")) { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.IMAGE; Elements divElements = element.select("img"); Element imgElement = divElements.first(); if (imgElement != null) { item.imgUrl = imgElement.attr("src"); } if (!TextUtils.isEmpty(item.imgUrl)) { resulTopicM.content.add(item); } } else if (tagName.equals("img")) { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.IMAGE; item.imgUrl = element.attr("src"); if (!TextUtils.isEmpty(item.imgUrl)) { resulTopicM.content.add(item); } } } return resulTopicM; }
@Override public MeiziTopicMData decode(String html) { MeiziTopicMData resulTopicM = new MeiziTopicMData(); Document document = Jsoup.parse(html); // get title string resulTopicM.title = document.select("div.main-header h2").html().trim(); // get doubanPosterUrl and doubanTopicurl Elements urlElements = document.select("ul.clx li.widgets-views a"); if (urlElements.size() >= 1) { resulTopicM.doubanTopicUrl = urlElements.get(0).attr("href"); } String dateString = document.select("div.main-meta.clx span.post-span").html().trim(); try { if (!StringUtil.isEmpty(dateString)) resulTopicM.date = MeiziDateUtil.getDate(dateString); } catch (ParseException e) { } // get the main content, pictures and messages Elements contentElements = document.select("div.main-body").first().children(); for (int i = 0; i < contentElements.size(); i++) { Element element = contentElements.get(i); String tagName = element.tagName(); if (tagName.equals("p")) { // in case incorrect syntax Elements pImgElements = element.select("img"); if (pImgElements.size() != 0) { for (int j = 0; j < pImgElements.size(); j++) { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.IMAGE; item.imgUrl = pImgElements.get(j).attr("src"); if (!TextUtils.isEmpty(item.imgUrl)) { resulTopicM.content.add(item); } } } else { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.MSG; String pContent = contentElements.get(i).html(); item.msg = Html.fromHtml(pContent).toString().trim(); if (!TextUtils.isEmpty(item.msg)) { resulTopicM.content.add(item); } } } else if (tagName.equals("div")) { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.IMAGE; Elements divElements = element.select("img"); Element imgElement = divElements.first(); if (imgElement != null) { item.imgUrl = imgElement.attr("src"); } if (!TextUtils.isEmpty(item.imgUrl)) { resulTopicM.content.add(item); } } else if (tagName.equals("img")) { TopicContentItem item = new TopicContentItem(); item.type = ContentItemType.IMAGE; item.imgUrl = element.attr("src"); if (!TextUtils.isEmpty(item.imgUrl)) { resulTopicM.content.add(item); } } } return resulTopicM; }