示例#1
0
 /**
  * Crawls the page at {@code url} and inserts one {@code Topic} row per feed entry found on it.
  *
  * <p>Every element matching {@code div[class=feed-main]} is mapped to a fresh {@code Topic}
  * (base link, title, link, vote count, author, author description, author link) and passed to
  * {@code DatabaseHelper.insertEntity}. If {@code docCrawler} throws, the stack trace is printed
  * and nothing is inserted; a {@code null} document is likewise skipped silently.
  *
  * <p>An entry whose {@code data-votecount} attribute is missing or not an integer is stored
  * with a vote count of 0, so one malformed entry no longer aborts the rest of the page.
  *
  * @param url address of the listing page to crawl; also stored as each topic's base link
  */
 @Override
 public void docParser(String url) {
   TopQASpider sp = new TopQASpider();
   Document doc;
   try {
     doc = sp.docCrawler(url);
   } catch (Exception ex) {
     ex.printStackTrace();
     return;
   }
   if (doc == null) {
     return;
   }
   for (Element ele : doc.select("div[class=feed-main]")) {
     // Select the shared sub-trees once instead of re-querying them for every field.
     Elements titleAnchor = ele.select("h2").select("a");
     Elements authorInfo = ele.select("div[class=zm-item-answer-author-info]");
     String rawVotes = ele.select("div[class=zm-item-vote]").select("a").attr("data-votecount");

     // attr() yields "" when the attribute is absent; treat unparsable counts as 0 rather than
     // letting NumberFormatException skip every remaining entry on the page.
     Integer votes;
     try {
       votes = Integer.valueOf(rawVotes.trim());
     } catch (NumberFormatException ex) {
       votes = 0;
     }

     // One Topic per entry so no field value can carry over from a previous row.
     Topic topic = new Topic();
     topic.setBaselink(url);
     topic.setTitle(titleAnchor.text());
     topic.setLink("http://www.zhihu.com" + titleAnchor.attr("href"));
     topic.setAcount(votes);
     topic.setAuthor(authorInfo.select("a").text());
     topic.setDescr(authorInfo.select("span").attr("title"));
     topic.setAuthorlink(
         "http://www.zhihu.com" + authorInfo.select("a[class=author-link]").attr("href"));
     DatabaseHelper.insertEntity(Topic.class, BeanUtil.transBean2Map(topic));
   }
 }
示例#2
0
 /**
  * Entry point: runs {@code docParser} on pages 1 through 9 of every seed URL returned by the
  * {@code select * from seedUrl} query.
  *
  * <p>Each page address is the seed's link followed by {@code ?page=N}. An exception raised
  * while handling one page is printed and does not stop the remaining pages or seeds.
  *
  * @param args command-line arguments (not used)
  * @throws Exception propagated from the statements outside the per-page try block
  */
 public static void main(String[] args) throws Exception {
   List<SeedUrl> seeds = DatabaseHelper.queryEntityList(SeedUrl.class, "select * from seedUrl");
   TopQASpider spider = new TopQASpider();
   final int lastPage = 9;
   for (SeedUrl seed : seeds) {
     int page = 1;
     while (page <= lastPage) {
       try {
         // Built inside the try so a failure reading the seed link is handled per page too.
         String pageUrl = seed.getLink() + "?page=" + page;
         spider.docParser(pageUrl);
       } catch (Exception ex) {
         ex.printStackTrace();
       }
       page++;
     }
   }
 }