Пример #1
0
  /**
   * Runs a keyword search on sou.chinanews.com for every entry of {@code keyWords} and passes the
   * result tables to {@code parsePages}.
   *
   * <p>Only the part of each map key before the first {@code ';'} is used as the query. The page
   * is requested up to five times while the fetch helper keeps returning the {@code "error"}
   * sentinel. A keyword whose page cannot be fetched, or whose page lacks the expected result
   * table, is skipped.
   */
  @Override
  public void parseBoard() {
    final int maxAttempts = 5;
    Iterator<Map.Entry<String, String>> iterator = this.keyWords.entrySet().iterator();
    while (iterator.hasNext()) {
      Map.Entry<String, String> entry = iterator.next();
      String keyWord = entry.getKey().split(";")[0];
      String transKey;
      try {
        transKey = URLEncoder.encode(keyWord, "UTF-8");
      } catch (UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset, so this is not expected; skip the keyword instead of
        // silently searching for an empty query.
        e.printStackTrace();
        continue;
      }

      String url = "http://sou.chinanews.com/search.do?q=" + transKey;
      String html = "error";
      for (int attempt = 0; attempt < maxAttempts && "error".equals(html); attempt++) {
        html = GetHTML.getHtml(url, "UTF-8");
      }
      // Decide on the fetched content, not on the attempt counter: the counter also reaches the
      // limit when the last permitted attempt succeeds, which used to discard a valid page.
      if (html == null || "error".equals(html)) {
        System.err.println("failed to fetch search results for keyword: " + keyWord);
        continue;
      }

      html = html.replaceAll("&nbsp;", "");
      Document document = Jsoup.parse(html);

      // first() yields null when nothing matches, so guard before walking further down.
      Element resultTable = document.select("table[style]").first();
      if (resultTable == null) {
        System.err.println("unexpected search page layout for keyword: " + keyWord);
        continue;
      }

      // Both the "no results" notice and the result tables live under div#news_list.
      Elements newsList = resultTable.select("tbody").select("div#news_list");
      String flag = newsList.select("span").text();
      if (flag.contains("对不起,没有找到相关内容,请更换关键字后重试")) {
        // TODO: decide how an empty search result should be reported.
        System.out.println("nothing have found...");
      } else {
        ArrayList<Element> tableList = new ArrayList<Element>(newsList.select("table"));
        parsePages(tableList, entry);
      }
    }
  }
Пример #2
0
  /**
   * Runs a keyword search on bbs.p5w.net for every entry of {@code keyWords} and passes the
   * result list items to {@code parsePages}.
   *
   * <p>Only the part of each map key before the first {@code ';'} is used as the query. The
   * search URL is first resolved through its {@code Location} response header, and the page at
   * that location is then fetched and parsed. A keyword is skipped when the header or the page
   * cannot be obtained.
   */
  @Override
  public void parseBoard() {
    Iterator<Map.Entry<String, String>> iterator = this.keyWords.entrySet().iterator();
    while (iterator.hasNext()) {
      Map.Entry<String, String> entry = iterator.next();
      String keyWord = entry.getKey().split(";")[0];
      String transKey;
      try {
        transKey = URLEncoder.encode(keyWord, "utf-8");
      } catch (UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset, so this is not expected; skip the keyword instead of
        // silently searching for an empty query.
        e.printStackTrace();
        continue;
      }

      String location =
          GetHTML.getHeaderValue("Location", "http://bbs.p5w.net/search.php?mod=my&q=" + transKey);
      // NOTE(review): assumes getHeaderValue signals a missing header with null or "" - confirm.
      if (location == null || location.isEmpty()) {
        System.err.println("no redirect location returned for keyword: " + keyWord);
        continue;
      }

      String html = GetHTML.getHtml(location, "utf-8");
      // NOTE(review): the chinanews crawler's retry loop treats "error" as getHtml's failure
      // sentinel; the same is assumed here so a failed fetch is not reported as "no results".
      if (html == null || "error".equals(html)) {
        System.err.println("failed to fetch search results for keyword: " + keyWord);
        continue;
      }

      html = html.replaceAll("&nbsp;", "");
      Document document = Jsoup.parse(html);

      // Check whether the search produced any result items; the same selection is reused as the
      // list handed to parsePages.
      Elements resultItems =
          document.select("div.result").select("span#result-items").select("ul").select("li");
      if (resultItems.size() == 0) {
        // TODO: decide how an empty search result should be reported.
        System.out.println("nothing to found.....");
      } else {
        ArrayList<Element> tableList = new ArrayList<Element>(resultItems);
        parsePages(tableList, entry);
      }
    }
  }