@Override public void parseBoard() { Iterator<Map.Entry<String, String>> iterator = this.keyWords.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry<String, String> entry = iterator.next(); String transKey = ""; String keyWord = entry.getKey().split(";")[0]; try { transKey = URLEncoder.encode(keyWord, "UTF-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } String html = "error"; int i = 0; for (i = 0; i < 5 && html.equals("error"); i++) { html = GetHTML.getHtml("http://sou.chinanews.com/search.do?q=" + transKey, "UTF-8"); } if (i < 5) { html = html.replaceAll(" ", ""); Document document = Jsoup.parse(html); String flag = document .select("table[style]") .first() .select("tbody") .select("div#news_list") .select("span") .text(); if (flag.contains("对不起,没有找到相关内容,请更换关键字后重试")) { // Todo ?? System.out.println("nothing have found..."); } else { Elements tableEles = document .select("table[style]") .first() .select("tbody") .select("div#news_list") .select("table"); ArrayList<Element> tableList = new ArrayList<Element>(); for (Element ele : tableEles) { tableList.add(ele); } parsePages(tableList, entry); } } } }
@Override public void parseBoard() { Iterator<Map.Entry<String, String>> iterator = this.keyWords.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry<String, String> entry = iterator.next(); String keyWord = entry.getKey().split(";")[0]; String transKey = ""; try { transKey = URLEncoder.encode(keyWord, "utf-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } String location = GetHTML.getHeaderValue("Location", "http://bbs.p5w.net/search.php?mod=my&q=" + transKey); String html = GetHTML.getHtml(location, "utf-8"); html = html.replaceAll(" ", ""); Document document = Jsoup.parse(html); /* 搜索关键词是否存在 */ Elements flag = document.select("div.result").select("span#result-items").select("ul").select("li"); if (flag.size() == 0) { // Todo ?? System.out.println("nothing to found....."); } else { Elements tableEles = document.select("div.result").select("span#result-items").select("ul").select("li"); ArrayList<Element> tableList = new ArrayList<Element>(); for (Element ele : tableEles) { tableList.add(ele); } parsePages(tableList, entry); } } }