/**
 * Handles {@code GET /s}: rebuilds the incoming query string, passes it to
 * {@code searchService.listHtml} and exposes the result to the "result" view.
 *
 * <p>Every request parameter value is URL-encoded with ISO8859-1 and joined as
 * {@code name=value} pairs separated by {@code &}. The encoded value of a parameter
 * named "type" (case-insensitive) is additionally stored in {@code type}.
 *
 * <p>Model attributes written: {@code content} (the HTML returned by the search
 * service), {@code type}, and {@code key} (the {@code wd} parameter with its
 * ISO8859-1 bytes re-read as UTF-8, or an empty string when {@code wd} is absent).
 *
 * @param model   model that receives the {@code content}, {@code type} and {@code key} attributes
 * @param request current HTTP request whose parameters are forwarded
 * @return the view name, {@code PATH + "result"}
 * @throws Exception propagated from the charset conversion or from the calls made here
 */
@RequestMapping(value = "/s", method = RequestMethod.GET)
public String s(ModelMap model, HttpServletRequest request) throws Exception {
    // StringBuilder is sufficient: the buffer never leaves this method.
    StringBuilder paramString = new StringBuilder();
    for (Object paramName : request.getParameterMap().keySet()) {
        String name = (String) paramName;
        for (String value : request.getParameterValues(name)) {
            if (paramString.length() > 0) {
                paramString.append("&");
            }
            try {
                // Encode before appending anything so that a failure leaves the buffer
                // untouched and the fallback below adds the pair exactly once.
                String encoded = URLEncoder.encode(value, "ISO8859-1");
                if ("type".equalsIgnoreCase(name)) {
                    // NOTE(review): `type` is not declared in this method; presumably an
                    // instance field, in which case its value is shared between requests
                    // and survives when a request carries no "type" parameter - confirm.
                    type = encoded;
                }
                paramString.append(name).append("=").append(encoded);
            } catch (Exception e) {
                // Best effort: forward the raw value if it cannot be encoded.
                paramString.append(name).append("=").append(value);
            }
        }
    }

    ContentModel contentModel = searchService.listHtml(paramString.toString(), type);
    model.put("content", contentModel.getContent());
    model.put("type", type);

    // A request without "wd" used to fail here with a NullPointerException.
    String keyword = request.getParameter("wd");
    // Re-reads the ISO8859-1 bytes as UTF-8; presumably compensates for the container
    // decoding the query string as ISO8859-1 - confirm against the server configuration.
    model.put("key", keyword == null ? "" : new String(keyword.getBytes("ISO8859-1"), "utf-8"));
    return PATH + "result";
}
public ContentModel view(String url) { ContentModel model = new ContentModel(); try { NodeFilter filter = new TagNameFilter("html"); Parser parser = new Parser(); parser.setURL(SearchHelper.decrypt(url)); parser.setEncoding(parser.getEncoding()); // parser.setEncoding("gb2312"); NodeList list = parser.extractAllNodesThatMatch(filter); for (int i = 0; i < list.size(); i++) { String s = list.elementAt(i).toHtml(); model.setContent(s); } } catch (Exception e) { e.printStackTrace(); } return model; }
/**
 * Fetches {@code SearchHelper.SEARCH_URL_BAIDU + param}, extracts the result tables and
 * the pagination paragraph from the page body, and re-wraps them as an HTML fragment.
 *
 * <p>Processing of each {@code table} node found in the body:
 * <ul>
 *   <li>tables whose markup contains "div" are skipped;</li>
 *   <li>a table containing "相关搜索" ("related searches") is wrapped in
 *       {@code <div id="rs">};</li>
 *   <li>any other table is expanded through {@code extractHtml(table, type)} into a
 *       {@code <div class="content">}; link nodes become {@code <h3 class="t">} headings,
 *       except links whose text is "百度快照" ("Baidu snapshot"), which are dropped.</li>
 * </ul>
 * Afterwards every {@code p} node whose markup contains "page" is appended inside
 * {@code <p id="page">}.
 *
 * <p>If any step throws, the stack trace is printed and the fragment built so far is
 * returned.
 *
 * @param param query string appended verbatim to the search URL
 * @param type  passed through unchanged to {@code extractHtml}
 * @return a model whose content is the generated HTML fragment (possibly empty)
 */
public ContentModel listHtml(String param, String type) {
    ContentModel model = new ContentModel();
    // StringBuilder is sufficient: the buffer never leaves this method.
    StringBuilder html = new StringBuilder();
    try {
        NodeFilter filter = new TagNameFilter("body");
        Parser parser = new Parser();
        parser.setURL(SearchHelper.SEARCH_URL_BAIDU + param);
        parser.setEncoding(parser.getEncoding());
        NodeList list = parser.extractAllNodesThatMatch(filter);
        String body = list.toHtml();

        // Search results: one table per entry.
        Parser content = new Parser();
        content.setInputHTML(body);
        content.setEncoding(parser.getEncoding());
        NodeFilter contentFilter = new TagNameFilter("table");
        NodeList contentList = content.extractAllNodesThatMatch(contentFilter);
        for (int i = 0; i < contentList.size(); i++) {
            Node table = contentList.elementAt(i);
            String s = table.toHtml();
            if (s.contains("div")) {
                continue;
            }
            if (s.contains("相关搜索")) {
                html.append("<div id=\"rs\">").append(s).append("</div>");
                continue;
            }
            html.append("<div class=\"content\">");
            for (Node n : extractHtml(table, type)) {
                if (n instanceof LinkTag) {
                    if (n.toPlainTextString().equals("百度快照")) {
                        continue;
                    }
                    // Render the markup first so a failure cannot leave a dangling <h3>.
                    String linkHtml = n.toHtml();
                    html.append("<h3 class=\"t\">").append(linkHtml).append("</h3>");
                } else {
                    html.append(n.toHtml());
                }
            }
            html.append("<br/></div><br>");
        }

        /* Collect the pagination data. */
        Parser page = new Parser();
        page.setInputHTML(body);
        page.setEncoding(parser.getEncoding());
        NodeFilter pageFilter = new TagNameFilter("p");
        NodeList pageList = page.extractAllNodesThatMatch(pageFilter);
        for (int i = 0; i < pageList.size(); i++) {
            String s = pageList.elementAt(i).toHtml();
            if (!s.contains("page")) {
                continue;
            }
            // The wrapper used to be closed with "</div>", which emitted a stray end tag
            // able to close an enclosing container in the surrounding page.
            html.append("<p id=\"page\">").append(s).append("</p>");
        }
    } catch (Exception e) {
        // Best effort: keep whatever was assembled before the failure.
        e.printStackTrace();
    }
    model.setContent(html.toString());
    return model;
}