예제 #1
0
  /**
   * Loads the document addressed by {@code url} and stores its {@code <html>} markup in a model.
   *
   * <p>The argument is passed through {@code SearchHelper.decrypt} before it is handed to the
   * parser. Every node matching the {@code html} tag is rendered in turn and written to the
   * model, so when several nodes match, the last one wins. Any exception is printed via
   * {@code printStackTrace()} and the model is returned with whatever content had been set up to
   * that point.
   *
   * @param url the value that {@code SearchHelper.decrypt} turns into the address to load
   * @return a new {@code ContentModel}; its content is only set when at least one node matched
   */
  public ContentModel view(String url) {
    ContentModel result = new ContentModel();
    try {
      Parser htmlParser = new Parser();
      htmlParser.setURL(SearchHelper.decrypt(url));
      // Re-applies the encoding the parser reports for the page (a fixed "gb2312" was tried
      // here at some point and abandoned).
      htmlParser.setEncoding(htmlParser.getEncoding());

      NodeList matches = htmlParser.extractAllNodesThatMatch(new TagNameFilter("html"));
      int index = 0;
      while (index < matches.size()) {
        // Each match replaces the previously stored content.
        result.setContent(matches.elementAt(index).toHtml());
        index++;
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
    return result;
  }
예제 #2
0
  /**
   * Builds an HTML fragment of search results for the given query string.
   *
   * <p>The page at {@code SearchHelper.SEARCH_URL_BAIDU + param} is parsed and the markup of its
   * {@code <body>} is extracted. Two passes are then made over that markup:
   *
   * <ol>
   *   <li>Every {@code <table>} is inspected. Tables whose markup contains the text
   *       {@code "div"} are skipped. A table containing "相关搜索" ("related searches") is
   *       emitted whole inside {@code <div id="rs">}. Any other table is expanded through
   *       {@code extractHtml} into {@code <div class="content">}, with links wrapped in
   *       {@code <h3 class="t">} and the "百度快照" ("Baidu snapshot") link dropped.
   *   <li>Every {@code <p>} whose markup contains the text {@code "page"} is appended inside
   *       {@code <div id="page">}.
   * </ol>
   *
   * <p>Any exception is printed via {@code printStackTrace()}; whatever was assembled before the
   * failure is still returned.
   *
   * @param param text appended verbatim to the search URL
   * @param type passed through unchanged to {@code extractHtml}
   * @return a new {@code ContentModel} whose content is the assembled fragment (possibly empty)
   */
  public ContentModel listHtml(String param, String type) {
    ContentModel model = new ContentModel();
    // Purely local accumulation, so the unsynchronized StringBuilder is sufficient.
    StringBuilder html = new StringBuilder();
    try {
      NodeFilter filter = new TagNameFilter("body");
      Parser parser = new Parser();
      parser.setURL(SearchHelper.SEARCH_URL_BAIDU + param);
      parser.setEncoding(parser.getEncoding());
      NodeList list = parser.extractAllNodesThatMatch(filter);
      String body = list.toHtml();

      // Pass 1: result tables.
      Parser content = new Parser();
      content.setInputHTML(body);
      content.setEncoding(parser.getEncoding());
      NodeFilter contentFilter = new TagNameFilter("table");
      NodeList contentList = content.extractAllNodesThatMatch(contentFilter);
      for (int i = 0; i < contentList.size(); i++) {
        Node table = contentList.elementAt(i);
        String s = table.toHtml();
        if (s.contains("div")) {
          continue;
        }

        // The "related searches" table is passed through untouched in its own container.
        if (s.contains("相关搜索")) {
          html.append("<div id=\"rs\">").append(s).append("</div>");
          continue;
        }

        html.append("<div class=\"content\">");
        for (Node n : extractHtml(table, type)) {
          if (n instanceof LinkTag) {
            // Drop the "Baidu snapshot" (cached copy) link; every other link becomes a title.
            if (n.toPlainTextString().equals("百度快照")) {
              continue;
            }
            html.append("<h3 class=\"t\">").append(n.toHtml()).append("</h3>");
          } else {
            html.append(n.toHtml());
          }
        }
        html.append("<br/></div><br>");
      }

      // Pass 2: pagination links.
      Parser page = new Parser();
      page.setInputHTML(body);
      page.setEncoding(parser.getEncoding());
      NodeFilter pageFilter = new TagNameFilter("p");
      NodeList pageList = page.extractAllNodesThatMatch(pageFilter);
      for (int i = 0; i < pageList.size(); i++) {
        String s = pageList.elementAt(i).toHtml();
        if (!s.contains("page")) {
          continue;
        }
        // The wrapper used to open with <p id="page"> yet close with </div>, leaving a stray
        // </div> in the output. A <p> cannot contain the <p> being wrapped, so the wrapper is a
        // <div>, matching the closing tag and the other containers emitted above.
        html.append("<div id=\"page\">").append(s).append("</div>");
      }
    } catch (Exception e) {
      // Best effort: report the failure and fall through with the partial fragment.
      e.printStackTrace();
    }

    model.setContent(html.toString());
    return model;
  }