Example #1
0
  /**
   * Downloads one listing page and prints, to standard output, every number that is missing from
   * the sequence found on it.
   *
   * <p>The page is narrowed in three passes: nodes with {@code bgcolor="#DDE1FF"}, then nodes that
   * carry an attribute named {@code tr}, then nodes that carry an attribute named {@code span}.
   * Starting at index 5, every fifth node of the final set is read as a number. The expected
   * sequence starts at {@code starNo * 100} and counts upwards; each expected number that is passed
   * over before the listed number is reached gets printed.
   *
   * <p>NOTE(review): {@code HasAttributeFilter("tr")} and {@code HasAttributeFilter("span")} match
   * on attribute names, not tag names; a tag-name filter was presumably intended. Left unchanged,
   * confirm against the target page.
   *
   * @param url address of the page to download
   * @param starNo group number; the expected sequence starts at {@code starNo * 100}
   */
  public static void dealOnePage(String url, int starNo) {
    HttpURLConnection connection = null;
    try {
      connection = (HttpURLConnection) (new URL(url)).openConnection();
      Parser parser = new Parser(connection);
      NodeList tableSet =
          parser.extractAllNodesThatMatch(new HasAttributeFilter("bgcolor", "#DDE1FF"));
      parser = new Parser(new Lexer(tableSet.toHtml()));
      NodeList tdSet = parser.extractAllNodesThatMatch(new HasAttributeFilter("tr"));
      parser = new Parser(new Lexer(tdSet.toHtml()));

      PrototypicalNodeFactory p = new PrototypicalNodeFactory();
      p.registerTag(new SpanTag());
      parser.setNodeFactory(p);

      NodeList spanSet = parser.extractAllNodesThatMatch(new HasAttributeFilter("span"));
      int base = starNo * 100;
      int offset = 0;
      for (int i = 5; i < spanSet.size(); i = i + 5) {
        String str = spanSet.elementAt(i).toPlainTextString();

        // The gap loop below only terminates when the cell is the canonical decimal form of a
        // number at or beyond the next expected one; anything else used to spin forever.
        int listed;
        try {
          listed = Integer.parseInt(str);
        } catch (NumberFormatException e) {
          System.err.println("Skipping non-numeric entry: " + str);
          continue;
        }
        if (!String.valueOf(listed).equals(str) || listed < base + offset) {
          System.err.println("Skipping out-of-sequence entry: " + str);
          continue;
        }

        // Print every expected number that the page skipped.
        while (base + offset != listed) {
          System.out.println(base + offset);
          offset++;
        }
        // Step past the number that was found.
        offset++;
      }
    } catch (ParserException e) {
      e.printStackTrace();
    } catch (MalformedURLException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // Release the underlying socket whether or not parsing succeeded.
      if (connection != null) {
        connection.disconnect();
      }
    }
  }
  /**
   * Collects article links from a remote page into {@code LINKHASH}.
   *
   * <p>The page is read as GB2312 and its link tags are extracted, serialized back to HTML and
   * parsed a second time. A link is kept when its lower-cased address starts with {@code pre} and
   * its text is not "详细内容" (compared ignoring case); kept links are stored keyed by address.
   *
   * @param url address of the page to scan
   * @param pre prefix the lower-cased link address must start with
   * @throws Exception if the parser fails to load or parse the page
   */
  void doc(String url, String pre) throws Exception {
    Parser pageParser = new Parser();
    pageParser.setURL(url);
    pageParser.setEncoding("GB2312");

    NodeList anchors = pageParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    if (anchors == null || anchors.size() <= 0) {
      return;
    }

    // Second pass over the serialized link tags only.
    Parser anchorParser = new Parser();
    anchorParser.setInputHTML(anchors.toHtml());
    NodeList candidates =
        anchorParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    if (candidates == null) {
      return;
    }

    for (int idx = 0; idx < candidates.size(); idx++) {
      LinkTag anchor = (LinkTag) candidates.elementAt(idx);
      if (!anchor.getLink().toLowerCase().startsWith(pre)) {
        continue;
      }
      if (anchor.getLinkText().equalsIgnoreCase("详细内容")) {
        continue;
      }
      LinkBean entry = new LinkBean();
      entry.setLink(anchor.getLink());
      entry.setName(anchor.getLinkText());
      LINKHASH.put(anchor.getLink(), entry);
    }
  }
  /**
   * Collects article links from an HTML string into {@code LINKHASH}, skipping addresses that are
   * already present in {@code articleDocCache}.
   *
   * <p>Link tags are extracted from {@code content}, serialized back to HTML and parsed a second
   * time. A link qualifies when its lower-cased address starts with {@code pre} and its text is not
   * "详细内容" (compared ignoring case). A qualifying link whose key is already cached is only
   * logged; otherwise it is stored keyed by address.
   *
   * @param content HTML text to scan
   * @param pre prefix the lower-cased link address must start with
   * @throws Exception if the parser fails to parse the content
   */
  void docByHTML(String content, String pre) throws Exception {
    Parser contentParser = new Parser();
    contentParser.setInputHTML(content);
    contentParser.setEncoding("GB2312");

    NodeList anchors = contentParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    if (anchors == null || anchors.size() <= 0) {
      return;
    }

    // Second pass over the serialized link tags only.
    Parser anchorParser = new Parser();
    anchorParser.setInputHTML(anchors.toHtml());
    NodeList candidates =
        anchorParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    if (candidates == null) {
      return;
    }

    for (int idx = 0; idx < candidates.size(); idx++) {
      LinkTag anchor = (LinkTag) candidates.elementAt(idx);
      if (!anchor.getLink().toLowerCase().startsWith(pre)) {
        continue;
      }
      if (anchor.getLinkText().equalsIgnoreCase("详细内容")) {
        continue;
      }
      if (null != articleDocCache.get(getKey(anchor.getLink()))) {
        // Already known: report it and leave LINKHASH untouched.
        logger.info(">> 已存在 [" + anchor.getLink() + "] 地址");
        continue;
      }
      LinkBean entry = new LinkBean();
      entry.setLink(anchor.getLink());
      entry.setName(anchor.getLinkText());
      LINKHASH.put(anchor.getLink(), entry);
    }
  }
  /**
   * Checks that a parsed page can be rewritten in place with an ad-hoc visitor: every link and
   * image address is prefixed with {@code localhost://} and the re-serialized page must equal
   * {@code MODIFIED_HTML}.
   */
  public void testPageModification() throws Exception {
    final String prefix = "localhost://";

    // Anonymous visitor that does the same job as the UrlModifyingVisitor.
    NodeVisitor prefixer =
        new NodeVisitor() {
          public void visitTag(Tag tag) {
            if (tag instanceof LinkTag) {
              LinkTag anchor = (LinkTag) tag;
              anchor.setLink(prefix + anchor.getLink());
              return;
            }
            if (tag instanceof ImageTag) {
              ImageTag image = (ImageTag) tag;
              image.setImageURL(prefix + image.getImageURL());
            }
          }
        };

    // Parse the whole page without a filter.
    NodeList nodes = Parser.createParser(HTML_WITH_LINK, null).parse(null);
    nodes.visitAllNodesWith(prefixer);
    assertStringEquals("Expected HTML", MODIFIED_HTML, nodes.toHtml());
  }
  /**
   * Downloads a course page and extracts its details into a map.
   *
   * <p>Keys written: {@code School} (first {@code span class="first-owner"}), {@code Structure}
   * (the "Requirements" section), {@code Scholarship} (program code taken from the links of the
   * "Relevant Degrees" section), fixed {@code IELTS Average Requirement}/{@code IELTS Lowest
   * Requirement} values, {@code Title}, {@code Level} and, when a program was resolved through
   * {@code getProgram}, {@code Type} and {@code Length (months)}.
   *
   * <p>Any exception is printed and the whole download is retried; the method only returns after
   * an attempt succeeds, so a permanently failing input retries without end.
   *
   * @param url descriptor array: {@code url[0]} is printed in the retry message, {@code url[1]} is
   *     the address fetched, {@code url[4]} and {@code url[2]} form the title, {@code url[3]} is
   *     the level
   * @return the extracted details
   */
  public static HashMap<String, String> SouthamptonGetDetails(String[] url) {

    while (true) {
      CloseableHttpClient httpclient = null;
      try {
        HashMap<String, String> result = new HashMap<String, String>();
        RequestConfig requestConfig =
            RequestConfig.custom().setSocketTimeout(10000).setConnectTimeout(10000).build();
        httpclient = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();

        HttpGet httpGet = new HttpGet(url[1]);
        HttpResponse response = httpclient.execute(httpGet);
        HttpEntity entity = response.getEntity();

        String htmls = null;
        if (entity != null) {
          htmls = EntityUtils.toString(entity).replace("\t", " ");
        }
        System.out.println("Got reply!");
        if (htmls == null) {
          // Same retry path as before, but with a descriptive cause instead of a bare NPE.
          throw new IllegalStateException("Response carried no body: " + url[1]);
        }

        // ---- school ----
        // "span" is rewritten to "form" before parsing so the element can be matched by the
        // "form" tag name below. NOTE(review): presumably a workaround for span tags not being
        // registered as composite tags; the replacement also touches any text containing "span".
        Parser parser = Parser.createParser(htmls.replace("span", "form"), "utf-8");
        AndFilter SFilter =
            new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("class", "first-owner"));
        NodeList nodes4 = parser.extractAllNodesThatMatch(SFilter);
        if (nodes4.size() > 0) {
          String school = html2Str(nodes4.elementAt(0).toHtml());
          result.put("School", school);
        }

        // ---- structure and related program ----
        parser = Parser.createParser(htmls, "utf-8");
        AndFilter ESFilter =
            new AndFilter(
                new TagNameFilter("div"),
                new HasAttributeFilter("class", "body__inner w-doublewide copy"));
        NodeList nodes1 = parser.extractAllNodesThatMatch(ESFilter);
        String structure = "";
        String[] ProgramURL = null;
        if (nodes1.size() > 0) {
          String AllContents = nodes1.toHtml();
          // Cut the content into sections, one per <h2 id=...> heading.
          String[] SP = AllContents.split("<h2 id=");
          for (int i = 1; i < SP.length; i++) {
            String row = "<h2 id=" + SP[i];
            if (row.contains("<h2 id=\"requirements\">Requirements</h2>")) {
              structure =
                  (html2Str(
                          row.replace("<br />", "\r\n")
                              .replace("</strong>", "")
                              .replace("<strong>", "")
                              .replace("</", "\r\n</")
                              .replace("\t", " ")
                              .replace("&amp;", " "))
                      .replace("\r\n\r\n", "\r\n"));
              structure = HTMLFilter(structure);
              result.put("Structure", structure);
            } else if (row.contains("<h2 id=\"relevant-degrees\">Relevant Degrees</h2>")) {
              // Links look like <a href="/program/BSC">Bachelor of Science (BSC)</a>.
              parser = Parser.createParser(row, "utf-8");
              AndFilter ProfessionNameFilter =
                  new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("href"));
              NodeList nodes5 = parser.extractAllNodesThatMatch(ProfessionNameFilter);
              // Bound by the list being indexed (was nodes4, the school matches).
              for (int j = 0; j < nodes5.size(); j++) {
                LinkTag link = (LinkTag) nodes5.elementAt(j);
                if (!link.getAttribute("href").equals("#")) {
                  String code = link.getAttribute("href").replace("/program/", "");
                  ProgramURL = getProgram(code);
                  // NOTE(review): the program code is stored under "Scholarship" - confirm intent.
                  result.put("Scholarship", code);
                }
              }
            }
          }
        }

        // ---- IELTS (fixed values) ----
        result.put("IELTS Average Requirement", "6.5");
        result.put("IELTS Lowest Requirement", "6.0");

        // ---- title, level and type ----
        result.put("Title", url[4] + " " + url[2]);

        result.put("Level", url[3]);
        if (ProgramURL != null) {
          result.put("Type", ProgramURL[2]);
          result.put("Length (months)", ProgramURL[5]);
        }

        return result;
      } catch (Exception ee) {
        System.out.println("Retrying..." + url[0]);
        ee.printStackTrace();
      } finally {
        // Close the client on success and on failure so retries do not pile up connections.
        if (httpclient != null) {
          try {
            httpclient.close();
          } catch (Exception ignored) {
            // Best effort: a failed close must not mask the result or trigger another retry.
          }
        }
      }
    }
  }
  /**
   * Downloads the basketball lottery schedule page and turns its table rows into
   * {@link JclqScheduleItem}s.
   *
   * <p>The page at {@code SCHEDULE_URL} is fetched as GBK. The first table inside
   * {@code div[class='box-tbl']} is used; rows are read from the third one on, and a row is
   * converted only when it has at least six columns and an official number of five or more
   * characters. Columns are read as: 0 official number, 1 match name, 2 "away VS home", 3 match
   * time, 4 sale state, 5 text naming the play types whose sale status is reported as unopened.
   *
   * <p>NOTE(review): play-type names overlap ("胜负单关" is a substring of "让分胜负单关"), so a
   * column that lists only the longer name also marks the shorter one as unopened. Confirm the
   * column format before tightening the match.
   *
   * @param officialDate not read by this implementation
   * @return the parsed schedule items; empty when the box holds no table or no usable row
   * @throws FetchFailedException when the page is missing, reports "404 Not Found", lacks the
   *     schedule box, or any error occurs while parsing
   */
  @Override
  public List<JclqScheduleItem> fetchJclqSchedule(String officialDate) throws FetchFailedException {
    Map<String, String> headerParams = new HashMap<String, String>();
    headerParams.put("Referer", "http://info.sporttery.cn");
    headerParams.put(
        "User-Agent",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19");
    List<JclqScheduleItem> jclqScheduleItemList = new ArrayList<JclqScheduleItem>();

    String encoding = "gbk";
    JclqScheduleItem jclqScheduleItem = null;
    String siteName = "中国竞彩网网[竞彩篮球赛程]";
    String logHeader = siteName + SCHEDULE_URL;

    try {
      String webInfo =
          CoreFetcherUtils.URLGetWithHeaderParams(SCHEDULE_URL, headerParams, null, encoding);
      // contains() also catches the marker at the very start of the body (indexOf(...) > 0 did not).
      if (webInfo == null || webInfo.contains("404 Not Found")) {
        logger.error(logHeader + ",data is null or 404 Not Found");
        throw new FetchFailedException("404 Not Found");
      }

      Parser parser = Parser.createParser(webInfo, encoding);

      NodeList nodeList =
          parser.extractAllNodesThatMatch(new CssSelectorNodeFilter("div[class='box-tbl']"));
      if (null != nodeList && nodeList.size() > 0) {
        NodeFilter tableFilter = new TagNameFilter("table");
        Parser parser2 = Parser.createParser(nodeList.toHtml(), encoding);
        NodeList tableNodeList = parser2.extractAllNodesThatMatch(tableFilter);
        if (tableNodeList != null && tableNodeList.size() > 0) {
          TableTag catchTableTag = (TableTag) tableNodeList.elementAt(0);
          if (catchTableTag != null) {
            TableRow[] catchRows = catchTableTag.getRows();
            TableColumn[] catchColumns = null;
            // Rows 0 and 1 are skipped; data starts at the third row.
            for (int i = 2; i < catchRows.length; i++) {
              catchColumns = catchRows[i].getColumns();
              if (catchColumns != null && catchColumns.length >= 6) {
                jclqScheduleItem = new JclqScheduleItem();

                String officialNum = catchColumns[0].toPlainTextString().trim();
                if (officialNum.length() < 5) {
                  continue;
                }

                // Parse the match time first; a two-digit year gets a "20" prefix and seconds
                // are appended in both cases.
                String matchDateStr = catchColumns[3].toPlainTextString().trim();
                String[] yearStr = matchDateStr.split("-");
                if (yearStr[0].length() <= 2) {
                  matchDateStr = "20" + matchDateStr + ":00";
                } else {
                  matchDateStr = matchDateStr + ":00";
                }
                Date matchDate = CoreDateUtils.parseDate(matchDateStr, CoreDateUtils.DATETIME);
                Calendar matchDateCalendar = Calendar.getInstance();
                matchDateCalendar.setTime(matchDate);
                matchDateCalendar.add(Calendar.MINUTE, 1);
                jclqScheduleItem.setMatchDate(matchDateCalendar.getTime());

                // Derive the official publication date from the weekday prefix of the official
                // number, the current date and the match time.
                Calendar cd = Calendar.getInstance();
                // Clear the time-of-day fields.
                cd.set(Calendar.HOUR_OF_DAY, 0);
                cd.set(Calendar.MINUTE, 0);
                cd.set(Calendar.SECOND, 0);
                cd.set(Calendar.MILLISECOND, 0);

                int nowWeekDay = cd.get(Calendar.DAY_OF_WEEK);
                int fetchWeekDay = weekDay.get(officialNum.substring(0, 2));

                if (nowWeekDay != fetchWeekDay) {
                  int m = fetchWeekDay - nowWeekDay;
                  if (m < -1) {
                    cd.add(Calendar.DATE, m + 7);
                  } else {
                    cd.add(Calendar.DATE, m);
                  }
                }

                // If the computed date falls after the match time, go back one week.
                if (cd.after(matchDateCalendar)) {
                  cd.add(Calendar.DATE, -7);
                }

                // If the computed date is more than a week before the match, move it forward by
                // whole weeks.
                long weekTimeInMillis = 7L * 24 * 3600 * 1000;
                long diffTimeInMillis = matchDateCalendar.getTimeInMillis() - cd.getTimeInMillis();
                if (diffTimeInMillis > weekTimeInMillis) {
                  // Number of whole weeks between the two dates.
                  int diffWeekCount = (int) (diffTimeInMillis / weekTimeInMillis);
                  cd.add(Calendar.DATE, 7 * diffWeekCount);
                }

                jclqScheduleItem.setMatchNum(
                    CoreDateUtils.formatDate(cd.getTime(), "yyyyMMdd")
                        + LotteryConstant.JCLQ_MATCH_NUM_CODE_DEFAULT
                        + officialNum.substring(2));
                jclqScheduleItem.setOfficialDate(
                    CoreDateUtils.parseDate(CoreDateUtils.formatDate(cd.getTime())));
                Integer oNum = null;
                try {
                  oNum = Integer.valueOf(officialNum.substring(2));
                } catch (Exception e) {
                  logger.error("截取官方编码时,转换为Integer错误", e);
                }
                jclqScheduleItem.setOfficialNum(oNum);
                jclqScheduleItem.setMatchName(
                    JclqUtil.convertMatchName(
                        catchColumns[1].toPlainTextString().trim(),
                        LotteryType.JCLQ_SF,
                        FetcherType.T_PENGINEAPI));

                // The first name of the "X VS Y" pair is stored as the away team.
                String team = catchColumns[2].toPlainTextString().trim();
                String[] teamStr = team.split("VS");
                jclqScheduleItem.setAwayTeam(teamStr[0].trim());
                jclqScheduleItem.setHomeTeam(teamStr[1].trim());

                if ("已开售".equals(catchColumns[4].toPlainTextString().trim())) {
                  jclqScheduleItem.setStatus(JclqRaceStatus.OPEN);
                } else {
                  jclqScheduleItem.setStatus(JclqRaceStatus.UNOPEN);
                }

                // A play type named in column 5 is reported as unopened, every other one as
                // open. contains() also recognises a name at the very start of the text, which
                // the former indexOf(...) > 0 test missed.
                String saleText = catchColumns[5].toPlainTextString().trim();
                jclqScheduleItem.setDynamicSaleSfStatus(
                    saleText.contains("胜负单关")
                        ? JclqDynamicSaleStatus.SALE_UNOPEN
                        : JclqDynamicSaleStatus.SALE_OPEN);
                jclqScheduleItem.setStaticSaleSfStatus(
                    saleText.contains("胜负过关")
                        ? JclqStaticSaleStatus.SALE_UNOPEN
                        : JclqStaticSaleStatus.SALE_OPEN);
                jclqScheduleItem.setDynamicSaleRfsfStatus(
                    saleText.contains("让分胜负单关")
                        ? JclqDynamicSaleStatus.SALE_UNOPEN
                        : JclqDynamicSaleStatus.SALE_OPEN);
                jclqScheduleItem.setStaticSaleRfsfStatus(
                    saleText.contains("让分胜负过关")
                        ? JclqStaticSaleStatus.SALE_UNOPEN
                        : JclqStaticSaleStatus.SALE_OPEN);
                jclqScheduleItem.setDynamicSaleSfcStatus(
                    saleText.contains("胜分差单关")
                        ? JclqDynamicSaleStatus.SALE_UNOPEN
                        : JclqDynamicSaleStatus.SALE_OPEN);
                jclqScheduleItem.setStaticSaleSfcStatus(
                    saleText.contains("胜分差过关")
                        ? JclqStaticSaleStatus.SALE_UNOPEN
                        : JclqStaticSaleStatus.SALE_OPEN);
                jclqScheduleItem.setDynamicSaleDxfStatus(
                    saleText.contains("大小分单关")
                        ? JclqDynamicSaleStatus.SALE_UNOPEN
                        : JclqDynamicSaleStatus.SALE_OPEN);
                jclqScheduleItem.setStaticSaleDxfStatus(
                    saleText.contains("大小分过关")
                        ? JclqStaticSaleStatus.SALE_UNOPEN
                        : JclqStaticSaleStatus.SALE_OPEN);
                jclqScheduleItemList.add(jclqScheduleItem);
              }
            } // end for catchRows
          } // end if catchTableTag != null
        } // end if table list is non-empty
      } else {
        logger.error(logHeader + "竞彩篮球赛程数据表格不存在,返回null");
        throw new FetchFailedException("竞彩篮球赛程数据表格不存在");
      }
    } catch (Exception e) {
      // Also receives the FetchFailedExceptions thrown above; they are logged and re-wrapped.
      logger.error(logHeader + "竞彩篮球赛程错误" + e.getMessage(), e);
      throw new FetchFailedException(e.getMessage());
    }
    return jclqScheduleItemList;
  }
  /**
   * Downloads an Electronics and Computer Science (ECS) course page and extracts its details.
   *
   * <p>Keys written: {@code Month of Entry} (9, 10 or empty, from the month names found in the
   * page), {@code Title} and {@code Type} (from {@code h2 class="uos-sia-title"}), a fixed
   * {@code School}, {@code Tuition Fee} (largest {@code &pound;} amount on the page, omitted when
   * none is found), {@code Academic Entry Requirement} ({@code div id="pane_entry"}),
   * {@code Structure} ({@code div id="pane_modules"}), {@code Length (months)}, the two IELTS
   * requirement keys derived from scores mentioned in the entry text, and fixed {@code Level} and
   * {@code Scholarship} values.
   *
   * <p>Any exception is printed and the whole download is retried; the method only returns after
   * an attempt succeeds, so a permanently failing input retries without end.
   *
   * @param url descriptor array; only {@code url[1]}, the address to fetch, is read
   * @return the extracted details
   */
  public static HashMap<String, String> SouthamptonGetDetails2(String[] url) // for ECS
      {

    while (true) {
      CloseableHttpClient httpclient = null;
      try {
        HashMap<String, String> result = new HashMap<String, String>();
        RequestConfig requestConfig =
            RequestConfig.custom().setSocketTimeout(10000).setConnectTimeout(10000).build();
        httpclient = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();

        HttpGet httpGet = new HttpGet(url[1]);
        HttpResponse response = httpclient.execute(httpGet);
        HttpEntity entity = response.getEntity();

        String htmls = null;
        if (entity != null) {
          htmls = EntityUtils.toString(entity).replace("\t", " ");
        }
        System.out.println("Got reply!");
        if (htmls == null) {
          // Same retry path as before, but with a descriptive cause instead of a bare NPE.
          throw new IllegalStateException("Response carried no body: " + url[1]);
        }

        // ---- month of entry ----
        if (htmls.contains("September") || htmls.contains("september")) {
          result.put("Month of Entry", "9");
        } else if (htmls.contains("October") || htmls.contains("october")) {
          result.put("Month of Entry", "10");
        } else {
          result.put("Month of Entry", "");
        }

        // ---- title and type ----
        Parser parser = Parser.createParser(htmls, "utf-8");
        AndFilter TitleFilter =
            new AndFilter(
                new TagNameFilter("h2"), new HasAttributeFilter("class", "uos-sia-title"));
        NodeList nodes4 = parser.extractAllNodesThatMatch(TitleFilter);
        if (nodes4.size() > 0) {
          String title = HTMLFilter(html2Str(nodes4.toHtml()));

          result.put("Title", title);
          result.put("Type", GetType(title));
        }

        // ---- school (fixed) ----
        result.put("School", "Electronics and Computer Science (ECS)");

        // ---- fee: largest pound amount on the page, thousands separators removed ----
        Pattern p = Pattern.compile("&pound;[0-9]+");
        Matcher m = p.matcher(htmls.replace(",", ""));
        int max = 0;
        while (m.find()) {
          int amount;
          try {
            amount = Integer.parseInt(m.group().replace("&pound;", ""));
          } catch (NumberFormatException tooLarge) {
            // A digit run beyond the int range is not a fee; skipping it avoids a retry that
            // could never succeed.
            continue;
          }
          if (amount > max) {
            max = amount;
          }
        }
        if (max != 0) {
          System.out.println(max);
          result.put("Tuition Fee", "" + max);
        }

        // ---- entry requirements ----
        parser = Parser.createParser(htmls, "utf-8");
        AndFilter EntryFilter =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pane_entry"));
        NodeList nodes5 = parser.extractAllNodesThatMatch(EntryFilter);
        String entryAll = "";
        if (nodes5.size() > 0) {
          // When several panes match, the last one wins.
          for (int i = 0; i < nodes5.size(); i++) {
            Node node = (Node) nodes5.elementAt(i);

            entryAll = (html2Str(node.toHtml().replace(">", "> "))).replace("\r", "");
            entryAll = entryAll.replace("\n", " ");
            entryAll = HTMLFilter(entryAll);
            result.put("Academic Entry Requirement", entryAll);
          }
        }

        // ---- structure (modules pane) ----
        parser = Parser.createParser(htmls, "utf-8");
        AndFilter StructureFilter =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pane_modules"));
        NodeList nodes6 = parser.extractAllNodesThatMatch(StructureFilter);
        String structure = "";
        if (nodes6.size() > 0) {
          // When several panes match, the last one wins.
          for (int i = 0; i < nodes6.size(); i++) {
            Node node = (Node) nodes6.elementAt(i);

            structure =
                (html2Str(
                        node.toHtml()
                            .replace("<br />", "\r\n")
                            .replace("</strong>", "")
                            .replace("<strong>", "")
                            .replace("</", "\r\n</")
                            .replace("\t", " ")
                            .replace("&amp;", " "))
                    .replace("\r\n\r\n", "\r\n"));
            structure = HTMLFilter(structure);
            result.put("Structure", structure);
          }
        }

        // ---- length, derived from the structure text ----
        String length = getLastYear(structure);
        result.put("Length (months)", length);

        // ---- IELTS: scores mentioned in the entry text, highest first ----
        String International = entryAll;
        ArrayList<String> list = new ArrayList<String>();
        if (International.contains("7.5")) {
          list.add("7.5");
        }
        if (International.contains("7.0") || International.contains(" 7 ")) {
          list.add("7.0");
        }
        if (International.contains("6.5")) {
          list.add("6.5");
        }
        if (International.contains("6.0") || International.contains(" 6 ")) {
          list.add("6.0");
        }
        if (International.contains("5.5")) {
          list.add("5.5");
        }
        if (list.size() == 1) {
          result.put("IELTS Average Requirement", list.get(0));
          result.put("IELTS Lowest Requirement", list.get(0));
        } else if (list.size() >= 2) {
          result.put("IELTS Average Requirement", list.get(0));
          result.put("IELTS Lowest Requirement", list.get(1));
        } else {
          // Nothing recognised: fall back to fixed defaults.
          result.put("IELTS Average Requirement", "6.0");
          result.put("IELTS Lowest Requirement", "5.5");
        }

        result.put("Level", "Undergraduate");
        result.put("Scholarship", "");

        return result;
      } catch (Exception ee) {
        System.out.println("Retrying...");
        ee.printStackTrace();
      } finally {
        // Close the client on success and on failure so retries do not pile up connections.
        if (httpclient != null) {
          try {
            httpclient.close();
          } catch (Exception ignored) {
            // Best effort: a failed close must not mask the result or trigger another retry.
          }
        }
      }
    }
  }
  /**
   * Runs a search against {@code SearchHelper.SEARCH_URL_BAIDU} and rebuilds the result page as a
   * simplified HTML fragment.
   *
   * <p>The {@code body} of the response is extracted, then scanned twice: once for {@code table}
   * elements (result entries) and once for {@code p} elements (paging). Tables whose markup
   * contains "div" are dropped; a table containing "相关搜索" is wrapped in {@code <div id="rs">};
   * every other table is passed through {@code extractHtml} and its nodes are written into a
   * {@code <div class="content">}, with links wrapped in {@code <h3 class="t">} and links whose
   * text is "百度快照" omitted. Paragraphs whose markup contains "page" are appended last.
   *
   * <p>Any exception is printed and whatever was built up to that point is returned.
   *
   * @param param text appended to the search URL
   * @param type passed through to {@code extractHtml}
   * @return a model whose content is the assembled HTML
   */
  public ContentModel listHtml(String param, String type) {
    ContentModel model = new ContentModel();
    StringBuilder out = new StringBuilder();
    try {
      Parser remote = new Parser();
      remote.setURL(SearchHelper.SEARCH_URL_BAIDU + param);
      remote.setEncoding(remote.getEncoding());
      String body = remote.extractAllNodesThatMatch(new TagNameFilter("body")).toHtml();

      // Result entries: one table per hit.
      Parser tableParser = new Parser();
      tableParser.setInputHTML(body);
      tableParser.setEncoding(remote.getEncoding());
      NodeList tables = tableParser.extractAllNodesThatMatch(new TagNameFilter("table"));
      for (int t = 0; t < tables.size(); t++) {
        Node table = tables.elementAt(t);
        String markup = table.toHtml();
        if (markup.indexOf("div") != -1) {
          continue;
        }
        if (markup.indexOf("相关搜索") != -1) {
          out.append("<div id=\"rs\">" + markup + "</div>");
          continue;
        }

        out.append("<div class=\"content\">");
        for (Node part : extractHtml(table, type)) {
          if (!(part instanceof LinkTag)) {
            out.append(part.toHtml());
            continue;
          }
          if (part.toPlainTextString().equals("百度快照")) {
            continue;
          }
          out.append("<h3 class=\"t\">" + part.toHtml() + "</h3>");
        }
        out.append("<br/></div><br>");
      }

      // Paging data.
      Parser pagingParser = new Parser();
      pagingParser.setInputHTML(body);
      pagingParser.setEncoding(remote.getEncoding());
      NodeList paragraphs = pagingParser.extractAllNodesThatMatch(new TagNameFilter("p"));
      for (int k = 0; k < paragraphs.size(); k++) {
        String markup = paragraphs.elementAt(k).toHtml();
        if (markup.indexOf("page") == -1) {
          continue;
        }
        // NOTE(review): opens with <p> but closes with </div>; kept as emitted today.
        out.append("<p id=\"page\">" + markup + "</div>");
      }
    } catch (Exception e) {
      e.printStackTrace();
    }

    model.setContent(out.toString());
    return model;
  }