Exemplo n.º 1
0
  public List<Newsitem> parseContent(String content) throws Exception {
    List<Newsitem> newsitems = new ArrayList<Newsitem>();

    Tag newsDiv = this.extractTagByClassName(this.stripHtmlComments(content), "box_news");
    NodeList nodes = this.extractTagsByClassName(newsDiv.toHtml(), "subItem");

    for (int i = 0; i < nodes.size(); i++) {
      NewsitemImpl newsitem = new NewsitemImpl();
      Tag itemTable = (Tag) nodes.elementAt(i);

      Tag titleTag = this.extractTagByClassName(itemTable.toHtml(), "subItemtitle");
      newsitem.setTitle(titleTag.toPlainTextString());

      Node descriptionSpan =
          titleTag.getNextSibling().getNextSibling().getNextSibling().getNextSibling();
      newsitem.setDescription(
          descriptionSpan
              .toPlainTextString()
              .replaceAll("[^\\u0000-\\u00FF]", " ")
              .replace("&nbsp;Read More...", "")
              .trim());

      Tag linkTag = (Tag) extractLinks(itemTable.toHtml(), "/index.php.*").elementAt(0);
      newsitem.setUrl(URL_PREFIX + linkTag.getAttribute("href"));
      newsitems.add(newsitem);
    }
    return newsitems;
  }