/**
 * Fetches the post page at {@code url} through the given connection and fills in
 * {@code title}, {@code excerpt}, {@code content} and {@code date} from its markup.
 *
 * <p>The page is expected to contain an element with id {@code main} holding exactly one
 * element of class {@code content}. If that structure is not found, a message is printed
 * to standard output and the method returns without assigning any of those values.
 *
 * <p>Within the content element:
 * <ul>
 *   <li>{@code collectImages} is invoked with the content element and {@code images};</li>
 *   <li>every element of class {@code lightbox} is replaced, in place, by the nodes returned
 *       from {@code extractImageNodes};</li>
 *   <li>direct children are scanned in order up to (not including) the first child of class
 *       {@code clear}: the first {@code h1} seen while {@code title} is still null supplies
 *       the title, the text of the first {@code p} seen while {@code excerpt} is still null
 *       supplies the excerpt, and every child other than the title is appended to the
 *       content markup;</li>
 *   <li>the inner HTML of the first element of class {@code date}, if any, is passed to
 *       {@code PostDate}.</li>
 * </ul>
 *
 * @param aInConnection connection used to request the page; its URL is set to {@code url}
 * @param images        collection handed to {@code collectImages}
 *                      (presumably receives the images found in the post; verify against that helper)
 * @throws IOException propagated from the calls made here, e.g. the page request
 */
public void download(Connection aInConnection, Collection<Image> images) throws IOException {
  aInConnection.url(url);
  Document lDocument = aInConnection.get();

  // getElementById yields null when the page has no such element; guard against an NPE.
  Element lMain = lDocument.getElementById("main");
  if (lMain == null) {
    System.out.println("No main section in post page " + toString());
    return;
  }

  Elements lContents = lMain.getElementsByClass("content");
  if (lContents.size() != 1) {
    // Distinguish "none" from "several" so the diagnostic matches what was actually found.
    String lProblem = lContents.isEmpty() ? "No content" : "More than one content";
    System.out.println(lProblem + " in main section of post page " + toString());
    return;
  }

  Element lContent = lContents.first();
  collectImages(lContent, images);

  // Swap each lightbox wrapper for the image nodes extracted from it, keeping its position.
  Elements lLightboxElements = lContent.getElementsByClass("lightbox");
  for (Element lLightboxElement : lLightboxElements) {
    Collection<Node> lImageNodes = extractImageNodes(lLightboxElement);

    Element lParent = lLightboxElement.parent();
    int lIndex = lLightboxElement.siblingIndex();
    lParent.insertChildren(lIndex, lImageNodes);
    lLightboxElement.remove();
  }

  StringBuilder lMarkup = new StringBuilder();
  for (Element lChildElement : lContent.children()) {
    if (lChildElement.hasClass("clear")) {
      // no more post content
      break;
    }

    if (title == null && lChildElement.tagName().equals("h1")) {
      // the first h1 header is the title; it is not part of the content markup
      title = lChildElement.html();
    } else {
      if (excerpt == null && lChildElement.tagName().equals("p")) {
        // the first paragraph doubles as the excerpt
        excerpt = lChildElement.text();
      }
      lMarkup.append(lChildElement.toString());
    }
  }

  content = lMarkup.toString();

  // first() yields null for an empty selection; leave the date untouched in that case.
  Element lDateElement = lContent.getElementsByClass("date").first();
  if (lDateElement == null) {
    System.out.println("No date in post page " + toString());
  } else {
    date = new PostDate(lDateElement.html());
  }
}
Пример #2
0
 /**
  * Creates a new {@link HttpConnection} and sets its URL to the given one.
  *
  * @param url the URL handed to the new connection's {@code url} method
  * @return the newly created connection
  */
 public static Connection connect(URL url) {
   final Connection connection = new HttpConnection();
   connection.url(url);
   return connection;
 }