public void download(Connection aInConnection, Collection<Image> images) throws IOException { aInConnection.url(url); Document lDocument = aInConnection.get(); Element lMain = lDocument.getElementById("main"); Elements lContents = lMain.getElementsByClass("content"); if (lContents.size() == 1) { StringBuilder sb = new StringBuilder(); Element lContent = lContents.first(); collectImages(lContent, images); Elements lLightboxElements = lContent.getElementsByClass("lightbox"); for (Element lLightboxElement : lLightboxElements) { Collection<Node> lImageNodes = extractImageNodes(lLightboxElement); Element lParent = lLightboxElement.parent(); int i = lLightboxElement.siblingIndex(); lParent.insertChildren(i, lImageNodes); lLightboxElement.remove(); } Elements lChildElements = lContent.children(); for (Element lChildElement : lChildElements) { if (lChildElement.hasClass("clear")) { // no more post content break; } if (title == null && lChildElement.tagName().equals("h1")) { // the first h1 header is the title title = lChildElement.html(); } else { if (excerpt == null && lChildElement.tagName().equals("p")) { excerpt = lChildElement.text(); } String lStr = lChildElement.toString(); sb.append(lStr); } } content = sb.toString(); Elements lDateElements = lContent.getElementsByClass("date"); String lHunDate = lDateElements.first().html(); date = new PostDate(lHunDate); } else { System.out.println("More than one content in main section of post page " + toString()); } }
public static Connection connect(URL url) { Connection con = new HttpConnection(); con.url(url); return con; }