public static List genSitemap(String mapUrl, String base) { try { Document doc = Jsoup.connect(mapUrl).get(); Elements links = doc.select("a"); Elements imgs = doc.select("img"); List<String> stringLinks = new ArrayList<String>(); for (Element link : links) { stringLinks.add(link.attr("abs:href")); } Iterator<String> domIt = stringLinks.iterator(); // filter out links to external domains while (domIt.hasNext()) { String incDom = domIt.next(); boolean domTest; domTest = incDom.contains(base); if (domTest == false) { domIt.remove(); } } Iterator<String> i = stringLinks.iterator(); while (i.hasNext()) { // remove index.html from incoming links prevents infinite loop String incA = i.next(); if (incA.contains("index")) { i.remove(); } } return stringLinks; } catch (Exception e) { // System.out.println(e); return null; } }
/**
 * Lists the image URLs found on a page, headed by the page URL itself.
 *
 * <p>Fetches {@code mapUrl} and resolves the {@code src} of every {@code <img>} element to an
 * absolute URL. Sources containing the text {@code "paypal"} are left out.
 *
 * @param mapUrl URL of the page to fetch
 * @return a mutable list whose first element is {@code mapUrl}, followed by the kept image
 *     URLs in document order; {@code null} if any exception was raised (the exception is
 *     printed to standard output)
 */
public static List getImgs(String mapUrl) {
    try {
        Document page = Jsoup.connect(mapUrl).get();
        Elements imageTags = page.select("img");
        List<String> result = new ArrayList<String>();
        // The first entry is always the page the images were found on.
        result.add(mapUrl);
        for (Element tag : imageTags) {
            String source = tag.attr("abs:src");
            if (source.contains("paypal")) {
                // Skip any image whose URL mentions paypal.
                continue;
            }
            result.add(source);
        }
        return result;
    } catch (Exception e) {
        System.out.println(e);
        return null;
    }
}