Пример #1
0
  /**
   * Downloads the page at {@code mapUrl} and collects the absolute targets of its {@code <a>}
   * elements that look like internal links.
   *
   * <p>A link is kept only when its absolute URL contains {@code base} and does not contain the
   * substring {@code "index"}. Both checks are plain substring matches on the whole URL.
   *
   * @param mapUrl URL of the page to fetch and scan for links
   * @param base substring an absolute link must contain to be kept (filters out links to external
   *     domains)
   * @return the kept link URLs, in the order jsoup returned the anchors, or {@code null} if any
   *     exception is raised while fetching or filtering
   */
  public static List<String> genSitemap(String mapUrl, String base) {
    try {
      Document doc = Jsoup.connect(mapUrl).get();
      List<String> stringLinks = new ArrayList<String>();
      for (Element link : doc.select("a")) {
        // The "abs:" prefix asks jsoup for the href resolved to an absolute URL.
        String href = link.attr("abs:href");
        // Keep only links that contain the base (drops external domains) and that do not mention
        // "index" (index.html links would otherwise send the crawl into an infinite loop).
        // NOTE(review): "index" is matched anywhere in the URL, not only as a file name.
        if (href.contains(base) && !href.contains("index")) {
          stringLinks.add(href);
        }
      }
      return stringLinks;
    } catch (Exception e) {
      // Failures are deliberately not reported here; callers must handle a null result.
      return null;
    }
  }
Пример #2
0
 /**
  * Downloads the page at {@code mapUrl} and lists the absolute source URLs of its {@code <img>}
  * elements.
  *
  * <p>The first entry of the returned list is always {@code mapUrl} itself; the image URLs follow.
  * Images whose absolute source URL contains the substring {@code "paypal"} are left out.
  *
  * @param mapUrl URL of the page to fetch and scan for images
  * @return a list holding {@code mapUrl} followed by the kept image URLs, or {@code null} if any
  *     exception is raised (the exception is printed to standard output first)
  */
 public static List<String> getImgs(String mapUrl) {
   try {
     Document doc = Jsoup.connect(mapUrl).get();
     List<String> stringImgs = new ArrayList<String>();
     // The page URL leads the list so the images stay associated with the page they came from.
     stringImgs.add(mapUrl);
     for (Element img : doc.select("img")) {
       // The "abs:" prefix asks jsoup for the src resolved to an absolute URL.
       String imgSrc = img.attr("abs:src");
       if (!imgSrc.contains("paypal")) {
         stringImgs.add(imgSrc);
       }
     }
     return stringImgs;
   } catch (Exception e) {
     System.out.println(e);
     return null;
   }
 }