示例#1
1
 /**
  * Downloads the detail page of {@code hsDeck} and fills in its class cards, neutral cards and
  * description.
  *
  * <p>The page is fetched from {@code HPDeckSource.BASE_URL + hsDeck.getUrl()}. Card names are
  * read from the {@code td.col-name} cells of the {@code section.class-listing} and {@code
  * section.neutral-listing} tables; the names are then resolved through {@code
  * DataBaseManager.getAllCardsByNames}.
  *
  * @param hsDeck deck to complete; it is mutated in place
  * @param n value passed through unchanged to {@code HtmlHelper.parseDescription}
  * @return the same {@code hsDeck} instance; if the download fails with an {@link IOException}
  *     the stack trace is printed and the deck is returned without the detail fields being set
  */
 @Override
 public HSDeck getDeckDetail(final HSDeck hsDeck, final float n) {
   try {
     final Document page = Jsoup.connect(HPDeckSource.BASE_URL + hsDeck.getUrl()).get();

     final HashMap<String, String> classHsItemMap = new HashMap<String, String>();
     final ArrayList<String> classCardNames = new ArrayList<String>();
     this.collectDeckCardCells(
         page.select("section.class-listing table.listing td.col-name"),
         classHsItemMap,
         classCardNames);
     hsDeck.setClassHsItemMap(classHsItemMap);
     hsDeck.setClassHsItemList(DataBaseManager.getInstance().getAllCardsByNames(classCardNames));

     final HashMap<String, String> neutralHsItemMap = new HashMap<String, String>();
     final ArrayList<String> neutralCardNames = new ArrayList<String>();
     this.collectDeckCardCells(
         page.select("section.neutral-listing table.listing td.col-name"),
         neutralHsItemMap,
         neutralCardNames);
     hsDeck.setNeutralHsItemMap(neutralHsItemMap);
     hsDeck.setNeutralHsItemList(
         DataBaseManager.getInstance().getAllCardsByNames(neutralCardNames));

     hsDeck.setDescription(
         HtmlHelper.parseDescription(page.select("div.deck-description").html(), n, false));
     return hsDeck;
   } catch (IOException ex) {
     ex.printStackTrace();
     return hsDeck;
   }
 }

 /**
  * Reads one card per cell: the text of the cell's first link is the card name, and the last
  * character of the cell's trimmed text is stored against that name (presumably the number of
  * copies in the deck; verify against the page markup).
  *
  * <p>Cells without a link or without any text are skipped instead of raising an unchecked
  * index exception that the caller's {@code IOException} handler would not catch.
  *
  * @param cells the {@code td.col-name} cells of one card table
  * @param lastCharByName receives card name to last character of the cell text
  * @param names receives the card names in document order
  */
 private void collectDeckCardCells(
     final Elements cells,
     final HashMap<String, String> lastCharByName,
     final ArrayList<String> names) {
   for (int i = 0; i < cells.size(); ++i) {
     final Elements anchors = cells.get(i).select("a");
     final String cellText = cells.get(i).text().trim();
     if (anchors.isEmpty() || cellText.isEmpty()) {
       continue;
     }
     final String cardName = anchors.get(0).text();
     lastCharByName.put(cardName, cellText.substring(cellText.length() - 1));
     names.add(cardName);
   }
 }
示例#2
0
  /**
   * Fetches the page at {@code mapUrl} and returns the absolute targets of its {@code <a>}
   * elements, keeping only those that contain {@code base} and do not contain {@code "index"}.
   *
   * <p>Both filters are plain substring checks on the resolved {@code abs:href} value, so any
   * link that merely mentions {@code base} or {@code "index"} somewhere in its URL is affected.
   *
   * @param mapUrl address of the page to scan
   * @param base substring a link must contain to be kept
   * @return a mutable list of the retained link URLs in document order (elements are {@code
   *     String}s), or {@code null} if anything goes wrong while fetching or filtering
   */
  public static List genSitemap(String mapUrl, String base) {
    try {
      Document doc = Jsoup.connect(mapUrl).get();
      List<String> internalLinks = new ArrayList<String>();
      for (Element link : doc.select("a")) {
        String href = link.attr("abs:href");
        // Drop links to external domains, and drop index pages: per the original author,
        // following index.html links leads to an infinite loop.
        if (href.contains(base) && !href.contains("index")) {
          internalLinks.add(href);
        }
      }
      return internalLinks;
    } catch (Exception ignored) {
      // Deliberately quiet best-effort: failure is reported to the caller as null.
      return null;
    }
  }
示例#3
0
 /**
  * Fetches the page at {@code mapUrl} and lists the absolute sources of its {@code <img>}
  * elements.
  *
  * <p>The first entry of the result is always {@code mapUrl} itself; image sources follow in
  * document order. Any source containing {@code "paypal"} is left out.
  *
  * @param mapUrl address of the page to scan
  * @return the page URL followed by the retained image URLs, or {@code null} if any exception
  *     occurs (the exception is printed to standard output)
  */
 public static List getImgs(String mapUrl) {
   try {
     Document page = Jsoup.connect(mapUrl).get();
     Elements imageTags = page.select("img");
     List<String> result = new ArrayList<String>();
     result.add(mapUrl);
     for (int idx = 0; idx < imageTags.size(); idx++) {
       String source = imageTags.get(idx).attr("abs:src");
       if (source.contains("paypal")) {
         continue;
       }
       result.add(source);
     }
     return result;
   } catch (Exception failure) {
     System.out.println(failure);
     return null;
   }
 }
示例#4
0
  /**
   * Downloads the practice table page and prints, for every row of the element with id {@code
   * datatable}, the text of the row's second cell to standard output.
   *
   * <p>If the download fails the stack trace is printed and nothing else happens; previously the
   * constructor went on to dereference the missing document and threw a {@link
   * NullPointerException}. A page without a {@code datatable} element is reported on standard
   * error instead of failing the same way.
   */
  public Scraper() {

    final Document doc;

    try {
      doc =
          Jsoup.connect(
                  "http://www.geog.leeds.ac.uk/courses/other/programming/practicals/general/web/scraping-intro/table.html")
              .get();
    } catch (IOException ioe) {
      ioe.printStackTrace();
      return;
    }

    Element table = doc.getElementById("datatable");
    if (table == null) {
      System.err.println("No element with id 'datatable' found");
      return;
    }

    for (Element row : table.getElementsByTag("TR")) {
      Elements tds = row.getElementsByTag("TD");
      // Only the second cell (index 1) of each row is printed; rows with fewer cells are skipped.
      if (tds.size() > 1) {
        System.out.println(tds.get(1).text());
      }
    }
  }
示例#5
0
 /**
  * Downloads the deck listing that matches {@code deckBrowserRequest} and converts the rows of
  * {@code table#decks} into {@code HSDeck} objects.
  *
  * <p>The first and the last {@code tr} of the table are never read (presumably a header and a
  * footer row; verify against the page markup). A row that lacks any of the expected cells is
  * skipped, so one unexpected row no longer aborts the whole listing with an unchecked index
  * exception.
  *
  * @param deckBrowserRequest sorting key, deck-name filter, class filter and ordering to apply
  * @return the decks parsed from the page in table order; empty if the download fails with an
  *     {@link IOException} (the stack trace is printed) or no rows are found
  */
 @Override
 public List<HSDeck> getDeckListFiltered(final DeckBrowserRequest deckBrowserRequest) {
   final ArrayList<HSDeck> list = new ArrayList<HSDeck>();
   try {
     final Elements rows =
         Jsoup.connect(this.buildDeckListRequestUrl(deckBrowserRequest))
             .referrer(HPDeckSource.BASE_URL + "/")
             .followRedirects(true)
             .ignoreHttpErrors(true)
             .get()
             .select("table#decks tr");
     for (int i = 1; i < rows.size() - 1; ++i) {
       final Element row = rows.get(i);
       final Elements nameLinks = row.select("td.col-name span.tip a");
       final Elements typeCells = row.select("td.col-deck-type");
       final Elements classCells = row.select("td.col-class");
       final Elements ratingCells = row.select("td.col-ratings div.rating-sum");
       final Elements costCells = row.select("td.col-dust-cost");
       final Elements updatedCells = row.select("td.col-updated abbr");
       if (nameLinks.isEmpty()
           || typeCells.isEmpty()
           || classCells.isEmpty()
           || ratingCells.isEmpty()
           || costCells.isEmpty()
           || updatedCells.isEmpty()) {
         // Not a regular deck row; skip it.
         continue;
       }
       final Element nameLink = nameLinks.get(0);
       final Element updated = updatedCells.get(0);
       final HSDeck hsDeck = new HSDeck();
       hsDeck.setName(nameLink.text());
       hsDeck.setUrl(nameLink.attr("href"));
       hsDeck.setType(typeCells.get(0).text());
       hsDeck.setPlayerClass(classCells.get(0).text());
       hsDeck.setRating(ratingCells.get(0).text());
       hsDeck.setCost(costCells.get(0).text());
       if (updated.hasAttr("data-epoch")) {
         hsDeck.setLastUpdate(updated.attributes().get("data-epoch"));
       }
       hsDeck.setLastUpdateAsString(updated.text());
       list.add(hsDeck);
     }
   } catch (IOException ex) {
     ex.printStackTrace();
   }
   return list;
 }

 /**
  * Assembles the listing URL: {@code BASE_URL + DECKS_URL} followed by one {@code "&"}-prefixed
  * parameter for each filter that is actually set on the request.
  *
  * <p>NOTE(review): the sorting key and order-by values are appended as-is, without URL
  * encoding; only the deck-name filter goes through {@code constructDeckNameFilter}.
  */
 private String buildDeckListRequestUrl(final DeckBrowserRequest deckBrowserRequest) {
   String url = HPDeckSource.BASE_URL + HPDeckSource.DECKS_URL;

   final String sortingKey = deckBrowserRequest.getSortingKey();
   if (sortingKey != null && !sortingKey.trim().isEmpty()) {
     url += "&" + HP_REQUEST_PARAMS.FILTER_OPTION.requestParam + sortingKey;
   }

   final String deckNameFilter = deckBrowserRequest.getDeckNameFilter();
   if (deckNameFilter != null && !deckNameFilter.trim().isEmpty()) {
     url +=
         "&"
             + HP_REQUEST_PARAMS.FILTER_SEARCH.requestParam
             + this.constructDeckNameFilter(deckNameFilter);
   }

   final List<HSPlayerClass> classFilter = deckBrowserRequest.getClassFilter();
   if (classFilter != null && classFilter.size() > 0 && !classFilter.contains(HSPlayerClass.ALL)) {
     // The class parameter is the sum of the filter values of all single-class entries.
     int n = 0;
     for (final HSPlayerClass hsPlayerClass : classFilter) {
       if (hsPlayerClass.isSingleClass()) {
         n += hsPlayerClass.getHsFilterValue();
       }
     }
     url += "&" + HP_REQUEST_PARAMS.FILTER_CLASS.requestParam + n;
   }

   final String orderBy = deckBrowserRequest.getOrderBy();
   if (orderBy != null && !orderBy.isEmpty()) {
     // A leading "-" is sent when the request is not ascending.
     final String direction = deckBrowserRequest.isAsc() ? "" : "-";
     url += "&" + HP_REQUEST_PARAMS.FILTER_SORT.requestParam + direction + orderBy;
   }

   return url;
 }