Пример #1
1
 /**
  * Downloads the detail page of {@code hsDeck} and fills in its class cards, neutral cards and
  * description.
  *
  * <p>The page is fetched from {@code HPDeckSource.BASE_URL + hsDeck.getUrl()}. Card cells are
  * read from the {@code section.class-listing} and {@code section.neutral-listing} tables.
  *
  * @param hsDeck deck to populate; its URL is read and its card/description fields are set
  * @param n forwarded unchanged to {@code HtmlHelper.parseDescription}; its meaning is not
  *     visible from this method
  * @return the same {@code hsDeck} instance; if the page could not be fetched the stack trace is
  *     printed and the deck is returned as far as it was populated
  */
 @Override
 public HSDeck getDeckDetail(final HSDeck hsDeck, final float n) {
   try {
     final Document page = Jsoup.connect(HPDeckSource.BASE_URL + hsDeck.getUrl()).get();

     final HashMap<String, String> classHsItemMap = new HashMap<String, String>();
     final ArrayList<String> classCardNames = new ArrayList<String>();
     collectCardCells(
         page.select("section.class-listing table.listing td.col-name"),
         classHsItemMap,
         classCardNames);
     hsDeck.setClassHsItemMap(classHsItemMap);
     hsDeck.setClassHsItemList(DataBaseManager.getInstance().getAllCardsByNames(classCardNames));

     final HashMap<String, String> neutralHsItemMap = new HashMap<String, String>();
     final ArrayList<String> neutralCardNames = new ArrayList<String>();
     collectCardCells(
         page.select("section.neutral-listing table.listing td.col-name"),
         neutralHsItemMap,
         neutralCardNames);
     hsDeck.setNeutralHsItemMap(neutralHsItemMap);
     hsDeck.setNeutralHsItemList(
         DataBaseManager.getInstance().getAllCardsByNames(neutralCardNames));

     hsDeck.setDescription(
         HtmlHelper.parseDescription(page.select("div.deck-description").html(), n, false));
     return hsDeck;
   } catch (IOException ex) {
     ex.printStackTrace();
     return hsDeck;
   }
 }

 /**
  * Reads one card per table cell: the text of the cell's first link becomes the card name, and
  * the last character of the trimmed cell text is stored under that name (presumably the number
  * of copies in the deck -- confirm against the page markup).
  *
  * <p>Cells without a link or without any text are skipped, so a single malformed row cannot
  * abort the whole deck with an unchecked index exception.
  *
  * @param cells the {@code td.col-name} cells of one listing table
  * @param lastCharByName receives card name to last-character-of-cell entries
  * @param names receives the card names in document order
  */
 private void collectCardCells(
     final Elements cells,
     final HashMap<String, String> lastCharByName,
     final ArrayList<String> names) {
   for (int i = 0; i < cells.size(); ++i) {
     final Elements anchors = cells.get(i).select("a");
     final String cellText = cells.get(i).text().trim();
     if (anchors.isEmpty() || cellText.length() == 0) {
       continue;
     }
     final String cardName = anchors.get(0).text();
     lastCharByName.put(cardName, cellText.substring(cellText.length() - 1));
     names.add(cardName);
   }
 }
Пример #2
0
  /**
   * Downloads the page at {@code p} and collects the movie links ({@code href="/m/..."} anchors
   * with {@code target="_top"}) found in it, as absolute rottentomatoes.com URLs.
   *
   * <p>Links already present in {@code Crawl}, in {@code ToCrawl}, or earlier in the result are
   * left out. Any failure while fetching or reading is printed and the links gathered so far are
   * returned.
   *
   * @param p URL of the page to scan
   * @return the newly discovered links in the order they appear; never {@code null}
   */
  public ArrayList<String> collectLinks(String p) {
    ArrayList<String> pageLinks = new ArrayList<String>();
    // Compiled once per call rather than once per line. The path is captured up to the closing
    // quote instead of with a greedy ".*", so several links on one line are all found and an
    // anchor with no closing quote is simply not matched instead of aborting the page.
    Pattern link =
        Pattern.compile("<a target=\"_top\" href=\"/(m/[^\"]*)\"", Pattern.CASE_INSENSITIVE);
    try {
      BufferedReader reader =
          new BufferedReader(new InputStreamReader(new URL(p).openStream()));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          Matcher match = link.matcher(line);
          while (match.find()) {
            String pageUrl = "http://www.rottentomatoes.com/" + match.group(1);
            if (Crawl.contains(pageUrl)
                || ToCrawl.contains(pageUrl)
                || pageLinks.contains(pageUrl)) {
              continue;
            }
            pageLinks.add(pageUrl);
          }
        }
      } finally {
        // Release the connection even when reading fails part-way through.
        reader.close();
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }

    return pageLinks;
  }
Пример #3
0
  /**
   * Runs one crawl session starting from the state persisted in {@code Crawl.txt} (pages already
   * visited) and {@code Tocrawl.txt} (pages still queued).
   *
   * <p>If {@code Crawl.txt} is empty this is treated as the first run: {@code RTPage} is recorded
   * as visited and its links seed the queue. Otherwise both files are loaded into {@code Crawl}
   * and {@code ToCrawl}. Up to 1000 queue entries are then processed; for each unvisited entry
   * its links are collected, {@code CollectData} is called, and the URL is appended to
   * {@code Crawl.txt}. Finally the remaining queue replaces the contents of
   * {@code Tocrawl.txt} and the {@code Id} property is stored in {@code config.properties}.
   *
   * <p>{@code Tocrawl.txt} is rewritten rather than appended to. Appending the whole queue on
   * every run left already-visited and duplicate URLs at the front of the file, and those
   * entries used up the per-run iteration budget on later runs.
   *
   * @param RTPage seed page, used only when {@code Crawl.txt} is empty
   * @throws IOException if a state file cannot be read or written; {@code Crawl.txt} must exist
   */
  public void CrawlRT(String RTPage) throws IOException {
    boolean firstRun;
    BufferedReader crawledReader = new BufferedReader(new FileReader("Crawl.txt"));
    try {
      String line = crawledReader.readLine();
      firstRun = (line == null);
      // The line consumed by the emptiness check is itself a visited URL, so it is kept too.
      while (line != null) {
        Crawl.add(line);
        line = crawledReader.readLine();
      }
    } finally {
      crawledReader.close();
    }

    if (!firstRun) {
      BufferedReader pendingReader = new BufferedReader(new FileReader("Tocrawl.txt"));
      try {
        String line;
        while ((line = pendingReader.readLine()) != null) {
          // Files written by earlier appending runs may hold visited or repeated URLs.
          if (!Crawl.contains(line) && !ToCrawl.contains(line)) {
            ToCrawl.add(line);
          }
        }
      } finally {
        pendingReader.close();
      }
    }

    BufferedWriter crawledWriter = new BufferedWriter(new FileWriter("Crawl.txt", true));
    try {
      if (firstRun) {
        // initial iteration
        crawledWriter.write(RTPage + "\r\n");
        Crawl.add(RTPage);
        ToCrawl.addAll(collectLinks(RTPage));
      }
      System.out.println("Crawlled");

      // Number of queue entries to process in this session
      for (int i = 0; i < 1000; i++) {
        Crawl.removeAll(Collections.singleton(null));
        ToCrawl.removeAll(Collections.singleton(null));
        // Checked after the null purge so get(0) can never hit an empty list.
        if (ToCrawl.isEmpty()) {
          break;
        }
        String current = ToCrawl.get(0);
        if (Crawl.contains(current)) {
          ToCrawl.remove(current);
          continue;
        }
        // collect links and collect data from a particular link
        Crawl.add(current);
        ArrayList<String> links = collectLinks(current);
        CollectData(current);
        ToCrawl.remove(current);
        for (String candidate : links) {
          if (!ToCrawl.contains(candidate) && !Crawl.contains(candidate)) {
            ToCrawl.add(candidate);
          }
        }
        crawledWriter.write(current + "\r\n");
      }
    } finally {
      // Closing here also flushes the visited URLs when the crawl aborts with an exception.
      crawledWriter.close();
    }

    System.out.println("To Be Crawlled");
    // Opened without the append flag, and only after the crawl has finished.
    BufferedWriter pendingWriter = new BufferedWriter(new FileWriter("Tocrawl.txt"));
    try {
      // write to file the movies still to be crawled.
      for (String pending : ToCrawl) {
        pendingWriter.write(pending + "\r\n");
      }
    } finally {
      pendingWriter.close();
    }

    prop.setProperty("Id", Integer.toString(n));
    FileOutputStream configOut = new FileOutputStream("config.properties");
    try {
      prop.store(configOut, null);
    } finally {
      configOut.close();
    }
  }
Пример #4
0
 /**
  * Requests the deck listing with the filters carried by {@code deckBrowserRequest} and turns
  * the rows of the {@code table#decks} result table into {@link HSDeck} objects.
  *
  * <p>The request URL starts as {@code BASE_URL + DECKS_URL}; a query fragment is appended for
  * each of sorting key, deck-name filter, class filter and order-by that is present and
  * non-empty. The class filter is skipped when it contains {@code HSPlayerClass.ALL}; otherwise
  * the filter values of the single-class entries are summed. A descending order is expressed by
  * prefixing the order-by value with {@code "-"}.
  *
  * <p>The first and the last row of the table are not converted (presumably header and footer
  * rows -- confirm against the page markup).
  *
  * @param deckBrowserRequest filter and ordering options for the listing
  * @return the decks parsed from the page; empty if the request fails with an
  *     {@code IOException}, in which case the stack trace is printed
  */
 @Override
 public List<HSDeck> getDeckListFiltered(final DeckBrowserRequest deckBrowserRequest) {
   final List<HSPlayerClass> classFilter = deckBrowserRequest.getClassFilter();
   final ArrayList<HSDeck> decks = new ArrayList<HSDeck>();
   try {
     String url = HPDeckSource.BASE_URL + HPDeckSource.DECKS_URL;

     if (deckBrowserRequest.getSortingKey() != null
         && !deckBrowserRequest.getSortingKey().trim().isEmpty()) {
       url +=
           "&" + HP_REQUEST_PARAMS.FILTER_OPTION.requestParam + deckBrowserRequest.getSortingKey();
     }

     if (deckBrowserRequest.getDeckNameFilter() != null
         && !deckBrowserRequest.getDeckNameFilter().trim().isEmpty()) {
       url +=
           "&"
               + HP_REQUEST_PARAMS.FILTER_SEARCH.requestParam
               + this.constructDeckNameFilter(deckBrowserRequest.getDeckNameFilter());
     }

     if (classFilter != null
         && classFilter.size() > 0
         && !classFilter.contains(HSPlayerClass.ALL)) {
       int classFilterSum = 0;
       for (final HSPlayerClass playerClass : classFilter) {
         if (playerClass.isSingleClass()) {
           classFilterSum += playerClass.getHsFilterValue();
         }
       }
       url += "&" + HP_REQUEST_PARAMS.FILTER_CLASS.requestParam + classFilterSum;
     }

     if (deckBrowserRequest.getOrderBy() != null && !deckBrowserRequest.getOrderBy().isEmpty()) {
       final String direction = deckBrowserRequest.isAsc() ? "" : "-";
       url +=
           "&"
               + HP_REQUEST_PARAMS.FILTER_SORT.requestParam
               + direction
               + deckBrowserRequest.getOrderBy();
     }

     final Elements rows =
         Jsoup.connect(url)
             .referrer(HPDeckSource.BASE_URL + "/")
             .followRedirects(true)
             .ignoreHttpErrors(true)
             .get()
             .select("table#decks tr");

     // Rows 0 and size-1 are deliberately left out, exactly as the index bounds dictate.
     final int lastRow = rows.size() - 1;
     for (int rowIndex = 1; rowIndex < lastRow; rowIndex++) {
       final Element row = rows.get(rowIndex);
       final HSDeck deck = new HSDeck();

       final Element nameLink = row.select("td.col-name span.tip a").get(0);
       final Elements typeCell = row.select("td.col-deck-type");
       final Elements classCell = row.select("td.col-class");
       final Elements ratingCell = row.select("td.col-ratings div.rating-sum");
       final Elements costCell = row.select("td.col-dust-cost");
       final Elements updatedCell = row.select("td.col-updated abbr");

       deck.setName(nameLink.text());
       deck.setUrl(nameLink.attr("href"));
       deck.setType(typeCell.get(0).text());
       deck.setPlayerClass(classCell.get(0).text());
       deck.setRating(ratingCell.get(0).text());
       deck.setCost(costCell.get(0).text());

       final Element updated = updatedCell.get(0);
       if (updated.hasAttr("data-epoch")) {
         deck.setLastUpdate(updated.attributes().get("data-epoch"));
       }
       deck.setLastUpdateAsString(updated.text());

       decks.add(deck);
     }
   } catch (IOException ex) {
     ex.printStackTrace();
   }
   return decks;
 }