Example #1
0
  /**
   * Crawls {@code page} listing pages of one category and serializes the collected products to
   * {@code XML_FILE_NAME}.
   *
   * <p>For every index {@code i} in {@code [0, page)} the listing at {@code url + i} is passed to
   * {@code parseUrl}; each entry of {@code myProdLinks} is then fetched, the links found in the
   * product table are parsed with {@code parseProductUrl}, and products not already collected
   * (as decided by {@code List.contains}) are added. Finally every product gets {@code catName}
   * as its category name and the single resulting category is written via {@code objectToXML}.
   *
   * <p>Any exception is caught and its stack trace printed; in that case no XML is written.
   *
   * @param url listing URL prefix; the zero-based page index is appended to it
   * @param page number of listing pages to visit
   * @param catName category name assigned to every collected product
   * @throws IOException declared for callers; the body currently catches all exceptions itself
   */
  private void doParse(String url, int page, String catName) throws IOException {
    try {
      List<Product> productList = new ArrayList<Product>();
      for (int i = 0; i < page; i++) {
        try {
          // Presumably fills myProdLinks with the product-page links of this listing page.
          parseUrl(url + i);
          for (String prodUrl : myProdLinks) {
            System.out.println(prodUrl);
            // NOTE(review): per the jsoup docs a timeout of 0 means "wait forever" - confirm
            // that an unbounded wait is really intended here.
            Document doc = Jsoup.connect(prodUrl).timeout(0).get();
            Elements cells = doc.select("table[style=padding-left:10px;]").select("td");

            // The links normally sit in cell 9; some pages carry them in cell 11 instead.
            Elements linkElements = linksInCell(cells, 9);
            if (linkElements.isEmpty()) {
              linkElements = linksInCell(cells, 11);
            }
            if (linkElements.isEmpty()) {
              // Unexpected layout: skip this page instead of aborting the whole crawl.
              System.err.println("No product links found on " + prodUrl);
              continue;
            }

            for (Element e : linkElements) {
              Product prod = parseProductUrl(e.attr("abs:href"));
              System.out.println(prod.getName());
              System.out.println(prod.getCena());
              System.out.println(prod.getOpis());
              System.out.println(prod.getImg());

              if (!productList.contains(prod)) {
                productList.add(prod);
                System.out.println("true");
              } else {
                System.out.println("false");
              }
              System.out.println();
            }
          }
        } finally {
          // Always reset the shared link list, even on failure, so that stale links from an
          // aborted page cannot leak into a later run.
          myProdLinks.clear();
        }
      }

      // Assigned only after collection so the contains() check above compares products in the
      // same state as before (equals() might take the category name into account).
      for (Product product : productList) {
        product.setCategoryName(catName);
      }
      System.out.println("Total size: " + productList.size());

      Category cat = new Category();
      cat.setProductList(productList);
      List<Category> catList = new ArrayList<Category>();
      catList.add(cat);
      objectToXML(catList, new File(XML_FILE_NAME));
    } catch (Exception e) {
      // Best-effort crawl: report the failure and return without writing the XML file.
      e.printStackTrace();
    }
  }

  /** Returns the {@code a[href]} links inside cell {@code index}, or an empty list if absent. */
  private Elements linksInCell(Elements cells, int index) {
    if (index >= cells.size()) {
      return new Elements();
    }
    return cells.get(index).select("a[href]");
  }
Example #2
0
 /**
  * Smoke test: parses the first listing page, follows the first product page in
  * {@code myProdLinks}, parses the first linked product and writes it as a single category to
  * {@code XML_FILE_NAME}.
  *
  * @throws IOException if fetching the product page fails
  * @throws IllegalStateException if no product page or no product link is found
  */
 @Override
 void test() throws IOException {
   int firstPage = 0;
   parseUrl("http://www.signal.pl/o_typ_fotelobreco.php?id_typ=17&porcja=" + firstPage);
   if (myProdLinks.isEmpty()) {
     throw new IllegalStateException("parseUrl produced no product links");
   }

   Document doc = Jsoup.connect(myProdLinks.get(0)).timeout(0).get();
   Elements cells = doc.select("table[style=padding-left:10px;]").select("td");

   // Links are expected in cell 9, with cell 11 as the fallback also used by doParse.
   // first() yields null when a cell holds no link, so every step is checked.
   Element element = null;
   if (cells.size() > 9) {
     element = cells.get(9).select("a[href]").first();
   }
   if (element == null && cells.size() > 11) {
     element = cells.get(11).select("a[href]").first();
   }
   if (element == null) {
     throw new IllegalStateException("No product link found on " + myProdLinks.get(0));
   }
   System.out.println(element);

   List<Product> productList = new ArrayList<Product>();
   productList.add(parseProductUrl(element.attr("abs:href")));

   Category cat = new Category();
   cat.setProductList(productList);
   List<Category> catList = new ArrayList<Category>();
   catList.add(cat);
   objectToXML(catList, new File(XML_FILE_NAME));
 }