Esempio n. 1
0
  /**
   * Collects the URLs to visit next from a results page: the links of the product rows that have
   * no {@code onclick} attribute, followed by the "next page" link unless that link already
   * points at page number 7.
   *
   * <p>Collection is best effort: a link that cannot be converted to a {@link URL} is logged at
   * debug level and skipped, it never aborts the whole collection.
   *
   * @param document parsed results page
   * @return the collected URLs in document order (row links first, pagination link last); never
   *     {@code null}, possibly empty
   */
  @Override
  public List<URL> getNextPages(Document document) {
    List<URL> urls = new ArrayList<>();

    // Collect the rows that link to offer-comparison pages.
    Elements rows = document.select(PRODUCTS_ROW_QUERY + ":not([onclick])");
    for (Element row : rows) {
      String rowHref = row.attr("abs:href");
      try {
        urls.add(Utils.stringToURL(rowHref));
      } catch (ConnectionException e) {
        logger.debug("Skipping product link '" + rowHref + "': " + e);
      }
    }

    // Pagination: follow the "next" link until it targets the last allowed page.
    final int maxPage = 7;
    Element next = document.select("a[href].next").first();
    if (next != null) {
      // "abs:href" resolves a relative link against the document's base URI, consistent with the
      // row links above; the raw "href" may be relative and then cannot be turned into a URL.
      String nextHref = next.attr("abs:href");
      if (!nextHref.contains("page_nr=" + maxPage)) {
        try {
          urls.add(Utils.stringToURL(nextHref));
        } catch (ConnectionException e) {
          logger.debug("Skipping pagination link '" + nextHref + "': " + e);
        }
      }
    }

    logger.debug("Collected " + urls.size() + " urls to visit");
    return urls;
  }
Esempio n. 2
0
  /**
   * Builds the first URL to visit for the given product: the source URL followed by the
   * {@code lt/search} path with the product as {@code search_query}.
   *
   * @param product product name; it is passed through {@code Utils.urlEncodeSpecial} together with
   *     the characters {@code ~ " < >} before being placed in the query string
   * @return the search URL produced by {@code Utils.stringToURL}
   * @throws ConnectionException propagated from {@code Utils.stringToURL}
   */
  @Override
  public URL prepareTargetUrl(String product) throws ConnectionException {
    String encodedProduct = Utils.urlEncodeSpecial(product, '~', '"', '<', '>');

    StringBuilder target = new StringBuilder(getSourceURL().toString());
    target.append("lt/search?search_query=");
    target.append(encodedProduct);
    target.append("&submit_search=Ieškoti");

    return Utils.stringToURL(target.toString());
  }
  /**
   * Collects the retail-offer links from the product view. Pagination is intentionally not
   * followed.
   *
   * <p>For every {@code div.productCompare} directly under {@code div#productView}, the first
   * {@code a[href].buttonRetail} link is taken. Blocks without such a link, and links that cannot
   * be converted to a {@link URL}, are skipped.
   *
   * @param document parsed results page
   * @return the collected URLs in document order; never {@code null}, possibly empty
   */
  @Override
  public List<URL> getNextPages(Document document) {
    List<URL> urls = new ArrayList<>();

    for (Element compareBlock : document.select("div#productView > div.productCompare")) {
      Element retailLink = compareBlock.select("a[href].buttonRetail").first();
      if (retailLink == null) {
        // first() yields null when nothing matched; skip instead of failing with an NPE.
        continue;
      }

      try {
        urls.add(Utils.stringToURL(retailLink.attr("abs:href")));
      } catch (ConnectionException ignored) {
        // Best effort: one unusable link must not prevent collecting the remaining ones.
      }
    }

    return urls;
  }
  /**
   * Prepare first url to visit for given product: the source URL followed by
   * {@code search/result/query/} and the normalized product name. Output e.g.
   * {@code http://www.preisvergleich.de/search/result/query/xbox+one} (no trailing slash is
   * appended by this method).
   *
   * <p>The product name is lower-cased, trimmed of leading and trailing whitespace, and every
   * remaining space is replaced with {@code +}. No other escaping is applied here.
   *
   * @param product product name to search for; must not be {@code null}
   * @return the search URL produced by {@code Utils.stringToURL}
   * @throws ConnectionException propagated from {@code Utils.stringToURL}
   */
  public URL prepareTargetUrl(String product) throws ConnectionException {
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale (under a Turkish
    // default locale, for instance, 'I' would otherwise become a dotless 'ı').
    String query = product.toLowerCase(java.util.Locale.ROOT).trim().replace(' ', '+');

    String target = getSourceURL().toString() + "search/result/query/" + query;
    return Utils.stringToURL(target);
  }