/** * Take links from results and do pagination (max 7 times). * * @param document * @return */ @Override public List<URL> getNextPages(Document document) { List<URL> urls = new ArrayList<>(); // Collect rows with links to comparing offerts links Elements elements = document.select(PRODUCTS_ROW_QUERY + ":not([onclick])"); for (Element element : elements) { String str = element.attr("abs:href"); try { urls.add(Utils.stringToURL(str)); } catch (ConnectionException e) { } } // Pagination final int MAX_PAGE = 7; Element next = document.select("a[href].next").first(); if (next != null) { String nextStr = next.attr("href"); if (!nextStr.contains("page_nr=" + MAX_PAGE)) { try { urls.add(Utils.stringToURL(nextStr)); } catch (ConnectionException e) { } } } logger.debug("Collected " + urls.size() + " urls to visit"); return urls; }
/**
 * Builds the first URL to visit for the given product: the source URL
 * followed by the search path and the encoded product as the
 * {@code search_query} parameter.
 *
 * @param product product name to search for
 * @return the search URL for the product
 * @throws ConnectionException if the assembled string cannot be turned into a URL
 */
@Override
public URL prepareTargetUrl(String product) throws ConnectionException {
    String encodedProduct = Utils.urlEncodeSpecial(product, '~', '"', '<', '>');

    StringBuilder target = new StringBuilder(getSourceURL().toString());
    target.append("lt/search?search_query=")
            .append(encodedProduct)
            .append("&submit_search=Ieškoti");

    return Utils.stringToURL(target.toString());
}
/** * Do not paginate. * * @param document * @return */ @Override public List<URL> getNextPages(Document document) { List<URL> urls = new ArrayList<>(); String nextStrUrl = null; // Pagination /*URL res; try { Elements elements = document.getElementsByClass("next"); Element next = elements.first().select("a").first(); nextStrUrl = next.attr("abs:href"); } catch (NullPointerException e) { return null; } try { res = Utils.stringToURL(nextStrUrl); } catch (ConnectionException e) { logger.debug(e.toString()); return null; } urls.add(res);*/ for (Element element : document.select("div#productView > div.productCompare")) { String href = element.select("a[href].buttonRetail").first().attr("abs:href"); try { urls.add(Utils.stringToURL(href)); } catch (ConnectionException e) { } } return urls; }
/**
 * Prepares the first URL to visit for the given product: the source URL
 * followed by {@code search/result/query/} and the normalized product name
 * (lower-cased, trimmed, spaces replaced by {@code +}).
 *
 * <p>Example, assuming the source URL is {@code http://www.preisvergleich.de/}:
 * {@code "Xbox One"} gives
 * {@code http://www.preisvergleich.de/search/result/query/xbox+one}.
 *
 * @param product product name to search for; must not be {@code null}
 * @return the search URL for the product
 * @throws ConnectionException if the assembled string cannot be turned into a URL
 */
public URL prepareTargetUrl(String product) throws ConnectionException {
    // Locale.ROOT keeps the lower-casing independent of the JVM's default
    // locale (e.g. a Turkish locale would map 'I' to a dotless 'ı').
    String query = product.toLowerCase(java.util.Locale.ROOT).trim().replace(' ', '+');
    String target = getSourceURL().toString() + "search/result/query/" + query;
    return Utils.stringToURL(target);
}