/**
 * Turns a downloaded page into the set of product pages that should be visited next.
 *
 * <p>Every anchor matching {@code .InfoArea a[title]} becomes a {@code "productPage"}
 * entity whose URL is the anchor's absolute {@code href} and whose category is inherited
 * from the source page. When the selector matches nothing at all, the source page itself
 * is emitted as the single {@code "productPage"} entity.
 *
 * <p>Anchors whose {@code abs:href} resolves to an empty string are skipped, so no entity
 * with a blank URL is emitted. (jsoup documents that the {@code abs:} prefix yields an empty
 * string when the attribute is absent or cannot be made absolute.)
 *
 * @param downloadResult the download outcome; its document may be {@code null}
 * @return an observable over the collected entities; empty when there is no document
 */
private Observable<WebPageEntity> parseDocument(DownloadResult downloadResult) {
    Set<WebPageEntity> result = new HashSet<>(1);

    Document document = downloadResult.getDocument();
    if (document == null) {
        return Observable.from(result);
    }

    Elements productLinks = document.select(".InfoArea a[title]");
    if (productLinks.isEmpty()) {
        // No product links on the page: fall back to the source page's own URL.
        WebPageEntity webPageEntity = new WebPageEntity(
                downloadResult.getSourcePage(),
                "",
                "productPage",
                downloadResult.getSourcePage().getUrl(),
                downloadResult.getSourcePage().getCategory());
        LOGGER.info("productPageUrl={}", webPageEntity.getUrl());
        result.add(webPageEntity);
        return Observable.from(result);
    }

    for (Element productLink : productLinks) {
        String productUrl = productLink.attr("abs:href");
        if (productUrl.isEmpty()) {
            // The selector only requires a title attribute, so an anchor may have no usable href.
            continue;
        }
        WebPageEntity webPageEntity = new WebPageEntity(
                downloadResult.getSourcePage(),
                "",
                "productPage",
                productUrl,
                downloadResult.getSourcePage().getCategory());
        LOGGER.info("productPageUrl={}", webPageEntity.getUrl());
        result.add(webPageEntity);
    }
    return Observable.from(result);
}
/**
 * Turns a downloaded page into the set of product listing pages that should be visited next.
 *
 * <p>Every anchor matching {@code .SideCategoryListFlyout a} becomes a {@code "productList"}
 * entity whose URL is the anchor's absolute {@code href} and whose category is the anchor's
 * text.
 *
 * <p>Anchors whose {@code abs:href} resolves to an empty string are skipped, so no entity
 * with a blank URL is emitted. (jsoup documents that the {@code abs:} prefix yields an empty
 * string when the attribute is absent or cannot be made absolute.)
 *
 * @param downloadResult the download outcome; its document may be {@code null}
 * @return an observable over the collected entities; empty when there is no document or
 *     no usable link was found
 */
private Observable<WebPageEntity> parseDocument(DownloadResult downloadResult) {
    Set<WebPageEntity> result = new HashSet<>(1);

    Document document = downloadResult.getDocument();
    if (document == null) {
        return Observable.from(result);
    }

    for (Element categoryLink : document.select(".SideCategoryListFlyout a")) {
        String listingUrl = categoryLink.attr("abs:href");
        if (listingUrl.isEmpty()) {
            // The selector matches any anchor, including ones without a usable href.
            continue;
        }
        WebPageEntity webPageEntity = new WebPageEntity(
                downloadResult.getSourcePage(),
                "",
                "productList",
                listingUrl,
                categoryLink.text());
        LOGGER.info("Product page listing={}", webPageEntity.getUrl());
        result.add(webPageEntity);
    }
    return Observable.from(result);
}