/**
 * Collects the product pages referenced by a downloaded document.
 *
 * <p>Each {@code .InfoArea a[title]} anchor with a resolvable link yields one
 * {@code "productPage"} entity pointing at the anchor's absolute {@code href} and carrying the
 * source page's category. When the selector matches nothing, the source page itself is emitted
 * as the single product page. A {@code null} document produces an empty stream.
 *
 * @param downloadResult download outcome supplying the parsed document and its source page
 * @return the collected entities; they pass through a {@link Set}, so entries that are equal
 *     according to {@code WebPageEntity} are emitted once
 */
private Observable<WebPageEntity> parseDocument(DownloadResult downloadResult) {
  Set<WebPageEntity> result = new HashSet<>();

  Document document = downloadResult.getDocument();
  if (document == null) {
    return Observable.from(result);
  }

  Elements elements = document.select(".InfoArea a[title]");
  if (elements.isEmpty()) {
    // No product links on the page: the source page itself is treated as the product page.
    result.add(toProductPage(downloadResult, downloadResult.getSourcePage().getUrl()));
    return Observable.from(result);
  }

  for (Element element : elements) {
    String productUrl = element.attr("abs:href");
    // "abs:href" yields "" when the anchor has no href or it cannot be made absolute;
    // an empty address is not a usable page, so the anchor is skipped.
    if (productUrl.isEmpty()) {
      continue;
    }
    result.add(toProductPage(downloadResult, productUrl));
  }
  return Observable.from(result);
}

/** Builds a "productPage" entity for {@code url} under the source page and logs its URL. */
private WebPageEntity toProductPage(DownloadResult downloadResult, String url) {
  WebPageEntity webPageEntity =
      new WebPageEntity(
          downloadResult.getSourcePage(),
          "",
          "productPage",
          url,
          downloadResult.getSourcePage().getCategory());
  LOGGER.info("productPageUrl={}", webPageEntity.getUrl());
  return webPageEntity;
}
Example #2
0
  /**
   * Collects the product listing pages linked from the {@code .SideCategoryListFlyout} element.
   *
   * <p>Each anchor inside it with a resolvable link yields one {@code "productList"} entity
   * whose URL is the anchor's absolute {@code href}. The anchor text is passed as the final
   * constructor argument (presumably the category name; confirm against
   * {@code WebPageEntity}). A {@code null} document produces an empty stream.
   *
   * @param downloadResult download outcome supplying the parsed document and its source page
   * @return the collected entities; they pass through a {@link Set}, so entries that are equal
   *     according to {@code WebPageEntity} are emitted once
   */
  private Observable<WebPageEntity> parseDocument(DownloadResult downloadResult) {
    Set<WebPageEntity> result = new HashSet<>();

    Document document = downloadResult.getDocument();
    if (document == null) {
      return Observable.from(result);
    }

    for (Element element : document.select(".SideCategoryListFlyout a")) {
      String listingUrl = element.attr("abs:href");
      // "abs:href" yields "" when the anchor has no href or it cannot be made absolute;
      // an empty address is not a usable listing page, so the anchor is skipped.
      if (listingUrl.isEmpty()) {
        continue;
      }
      WebPageEntity webPageEntity =
          new WebPageEntity(
              downloadResult.getSourcePage(), "", "productList", listingUrl, element.text());
      LOGGER.info("Product page listing={}", webPageEntity.getUrl());
      result.add(webPageEntity);
    }
    return Observable.from(result);
  }