@Override public Page download(Request request, Task task) { checkInit(); WebDriver webDriver; try { webDriver = webDriverPool.get(); } catch (InterruptedException e) { logger.warn("interrupted", e); return null; } logger.info("downloading page " + request.getUrl()); webDriver.get(request.getUrl()); try { Thread.sleep(sleepTime); } catch (InterruptedException e) { e.printStackTrace(); } WebDriver.Options manage = webDriver.manage(); Site site = task.getSite(); if (site.getCookies() != null) { for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) { Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue()); manage.addCookie(cookie); } } /* * TODO You can add mouse event or other processes * * @author: [email protected] */ WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); Page page = new Page(); page.setRawText(content); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); page.setRequest(request); webDriverPool.returnToPool(webDriver); return page; }
@Override public Page download(Request request, Task task) { checkInit(); final WebDriver webDriver; try { webDriver = webDriverPool.get(); } catch (InterruptedException e) { logger.warn("interrupted", e); return null; } logger.info("downloading page " + request.getUrl()); webDriver.get(request.getUrl()); // webDriver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS); /* WebElement element = null; By selector = By.xpath("//*[@id='comments-list']/div[1]/div[1]/ul"); element = webDriver.findElement(selector); element.click(); */ // ((JavascriptExecutor) webDriver).executeScript("arguments[0].scrollIntoView();", // element); if (request.getUrl().matches("http://item\\.jd\\.com/\\d+\\.html#comment")) { WebDriverWait wait = new WebDriverWait(webDriver, 15); WebElement element = wait.until( new ExpectedCondition<WebElement>() { @Override public WebElement apply(WebDriver driver) { By selector = By.xpath("//*[@id='comments-list']/div[1]/div[1]/ul/li[1]/a/em"); WebElement em = null; int i = 0; try { do { if ((em = webElementExist(driver, selector)) != null) { ((JavascriptExecutor) driver) .executeScript("arguments[0].scrollIntoView();", em); Thread.sleep(1000); String str = em.getText(); System.out.println(str); Pattern pattern = Pattern.compile("\\d+"); Matcher m = pattern.matcher(str); if (m.find()) { break; } } if (i == 5) { driver.navigate().refresh(); } } while (++i < 10); } catch (Exception e) { e.printStackTrace(); return em; } return em; } }); if (element == null) { webDriverPool.returnToPool(webDriver); return null; } } /* //有时出现找不到的情况,抛出no such element错误导致程序中止 int retries = 0; boolean exist = false; while (retries++ < 5) { exist = isWebElementExist(webDriver, selector); if (exist) { break; } webDriver.navigate().refresh(); } if (exist) { element = webDriver.findElement(selector); element.click(); WebDriverWait wait = new WebDriverWait(webDriver, 10); wait.until(new ExpectedCondition<WebElement>() { @Override public WebElement apply(WebDriver d) { By selector = By.id("type_1"); boolean exists = isWebElementExist(d, selector); if (exists) { return d.findElement(By.id("type_1")); } else { return null; } } }); } */ try { if (this.sleepTime > 0) { Thread.sleep(sleepTime); } } catch (InterruptedException e) { e.printStackTrace(); } /* WebDriver.Options manage = webDriver.manage(); Site site = task.getSite(); if (site.getCookies() != null) { for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) { Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue()); manage.addCookie(cookie); } }*/ WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); Page page = new Page(); // page.setRawText(content); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); page.setRequest(request); webDriverPool.returnToPool(webDriver); return page; }
@Override public void close() throws IOException { webDriverPool.closeAll(); }