protected void processRequest(Request request) { Page page = downloader.download(request, this); if (page == null) { sleep(site.getSleepTime()); return; } // for cycle retry if (page.getHtml() == null) { addRequest(page); sleep(site.getSleepTime()); return; } pageProcessor.process(page); addRequest(page); if (!page.getResultItems().isSkip()) { for (Pipeline pipeline : pipelines) { pipeline.process(page.getResultItems(), this); } } sleep(site.getSleepTime()); }
/**
 * Creates a spider driven by the given page processor.
 *
 * <p>The site configuration is obtained from {@code pageProcessor.getSite()} exactly once,
 * and the start urls are read from that same site object, so both fields are guaranteed to
 * originate from a single {@code getSite()} result even when an implementation builds a new
 * site on every call.
 *
 * @param pageProcessor the processor applied to downloaded pages; it also supplies the site
 *                      configuration and start urls. Must not be {@code null} (a
 *                      {@link NullPointerException} is thrown otherwise), and its
 *                      {@code getSite()} must not return {@code null}.
 */
public Spider(PageProcessor pageProcessor) {
    this.pageProcessor = pageProcessor;
    this.site = pageProcessor.getSite();
    // Reuse the stored site instead of calling getSite() a second time.
    this.startUrls = this.site.getStartUrls();
}