Пример #1
0
 /**
  * Downloads the page for {@code request} and pushes it through the page processor and the
  * pipelines.
  *
  * <p>Every path that completes normally ends by sleeping for {@code site.getSleepTime()}:
  * <ul>
  *   <li>the downloader returned {@code null}: nothing else is done;</li>
  *   <li>the page has no HTML: the page is only passed to {@code addRequest} (cycle retry);</li>
  *   <li>otherwise: the page is processed, passed to {@code addRequest}, and, unless its result
  *       items are marked as skipped, handed to each pipeline in turn.</li>
  * </ul>
  *
  * @param request the request passed to the downloader
  */
 protected void processRequest(Request request) {
   final Page downloaded = downloader.download(request, this);
   if (downloaded != null) {
     if (downloaded.getHtml() == null) {
       // for cycle retry: no HTML on the page, so only hand it to addRequest
       addRequest(downloaded);
     } else {
       pageProcessor.process(downloaded);
       addRequest(downloaded);
       final boolean skipped = downloaded.getResultItems().isSkip();
       if (!skipped) {
         for (Pipeline sink : pipelines) {
           sink.process(downloaded.getResultItems(), this);
         }
       }
     }
   }
   // single pause shared by all three outcomes above
   sleep(site.getSleepTime());
 }
Пример #2
0
 /**
  * Creates a spider driven by the given page processor.
  *
  * <p>The processor's site is fetched once via {@code getSite()}; both the {@code site} field and
  * the start URLs are taken from that single instance, so they stay consistent even if the
  * processor returns a new site object on every call.
  *
  * @param pageProcessor the processor that supplies the site and is stored for later page handling
  * @throws NullPointerException if {@code pageProcessor} is {@code null} or its
  *     {@code getSite()} returns {@code null}
  */
 public Spider(PageProcessor pageProcessor) {
   this.pageProcessor = pageProcessor;
   this.site = pageProcessor.getSite();
   // read from the stored site rather than calling getSite() a second time
   this.startUrls = this.site.getStartUrls();
 }