/**
 * Copies every dealer returned by the DAO's {@code find()} into an in-memory list, logs how many
 * were loaded, and wraps an iterator over that list in a {@code CrawlAutohomeDealerInfoTask}
 * which is handed to a new {@code TaskControl}.
 *
 * <p>NOTE(review): constructing the {@code TaskControl} is presumably what kicks off the crawl;
 * confirm against that class.
 */
public static void startCrawling() {
   final DealerPost.Dao dealerDao = new AutohomeModule().getInstance(DealerPost.Dao.class);

   // TODO: fix the DB connection being lost after about 30 minutes
   // (suspected cause: the DBCursor being auto-finalized?)
   final List<DealerPost> dealers = new ArrayList<>();
   dealerDao.find().forEach(dealers::add);
   log.info("DealerList size: {}", dealers.size());

   // The task iterates the in-memory snapshot rather than the DAO result directly.
   new TaskControl(new CrawlAutohomeDealerInfoTask(dealers.iterator()));
 }
Example #2
0
 /**
  * Loads all dealers from the DAO, sorts them, and writes them out via {@code exportToCsv}.
  *
  * <p>Ordering: dealers without a city come first (and compare equal to one another); dealers
  * with a city are ordered by the value of {@code City.order(city, province)}. The original
  * author's note describes this as "by province first, then by city" — that depends on how
  * {@code City.order} assigns its numbers; confirm there.
  *
  * @param fileName passed through unchanged to {@code exportToCsv}
  * @param dao source of the dealer records to export
  */
 public void exportDealersToCsv(String fileName, DealerPost.Dao dao) {
   List<DealerPost> dealerPosts = dao.find().toArray();
   dealerPosts.sort(
       (a, b) -> {
         boolean aHasCity = a.getCity() != null;
         boolean bHasCity = b.getCity() != null;
         if (aHasCity && bHasCity) {
           // Integer.compare instead of subtraction: a difference can overflow and flip sign.
           return Integer.compare(
               City.order(a.getCity(), a.getProvince()),
               City.order(b.getCity(), b.getProvince()));
         }
         // Missing city sorts first. Two missing cities must compare as equal (0); returning a
         // non-zero value for both (a, b) and (b, a) breaks the Comparator contract and can make
         // List.sort throw IllegalArgumentException.
         return Boolean.compare(aHasCity, bHasCity);
       });
   exportToCsv(fileName, dealerPosts.iterator());
 }