public static void startCrawling() { DealerPost.Dao dao = new AutohomeModule().getInstance(DealerPost.Dao.class); // TODO: fix DB Connection lost in a about 30 minutes issue (cause: auto finalize the DBCursor?) List<DealerPost> dealerList = new ArrayList<>(); dao.find().forEach(dealerList::add); log.info("DealerList size: {}", dealerList.size()); CrawlAutohomeDealerInfoTask crawlTask = new CrawlAutohomeDealerInfoTask(dealerList.iterator()); new TaskControl(crawlTask); }
public void exportDealersToCsv(String fileName, DealerPost.Dao dao) { List<DealerPost> dealerPosts = dao.find().toArray(); // Map<String, List<DealerPost>> dealerByCity = StreamSupport.stream(cursor.spliterator(), // false) // .collect((groupingBy(DealerPost::getCity))); // Sort by province first, then by city dealerPosts.sort( (a, b) -> { if (a.getCity() != null && b.getCity() != null) { return City.order(a.getCity(), a.getProvince()) - City.order(b.getCity(), b.getProvince()); } return a.getCity() != null ? 1 : -1; }); exportToCsv(fileName, dealerPosts.iterator()); }