// identify if the post is too old private boolean isResultTooOld(String result) { // parse post, check the last one's timestamp, if not old enough then // continue if (result == null || "[]".equals(result) || result.startsWith("{\"error\"") || !result.startsWith("{\"statuses\"")) { return false; } List<Status> statusList; try { statusList = StatusDB.getStatusList(result); } catch (Exception e) { System.out.println("Error in constructing status list for checking if the post is too old."); e.printStackTrace(); return false; } if (!statusList.isEmpty()) { Status s = statusList.get(statusList.size() - 1); System.out.println("The last geo-post date: " + s.getCreatedAt().toString()); if (CrawlTool.timeToUnixTime(s.getCreatedAt()) <= this.stopCrawlPostTimeStamp) { return true; } } return false; }
private void setup(String configFileName) { InputStream is = null; try { is = new FileInputStream(Utils.getPath() + "/" + configFileName); Properties config = new Properties(); config.load(is); // get the config item // keys this.keyFileName = config.getProperty("key_file_name"); if (config.containsKey("key_from")) { this.keysFrom = Integer.valueOf(config.getProperty("key_from")); } if (config.containsKey("key_to")) { this.keysTo = Integer.valueOf(config.getProperty("key_to")); } // log files this.logName = config.getProperty("log_name"); this.JsonlogName = config.getProperty("json_log_name"); // interval this.interval = Integer.valueOf(config.getProperty("interval")); // times to crawl if (config.containsKey("crawl_times")) { this.requestedCrawlingTimes = Integer.valueOf(config.getProperty("crawl_times")); } // crawl start and end this.startCrawlTimeStamp = CrawlTool.timeToUnixTime(config.getProperty("start_crawl_time")); this.stopCrawlTimeStamp = CrawlTool.timeToUnixTime(config.getProperty("stop_crawl_time")); // posts posted after this timestamp wont be crawled this.stopCrawlPostTimeStamp = CrawlTool.timeToUnixTime(config.getProperty("stop_crawl_post_time")); // max page if (config.containsKey("max_page_count")) { this.maxPageCount = Integer.valueOf(config.getProperty("max_page_count")); } if (config.containsKey("count")) { this.count = Integer.valueOf(config.getProperty("count")); } // User ID file this.uidFileName = config.getProperty("uid_file_name"); if (config.containsKey("start_uid")) { // uid starting point this.startingUid = config.getProperty("start_uid"); } // init the appkey, uid this.keyList = CrawlTool.initAppkey( Utils.getPath() + "/" + this.keyFileName, this.keysFrom, this.keysTo); this.uidList = CrawlTool.initUidList(Utils.getPath() + "/" + this.uidFileName, this.startingUid); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (is != null) { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } }