コード例 #1
0
  /**
   * Checks whether a crawled response has reached posts at or before the configured cutoff.
   *
   * <p>The response is parsed into a status list and only its last entry is examined; the
   * entry's creation time is compared against {@code stopCrawlPostTimeStamp}.
   * NOTE(review): this presumably relies on the list being ordered newest-first so that the
   * last entry is the oldest post — confirm against the API response ordering.
   *
   * @param result raw response text; may be {@code null}
   * @return {@code true} only when the last parsed status was created at or before
   *     {@code stopCrawlPostTimeStamp}; {@code false} for null, non-status or unparsable
   *     responses and for an empty status list
   */
  private boolean isResultTooOld(String result) {
    // Only payloads beginning with {"statuses" are inspected. A null response, the empty
    // array "[]" and {"error"... payloads all fail this single prefix test.
    boolean looksLikeStatuses = result != null && result.startsWith("{\"statuses\"");
    if (!looksLikeStatuses) {
      return false;
    }

    List<Status> parsed;
    try {
      parsed = StatusDB.getStatusList(result);
    } catch (Exception e) {
      // A parse failure is reported and treated as "not too old".
      System.out.println("Error in constructing status list for checking if the post is too old.");
      e.printStackTrace();
      return false;
    }

    if (parsed.isEmpty()) {
      return false;
    }

    Status last = parsed.get(parsed.size() - 1);
    System.out.println("The last geo-post date: " + last.getCreatedAt().toString());
    return CrawlTool.timeToUnixTime(last.getCreatedAt()) <= this.stopCrawlPostTimeStamp;
  }
コード例 #2
0
  /**
   * Loads the crawler settings from a properties file and initialises the key and uid lists.
   *
   * <p>The file is read from {@code Utils.getPath() + "/" + configFileName}. Keys that are
   * always read: {@code key_file_name}, {@code log_name}, {@code json_log_name},
   * {@code interval}, {@code start_crawl_time}, {@code stop_crawl_time},
   * {@code stop_crawl_post_time}, {@code uid_file_name}. Keys that are applied only when
   * present (the field keeps its previous value otherwise): {@code key_from}, {@code key_to},
   * {@code crawl_times}, {@code max_page_count}, {@code count}, {@code start_uid}.
   *
   * <p>An {@link IOException} (including a missing config file) is printed and swallowed, in
   * which case the remaining fields are left untouched. Because {@code interval} is parsed
   * unconditionally, a missing or non-numeric value raises an unchecked
   * {@link NumberFormatException}; the same applies to malformed optional integer values.
   *
   * @param configFileName name of the properties file, relative to {@code Utils.getPath()}
   */
  private void setup(String configFileName) {
    // try-with-resources closes the stream on every exit path, replacing the manual
    // null-check/close in a finally block.
    try (InputStream is = new FileInputStream(Utils.getPath() + "/" + configFileName)) {
      Properties config = new Properties();
      config.load(is);

      // keys
      this.keyFileName = config.getProperty("key_file_name");
      if (config.containsKey("key_from")) {
        this.keysFrom = Integer.valueOf(config.getProperty("key_from"));
      }
      if (config.containsKey("key_to")) {
        this.keysTo = Integer.valueOf(config.getProperty("key_to"));
      }

      // log files
      this.logName = config.getProperty("log_name");
      this.JsonlogName = config.getProperty("json_log_name");

      // interval
      this.interval = Integer.valueOf(config.getProperty("interval"));

      // times to crawl
      if (config.containsKey("crawl_times")) {
        this.requestedCrawlingTimes = Integer.valueOf(config.getProperty("crawl_times"));
      }

      // crawl start and end
      this.startCrawlTimeStamp = CrawlTool.timeToUnixTime(config.getProperty("start_crawl_time"));
      this.stopCrawlTimeStamp = CrawlTool.timeToUnixTime(config.getProperty("stop_crawl_time"));

      // post-age cutoff: isResultTooOld reports a result as too old once its last post was
      // created at or before this timestamp
      this.stopCrawlPostTimeStamp =
          CrawlTool.timeToUnixTime(config.getProperty("stop_crawl_post_time"));

      // max page
      if (config.containsKey("max_page_count")) {
        this.maxPageCount = Integer.valueOf(config.getProperty("max_page_count"));
      }

      if (config.containsKey("count")) {
        this.count = Integer.valueOf(config.getProperty("count"));
      }

      // User ID file
      this.uidFileName = config.getProperty("uid_file_name");
      if (config.containsKey("start_uid")) { // uid starting point
        this.startingUid = config.getProperty("start_uid");
      }

      // init the appkey, uid (kept inside the try so a failed config load skips them)
      this.keyList =
          CrawlTool.initAppkey(
              Utils.getPath() + "/" + this.keyFileName, this.keysFrom, this.keysTo);
      this.uidList =
          CrawlTool.initUidList(Utils.getPath() + "/" + this.uidFileName, this.startingUid);

    } catch (IOException e) {
      // FileNotFoundException is an IOException, so a missing config file is reported here
      // as well; failures while closing the stream also arrive in this handler.
      e.printStackTrace();
    }
  }