/**
 * Walks through every source returned by {@code FBSourceManager.getSources()}.
 *
 * <p>The source list is logged first; when it is empty a "no tasks" message is
 * logged and nothing else happens. Otherwise, for each source in order, the
 * start of the task is logged, {@code crawl} is invoked, {@code TimerUtils.delay()}
 * is called, and the end of the task is logged.
 */
@Override
public void run() {
    final List<FBSource> pending = FBSourceManager.getSources();
    LOGGER.info(pending.toString());

    if (pending.isEmpty()) {
        LOGGER.info("没有任务!");
        return;
    }

    for (final FBSource current : pending) {
        final String startLine = "开始任务 :" + current.getName();
        LOGGER.info(startLine);

        crawl(current);
        // The delay runs after every source, including ones crawl() decided to skip.
        TimerUtils.delay();

        final String endLine = "任务结束:" + current.getName();
        LOGGER.info(endLine);
    }
}
/** * 当当前时间减去上次操作时间大于cycle(小时)时,即返回true,表示可以爬取,否则表示不需要爬取 * * @param source * @return */ private boolean shouldCrawl(FBSource source) { long curr = System.currentTimeMillis(); long last = source.getLastoptime(); LOGGER.info("上次运行时间 : " + new Timestamp(last)); // 设定 可以提前10分钟开始 long interval = (source.getCycle() * 60 - 10) * 60 * 1000l; if ((curr - last) >= interval) { return true; } else { LOGGER.info( source.getName() + "任务还未到开始时间!上次操作时间:" + new Timestamp(last) + " 现在:" + new Timestamp(curr)); return false; } }
/**
 * Crawls a single source if {@code shouldCrawl} reports that it is due.
 *
 * <p>The homepage HTML is fetched with {@code request}; any failure there is
 * logged together with the source name and homepage, and the method returns.
 * On success the HTML is parsed with {@code FacebookPageParser.parse}, the
 * results are handed to {@code store}, and the source's last operation time is
 * set to the current time. An exception during parsing or storing is logged
 * and leaves the last operation time untouched.
 *
 * @param source the source to crawl
 */
private void crawl(FBSource source) {
    final boolean due = shouldCrawl(source);
    if (!due) {
        return;
    }

    final String homepageUrl = source.getHomepage();
    final String pageHtml;
    try {
        pageHtml = request(homepageUrl);
    } catch (Throwable fetchFailure) {
        // Fetching failed: report which source and homepage were involved, then give up.
        LOGGER.error(
            fetchFailure.getMessage()
                + "\n 爬取" + source.getName() + "的facebook失败 homepage:" + source.getHomepage(),
            fetchFailure);
        return;
    }

    final String sourceName = source.getName();
    try {
        final List<ParseResult> parsed = FacebookPageParser.parse(pageHtml, source);
        store(parsed, sourceName);
        // Only a fully successful parse-and-store advances the last operation time.
        source.setLastoptime(System.currentTimeMillis());
    } catch (Exception processingFailure) {
        LOGGER.error(processingFailure.getMessage(), processingFailure);
    }
}