/**
 * Runs the injector job with {@code urlDir} passed as the
 * {@code Nutch.ARG_SEEDDIR} argument, logging the start time, the finish time
 * and the elapsed time around the run.
 *
 * @param urlDir directory handed to the job as its seed directory
 * @throws Exception if {@code run} fails; in that case no "finished" line is logged
 */
public void inject(Path urlDir) throws Exception {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("InjectorJob: starting at " + sdf.format(start));
  LOG.info("InjectorJob: Injecting urlDir: " + urlDir);

  run(ToolUtil.toArgMap(Nutch.ARG_SEEDDIR, urlDir));

  long end = System.currentTimeMillis();
  // Same "InjectorJob:" prefix as the start messages so that every line
  // emitted by this method can be found with a single log filter.
  LOG.info(
      "InjectorJob: finished at "
          + sdf.format(end)
          + ", elapsed: "
          + TimingUtil.elapsedTime(start, end));
}
/**
 * Mark URLs ready for fetching.
 *
 * <p>Logs the chosen options, runs the job with them, then reports the batch
 * id read from the configuration together with the number of generated URLs.
 *
 * @param topN passed to the job as {@code Nutch.ARG_TOPN}; only logged when it
 *     differs from {@code Long.MAX_VALUE}
 * @param curTime passed to the job as {@code Nutch.ARG_CURTIME}
 * @param filter passed to the job as {@code Nutch.ARG_FILTER}
 * @param norm passed to the job as {@code Nutch.ARG_NORMALIZE}
 * @return the value stored under {@code BATCH_ID} in the configuration, or
 *     {@code null} when the reported generate count is zero
 * @throws Exception if running the job fails
 */
public String generate(long topN, long curTime, boolean filter, boolean norm) throws Exception {
  final String tag = "GeneratorJob: ";
  SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long startedAt = System.currentTimeMillis();

  LOG.info(tag + "starting at " + timestampFormat.format(startedAt));
  LOG.info(tag + "Selecting best-scoring urls due for fetch.");
  LOG.info(tag + "starting");
  LOG.info(tag + "filtering: " + filter);
  LOG.info(tag + "normalizing: " + norm);
  boolean limited = topN != Long.MAX_VALUE;
  if (limited) {
    LOG.info(tag + "topN: " + topN);
  }

  Map<String, Object> jobResults =
      run(
          ToolUtil.toArgMap(
              Nutch.ARG_TOPN, topN,
              Nutch.ARG_CURTIME, curTime,
              Nutch.ARG_FILTER, filter,
              Nutch.ARG_NORMALIZE, norm));

  String batchId = getConf().get(BATCH_ID);
  long finishedAt = System.currentTimeMillis();
  // NOTE(review): unboxing fails with a NullPointerException if the job
  // results carry no GENERATE_COUNT entry - confirm run() always sets it.
  Long reportedCount = (Long) jobResults.get(GENERATE_COUNT);
  long urlCount = reportedCount.longValue();

  LOG.info(
      tag
          + "finished at "
          + timestampFormat.format(finishedAt)
          + ", time elapsed: "
          + TimingUtil.elapsedTime(startedAt, finishedAt));
  LOG.info(tag + "generated batch id: " + batchId + " containing " + urlCount + " URLs");

  // An empty batch is reported to the caller as "no batch".
  return urlCount == 0 ? null : batchId;
}
/**
 * Runs the injector job with {@code urlDir} passed as the
 * {@code Nutch.ARG_SEEDDIR} argument, logging the directory beforehand.
 *
 * @param urlDir directory handed to the job as its seed directory
 * @throws Exception if {@code run} fails
 */
public void inject(Path urlDir) throws Exception {
  final String tag = "InjectorJob: ";
  LOG.info(tag + "starting");
  LOG.info(tag + "urlDir: " + urlDir);
  run(ToolUtil.toArgMap(Nutch.ARG_SEEDDIR, urlDir));
}