public Map<String, Object> run(Map<String, Object> args) throws Exception { String batchId = (String) args.get(Nutch.ARG_BATCH); if (batchId != null) { getConf().set(GeneratorJob.BATCH_ID, batchId); } // map to inverted subset due for fetch, sort by score Long topN = (Long) args.get(Nutch.ARG_TOPN); Long curTime = (Long) args.get(Nutch.ARG_CURTIME); if (curTime == null) { curTime = System.currentTimeMillis(); } Boolean filter = (Boolean) args.get(Nutch.ARG_FILTER); Boolean norm = (Boolean) args.get(Nutch.ARG_NORMALIZE); // map to inverted subset due for fetch, sort by score getConf().setLong(GENERATOR_CUR_TIME, curTime); if (topN != null) getConf().setLong(GENERATOR_TOP_N, topN); if (filter != null) getConf().setBoolean(GENERATOR_FILTER, filter); getConf().setLong(Nutch.GENERATE_TIME_KEY, System.currentTimeMillis()); if (norm != null) getConf().setBoolean(GENERATOR_NORMALISE, norm); String mode = getConf().get(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST); if (GENERATOR_COUNT_VALUE_HOST.equalsIgnoreCase(mode)) { getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST); } else if (GENERATOR_COUNT_VALUE_DOMAIN.equalsIgnoreCase(mode)) { getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_DOMAIN); } else { LOG.warn( "Unknown generator.max.count mode '" + mode + "', using mode=" + GENERATOR_COUNT_VALUE_HOST); getConf().set(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST); getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST); } numJobs = 1; currentJobNum = 0; currentJob = new NutchJob(getConf(), "generate: " + getConf().get(BATCH_ID)); Collection<WebPage.Field> fields = getFields(currentJob); StorageUtils.initMapperJob( currentJob, fields, SelectorEntry.class, WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class, true); StorageUtils.initReducerJob(currentJob, GeneratorReducer.class); currentJob.waitForCompletion(true); ToolUtil.recordJobStatus(null, currentJob, results); results.put(BATCH_ID, getConf().get(BATCH_ID)); long generateCount = currentJob.getCounters().findCounter("Generator", "GENERATE_MARK").getValue(); results.put(GENERATE_COUNT, generateCount); return results; }