Пример #1
0
 /**
  * Runs the injector job for the given seed directory and logs its timing.
  *
  * <p>The start time and the seed directory are logged first, then {@code run} is invoked with an
  * argument map that binds {@code Nutch.ARG_SEEDDIR} to {@code urlDir}. After {@code run} returns,
  * the finish time and the elapsed time are logged. There is no {@code finally} handling: if
  * {@code run} throws, the completion line is not written.
  *
  * @param urlDir seed directory handed to the job under {@code Nutch.ARG_SEEDDIR}
  * @throws Exception any failure raised while running the job is propagated unchanged
  */
 public void inject(Path urlDir) throws Exception {
   // A fresh formatter per call: SimpleDateFormat is not thread-safe, so it must not be shared.
   SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
   long start = System.currentTimeMillis();
   LOG.info("InjectorJob: starting at " + sdf.format(start));
   LOG.info("InjectorJob: Injecting urlDir: " + urlDir);
   run(ToolUtil.toArgMap(Nutch.ARG_SEEDDIR, urlDir));
   long end = System.currentTimeMillis();
   // Same "InjectorJob: " prefix as the lines above so the whole run can be found with one filter.
   LOG.info(
       "InjectorJob: finished at "
           + sdf.format(end)
           + ", elapsed: "
           + TimingUtil.elapsedTime(start, end));
 }
Пример #2
0
  /**
   * Mark URLs ready for fetching.
   *
   * <p>Logs the effective settings, invokes {@code run} with the top-N limit, the current time and
   * the filter/normalize flags, then logs the finish time, the elapsed time and the number of
   * generated URLs reported in the job results under {@code GENERATE_COUNT}.
   *
   * @param topN top-N limit forwarded to the job as {@code Nutch.ARG_TOPN}; it is only logged when it
   *     differs from {@code Long.MAX_VALUE}
   * @param curTime value forwarded to the job as {@code Nutch.ARG_CURTIME}
   * @param filter value forwarded to the job as {@code Nutch.ARG_FILTER}
   * @param norm value forwarded to the job as {@code Nutch.ARG_NORMALIZE}
   * @return the batch id read from the configuration under {@code BATCH_ID}, or {@code null} when
   *     the reported count of generated URLs is zero
   * @throws IllegalStateException if the job results hold no numeric {@code GENERATE_COUNT} entry
   * @throws Exception any failure raised while running the job is propagated unchanged
   */
  public String generate(long topN, long curTime, boolean filter, boolean norm) throws Exception {
    // A fresh formatter per call: SimpleDateFormat is not thread-safe, so it must not be shared.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("GeneratorJob: starting at " + sdf.format(start));
    LOG.info("GeneratorJob: Selecting best-scoring urls due for fetch.");
    LOG.info("GeneratorJob: filtering: " + filter);
    LOG.info("GeneratorJob: normalizing: " + norm);
    if (topN != Long.MAX_VALUE) {
      LOG.info("GeneratorJob: topN: " + topN);
    }
    Map<String, Object> results =
        run(
            ToolUtil.toArgMap(
                Nutch.ARG_TOPN,
                topN,
                Nutch.ARG_CURTIME,
                curTime,
                Nutch.ARG_FILTER,
                filter,
                Nutch.ARG_NORMALIZE,
                norm));
    String batchId = getConf().get(BATCH_ID);
    long finish = System.currentTimeMillis();

    // Fail with a descriptive message instead of an opaque NullPointerException from
    // auto-unboxing (or a ClassCastException) when the count is absent or not a Long.
    Object rawCount = results.get(GENERATE_COUNT);
    if (!(rawCount instanceof Number)) {
      throw new IllegalStateException(
          "GeneratorJob: job results contain no numeric " + GENERATE_COUNT + " entry: " + rawCount);
    }
    long generateCount = ((Number) rawCount).longValue();

    LOG.info(
        "GeneratorJob: finished at "
            + sdf.format(finish)
            + ", time elapsed: "
            + TimingUtil.elapsedTime(start, finish));
    LOG.info(
        "GeneratorJob: generated batch id: " + batchId + " containing " + generateCount + " URLs");
    // An empty batch is signalled to the caller with null rather than with its id.
    if (generateCount == 0) {
      return null;
    }
    return batchId;
  }
Пример #3
0
  /**
   * Runs the injector job for the given seed directory.
   *
   * <p>Two informational lines are logged (a start marker and the seed directory), after which
   * {@code run} is invoked with an argument map binding {@code Nutch.ARG_SEEDDIR} to
   * {@code urlDir}.
   *
   * @param urlDir seed directory handed to the job under {@code Nutch.ARG_SEEDDIR}
   * @throws Exception any failure raised while running the job is propagated unchanged
   */
  public void inject(Path urlDir) throws Exception {
    LOG.info("InjectorJob: starting");

    final String seedDirMessage = "InjectorJob: urlDir: " + urlDir;
    LOG.info(seedDirMessage);

    // The seed directory is the only argument this entry point passes to the job.
    run(
        ToolUtil.toArgMap(
            Nutch.ARG_SEEDDIR,
            urlDir));
  }