コード例 #1
0
  /**
   * Main loop of this worker thread.
   *
   * <p>Each iteration takes the next {@code CrawlURI} from the controller's
   * frontier, passes it through the fetch chain and then the disposition chain,
   * and finally reports it to the frontier as finished. Most problems raised
   * while processing a single URI are handed to {@code recoverableProblem} so
   * that the loop can continue with the next URI; {@code Error}s other than
   * {@code AssertionError} and {@code StackOverflowError} go to
   * {@code seriousError}.
   *
   * <p>The loop ends when {@code shouldRetire} is set (checked once per
   * iteration, after the URI has been finished), when an
   * {@code InterruptedException} reaches the outer handler, or when any other
   * exception escapes the per-URI handling. On the way out the recorders are
   * closed and the references to the recorder and the controller are cleared.
   *
   * @see java.lang.Thread#run()
   */
  public void run() {
    // Captured up front because controller is nulled at the end of this method.
    String name = controller.getMetadata().getJobName();
    logger.fine(getName() + " started for order '" + name + "'");
    Recorder.setHttpRecorder(httpRecorder);

    try {
      while (true) {
        // NOTE(review): presumably throws InterruptedException when the thread
        // should stop - confirm against ArchiveUtils.
        ArchiveUtils.continueCheck();

        setStep(Step.ABOUT_TO_GET_URI, null);

        CrawlURI curi = controller.getFrontier().next();

        // Publish the URI as this thread's current work item under the
        // thread's own monitor.
        synchronized (this) {
          ArchiveUtils.continueCheck();
          setCurrentCuri(curi);
          currentCuri.setThreadNumber(this.serialNumber);
          lastStartTime = System.currentTimeMillis();
          currentCuri.setRecorder(httpRecorder);
        }

        try {
          KeyedProperties.loadOverridesFrom(curi);

          controller.getFetchChain().process(curi, this);

          controller.getFrontier().beginDisposition(curi);

          controller.getDispositionChain().process(curi, this);

        } catch (RuntimeExceptionWrapper e) {
          // This branch used to call e.initCause(e.getCause()) when the cause
          // was null. That call always passed null, so it could never attach a
          // cause, and Throwable.initCause throws IllegalStateException when
          // the cause was already set through a constructor. Such a throw
          // would skip recoverableProblem and be treated as fatal by the outer
          // handler. Report the wrapper as-is instead.
          recoverableProblem(e);
        } catch (AssertionError ae) {
          // This risks leaving crawl in fatally inconsistent state,
          // but is often reasonable for per-Processor assertion problems
          recoverableProblem(ae);
        } catch (RuntimeException e) {
          recoverableProblem(e);
        } catch (InterruptedException e) {
          if (currentCuri != null) {
            // A URI is in flight: record the problem and keep going so the
            // URI is still handed back to the frontier below.
            recoverableProblem(e);
            Thread.interrupted(); // clear interrupt status
          } else {
            throw e;
          }
        } catch (StackOverflowError err) {
          recoverableProblem(err);
        } catch (Error err) {
          // OutOfMemory and any others
          seriousError(err);
        } finally {
          // Always undo the per-URI overrides loaded at the top of the try.
          KeyedProperties.clearOverridesFrom(curi);
        }

        setStep(Step.ABOUT_TO_RETURN_URI, null);
        ArchiveUtils.continueCheck();

        // Hand the URI back to the frontier and clear the current work item
        // under the same monitor used when it was set.
        synchronized (this) {
          controller.getFrontier().finished(currentCuri);
          controller.getFrontier().endDisposition();
          setCurrentCuri(null);
        }

        setStep(Step.FINISHING_PROCESS, null);
        lastFinishTime = System.currentTimeMillis();
        if (shouldRetire) {
          break; // from while(true)
        }
      }
    } catch (InterruptedException e) {
      if (currentCuri != null) {
        logger.log(
            Level.SEVERE,
            "Interrupt leaving unfinished CrawlURI " + getName() + " - job may hang",
            e);
      }
      // thread interrupted, ok to end
      logger.log(Level.FINE, this.getName() + " ended with Interruption");
    } catch (Exception e) {
      // everything else (InterruptedException is handled by the clause above)
      logger.log(Level.SEVERE, "Fatal exception in " + getName(), e);
    } catch (OutOfMemoryError err) {
      seriousError(err);
    } finally {
      controller.getFrontier().endDisposition();
    }

    setCurrentCuri(null);
    // Do cleanup so that objects can be GC.
    this.httpRecorder.closeRecorders();
    this.httpRecorder = null;

    logger.fine(getName() + " finished for order '" + name + "'");
    setStep(Step.FINISHED, null);
    controller = null;
  }