/** * (non-Javadoc) * * @see java.lang.Thread#run() */ public void run() { String name = controller.getMetadata().getJobName(); logger.fine(getName() + " started for order '" + name + "'"); Recorder.setHttpRecorder(httpRecorder); try { while (true) { ArchiveUtils.continueCheck(); setStep(Step.ABOUT_TO_GET_URI, null); CrawlURI curi = controller.getFrontier().next(); synchronized (this) { ArchiveUtils.continueCheck(); setCurrentCuri(curi); currentCuri.setThreadNumber(this.serialNumber); lastStartTime = System.currentTimeMillis(); currentCuri.setRecorder(httpRecorder); } try { KeyedProperties.loadOverridesFrom(curi); controller.getFetchChain().process(curi, this); controller.getFrontier().beginDisposition(curi); controller.getDispositionChain().process(curi, this); } catch (RuntimeExceptionWrapper e) { // Workaround to get cause from BDB if (e.getCause() == null) { e.initCause(e.getCause()); } recoverableProblem(e); } catch (AssertionError ae) { // This risks leaving crawl in fatally inconsistent state, // but is often reasonable for per-Processor assertion problems recoverableProblem(ae); } catch (RuntimeException e) { recoverableProblem(e); } catch (InterruptedException e) { if (currentCuri != null) { recoverableProblem(e); Thread.interrupted(); // clear interrupt status } else { throw e; } } catch (StackOverflowError err) { recoverableProblem(err); } catch (Error err) { // OutOfMemory and any others seriousError(err); } finally { KeyedProperties.clearOverridesFrom(curi); } setStep(Step.ABOUT_TO_RETURN_URI, null); ArchiveUtils.continueCheck(); synchronized (this) { controller.getFrontier().finished(currentCuri); controller.getFrontier().endDisposition(); setCurrentCuri(null); } setStep(Step.FINISHING_PROCESS, null); lastFinishTime = System.currentTimeMillis(); if (shouldRetire) { break; // from while(true) } } } catch (InterruptedException e) { if (currentCuri != null) { logger.log( Level.SEVERE, "Interrupt leaving unfinished CrawlURI " + getName() + " - job may hang", e); } // thread interrupted, ok to end logger.log(Level.FINE, this.getName() + " ended with Interruption"); } catch (Exception e) { // everything else (including interruption) logger.log(Level.SEVERE, "Fatal exception in " + getName(), e); } catch (OutOfMemoryError err) { seriousError(err); } finally { controller.getFrontier().endDisposition(); } setCurrentCuri(null); // Do cleanup so that objects can be GC. this.httpRecorder.closeRecorders(); this.httpRecorder = null; logger.fine(getName() + " finished for order '" + name + "'"); setStep(Step.FINISHED, null); controller = null; }