/**
 * Checks that a rule built with {@code makeDecideRule(400, 499)} does not evaluate to true for a
 * URI whose fetch status is 200.
 */
public void testOutOfBounds() throws Exception {
  MatchesStatusCodeDecideRule rule = makeDecideRule(400, 499);

  // A fetch status of 200 is the value under test against the rule built above.
  CrawlURI okUri = createTestUri("http://www.archive.org");
  okUri.setFetchStatus(200);

  boolean matched = rule.evaluate(okUri);
  assertFalse(matched);
}
Ejemplo n.º 2
0
 /**
  * Terminates this thread on a best-effort basis.
  *
  * <p>The thread is interrupted first. Then, while holding this thread's monitor, any crawl URI
  * it is currently working on is marked with {@code S_PROCESSING_THREAD_KILLED} and handed to the
  * frontier's {@code finished} method right away.
  *
  * <p>Intended to 'short circuit' hung threads. Interrupting does not guarantee that the thread
  * ever actually stops running; the expectation is that, once this has been invoked, the thread
  * no longer talks to other parts of the crawler and exits as soon as it regains control.
  */
 protected void kill() {
   this.interrupt();
   synchronized (this) {
     if (currentCuri == null) {
       // nothing in flight, so there is nothing to return to the frontier
       return;
     }
     currentCuri.setFetchStatus(S_PROCESSING_THREAD_KILLED);
     controller.getFrontier().finished(currentCuri);
   }
 }
Ejemplo n.º 3
0
  /**
   * Handles a serious {@link Error} (OutOfMemoryError and the like) hit by this thread.
   *
   * <p>In order: raises this thread's priority, asks the controller (if any) to free its reserve
   * memory and pause the crawl, notifies the frontier journal (if any), dumps diagnostics between
   * {@code <<<} and {@code >>>} markers on {@code System.err}, flags the current crawl URI (if
   * any) with {@code S_SERIOUS_ERROR}, logs a SEVERE message, and restores the default priority.
   *
   * @param err the error being handled
   */
  private void seriousError(Error err) {
    // Try to keep the scheduler from slicing us out before the OOM is dealt with. Modern JVMs
    // with native threads may ignore priorities, which would make this bump pointless.
    setPriority(DEFAULT_PRIORITY + 1);
    if (controller != null) {
      // hold all ToeThreads from proceeding to next processor
      controller.freeReserveMemory();
      controller.requestCrawlPause();
      if (controller.getFrontier().getFrontierJournal() != null) {
        controller.getFrontier().getFrontierJournal().seriousError(getName() + err.getMessage());
      }
    }

    // Diagnostic dump for OutOfMemory etc., written straight to stderr.
    String diagnostics = DevUtils.extraInfo();
    System.err.println("<<<");
    System.err.println(ArchiveUtils.getLog17Date());
    System.err.println(err);
    System.err.println(diagnostics);
    err.printStackTrace(System.err);

    if (controller != null) {
      PrintWriter reportWriter = new PrintWriter(System.err);
      controller.getToePool().compactReportTo(reportWriter);
      reportWriter.flush();
    }
    System.err.println(">>>");

    String subject;
    if (currentCuri == null) {
      subject = "unknown";
    } else {
      // Record the error class and the pre-existing fetch status as annotations before the
      // status is overwritten.
      currentCuri.getAnnotations().add("err=" + err.getClass().getName());
      currentCuri.getAnnotations().add("os" + currentCuri.getFetchStatus());
      currentCuri.setFetchStatus(S_SERIOUS_ERROR);
      subject = currentCuri.shortReportLine() + " in " + currentProcessorName;
    }
    logger.log(
        Level.SEVERE,
        "Serious error occured trying to process '" + subject + "'\n" + diagnostics,
        err);
    setPriority(DEFAULT_PRIORITY);
  }
Ejemplo n.º 4
0
 /**
  * Handling for exceptions and errors that are possibly recoverable.
  *
  * <p>Switches this thread to {@code Step.HANDLING_RUNTIME_EXCEPTION}. When a crawl URI is
  * current, it is marked with {@code S_RUNTIME_EXCEPTION}, annotated with the problem's class
  * name, and given the throwable under the {@code A_RUNTIME_EXCEPTION} data key. The problem is
  * then logged at SEVERE along with the step that was active when it arose.
  *
  * <p>When no crawl URI is current, the URI bookkeeping is skipped and the log message names the
  * subject as "unknown", so a missing URI does not make this handler fail with a
  * NullPointerException and hide the problem being reported.
  *
  * @param e the exception or error that interrupted processing
  */
 private void recoverableProblem(Throwable e) {
   // remember the step before it is overwritten, so the log shows where the problem arose
   Object previousStep = step;
   setStep(Step.HANDLING_RUNTIME_EXCEPTION, null);

   String context = "unknown";
   if (currentCuri != null) {
     currentCuri.setFetchStatus(S_RUNTIME_EXCEPTION);
     // store exception temporarily for logging
     currentCuri.getAnnotations().add("err=" + e.getClass().getName());
     currentCuri.getData().put(A_RUNTIME_EXCEPTION, e);
     context = currentCuri.toString();
   }

   String message =
       "Problem "
           + e
           + " occured when trying to process '"
           + context
           + "' at step "
           + previousStep
           + " in "
           + currentProcessorName
           + "\n";
   logger.log(Level.SEVERE, message, e);
 }