Ejemplo n.º 1
2
  private void checkRules() {
    outputMessage("\nChecking " + m_baseUrl, TEST_SUMMARY_MESSAGE);
    outputMessage(
        "crawl depth: " + m_crawlDepth + "     crawl delay: " + m_crawlDelay + " ms.",
        PLAIN_MESSAGE);

    TreeSet crawlList = new TreeSet();
    TreeSet fetched = new TreeSet();
    // inialize with the baseUrl
    crawlList.add(m_baseUrl);
    depth_incl = new int[m_crawlDepth];
    depth_fetched = new int[m_crawlDepth];
    depth_parsed = new int[m_crawlDepth];
    long start_time = TimeBase.nowMs();
    for (int depth = 1; depth <= m_crawlDepth; depth++) {
      if (isInterrupted()) {
        return;
      }
      m_curDepth = depth;
      if (crawlList.isEmpty() && depth <= m_crawlDepth) {
        outputMessage("\nNothing left to crawl, exiting after depth " + (depth - 1), PLAIN_MESSAGE);
        break;
      }
      String[] urls = (String[]) crawlList.toArray(new String[0]);
      crawlList.clear();
      outputMessage("\nDepth " + depth, PLAIN_MESSAGE);
      for (int ix = 0; ix < urls.length; ix++) {
        if (isInterrupted()) {
          return;
        }
        pauseBeforeFetch();
        String urlstr = urls[ix];

        m_incls.clear();
        m_excls.clear();

        // crawl the page
        buildUrlSets(urlstr);
        fetched.add(urlstr);
        // output incl/excl results,
        // add the new_incls to the crawlList for next crawl depth loop
        crawlList.addAll(outputUrlResults(urlstr, m_incls, m_excls));
      }
    }
    long elapsed_time = TimeBase.nowMs() - start_time;
    outputSummary(m_baseUrl, fetched, crawlList, elapsed_time);
  }
Ejemplo n.º 2
0
    public void lockssRun() {
      setPriority(PRIORITY_PARAM_SIZE_CALC, PRIORITY_DEFAULT_SIZE_CALC);
      startWDog(WDOG_PARAM_SIZE_CALC, WDOG_DEFAULT_SIZE_CALC);
      triggerWDogOnExit(true);
      nowRunning();

      while (goOn) {
        try {
          pokeWDog();
          if (sizeCalcQueue.isEmpty()) {
            Deadline timeout = Deadline.in(Constants.HOUR);
            sizeCalcSem.take(timeout);
          }
          RepositoryNode node;
          synchronized (sizeCalcQueue) {
            node = (RepositoryNode) CollectionUtil.getAnElement(sizeCalcQueue);
          }
          if (node != null) {
            long start = TimeBase.nowMs();
            log.debug2("CalcSize start: " + node);
            long dur = 0;
            try {
              doSizeCalc(node);
              dur = TimeBase.nowMs() - start;
              log.debug2("CalcSize finish (" + StringUtil.timeIntervalToString(dur) + "): " + node);
            } catch (RuntimeException e) {
              log.warning("doSizeCalc: " + node, e);
            }
            synchronized (sizeCalcQueue) {
              sizeCalcQueue.remove(node);
            }
            pokeWDog();
            long sleep = sleepTimeToAchieveLoad(dur, sizeCalcMaxLoad);
            Deadline.in(sleep).sleep();
          }
        } catch (InterruptedException e) {
          // just wakeup and check for exit
        }
      }
      if (!goOn) {
        triggerWDogOnExit(false);
      }
    }
Ejemplo n.º 3
0
 /*
  * Abbreviate the verbose constructor
  */
 private AuState makeAuState(
     ArchivalUnit au,
     long lastCrawlTime,
     long lastCrawlAttempt,
     long lastTopLevelPoll,
     long lastPollStart,
     long lastTreeWalk,
     HashSet crawlUrls,
     int clockssSubscriptionStatus,
     double v3Agreement,
     double highestV3Agreement,
     HistoryRepository historyRepo) {
   return new AuState(
       au,
       lastCrawlTime,
       lastCrawlAttempt,
       -1,
       null,
       lastTopLevelPoll,
       lastPollStart,
       -1,
       null,
       0,
       lastTreeWalk,
       crawlUrls,
       null,
       clockssSubscriptionStatus,
       v3Agreement,
       highestV3Agreement,
       SubstanceChecker.State.Unknown,
       null, // substanceFeatureVersion
       null, // metadataFeatureVersion
       -1, // lastMetadataIndex
       TimeBase.nowMs(), // lastContentChange
       -1,
       -1,
       -1,
       -1, // numWillingRepairers
       -1, // numCurrentSuspectVersions
       0,
       null,
       historyRepo);
 }