private void checkRules() { outputMessage("\nChecking " + m_baseUrl, TEST_SUMMARY_MESSAGE); outputMessage( "crawl depth: " + m_crawlDepth + " crawl delay: " + m_crawlDelay + " ms.", PLAIN_MESSAGE); TreeSet crawlList = new TreeSet(); TreeSet fetched = new TreeSet(); // inialize with the baseUrl crawlList.add(m_baseUrl); depth_incl = new int[m_crawlDepth]; depth_fetched = new int[m_crawlDepth]; depth_parsed = new int[m_crawlDepth]; long start_time = TimeBase.nowMs(); for (int depth = 1; depth <= m_crawlDepth; depth++) { if (isInterrupted()) { return; } m_curDepth = depth; if (crawlList.isEmpty() && depth <= m_crawlDepth) { outputMessage("\nNothing left to crawl, exiting after depth " + (depth - 1), PLAIN_MESSAGE); break; } String[] urls = (String[]) crawlList.toArray(new String[0]); crawlList.clear(); outputMessage("\nDepth " + depth, PLAIN_MESSAGE); for (int ix = 0; ix < urls.length; ix++) { if (isInterrupted()) { return; } pauseBeforeFetch(); String urlstr = urls[ix]; m_incls.clear(); m_excls.clear(); // crawl the page buildUrlSets(urlstr); fetched.add(urlstr); // output incl/excl results, // add the new_incls to the crawlList for next crawl depth loop crawlList.addAll(outputUrlResults(urlstr, m_incls, m_excls)); } } long elapsed_time = TimeBase.nowMs() - start_time; outputSummary(m_baseUrl, fetched, crawlList, elapsed_time); }
public void lockssRun() { setPriority(PRIORITY_PARAM_SIZE_CALC, PRIORITY_DEFAULT_SIZE_CALC); startWDog(WDOG_PARAM_SIZE_CALC, WDOG_DEFAULT_SIZE_CALC); triggerWDogOnExit(true); nowRunning(); while (goOn) { try { pokeWDog(); if (sizeCalcQueue.isEmpty()) { Deadline timeout = Deadline.in(Constants.HOUR); sizeCalcSem.take(timeout); } RepositoryNode node; synchronized (sizeCalcQueue) { node = (RepositoryNode) CollectionUtil.getAnElement(sizeCalcQueue); } if (node != null) { long start = TimeBase.nowMs(); log.debug2("CalcSize start: " + node); long dur = 0; try { doSizeCalc(node); dur = TimeBase.nowMs() - start; log.debug2("CalcSize finish (" + StringUtil.timeIntervalToString(dur) + "): " + node); } catch (RuntimeException e) { log.warning("doSizeCalc: " + node, e); } synchronized (sizeCalcQueue) { sizeCalcQueue.remove(node); } pokeWDog(); long sleep = sleepTimeToAchieveLoad(dur, sizeCalcMaxLoad); Deadline.in(sleep).sleep(); } } catch (InterruptedException e) { // just wakeup and check for exit } } if (!goOn) { triggerWDogOnExit(false); } }
/* * Abbreviate the verbose constructor */ private AuState makeAuState( ArchivalUnit au, long lastCrawlTime, long lastCrawlAttempt, long lastTopLevelPoll, long lastPollStart, long lastTreeWalk, HashSet crawlUrls, int clockssSubscriptionStatus, double v3Agreement, double highestV3Agreement, HistoryRepository historyRepo) { return new AuState( au, lastCrawlTime, lastCrawlAttempt, -1, null, lastTopLevelPoll, lastPollStart, -1, null, 0, lastTreeWalk, crawlUrls, null, clockssSubscriptionStatus, v3Agreement, highestV3Agreement, SubstanceChecker.State.Unknown, null, // substanceFeatureVersion null, // metadataFeatureVersion -1, // lastMetadataIndex TimeBase.nowMs(), // lastContentChange -1, -1, -1, -1, // numWillingRepairers -1, // numCurrentSuspectVersions 0, null, historyRepo); }