// Called by RegistryPlugin iff any config below RegistryPlugin.PREFIX // has changed protected void setConfig( Configuration config, Configuration prevConfig, Configuration.Differences changedKeys) { m_maxRefetchDepth = config.getInt( NewContentCrawler.PARAM_MAX_CRAWL_DEPTH, NewContentCrawler.DEFAULT_MAX_CRAWL_DEPTH); fetchRateLimiter = recomputeFetchRateLimiter(fetchRateLimiter); enablePolls = config.getBoolean(PARAM_ENABLE_REGISTRY_POLLS, DEFAULT_ENABLE_REGISTRY_POLLS); }
/**
 * Asks the crawl manager to start a new-content crawl of the given AU and
 * records the outcome in {@code statusMsg} (success) or {@code errMsg}
 * (failure to build the request).
 *
 * @param au the archival unit to crawl
 * @param force if true, and the AU's new-content rate limiter currently
 *     reports that an event is not OK, one event is removed from the
 *     limiter before eligibility is checked
 * @param deep if true, the refetch depth is parsed from {@code formDepth}
 *     and applied to the request
 * @return true if the request was handed to the crawl manager; false if
 *     the request could not be built, in which case {@code errMsg} is set
 * @throws CrawlManagerImpl.NotEligibleException if the AU is not eligible
 *     to be queued (the queue-eligibility check is not guarded here)
 */
private boolean startCrawl(ArchivalUnit au, boolean force, boolean deep)
    throws CrawlManagerImpl.NotEligibleException {
  final CrawlManagerImpl manager = (CrawlManagerImpl) crawlMgr;

  // A forced start undoes one rate-limiter event when the limiter would
  // otherwise refuse.
  if (force) {
    final RateLimiter limiter = manager.getNewContentRateLimiter(au);
    if (!limiter.isEventOk()) {
      limiter.unevent();
    }
  }

  // Not being eligible to *queue* is fatal and propagates to the caller.
  manager.checkEligibleToQueueNewContentCrawl(au);

  // Not being eligible to *start* right now is only reported to the user;
  // the request is still submitted.
  String delayNote = "";
  try {
    manager.checkEligibleForNewContentCrawl(au);
  } catch (CrawlManagerImpl.NotEligibleException notYet) {
    delayNote = ", Start delayed due to: " + notYet.getMessage();
  }

  final int priority =
      ConfigManager.getCurrentConfig()
          .getInt(PARAM_CRAWL_PRIORITY, DEFAULT_CRAWL_PRIORITY);

  String depthNote = "";
  CrawlReq request;
  try {
    request = new CrawlReq(au);
    request.setPriority(priority);
    if (deep) {
      final int depth = Integer.parseInt(formDepth);
      if (depth < 0) {
        errMsg = "Illegal refetch depth: " + depth;
        return false;
      }
      request.setRefetchDepth(depth);
      // Report the depth as stored on the request, not the parsed value.
      depthNote = "Deep (" + request.getRefetchDepth() + ") ";
    }
  } catch (NumberFormatException badDepth) {
    // Must precede the RuntimeException handler, of which it is a subtype.
    errMsg = "Illegal refetch depth: " + formDepth;
    return false;
  } catch (RuntimeException failure) {
    log.error("Couldn't create CrawlReq: " + au, failure);
    errMsg = "Couldn't create CrawlReq: " + failure.toString();
    return false;
  }

  manager.startNewContentCrawl(request, null);
  statusMsg = depthNote + "Crawl requested for " + au.getName() + delayNote;
  return true;
}
public void loadAuConfigDescrs(Configuration config) throws ConfigurationException { super.loadAuConfigDescrs(config); this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey()); // Now we can construct a valid CC permission checker. m_permissionCheckers = // ListUtil.list(new CreativeCommonsPermissionChecker(m_registryUrl)); ListUtil.list(new CreativeCommonsPermissionChecker()); paramMap.putLong( KEY_AU_NEW_CONTENT_CRAWL_INTERVAL, CurrentConfig.getTimeIntervalParam( PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL)); if (log.isDebug2()) { log.debug2( "Setting Registry AU recrawl interval to " + StringUtil.timeIntervalToString( paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL))); } }