public void testBaseUrlPath() throws Exception { sau1 = setupSimAu(simAuConfig(tempDirPath)); createContent(sau1); crawlContent(sau1); CachedUrlSet cus1 = sau1.getAuCachedUrlSet(); tempDirPath2 = getTempDir().getAbsolutePath() + File.separator; Configuration config2 = simAuConfig(tempDirPath2); config2.put("base_url", "http://anotherhost.org/some/path/"); SimulatedArchivalUnit sau2 = setupSimAu(config2); createContent(sau2); crawlContent(sau2); CachedUrlSet cus2 = sau1.getAuCachedUrlSet(); List urls1 = auUrls(sau1); List urls2 = auUrls(sau2); Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$"); Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$"); List<String> l1 = auUrls(sau1); List<String> l2 = auUrls(sau2); assertEquals(l1.size(), l2.size()); for (int ix = 0; ix < l1.size(); ix++) { Matcher m1 = pat1.matcher(l1.get(ix)); assertTrue(m1.matches()); Matcher m2 = pat2.matcher(l2.get(ix)); assertTrue(m2.matches()); assertEquals(m1.group(1), m2.group(1)); } }
void setConfig(Configuration config) { log.debug("config: " + config); proxyHost = config.get(PARAM_PROXY_HOST); proxyPort = config.getInt(PARAM_PROXY_PORT, DEFAULT_PROXY_PORT); if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) { String http_proxy = System.getenv("http_proxy"); if (!StringUtil.isNullString(http_proxy)) { try { HostPortParser hpp = new HostPortParser(http_proxy); proxyHost = hpp.getHost(); proxyPort = hpp.getPort(); } catch (HostPortParser.InvalidSpec e) { log.warning("Can't parse http_proxy environment var, ignoring: " + http_proxy + ": " + e); } } } if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) { proxyHost = null; } else { log.info("Proxying through " + proxyHost + ":" + proxyPort); } userAgent = config.get(PARAM_USER_AGENT); if (StringUtil.isNullString(userAgent)) { userAgent = null; } else { log.debug("Setting User-Agent to " + userAgent); } }
Configuration natureAuConfig() { Configuration conf = ConfigManager.newConfiguration(); conf.put("base_url", BASE_URL); conf.put("journal_id", "aps"); conf.put("volume_name", "123"); conf.put("year", "2008"); return conf; }
// Called by RegistryPlugin iff any config below RegistryPlugin.PREFIX // has changed protected void setConfig( Configuration config, Configuration prevConfig, Configuration.Differences changedKeys) { m_maxRefetchDepth = config.getInt( NewContentCrawler.PARAM_MAX_CRAWL_DEPTH, NewContentCrawler.DEFAULT_MAX_CRAWL_DEPTH); fetchRateLimiter = recomputeFetchRateLimiter(fetchRateLimiter); enablePolls = config.getBoolean(PARAM_ENABLE_REGISTRY_POLLS, DEFAULT_ENABLE_REGISTRY_POLLS); }
void enableFilter(SimulatedArchivalUnit sau, boolean enable) throws ArchivalUnit.ConfigurationException { Configuration auConfig = sau.getConfiguration().copy(); // no bad file when playing with filtering auConfig.remove("badCachedFileLoc"); auConfig.remove("badCachedFileNum"); if (enable) { auConfig.put(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC, "true"); } else { auConfig.remove(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC); } sau.setConfiguration(auConfig); }
Configuration simAuConfig(String rootPath) { Configuration conf = ConfigManager.newConfiguration(); conf.put("root", rootPath); conf.put("base_url", BASE_URL); conf.put("depth", "1"); conf.put("branch", "4"); conf.put("numFiles", "7"); conf.put( "fileTypes", "" + (SimulatedContentGenerator.FILE_TYPE_HTML | SimulatedContentGenerator.FILE_TYPE_PDF)); conf.put("binFileSize", "" + fileSize); return conf; }
Configuration simAuConfig(String rootPath) { Configuration conf = ConfigManager.newConfiguration(); conf.put("root", rootPath); conf.put("depth", "2"); conf.put("branch", "2"); conf.put("numFiles", "2"); conf.put("badCachedFileLoc", "2,2"); conf.put("badCachedFileNum", "2"); return conf; }
public void loadAuConfigDescrs(Configuration config) throws ConfigurationException { super.loadAuConfigDescrs(config); this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey()); // Now we can construct a valid CC permission checker. m_permissionCheckers = // ListUtil.list(new CreativeCommonsPermissionChecker(m_registryUrl)); ListUtil.list(new CreativeCommonsPermissionChecker()); paramMap.putLong( KEY_AU_NEW_CONTENT_CRAWL_INTERVAL, CurrentConfig.getTimeIntervalParam( PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL)); if (log.isDebug2()) { log.debug2( "Setting Registry AU recrawl interval to " + StringUtil.timeIntervalToString( paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL))); } }
public void setConfig( Configuration config, Configuration oldConfig, Configuration.Differences changedKeys) { // Build list of repositories from list of disk (fs) paths). Needs to // be generalized if ever another repository implementation. if (changedKeys.contains(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST)) { List lst = new ArrayList(); String dspace = config.get(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, ""); List paths = StringUtil.breakAt(dspace, ';'); if (paths != null) { for (Iterator iter = paths.iterator(); iter.hasNext(); ) { lst.add("local:" + (String) iter.next()); } } repoList = lst; } if (changedKeys.contains(PARAM_MAX_PER_AU_CACHE_SIZE)) { paramNodeCacheSize = config.getInt(PARAM_MAX_PER_AU_CACHE_SIZE, DEFAULT_MAX_PER_AU_CACHE_SIZE); for (Iterator iter = getDaemon().getAllLockssRepositories().iterator(); iter.hasNext(); ) { LockssRepository repo = (LockssRepository) iter.next(); if (repo instanceof LockssRepositoryImpl) { LockssRepositoryImpl repoImpl = (LockssRepositoryImpl) repo; repoImpl.setNodeCacheSize(paramNodeCacheSize); } } } if (changedKeys.contains(PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE)) { paramSuspectVersionsCacheSize = config.getInt( PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE, DEFAULT_MAX_SUSPECT_VERSIONS_CACHE_SIZE); suspectVersionsCache.setMaxSize(paramSuspectVersionsCacheSize); } if (changedKeys.contains(GLOBAL_CACHE_PREFIX)) { paramIsGlobalNodeCache = config.getBoolean(PARAM_GLOBAL_CACHE_ENABLED, DEFAULT_GLOBAL_CACHE_ENABLED); if (paramIsGlobalNodeCache) { paramGlobalNodeCacheSize = config.getInt(PARAM_MAX_GLOBAL_CACHE_SIZE, DEFAULT_MAX_GLOBAL_CACHE_SIZE); log.debug("global node cache size: " + paramGlobalNodeCacheSize); globalNodeCache.setMaxSize(paramGlobalNodeCacheSize); } } if (changedKeys.contains(DISK_PREFIX)) { int minMB = config.getInt(PARAM_DISK_WARN_FRRE_MB, DEFAULT_DISK_WARN_FRRE_MB); double minPer = config.getPercentage(PARAM_DISK_WARN_FRRE_PERCENT, DEFAULT_DISK_WARN_FRRE_PERCENT); paramDFWarn = PlatformUtil.DF.makeThreshold(minMB, minPer); minMB = config.getInt(PARAM_DISK_FULL_FRRE_MB, DEFAULT_DISK_FULL_FRRE_MB); minPer = config.getPercentage(PARAM_DISK_FULL_FRRE_PERCENT, DEFAULT_DISK_FULL_FRRE_PERCENT); paramDFFull = PlatformUtil.DF.makeThreshold(minMB, minPer); } if (changedKeys.contains(PARAM_SIZE_CALC_MAX_LOAD)) { sizeCalcMaxLoad = config.getPercentage(PARAM_SIZE_CALC_MAX_LOAD, DEFAULT_SIZE_CALC_MAX_LOAD); } if (changedKeys.contains(PREFIX)) { maxUnusedDirSearch = config.getInt(PARAM_MAX_UNUSED_DIR_SEARCH, DEFAULT_MAX_UNUSED_DIR_SEARCH); isStatefulUnusedDirSearch = config.getBoolean( PARAM_IS_STATEFUL_UNUSED_DIR_SEARCH, DEFAULT_IS_STATEFUL_UNUSED_DIR_SEARCH); enableLongComponents = config.getBoolean(PARAM_ENABLE_LONG_COMPONENTS, DEFAULT_ENABLE_LONG_COMPONENTS); enableLongComponentsCompatibility = config.getBoolean( PARAM_ENABLE_LONG_COMPONENTS_COMPATIBILITY, DEFAULT_ENABLE_LONG_COMPONENTS_COMPATIBILITY); maxComponentLength = config.getInt(PARAM_MAX_COMPONENT_LENGTH, DEFAULT_MAX_COMPONENT_LENGTH); checkUnnormalized = (CheckUnnormalizedMode) config.getEnum( CheckUnnormalizedMode.class, PARAM_CHECK_UNNORMALIZED, DEFAULT_CHECK_UNNORMALIZED); } }