public void testBaseUrlPath() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet cus1 = sau1.getAuCachedUrlSet();

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/some/path/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);
    CachedUrlSet cus2 = sau1.getAuCachedUrlSet();
    List urls1 = auUrls(sau1);
    List urls2 = auUrls(sau2);

    Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
    Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat1.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat2.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals(m1.group(1), m2.group(1));
    }
  }
示例#2
0
 void setConfig(Configuration config) {
   log.debug("config: " + config);
   proxyHost = config.get(PARAM_PROXY_HOST);
   proxyPort = config.getInt(PARAM_PROXY_PORT, DEFAULT_PROXY_PORT);
   if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
     String http_proxy = System.getenv("http_proxy");
     if (!StringUtil.isNullString(http_proxy)) {
       try {
         HostPortParser hpp = new HostPortParser(http_proxy);
         proxyHost = hpp.getHost();
         proxyPort = hpp.getPort();
       } catch (HostPortParser.InvalidSpec e) {
         log.warning("Can't parse http_proxy environment var, ignoring: " + http_proxy + ": " + e);
       }
     }
   }
   if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
     proxyHost = null;
   } else {
     log.info("Proxying through " + proxyHost + ":" + proxyPort);
   }
   userAgent = config.get(PARAM_USER_AGENT);
   if (StringUtil.isNullString(userAgent)) {
     userAgent = null;
   } else {
     log.debug("Setting User-Agent to " + userAgent);
   }
 }
 Configuration natureAuConfig() {
   Configuration conf = ConfigManager.newConfiguration();
   conf.put("base_url", BASE_URL);
   conf.put("journal_id", "aps");
   conf.put("volume_name", "123");
   conf.put("year", "2008");
   return conf;
 }
 // Called by RegistryPlugin iff any config below RegistryPlugin.PREFIX
 // has changed
 protected void setConfig(
     Configuration config, Configuration prevConfig, Configuration.Differences changedKeys) {
   m_maxRefetchDepth =
       config.getInt(
           NewContentCrawler.PARAM_MAX_CRAWL_DEPTH, NewContentCrawler.DEFAULT_MAX_CRAWL_DEPTH);
   fetchRateLimiter = recomputeFetchRateLimiter(fetchRateLimiter);
   enablePolls = config.getBoolean(PARAM_ENABLE_REGISTRY_POLLS, DEFAULT_ENABLE_REGISTRY_POLLS);
 }
 void enableFilter(SimulatedArchivalUnit sau, boolean enable)
     throws ArchivalUnit.ConfigurationException {
   Configuration auConfig = sau.getConfiguration().copy();
   // no bad file when playing with filtering
   auConfig.remove("badCachedFileLoc");
   auConfig.remove("badCachedFileNum");
   if (enable) {
     auConfig.put(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC, "true");
   } else {
     auConfig.remove(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC);
   }
   sau.setConfiguration(auConfig);
 }
 Configuration simAuConfig(String rootPath) {
   Configuration conf = ConfigManager.newConfiguration();
   conf.put("root", rootPath);
   conf.put("base_url", BASE_URL);
   conf.put("depth", "1");
   conf.put("branch", "4");
   conf.put("numFiles", "7");
   conf.put(
       "fileTypes",
       "" + (SimulatedContentGenerator.FILE_TYPE_HTML | SimulatedContentGenerator.FILE_TYPE_PDF));
   conf.put("binFileSize", "" + fileSize);
   return conf;
 }
 Configuration simAuConfig(String rootPath) {
   Configuration conf = ConfigManager.newConfiguration();
   conf.put("root", rootPath);
   conf.put("depth", "2");
   conf.put("branch", "2");
   conf.put("numFiles", "2");
   conf.put("badCachedFileLoc", "2,2");
   conf.put("badCachedFileNum", "2");
   return conf;
 }
  public void loadAuConfigDescrs(Configuration config) throws ConfigurationException {
    super.loadAuConfigDescrs(config);
    this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey());
    // Now we can construct a valid CC permission checker.
    m_permissionCheckers =
        //       ListUtil.list(new CreativeCommonsPermissionChecker(m_registryUrl));
        ListUtil.list(new CreativeCommonsPermissionChecker());

    paramMap.putLong(
        KEY_AU_NEW_CONTENT_CRAWL_INTERVAL,
        CurrentConfig.getTimeIntervalParam(
            PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL));
    if (log.isDebug2()) {
      log.debug2(
          "Setting Registry AU recrawl interval to "
              + StringUtil.timeIntervalToString(
                  paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL)));
    }
  }
 public void setConfig(
     Configuration config, Configuration oldConfig, Configuration.Differences changedKeys) {
   //  Build list of repositories from list of disk (fs) paths).  Needs to
   //  be generalized if ever another repository implementation.
   if (changedKeys.contains(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST)) {
     List lst = new ArrayList();
     String dspace = config.get(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, "");
     List paths = StringUtil.breakAt(dspace, ';');
     if (paths != null) {
       for (Iterator iter = paths.iterator(); iter.hasNext(); ) {
         lst.add("local:" + (String) iter.next());
       }
     }
     repoList = lst;
   }
   if (changedKeys.contains(PARAM_MAX_PER_AU_CACHE_SIZE)) {
     paramNodeCacheSize =
         config.getInt(PARAM_MAX_PER_AU_CACHE_SIZE, DEFAULT_MAX_PER_AU_CACHE_SIZE);
     for (Iterator iter = getDaemon().getAllLockssRepositories().iterator(); iter.hasNext(); ) {
       LockssRepository repo = (LockssRepository) iter.next();
       if (repo instanceof LockssRepositoryImpl) {
         LockssRepositoryImpl repoImpl = (LockssRepositoryImpl) repo;
         repoImpl.setNodeCacheSize(paramNodeCacheSize);
       }
     }
   }
   if (changedKeys.contains(PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE)) {
     paramSuspectVersionsCacheSize =
         config.getInt(
             PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE, DEFAULT_MAX_SUSPECT_VERSIONS_CACHE_SIZE);
     suspectVersionsCache.setMaxSize(paramSuspectVersionsCacheSize);
   }
   if (changedKeys.contains(GLOBAL_CACHE_PREFIX)) {
     paramIsGlobalNodeCache =
         config.getBoolean(PARAM_GLOBAL_CACHE_ENABLED, DEFAULT_GLOBAL_CACHE_ENABLED);
     if (paramIsGlobalNodeCache) {
       paramGlobalNodeCacheSize =
           config.getInt(PARAM_MAX_GLOBAL_CACHE_SIZE, DEFAULT_MAX_GLOBAL_CACHE_SIZE);
       log.debug("global node cache size: " + paramGlobalNodeCacheSize);
       globalNodeCache.setMaxSize(paramGlobalNodeCacheSize);
     }
   }
   if (changedKeys.contains(DISK_PREFIX)) {
     int minMB = config.getInt(PARAM_DISK_WARN_FRRE_MB, DEFAULT_DISK_WARN_FRRE_MB);
     double minPer =
         config.getPercentage(PARAM_DISK_WARN_FRRE_PERCENT, DEFAULT_DISK_WARN_FRRE_PERCENT);
     paramDFWarn = PlatformUtil.DF.makeThreshold(minMB, minPer);
     minMB = config.getInt(PARAM_DISK_FULL_FRRE_MB, DEFAULT_DISK_FULL_FRRE_MB);
     minPer = config.getPercentage(PARAM_DISK_FULL_FRRE_PERCENT, DEFAULT_DISK_FULL_FRRE_PERCENT);
     paramDFFull = PlatformUtil.DF.makeThreshold(minMB, minPer);
   }
   if (changedKeys.contains(PARAM_SIZE_CALC_MAX_LOAD)) {
     sizeCalcMaxLoad = config.getPercentage(PARAM_SIZE_CALC_MAX_LOAD, DEFAULT_SIZE_CALC_MAX_LOAD);
   }
   if (changedKeys.contains(PREFIX)) {
     maxUnusedDirSearch =
         config.getInt(PARAM_MAX_UNUSED_DIR_SEARCH, DEFAULT_MAX_UNUSED_DIR_SEARCH);
     isStatefulUnusedDirSearch =
         config.getBoolean(
             PARAM_IS_STATEFUL_UNUSED_DIR_SEARCH, DEFAULT_IS_STATEFUL_UNUSED_DIR_SEARCH);
     enableLongComponents =
         config.getBoolean(PARAM_ENABLE_LONG_COMPONENTS, DEFAULT_ENABLE_LONG_COMPONENTS);
     enableLongComponentsCompatibility =
         config.getBoolean(
             PARAM_ENABLE_LONG_COMPONENTS_COMPATIBILITY,
             DEFAULT_ENABLE_LONG_COMPONENTS_COMPATIBILITY);
     maxComponentLength = config.getInt(PARAM_MAX_COMPONENT_LENGTH, DEFAULT_MAX_COMPONENT_LENGTH);
     checkUnnormalized =
         (CheckUnnormalizedMode)
             config.getEnum(
                 CheckUnnormalizedMode.class,
                 PARAM_CHECK_UNNORMALIZED,
                 DEFAULT_CHECK_UNNORMALIZED);
   }
 }