public void testBaseUrlPath() throws Exception { sau1 = setupSimAu(simAuConfig(tempDirPath)); createContent(sau1); crawlContent(sau1); CachedUrlSet cus1 = sau1.getAuCachedUrlSet(); tempDirPath2 = getTempDir().getAbsolutePath() + File.separator; Configuration config2 = simAuConfig(tempDirPath2); config2.put("base_url", "http://anotherhost.org/some/path/"); SimulatedArchivalUnit sau2 = setupSimAu(config2); createContent(sau2); crawlContent(sau2); CachedUrlSet cus2 = sau1.getAuCachedUrlSet(); List urls1 = auUrls(sau1); List urls2 = auUrls(sau2); Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$"); Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$"); List<String> l1 = auUrls(sau1); List<String> l2 = auUrls(sau2); assertEquals(l1.size(), l2.size()); for (int ix = 0; ix < l1.size(); ix++) { Matcher m1 = pat1.matcher(l1.get(ix)); assertTrue(m1.matches()); Matcher m2 = pat2.matcher(l2.get(ix)); assertTrue(m2.matches()); assertEquals(m1.group(1), m2.group(1)); } }
void enableFilter(SimulatedArchivalUnit sau, boolean enable) throws ArchivalUnit.ConfigurationException { Configuration auConfig = sau.getConfiguration().copy(); // no bad file when playing with filtering auConfig.remove("badCachedFileLoc"); auConfig.remove("badCachedFileNum"); if (enable) { auConfig.put(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC, "true"); } else { auConfig.remove(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC); } sau.setConfiguration(auConfig); }
Configuration simAuConfig(String rootPath) { Configuration conf = ConfigManager.newConfiguration(); conf.put("root", rootPath); conf.put("depth", "2"); conf.put("branch", "2"); conf.put("numFiles", "2"); conf.put("badCachedFileLoc", "2,2"); conf.put("badCachedFileNum", "2"); return conf; }