コード例 #1
0
  /**
   * Checks that two simulated AUs built from the same content parameters but
   * different base URLs yield URL lists that differ only in their base-URL
   * prefix.
   *
   * <p>The first AU uses the configuration returned by {@code simAuConfig}
   * unchanged; its URLs are expected to start with
   * {@code http://www.example.com}.  The second AU overrides {@code base_url}
   * with {@code http://anotherhost.org/some/path/}.  After creating and
   * crawling both, the URL lists must have the same size and, position by
   * position, the same path suffix after the respective prefix.
   *
   * @throws Exception if AU setup, content creation or crawling fails
   */
  public void testBaseUrlPath() throws Exception {
    // First AU: default configuration.
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);

    // Second AU: same parameters, separate root directory, different base URL.
    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/some/path/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);

    // Group 1 captures everything after the base-URL prefix.
    Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
    Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    // NOTE(review): the pairwise comparison relies on auUrls() returning both
    // lists in the same relative order - confirm against auUrls().
    for (int ix = 0; ix < l1.size(); ix++) {
      String url1 = l1.get(ix);
      String url2 = l2.get(ix);
      Matcher m1 = pat1.matcher(url1);
      assertTrue("URL lacks expected prefix: " + url1, m1.matches());
      Matcher m2 = pat2.matcher(url2);
      assertTrue("URL lacks expected prefix: " + url2, m2.matches());
      assertEquals(m1.group(1), m2.group(1));
    }
  }
コード例 #2
0
 /**
  * Reconfigures {@code sau} with the hash-filter parameter switched on or off.
  *
  * <p>Works on a copy of the AU's current configuration: the two
  * "bad cached file" parameters are always stripped, then
  * {@code SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC} is either set to
  * {@code "true"} or removed, and the result is applied back to the AU.
  *
  * @param sau the simulated AU to reconfigure
  * @param enable {@code true} to set the filter parameter, {@code false} to
  *     remove it
  * @throws ArchivalUnit.ConfigurationException declared for the
  *     reconfiguration; presumably raised by {@code setConfiguration}
  */
 void enableFilter(SimulatedArchivalUnit sau, boolean enable)
     throws ArchivalUnit.ConfigurationException {
   Configuration updated = sau.getConfiguration().copy();

   // Filtering runs are done without the bad-file parameters.
   String[] badFileKeys = {"badCachedFileLoc", "badCachedFileNum"};
   for (String key : badFileKeys) {
     updated.remove(key);
   }

   if (!enable) {
     updated.remove(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC);
   } else {
     updated.put(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC, "true");
   }

   sau.setConfiguration(updated);
 }
コード例 #3
0
 /**
  * Builds a fresh configuration for a simulated AU rooted at {@code rootPath}.
  *
  * <p>Besides {@code root}, a fixed set of parameters is filled in:
  * {@code depth}, {@code branch}, {@code numFiles}, {@code badCachedFileLoc}
  * and {@code badCachedFileNum}.  Their meaning is defined by the consumer of
  * the configuration, which is not part of this method.
  *
  * @param rootPath value stored under the {@code root} key
  * @return a new configuration holding the parameters listed above
  */
 Configuration simAuConfig(String rootPath) {
   // Fixed key/value pairs, applied in this order after "root".
   String[][] fixedParams = {
     {"depth", "2"},
     {"branch", "2"},
     {"numFiles", "2"},
     {"badCachedFileLoc", "2,2"},
     {"badCachedFileNum", "2"},
   };

   Configuration auConfig = ConfigManager.newConfiguration();
   auConfig.put("root", rootPath);
   for (String[] param : fixedParams) {
     auConfig.put(param[0], param[1]);
   }
   return auConfig;
 }