コード例 #1
0
  public void testBaseUrlPath() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet cus1 = sau1.getAuCachedUrlSet();

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/some/path/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);
    CachedUrlSet cus2 = sau1.getAuCachedUrlSet();
    List urls1 = auUrls(sau1);
    List urls2 = auUrls(sau2);

    Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
    Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat1.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat2.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals(m1.group(1), m2.group(1));
    }
  }
コード例 #2
0
  protected void checkRoot(SimulatedArchivalUnit sau) {
    log.debug("checkRoot()");
    CachedUrlSet set = sau.getAuCachedUrlSet();
    Iterator setIt = set.flatSetIterator();
    ArrayList childL = new ArrayList(1);
    CachedUrlSet cus = null;
    while (setIt.hasNext()) {
      cus = (CachedUrlSet) setIt.next();
      childL.add(cus.getUrl());
    }

    String urlRoot = sau.getUrlRoot();

    String[] expectedA = new String[1];
    expectedA[0] = urlRoot;
    assertIsomorphic(expectedA, childL);

    setIt = cus.flatSetIterator();
    childL = new ArrayList(7);
    while (setIt.hasNext()) {
      childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
    }

    expectedA =
        new String[] {
          urlRoot + "/001file.html",
          urlRoot + "/001file.txt",
          urlRoot + "/002file.html",
          urlRoot + "/002file.txt",
          urlRoot + "/branch1",
          urlRoot + "/branch2",
          urlRoot + "/index.html"
        };
    assertIsomorphic(expectedA, childL);
  }
コード例 #3
0
  public void testDualContentHash() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet set = sau1.getAuCachedUrlSet();
    byte[] nameH = getHash(set, true);
    byte[] contentH = getHash(set, false);

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    SimulatedArchivalUnit sau2 = setupSimAu(simAuConfig(tempDirPath2));

    createContent(sau2);
    crawlContent(sau2);
    set = sau2.getAuCachedUrlSet();
    byte[] nameH2 = getHash(set, true);
    byte[] contentH2 = getHash(set, false);
    assertEquals(nameH, nameH2);
    assertEquals(contentH, contentH2);
  }
コード例 #4
0
  private void checkHashSet(
      SimulatedArchivalUnit sau, boolean namesOnly, boolean filter, byte[] expected)
      throws Exception {
    enableFilter(sau, filter);
    CachedUrlSet set = sau.getAuCachedUrlSet();
    byte[] hash = getHash(set, namesOnly);
    assertEquals(expected, hash);

    String parent = sau.getUrlRoot() + "/branch1";
    CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
    set = sau.makeCachedUrlSet(spec);
    byte[] hash2 = getHash(set, namesOnly);
    assertFalse(Arrays.equals(hash, hash2));
  }
コード例 #5
0
 private void measureHashSpeed(SimulatedArchivalUnit sau) throws Exception {
   MessageDigest dig = null;
   try {
     dig = MessageDigest.getInstance("SHA-1");
   } catch (NoSuchAlgorithmException ex) {
     fail("No algorithm.");
   }
   CachedUrlSet set = sau.getAuCachedUrlSet();
   CachedUrlSetHasher hasher = set.getContentHasher(dig);
   SystemMetrics metrics = theDaemon.getSystemMetrics();
   int estimate = metrics.getBytesPerMsHashEstimate(hasher, dig);
   // should be protected against this being zero by MyMockSystemMetrics,
   // but otherwise use the proper calculation.  This avoids test failure
   // due to really slow machines
   assertTrue(estimate > 0);
   long estimatedTime = set.estimatedHashDuration();
   long size = ((Long) PrivilegedAccessor.getValue(set, "totalNodeSize")).longValue();
   assertTrue(size > 0);
   System.out.println("b/ms: " + estimate);
   System.out.println("size: " + size);
   System.out.println("estimate: " + estimatedTime);
   assertEquals(estimatedTime, theDaemon.getHashService().padHashEstimate(size / estimate));
 }