public void testBaseUrlPath() throws Exception { sau1 = setupSimAu(simAuConfig(tempDirPath)); createContent(sau1); crawlContent(sau1); CachedUrlSet cus1 = sau1.getAuCachedUrlSet(); tempDirPath2 = getTempDir().getAbsolutePath() + File.separator; Configuration config2 = simAuConfig(tempDirPath2); config2.put("base_url", "http://anotherhost.org/some/path/"); SimulatedArchivalUnit sau2 = setupSimAu(config2); createContent(sau2); crawlContent(sau2); CachedUrlSet cus2 = sau1.getAuCachedUrlSet(); List urls1 = auUrls(sau1); List urls2 = auUrls(sau2); Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$"); Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$"); List<String> l1 = auUrls(sau1); List<String> l2 = auUrls(sau2); assertEquals(l1.size(), l2.size()); for (int ix = 0; ix < l1.size(); ix++) { Matcher m1 = pat1.matcher(l1.get(ix)); assertTrue(m1.matches()); Matcher m2 = pat2.matcher(l2.get(ix)); assertTrue(m2.matches()); assertEquals(m1.group(1), m2.group(1)); } }
protected void checkRoot(SimulatedArchivalUnit sau) { log.debug("checkRoot()"); CachedUrlSet set = sau.getAuCachedUrlSet(); Iterator setIt = set.flatSetIterator(); ArrayList childL = new ArrayList(1); CachedUrlSet cus = null; while (setIt.hasNext()) { cus = (CachedUrlSet) setIt.next(); childL.add(cus.getUrl()); } String urlRoot = sau.getUrlRoot(); String[] expectedA = new String[1]; expectedA[0] = urlRoot; assertIsomorphic(expectedA, childL); setIt = cus.flatSetIterator(); childL = new ArrayList(7); while (setIt.hasNext()) { childL.add(((CachedUrlSetNode) setIt.next()).getUrl()); } expectedA = new String[] { urlRoot + "/001file.html", urlRoot + "/001file.txt", urlRoot + "/002file.html", urlRoot + "/002file.txt", urlRoot + "/branch1", urlRoot + "/branch2", urlRoot + "/index.html" }; assertIsomorphic(expectedA, childL); }
public void testDualContentHash() throws Exception { sau1 = setupSimAu(simAuConfig(tempDirPath)); createContent(sau1); crawlContent(sau1); CachedUrlSet set = sau1.getAuCachedUrlSet(); byte[] nameH = getHash(set, true); byte[] contentH = getHash(set, false); tempDirPath2 = getTempDir().getAbsolutePath() + File.separator; SimulatedArchivalUnit sau2 = setupSimAu(simAuConfig(tempDirPath2)); createContent(sau2); crawlContent(sau2); set = sau2.getAuCachedUrlSet(); byte[] nameH2 = getHash(set, true); byte[] contentH2 = getHash(set, false); assertEquals(nameH, nameH2); assertEquals(contentH, contentH2); }
private void checkHashSet( SimulatedArchivalUnit sau, boolean namesOnly, boolean filter, byte[] expected) throws Exception { enableFilter(sau, filter); CachedUrlSet set = sau.getAuCachedUrlSet(); byte[] hash = getHash(set, namesOnly); assertEquals(expected, hash); String parent = sau.getUrlRoot() + "/branch1"; CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent); set = sau.makeCachedUrlSet(spec); byte[] hash2 = getHash(set, namesOnly); assertFalse(Arrays.equals(hash, hash2)); }
private void measureHashSpeed(SimulatedArchivalUnit sau) throws Exception { MessageDigest dig = null; try { dig = MessageDigest.getInstance("SHA-1"); } catch (NoSuchAlgorithmException ex) { fail("No algorithm."); } CachedUrlSet set = sau.getAuCachedUrlSet(); CachedUrlSetHasher hasher = set.getContentHasher(dig); SystemMetrics metrics = theDaemon.getSystemMetrics(); int estimate = metrics.getBytesPerMsHashEstimate(hasher, dig); // should be protected against this being zero by MyMockSystemMetrics, // but otherwise use the proper calculation. This avoids test failure // due to really slow machines assertTrue(estimate > 0); long estimatedTime = set.estimatedHashDuration(); long size = ((Long) PrivilegedAccessor.getValue(set, "totalNodeSize")).longValue(); assertTrue(size > 0); System.out.println("b/ms: " + estimate); System.out.println("size: " + size); System.out.println("estimate: " + estimatedTime); assertEquals(estimatedTime, theDaemon.getHashService().padHashEstimate(size / estimate)); }