protected void checkRoot(SimulatedArchivalUnit sau) {
  log.debug("checkRoot()");
  CachedUrlSet set = sau.getAuCachedUrlSet();
  Iterator setIt = set.flatSetIterator();
  ArrayList childL = new ArrayList(1);
  CachedUrlSet cus = null;
  while (setIt.hasNext()) {
    cus = (CachedUrlSet) setIt.next();
    childL.add(cus.getUrl());
  }

  // The AU's cached url set should have exactly one child: the url root.
  String urlRoot = sau.getUrlRoot();
  String[] expectedA = new String[1];
  expectedA[0] = urlRoot;
  assertIsomorphic(expectedA, childL);

  // The root's immediate children, in url order.
  setIt = cus.flatSetIterator();
  childL = new ArrayList(7);
  while (setIt.hasNext()) {
    childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
  }
  expectedA = new String[] {
    urlRoot + "/001file.html",
    urlRoot + "/001file.txt",
    urlRoot + "/002file.html",
    urlRoot + "/002file.txt",
    urlRoot + "/branch1",
    urlRoot + "/branch2",
    urlRoot + "/index.html"
  };
  assertIsomorphic(expectedA, childL);
}
protected void checkLeaf(SimulatedArchivalUnit sau) {
  log.debug("checkLeaf()");
  String parent = sau.getUrlRoot() + "/branch1";
  CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
  CachedUrlSet set = sau.makeCachedUrlSet(spec);
  Iterator setIt = set.contentHashIterator();
  ArrayList childL = new ArrayList(16);
  while (setIt.hasNext()) {
    childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
  }
  String[] expectedA = new String[] {
    parent,
    parent + "/001file.html",
    parent + "/001file.txt",
    parent + "/002file.html",
    parent + "/002file.txt",
    parent + "/branch1",
    parent + "/branch1/001file.html",
    parent + "/branch1/001file.txt",
    parent + "/branch1/002file.html",
    parent + "/branch1/002file.txt",
    parent + "/branch1/index.html",
    parent + "/branch2",
    parent + "/branch2/001file.html",
    parent + "/branch2/001file.txt",
    parent + "/branch2/002file.html",
    parent + "/branch2/002file.txt",
    parent + "/branch2/index.html",
    parent + "/index.html",
  };
  assertIsomorphic(expectedA, childL);
}
public void testFunctionalFromTarHierarchy() throws Exception {
  log.debug3("in testFromTarHierarchy");
  // Load the tarballs into the AU.
  InputStream file_input = null;
  try {
    file_input = getResourceAsStream(realTARFile_A);
    UrlCacher uc = tarAu.makeUrlCacher(
        new UrlData(file_input, tarHeader, TAR_A_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);

    file_input = getResourceAsStream(realTARFile_B);
    uc = tarAu.makeUrlCacher(
        new UrlData(file_input, tarHeader, TAR_B_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);
  } catch (IOException e) {
    fail("Could not load tar file for test: " + e);
  } finally {
    IOUtil.safeClose(file_input);
  }

  CachedUrlSet cus = tarAu.getAuCachedUrlSet();
  for (CachedUrl cu : cus.getCuIterable()) {
    log.debug3("AU - cu is: " + cu.getUrl());
    cu.release();
  }

  // We need to start from the level of the ArticleMetadataExtractor
  MyListEmitter emitter = new MyListEmitter();
  ArticleMetadataExtractor amEx =
      new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

  Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
  while (it.hasNext()) {
    ArticleFiles af = it.next();
    log.debug3("Metadata test - articlefiles " + af.toString());
    CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
    log.debug3("metadata cu is " + cu.getUrl());
    amEx.extract(MetadataTarget.Any(), af, emitter);
    List<ArticleMetadata> returnList = emitter.getAmList();
    assertNotNull(returnList);
    log.debug3("size of returnList is " + returnList.size());
    for (ArticleMetadata mdRecord : returnList) {
      validateCompleteMetadataRecord(mdRecord);
    }
  }
}
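/*
 * MyListEmitter is defined elsewhere in this test class.  For reference,
 * a minimal sketch of what it is assumed to do: implement
 * ArticleMetadataExtractor.Emitter and collect each emitted
 * ArticleMetadata into a list that getAmList() returns.  The field and
 * method bodies below are illustrative, not the actual implementation.
 */
static class MyListEmitter implements ArticleMetadataExtractor.Emitter {
  private final List<ArticleMetadata> amList =
      new ArrayList<ArticleMetadata>();

  public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
    // Accumulate every record so the test can validate all of them.
    amList.add(md);
  }

  public List<ArticleMetadata> getAmList() {
    return amList;
  }
}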
private void hash(CachedUrlSet set, MessageDigest dig, boolean namesOnly)
    throws IOException {
  CachedUrlSetHasher hasher = null;
  if (namesOnly) {
    hasher = set.getNameHasher(dig);
  } else {
    hasher = set.getContentHasher(dig);
  }
  int bytesHashed = 0;
  long timeTaken = System.currentTimeMillis();
  while (!hasher.finished()) {
    bytesHashed += hasher.hashStep(256);
  }
  timeTaken = System.currentTimeMillis() - timeTaken;
  if ((timeTaken > 0) && (bytesHashed > 500)) {
    System.out.println("Bytes hashed: " + bytesHashed);
    System.out.println("Time taken: " + timeTaken + "ms");
    // Promote to long before scaling to avoid int overflow on large sets.
    System.out.println("Bytes/sec: " + ((long) bytesHashed * 1000 / timeTaken));
  } else {
    System.out.println("No time taken, or insufficient bytes hashed.");
    System.out.println("Bytes hashed: " + bytesHashed);
    System.out.println("Time taken: " + timeTaken + "ms");
  }
}
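/*
 * A sketch of how the helper above is typically driven, using the same
 * SHA-1 digest as the speed test below; hashContent() is an illustrative
 * wrapper name, not an existing method of this class.
 */
private void hashContent(SimulatedArchivalUnit sau) throws Exception {
  MessageDigest dig = MessageDigest.getInstance("SHA-1");
  // false = hash content bytes; true would hash only the node names
  hash(sau.getAuCachedUrlSet(), dig, false);
}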
private void measureHashSpeed(SimulatedArchivalUnit sau) throws Exception {
  MessageDigest dig = null;
  try {
    dig = MessageDigest.getInstance("SHA-1");
  } catch (NoSuchAlgorithmException ex) {
    fail("No algorithm.");
  }
  CachedUrlSet set = sau.getAuCachedUrlSet();
  CachedUrlSetHasher hasher = set.getContentHasher(dig);
  SystemMetrics metrics = theDaemon.getSystemMetrics();
  int estimate = metrics.getBytesPerMsHashEstimate(hasher, dig);
  // should be protected against this being zero by MyMockSystemMetrics,
  // but otherwise use the proper calculation.  This avoids test failure
  // due to really slow machines
  assertTrue(estimate > 0);
  long estimatedTime = set.estimatedHashDuration();
  long size =
      ((Long) PrivilegedAccessor.getValue(set, "totalNodeSize")).longValue();
  assertTrue(size > 0);
  System.out.println("b/ms: " + estimate);
  System.out.println("size: " + size);
  System.out.println("estimate: " + estimatedTime);
  assertEquals(estimatedTime,
               theDaemon.getHashService().padHashEstimate(size / estimate));
}
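/*
 * measureHashSpeed() relies on MyMockSystemMetrics (defined elsewhere in
 * this test) to guarantee a positive bytes/ms estimate.  A minimal sketch
 * of the assumed idea, clamping the measured estimate so the
 * size / estimate division above cannot divide by zero on a very slow
 * machine; the exact superclass and signature in the real test may differ.
 */
static class MyMockSystemMetrics extends SystemMetrics {
  public int getBytesPerMsHashEstimate(CachedUrlSetHasher hasher,
                                       MessageDigest dig)
      throws IOException {
    int res = super.getBytesPerMsHashEstimate(hasher, dig);
    // Never report zero: a zero estimate would break the estimate math.
    return (res <= 0) ? 1 : res;
  }
}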