/** Delete the content of a single cached node from the repository. */
private void deleteBlock(CachedUrl cu) throws IOException {
  log.info("deleting " + cu.getUrl());
  CachedUrlSetSpec cuss = new SingleNodeCachedUrlSetSpec(cu.getUrl());
  ArchivalUnit au = cu.getArchivalUnit();
  CachedUrlSet cus = au.makeCachedUrlSet(cuss);
  NodeManager nm = au.getPlugin().getDaemon().getNodeManager(au);
  nm.deleteNode(cus);
}
public void testFunctionalFromTarHierarchy() throws Exception {
  log.debug3("in testFromTarHierarchy");
  // Load the two test tarballs into the AU. Let any IOException propagate
  // so the test fails instead of silently continuing with missing content.
  InputStream file_input = null;
  try {
    file_input = getResourceAsStream(realTARFile_A);
    UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);

    file_input = getResourceAsStream(realTARFile_B);
    uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);
  } finally {
    IOUtil.safeClose(file_input);
  }

  CachedUrlSet cus = tarAu.getAuCachedUrlSet();
  for (CachedUrl cu : cus.getCuIterable()) {
    log.debug3("AU - cu is: " + cu.getUrl());
    cu.release();
  }

  // We need to start from the level of the ArticleMetadataExtractor.
  MyListEmitter emitter = new MyListEmitter();
  ArticleMetadataExtractor amEx =
      new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

  Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
  while (it.hasNext()) {
    ArticleFiles af = it.next();
    log.debug3("Metadata test - articlefiles " + af.toString());
    CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
    log.debug3("metadata cu is " + cu.getUrl());
    amEx.extract(MetadataTarget.Any(), af, emitter);
    List<ArticleMetadata> returnList = emitter.getAmList();
    assertNotNull(returnList);
    log.debug3("size of returnList is " + returnList.size());
    // Validate every metadata record the extractor emitted.
    for (ArticleMetadata mdRecord : returnList) {
      validateCompleteMetadataRecord(mdRecord);
    }
  }
}
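/*
 * A minimal sketch of the MyListEmitter helper used above, assuming the
 * standard LOCKSS ArticleMetadataExtractor.Emitter callback interface;
 * the actual test class may differ. It simply collects every emitted
 * ArticleMetadata record so the test can inspect the list afterwards.
 */
static class MyListEmitter implements ArticleMetadataExtractor.Emitter {
  private final List<ArticleMetadata> amList = new ArrayList<ArticleMetadata>();

  public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
    log.debug3("emitMetadata: " + md);
    amList.add(md);
  }

  List<ArticleMetadata> getAmList() {
    return amList;
  }
}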
public void testArticleCountAndType() throws Exception {
  int expCount = 28;
  PluginTestUtil.crawlSimAu(sau);
  String pat1 = "branch(\\d+)/(\\d+file\\.html)";
  String rep1 = "aps/journal/v123/n$1/full/$2";
  PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
  String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
  String rep2 = "aps/journal/v123/n$1/pdf/$2";
  PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);

  // Remove some URLs so that some articles are HTML-only and some PDF-only.
  int deleted = 0;
  for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) {
    CachedUrlSetNode cusn = (CachedUrlSetNode) it.next();
    if (cusn instanceof CachedUrl) {
      CachedUrl cu = (CachedUrl) cusn;
      String url = cu.getUrl();
      if (url.contains("/journal/")
          && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
        deleteBlock(cu);
        ++deleted;
      }
    }
  }
  assertEquals(8, deleted);

  Iterator<ArticleFiles> it = nau.getArticleIterator();
  int count = 0;
  int countHtmlOnly = 0;
  int countPdfOnly = 0;
  while (it.hasNext()) {
    ArticleFiles af = it.next();
    log.info(af.toString());
    CachedUrl cu = af.getFullTextCu();
    assertNotNull(cu); // assert before dereferencing, not after
    String url = cu.getUrl();
    String contentType = cu.getContentType();
    log.debug("count " + count + " url " + url + " " + contentType);
    count++;
    if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == null) {
      ++countHtmlOnly;
    }
    // Compare with equals(), not ==; the role URL is a distinct String object.
    if (url.equals(af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF))) {
      ++countPdfOnly;
    }
  }
  log.debug("Article count is " + count);
  assertEquals(expCount, count);
  assertEquals(4, countHtmlOnly);
  assertEquals(4, countPdfOnly);
}
/* private support methods */

/**
 * Store the given content at the given URL in the AU, then run the
 * appropriate (HTML or RIS) metadata extractor over it and return the
 * extracted records.
 */
private List<ArticleMetadata> setupContentForAU(
    ArchivalUnit au, String url, String content, boolean isHtmlExtractor)
    throws IOException, PluginException {
  FileMetadataExtractor me;
  InputStream input = IOUtils.toInputStream(content, "utf-8");
  CIProperties props;
  if (isHtmlExtractor) {
    props = getContentHtmlProperties();
    me = new BaseAtyponHtmlMetadataExtractorFactory()
        .createFileMetadataExtractor(MetadataTarget.Any(), "text/html");
  } else {
    props = getContentRisProperties();
    me = new BaseAtyponRisMetadataExtractorFactory()
        .createFileMetadataExtractor(MetadataTarget.Any(), "text/plain");
  }
  UrlData ud = new UrlData(input, props, url);
  UrlCacher uc = au.makeUrlCacher(ud);
  uc.storeContent();
  CachedUrl cu = uc.getCachedUrl();
  FileMetadataListExtractor mle = new FileMetadataListExtractor(me);
  return mle.extract(MetadataTarget.Any(), cu);
}
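/*
 * A hedged usage sketch for setupContentForAU, assuming the AU (bau1) set up
 * in setUp() below and a hypothetical goodHtmlContent fixture string; the
 * URL and assertions are illustrative, not taken from the real test suite.
 */
public void testExtractGoodHtmlContent() throws Exception {
  String url = BASE_URL + "doi/abs/10.1111/test.id"; // hypothetical article URL
  List<ArticleMetadata> mdList = setupContentForAU(bau1, url, goodHtmlContent, true);
  assertNotNull(mdList);
  assertFalse(mdList.isEmpty()); // the extractor should emit at least one record
}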
List<String> auUrls(ArchivalUnit au) {
  List<String> res = new ArrayList<String>();
  for (Iterator iter = au.getAuCachedUrlSet().contentHashIterator(); iter.hasNext(); ) {
    CachedUrlSetNode cusn = (CachedUrlSetNode) iter.next();
    if (cusn.hasContent()) {
      res.add(cusn.getUrl());
    }
  }
  return res;
}
public void setUp() throws Exception {
  super.setUp();
  setUpDiskSpace(); // needed for startService to work properly
  theDaemon = getMockLockssDaemon();
  theDaemon.getAlertManager();
  theDaemon.getPluginManager().setLoadablePluginsReady(true);
  theDaemon.setDaemonInited(true);
  theDaemon.getPluginManager().startService();
  theDaemon.getCrawlManager();

  // In the source tree this is the file "test_baseatypon.tdb", but it is
  // loaded here in its XML form.
  ConfigurationUtil.addFromUrl(getResource("test_baseatypon.xml"));
  Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
  TdbAu tdbau1 = tdb.getTdbAusLikeName(goodJournal + " Volume " + goodVolume).get(0);
  assertNotNull("Didn't find named TdbAu", tdbau1);
  bau1 = PluginTestUtil.createAndStartAu(tdbau1);
  assertNotNull(bau1);
  TypedEntryMap auConfig = bau1.getProperties();
  assertEquals(BASE_URL, auConfig.getString(BASE_URL_KEY));
}
public static V1Poll createCompletedPoll(
    LockssDaemon daemon,
    ArchivalUnit au,
    V1LcapMessage testmsg,
    int numAgree,
    int numDisagree,
    PollManager pollmanager)
    throws Exception {
  log.debug("createCompletedPoll: au: " + au.toString()
            + " peer " + testmsg.getOriginatorId()
            + " votes " + numAgree + "/" + numDisagree);
  // Build the CachedUrlSetSpec: a single node if the message carries the
  // SINGLE_NODE_LWRBOUND marker, otherwise a range spec.
  CachedUrlSetSpec cusSpec = null;
  if ((testmsg.getLwrBound() != null)
      && (testmsg.getLwrBound().equals(PollSpec.SINGLE_NODE_LWRBOUND))) {
    cusSpec = new SingleNodeCachedUrlSetSpec(testmsg.getTargetUrl());
  } else {
    cusSpec = new RangeCachedUrlSetSpec(
        testmsg.getTargetUrl(), testmsg.getLwrBound(), testmsg.getUprBound());
  }
  CachedUrlSet cus = au.makeCachedUrlSet(cusSpec);
  PollSpec spec = new PollSpec(cus, Poll.V1_CONTENT_POLL);
  ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
  V1Poll p = null;
  if (testmsg.isContentPoll()) {
    p = new V1ContentPoll(
        spec,
        pollmanager,
        testmsg.getOriginatorId(),
        testmsg.getChallenge(),
        testmsg.getDuration(),
        testmsg.getHashAlgorithm());
  } else if (testmsg.isNamePoll()) {
    p = new V1NamePoll(
        spec,
        pollmanager,
        testmsg.getOriginatorId(),
        testmsg.getChallenge(),
        testmsg.getDuration(),
        testmsg.getHashAlgorithm());
  } else if (testmsg.isVerifyPoll()) {
    p = new V1VerifyPoll(
        spec,
        pollmanager,
        testmsg.getOriginatorId(),
        testmsg.getChallenge(),
        testmsg.getDuration(),
        testmsg.getHashAlgorithm(),
        testmsg.getVerifier());
  }
  assertNotNull(p);
  p.setMessage(testmsg);
  // Fill in the tally as if the poll had already run to completion.
  p.m_tally.quorum = numAgree + numDisagree;
  p.m_tally.numAgree = numAgree;
  p.m_tally.numDisagree = numDisagree;
  p.m_tally.wtAgree = 2000;
  p.m_tally.wtDisagree = 200;
  p.m_tally.localEntries = makeEntries(1, 3);
  p.m_tally.votedEntries = makeEntries(1, 5);
  p.m_tally.votedEntries.remove(1);
  p.m_pollstate = V1Poll.PS_COMPLETE;
  p.m_callerID = testmsg.getOriginatorId();
  log.debug3("poll " + p.toString());
  p.m_tally.tallyVotes();
  return p;
}