/**
 * Checks the top two levels of the simulated AU's cached-URL tree.
 *
 * <p>The flat set of the AU-level CachedUrlSet must contain exactly the AU's URL root, and the
 * flat set of that root node must contain the seven expected files and branches.
 *
 * @param sau the simulated archival unit whose content tree is inspected
 */
protected void checkRoot(SimulatedArchivalUnit sau) {
  log.debug("checkRoot()");

  // First level: collect the URLs directly under the AU's top-level set, remembering the
  // last CachedUrlSet seen so it can be descended into below.
  ArrayList topLevelUrls = new ArrayList(1);
  CachedUrlSet rootNode = null;
  for (Iterator topIter = sau.getAuCachedUrlSet().flatSetIterator(); topIter.hasNext(); ) {
    rootNode = (CachedUrlSet) topIter.next();
    topLevelUrls.add(rootNode.getUrl());
  }
  String urlRoot = sau.getUrlRoot();
  String[] expectedTop = {urlRoot};
  assertIsomorphic(expectedTop, topLevelUrls);

  // Second level: the direct children of the root node.
  ArrayList rootChildUrls = new ArrayList(7);
  for (Iterator childIter = rootNode.flatSetIterator(); childIter.hasNext(); ) {
    CachedUrlSetNode child = (CachedUrlSetNode) childIter.next();
    rootChildUrls.add(child.getUrl());
  }
  String[] expectedChildren = {
    urlRoot + "/001file.html",
    urlRoot + "/001file.txt",
    urlRoot + "/002file.html",
    urlRoot + "/002file.txt",
    urlRoot + "/branch1",
    urlRoot + "/branch2",
    urlRoot + "/index.html"
  };
  assertIsomorphic(expectedChildren, rootChildUrls);
}
public void testFunctionalFromTarHierarchy() throws Exception { log.debug3("in testFromTarHierarchy"); // load the tarballs InputStream file_input = null; try { file_input = getResourceAsStream(realTARFile_A); // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE); // uc.storeContent(file_input, tarHeader); UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE)); uc.storeContent(); IOUtil.safeClose(file_input); file_input = getResourceAsStream(realTARFile_B); // uc = au.makeUrlCacher(TAR_B_BASE); // uc.storeContent(file_input, tarHeader); uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE)); uc.storeContent(); IOUtil.safeClose(file_input); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { IOUtil.safeClose(file_input); } CachedUrlSet cus = tarAu.getAuCachedUrlSet(); for (CachedUrl cu : cus.getCuIterable()) { log.debug3("AU - cu is: " + cu.getUrl()); cu.release(); } // We need to start from the level of the ArticleMetadataExtractor MyListEmitter emitter = new MyListEmitter(); ArticleMetadataExtractor amEx = new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA); Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any()); while (it.hasNext()) { ArticleFiles af = it.next(); log.debug3("Metadata test - articlefiles " + af.toString()); // CachedUrl cu = af.getFullTextCu(); CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA); log.debug3("metadata cu is " + cu.getUrl()); // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu); amEx.extract(MetadataTarget.Any(), af, emitter); List<ArticleMetadata> returnList = emitter.getAmList(); assertNotNull(returnList); log.debug3("size of returnList is " + returnList.size()); Iterator<ArticleMetadata> mdIt = returnList.iterator(); ArticleMetadata mdRecord = null; while (mdIt.hasNext()) { mdRecord = (ArticleMetadata) mdIt.next(); validateCompleteMetadataRecord(mdRecord); } } }
/**
 * Round-trips a NodeState through the history repository: stores a node state with a crawl
 * state and two active polls, checks that the node file exists on disk, reloads the state and
 * compares every stored field.
 *
 * @throws Exception if storing or loading the node state fails
 */
public void testStoreNodeState() throws Exception {
  TimeBase.setSimulated(100);

  CachedUrlSet cus =
      new MockCachedUrlSet(mau, new RangeCachedUrlSetSpec("http://www.example.com"));
  CrawlState crawlState = new CrawlState(1, 2, 123);
  List activePolls = new ArrayList(2);
  activePolls.add(new PollState(1, "sdf", "jkl", 2, 123, Deadline.at(456), false));
  activePolls.add(new PollState(2, "abc", "def", 3, 321, Deadline.at(654), false));

  NodeStateImpl stored = new NodeStateImpl(cus, 123321, crawlState, activePolls, repository);
  stored.setState(NodeState.DAMAGE_AT_OR_BELOW);
  repository.storeNodeState(stored);

  // The node file must have been written beneath the AU's directory.
  String auDir = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau);
  String nodeFilePath =
      LockssRepositoryImpl.mapUrlToFileLocation(
          auDir, "http://www.example.com/" + HistoryRepositoryImpl.NODE_FILE_NAME);
  assertTrue(new File(nodeFilePath).exists());

  // Drop the reference to the stored instance before reloading, as the original test did.
  stored = null;
  NodeState loaded = repository.loadNodeState(cus);
  assertSame(cus, loaded.getCachedUrlSet());
  assertEquals(123321, loaded.getAverageHashDuration());
  assertEquals(1, loaded.getCrawlState().getType());
  assertEquals(2, loaded.getCrawlState().getStatus());
  assertEquals(123, loaded.getCrawlState().getStartTime());
  assertEquals(NodeState.DAMAGE_AT_OR_BELOW, loaded.getState());

  Iterator loadedPolls = loaded.getActivePolls();

  // First poll.
  assertTrue(loadedPolls.hasNext());
  PollState poll = (PollState) loadedPolls.next();
  assertEquals(1, poll.getType());
  assertEquals("sdf", poll.getLwrBound());
  assertEquals("jkl", poll.getUprBound());
  assertEquals(2, poll.getStatus());
  assertEquals(123, poll.getStartTime());
  assertEquals(456, poll.getDeadline().getExpirationTime());

  // Second poll.
  assertTrue(loadedPolls.hasNext());
  poll = (PollState) loadedPolls.next();
  assertEquals(2, poll.getType());
  assertEquals("abc", poll.getLwrBound());
  assertEquals("def", poll.getUprBound());
  assertEquals(3, poll.getStatus());
  assertEquals(321, poll.getStartTime());
  assertEquals(654, poll.getDeadline().getExpirationTime());

  // No further polls.
  assertFalse(loadedPolls.hasNext());

  TimeBase.setReal();
}
public void testArticleCountAndType() throws Exception { int expCount = 28; PluginTestUtil.crawlSimAu(sau); String pat1 = "branch(\\d+)/(\\d+file\\.html)"; String rep1 = "aps/journal/v123/n$1/full/$2"; PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1); String pat2 = "branch(\\d+)/(\\d+file\\.pdf)"; String rep2 = "aps/journal/v123/n$1/pdf/$2"; PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2); // Remove some URLs int deleted = 0; for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) { CachedUrlSetNode cusn = (CachedUrlSetNode) it.next(); if (cusn instanceof CachedUrl) { CachedUrl cu = (CachedUrl) cusn; String url = cu.getUrl(); if (url.contains("/journal/") && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) { deleteBlock(cu); ++deleted; } } } assertEquals(8, deleted); Iterator<ArticleFiles> it = nau.getArticleIterator(); int count = 0; int countHtmlOnly = 0; int countPdfOnly = 0; while (it.hasNext()) { ArticleFiles af = it.next(); log.info(af.toString()); CachedUrl cu = af.getFullTextCu(); String url = cu.getUrl(); assertNotNull(cu); String contentType = cu.getContentType(); log.debug("count " + count + " url " + url + " " + contentType); count++; if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == null) { ++countHtmlOnly; } if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == url) { ++countPdfOnly; } } log.debug("Article count is " + count); assertEquals(expCount, count); assertEquals(4, countHtmlOnly); assertEquals(4, countPdfOnly); }
/**
 * Checks the content-hash iteration order of the subtree rooted at {@code <urlRoot>/branch1}.
 *
 * <p>The URLs produced by the set's content-hash iterator must match the expected listing of
 * the branch, its files and its two sub-branches.
 *
 * @param sau the simulated archival unit whose {@code branch1} subtree is inspected
 */
protected void checkLeaf(SimulatedArchivalUnit sau) {
  log.debug("checkLeaf()");

  String parent = sau.getUrlRoot() + "/branch1";
  CachedUrlSet branchSet = sau.makeCachedUrlSet(new RangeCachedUrlSetSpec(parent));

  ArrayList actualUrls = new ArrayList(16);
  for (Iterator nodeIter = branchSet.contentHashIterator(); nodeIter.hasNext(); ) {
    CachedUrlSetNode node = (CachedUrlSetNode) nodeIter.next();
    actualUrls.add(node.getUrl());
  }

  String sub1 = parent + "/branch1";
  String sub2 = parent + "/branch2";
  String[] expectedUrls = {
    parent,
    parent + "/001file.html",
    parent + "/001file.txt",
    parent + "/002file.html",
    parent + "/002file.txt",
    sub1,
    sub1 + "/001file.html",
    sub1 + "/001file.txt",
    sub1 + "/002file.html",
    sub1 + "/002file.txt",
    sub1 + "/index.html",
    sub2,
    sub2 + "/001file.html",
    sub2 + "/001file.txt",
    sub2 + "/002file.html",
    sub2 + "/002file.txt",
    sub2 + "/index.html",
    parent + "/index.html",
  };
  assertIsomorphic(expectedUrls, actualUrls);
}
private V3LcapMessage makeTestVoteMessage(Collection voteBlocks) throws IOException { mPollMgr.setStateDir("key", tempDir); V3LcapMessage msg = new V3LcapMessage( "ArchivalID_2", "key", "Plug42", m_testBytes, m_testBytes, V3LcapMessage.MSG_VOTE, 987654321, m_testID, tempDir, theDaemon); // Set msg vote blocks. for (Iterator ix = voteBlocks.iterator(); ix.hasNext(); ) { msg.addVoteBlock((VoteBlock) ix.next()); } msg.setHashAlgorithm(LcapMessage.getDefaultHashAlgorithm()); msg.setArchivalId(m_archivalID); msg.setPluginVersion("PlugVer42"); return msg; }
/** * Method that creates a simulated Cached URL from the source code provided by the goodContent * String. It then asserts that the metadata extracted, by using the * MetaPressRisMetadataExtractorFactory, match the metadata in the source code. * * @throws Exception */ public void testExtractGoodRisContent() throws Exception { String goodContent = createGoodRisContent(); log.debug3(goodContent); List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME)); assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE)); assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE)); assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE)); assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN)); Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator(); for (String expAuth : goodAuthors) { assertEquals(expAuth, actAuthIter.next()); } assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE)); assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE)); assertEquals(goodDate, md.get(MetadataField.FIELD_DATE)); assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI)); // This shouldn't get set. It will default later to fuill_text_cu assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL)); }
// Break the line at commas, return a map of the resulting strings // broken at equals sign. (<i>Ie</i>, name value pairs.) Map getRow(String line) { Map map = new HashMap(); for (Iterator iter = StringUtil.breakAt(line, ',').iterator(); iter.hasNext(); ) { String item = (String) iter.next(); List pair = StringUtil.breakAt(item, '='); map.put(pair.get(0), pair.get(1)); } return map; }
/**
 * Collects the URLs of all nodes in the AU that have content, in the order produced by the
 * AU-level set's content-hash iterator.
 *
 * @param au the archival unit to enumerate
 * @return the URLs of the nodes for which {@code hasContent()} is true
 */
List<String> auUrls(ArchivalUnit au) {
  List<String> urls = new ArrayList<String>();
  Iterator nodes = au.getAuCachedUrlSet().contentHashIterator();
  while (nodes.hasNext()) {
    CachedUrlSetNode node = (CachedUrlSetNode) nodes.next();
    if (!node.hasContent()) {
      continue;
    }
    urls.add(node.getUrl());
  }
  return urls;
}
/**
 * Checks that an AuState built with a single crawl URL returns exactly that URL from
 * {@code getCrawlUrls()}.
 */
public void testGetUrls() {
  HashSet crawlUrls = new HashSet();
  crawlUrls.add("test");
  AuState state = makeAuState(mau, -1, -1, -1, -1, 123, crawlUrls, 1, -1.0, 1.0, historyRepo);

  Collection returnedUrls = state.getCrawlUrls();
  Iterator urlIter = returnedUrls.iterator();
  // Exactly one element, and it is the one that was put in.
  assertTrue(urlIter.hasNext());
  assertEquals("test", urlIter.next());
  assertFalse(urlIter.hasNext());
}
/**
 * Extracts metadata from the test dataset XML resource and checks that six records are
 * produced, each of which passes {@code validateDatasetMetadataRecord}.
 *
 * @throws Exception if the resource cannot be read or extraction fails
 */
public void testSimpleDatasetXML() throws Exception {
  log.debug3("testSimpleDatasetXML");

  String datasetXml = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));
  String datasetUrl = TAR_A_BASE + SUBDIR + "dataset.xml";

  List<ArticleMetadata> records =
      extractFromContent(datasetUrl, "text/xml", datasetXml, nocheck_mle, null);
  assertEquals(6, records.size());

  for (ArticleMetadata record : records) {
    validateDatasetMetadataRecord(record);
  }
}
/**
 * Checks the plugin's local AU config descriptors: {@code BASE_URL} and {@code VOLUME_NUMBER}
 * are accepted as-is, the {@code "issues"} parameter must be a non-definitional set, and any
 * other descriptor fails the test.
 */
public void testGetAuConfigProperties() {
  Iterator descrIter = plugin.getLocalAuConfigDescrs().iterator();
  while (descrIter.hasNext()) {
    ConfigParamDescr descr = (ConfigParamDescr) descrIter.next();

    boolean isStandardParam =
        descr.equals(ConfigParamDescr.BASE_URL) || descr.equals(ConfigParamDescr.VOLUME_NUMBER);
    if (isStandardParam) {
      continue;
    }

    if (!"issues".equals(descr.getKey())) {
      fail("Unexpected config param: " + descr.getKey());
    }
    assertEquals(ConfigParamDescr.TYPE_SET, descr.getType());
    assertFalse(descr.isDefinitional());
  }
}
// make a Schedule with one chunk per task Schedule sched(List tasks) { List events = new ArrayList(); for (Iterator iter = tasks.iterator(); iter.hasNext(); ) { Object obj = iter.next(); if (obj instanceof Schedule.Event) { events.add(obj); } else { SchedulableTask task = (SchedulableTask) obj; if (task.isBackgroundTask()) { events.add(bEvent((BackgroundTask) task, Schedule.EventType.START)); } else { events.add(chunk((StepTask) task)); } } } Schedule s = new Schedule(events); return s; }
/**
 * Feeds the RIS text from {@code createAlternateRisContent()} through
 * {@code setupContentForAU} and checks that the first extracted metadata record carries the
 * expected authors, article title, publication title, date and publisher.
 *
 * @throws Exception if setting up the content or extracting the metadata fails
 */
public void testExtractAlternateRisContent() throws Exception {
  String risText = createAlternateRisContent();
  log.debug3(risText);

  List<ArticleMetadata> records = setupContentForAU(bau1, RIS_URL, risText, false);
  assertNotEmpty(records);
  ArticleMetadata record = records.get(0);
  assertNotNull(record);

  // Authors are compared pairwise, in order, against the expected list.
  List<String> actualAuthors = record.getList(MetadataField.FIELD_AUTHOR);
  Iterator<String> actualAuthorIter = actualAuthors.iterator();
  for (String expectedAuthor : goodAuthors) {
    assertEquals(expectedAuthor, actualAuthorIter.next());
  }

  assertEquals(goodTitle, record.get(MetadataField.FIELD_ARTICLE_TITLE));
  assertEquals(goodJournal, record.get(MetadataField.FIELD_PUBLICATION_TITLE));
  assertEquals(goodDate, record.get(MetadataField.FIELD_DATE));
  assertEquals(goodPublisher, record.get(MetadataField.FIELD_PUBLISHER));
}
public void testRequestMessageCreation() throws Exception { V3LcapMessage reqMsg = new V3LcapMessage( "ArchivalID_2", "key", "Plug42", m_testBytes, m_testBytes, V3LcapMessage.MSG_REPAIR_REQ, 987654321, m_testID, tempDir, theDaemon); reqMsg.setTargetUrl("http://foo.com/"); for (Iterator ix = m_testVoteBlocks.iterator(); ix.hasNext(); ) { reqMsg.addVoteBlock((VoteBlock) ix.next()); } assertEquals(3, reqMsg.getProtocolVersion()); assertEquals("Plug42", reqMsg.getPluginVersion()); assertTrue(m_testID == reqMsg.getOriginatorId()); assertEquals(V3LcapMessage.MSG_REPAIR_REQ, reqMsg.getOpcode()); assertEquals("ArchivalID_2", reqMsg.getArchivalId()); assertEquals("http://foo.com/", reqMsg.getTargetUrl()); assertEquals(m_testBytes, reqMsg.getPollerNonce()); assertEquals(m_testBytes, reqMsg.getVoterNonce()); assertEquals(null, reqMsg.getVoterNonce2()); List aBlocks = new ArrayList(); List bBlocks = new ArrayList(); for (VoteBlocksIterator iter = m_testMsg.getVoteBlockIterator(); iter.hasNext(); ) { aBlocks.add(iter.next()); } for (VoteBlocksIterator iter = reqMsg.getVoteBlockIterator(); iter.hasNext(); ) { bBlocks.add(iter.next()); } assertEquals(aBlocks, bBlocks); // Actual size of test vote blocks is unpredictable assertTrue(reqMsg.getEstimatedEncodedLength() > V3LcapMessage.EST_ENCODED_HEADER_LENGTH); }
public void testStorePollHistories() throws Exception { TimeBase.setSimulated(123321); MockCachedUrlSetSpec mspec = new MockCachedUrlSetSpec("http://www.example.com", null); CachedUrlSet mcus = new MockCachedUrlSet(mau, mspec); NodeStateImpl nodeState = new NodeStateImpl(mcus, -1, null, null, repository); List histories = ListUtil.list( createPollHistoryBean(3), createPollHistoryBean(3), createPollHistoryBean(3), createPollHistoryBean(3), createPollHistoryBean(3)); /* * CASTOR: [summary] Rewrite test in non-Castor way * This is obviously not an appropriate way of writing this test, * Right now it creates sample data in Castor format, from legacy * code back when Castor was the built-in serialization engine. * TODO: Rewrite test in non-Castor way */ // nodeState.setPollHistoryBeanList(histories); nodeState.setPollHistoryList(NodeHistoryBean.fromBeanListToList(histories)); repository.storePollHistories(nodeState); String filePath = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau); filePath = LockssRepositoryImpl.mapUrlToFileLocation( filePath, "http://www.example.com/" + HistoryRepositoryImpl.HISTORY_FILE_NAME); File xmlFile = new File(filePath); assertTrue(xmlFile.exists()); nodeState.setPollHistoryList(new ArrayList()); repository.loadPollHistories(nodeState); List loadedHistory = nodeState.getPollHistoryList(); assertEquals(histories.size(), loadedHistory.size()); // CASTOR: some Castor-tailored stuff here // PollHistoryBean expect1 = (PollHistoryBean)histories.get(0); // PollHistoryBean elem1 = (PollHistoryBean)loadedHistory.get(0); PollHistory expect1 = (PollHistory) histories.get(0); PollHistory elem1 = (PollHistory) loadedHistory.get(0); assertEquals(expect1.type, elem1.type); assertEquals(expect1.lwrBound, elem1.lwrBound); assertEquals(expect1.uprBound, elem1.uprBound); assertEquals(expect1.status, elem1.status); assertEquals(expect1.startTime, elem1.startTime); assertEquals(expect1.duration, elem1.duration); // CASTOR: some Castor-tailored stuff 
here // List expectBeans = (List)expect1.getVoteBeans(); // List elemBeans = (List)elem1.getVoteBeans(); Iterator expectIter = (Iterator) expect1.getVotes(); Iterator elemIter = (Iterator) elem1.getVotes(); while (expectIter.hasNext() && elemIter.hasNext()) { Vote expectVote = (Vote) expectIter.next(); Vote elemVote = (Vote) elemIter.next(); assertEquals( expectVote.getVoterIdentity().getIdString(), elemVote.getVoterIdentity().getIdString()); assertEquals(expectVote.isAgreeVote(), elemVote.isAgreeVote()); assertEquals(expectVote.getChallengeString(), elemVote.getChallengeString()); assertEquals(expectVote.getVerifierString(), elemVote.getVerifierString()); assertEquals(expectVote.getHashString(), elemVote.getHashString()); } assertFalse(expectIter.hasNext()); assertFalse(expectIter.hasNext()); TimeBase.setReal(); }
public void testStoreAuState() throws Exception { HashSet strCol = new HashSet(); strCol.add("test"); AuState origState = new AuState( mau, 123000, 123123, 41, "woop woop", 321000, 222000, 3, "pollres", 12345, 456000, strCol, AuState.AccessType.OpenAccess, 2, 1.0, 1.0, SubstanceChecker.State.Yes, "SubstVer3", "MetadatVer7", 111444, 12345, 111222, // lastPoPPoll 7, // lastPoPPollResult 222333, // lastLocalHashScan 444777, // numAgreePeersLastPoR 777444, // numWillingRepairers 747474, // numCurrentSuspectVersions ListUtil.list("http://hos.t/pa/th"), repository); assertEquals("SubstVer3", origState.getFeatureVersion(Plugin.Feature.Substance)); assertEquals("MetadatVer7", origState.getFeatureVersion(Plugin.Feature.Metadata)); assertEquals(111444, origState.getLastMetadataIndex()); repository.storeAuState(origState); String filePath = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau); filePath += HistoryRepositoryImpl.AU_FILE_NAME; File xmlFile = new File(filePath); assertTrue(xmlFile.exists()); origState = null; AuState loadedState = repository.loadAuState(); assertEquals(123000, loadedState.getLastCrawlTime()); assertEquals(123123, loadedState.getLastCrawlAttempt()); assertEquals(41, loadedState.getLastCrawlResult()); assertEquals("woop woop", loadedState.getLastCrawlResultMsg()); assertEquals(321000, loadedState.getLastTopLevelPollTime()); assertEquals(222000, loadedState.getLastPollStart()); assertEquals(3, loadedState.getLastPollResult()); assertEquals("Inviting Peers", loadedState.getLastPollResultMsg()); assertEquals(111222, loadedState.getLastPoPPoll()); assertEquals(7, loadedState.getLastPoPPollResult()); assertEquals(222333, loadedState.getLastLocalHashScan()); assertEquals(444777, loadedState.getNumAgreePeersLastPoR()); assertEquals(777444, loadedState.getNumWillingRepairers()); assertEquals(747474, loadedState.getNumCurrentSuspectVersions()); assertEquals(ListUtil.list("http://hos.t/pa/th"), loadedState.getCdnStems()); 
loadedState.addCdnStem("http://this.is.new/"); assertEquals( ListUtil.list("http://hos.t/pa/th", "http://this.is.new/"), loadedState.getCdnStems()); assertEquals(12345, loadedState.getPollDuration()); assertEquals(2, loadedState.getClockssSubscriptionStatus()); assertEquals(AuState.AccessType.OpenAccess, loadedState.getAccessType()); assertEquals(SubstanceChecker.State.Yes, loadedState.getSubstanceState()); assertEquals("SubstVer3", loadedState.getFeatureVersion(Plugin.Feature.Substance)); assertEquals("MetadatVer7", loadedState.getFeatureVersion(Plugin.Feature.Metadata)); assertEquals(111444, loadedState.getLastMetadataIndex()); assertEquals(12345, loadedState.getLastContentChange()); assertEquals(mau.getAuId(), loadedState.getArchivalUnit().getAuId()); // check crawl urls Collection col = loadedState.getCrawlUrls(); Iterator colIter = col.iterator(); assertTrue(colIter.hasNext()); assertEquals("test", colIter.next()); assertFalse(colIter.hasNext()); }