/** Test class for <code>org.lockss.scheduler.StepperTask</code> */ public class TestStepperTask extends LockssTestCase { public static Class testedClasses[] = { org.lockss.scheduler.StepTask.class, org.lockss.scheduler.StepperTask.class, }; static Logger log = Logger.getLogger("TestStepperTask"); public void setUp() throws Exception { super.setUp(); TimeBase.setSimulated(); } public void tearDown() throws Exception { TimeBase.setReal(); super.tearDown(); } static StepperTask taskBetween(long minStart, long deadline, int duration, Stepper stepper) { return new StepperTask( Deadline.at(minStart), Deadline.at(deadline), duration, null, null, stepper); } Stepper newSt(final boolean isFinished) { return new Stepper() { public int computeStep(int metric) { return 0; } public boolean isFinished() { return isFinished; } }; } public void testStepper() { Stepper st = newSt(false); StepperTask t = taskBetween(100, 200, 50, st); assertEquals(st, t.getStepper()); assertFalse(t.isBackgroundTask()); assertFalse(t.isFinished()); t.e = new Exception(); assertTrue(t.isFinished()); Stepper st2 = newSt(true); StepperTask t2 = taskBetween(100, 200, 50, st2); assertTrue(t2.isFinished()); } public void testToString() { Stepper st = newSt(false); StepperTask t = taskBetween(100, 200, 50, st); t.toString(); t.cookie = "foo"; t.toString(); } }
/**
 * Tests for the Elsevier XML link extractor factory: verifies that {@code <file name="...">}
 * entries in an Elsevier dataset manifest are extracted as URLs relative to the source URL, and
 * that a manifest without file entries yields no links.
 */
public class TestElsevierXmlLinkExtractorFactory extends LinkExtractorTestCase {
  private static Logger logger = Logger.getLogger("TestElsevierXmlLinkExtractorFactory");

  // Base URL the extracted file names are resolved against.
  String srcUrl = "http://www.example.com/";

  // Dataset manifest containing three <file> entries.
  private static final String withLinks =
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
          + "<!DOCTYPE dataset SYSTEM \"http://support.sciencedirect.com/xml/sdosftp10.dtd\">\n"
          + "<dataset identifier=\"OXM10160\" customer=\"OHL\""
          + " status=\"Announcement\""
          + " version=\"Network Dataset Announcement/Confirmation v1.0\">"
          + " <date year=\"2007\" month=\"May\" day=\"1\"/>\n"
          + "<file name=\"01407007.tar\" size=\"21780480\""
          + " md5=\"6c7266e0e246bf3e8cf1cd8b659a7a73\"/>\n"
          + "<file name=\"03064530.tar\" size=\"12748800\""
          + " md5=\"df9519d3075e164d22f5dd4988a693c3\"/>\n"
          + "<file name=\"dataset.toc\" size=\"2216587\""
          + " md5=\"cd21741eb91fa0fdfef2fa36485e21a0\"/>\n"
          + "</dataset>\n";

  // Same manifest shape but with no <file> entries at all.
  private static final String withoutLinks =
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
          + "<!DOCTYPE dataset SYSTEM \"http://support.sciencedirect.com/xml/sdosftp10.dtd\">\n"
          + "<dataset identifier=\"OXM10160\" customer=\"OHL\""
          + " status=\"Announcement\""
          + " version=\"Network Dataset Announcement/Confirmation v1.0\">"
          + " <date year=\"2007\" month=\"May\" day=\"1\"/>\n"
          + "</dataset>\n";

  // File names that should be discovered in withLinks, in manifest order.
  private static final String[] links = {
    "01407007.tar", "03064530.tar", "dataset.toc",
  };

  public String getMimeType() {
    return "text/xml";
  }

  public LinkExtractorFactory getFactory() {
    return new ElsevierXmlLinkExtractorFactory();
  }

  public void testFindCorrectEntries() throws Exception {
    // Was a raw Set/HashSet; parameterize to match the file's generic style.
    Set<String> expected = new HashSet<String>();
    for (String link : links) {
      expected.add(srcUrl + link);
    }
    assertEquals(expected, extractUrls(withLinks));
  }

  public void testFindNoEntries() throws Exception {
    assertEmpty(extractUrls(withoutLinks));
  }
}
/** JUnitTest case for class: org.lockss.poller.Poll */
public class TestPoll extends LockssTestCase {
  private static Logger log = Logger.getLogger("TestPoll");

  // One AU root URL per test poll; initTestMsg builds a name, content and
  // verify poll message from these, in order.
  private static String[] rootV1urls = {
    "http://www.test.org", "http://www.test1.org", "http://www.test2.org"
  };
  private static String lwrbnd = "test1.doc";
  private static String uprbnd = "test3.doc";
  private static long testduration = Constants.DAY;

  //  protected ArchivalUnit testau =
  //      PollTestPlugin.PTArchivalUnit.createFromListOfRootUrls(rootV1urls);
  protected MockArchivalUnit testau;
  private IdentityManager idmgr;
  private MockLockssDaemon theDaemon;

  // Canned name-list payloads used as the entries carried by vote messages.
  private ArrayList agree_entries = makeEntries(10, 50);
  private ArrayList disagree_entries = makeEntries(15, 57);
  private ArrayList dissenting_entries = makeEntries(7, 50);

  protected PeerIdentity testID;
  protected PeerIdentity testID1;
  // testV1msg[i] / testV1polls[i]: 0 = name poll, 1 = content poll, 2 = verify poll.
  protected V1LcapMessage[] testV1msg;
  protected V1Poll[] testV1polls;
  protected PollManager pollmanager;

  // NOTE: init order matters — services must exist before peer IDs, which must
  // exist before messages, which must exist before polls.
  protected void setUp() throws Exception {
    super.setUp();
    TimeBase.setSimulated();
    initRequiredServices();
    testau.setPlugin(new MockPlugin());
    initTestPeerIDs();
    initTestMsg();
    initTestPolls();
  }

  /**
   * tearDown method for test case
   *
   * @throws Exception if removePoll failed
   */
  public void tearDown() throws Exception {
    pollmanager.stopService();
    theDaemon.getLockssRepository(testau).stopService();
    theDaemon.getHashService().stopService();
    theDaemon.getDatagramRouterManager().stopService();
    theDaemon.getRouterManager().stopService();
    theDaemon.getSystemMetrics().stopService();
    TimeBase.setReal();
    for (int i = 0; i < testV1msg.length; i++) {
      if (testV1msg[i] != null) pollmanager.removePoll(testV1msg[i].getKey());
    }
    super.tearDown();
  }

  /** test for method scheduleVote(..) */
  public void testScheduleVote() {
    V1Poll p = testV1polls[1];
    assertTrue(p instanceof V1ContentPoll);
    log.debug3("testScheduleVote 1");
    p.scheduleVote();
    log.debug3("testScheduleVote 2");
    assertNotNull(p.m_voteTime);
    // The vote must be scheduled to fire before the poll deadline.
    assertTrue(p.m_voteTime.getRemainingTime() < p.m_deadline.getRemainingTime());
    log.debug3("at end of testScheduleVote");
  }

  /** test for method checkVote(..) */
  public void testCheckVote() throws Exception {
    V1LcapMessage msg = null;
    log.debug3("starting testCheeckVote");
    msg =
        V1LcapMessage.makeReplyMsg(
            testV1polls[0].getMessage(),
            ByteArray.makeRandomBytes(20),
            ByteArray.makeRandomBytes(20),
            null,
            V1LcapMessage.NAME_POLL_REP,
            testduration,
            testID);
    log.debug3("testCheeckVote 2");
    V1Poll p = null;
    // Start from a completed poll with an 8/2 agree/disagree tally.
    p = createCompletedPoll(theDaemon, testau, msg, 8, 2, pollmanager);
    assertTrue(p instanceof V1NamePoll);
    log.debug3("testCheeckVote 3");
    assertNotNull(p);
    PeerIdentity id = msg.getOriginatorId();
    assertNotNull(id);
    assertNotNull(p.m_tally);
    int rep = p.m_tally.wtAgree + idmgr.getReputation(id);
    // good vote check
    p.checkVote(msg.getHashed(), new Vote(msg, false));
    assertEquals(9, p.m_tally.numAgree);
    assertEquals(2, p.m_tally.numDisagree);
    assertEquals(rep, p.m_tally.wtAgree);
    rep = p.m_tally.wtDisagree + idmgr.getReputation(id);
    // bad vote check
    p.checkVote(ByteArray.makeRandomBytes(20), new Vote(msg, false));
    assertEquals(9, p.m_tally.numAgree);
    assertEquals(3, p.m_tally.numDisagree);
    assertEquals(rep, p.m_tally.wtDisagree);
  }

  /** test for method tally(..) */
  public void testTally() {
    // Three disagreeing votes on the name poll...
    V1Poll p = testV1polls[0];
    LcapMessage msg = p.getMessage();
    PeerIdentity id = msg.getOriginatorId();
    p.m_tally.addVote(new Vote(msg, false), id, false);
    p.m_tally.addVote(new Vote(msg, false), id, false);
    p.m_tally.addVote(new Vote(msg, false), id, false);
    assertEquals(0, p.m_tally.numAgree);
    assertEquals(0, p.m_tally.wtAgree);
    assertEquals(3, p.m_tally.numDisagree);
    assertEquals(1500, p.m_tally.wtDisagree);
    // ...and three agreeing votes on the content poll.
    p = testV1polls[1];
    msg = p.getMessage();
    p.m_tally.addVote(new Vote(msg, true), id, false);
    p.m_tally.addVote(new Vote(msg, true), id, false);
    p.m_tally.addVote(new Vote(msg, true), id, false);
    assertEquals(3, p.m_tally.numAgree);
    assertEquals(1500, p.m_tally.wtAgree);
    assertEquals(0, p.m_tally.numDisagree);
    assertEquals(0, p.m_tally.wtDisagree);
  }

  public void testNamePollTally() throws Exception {
    V1NamePoll np;
    // test a name poll we won
    np = makeCompletedNamePoll(4, 1, 0);
    // Counts include our own vote, hence one more than requested.
    assertEquals(5, np.m_tally.numAgree);
    assertEquals(1, np.m_tally.numDisagree);
    assertEquals(Tallier.RESULT_WON, np.m_tally.getTallyResult());

    // test a name poll we lost with a dissenting vote
    np = makeCompletedNamePoll(1, 8, 1);
    assertEquals(2, np.m_tally.numAgree);
    assertEquals(9, np.m_tally.numDisagree);
    // build a master list
    np.buildPollLists(np.m_tally.pollVotes.iterator());
    // these should be different since we lost the poll
    assertFalse(CollectionUtil.isIsomorphic(np.m_tally.localEntries, np.m_tally.votedEntries));
    // the expected "correct" set is in our disagree msg
    assertTrue(CollectionUtil.isIsomorphic(disagree_entries, np.m_tally.votedEntries));
  }

  /** test for method vote(..) */
  public void testVote() {
    V1Poll p = testV1polls[1];
    p.m_hash = ByteArray.makeRandomBytes(20);
    try {
      p.castOurVote();
    } catch (IllegalStateException e) {
      // the socket isn't inited and should squack
    }
    p.m_pollstate = V1Poll.PS_COMPLETE;
  }

  /** test for method voteInPoll(..) */
  public void testVoteInPoll() {
    V1Poll p = testV1polls[1];
    // Below quorum: 5 + 2 < 10, so the poll should try to cast our vote.
    p.m_tally.quorum = 10;
    p.m_tally.numAgree = 5;
    p.m_tally.numDisagree = 2;
    p.m_tally.wtAgree = 2000;
    p.m_tally.wtDisagree = 200;
    p.m_hash = ByteArray.makeRandomBytes(20);
    try {
      p.voteInPoll();
    } catch (IllegalStateException e) {
      // the socket isn't inited and should squack
    }
    // Above quorum: the vote path differs and fails with NPE instead.
    p.m_tally.numAgree = 20;
    try {
      p.voteInPoll();
    } catch (NullPointerException npe) {
      // the socket isn't inited and should squack
    }
    p.m_pollstate = V1Poll.PS_COMPLETE;
  }

  public void testStartPoll() {
    V1Poll p = testV1polls[0];
    p.startPoll();
    assertEquals(V1Poll.PS_WAIT_HASH, p.m_pollstate);
    p.m_pollstate = V1Poll.PS_COMPLETE;
  }

  public void testScheduleOurHash() {
    V1Poll p = testV1polls[0];
    p.m_pollstate = V1Poll.PS_WAIT_HASH;
    // no time has elapsed - so we should be able to schedule our hash
    assertTrue(p.scheduleOurHash());
    // half the time has elapsed so we should be able to schedule our hash
    TimeBase.step(p.m_deadline.getRemainingTime() / 2);
    assertTrue(p.scheduleOurHash());
    // all of the time has elapsed we should not be able to schedule our hash
    TimeBase.step(p.m_deadline.getRemainingTime() - 1000);
    assertFalse(p.scheduleOurHash());
    p.m_pollstate = V1Poll.PS_COMPLETE;
  }

  /** test for method stopPoll(..) */
  public void testStopPoll() {
    V1Poll p = testV1polls[1];
    p.m_tally.quorum = 10;
    p.m_tally.numAgree = 7;
    p.m_tally.numDisagree = 3;
    p.m_pollstate = V1Poll.PS_WAIT_TALLY;
    p.stopPoll();
    assertTrue(p.m_pollstate == V1Poll.PS_COMPLETE);
    // Restarting a completed poll must leave it completed.
    p.startPoll();
    assertTrue(p.m_pollstate == V1Poll.PS_COMPLETE);
  }

  /** test for method startVoteCheck(..) */
  public void testStartVote() {
    V1Poll p = testV1polls[0];
    p.m_pendingVotes = 3;
    p.startVoteCheck();
    assertEquals(4, p.m_pendingVotes);
    p.m_pollstate = V1Poll.PS_COMPLETE;
  }

  /** test for method stopVote(..) */
  public void testStopVote() {
    V1Poll p = testV1polls[1];
    p.m_pendingVotes = 3;
    p.stopVoteCheck();
    assertEquals(2, p.m_pendingVotes);
    p.m_pollstate = V1Poll.PS_COMPLETE;
  }

  /**
   * Builds a completed V1 name poll with our own (agreeing) vote plus the given number of agree,
   * disagree and dissenting-disagree votes, then tallies it.
   *
   * @param numAgree votes echoing agree_entries
   * @param numDisagree votes carrying disagree_entries
   * @param numDissenting votes carrying dissenting_entries
   * @return the tallied name poll
   * @throws Exception on message/poll construction failure
   */
  private V1NamePoll makeCompletedNamePoll(int numAgree, int numDisagree, int numDissenting)
      throws Exception {
    V1NamePoll np = null;
    V1LcapMessage agree_msg = null;
    V1LcapMessage disagree_msg1 = null;
    V1LcapMessage disagree_msg2 = null;
    Plugin plugin = testau.getPlugin();
    PollSpec spec = new MockPollSpec(testau, rootV1urls[0], null, null, Poll.V1_NAME_POLL);
    ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
    V1LcapMessage poll_msg =
        V1LcapMessage.makeRequestMsg(
            spec,
            null,
            ByteArray.makeRandomBytes(20),
            ByteArray.makeRandomBytes(20),
            V1LcapMessage.NAME_POLL_REQ,
            testduration,
            testID);

    // make our poll
    np =
        (V1NamePoll)
            new V1NamePoll(
                spec,
                pollmanager,
                poll_msg.getOriginatorId(),
                poll_msg.getChallenge(),
                poll_msg.getDuration(),
                poll_msg.getHashAlgorithm());
    np.setMessage(poll_msg);

    // generate agree vote msg
    agree_msg =
        V1LcapMessage.makeReplyMsg(
            poll_msg,
            ByteArray.makeRandomBytes(20),
            poll_msg.getVerifier(),
            agree_entries,
            V1LcapMessage.NAME_POLL_REP,
            testduration,
            testID);

    // generate a disagree vote msg
    disagree_msg1 =
        V1LcapMessage.makeReplyMsg(
            poll_msg,
            ByteArray.makeRandomBytes(20),
            ByteArray.makeRandomBytes(20),
            disagree_entries,
            V1LcapMessage.NAME_POLL_REP,
            testduration,
            testID1);

    // generate a losing disagree vote msg
    disagree_msg2 =
        V1LcapMessage.makeReplyMsg(
            poll_msg,
            ByteArray.makeRandomBytes(20),
            ByteArray.makeRandomBytes(20),
            dissenting_entries,
            V1LcapMessage.NAME_POLL_REP,
            testduration,
            testID1);

    // add our vote
    V1LcapMessage msg = (V1LcapMessage) (np.getMessage());
    PeerIdentity id = msg.getOriginatorId();
    np.m_tally.addVote(np.makeNameVote(msg, true), id, true);

    // add the agree votes
    id = agree_msg.getOriginatorId();
    for (int i = 0; i < numAgree; i++) {
      np.m_tally.addVote(np.makeNameVote(agree_msg, true), id, false);
    }

    // add the disagree votes
    id = disagree_msg1.getOriginatorId();
    for (int i = 0; i < numDisagree; i++) {
      np.m_tally.addVote(np.makeNameVote(disagree_msg1, false), id, false);
    }

    // add dissenting disagree vote
    id = disagree_msg2.getOriginatorId();
    for (int i = 0; i < numDissenting; i++) {
      np.m_tally.addVote(np.makeNameVote(disagree_msg2, false), id, false);
    }
    np.m_pollstate = V1Poll.PS_COMPLETE;
    np.m_tally.tallyVotes();
    return np;
  }

  /**
   * Builds a poll of the type indicated by testmsg (content/name/verify), marks it complete and
   * seeds its tally with the given vote counts and fixed weights (2000 agree / 200 disagree), then
   * tallies it. Also used by other poller tests.
   *
   * @param daemon the daemon (unused here but part of the shared signature)
   * @param au the archival unit polled
   * @param testmsg the request message describing the poll
   * @param numAgree seeded agree count
   * @param numDisagree seeded disagree count
   * @param pollmanager the poll manager to attach the poll to
   * @return the completed, tallied poll
   * @throws Exception on construction failure
   */
  public static V1Poll createCompletedPoll(
      LockssDaemon daemon,
      ArchivalUnit au,
      V1LcapMessage testmsg,
      int numAgree,
      int numDisagree,
      PollManager pollmanager)
      throws Exception {
    log.debug(
        "createCompletedPoll: au: "
            + au.toString()
            + " peer "
            + testmsg.getOriginatorId()
            + " votes "
            + numAgree
            + "/"
            + numDisagree);
    CachedUrlSetSpec cusSpec = null;
    if ((testmsg.getLwrBound() != null)
        && (testmsg.getLwrBound().equals(PollSpec.SINGLE_NODE_LWRBOUND))) {
      cusSpec = new SingleNodeCachedUrlSetSpec(testmsg.getTargetUrl());
    } else {
      cusSpec =
          new RangeCachedUrlSetSpec(
              testmsg.getTargetUrl(), testmsg.getLwrBound(), testmsg.getUprBound());
    }
    CachedUrlSet cus = au.makeCachedUrlSet(cusSpec);
    PollSpec spec = new PollSpec(cus, Poll.V1_CONTENT_POLL);
    ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
    V1Poll p = null;
    if (testmsg.isContentPoll()) {
      p =
          new V1ContentPoll(
              spec,
              pollmanager,
              testmsg.getOriginatorId(),
              testmsg.getChallenge(),
              testmsg.getDuration(),
              testmsg.getHashAlgorithm());
    } else if (testmsg.isNamePoll()) {
      p =
          new V1NamePoll(
              spec,
              pollmanager,
              testmsg.getOriginatorId(),
              testmsg.getChallenge(),
              testmsg.getDuration(),
              testmsg.getHashAlgorithm());
    } else if (testmsg.isVerifyPoll()) {
      p =
          new V1VerifyPoll(
              spec,
              pollmanager,
              testmsg.getOriginatorId(),
              testmsg.getChallenge(),
              testmsg.getDuration(),
              testmsg.getHashAlgorithm(),
              testmsg.getVerifier());
    }
    assertNotNull(p);
    p.setMessage(testmsg);
    p.m_tally.quorum = numAgree + numDisagree;
    p.m_tally.numAgree = numAgree;
    p.m_tally.numDisagree = numDisagree;
    p.m_tally.wtAgree = 2000;
    p.m_tally.wtDisagree = 200;
    // localEntries and votedEntries deliberately differ (votedEntries has an
    // extra range with one element removed) so list comparisons are meaningful.
    p.m_tally.localEntries = makeEntries(1, 3);
    p.m_tally.votedEntries = makeEntries(1, 5);
    p.m_tally.votedEntries.remove(1);
    p.m_pollstate = V1Poll.PS_COMPLETE;
    p.m_callerID = testmsg.getOriginatorId();
    log.debug3("poll " + p.toString());
    p.m_tally.tallyVotes();
    return p;
  }

  /**
   * Builds a list of NameListEntry objects /testentry<n>.html for n in [firstEntry, lastEntry],
   * alternating the boolean flag on every other entry.
   *
   * @param firstEntry first index, inclusive
   * @param lastEntry last index, inclusive
   * @return the list of entries
   */
  public static ArrayList makeEntries(int firstEntry, int lastEntry) {
    int numEntries = lastEntry - firstEntry + 1;
    ArrayList ret_arry = new ArrayList(numEntries);
    for (int i = 0; i < numEntries; i++) {
      String name = "/testentry" + (firstEntry + i) + ".html";
      ret_arry.add(new PollTally.NameListEntry(i % 2 == 1, name));
    }
    return ret_arry;
  }

  // Stands up the daemon services and configuration the polls depend on.
  private void initRequiredServices() {
    theDaemon = getMockLockssDaemon();
    pollmanager = new LocalPollManager();
    pollmanager.initService(theDaemon);
    theDaemon.setPollManager(pollmanager);
    theDaemon.getPluginManager();
    testau = PollTestPlugin.PTArchivalUnit.createFromListOfRootUrls(rootV1urls);
    PluginTestUtil.registerArchivalUnit(testau);
    String tempDirPath = null;
    try {
      tempDirPath = getTempDir().getAbsolutePath() + File.separator;
    } catch (IOException ex) {
      fail("unable to create a temporary directory");
    }
    Properties p = new Properties();
    p.setProperty(IdentityManager.PARAM_IDDB_DIR, tempDirPath + "iddb");
    p.setProperty(LockssRepositoryImpl.PARAM_CACHE_LOCATION, tempDirPath);
    p.setProperty(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, tempDirPath);
    p.setProperty(IdentityManager.PARAM_LOCAL_IP, "127.0.0.1");
    p.setProperty(ConfigManager.PARAM_NEW_SCHEDULER, "false");
    // XXX we need to disable verification of votes because the
    // voter isn't really there
    p.setProperty(V1Poll.PARAM_AGREE_VERIFY, "0");
    p.setProperty(V1Poll.PARAM_DISAGREE_VERIFY, "0");
    ConfigurationUtil.setCurrentConfigFromProps(p);
    idmgr = theDaemon.getIdentityManager();
    idmgr.startService();
    // theDaemon.getSchedService().startService();
    theDaemon.getHashService().startService();
    theDaemon.getDatagramRouterManager().startService();
    theDaemon.getRouterManager().startService();
    theDaemon.getSystemMetrics().startService();
    theDaemon.getActivityRegulator(testau).startService();
    theDaemon.setNodeManager(new MockNodeManager(), testau);
    pollmanager.startService();
  }

  // Resolves the two peer identities used as vote originators.
  private void initTestPeerIDs() {
    try {
      testID = idmgr.stringToPeerIdentity("127.0.0.1");
      testID1 = idmgr.stringToPeerIdentity("1.1.1.1");
    } catch (IdentityManager.MalformedIdentityKeyException ex) {
      fail("can't open test host");
    }
  }

  // Builds one request message per poll type (name, content, verify).
  private void initTestMsg() throws Exception {
    testV1msg = new V1LcapMessage[3];
    int[] pollType = {
      Poll.V1_NAME_POLL, Poll.V1_CONTENT_POLL, Poll.V1_VERIFY_POLL,
    };
    PollFactory ppf = pollmanager.getPollFactory(1);
    assertNotNull("PollFactory should not be null", ppf);
    // XXX V1 support mandatory
    assertTrue(ppf instanceof V1PollFactory);
    V1PollFactory pf = (V1PollFactory) ppf;
    for (int i = 0; i < testV1msg.length; i++) {
      PollSpec spec = new MockPollSpec(testau, rootV1urls[i], lwrbnd, uprbnd, pollType[i]);
      log.debug("Created poll spec: " + spec);
      ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
      // Request opcodes for the three poll types are spaced two apart.
      int opcode = V1LcapMessage.NAME_POLL_REQ + (i * 2);
      long duration = -1;
      // NB calcDuration is not applied to Verify polls.
      switch (opcode) {
        case V1LcapMessage.NAME_POLL_REQ:
        case V1LcapMessage.CONTENT_POLL_REQ:
          // this will attempt to schedule and can return -1
          duration = Math.max(pf.calcDuration(spec, pollmanager), 1000);
          break;
        case V1LcapMessage.VERIFY_POLL_REQ:
        case V1LcapMessage.VERIFY_POLL_REP:
          duration = 100000; // Arbitrary
          break;
        default:
          fail("Bad opcode " + opcode);
          break;
      }
      testV1msg[i] =
          V1LcapMessage.makeRequestMsg(
              spec,
              agree_entries,
              pf.makeVerifier(100000),
              pf.makeVerifier(100000),
              opcode,
              duration,
              testID);
      assertNotNull(testV1msg[i]);
    }
  }

  // Turns each test message into a live poll and sanity-checks its type.
  private void initTestPolls() throws Exception {
    testV1polls = new V1Poll[testV1msg.length];
    for (int i = 0; i < testV1polls.length; i++) {
      log.debug3("initTestPolls: V1 " + i);
      BasePoll p = pollmanager.makePoll(testV1msg[i]);
      assertNotNull(p);
      assertNotNull(p.getMessage());
      log.debug("initTestPolls: V1 " + i + " returns " + p);
      assertTrue(p instanceof V1Poll);
      switch (i) {
        case 0:
          assertTrue(p instanceof V1NamePoll);
          break;
        case 1:
          assertTrue(p instanceof V1ContentPoll);
          break;
        case 2:
          assertTrue(p instanceof V1VerifyPoll);
          break;
      }
      testV1polls[i] = (V1Poll) p;
      assertNotNull(testV1polls[i]);
      log.debug3("initTestPolls: " + i + " " + p.toString());
    }
  }

  // Poll manager that never touches the network, so tests can run offline.
  static class LocalPollManager extends PollManager {
    // ignore message sends
    public void sendMessage(V1LcapMessage msg, ArchivalUnit au) throws IOException {}
  }

  /**
   * Executes the test case
   *
   * @param argv array of Strings containing command line arguments
   */
  public static void main(String[] argv) {
    String[] testCaseList = {TestPoll.class.getName()};
    junit.swingui.TestRunner.main(testCaseList);
  }
}
/** Minimal fully functional plugin capable of serving a little static content. */ public class StaticContentPlugin extends BasePlugin implements PluginTestable { static Logger log = Logger.getLogger("StaticContentPlugin"); Map cuMap = new HashMap(); public StaticContentPlugin() {} public String getVersion() { throw new UnsupportedOperationException("Not implemented"); } public String getPluginName() { return "Static Content"; } public List getSupportedTitles() { throw new UnsupportedOperationException("Not implemented"); } public List getLocalAuConfigDescrs() { return Collections.EMPTY_LIST; // throw new UnsupportedOperationException("Not implemented"); } protected ArchivalUnit createAu0(Configuration auConfig) throws ArchivalUnit.ConfigurationException { return new SAU(this); } public void registerArchivalUnit(ArchivalUnit au) { aus.add(au); } public void unregisterArchivalUnit(ArchivalUnit au) { aus.remove(au); } public class SAU extends BaseArchivalUnit { protected SAU(Plugin myPlugin) { super(myPlugin); } protected String makeName() { return "Static Content AU"; } protected String makeStartUrl() { throw new UnsupportedOperationException("Not Implemented"); } public CachedUrlSet makeCachedUrlSet(CachedUrlSetSpec cuss) { return new SCUS(this, cuss); } public CachedUrl makeCachedUrl(String url) { CachedUrl res = (CachedUrl) cuMap.get(url); log.debug("makeCachedUrl(" + url + ") = " + res); return (CachedUrl) cuMap.get(url); } public org.lockss.plugin.UrlCacher makeUrlCacher(String url) { throw new UnsupportedOperationException("Not implemented"); } public boolean shouldBeCached(String url) { return cuMap.containsKey(url); } public List getNewContentCrawlUrls() { throw new UnsupportedOperationException("Not implemented"); } public Collection getUrlStems() { throw new UnsupportedOperationException("Not implemented"); } public CachedUrlSet cachedUrlSetFactory(ArchivalUnit owner, CachedUrlSetSpec cuss) { throw new UnsupportedOperationException("Not implemented"); } 
public CachedUrl cachedUrlFactory(CachedUrlSet owner, String url) { throw new UnsupportedOperationException("Not implemented"); } public UrlCacher urlCacherFactory(CachedUrlSet owner, String url) { throw new UnsupportedOperationException("Not implemented"); } public String getManifestPage() { throw new UnsupportedOperationException("Not Implemented"); } public FilterRule getFilterRule(String mimeType) { throw new UnsupportedOperationException("Not implemented"); } /** * Create a CU with content and store it in AU * * @param owner the CUS owner * @param url the url * @param type the type * @param contents the contents */ public void storeCachedUrl(CachedUrlSet owner, String url, String type, String contents) { SCU scu = new SCU(owner, url, type, contents); cuMap.put(scu.getUrl(), scu); } public void storeCachedUrl(String url, String type, String contents) { storeCachedUrl(null, url, type, contents); } public String toString() { return "[sau: " + cuMap + "]"; } protected CrawlRule makeRules() { throw new UnsupportedOperationException("Not implemented"); } /** * loadDefiningConfig * * @param config Configuration */ protected void loadAuConfigDescrs(Configuration config) {} } public class SCU extends BaseCachedUrl { private String contents = null; private CIProperties props = new CIProperties(); public SCU(CachedUrlSet owner, String url) { super(null, url); } /** * Create a CachedUrl with content * * @param owner the CUS owner * @param url the url * @param type the type * @param contents the contents */ public SCU(CachedUrlSet owner, String url, String type, String contents) { this(owner, url); setContents(contents); props.setProperty(CachedUrl.PROPERTY_CONTENT_TYPE, type); } private void setContents(String s) { contents = s; props.setProperty("Content-Length", "" + s.length()); } public String getUrl() { return url; } public boolean hasContent() { return contents != null; } public boolean isLeaf() { throw new UnsupportedOperationException("Not implemented"); } public 
InputStream getUnfilteredInputStream() { return new StringInputStream(contents); } public InputStream openForHashing() { return getUnfilteredInputStream(); } protected InputStream getFilteredStream() { throw new UnsupportedOperationException("Not implemented"); } public Reader openForReading() { throw new UnsupportedOperationException("Not implemented"); } public long getContentSize() { return contents == null ? 0 : contents.length(); } public CIProperties getProperties() { return props; } } class SCUS extends BaseCachedUrlSet { public SCUS(ArchivalUnit owner, CachedUrlSetSpec spec) { super(owner, spec); } public void storeActualHashDuration(long elapsed, Exception err) { throw new UnsupportedOperationException("Not implemented"); } public Iterator flatSetIterator() { throw new UnsupportedOperationException("Not implemented"); } public Iterator contentHashIterator() { throw new UnsupportedOperationException("Not implemented"); } public boolean isLeaf() { throw new UnsupportedOperationException("Not implemented"); } public CachedUrlSetHasher getContentHasher(MessageDigest digest) { throw new UnsupportedOperationException("Not implemented"); } public CachedUrlSetHasher getNameHasher(MessageDigest digest) { throw new UnsupportedOperationException("Not implemented"); } public long estimatedHashDuration() { return 1000; } } }
public class TestBaseAtyponMetadataExtractor extends LockssTestCase { static Logger log = Logger.getLogger("TestBaseAtyponMetadataExtractor"); private MockLockssDaemon theDaemon; private ArchivalUnit bau; private ArchivalUnit bau1; private static String PLUGIN_NAME = "org.lockss.plugin.atypon.BaseAtyponPlugin"; static final String BASE_URL_KEY = ConfigParamDescr.BASE_URL.getKey(); private static String BASE_URL = "http://www.baseatypon.org/"; // the metadata that should be extracted static String goodDate = "2012-07-05"; static String[] goodAuthors = new String[] {"D. Author", "S. Author2"}; static String goodFormat = "text/HTML"; static String goodTitle = "Title of Article"; static String goodType = "research-article"; static String goodPublisher = "Base Atypon"; static String goodPublishingPlatform = "Atypon"; static String goodDOI = "10.1137/10081839X"; static String goodJID = "xxx"; static String goodJournal = "Journal Name"; static String goodStartPage = "22"; static String goodEndPage = "44"; static String goodVolume = "13"; static String goodIssue = "3"; static String goodIssn = "1540-3459"; static String doiURL = "http://dx.doi.org/" + goodDOI; private static final String ABS_URL = BASE_URL + "doi/abs/10.1175/2010WCAS1063.1"; private static final String RIS_URL = BASE_URL + "action/downloadCitation?doi=" + goodDOI + "&format=ris&include=cit"; public void setUp() throws Exception { super.setUp(); setUpDiskSpace(); // you need this to have startService work properly... 
theDaemon = getMockLockssDaemon(); theDaemon.getAlertManager(); theDaemon.getPluginManager().setLoadablePluginsReady(true); theDaemon.setDaemonInited(true); theDaemon.getPluginManager().startService(); theDaemon.getCrawlManager(); // in this directory this is file "test_baseatypon.tdb" but it becomes xml ConfigurationUtil.addFromUrl(getResource("test_baseatypon.xml")); Tdb tdb = ConfigManager.getCurrentConfig().getTdb(); TdbAu tdbau1 = tdb.getTdbAusLikeName(goodJournal + " Volume " + goodVolume).get(0); assertNotNull("Didn't find named TdbAu", tdbau1); bau1 = PluginTestUtil.createAndStartAu(tdbau1); assertNotNull(bau1); TypedEntryMap auConfig = bau1.getProperties(); assertEquals(BASE_URL, auConfig.getString(BASE_URL_KEY)); } public void tearDown() throws Exception { theDaemon.stopDaemon(); super.tearDown(); } /* * Test the functionality of the MetadataUtilities * */ public void testNormalizeTitleValue() throws Exception { assertEquals( BaseAtyponMetadataUtil.normalizeTitle("The title goes here"), BaseAtyponMetadataUtil.normalizeTitle("Title Goes Here")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle("Title with random spaces"), BaseAtyponMetadataUtil.normalizeTitle("Title with random spaces")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle("Normalize -- hyphen"), BaseAtyponMetadataUtil.normalizeTitle("normalize \u2013\u2013 hyphen")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle("Title and title"), BaseAtyponMetadataUtil.normalizeTitle("Title & title")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle(" leading spaces"), BaseAtyponMetadataUtil.normalizeTitle("leading spaces")); // now checking the fall-back last ditch attempt assertEquals( BaseAtyponMetadataUtil.generateRawTitle("leading spaces:colon?"), BaseAtyponMetadataUtil.generateRawTitle("leadingspacescolon")); assertEquals( BaseAtyponMetadataUtil.generateRawTitle("relapsing-remitting"), BaseAtyponMetadataUtil.generateRawTitle("relapsing?remitting")); assertEquals( 
BaseAtyponMetadataUtil.generateRawTitle("foo\"blah"), BaseAtyponMetadataUtil.generateRawTitle("foo-blah")); } /** * Configuration method. * * @return */ /* "<meta name="dc.Title" content="Title of Article"></meta> "<meta name="dc.Creator" content="D. Author"></meta> "<meta name="dc.Creator" content="S. Author2"></meta> "<meta name="dc.Subject" content="weighted regularity; elliptic problem; oscillatory diffusion; $hp$ finite elements; 65N30; 35B65; 35J57"></meta> "<meta name="dc.Description" content="Long test summary of article, probably taken directly from the adstract..."></meta> "<meta name="dc.Publisher" content="Name of Publisher"></meta> "<meta name="dc.Date" scheme="WTN8601" content="2012-07-05"></meta> "<meta name="dc.Type" content="research-article"></meta> "<meta name="dc.Format" content="text/HTML"></meta> "<meta name="dc.Identifier" scheme="publisher" content="81839"></meta> "<meta name="dc.Identifier" scheme="doi" content="10.1137/10081839X"></meta> "<meta name="dc.Source" content="http://dx.doi.org/10.1137/10081839X"></meta> "<meta name="dc.Language" content="en"></meta> "<meta name="dc.Coverage" content="world"></meta> "<meta name="keywords" content="weighted regularity, elliptic problem, oscillatory diffusion, $hp$ finite elements, 65N30, 35B65, 35J57"></meta> */ // a chunk of html source code from the publisher's site from where the // metadata should be extracted String goodHtmlContent = "<meta name=\"dc.Title\" content=\"Title of Article\"></meta>" + "<meta name=\"dc.Creator\" content=\"D. Author\"></meta>" + "<meta name=\"dc.Creator\" content=\"S. 
Author2\"></meta>" + "<meta name=\"dc.Subject\" content=\"weighted regularity; elliptic problem; oscillatory diffusion; $hp$ finite elements; 65N30; 35B65; 35J57\"></meta>" + "<meta name=\"dc.Description\" content=\"Long test summary of article, probably taken directly from the adstract...\"></meta>" + "<meta name=\"dc.Publisher\" content=\"Base Atypon\"></meta>" + "<meta name=\"dc.Date\" scheme=\"WTN8601\" content=\"2012-07-05\"></meta>" + "<meta name=\"dc.Type\" content=\"research-article\"></meta>" + "<meta name=\"dc.Format\" content=\"text/HTML\"></meta>" + "<meta name=\"dc.Identifier\" scheme=\"publisher\" content=\"81839\"></meta>" + "<meta name=\"dc.Identifier\" scheme=\"doi\" content=\"10.1137/10081839X\"></meta>" + "<meta name=\"dc.Source\" content=\"http://dx.doi.org/10.1137/10081839X\"></meta>" + "<meta name=\"dc.Language\" content=\"en\"></meta>" + "<meta name=\"dc.Coverage\" content=\"world\"></meta>" + "<meta name=\"keywords\" content=\"weighted regularity, elliptic problem, oscillatory diffusion, $hp$ finite elements, 65N30, 35B65, 35J57\"></meta>"; public void testExtractGoodHtmlContent() throws Exception { List<ArticleMetadata> mdlist = setupContentForAU(bau1, ABS_URL, goodHtmlContent, true); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); assertEquals(goodTitle, md.get(MetadataField.DC_FIELD_TITLE)); assertEquals(goodDate, md.get(MetadataField.FIELD_DATE)); assertEquals(goodFormat, md.get(MetadataField.DC_FIELD_FORMAT)); assertEquals(goodType, md.get(MetadataField.DC_FIELD_TYPE)); assertEquals(Arrays.asList(goodAuthors), md.getList(MetadataField.FIELD_AUTHOR)); assertEquals(goodAuthors[0], md.get(MetadataField.DC_FIELD_CREATOR)); } String goodHtmlContentNoDOIorPublisher = "<meta name=\"dc.Title\" content=\"Title of Article\"></meta>" + "<meta name=\"dc.Creator\" content=\"D. Author\"></meta>" + "<meta name=\"dc.Creator\" content=\"S. 
Author2\"></meta>" + "<meta name=\"dc.Subject\" content=\"weighted regularity; elliptic problem; oscillatory diffusion; $hp$ finite elements; 65N30; 35B65; 35J57\"></meta>" + "<meta name=\"dc.Description\" content=\"Long test summary of article, probably taken directly from the adstract...\"></meta>" + "<meta name=\"dc.Date\" scheme=\"WTN8601\" content=\"2012-07-05\"></meta>" + "<meta name=\"dc.Type\" content=\"research-article\"></meta>" + "<meta name=\"dc.Format\" content=\"text/HTML\"></meta>" + "<meta name=\"dc.Identifier\" scheme=\"publisher\" content=\"81839\"></meta>" + "<meta name=\"dc.Language\" content=\"en\"></meta>" + "<meta name=\"dc.Coverage\" content=\"world\"></meta>" + "<meta name=\"keywords\" content=\"weighted regularity, elliptic problem, oscillatory diffusion, $hp$ finite elements, 65N30, 35B65, 35J57\"></meta>"; public void testDOIExtraction() throws Exception { List<ArticleMetadata> mdlist = setupContentForAU(bau1, ABS_URL, goodHtmlContentNoDOIorPublisher, true); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); // gets pulled from the URL if not set in the metadata assertEquals("10.1175/2010WCAS1063.1", md.get(MetadataField.FIELD_DOI)); // gets set manually if not in the metadata // first it would try the TDB assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); } private String createGoodRisContent() { StringBuilder sb = new StringBuilder(); sb.append("TY - JOUR"); for (String auth : goodAuthors) { sb.append("\nA1 - "); sb.append(auth); } sb.append("\nDA - "); sb.append(goodDate); sb.append("\nJF - "); sb.append(goodJournal); sb.append("\nSP - "); sb.append(goodStartPage); sb.append("\nEP - "); sb.append(goodEndPage); sb.append("\nVL - "); sb.append(goodVolume); sb.append("\nIS - "); sb.append(goodIssue); sb.append("\nSN - "); sb.append(goodIssn); sb.append("\nT1 - "); sb.append(goodTitle); sb.append("\nPB - "); sb.append(goodPublisher); sb.append("\nDO - "); sb.append(goodDOI); sb.append("\nUR - 
"); sb.append(doiURL); sb.append("\nER -"); return sb.toString(); } /** * Method that creates a simulated Cached URL from the source code provided by the goodContent * String. It then asserts that the metadata extracted, by using the * MetaPressRisMetadataExtractorFactory, match the metadata in the source code. * * @throws Exception */ public void testExtractGoodRisContent() throws Exception { String goodContent = createGoodRisContent(); log.debug3(goodContent); List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME)); assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE)); assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE)); assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE)); assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN)); Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator(); for (String expAuth : goodAuthors) { assertEquals(expAuth, actAuthIter.next()); } assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE)); assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE)); assertEquals(goodDate, md.get(MetadataField.FIELD_DATE)); assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI)); // This shouldn't get set. 
It will default later to fuill_text_cu assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL)); } /* the extractor checks if data is missing it uses possible alternate RIS tags */ private String createAlternateRisContent() { StringBuilder sb = new StringBuilder(); sb.append("TY - JOUR"); for (String auth : goodAuthors) { sb.append("\nAU - "); sb.append(auth); } sb.append("\nY1 - "); sb.append(goodDate); sb.append("\nT2 - "); sb.append(goodJournal); sb.append("\nT1 - "); sb.append(goodTitle); sb.append("\nPB - "); sb.append(goodPublisher); sb.append("\nER -"); return sb.toString(); } /** * Method that creates a simulated Cached URL from the source code provided by the goodContent * String. It then asserts that the metadata extracted, by using the * MetaPressRisMetadataExtractorFactory, match the metadata in the source code. * * @throws Exception */ public void testExtractAlternateRisContent() throws Exception { String goodContent = createAlternateRisContent(); log.debug3(goodContent); List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator(); for (String expAuth : goodAuthors) { assertEquals(expAuth, actAuthIter.next()); } assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE)); assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE)); assertEquals(goodDate, md.get(MetadataField.FIELD_DATE)); assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); } /* private support methods */ private List<ArticleMetadata> setupContentForAU( ArchivalUnit au, String url, String content, boolean isHtmlExtractor) throws IOException, PluginException { FileMetadataExtractor me; InputStream input = null; CIProperties props = null; if (isHtmlExtractor) { input = IOUtils.toInputStream(content, "utf-8"); props = getContentHtmlProperties(); me = new 
BaseAtyponHtmlMetadataExtractorFactory() .createFileMetadataExtractor(MetadataTarget.Any(), "text/html"); } else { input = IOUtils.toInputStream(content, "utf-8"); props = getContentRisProperties(); me = new BaseAtyponRisMetadataExtractorFactory() .createFileMetadataExtractor(MetadataTarget.Any(), "text/plain"); } UrlData ud = new UrlData(input, props, url); UrlCacher uc = au.makeUrlCacher(ud); uc.storeContent(); CachedUrl cu = uc.getCachedUrl(); FileMetadataListExtractor mle = new FileMetadataListExtractor(me); return mle.extract(MetadataTarget.Any(), cu); } private CIProperties getContentHtmlProperties() { CIProperties cProps = new CIProperties(); // the CU checks the X-Lockss-content-type, not the content-type to determine encoding cProps.put(CachedUrl.PROPERTY_CONTENT_TYPE, "text/html; charset=UTF-8"); cProps.put("Content-type", "text/html; charset=UTF-8"); return cProps; } private CIProperties getContentRisProperties() { CIProperties cProps = new CIProperties(); // the CU checks the X-Lockss-content-type, not the content-type to determine encoding cProps.put(CachedUrl.PROPERTY_CONTENT_TYPE, "text/plain; charset=UTF-8"); cProps.put("Content-type", "text/plain; charset=UTF-8"); return cProps; } }
/**
 * Test class for {@code org.lockss.servlet.ServeContent}: verifies which
 * MissingFileAction is chosen for each publisher state under the various
 * values of the missing-file-action config parameter, with and without
 * proxying disabled.
 */
public class TestServeContent extends LockssServletTestCase {

  private static final Logger log = Logger.getLogger(TestServeContent.class);

  private MyServeContent sc;

  protected void setUp() throws Exception {
    super.setUp();
    sc = new MyServeContent();
  }

  /**
   * Asserts the action returned for each of the four publisher states, in the
   * fixed order KnownDown, RecentlyDown, NoContent, Unknown.
   */
  private void assertMissingFileActions(
      MissingFileAction expKnownDown,
      MissingFileAction expRecentlyDown,
      MissingFileAction expNoContent,
      MissingFileAction expUnknown) {
    assertEquals(expKnownDown, sc.getMissingFileAction(PubState.KnownDown));
    assertEquals(expRecentlyDown, sc.getMissingFileAction(PubState.RecentlyDown));
    assertEquals(expNoContent, sc.getMissingFileAction(PubState.NoContent));
    assertEquals(expUnknown, sc.getMissingFileAction(PubState.Unknown));
  }

  /** Convenience overload for the common case where all four states map to one action. */
  private void assertMissingFileActions(MissingFileAction expAll) {
    assertMissingFileActions(expAll, expAll, expAll, expAll);
  }

  public void testGetMissingFileAction() throws Exception {
    // Default config: every state serves the index of hosting AUs,
    // whether or not proxying is disabled.
    assertMissingFileActions(MissingFileAction.HostAuIndex);
    sc.setNeverProxy(true);
    assertMissingFileActions(MissingFileAction.HostAuIndex);
    sc.setNeverProxy(false);

    // "Redirect": applies only when the publisher state is Unknown,
    // and not at all when proxying is disabled.
    ConfigurationUtil.setFromArgs(ServeContent.PARAM_MISSING_FILE_ACTION, "Redirect");
    assertMissingFileActions(
        MissingFileAction.HostAuIndex,
        MissingFileAction.HostAuIndex,
        MissingFileAction.HostAuIndex,
        MissingFileAction.Redirect);
    sc.setNeverProxy(true);
    assertMissingFileActions(MissingFileAction.HostAuIndex);
    sc.setNeverProxy(false);

    // "AlwaysRedirect": applies to every state, but reverts to the AU index
    // when proxying is disabled.
    ConfigurationUtil.setFromArgs(ServeContent.PARAM_MISSING_FILE_ACTION, "AlwaysRedirect");
    assertMissingFileActions(MissingFileAction.AlwaysRedirect);
    sc.setNeverProxy(true);
    assertMissingFileActions(MissingFileAction.HostAuIndex);
    sc.setNeverProxy(false);

    // "Error_404": applies to every state regardless of the proxy setting.
    ConfigurationUtil.setFromArgs(ServeContent.PARAM_MISSING_FILE_ACTION, "Error_404");
    assertMissingFileActions(MissingFileAction.Error_404);
    sc.setNeverProxy(true);
    assertMissingFileActions(MissingFileAction.Error_404);
  }

  /** ServeContent stub that lets the test force the never-proxy flag. */
  class MyServeContent extends ServeContent {
    boolean isNeverProxy = false;

    protected boolean isNeverProxy() {
      return isNeverProxy;
    }

    void setNeverProxy(boolean val) {
      isNeverProxy = val;
    }
  }
}
/**
 * Test class for the Nature article iterator: generates simulated content,
 * copies it into a Nature AU under journal-shaped URLs, deletes a known
 * subset, and verifies the article count and full-text role assignments.
 */
public class TestNatureArticleIteratorFactory extends LockssTestCase {
  static Logger log = Logger.getLogger("TestNatureArticleIteratorFactory");

  private SimulatedArchivalUnit sau; // Simulated AU to generate content
  private ArchivalUnit nau; // Nature AU
  private MockLockssDaemon theDaemon;
  private static final int DEFAULT_FILESIZE = 3000;
  private static int fileSize = DEFAULT_FILESIZE;
  private static String PLUGIN_NAME =
      "org.lockss.plugin.nature.ClockssNaturePublishingGroupPlugin";
  private static String BASE_URL = "http://www.nature.com/";

  public void setUp() throws Exception {
    super.setUp();
    String tempDirPath = getTempDir().getAbsolutePath() + File.separator;
    ConfigurationUtil.setFromArgs(LockssRepositoryImpl.PARAM_CACHE_LOCATION, tempDirPath);
    // Daemon must be inited and the plugin manager started before AUs can be created.
    theDaemon = getMockLockssDaemon();
    theDaemon.getAlertManager();
    theDaemon.getPluginManager().setLoadablePluginsReady(true);
    theDaemon.setDaemonInited(true);
    theDaemon.getPluginManager().startService();
    theDaemon.getCrawlManager();
    sau = PluginTestUtil.createAndStartSimAu(simAuConfig(tempDirPath));
    nau = PluginTestUtil.createAndStartAu(PLUGIN_NAME, natureAuConfig());
  }

  public void tearDown() throws Exception {
    sau.deleteContentTree();
    theDaemon.stopDaemon();
    super.tearDown();
  }

  /** Config for the simulated AU: 1 level deep, 4 branches, 7 HTML+PDF files each. */
  Configuration simAuConfig(String rootPath) {
    Configuration conf = ConfigManager.newConfiguration();
    conf.put("root", rootPath);
    conf.put("base_url", BASE_URL);
    conf.put("depth", "1");
    conf.put("branch", "4");
    conf.put("numFiles", "7");
    conf.put(
        "fileTypes",
        ""
            + (SimulatedContentGenerator.FILE_TYPE_HTML
                | SimulatedContentGenerator.FILE_TYPE_PDF));
    conf.put("binFileSize", "" + fileSize);
    return conf;
  }

  /** Config for the Nature AU under test (journal aps, volume 123, year 2008). */
  Configuration natureAuConfig() {
    Configuration conf = ConfigManager.newConfiguration();
    conf.put("base_url", BASE_URL);
    conf.put("journal_id", "aps");
    conf.put("volume_name", "123");
    conf.put("year", "2008");
    return conf;
  }

  public void testArticleCountAndType() throws Exception {
    int expCount = 28;
    PluginTestUtil.crawlSimAu(sau);
    // Copy simulated HTML files in as full-text pages.
    // NOTE(review): the filter ".*[^.][^p][^d][^f]$" is a crude "not *.pdf"
    // exclusion (last four chars must not be '.','p','d','f' respectively);
    // it can also exclude other coincidental suffixes -- confirm intent.
    String pat1 = "branch(\\d+)/(\\d+file\\.html)";
    String rep1 = "aps/journal/v123/n$1/full/$2";
    PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
    // Copy simulated PDF files in as PDF variants.
    String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
    String rep2 = "aps/journal/v123/n$1/pdf/$2";
    PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);
    // Remove some URLs: every "1file.html" and "2file.pdf" under /journal/,
    // leaving some articles HTML-only and some PDF-only.
    int deleted = 0;
    for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) it.next();
      if (cusn instanceof CachedUrl) {
        CachedUrl cu = (CachedUrl) cusn;
        String url = cu.getUrl();
        if (url.contains("/journal/")
            && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
          deleteBlock(cu);
          ++deleted;
        }
      }
    }
    assertEquals(8, deleted);
    // Iterate the articles and tally how many lack a PDF role (HTML-only)
    // and how many have the PDF as their full-text CU (PDF-only).
    Iterator<ArticleFiles> it = nau.getArticleIterator();
    int count = 0;
    int countHtmlOnly = 0;
    int countPdfOnly = 0;
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.info(af.toString());
      CachedUrl cu = af.getFullTextCu();
      String url = cu.getUrl();
      assertNotNull(cu);
      String contentType = cu.getContentType();
      log.debug("count " + count + " url " + url + " " + contentType);
      count++;
      if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == null) {
        ++countHtmlOnly;
      }
      if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == url) {
        ++countPdfOnly;
      }
    }
    log.debug("Article count is " + count);
    assertEquals(expCount, count);
    assertEquals(4, countHtmlOnly);
    assertEquals(4, countPdfOnly);
  }

  //  public void testArticleCountAndDefaultType() throws Exception {
  //    testArticleCountAndType("text/html", true, 24);
  //  }
  //
  //  public void testArticleCountAndPdf() throws Exception {
  //    testArticleCountAndType("application/pdf", false, 0);
  //  }

  /** Deletes the repository node holding the given cached URL. */
  private void deleteBlock(CachedUrl cu) throws IOException {
    log.info("deleting " + cu.getUrl());
    CachedUrlSetSpec cuss = new SingleNodeCachedUrlSetSpec(cu.getUrl());
    ArchivalUnit au = cu.getArchivalUnit();
    CachedUrlSet cus = au.makeCachedUrlSet(cuss);
    NodeManager nm = au.getPlugin().getDaemon().getNodeManager(au);
    nm.deleteNode(cus);
  }
}
/** Test class for <code>org.lockss.hasher.HashQueue</code> */
public class TestHashQueue extends LockssTestCase {
  public static Class testedClasses[] = {org.lockss.hasher.HashQueue.class};
  static Logger log = Logger.getLogger("HashQueue");
  // static HashServiceTestPlugin.CUS cus = HashServiceTestPlugin.getCUS();
  MockArchivalUnit mau = null;
  MockCachedUrlSet cus;
  // Algorithm for the digest shared by all tests; created lazily in setUp().
  static final String hashAlgorithm = "SHA-1";
  static MessageDigest dig;

  public TestHashQueue(String msg) {
    super(msg);
  }

  public void setUp() throws Exception {
    super.setUp();
    log.setLevel(Logger.LEVEL_DEBUG);
    if (dig == null) {
      dig = MessageDigest.getInstance(hashAlgorithm);
    }
    mau = new MockArchivalUnit(new MockPlugin());
    cus = new MockCachedUrlSet(mau, null);
    cus.setHashItSource(Collections.EMPTY_LIST);
    // Tests drive time explicitly via Deadline expiry and TimeBase.step().
    TimeBase.setSimulated();
  }

  public void tearDown() throws Exception {
    TimeBase.setReal();
    super.tearDown();
  }

  /**
   * Builds a content-hash request on the shared CUS, expiring in
   * {@code deadlineIn} ms with the given duration estimate.
   */
  HashQueue.Request simpleReq(long deadlineIn, int duration) {
    return new HashQueue.Request(
        cus, Deadline.in(deadlineIn), null, null, new GenericContentHasher(cus, dig), duration);
  }

  /** Builds a mock-hasher request whose cookie is the request object itself. */
  HashQueue.Request req(long deadlineIn, int duration, int bytes, HashService.Callback callback) {
    HashQueue.Request req = req(null, deadlineIn, duration, bytes, callback);
    req.cookie = req;
    return req;
  }

  /**
   * Builds a request backed by a MockCachedUrlSetHasher configured with
   * {@code bytes} bytes to hash; the hasher is also installed on the CUS.
   */
  HashQueue.Request req(
      Object cookie, long deadlineIn, int duration, int bytes, HashService.Callback callback) {
    MockCachedUrlSetHasher hasher = new MockCachedUrlSetHasher();
    hasher.setNumBytes(bytes);
    cus.setContentHasher(hasher);
    //     cus.setHashDuration(duration, bytes);
    HashQueue.Request req =
        new HashQueue.Request(cus, Deadline.in(deadlineIn), callback, cookie, hasher, duration);
    return req;
  }

  /** Earlier deadline runs first; equal deadlines run before in neither direction. */
  public void testReqOrder() {
    HashQueue q = new HashQueue();
    HashQueue.Request req1 = simpleReq(1000, 100);
    HashQueue.Request req2 = simpleReq(2000, 100);
    HashQueue.Request req3 = simpleReq(2000, 100);
    assertTrue(req1.runBefore(req2));
    assertFalse(req2.runBefore(req1));
    // 2 and 3 expire at the same time, so runBefore should be
    // false in both directions
    assertFalse(req2.runBefore(req3));
    assertFalse(req3.runBefore(req2));
  }

  /** Overrun requests (timeUsed past deadline) sort after all non-overrun ones. */
  public void testOverrunReqOrder() {
    HashQueue q = new HashQueue();
    HashQueue.Request req1 = simpleReq(1000, 100);
    HashQueue.Request req2 = simpleReq(2000, 100);
    HashQueue.Request reqO1 = simpleReq(500, 100);
    reqO1.timeUsed = 600;
    HashQueue.Request reqO2 = simpleReq(1500, 100);
    reqO2.timeUsed = 1600;
    assertTrue(reqO1.runBefore(reqO2));
    assertFalse(reqO2.runBefore(reqO1));
    assertTrue(req1.runBefore(reqO1));
    assertTrue(req2.runBefore(reqO1));
    assertTrue(req1.runBefore(reqO2));
    assertTrue(req2.runBefore(reqO2));
  }

  // test request acceptance
  public void testAccept() {
    HashQueue q = new HashQueue();
    HashQueue.Request r1, r2, r3, r4, r5, r6;
    r1 = simpleReq(-1, 100); // already expired, must be refused
    r2 = simpleReq(2000, 1000);
    r3 = simpleReq(3000, 2900);
    assertEquals(null, q.head());
    assertFalse(q.insert(r1));
    assertTrue(q.insert(r2));
    assertFalse(q.insert(r3));
    // change r2 to overrun
    r2.timeUsed = 1200;
    // r3 should now be accepted.  It would prevent r2 from finishing in
    // time, but r2 should be ignored as it has overrun.
    assertTrue(q.insert(r3));
  }

  // test insert order
  public void testInsertOrder() throws Exception {
    HashQueue q = new HashQueue();
    HashQueue.Request r1, r2, r3, r4, r5, r6, r7;
    r1 = simpleReq(2000, 0);
    r2 = simpleReq(3000, 0);
    r3 = simpleReq(5000, 0);
    r4 = simpleReq(2500, 0);
    r5 = simpleReq(200, 0);
    r6 = simpleReq(200, 0); // identical to r5, inserted before it
    // so should go before it in queue
    // One that has overrun, should end up last
    r7 = simpleReq(200, 0);
    r7.timeUsed = 201;
    Object ord[] = {r6, r5, r1, r4, r2, r3, r7};
    assertTrue(q.insert(r1));
    assertTrue(q.insert(r2));
    assertTrue(q.insert(r3));
    assertTrue(q.insert(r6));
    assertTrue(q.insert(r4));
    assertTrue(q.insert(r5));
    assertTrue(q.insert(r7));
    // inspect the queue's internal list directly to check the order
    assertIsomorphic(ord, (Collection) PrivilegedAccessor.getValue(q, "qlist"));
  }

  // test completion & callback
  public void testDone() throws Exception {
    HashQueue q = new HashQueue();
    final List cookieList = new LinkedList();
    final List eList = new LinkedList();
    // Callback records each finished request's cookie and exception.
    HashService.Callback cb =
        new HashService.Callback() {
          public void hashingFinished(
              CachedUrlSet urlset,
              long timeUsed,
              Object cookie,
              CachedUrlSetHasher hasher,
              Exception e) {
            cookieList.add(cookie);
            eList.add(e);
          }
        };
    HashQueue.Request r1, r2, r3, r4, r5;
    r1 = req(2000, 0, 100, cb);
    r2 = req(10000, 0, 200, cb);
    r3 = req(20000, 0, 0, cb);
    r4 = req(50000, 0, 1, cb);
    assertTrue(q.insert(r1));
    assertTrue(q.insert(r2));
    assertTrue(q.insert(r4));
    assertEquals(0, cookieList.size());
    q.removeCompleted();
    assertEquals(0, cookieList.size());
    // make r1 timeout
    r1.deadline.expire();
    q.removeCompleted();
    List exp = ListUtil.list(r1);
    assertEquals(exp, cookieList);
    assertEquals(exp, q.getCompletedSnapshot());
    // make r2 timeout
    TimeBase.step(11000);
    // r3 is finished
    assertTrue(q.insert(r3));
    Exception r4e = new Exception();
    // make r4 error
    r4.e = r4e;
    q.removeCompleted();
    // check that they all finished, and in the right order
    Object exp2[] = {r1, r2, r3, r4};
    assertIsomorphic(exp2, cookieList);
    assertIsomorphic(exp2, q.getCompletedSnapshot());
    // check their exceptions
    assertTrue(eList.get(0) instanceof HashService.Timeout);
    assertTrue(eList.get(1) instanceof HashService.Timeout);
    assertSame(null, eList.get(2));
    assertSame(r4e, eList.get(3));
  }

  // test stepper
  public void testStep() throws Exception {
    HashQueue q = new HashQueue();
    final List cookieList = new LinkedList();
    HashService.Callback cb =
        new HashService.Callback() {
          public void hashingFinished(
              CachedUrlSet urlset,
              long timeUsed,
              Object cookie,
              CachedUrlSetHasher hasher,
              Exception e) {
            cookieList.add(cookie);
          }
        };
    HashQueue.Request r1, r2, r3, r4, r5;
    r1 = req("1", 20000, 10000, 10000, cb);
    r2 = req("2", 100000, 20000, 20000, cb);
    r3 = req("3", 200000, 30000, 40000, cb);
    assertTrue(q.insert(r2));
    // run 3 steps of 75 bytes each against the only queued request
    q.runAndNotify(3, 75, Boolean.TRUE);
    long n2 = 20000 - 3 * 75;
    assertEquals(n2, getBytesLeft(r2));
    assertTrue(q.insert(r1));
    assertTrue(q.insert(r3));
    q.runAndNotify(3, 75, Boolean.TRUE);
    //      assertEquals(n2, getBytesLeft(r2));
    //      assertEquals(n2, getBytesLeft(r1));
  }

  public void testGetAvailableHashTimeBefore() {
    HashQueue q = new HashQueue();
    // empty queue: all time before the deadline is available
    assertEquals(500, q.getAvailableHashTimeBefore(Deadline.in(500)));
    HashQueue.Request r1, r2, r3, r4, r5, r6, r7;
    r1 = simpleReq(200, 100);
    r2 = simpleReq(2000, 1200);
    r3 = simpleReq(3000, 500);
    assertTrue(q.insert(r1));
    assertTrue(q.insert(r2));
    assertTrue(q.insert(r3));
    assertEquals(100, q.getAvailableHashTimeBefore(Deadline.in(100)));
    assertEquals(400, q.getAvailableHashTimeBefore(Deadline.in(500)));
    assertEquals(700, q.getAvailableHashTimeBefore(Deadline.in(1000)));
    assertEquals(700, q.getAvailableHashTimeBefore(Deadline.in(2000)));
    assertEquals(1200, q.getAvailableHashTimeBefore(Deadline.in(3000)));
    assertEquals(2200, q.getAvailableHashTimeBefore(Deadline.in(4000)));
    // this will fully commit first 200 ms
    r4 = simpleReq(200, 100);
    assertTrue(q.insert(r4));
    assertEquals(0, q.getAvailableHashTimeBefore(Deadline.in(100)));
    assertEquals(0, q.getAvailableHashTimeBefore(Deadline.in(0)));
  }

  /** Returns the number of bytes the request's mock hasher has yet to hash. */
  private long getBytesLeft(HashQueue.Request req) {
    MockCachedUrlSetHasher hasher = (MockCachedUrlSetHasher) req.urlsetHasher;
    return hasher.getBytesLeft();
  }
}
public class TestHighWireArticleIteratorFactory extends ArticleIteratorTestCase { static Logger log = Logger.getLogger(TestHighWireArticleIteratorFactory.class); private SimulatedArchivalUnit sau; // Simulated AU to generate content private static String PLUGIN_NAME = "org.lockss.plugin.highwire.HighWirePressPlugin"; private static String BASE_URL = "http://pediatrics.aappublications.org/"; private static String SIM_ROOT = BASE_URL + "cgi/reprint/"; public void setUp() throws Exception { super.setUp(); String tempDirPath = setUpDiskSpace(); au = createAu(); sau = PluginTestUtil.createAndStartSimAu(simAuConfig(tempDirPath)); } public void tearDown() throws Exception { sau.deleteContentTree(); // theDaemon.stopDaemon(); super.tearDown(); } Configuration simAuConfig(String rootPath) { Configuration conf = ConfigManager.newConfiguration(); conf.put("root", rootPath); conf.put("base_url", SIM_ROOT); conf.put("depth", "0"); conf.put("branch", "0"); conf.put("numFiles", "2"); conf.put( "fileTypes", "" + (SimulatedContentGenerator.FILE_TYPE_PDF | SimulatedContentGenerator.FILE_TYPE_HTML)); conf.put("binFileSize", "7"); return conf; } protected ArchivalUnit createAu() throws ArchivalUnit.ConfigurationException { return PluginTestUtil.createAndStartAu( PLUGIN_NAME, ConfigurationUtil.fromArgs( "base_url", "http://pediatrics.aappublications.org/", "volume_name", "52", "journal_issn", "1098-4275")); } public void testRoots() throws Exception { SubTreeArticleIterator artIter = createSubTreeIter(); System.out.println("Root Urls::" + getRootUrls(artIter)); assertEquals( ListUtil.list( "http://pediatrics.aappublications.org/cgi/content/full/52/", "http://pediatrics.aappublications.org/cgi/reprint/52/"), getRootUrls(artIter)); } public void testUrlsWithPrefixes() throws Exception { SubTreeArticleIterator artIter = createSubTreeIter(); Pattern pat = getPattern(artIter); assertMatchesRE( pat, "http://pediatrics.aappublications.org/cgi/reprint/foo;52/Supplement_3/S69.pdf"); 
assertMatchesRE( pat, "http://pediatrics.aappublications.org/cgi/reprint/52/supplement_3/S69.pdf"); assertNotMatchesRE( pat, "http://pediatrics.aappublications.org/cgi/reprin/1014174823t49006/j0143.pdfwrong"); assertNotMatchesRE( pat, "http://pediatrics.aappublications.org/cgi/reprintt/1014174823t49006/j0143.pdfwrong"); assertNotMatchesRE(pat, "http://www.example.com/content/"); assertNotMatchesRE(pat, "http://www.example.com/content/j"); assertNotMatchesRE(pat, "http://www.example.com/content/j0123/j383.pdfwrong"); } public void testCreateArticleFiles() throws Exception { PluginTestUtil.crawlSimAu(sau); String pat0 = "001file[.]html"; String rep0 = "52/1/S1"; PluginTestUtil.copyAu(sau, au, ".*[.]html$", pat0, rep0); String pat1 = "001file[.]pdf"; String rep1 = "52/1/S1.pdf"; PluginTestUtil.copyAu(sau, au, ".*[.]pdf$", pat1, rep1); String pdfurl = "http://pediatrics.aappublications.org/cgi/reprint/52/1/S1.pdf"; String url = "http://pediatrics.aappublications.org/cgi/reprint/52/1/S1"; au.makeCachedUrl(url); CachedUrl cu = au.makeCachedUrl(pdfurl); assertNotNull(cu); SubTreeArticleIterator artIter = createSubTreeIter(); assertNotNull(artIter); ArticleFiles af = artIter.next(); assertNotNull(af); System.out.println("article files::" + af); assertEquals(url, af.getRoleCu(ArticleFiles.ROLE_FULL_TEXT_PDF_LANDING_PAGE).getUrl()); assertEquals(pdfurl, af.getRoleCu(ArticleFiles.ROLE_FULL_TEXT_PDF).getUrl()); } }
/** Test class for <code>org.lockss.scheduler.TaskRunner</code> */ public class TestTaskRunner extends LockssTestCase { public static Class testedClasses[] = {org.lockss.scheduler.TaskRunner.class}; private static final Logger log = Logger.getLogger(TestTaskRunner.class); private MyMockTaskRunner tr; private SchedFact fact; private List removedChunks; private List removedTasks; public void setUp() throws Exception { super.setUp(); TimeBase.setSimulated(); ConfigurationUtil.setFromArgs(SortScheduler.PARAM_OVERHEAD_LOAD, "0"); removedChunks = new ArrayList(); removedTasks = new ArrayList(); fact = new SchedFact(null); tr = new MyMockTaskRunner(fact); tr.initService(getMockLockssDaemon()); tr.startService(); } public void tearDown() throws Exception { TimeBase.setReal(); tr.stopService(); super.tearDown(); } StepTask task(long start, long end, long duration) { return new StepperTask( Deadline.at(start), Deadline.at(end), duration, null, null, new MyMockStepper()); } StepTask task(long start, long end, long duration, TaskCallback cb) { return new StepperTask( Deadline.at(start), Deadline.at(end), duration, cb, null, new MyMockStepper()); } StepTask task(long start, long end, long duration, TaskCallback cb, Stepper stepper) { return new StepperTask(Deadline.at(start), Deadline.at(end), duration, cb, null, stepper); } BackgroundTask btask(long start, long end, double loadFactor, TaskCallback cb) { return new BackgroundTask(Deadline.at(start), Deadline.at(end), loadFactor, cb); } // make a Chunk for the task Schedule.Chunk chunk(StepTask task) { return new Schedule.Chunk(task, task.getEarliestStart(), task.getLatestFinish(), task.curEst()); } // make a BackgroundEvent for the task Schedule.BackgroundEvent bEvent(BackgroundTask task, Schedule.EventType event) { return new Schedule.BackgroundEvent( task, (event == Schedule.EventType.START ? 
task.getStart() : task.getFinish()), event); } // make a Schedule with one chunk per task Schedule sched(List tasks) { List events = new ArrayList(); for (Iterator iter = tasks.iterator(); iter.hasNext(); ) { Object obj = iter.next(); if (obj instanceof Schedule.Event) { events.add(obj); } else { SchedulableTask task = (SchedulableTask) obj; if (task.isBackgroundTask()) { events.add(bEvent((BackgroundTask) task, Schedule.EventType.START)); } else { events.add(chunk((StepTask) task)); } } } Schedule s = new Schedule(events); return s; } void assertForegroundStat(int expected, int stat_ix) { assertEquals(expected, tr.getForegroundStat(stat_ix)); } void assertBackgroundStat(int expected, int stat_ix) { assertEquals(expected, tr.getBackgroundStat(stat_ix)); } // ensure addToSchedule returns false if (Mock)Scheduler returns false public void testAddToScheduleFail() { fact.setResult(null); StepTask t1 = task(100, 200, 50); assertFalse(tr.addToSchedule(t1)); assertEmpty(tr.getAcceptedTasks()); assertForegroundStat(0, TaskRunner.STAT_ACCEPTED); assertForegroundStat(1, TaskRunner.STAT_REFUSED); } // ensure addToSchedule updates structures if (Mock)Scheduler returns true public void testAddToScheduleOk() { StepTask t1 = task(100, 200, 50); StepTask t2 = task(100, 200, 100); Schedule sched = sched(ListUtil.list(t1, t2)); fact.setResult(sched); assertTrue(tr.addToSchedule(t1)); assertIsomorphic(ListUtil.list(t1), fact.scheduler.tasks); assertForegroundStat(1, TaskRunner.STAT_ACCEPTED); assertForegroundStat(0, TaskRunner.STAT_REFUSED); fact.setResult(sched); assertTrue(tr.addToSchedule(t2)); assertEquals(SetUtil.set(t1, t2), SetUtil.theSet(fact.scheduler.tasks)); assertEquals(sched, tr.getCurrentSchedule()); assertEquals(SetUtil.set(t1, t2), SetUtil.theSet(tr.getAcceptedTasks())); assertForegroundStat(2, TaskRunner.STAT_ACCEPTED); assertForegroundStat(2, TaskRunner.STAT_WAITING); assertForegroundStat(0, TaskRunner.STAT_REFUSED); } // Now with task dropping on // only one try, 
because no tasks to drop public void testAddToScheduleFailNothingToDrop() { ConfigurationUtil.addFromArgs(TaskRunner.PARAM_DROP_TASK_MAX, "2"); fact.setResult(null); StepTask t1 = task(100, 200, 50); assertFalse(tr.addToSchedule(t1)); assertEmpty(tr.getAcceptedTasks()); assertEquals(1, fact.createArgs.size()); assertForegroundStat(1, TaskRunner.STAT_REFUSED); assertForegroundStat(0, TaskRunner.STAT_WAITING); assertForegroundStat(0, TaskRunner.STAT_DROPPED); } // one task to drop, two failed schedule tries public void testAddToScheduleFailNoCleanup() { ConfigurationUtil.addFromArgs( TaskRunner.PARAM_DROP_TASK_MAX, "10", TaskRunner.PARAM_MIN_CLEANUP_INTERVAL, "0"); StepTask t1 = task(100, 200, 50); StepTask t2 = task(100, 200, 100); Schedule sched = sched(ListUtil.list(t1)); fact.setResult(sched); assertTrue(tr.addToSchedule(t1)); assertIsomorphic(ListUtil.list(t1), fact.scheduler.tasks); assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getAcceptedTasks())); assertForegroundStat(1, TaskRunner.STAT_ACCEPTED); assertForegroundStat(0, TaskRunner.STAT_REFUSED); assertForegroundStat(1, TaskRunner.STAT_WAITING); assertForegroundStat(0, TaskRunner.STAT_DROPPED); assertFalse(tr.addToSchedule(t2)); assertEquals( ListUtil.list(ListUtil.list(t1), ListUtil.list(t1, t2), ListUtil.list(t1)), fact.createArgs); assertEquals(SetUtil.set(t1), SetUtil.theSet(fact.scheduler.tasks)); assertEquals(sched, tr.getCurrentSchedule()); assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getAcceptedTasks())); assertForegroundStat(1, TaskRunner.STAT_ACCEPTED); assertForegroundStat(1, TaskRunner.STAT_REFUSED); assertForegroundStat(1, TaskRunner.STAT_WAITING); assertForegroundStat(0, TaskRunner.STAT_DROPPED); } // one task not ready to start yet, so not droppable public void testAddToScheduleFailNoDroppable() { log.debug("testAddToScheduleOkAfterDrops()"); ConfigurationUtil.addFromArgs( TaskRunner.PARAM_DROP_TASK_MAX, "10", TaskRunner.PARAM_MIN_CLEANUP_INTERVAL, "0"); StepTask t1 = task(100, 200, 50); 
// Tail of a test whose opening lines are above this region: the scheduler
// factory is primed to refuse the reschedule (null results), so adding t2 must
// fail and leave the original schedule, accepted set and overrun set intact.
StepTask t2 = task(100, 200, 100);
Schedule sched1 = sched(ListUtil.list(t1));
Schedule sched2 = sched(ListUtil.list(t2));
fact.setResults(ListUtil.list(sched1, null, null, sched2, sched2));
assertTrue(tr.addToSchedule(t1));
assertIsomorphic(ListUtil.list(t1), fact.scheduler.tasks);
assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getAcceptedTasks()));
assertFalse(tr.addToSchedule(t2));
// createSchedule was tried with {t1}, then {t1,t2}, then fell back to {t1}
assertEquals(
    ListUtil.list(ListUtil.list(t1), ListUtil.list(t1, t2), ListUtil.list(t1)),
    fact.createArgs);
assertEquals(SetUtil.set(t1), SetUtil.theSet(fact.scheduler.tasks));
assertFalse(t1.isDropped());
assertEquals(sched1, tr.getCurrentSchedule());
assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getAcceptedTasks()));
assertEmpty(SetUtil.theSet(tr.getOverrunTasks()));
}

// one task to drop, succeeds after dropping it
public void testAddToScheduleOkAfterDrops() {
log.debug("testAddToScheduleOkAfterDrops()");
ConfigurationUtil.addFromArgs(
    TaskRunner.PARAM_DROP_TASK_MAX, "10",
    TaskRunner.PARAM_MIN_CLEANUP_INTERVAL, "0");
StepTask t1 = task(100, 200, 50);
StepTask t2 = task(100, 200, 100);
Schedule sched1 = sched(ListUtil.list(t1));
Schedule sched2 = sched(ListUtil.list(t2));
// two nulls force the runner to drop t1 before sched2 succeeds
fact.setResults(ListUtil.list(sched1, null, null, sched2, sched2));
TimeBase.step(101);
assertTrue(tr.addToSchedule(t1));
assertIsomorphic(ListUtil.list(t1), fact.scheduler.tasks);
assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getAcceptedTasks()));
assertForegroundStat(1, TaskRunner.STAT_ACCEPTED);
assertForegroundStat(0, TaskRunner.STAT_REFUSED);
assertForegroundStat(1, TaskRunner.STAT_WAITING);
assertForegroundStat(0, TaskRunner.STAT_DROPPED);
assertTrue(tr.addToSchedule(t2));
assertEquals(
    ListUtil.list(
        ListUtil.list(t1),
        ListUtil.list(t1, t2),
        ListUtil.list(t1),
        Collections.EMPTY_SET,
        ListUtil.list(t1, t2)),
    fact.createArgs);
assertEquals(SetUtil.set(t1, t2), SetUtil.theSet(fact.scheduler.tasks));
// t1 was dropped to make room for t2, but remains accepted (as overrun)
assertTrue(t1.isDropped());
assertEquals(sched2, tr.getCurrentSchedule());
assertEquals(SetUtil.set(t1, t2), SetUtil.theSet(tr.getAcceptedTasks()));
assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getOverrunTasks()));
assertForegroundStat(2, TaskRunner.STAT_ACCEPTED);
assertForegroundStat(0, TaskRunner.STAT_REFUSED);
assertForegroundStat(1, TaskRunner.STAT_WAITING);
assertForegroundStat(1, TaskRunner.STAT_DROPPED);
t2.cancel();
assertForegroundStat(2, TaskRunner.STAT_ACCEPTED);
assertForegroundStat(0, TaskRunner.STAT_REFUSED);
assertForegroundStat(0, TaskRunner.STAT_WAITING);
assertForegroundStat(1, TaskRunner.STAT_DROPPED);
assertForegroundStat(1, TaskRunner.STAT_CANCELLED);
}

// isTaskSchedulable() reflects whether the scheduler factory can produce a
// schedule containing the task.
public void testIsTaskSchedulable() {
fact.setResult(null);
StepTask t1 = task(100, 200, 50);
assertFalse(tr.isTaskSchedulable(t1));
fact.setResult(sched(ListUtil.list(t1)));
assertTrue(tr.isTaskSchedulable(t1));
}

// No task is runnable before its chunk's start time; after the clock passes
// it, the first chunk's task becomes the running task.
public void testFindChunkTaskToRun() {
assertFalse(tr.findTaskToRun());
StepTask t1 = task(100, 200, 100);
StepTask t2 = task(100, 300, 50);
Schedule s = sched(ListUtil.list(t1, t2));
fact.setResults(s, s);
assertTrue(tr.addToSchedule(t1));
assertTrue(tr.addToSchedule(t2));
assertFalse(tr.findTaskToRun());
assertEquals(Deadline.at(100), tr.runningDeadline);
TimeBase.setSimulated(101);
assertTrue(tr.findTaskToRun());
assertEquals(t1, tr.runningTask);
assertEquals(t1.getLatestFinish(), tr.runningDeadline);
assertEquals(s.getEvents().get(0), tr.runningChunk);
}

// A later chunk whose task is already eligible (t2 starts at 10) may run
// ahead of schedule, bounded by the earlier chunk's start deadline.
public void testFindRunnableChunk() {
assertFalse(tr.findTaskToRun());
StepTask t1 = task(100, 200, 100);
StepTask t2 = task(10, 300, 50);
Schedule.Chunk c1 = new Schedule.Chunk(t1, Deadline.at(100), Deadline.at(200), 100);
Schedule.Chunk c2 = new Schedule.Chunk(t2, Deadline.at(200), Deadline.at(300), 100);
Schedule s = new Schedule(ListUtil.list(c1, c2));
fact.setResults(s, s);
assertTrue(tr.addToSchedule(t1));
assertTrue(tr.addToSchedule(t2));
assertFalse(tr.findTaskToRun());
assertEquals(Deadline.at(100), tr.runningDeadline);
TimeBase.setSimulated(11);
assertTrue(tr.findTaskToRun());
assertEquals(t2, tr.runningTask);
assertEquals(c2, tr.runningChunk);
assertEquals(Deadline.at(100), tr.runningDeadline);
assertEquals(s.getEvents().get(1), tr.runningChunk);
}

// An overrun task runs in otherwise-idle time before the first chunk starts;
// runningChunk is null while an overrunner runs.
public void testFindOverrunTaskToRun() {
assertFalse(tr.findTaskToRun());
StepTask t1 = task(100, 200, 100);
Schedule s = sched(ListUtil.list(t1));
fact.setResult(s);
assertTrue(tr.addToSchedule(t1));
assertFalse(tr.findTaskToRun());
assertEquals(Deadline.at(100), tr.runningDeadline);
StepTask t2 = task(0, 300, 50);
tr.addOverrunner(t2);
assertTrue(tr.findTaskToRun());
assertEquals(t2, tr.runningTask);
assertEquals(Deadline.at(100), tr.runningDeadline);
assertNull(tr.runningChunk);
}

// Chunks whose deadline (0) has already passed are removed during the scan.
public void testFindTaskToRunRemovesExpiredChunks() {
assertFalse(tr.findTaskToRun());
StepTask t1 = task(100, 200, 100);
StepTask t2 = task(100, 300, 50);
StepTask texp1 = task(0, 0, 50);
StepTask texp2 = task(0, 0, 50);
Schedule s = sched(ListUtil.list(texp1, texp2, t1, t2));
fact.setResults(s, s);
assertTrue(tr.addToSchedule(t1));
assertTrue(tr.addToSchedule(t2));
assertFalse(tr.findTaskToRun());
assertEquals(2, removedChunks.size());
assertEquals(
    SetUtil.set(texp1, texp2),
    SetUtil.set(
        ((Schedule.Chunk) removedChunks.get(0)).getTask(),
        ((Schedule.Chunk) removedChunks.get(1)).getTask()));
}

// Overrunners past their deadline are removed as tasks (not chunks).
public void testFindTaskToRunRemovesExpiredOverrunners() {
assertFalse(tr.findTaskToRun());
StepTask t1 = task(100, 200, 100);
StepTask t2 = task(100, 300, 50);
StepTask texp1 = task(0, 0, 50);
StepTask texp2 = task(0, 0, 49);
Schedule s = sched(ListUtil.list(t1, t2));
fact.setResults(s, s);
assertTrue(tr.addToSchedule(t1));
assertTrue(tr.addToSchedule(t2));
tr.addOverrunner(texp1);
tr.addOverrunner(texp2);
// if this fails, it might be because the sorted list/set is treating
// sort-order equivalence as object equality, which we don't want
assertEquals(2, tr.getOverrunTasks().size());
assertFalse(tr.findTaskToRun());
assertEquals(0, removedChunks.size());
assertEquals(2, removedTasks.size());
assertEquals(
    SetUtil.set(texp1, texp2),
    SetUtil.set((StepTask) removedTasks.get(0), (StepTask) removedTasks.get(1)));
}

// removeChunk() deletes the chunk from the current schedule.
public void testRemoveChunk() {
StepTask t1 = task(100, 200, 100);
Schedule s = sched(ListUtil.list(t1));
fact.setResult(s);
assertTrue(tr.addToSchedule(t1));
assertIsomorphic(ListUtil.list(t1), tr.getAcceptedTasks());
Schedule.Chunk chunk = (Schedule.Chunk) s.getEvents().get(0);
assertTrue(tr.getCurrentSchedule().getEvents().contains(chunk));
tr.removeChunk(chunk);
assertFalse(tr.getCurrentSchedule().getEvents().contains(chunk));
}

// This should generate an impossible state log, and leave the task in
// acceptedTasks
public void testRemoveChunkTaskEnd() {
final List finished = new ArrayList();
StepTask t1 =
    task(
        100,
        200,
        100,
        new TaskCallback() {
          public void taskEvent(SchedulableTask task, Schedule.EventType event) {
            if (log.isDebug2()) {
              log.debug2("testRemoveChunkTaskEnd event " + event);
            }
            if (event == Schedule.EventType.FINISH) {
              finished.add(task);
            }
          }
        });
Schedule s = sched(ListUtil.list(t1));
fact.setResult(s);
assertTrue(tr.addToSchedule(t1));
Schedule.Chunk chunk = (Schedule.Chunk) s.getEvents().get(0);
assertTrue(tr.getCurrentSchedule().getEvents().contains(chunk));
chunk.setTaskEnd();
t1.setFinished(); // avoids impossible task state warning in removeTask()
tr.removeChunk(chunk);
assertFalse(tr.getCurrentSchedule().getEvents().contains(chunk));
assertEmpty(tr.getAcceptedTasks());
assertIsomorphic(ListUtil.list(t1), finished);
}

// remove task-ending chunk, past task deadline, s.b. Timeout error.
public void testRemoveChunkTaskEndTimeout() { final List finished = new ArrayList(); StepTask t1 = task( 100, 200, 100, new TaskCallback() { public void taskEvent(SchedulableTask task, Schedule.EventType event) { if (log.isDebug2()) { log.debug2("testRemoveChunkTaskEndTimeout callback"); } if (event == Schedule.EventType.FINISH) { finished.add(task); } } }); Schedule s = sched(ListUtil.list(t1)); fact.setResult(s); assertTrue(tr.addToSchedule(t1)); Schedule.Chunk chunk = (Schedule.Chunk) s.getEvents().get(0); assertTrue(tr.getCurrentSchedule().getEvents().contains(chunk)); chunk.setTaskEnd(); TimeBase.setSimulated(201); tr.removeChunk(chunk); assertFalse(tr.getCurrentSchedule().getEvents().contains(chunk)); assertSame(t1, finished.get(0)); assertNotNull(t1.e); assertTrue(t1.e.toString(), t1.e instanceof SchedService.Timeout); assertEmpty(tr.getAcceptedTasks()); } // remove overrunnable task-ending chunk, before deadline, public void testRemoveChunkTaskEndOver() { final List finished = new ArrayList(); StepTask t1 = task( 100, 200, 100, new TaskCallback() { public void taskEvent(SchedulableTask task, Schedule.EventType event) { if (log.isDebug2()) { log.debug2("testRemoveChunkTaskEndOver callback"); } if (event == Schedule.EventType.FINISH) { finished.add(task); } } }); t1.setOverrunAllowed(true); Schedule s = sched(ListUtil.list(t1)); fact.setResult(s); assertTrue(tr.addToSchedule(t1)); Schedule.Chunk chunk = (Schedule.Chunk) s.getEvents().get(0); assertTrue(tr.getCurrentSchedule().getEvents().contains(chunk)); chunk.setTaskEnd(); tr.removeChunk(chunk); assertFalse(tr.getCurrentSchedule().getEvents().contains(chunk)); assertEmpty(finished); assertIsomorphic(ListUtil.list(t1), tr.getAcceptedTasks()); assertIsomorphic(SetUtil.set(t1), tr.getOverrunTasks()); } // Background event record class BERec { Deadline when; BackgroundTask task; Schedule.EventType event; BERec(Deadline when, BackgroundTask task, Schedule.EventType event) { this.when = when; this.task = task; 
this.event = event; } BERec(long when, BackgroundTask task, Schedule.EventType event) { this.when = Deadline.at(when); this.task = task; this.event = event; } public boolean equals(Object obj) { if (obj instanceof BERec) { BERec o = (BERec) obj; return when.equals(o.when) && task.equals(o.task) && event == o.event; } return false; } public String toString() { return "[BERec: " + event + ", " + when + ", " + task + "]"; } } public void testBackground() { final List rec = new ArrayList(); TaskCallback cb = new TaskCallback() { public void taskEvent(SchedulableTask task, Schedule.EventType event) { rec.add(new BERec(Deadline.in(0), (BackgroundTask) task, event)); } }; assertFalse(tr.findTaskToRun()); BackgroundTask t1 = btask(100, 200, .1, cb); BackgroundTask t2 = btask(100, 300, .2, cb); BackgroundTask t3 = btask(150, 200, .4, cb); Schedule s = sched( ListUtil.list( bEvent(t1, Schedule.EventType.START), bEvent(t2, Schedule.EventType.START), bEvent(t3, Schedule.EventType.START), bEvent(t1, Schedule.EventType.FINISH), bEvent(t3, Schedule.EventType.FINISH), bEvent(t2, Schedule.EventType.FINISH))); fact.setResults(ListUtil.list(s, s, s)); assertTrue(tr.addToSchedule(t1)); assertTrue(tr.addToSchedule(t2)); assertTrue(tr.addToSchedule(t3)); assertEquals(3, tr.getAcceptedTasks().size()); assertIsomorphic(ListUtil.list(t1, t2, t3), tr.getAcceptedTasks()); assertFalse(tr.findTaskToRun()); assertEquals(0, rec.size()); assertEquals(0, tr.getBackgroundLoadFactor(), .005); assertEquals(Deadline.at(100), tr.runningDeadline); TimeBase.setSimulated(101); assertFalse(tr.findTaskToRun()); assertEquals(2, rec.size()); assertEquals(.3, tr.getBackgroundLoadFactor(), .005); TimeBase.setSimulated(151); assertFalse(tr.findTaskToRun()); assertEquals(3, rec.size()); assertEquals(.7, tr.getBackgroundLoadFactor(), .005); assertEquals(3, tr.getAcceptedTasks().size()); TimeBase.setSimulated(201); assertFalse(tr.findTaskToRun()); assertEquals(5, rec.size()); assertEquals(.2, 
tr.getBackgroundLoadFactor(), .005); assertEquals(1, tr.getAcceptedTasks().size()); t2.taskIsFinished(); TimeBase.setSimulated(202); assertFalse(tr.findTaskToRun()); assertEquals(6, rec.size()); assertEquals(0, tr.getBackgroundLoadFactor(), .005); assertEquals(0, tr.getAcceptedTasks().size()); TimeBase.setSimulated(301); assertFalse(tr.findTaskToRun()); assertEquals(6, rec.size()); assertEquals(0, tr.getBackgroundLoadFactor(), .005); List exp = ListUtil.list( new BERec(101, t1, Schedule.EventType.START), new BERec(101, t2, Schedule.EventType.START), new BERec(151, t3, Schedule.EventType.START), new BERec(201, t1, Schedule.EventType.FINISH), new BERec(201, t3, Schedule.EventType.FINISH), new BERec(201, t2, Schedule.EventType.FINISH)); assertEquals(exp, rec); } public void testRunStepsOneTaskAndCallback() { final List finished = new ArrayList(); TaskCallback cb = new TaskCallback() { public void taskEvent(SchedulableTask task, Schedule.EventType event) { if (event == Schedule.EventType.FINISH) { finished.add(task); } } }; StepTask t1 = task(100, 200, 100, cb, new MyMockStepper(10, -10)); Schedule s = sched(ListUtil.list(t1)); fact.setResult(s); assertTrue(tr.addToSchedule(t1)); TimeBase.setSimulated(101); assertTrue(tr.findTaskToRun()); Interrupter intr = null; try { intr = interruptMeIn(TIMEOUT_SHOULDNT, true); tr.runSteps(new MutableBoolean(true), null); intr.cancel(); } catch (Exception e) { log.error("runSteps threw:", e); } finally { if (intr.did()) { fail("runSteps looped"); } } assertSame(t1, finished.get(0)); assertNull(t1.e); } public void testRunStepsWithOverrunDisallowed() { StepTask t1 = task(100, 300, 100, null, new MyMockStepper(15, -10)); // t1.setOverrunAllowed(true); StepTask t2 = task(150, 250, 100, null, new MyMockStepper(10, -10)); Schedule s = sched(ListUtil.list(t1, t2)); fact.setResults(s, s); assertTrue(tr.addToSchedule(t1)); assertTrue(tr.addToSchedule(t2)); TimeBase.setSimulated(101); Interrupter intr = null; try { intr = 
interruptMeIn(TIMEOUT_SHOULDNT, true); while (tr.findTaskToRun()) { tr.runSteps(new MutableBoolean(true), null); } intr.cancel(); } catch (Exception e) { log.error("runSteps threw:", e); } finally { if (intr.did()) { fail("runSteps looped"); } } assertEquals(SetUtil.set(t1, t2), SetUtil.theSet(removedTasks)); assertTrue(t1.e.toString(), t1.e instanceof SchedService.Overrun); } private void newTr(MyMockTaskRunner newTr) { if (tr != null) { tr.stopService(); } tr = newTr; tr.initService(getMockLockssDaemon()); tr.startService(); } public void testRunStepsWithOverrunAllowed() { StepTask t1 = task(100, 500, 30, null, new MyMockStepper(15, -10)); t1.setOverrunAllowed(true); StepTask t2 = task(150, 250, 100, null, new MyMockStepper(10, -10)); newTr( new MyMockTaskRunner( new TaskRunner.SchedulerFactory() { public Scheduler createScheduler() { return new SortScheduler(); } })); assertTrue(tr.addToSchedule(t1)); assertTrue(tr.addToSchedule(t2)); TimeBase.setSimulated(101); assertTrue(tr.findTaskToRun()); Interrupter intr = null; try { intr = interruptMeIn(TIMEOUT_SHOULDNT, true); while (tr.findTaskToRun()) { tr.runSteps(new MutableBoolean(true), null); } intr.cancel(); } catch (Exception e) { log.error("runSteps threw:", e); } finally { if (intr.did()) { fail("runSteps looped"); } } assertNull(t1.e); assertTrue(t1.hasOverrun()); } // test resched with overrun task doesn't lose task. 
public void testRunStepsWithOverrunAllowedPlusResched() { StepTask t1 = task(100, 500, 30, null, new MyMockStepper(15, -10)); t1.setOverrunAllowed(true); StepTask t2 = task(150, 250, 100, null, new MyMockStepper(10, -10)); newTr( new MyMockTaskRunner( new TaskRunner.SchedulerFactory() { public Scheduler createScheduler() { return new SortScheduler(); } })); assertTrue(tr.addToSchedule(t1)); assertEmpty(tr.getOverrunTasks()); TimeBase.setSimulated(101); assertTrue(tr.findTaskToRun()); t1.timeUsed = 1000; assertTrue(t1.hasOverrun()); assertEmpty(tr.getOverrunTasks()); assertTrue(tr.addToSchedule(t2)); assertEquals(SetUtil.set(t1), SetUtil.theSet(tr.getOverrunTasks())); } public void testStepperThrows() { final List finished = new ArrayList(); TaskCallback cb = new TaskCallback() { public void taskEvent(SchedulableTask task, Schedule.EventType event) { if (event == Schedule.EventType.FINISH) { finished.add(task); } } }; MyMockStepper stepper = new MyMockStepper(10, -10); stepper.setWhenToThrow(5); StepTask t1 = task(100, 200, 100, cb, stepper); Schedule s = sched(ListUtil.list(t1)); fact.setResult(s); assertTrue(tr.addToSchedule(t1)); TimeBase.setSimulated(101); assertTrue(tr.findTaskToRun()); Interrupter intr = null; try { intr = interruptMeIn(TIMEOUT_SHOULDNT, true); tr.runSteps(new MutableBoolean(true), null); intr.cancel(); } catch (Exception e) { log.error("runSteps threw:", e); } finally { if (intr.did()) { fail("runSteps looped"); } } assertSame(t1, finished.get(0)); assertTrue(t1.e instanceof ExpectedRuntimeException); assertEquals(5, stepper.nSteps); } public void testNotifyThread() { final List rec = new ArrayList(); final SimpleBinarySemaphore sem = new SimpleBinarySemaphore(); tr.setImmediateNotify(false); TaskCallback cb = new TaskCallback() { public void taskEvent(SchedulableTask task, Schedule.EventType event) { rec.add(new BERec(Deadline.in(0), (BackgroundTask) task, event)); sem.give(); } }; BackgroundTask t1 = btask(100, 200, .1, cb); BackgroundTask 
t2 = btask(100, 300, .2, cb); tr.notify(t1, Schedule.EventType.START); tr.notify(t1, Schedule.EventType.FINISH); // 2nd finish event should not cause another callback tr.notify(t1, Schedule.EventType.FINISH); tr.notify(t2, Schedule.EventType.START); Interrupter intr = null; try { intr = interruptMeIn(TIMEOUT_SHOULDNT, true); while (rec.size() < 3) { sem.take(); } assertEquals( ListUtil.list( new BERec(0, t1, Schedule.EventType.START), new BERec(0, t1, Schedule.EventType.FINISH), new BERec(0, t2, Schedule.EventType.START)), rec); intr.cancel(); } finally { if (intr.did()) { fail("Notifier didn't run callbacks"); } } } class MyMockTaskRunner extends TaskRunner { private boolean doImmediateNotify = true; MyMockTaskRunner(TaskRunner.SchedulerFactory fact) { super(fact); } void removeChunk(Schedule.Chunk chunk) { removedChunks.add(chunk); super.removeChunk(chunk); } void removeTask(StepTask task) { removedTasks.add(task); super.removeTask(task); } // Most of the tests in this file were written when task event // notification was done synchronously in TaskRunner. Reproduce that // behavior here for simplicity. 
void notify(SchedulableTask task, Schedule.EventType eventType) { if (doImmediateNotify) { task.callback.taskEvent(task, eventType); } else { super.notify(task, eventType); } } void setImmediateNotify(boolean immediate) { doImmediateNotify = immediate; } } class SchedFact implements TaskRunner.SchedulerFactory { List results; MyMockScheduler scheduler; List createArgs = new ArrayList(); public SchedFact(Schedule resultSchedule) { this.results = ListUtil.list(resultSchedule); } public SchedFact() { this(null); } public void setResult(Schedule resultSchedule) { this.results = ListUtil.list(resultSchedule); } public void setResults(List results) { this.results = results; } public void setResults(Schedule s1, Schedule s2) { setResults(ListUtil.list(s1, s2)); } public Scheduler createScheduler() { scheduler = new MyMockScheduler(results); return scheduler; } class MyMockScheduler implements Scheduler { List results; Collection tasks; Schedule lastSched = null; MyMockScheduler(List results) { this.results = results; } public boolean createSchedule(Collection tasks) { log.debug("createSchedule(" + tasks + ")"); this.tasks = tasks; createArgs.add(tasks); if (results == null || results.isEmpty()) { lastSched = null; } else { lastSched = (Schedule) results.remove(0); } return lastSched != null; } public Schedule getSchedule() { log.info("getSchedule(): " + lastSched); return lastSched; } public Collection getTasks() { return tasks; } } } class MyMockStepper implements Stepper { int nSteps = 1; // not finished by default int eachStepTime = 0; int whenToThrow = -1; MyMockStepper() {} /** * Make a stepper that repeats n times: wait until time elapsed, or advance simulated time, * * @param nSteps number of steps to execute before isFinished returns true. * @param eachStepTime if >0, ms to sleep on each step. if <0, step * @returns some measure of amount of work done. simulated time by abs(eachStepTime) on each * step. 
*/ MyMockStepper(int nSteps, int eachStepTime) { this.nSteps = nSteps; this.eachStepTime = eachStepTime; } public int computeStep(int metric) { int work = 0; if (nSteps == whenToThrow) { throw new ExpectedRuntimeException("Hash step throw test"); } if (nSteps-- > 0) { if (eachStepTime > 0) { Deadline time = Deadline.in(eachStepTime); while (!time.expired()) { try { Thread.sleep(1); } catch (InterruptedException e) { throw new RuntimeException(e.toString()); } work++; } } else { work = -eachStepTime; TimeBase.step(work); try { Thread.sleep(1); } catch (InterruptedException e) { throw new RuntimeException(e.toString()); } } } return work; } public boolean isFinished() { return nSteps <= 0; } void setFinished() { nSteps = 0; } void setWhenToThrow(int step) { whenToThrow = step; } } }
/** Functional tests on the simulated content generator. */
public class FuncSimulatedContent extends LockssTestCase {
  static final Logger log = Logger.getLogger("FuncSimulatedContent");
  private PluginManager pluginMgr;
  private Plugin simPlugin;
  private SimulatedArchivalUnit sau1;
  private SimulatedContentGenerator scgen = null;
  private MockLockssDaemon theDaemon;
  String tempDirPath;
  String tempDirPath2;
  // Relative URL of the file configured (via badCachedFileLoc/Num) to be
  // stored with damaged content.
  private static String DAMAGED_CACHED_URL = "/branch2/branch2/002file.txt";

  public FuncSimulatedContent(String msg) {
    super(msg);
  }

  /** Starts a mock daemon with hash service, system metrics and plugin mgr. */
  public void setUp() throws Exception {
    super.setUp();
    tempDirPath = getTempDir().getAbsolutePath() + File.separator;
    theDaemon = getMockLockssDaemon();
    theDaemon.getAlertManager();
    theDaemon.getPluginManager().setLoadablePluginsReady(true);
    theDaemon.getHashService();
    MockSystemMetrics metrics = new MyMockSystemMetrics();
    metrics.initService(theDaemon);
    theDaemon.setSystemMetrics(metrics);
    theDaemon.setDaemonInited(true);
    Properties props = new Properties();
    props.setProperty(SystemMetrics.PARAM_HASH_TEST_DURATION, "1000");
    props.setProperty(SystemMetrics.PARAM_HASH_TEST_BYTE_STEP, "1024");
    props.setProperty(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, tempDirPath);
    ConfigurationUtil.setCurrentConfigFromProps(props);
    pluginMgr = theDaemon.getPluginManager();
    pluginMgr.startService();
    theDaemon.getHashService().startService();
    metrics.startService();
    metrics.setHashSpeed(100);
    simPlugin = PluginTestUtil.findPlugin(SimulatedPlugin.class);
  }

  // NOTE(review): looks like this NPEs if setUp() failed before sau1 was
  // assigned, which would mask the original failure — confirm intended.
  public void tearDown() throws Exception {
    theDaemon.getLockssRepository(sau1).stopService();
    theDaemon.getNodeManager(sau1).stopService();
    theDaemon.getPluginManager().stopService();
    theDaemon.getHashService().stopService();
    theDaemon.getSystemMetrics().stopService();
    theDaemon.stopDaemon();
    super.tearDown();
  }

  /** Creates and starts a simulated AU from the given configuration. */
  SimulatedArchivalUnit setupSimAu(Configuration auConfig)
      throws ArchivalUnit.ConfigurationException {
    ArchivalUnit au = PluginTestUtil.createAndStartAu(simPlugin, auConfig);
    return (SimulatedArchivalUnit) au;
  }

  /** Standard sim AU config: depth 2, branch 2, 2 files, one damaged file. */
  Configuration simAuConfig(String rootPath) {
    Configuration conf = ConfigManager.newConfiguration();
    conf.put("root", rootPath);
    conf.put("depth", "2");
    conf.put("branch", "2");
    conf.put("numFiles", "2");
    conf.put("badCachedFileLoc", "2,2");
    conf.put("badCachedFileNum", "2");
    return conf;
  }

  /** Turns the AU's hash filter on or off by rewriting its configuration. */
  void enableFilter(SimulatedArchivalUnit sau, boolean enable)
      throws ArchivalUnit.ConfigurationException {
    Configuration auConfig = sau.getConfiguration().copy();
    // no bad file when playing with filtering
    auConfig.remove("badCachedFileLoc");
    auConfig.remove("badCachedFileNum");
    if (enable) {
      auConfig.put(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC, "true");
    } else {
      auConfig.remove(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC);
    }
    sau.setConfiguration(auConfig);
  }

  /** End-to-end pass: generate, crawl, verify, repair damage, filter, hash. */
  public void testSimulatedContent() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    checkContent(sau1);
    doDamageRemoveTest(sau1); // must be before content read again
    checkFilter(sau1);
    hashContent(sau1); // this resets AU's config, do last to avoid messing up toBeDamaged set
  }

  /** Two identically-configured AUs must produce identical name/content hashes. */
  public void testDualContentHash() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet set = sau1.getAuCachedUrlSet();
    byte[] nameH = getHash(set, true);
    byte[] contentH = getHash(set, false);
    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    SimulatedArchivalUnit sau2 = setupSimAu(simAuConfig(tempDirPath2));
    createContent(sau2);
    crawlContent(sau2);
    set = sau2.getAuCachedUrlSet();
    byte[] nameH2 = getHash(set, true);
    byte[] contentH2 = getHash(set, false);
    assertEquals(nameH, nameH2);
    assertEquals(contentH, contentH2);
  }

  /** Changing base_url changes only the host part of every AU URL. */
  public void testBaseUrl() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet cus1 = sau1.getAuCachedUrlSet();
    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);
    CachedUrlSet cus2 = sau1.getAuCachedUrlSet();
    List urls1 = auUrls(sau1);
    List urls2 = auUrls(sau2);
    Pattern pat = Pattern.compile("http://([^/]+)(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals("www.example.com", m1.group(1));
      assertEquals("anotherhost.org", m2.group(1));
      assertEquals(m1.group(2), m2.group(2));
    }
  }

  /** A base_url with a path prefix leaves the per-AU path suffixes identical. */
  public void testBaseUrlPath() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet cus1 = sau1.getAuCachedUrlSet();
    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/some/path/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);
    CachedUrlSet cus2 = sau1.getAuCachedUrlSet();
    List urls1 = auUrls(sau1);
    List urls2 = auUrls(sau2);
    Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
    Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat1.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat2.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals(m1.group(1), m2.group(1));
    }
  }

  /** Returns the URLs of all nodes in the AU that have content. */
  List<String> auUrls(ArchivalUnit au) {
    List<String> res = new ArrayList<String>();
    for (Iterator iter = au.getAuCachedUrlSet().contentHashIterator(); iter.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) iter.next();
      if (cusn.hasContent()) {
        res.add(cusn.getUrl());
      }
    }
    return res;
  }

  /** Regenerates the AU's simulated content tree (HTML + TXT, one abnormal file). */
  protected void createContent(SimulatedArchivalUnit sau) {
    log.debug("createContent()");
    scgen = sau.getContentGenerator();
    scgen.setFileTypes(
        SimulatedContentGenerator.FILE_TYPE_HTML + SimulatedContentGenerator.FILE_TYPE_TXT);
    scgen.setAbnormalFile("1,1", 1);
    scgen.setOddBranchesHaveContent(true);
    sau.deleteContentTree();
    sau.generateContentTree();
    assertTrue(scgen.isContentTree());
  }

  /** Crawls the simulated AU into the cache (no crawl-end actions). */
  protected void crawlContent(SimulatedArchivalUnit sau) {
    log.debug("crawlContent()");
    CrawlSpec spec = new SpiderCrawlSpec(sau.getNewContentCrawlUrls(), null);
    Crawler crawler = new NoCrawlEndActionsNewContentCrawler(sau, spec, new MockAuState());
    crawler.doCrawl();
  }

  /** Verifies root, leaf, stored content and link depth of the crawled AU. */
  protected void checkContent(SimulatedArchivalUnit sau) throws IOException {
    log.debug("checkContent()");
    checkRoot(sau);
    checkLeaf(sau);
    checkStoredContent(sau);
    checkDepth(sau);
  }

  /** Compares hash-filtered vs unfiltered content of one HTML file. */
  protected void checkFilter(SimulatedArchivalUnit sau) throws Exception {
    log.debug("checkFilter()");
    CachedUrl cu = sau.makeCachedUrl(sau.getUrlRoot() + "/001file.html");
    enableFilter(sau, true);
    InputStream is = cu.openForHashing();
    String expected = "001file.html This is file 1, depth 0, branch 0. foobar ";
    assertEquals(expected, StringUtil.fromInputStream(is));
    is.close();
    enableFilter(sau, false);
    cu = sau.makeCachedUrl(sau.getUrlRoot() + "/001file.html");
    is = cu.openForHashing();
    expected =
        "<HTML><HEAD><TITLE>001file.html</TITLE></HEAD><BODY>\n"
            + "This is file 1, depth 0, branch 0.<br><!-- comment --> "
            + "Citation String foobar<br><script>"
            + "(defun fact (n) (cond ((= n 0) 1) (t (fact (sub1 n)))))</script>\n"
            + "</BODY></HTML>";
    assertEquals(expected, StringUtil.fromInputStream(is));
    is.close();
  }

  private byte[] fromHex(String hex) {
    return ByteArray.fromHexString(hex);
  }

  /** Checks name/content hashes against golden SHA-1 values. */
  protected void hashContent(SimulatedArchivalUnit sau) throws Exception {
    log.debug("hashContent()");
    measureHashSpeed(sau);
    // If any changes are made to the contents or shape of the simulated
    // content tree, these hash values will have to be changed
    checkHashSet(sau, true, false, fromHex("6AB258B4E1FFD9F9B45316B4F54111FF5E5948D2"));
    checkHashSet(sau, true, true, fromHex("6AB258B4E1FFD9F9B45316B4F54111FF5E5948D2"));
    checkHashSet(sau, false, false, fromHex("409893F1A603F4C276632694DB1621B639BD5164"));
    checkHashSet(sau, false, true, fromHex("85E6213C3771BEAC5A4602CAF7982C6C222800D5"));
  }

  /** Link depth should match path depth of each URL. */
  protected void checkDepth(SimulatedArchivalUnit sau) {
    log.debug("checkDepth()");
    String URL_ROOT = sau.getUrlRoot();
    assertEquals(0, sau.getLinkDepth(URL_ROOT + "/index.html"));
    assertEquals(0, sau.getLinkDepth(URL_ROOT + "/"));
    assertEquals(1, sau.getLinkDepth(URL_ROOT + "/001file.html"));
    assertEquals(1, sau.getLinkDepth(URL_ROOT + "/branch1/index.html"));
    assertEquals(1, sau.getLinkDepth(URL_ROOT + "/branch1/"));
    assertEquals(2, sau.getLinkDepth(URL_ROOT + "/branch1/001file.html"));
  }

  /** The AU's flat set contains only the root, whose children are as expected. */
  protected void checkRoot(SimulatedArchivalUnit sau) {
    log.debug("checkRoot()");
    CachedUrlSet set = sau.getAuCachedUrlSet();
    Iterator setIt = set.flatSetIterator();
    ArrayList childL = new ArrayList(1);
    CachedUrlSet cus = null;
    while (setIt.hasNext()) {
      cus = (CachedUrlSet) setIt.next();
      childL.add(cus.getUrl());
    }
    String urlRoot = sau.getUrlRoot();
    String[] expectedA = new String[1];
    expectedA[0] = urlRoot;
    assertIsomorphic(expectedA, childL);
    setIt = cus.flatSetIterator();
    childL = new ArrayList(7);
    while (setIt.hasNext()) {
      childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
    }
    expectedA =
        new String[] {
          urlRoot + "/001file.html",
          urlRoot + "/001file.txt",
          urlRoot + "/002file.html",
          urlRoot + "/002file.txt",
          urlRoot + "/branch1",
          urlRoot + "/branch2",
          urlRoot + "/index.html"
        };
    assertIsomorphic(expectedA, childL);
  }

  /** The content iterator under /branch1 yields the full expected subtree. */
  protected void checkLeaf(SimulatedArchivalUnit sau) {
    log.debug("checkLeaf()");
    String parent = sau.getUrlRoot() + "/branch1";
    CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
    CachedUrlSet set = sau.makeCachedUrlSet(spec);
    Iterator setIt = set.contentHashIterator();
    ArrayList childL = new ArrayList(16);
    while (setIt.hasNext()) {
      childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
    }
    String[] expectedA =
        new String[] {
          parent,
          parent + "/001file.html",
          parent + "/001file.txt",
          parent + "/002file.html",
          parent + "/002file.txt",
          parent + "/branch1",
          parent + "/branch1/001file.html",
          parent + "/branch1/001file.txt",
          parent + "/branch1/002file.html",
          parent + "/branch1/002file.txt",
          parent + "/branch1/index.html",
          parent + "/branch2",
          parent + "/branch2/001file.html",
          parent + "/branch2/001file.txt",
          parent + "/branch2/002file.html",
          parent + "/branch2/002file.txt",
          parent + "/branch2/index.html",
          parent + "/index.html",
        };
    assertIsomorphic(expectedA, childL);
  }

  /** Compares a cached URL's stored content against the generator's expected text. */
  protected void checkUrlContent(
      SimulatedArchivalUnit sau,
      String path,
      int fileNum,
      int depth,
      int branchNum,
      boolean isAbnormal,
      boolean isDamaged)
      throws IOException {
    String file = sau.getUrlRoot() + path;
    CachedUrl url = sau.makeCachedUrl(file);
    String content = getUrlContent(url);
    String expectedContent;
    if (path.endsWith(".html")) {
      String fn = path.substring(path.lastIndexOf("/") + 1);
      expectedContent = scgen.getHtmlFileContent(fn, fileNum, depth, branchNum, isAbnormal);
    } else {
      expectedContent = scgen.getTxtContent(fileNum, depth, branchNum, isAbnormal);
    }
    if (isDamaged) {
      assertNotEquals(expectedContent, content);
    } else {
      assertEquals(expectedContent, content);
    }
  }

  /** Spot-checks normal, abnormal and damaged file contents. */
  protected void checkStoredContent(SimulatedArchivalUnit sau) throws IOException {
    checkUrlContent(sau, "/001file.txt", 1, 0, 0, false, false);
    checkUrlContent(sau, "/branch1/branch1/001file.txt", 1, 2, 1, true, false);
    checkUrlContent(sau, DAMAGED_CACHED_URL, 2, 2, 2, false, true);
  }

  /** Refetching the damaged URL should repair it. */
  protected void doDamageRemoveTest(SimulatedArchivalUnit sau) throws Exception {
    /* Cache the file again; this time the damage should be gone */
    String file = sau.getUrlRoot() + DAMAGED_CACHED_URL;
    UrlCacher uc = sau.makeUrlCacher(file);
    BitSet fetchFlags = new BitSet();
    fetchFlags.set(UrlCacher.REFETCH_FLAG);
    uc.setFetchFlags(fetchFlags);
    uc.cache();
    checkUrlContent(sau, DAMAGED_CACHED_URL, 2, 2, 2, false, false);
  }

  /** The estimated hash duration should match the padded size/speed estimate. */
  private void measureHashSpeed(SimulatedArchivalUnit sau) throws Exception {
    MessageDigest dig = null;
    try {
      dig = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException ex) {
      fail("No algorithm.");
    }
    CachedUrlSet set = sau.getAuCachedUrlSet();
    CachedUrlSetHasher hasher = set.getContentHasher(dig);
    SystemMetrics metrics = theDaemon.getSystemMetrics();
    int estimate = metrics.getBytesPerMsHashEstimate(hasher, dig);
    // should be protected against this being zero by MyMockSystemMetrics,
    // but otherwise use the proper calculation.  This avoids test failure
    // due to really slow machines
    assertTrue(estimate > 0);
    long estimatedTime = set.estimatedHashDuration();
    long size = ((Long) PrivilegedAccessor.getValue(set, "totalNodeSize")).longValue();
    assertTrue(size > 0);
    System.out.println("b/ms: " + estimate);
    System.out.println("size: " + size);
    System.out.println("estimate: " + estimatedTime);
    assertEquals(estimatedTime, theDaemon.getHashService().padHashEstimate(size / estimate));
  }

  /** Hashes the whole AU and a sub-range; the two must differ. */
  private void checkHashSet(
      SimulatedArchivalUnit sau, boolean namesOnly, boolean filter, byte[] expected)
      throws Exception {
    enableFilter(sau, filter);
    CachedUrlSet set = sau.getAuCachedUrlSet();
    byte[] hash = getHash(set, namesOnly);
    assertEquals(expected, hash);
    String parent = sau.getUrlRoot() + "/branch1";
    CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
    set = sau.makeCachedUrlSet(spec);
    byte[] hash2 = getHash(set, namesOnly);
    assertFalse(Arrays.equals(hash, hash2));
  }

  /** Returns the SHA-1 of the set's names or content. */
  private byte[] getHash(CachedUrlSet set, boolean namesOnly) throws IOException {
    MessageDigest dig = null;
    try {
      dig = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException ex) {
      fail("No algorithm.");
    }
    hash(set, dig, namesOnly);
    return dig.digest();
  }

  /** Drives the hasher to completion in 256-byte steps, logging throughput. */
  private void hash(CachedUrlSet set, MessageDigest dig, boolean namesOnly) throws IOException {
    CachedUrlSetHasher hasher = null;
    if (namesOnly) {
      hasher = set.getNameHasher(dig);
    } else {
      hasher = set.getContentHasher(dig);
    }
    int bytesHashed = 0;
    long timeTaken = System.currentTimeMillis();
    while (!hasher.finished()) {
      bytesHashed += hasher.hashStep(256);
    }
    timeTaken = System.currentTimeMillis() - timeTaken;
    if ((timeTaken > 0) && (bytesHashed > 500)) {
      System.out.println("Bytes hashed: " + bytesHashed);
      System.out.println("Time taken: " + timeTaken + "ms");
      System.out.println("Bytes/sec: " + (bytesHashed * 1000 / timeTaken));
    } else {
      System.out.println("No time taken, or insufficient bytes hashed.");
      System.out.println("Bytes hashed: " + bytesHashed);
      System.out.println("Time taken: " + timeTaken + "ms");
    }
  }

  /** Reads a cached URL's raw (unfiltered) content as a String. */
  private String getUrlContent(CachedUrl url) throws IOException {
    InputStream content = url.getUnfilteredInputStream();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    StreamUtil.copy(content, baos);
    content.close();
    String contentStr = new String(baos.toByteArray());
    baos.close();
    return contentStr;
  }

  // this version doesn't fully override the 'measureHashSpeed()' function, but
  // protects against it returning '0' by returning the set speed
  private class MyMockSystemMetrics extends MockSystemMetrics {
    public int measureHashSpeed(CachedUrlSetHasher hasher, MessageDigest digest)
        throws IOException {
      int speed = super.measureHashSpeed(hasher, digest);
      if (speed == 0) {
        speed = getHashSpeed();
        if (speed <= 0) {
          throw new RuntimeException("No hash speed set.");
        }
      }
      return speed;
    }
  }

  public static void main(String[] argv) {
    String[] testCaseList = {FuncSimulatedContent.class.getName()};
    junit.swingui.TestRunner.main(testCaseList);
  }

  public static Test suite() {
    return new TestSuite(FuncSimulatedContent.class);
  }
}
/** Test class for <code>org.lockss.servlet.DaemonStatus</code> */ public class TestDaemonStatus extends LockssServletTestCase { private static final Logger log = Logger.getLogger(TestDaemonStatus.class); // private DaemonStatus ds; private StatusService statSvc; private StatusServiceImpl ssi; protected void setUp() throws Exception { super.setUp(); // ds = new DaemonStatus(); statSvc = theDaemon.getStatusService(); ssi = (StatusServiceImpl) statSvc; ssi.startService(); } // Tests that don't need a servlet environment public void testConvertDisplayString() throws Exception { // test null Object testObj = null; assertEquals("", format(testObj, ColumnDescriptor.TYPE_STRING)); // test standard numbers testObj = new Integer(123); assertEquals("123", format(testObj, ColumnDescriptor.TYPE_INT)); testObj = new Float(123321); assertEquals(testObj.toString(), format(testObj, ColumnDescriptor.TYPE_FLOAT)); // check proper 'big int' formatting testObj = new Long(12345678); assertEquals("12,345,678", format(testObj, ColumnDescriptor.TYPE_INT)); // test string testObj = "test string"; assertEquals("test string", format(testObj, ColumnDescriptor.TYPE_STRING)); // Issue 1901: verify that there is no encoding bias testObj = "<>&'\"\n"; String res = format(testObj, ColumnDescriptor.TYPE_STRING); assertEquals( "Expected \"" + StringEscapeUtils.escapeJava(testObj.toString()) + "\" but got \"" + StringEscapeUtils.escapeJava(res) + "\"; encoding bias?", "<>&'\"\n", res); // test percentage testObj = new Double(.453); assertEquals("45%", format(testObj, ColumnDescriptor.TYPE_PERCENT)); // test agreement testObj = new Double(.453); assertEquals("45.30%", format(testObj, ColumnDescriptor.TYPE_AGREEMENT)); testObj = new Double(.999999); assertEquals("99.99%", format(testObj, ColumnDescriptor.TYPE_AGREEMENT)); // test date Calendar cal = Calendar.getInstance(); cal.set(Calendar.YEAR, 2004); cal.set(Calendar.MONTH, Calendar.JANUARY); cal.set(Calendar.DATE, 1); cal.set(Calendar.HOUR_OF_DAY, 
15); cal.set(Calendar.MINUTE, 15); testObj = cal.getTime(); assertEquals( new DisplayConverter().getTableDateFormat().format(testObj), format(testObj, ColumnDescriptor.TYPE_DATE)); // test IPAddr testObj = IPAddr.getLocalHost(); assertEquals( IPAddr.getLocalHost().getHostAddress(), format(testObj, ColumnDescriptor.TYPE_IP_ADDRESS)); // test time interval long timeInt = Constants.HOUR + Constants.MINUTE; testObj = new Long(timeInt); assertEquals( StringUtil.timeIntervalToString(timeInt), format(testObj, ColumnDescriptor.TYPE_TIME_INTERVAL)); // test unknown testObj = "unknown string"; assertEquals("unknown string", format(testObj, -1)); } private String format(Object obj, int type) { return new DisplayConverter().convertDisplayString(obj, type); } // Utilities for running the servlet protected void initServletRunner() { super.initServletRunner(); sRunner.registerServlet("/DaemonStatus", DaemonStatus.class.getName()); // DaemonStatus wants there to be a local ip address ConfigurationUtil.setFromArgs(LockssServlet.PARAM_LOCAL_IP, "2.4.6.8"); } // request a table from the servlet WebResponse getTable(String table, boolean text) throws Exception { initServletRunner(); WebRequest request = new GetMethodWebRequest("http://null/DaemonStatus"); request.setParameter("table", table); if (text) { request.setParameter("output", "text"); } return sClient.getResponse(request); } // Break the line at commas, return a map of the resulting strings // broken at equals sign. (<i>Ie</i>, name value pairs.) 
Map getRow(String line) { Map map = new HashMap(); for (Iterator iter = StringUtil.breakAt(line, ',').iterator(); iter.hasNext(); ) { String item = (String) iter.next(); List pair = StringUtil.breakAt(item, '='); map.put(pair.get(0), pair.get(1)); } return map; } protected void assertEqualTables(Object[][] a1, List lines) { assertEquals("numrows", a1.length, lines.size() - NUM_HEADER_LINES); for (int irow = 0; irow <= a1.length - 1; irow++) { Object expRow[] = a1[irow]; List row = StringUtil.breakAt((String) lines.get(irow + NUM_HEADER_LINES), ','); assertEquals("numcols", expRow.length, row.size()); assertEquals(("row " + irow), SetUtil.fromArray(expRow), new HashSet(row)); } } // Tests for text output /** Number of lines before start of table proper */ static final int NUM_HEADER_LINES = 3; private static final Object[][] colArray1 = { {"name", "Name", new Integer(ColumnDescriptor.TYPE_STRING), "Foot note"}, {"rank", "Rank", new Integer(ColumnDescriptor.TYPE_INT)} }; private static final Object[][] colArrayWithNonString = { {StatusTable.ROW_SEPARATOR, "Foo", new Integer(ColumnDescriptor.TYPE_STRING)}, {"rank", "Rank", new Integer(ColumnDescriptor.TYPE_INT)} }; private static final Object[][] rowArray1 = { {"AA", "1"}, {"BB", "2"}, {"CC", "3"}, {"DD", "4"} }; private static final Object[][] table1 = { {"name=AA", "rank=1"}, {"name=BB", "rank=2"}, {"name=CC", "rank=3"}, {"name=DD", "rank=4"} }; private static final Object[][] rowArrayWithNulls = { {"AA", "1"}, {"BB", "2"}, {null, "3"}, {"DD", null} }; // null sorts to beginning of table private static final Object[][] tableWithNulls = { {"name=(null)", "rank=3"}, {"name=AA", "rank=1"}, {"name=BB", "rank=2"}, {"name=DD", "rank=(null)"} }; public void testText() throws Exception { MockStatusAccessor statusAccessor = MockStatusAccessor.generateStatusAccessor(colArray1, rowArray1); statusAccessor.setTitle("testtbl", null); statSvc.registerStatusAccessor("testtbl", statusAccessor); WebResponse resp = getTable("testtbl", 
true); assertResponseOk(resp); assertEquals("Content type", "text/plain", resp.getContentType()); log.debug(resp.getText()); List lines = getLines(resp); assertEquals(rowArray1.length + 3, lines.size()); Map row0 = getRow((String) lines.get(0)); assertEquals("2.4.6.8", row0.get("host")); Map row2 = getRow((String) lines.get(2)); assertEquals("testtbl", row2.get("table")); assertEqualTables(table1, lines); } // test null value in rows doesn't throw public void testTextNull() throws Exception { MockStatusAccessor statusAccessor = MockStatusAccessor.generateStatusAccessor(colArray1, rowArrayWithNulls); statSvc.registerStatusAccessor("testtbl", statusAccessor); WebResponse resp = getTable("testtbl", true); log.debug(resp.getText()); List lines = getLines(resp); assertEqualTables(tableWithNulls, lines); } // test special (non-string) key in row doesn't throw public void testTextNonStringKey() throws Exception { MockStatusAccessor statusAccessor = new MockStatusAccessor(); List cols = ListUtil.list("foo", StatusTable.ROW_SEPARATOR); statusAccessor.setRows(MockStatusAccessor.makeRowsFrom(cols, rowArray1), null); statusAccessor.setColumnDescriptors( MockStatusAccessor.makeColumnDescriptorsFrom(colArray1), null); statSvc.registerStatusAccessor("testtbl", statusAccessor); WebResponse resp = getTable("testtbl", true); log.debug(resp.getText()); } }
/**
 * Test class for the Elsevier DTD5 XML source metadata extractor.
 *
 * <p>Exercises three levels of extraction: a single main.xml parse, a
 * dataset.xml parse, and a full functional pass over two real tarballs
 * loaded into a simulated AU. Expected metadata values are keyed by DOI
 * in the static maps below.
 */
public class TestElsevierDTD5XmlMetadataExtractor extends SourceXmlMetadataExtractorTest {

  private static final Logger log = Logger.getLogger(TestElsevierDTD5XmlMetadataExtractor.class);

  // NOTE(review): never assigned or read in this class — possibly vestigial.
  private MockLockssDaemon theDaemon;
  // AU built from the tarball TDB entry in setUp().
  protected ArchivalUnit tarAu;

  // NOTE(review): unused here; the plugin appears to be selected via the TDB
  // file loaded in createTarAu() — confirm before removing.
  private static String PLUGIN_NAME = "org.lockss.plugin.elsevier.ClockssElsevierDTD5SourcePlugin";
  private static String BASE_URL = "http://www.source.org/";
  private static String YEAR_NAME = "2014";
  private static String TAR_A_BASE = BASE_URL + YEAR_NAME + "/CLKS003A.tar";
  private static String TAR_B_BASE = BASE_URL + YEAR_NAME + "/CLKS003B.tar";
  private static String SUBDIR = "!/CLKS003/";

  // Headers applied when storing tar content into the AU.
  CIProperties tarHeader;

  /* for testing validation: expected per-DOI values, filled in by setUpExpectedTarContent() */
  private static Map<String, String> pubTitleMap;
  private static Map<String, String> dateMap;
  private static Map<String, String> accessUrlMap;
  private static Map<String, String> volMap;
  private static Map<String, String> issueMap;
  private static Map<String, List<String>> authorMap;

  // Extractor that also checks for content (PDF existence).
  static FileMetadataListExtractor els_mle;
  // Extractor variant with the PDF-existence check suppressed (see inner class below).
  static FileMetadataListExtractor nocheck_mle;

  private static final String testDatasetFile = "testDataset.xml";
  private static final String realTARFile_A = "CLKS003A.tar";
  private static final String realTARFile_B = "CLKS003B.tar";

  public void setUp() throws Exception {
    super.setUp();
    tarHeader = new CIProperties();
    tarHeader.put(CachedUrl.PROPERTY_CONTENT_TYPE, "application/tar");
    tarAu = createTarAu();
    // for tests that also check for content
    els_mle =
        new FileMetadataListExtractor(
            new ElsevierDTD5XmlSourceMetadataExtractorFactory
                .ElsevierDTD5XmlSourceMetadataExtractor());
    // for tests that use a no-check-for-pdf version of the extractor
    nocheck_mle = new FileMetadataListExtractor(new TestElsevierDTD5MetadataExtractor());
    setUpExpectedTarContent();
  }

  /** Loads the TDB config resource and creates/starts the AU named in it. */
  protected ArchivalUnit createTarAu() throws ArchivalUnit.ConfigurationException {
    // in this directory this is file "test_elsevierdtd5.tdb" but it becomes xml
    try {
      ConfigurationUtil.addFromUrl(getResource("test_elsevierdtd5.xml"));
    } catch (IOException e) {
      // Config load failure will surface below as a missing TdbAu.
      e.printStackTrace();
    }
    Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
    TdbAu tdbau1 = tdb.getTdbAusLikeName("Elsevier Source Content 2014").get(0);
    assertNotNull("Didn't find named TdbAu", tdbau1);
    return PluginTestUtil.createAndStartAu(tdbau1);
  }

  /*
   * The tests to run for this class
   */

  /** Parses a single main.xml (no PDF check) and validates the one resulting record. */
  public void testSimpleMainXML() throws Exception {
    log.debug3("testSimpleMainXML");
    String xml_url = TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.xml";
    List<ArticleMetadata> mdList =
        extractFromContent(xml_url, "text/xml", simpleMain, nocheck_mle, null);
    assertEquals(1, mdList.size());
    validateSingleMainMetadataRecord(mdList.get(0), "10.1016/j.jidx.2014.07.028", "article");
  }

  /** Parses the test dataset.xml resource (no PDF check); expects six records. */
  public void testSimpleDatasetXML() throws Exception {
    log.debug3("testSimpleDatasetXML");
    String file_input = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));
    String xml_url = TAR_A_BASE + SUBDIR + "dataset.xml";
    List<ArticleMetadata> mdList =
        extractFromContent(xml_url, "text/xml", file_input, nocheck_mle, null);
    assertEquals(6, mdList.size());
    Iterator<ArticleMetadata> mdIt = mdList.iterator();
    ArticleMetadata mdRecord = null;
    while (mdIt.hasNext()) {
      mdRecord = (ArticleMetadata) mdIt.next();
      validateDatasetMetadataRecord(mdRecord);
    }
  }

  /**
   * Functional test: stores both real tarballs into the AU, then runs the
   * article iterator and the deferred article metadata extractor over the
   * resulting hierarchy, validating every emitted record.
   */
  public void testFunctionalFromTarHierarchy() throws Exception {
    log.debug3("in testFromTarHierarchy");
    // load the tarballs
    InputStream file_input = null;
    try {
      file_input = getResourceAsStream(realTARFile_A);
      // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE);
      // uc.storeContent(file_input, tarHeader);
      UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

      file_input = getResourceAsStream(realTARFile_B);
      // uc = au.makeUrlCacher(TAR_B_BASE);
      // uc.storeContent(file_input, tarHeader);
      uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);
    } catch (IOException e) {
      // Failure to load fixtures will show up as an empty AU below.
      e.printStackTrace();
    } finally {
      IOUtil.safeClose(file_input);
    }

    CachedUrlSet cus = tarAu.getAuCachedUrlSet();
    for (CachedUrl cu : cus.getCuIterable()) {
      log.debug3("AU - cu is: " + cu.getUrl());
      cu.release();
    }

    // We need to start from the level of the ArticleMetadataExtractor
    MyListEmitter emitter = new MyListEmitter();
    ArticleMetadataExtractor amEx =
        new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

    Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.debug3("Metadata test - articlefiles " + af.toString());
      // CachedUrl cu = af.getFullTextCu();
      CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
      log.debug3("metadata cu is " + cu.getUrl());
      // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu);
      amEx.extract(MetadataTarget.Any(), af, emitter);
      List<ArticleMetadata> returnList = emitter.getAmList();

      assertNotNull(returnList);
      log.debug3("size of returnList is " + returnList.size());
      Iterator<ArticleMetadata> mdIt = returnList.iterator();
      ArticleMetadata mdRecord = null;
      while (mdIt.hasNext()) {
        mdRecord = (ArticleMetadata) mdIt.next();
        validateCompleteMetadataRecord(mdRecord);
      }
    }
  }

  /*
   * The supporting methods
   */

  /** Populates the per-DOI expectation maps used by the validate* methods. */
  private void setUpExpectedTarContent() {
    /* maps the DOIs in the metadata to the expected values */
    log.debug3("setUpExpectedTarContent");
    pubTitleMap = new HashMap<String, String>();
    {
      pubTitleMap.put("10.1016/j.jidx.2014.07.028", "International Journal of XXX");
      pubTitleMap.put("10.1016/j.jidx2.2014.05.013", "Revista");
      pubTitleMap.put("10.1016/S1473-1111(14)70840-0", "The Journal");
      pubTitleMap.put("10.1016/S0140-1111(14)61865-1", "The Other Journal");
      pubTitleMap.put("10.1016/j.foo.2014.08.001", "Foo");
      pubTitleMap.put("10.1016/j.foo.2014.08.123", "Foo");
    } ;

    dateMap = new HashMap<String, String>();
    {
      dateMap.put("10.1016/j.jidx.2014.07.028", "2014-07-30");
      dateMap.put("10.1016/j.jidx2.2014.05.013", "2014-07-09");
      dateMap.put("10.1016/S1473-1111(14)70840-0", "2014-09-01");
      dateMap.put("10.1016/S0140-1111(14)61865-1", "2014"); // will get from main.xml as backup
      dateMap.put("10.1016/j.foo.2014.08.001", "2014-08-20");
      dateMap.put("10.1016/j.foo.2014.08.123", "2014-08-20");
    } ;

    accessUrlMap = new HashMap<String, String>();
    {
      accessUrlMap.put(
          "10.1016/j.jidx.2014.07.028",
          TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.pdf");
      accessUrlMap.put(
          "10.1016/j.jidx2.2014.05.013",
          TAR_A_BASE + SUBDIR + "00349356/v61i9/S0034935614001819/main.pdf");
      accessUrlMap.put(
          "10.1016/S1473-1111(14)70840-0",
          TAR_A_BASE + SUBDIR + "14733099/v14i10/S1473309914708400/main.pdf");
      accessUrlMap.put(
          "10.1016/S0140-1111(14)61865-1",
          TAR_B_BASE + SUBDIR + "01406736/v384sS1/S0140673614618651/main.pdf");
      accessUrlMap.put(
          "10.1016/j.foo.2014.08.001",
          TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514004151/main.pdf");
      accessUrlMap.put(
          "10.1016/j.foo.2014.08.123",
          TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514003856/main.pdf");
    } ;

    ArrayList<String> goodAuthors = new ArrayList<String>();
    {
      goodAuthors.add("Writer, Bob");
      goodAuthors.add("Q. Text, Samantha");
    }
    ArrayList<String> simpleAuthors = new ArrayList<String>();
    {
      simpleAuthors.add("Simple, Josh");
    }
    ArrayList<String> extendedAuthors = new ArrayList<String>();
    {
      extendedAuthors.add("Writer, Bob");
      extendedAuthors.add("Q. Text, Samantha");
      extendedAuthors.add("The COLLABORATIVE Investigators");
    }

    authorMap = new HashMap<String, List<String>>();
    {
      authorMap.put("10.1016/j.jidx.2014.07.028", goodAuthors);
      authorMap.put("10.1016/j.jidx2.2014.05.013", goodAuthors);
      authorMap.put("10.1016/S1473-1111(14)70840-0", extendedAuthors);
      authorMap.put("10.1016/S0140-1111(14)61865-1", simpleAuthors);
      authorMap.put("10.1016/j.foo.2014.08.001", goodAuthors);
      authorMap.put("10.1016/j.foo.2014.08.123", goodAuthors);
    } ;

    volMap = new HashMap<String, String>();
    {
      volMap.put("10.1016/j.jidx.2014.07.028", "64");
      volMap.put("10.1016/j.jidx2.2014.05.013", "61");
      volMap.put("10.1016/S1473-1111(14)70840-0", "14");
      volMap.put("10.1016/S0140-1111(14)61865-1", "384");
      volMap.put("10.1016/j.foo.2014.08.001", "242");
      volMap.put("10.1016/j.foo.2014.08.123", "242");
    } ;

    issueMap = new HashMap<String, String>();
    {
      issueMap.put("10.1016/j.jidx.2014.07.028", "C");
      issueMap.put("10.1016/j.jidx2.2014.05.013", "9");
      issueMap.put("10.1016/S1473-1111(14)70840-0", "10");
      issueMap.put("10.1016/S0140-1111(14)61865-1", "S1");
      issueMap.put("10.1016/j.foo.2014.08.001", "C");
      issueMap.put("10.1016/j.foo.2014.08.123", "C");
    } ;
  }

  private String common_issn = "1111-1111";
  private String common_article_title = "Article about Important Things";
  // private String common_simple_article_title = "Simple Article Title for Update";
  private String common_simple_article_title = "Newsdesk Simple Dochead";

  /*
   * When testing a complete extraction out of the tarset, the MD record will be completely filled in
   * and pdf-existence will get established
   */
  private void validateCompleteMetadataRecord(ArticleMetadata am) {
    log.debug3("valideCompleteMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    /* make sure we can pick up both types of xml article data */
    log.debug3("doi val is: " + doi_val);
    if ("JA 5.2.0 SIMPLE-ARTICLE"
        .equals(am.getRaw(ElsevierDatasetXmlSchemaHelper.dataset_dtd_metadata))) {
      log.debug3("simple-article");
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    assertEquals(accessUrlMap.get(doi_val), am.get(MetadataField.FIELD_ACCESS_URL));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PROVIDER));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PUBLISHER));
    log.debug3(am.ppString(2));
  }

  /*
   * When testing no-pdf-check basic XML parsing, you will get partial MD records
   * depending on whether the info comes from dataset.xml or from main.xml
   */
  private void validateDatasetMetadataRecord(ArticleMetadata am) {
    log.debug3("valideDatasetMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));
    log.debug3("doi val is: " + doi_val);
    // The dataset doesn't set this value, it'll fail over the main.xml value
    if (doi_val.equals("10.1016/S0140-1111(14)61865-1")) {
      assertEquals(null, am.get(MetadataField.FIELD_DATE));
    } else {
      assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    }
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
  }

  /*
   * You will have to tell it the DOI and the schema because those normally come from dataset
   */
  private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) {
    log.debug3("valideSingleMainMetadatRecord");
    if ("simple-article".equals(schema)) {
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }
    log.debug3("doi val is: " + doi_val);
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead));
    assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi));
    assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright));
  }

  // Minimal DTD5 main.xml fixture used by testSimpleMainXML().
  private static final String simpleMain =
      "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
          + "<!DOCTYPE article PUBLIC \"-//ES//DTD journal article DTD version 5.2.0//EN//XML\" \"art520.dtd\">"
          + "<article docsubtype=\"fla\" xml:lang=\"en\">"
          + "<item-info><jid>TEST</jid>"
          + "<aid>9906</aid>"
          + "<ce:article-number>e09906</ce:article-number>"
          + "<ce:pii>S9999-9994(15)00010-0</ce:pii>"
          + "<ce:doi>10.1016/j.jidx.2014.07.028</ce:doi>"
          + "<ce:copyright type=\"full-transfer\" year=\"2014\">Elsevier GmbH</ce:copyright>"
          + "</item-info>"
          + "<head>"
          + "<ce:dochead id=\"cedoch10\"><ce:textfn>Comment</ce:textfn></ce:dochead>"
          + "<ce:title id=\"tm005\">Article about Important Things</ce:title>"
          + "<ce:author-group id=\"ag005\">"
          + "<ce:author id=\"au005\">"
          + "<ce:given-name>Bob</ce:given-name><ce:surname>Writer</ce:surname>"
          + "<ce:cross-ref id=\"ar005\" refid=\"af005\"><ce:sup>a</ce:sup></ce:cross-ref>"
          + "<ce:cross-ref id=\"ar010\" refid=\"cor1\"><ce:sup>⁎</ce:sup></ce:cross-ref>"
          + "<ce:e-address id=\"em005\" type=\"email\">[email protected]</ce:e-address>"
          + "</ce:author>"
          + "<ce:author id=\"au001\">"
          + "<ce:given-name>Samantha</ce:given-name><ce:surname>Q. Text</ce:surname>"
          + "<ce:cross-ref id=\"ar001\" refid=\"af001\"><ce:sup>a</ce:sup></ce:cross-ref>"
          + "<ce:cross-ref id=\"ar010\" refid=\"cor1\"><ce:sup>⁎</ce:sup></ce:cross-ref>"
          + "<ce:e-address id=\"em005\" type=\"email\">[email protected]</ce:e-address>"
          + "</ce:author>"
          + "</ce:author-group>"
          + "<ce:date-received day=\"1\" month=\"1\" year=\"2014\"/>"
          + "<ce:date-revised day=\"26\" month=\"7\" year=\"2014\"/>"
          + "<ce:date-accepted day=\"3\" month=\"8\" year=\"2014\"/>"
          + "<ce:abstract class=\"author\" xml:lang=\"en\" id=\"ab005\"><ce:section-title id=\"st050\">Abstract</ce:section-title>"
          + "<ce:abstract-sec id=\"as005\"><ce:simple-para id=\"sp005\">Abstract goes here.</ce:simple-para></ce:abstract-sec>"
          + "</ce:abstract>"
          + "</head>"
          + "<body>"
          + "</body>"
          + "<tail>"
          + "</tail>"
          + "</article>";

  /** Emitter that simply collects every emitted ArticleMetadata into a list. */
  static class MyListEmitter implements ArticleMetadataExtractor.Emitter {
    List<ArticleMetadata> amlst = new ArrayList<ArticleMetadata>();

    public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
      if (log.isDebug3()) log.debug3("emit(" + af + ", " + md + ")");
      if (md != null) {
        log.debug3("add " + md + " to amlist");
        amlst.add(md);
      } ;
    }

    public List<ArticleMetadata> getAmList() {
      return amlst;
    }
  }

  /*
   * A test version of the extractor that allows for suppression of the file check
   * this allows for basic XML parsing tests without having to provide the actual file content
   */
  public class TestElsevierDTD5MetadataExtractor
      extends ElsevierDTD5XmlSourceMetadataExtractorFactory.ElsevierDTD5XmlSourceMetadataExtractor {
    //
    // Override implementation of getFilenamesAssociatedWithRecord to force
    // emit for testing purposes - while allowing use of Elsevier extractor.
    // If a null list is returned, preEmitCheck returns "true"
    // allowing emit.
    //
    protected ArrayList<String> getFilenamesAssociatedWithRecord(
        SourceXmlSchemaHelper helper, CachedUrl cu, ArticleMetadata oneAM) {
      return null;
    }
  }
}