/**
 * Checks the top two levels of the simulated AU's cached URL tree.
 *
 * <p>The flat iteration of the AU-level set must yield exactly the URL root,
 * and the flat iteration of the last set yielded there must produce the seven
 * expected top-level files and branches.
 *
 * @param sau the simulated archival unit whose cached URL sets are inspected
 */
protected void checkRoot(SimulatedArchivalUnit sau) {
    log.debug("checkRoot()");

    // First level: gather the URL of every set returned by the AU-level flat iterator,
    // remembering the last set seen so its children can be walked afterwards.
    CachedUrlSet rootCus = null;
    ArrayList topUrls = new ArrayList(1);
    for (Iterator topIt = sau.getAuCachedUrlSet().flatSetIterator(); topIt.hasNext(); ) {
      rootCus = (CachedUrlSet) topIt.next();
      topUrls.add(rootCus.getUrl());
    }

    String urlRoot = sau.getUrlRoot();
    assertIsomorphic(new String[] {urlRoot}, topUrls);

    // Second level: gather the URLs of the nodes directly under the root set.
    ArrayList rootChildUrls = new ArrayList(7);
    for (Iterator childIt = rootCus.flatSetIterator(); childIt.hasNext(); ) {
      CachedUrlSetNode node = (CachedUrlSetNode) childIt.next();
      rootChildUrls.add(node.getUrl());
    }

    assertIsomorphic(
        new String[] {
          urlRoot + "/001file.html",
          urlRoot + "/001file.txt",
          urlRoot + "/002file.html",
          urlRoot + "/002file.txt",
          urlRoot + "/branch1",
          urlRoot + "/branch2",
          urlRoot + "/index.html"
        },
        rootChildUrls);
  }
 /**
  * Checks the content-hash iteration of the {@code /branch1} subtree.
  *
  * <p>Builds a cached URL set from a range spec rooted at {@code urlRoot + "/branch1"}
  * and asserts that its content-hash iterator yields the branch itself plus the
  * expected files and nested branches.
  *
  * @param sau the simulated archival unit that supplies the URL root and the set
  */
 protected void checkLeaf(SimulatedArchivalUnit sau) {
   log.debug("checkLeaf()");

   String parent = sau.getUrlRoot() + "/branch1";
   CachedUrlSet branchSet = sau.makeCachedUrlSet(new RangeCachedUrlSetSpec(parent));

   // Collect the URL of every node the content-hash iterator returns.
   ArrayList foundUrls = new ArrayList(16);
   for (Iterator nodeIt = branchSet.contentHashIterator(); nodeIt.hasNext(); ) {
     CachedUrlSetNode node = (CachedUrlSetNode) nodeIt.next();
     foundUrls.add(node.getUrl());
   }

   // Prefixes of the two nested branches under the parent.
   String sub1 = parent + "/branch1";
   String sub2 = parent + "/branch2";
   String[] expectedUrls = {
     parent,
     parent + "/001file.html",
     parent + "/001file.txt",
     parent + "/002file.html",
     parent + "/002file.txt",
     sub1,
     sub1 + "/001file.html",
     sub1 + "/001file.txt",
     sub1 + "/002file.html",
     sub1 + "/002file.txt",
     sub1 + "/index.html",
     sub2,
     sub2 + "/001file.html",
     sub2 + "/001file.txt",
     sub2 + "/002file.html",
     sub2 + "/002file.txt",
     sub2 + "/index.html",
     parent + "/index.html",
   };
   assertIsomorphic(expectedUrls, foundUrls);
 }
 /**
  * Lists the URLs of all nodes in the AU that report having content.
  *
  * @param au the archival unit whose AU-level cached URL set is walked
  * @return the URLs, in content-hash iteration order, of nodes for which
  *     {@code hasContent()} is true
  */
 List<String> auUrls(ArchivalUnit au) {
   List<String> contentUrls = new ArrayList<String>();
   Iterator nodes = au.getAuCachedUrlSet().contentHashIterator();
   while (nodes.hasNext()) {
     CachedUrlSetNode node = (CachedUrlSetNode) nodes.next();
     if (!node.hasContent()) {
       continue; // skip nodes without content
     }
     contentUrls.add(node.getUrl());
   }
   return contentUrls;
 }
// Example #4
// 0
  /**
   * Verifies that the crawl URL collection handed to {@code makeAuState} is what
   * {@code getCrawlUrls()} exposes: exactly one element, {@code "test"}.
   */
  public void testGetUrls() {
    HashSet crawlUrls = new HashSet();
    crawlUrls.add("test");
    AuState auState =
        makeAuState(mau, -1, -1, -1, -1, 123, crawlUrls, 1, -1.0, 1.0, historyRepo);

    Iterator urlIt = auState.getCrawlUrls().iterator();
    assertTrue(urlIt.hasNext());
    assertEquals("test", urlIt.next());
    assertFalse(urlIt.hasNext());
  }
  /**
   * Round-trips a node state through the repository.
   *
   * <p>Stores a {@code NodeStateImpl} carrying a crawl state and two poll states,
   * checks that the node file exists at the mapped location, reloads the state for
   * the same cached URL set, and asserts that every stored field comes back unchanged.
   *
   * @throws Exception if storing or loading the node state fails
   */
  public void testStoreNodeState() throws Exception {
    TimeBase.setSimulated(100);
    try {
      CachedUrlSet mcus =
          new MockCachedUrlSet(mau, new RangeCachedUrlSetSpec("http://www.example.com"));
      CrawlState crawl = new CrawlState(1, 2, 123);
      List polls = new ArrayList(2);
      PollState poll1 = new PollState(1, "sdf", "jkl", 2, 123, Deadline.at(456), false);
      PollState poll2 = new PollState(2, "abc", "def", 3, 321, Deadline.at(654), false);
      polls.add(poll1);
      polls.add(poll2);
      NodeState nodeState = new NodeStateImpl(mcus, 123321, crawl, polls, repository);
      ((NodeStateImpl) nodeState).setState(NodeState.DAMAGE_AT_OR_BELOW);
      repository.storeNodeState(nodeState);

      // The node file should now exist under the AU's directory for this URL.
      String filePath = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau);
      filePath =
          LockssRepositoryImpl.mapUrlToFileLocation(
              filePath, "http://www.example.com/" + HistoryRepositoryImpl.NODE_FILE_NAME);
      File xmlFile = new File(filePath);
      assertTrue(xmlFile.exists());

      // Reload and compare against the values stored above.
      nodeState = repository.loadNodeState(mcus);
      assertSame(mcus, nodeState.getCachedUrlSet());

      assertEquals(123321, nodeState.getAverageHashDuration());
      assertEquals(1, nodeState.getCrawlState().getType());
      assertEquals(2, nodeState.getCrawlState().getStatus());
      assertEquals(123, nodeState.getCrawlState().getStartTime());
      assertEquals(NodeState.DAMAGE_AT_OR_BELOW, nodeState.getState());

      Iterator pollIt = nodeState.getActivePolls();
      assertTrue(pollIt.hasNext());
      PollState loadedPoll = (PollState) pollIt.next();
      assertEquals(1, loadedPoll.getType());
      assertEquals("sdf", loadedPoll.getLwrBound());
      assertEquals("jkl", loadedPoll.getUprBound());
      assertEquals(2, loadedPoll.getStatus());
      assertEquals(123, loadedPoll.getStartTime());
      assertEquals(456, loadedPoll.getDeadline().getExpirationTime());

      assertTrue(pollIt.hasNext());
      loadedPoll = (PollState) pollIt.next();
      assertEquals(2, loadedPoll.getType());
      assertEquals("abc", loadedPoll.getLwrBound());
      assertEquals("def", loadedPoll.getUprBound());
      assertEquals(3, loadedPoll.getStatus());
      assertEquals(321, loadedPoll.getStartTime());
      assertEquals(654, loadedPoll.getDeadline().getExpirationTime());
      assertFalse(pollIt.hasNext());
    } finally {
      // Restore the real clock even when an assertion above fails, so the
      // simulated time set at the top does not carry over into other tests.
      TimeBase.setReal();
    }
  }
  /**
   * Checks the number and kind of articles produced by the article iterator.
   *
   * <p>Crawls the simulated AU, copies its content into {@code nau} under rewritten
   * journal URLs (HTML and PDF variants), deletes a subset of the copied URLs, then
   * walks {@code nau.getArticleIterator()} and asserts the total article count as
   * well as how many articles are HTML-only and PDF-only.
   *
   * @throws Exception if crawling, copying or deleting content fails
   */
  public void testArticleCountAndType() throws Exception {
    int expCount = 28;
    PluginTestUtil.crawlSimAu(sau);
    String pat1 = "branch(\\d+)/(\\d+file\\.html)";
    String rep1 = "aps/journal/v123/n$1/full/$2";
    PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
    String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
    String rep2 = "aps/journal/v123/n$1/pdf/$2";
    PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);

    // Remove some URLs
    int deleted = 0;
    for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) it.next();
      if (cusn instanceof CachedUrl) {
        CachedUrl cu = (CachedUrl) cusn;
        String url = cu.getUrl();
        if (url.contains("/journal/")
            && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
          deleteBlock(cu);
          ++deleted;
        }
      }
    }
    assertEquals(8, deleted);

    Iterator<ArticleFiles> it = nau.getArticleIterator();
    int count = 0;
    int countHtmlOnly = 0;
    int countPdfOnly = 0;
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.info(af.toString());
      CachedUrl cu = af.getFullTextCu();
      // Check for null before dereferencing, so a missing full-text CU is
      // reported as an assertion failure rather than a NullPointerException.
      assertNotNull(cu);
      String url = cu.getUrl();
      String contentType = cu.getContentType();
      log.debug("count " + count + " url " + url + " " + contentType);
      count++;
      String pdfUrl = af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF);
      if (pdfUrl == null) {
        // No full-text PDF role URL: counted as HTML-only.
        ++countHtmlOnly;
      } else if (pdfUrl.equals(url)) {
        // Full-text CU is the PDF itself: counted as PDF-only. Compared by
        // value; reference equality on Strings depends on object identity.
        ++countPdfOnly;
      }
    }
    log.debug("Article count is " + count);
    assertEquals(expCount, count);
    assertEquals(4, countHtmlOnly);
    assertEquals(4, countPdfOnly);
  }
  /**
   * Round-trips an {@code AuState} through the repository.
   *
   * <p>Builds a fully populated state, stores it, checks that the AU state file
   * exists, reloads it and asserts that each getter returns the value that was
   * passed to the constructor (plus a CDN stem added after loading).
   *
   * @throws Exception if storing or loading the AU state fails
   */
  public void testStoreAuState() throws Exception {
    HashSet crawlUrls = new HashSet();
    crawlUrls.add("test");
    AuState stateToStore =
        new AuState(
            mau,
            123000,
            123123,
            41,
            "woop woop",
            321000,
            222000,
            3,
            "pollres",
            12345,
            456000,
            crawlUrls,
            AuState.AccessType.OpenAccess,
            2,
            1.0,
            1.0,
            SubstanceChecker.State.Yes,
            "SubstVer3",
            "MetadatVer7",
            111444,
            12345,
            111222, // lastPoPPoll
            7, // lastPoPPollResult
            222333, // lastLocalHashScan
            444777, // numAgreePeersLastPoR
            777444, // numWillingRepairers
            747474, // numCurrentSuspectVersions
            ListUtil.list("http://hos.t/pa/th"),
            repository);

    // Sanity-check a few values on the in-memory state before it is stored.
    assertEquals("SubstVer3", stateToStore.getFeatureVersion(Plugin.Feature.Substance));
    assertEquals("MetadatVer7", stateToStore.getFeatureVersion(Plugin.Feature.Metadata));
    assertEquals(111444, stateToStore.getLastMetadataIndex());

    repository.storeAuState(stateToStore);

    // The AU state file should now exist in the AU's directory.
    File auStateFile =
        new File(
            LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau)
                + HistoryRepositoryImpl.AU_FILE_NAME);
    assertTrue(auStateFile.exists());

    // Drop the local reference; only the reloaded state is examined from here on.
    stateToStore = null;
    AuState reloaded = repository.loadAuState();

    // Crawl and poll bookkeeping.
    assertEquals(123000, reloaded.getLastCrawlTime());
    assertEquals(123123, reloaded.getLastCrawlAttempt());
    assertEquals(41, reloaded.getLastCrawlResult());
    assertEquals("woop woop", reloaded.getLastCrawlResultMsg());
    assertEquals(321000, reloaded.getLastTopLevelPollTime());
    assertEquals(222000, reloaded.getLastPollStart());
    assertEquals(3, reloaded.getLastPollResult());
    assertEquals("Inviting Peers", reloaded.getLastPollResultMsg());

    assertEquals(111222, reloaded.getLastPoPPoll());
    assertEquals(7, reloaded.getLastPoPPollResult());
    assertEquals(222333, reloaded.getLastLocalHashScan());

    assertEquals(444777, reloaded.getNumAgreePeersLastPoR());
    assertEquals(777444, reloaded.getNumWillingRepairers());
    assertEquals(747474, reloaded.getNumCurrentSuspectVersions());

    // CDN stems: the stored stem first, then the one appended after loading.
    assertEquals(ListUtil.list("http://hos.t/pa/th"), reloaded.getCdnStems());
    reloaded.addCdnStem("http://this.is.new/");
    assertEquals(
        ListUtil.list("http://hos.t/pa/th", "http://this.is.new/"), reloaded.getCdnStems());

    assertEquals(12345, reloaded.getPollDuration());
    assertEquals(2, reloaded.getClockssSubscriptionStatus());
    assertEquals(AuState.AccessType.OpenAccess, reloaded.getAccessType());
    assertEquals(SubstanceChecker.State.Yes, reloaded.getSubstanceState());
    assertEquals("SubstVer3", reloaded.getFeatureVersion(Plugin.Feature.Substance));
    assertEquals("MetadatVer7", reloaded.getFeatureVersion(Plugin.Feature.Metadata));
    assertEquals(111444, reloaded.getLastMetadataIndex());
    assertEquals(12345, reloaded.getLastContentChange());
    assertEquals(mau.getAuId(), reloaded.getArchivalUnit().getAuId());

    // check crawl urls
    Iterator crawlIt = reloaded.getCrawlUrls().iterator();
    assertTrue(crawlIt.hasNext());
    assertEquals("test", crawlIt.next());
    assertFalse(crawlIt.hasNext());
  }
  /**
   * Round-trips a node's poll histories through the repository.
   *
   * <p>Stores five poll histories for a node, checks that the history file exists
   * at the mapped location, clears the node's in-memory list, reloads it and
   * compares the first loaded history (fields and votes) with the first stored one.
   *
   * @throws Exception if storing or loading the poll histories fails
   */
  public void testStorePollHistories() throws Exception {
    TimeBase.setSimulated(123321);
    try {
      MockCachedUrlSetSpec mspec = new MockCachedUrlSetSpec("http://www.example.com", null);
      CachedUrlSet mcus = new MockCachedUrlSet(mau, mspec);
      NodeStateImpl nodeState = new NodeStateImpl(mcus, -1, null, null, repository);
      List histories =
          ListUtil.list(
              createPollHistoryBean(3),
              createPollHistoryBean(3),
              createPollHistoryBean(3),
              createPollHistoryBean(3),
              createPollHistoryBean(3));

      /*
       * CASTOR: [summary] Rewrite test in non-Castor way
       * This is obviously not an appropriate way of writing this test,
       * Right now it creates sample data in Castor format, from legacy
       * code back when Castor was the built-in serialization engine.
       * TODO: Rewrite test in non-Castor way
       */
      // nodeState.setPollHistoryBeanList(histories);
      nodeState.setPollHistoryList(NodeHistoryBean.fromBeanListToList(histories));

      repository.storePollHistories(nodeState);
      String filePath = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau);
      filePath =
          LockssRepositoryImpl.mapUrlToFileLocation(
              filePath, "http://www.example.com/" + HistoryRepositoryImpl.HISTORY_FILE_NAME);
      File xmlFile = new File(filePath);
      assertTrue(xmlFile.exists());

      // Empty the in-memory list so the entries checked below come from the reload.
      nodeState.setPollHistoryList(new ArrayList());
      repository.loadPollHistories(nodeState);
      List loadedHistory = nodeState.getPollHistoryList();
      assertEquals(histories.size(), loadedHistory.size());
      // CASTOR: some Castor-tailored stuff here
      // PollHistoryBean expect1 = (PollHistoryBean)histories.get(0);
      // PollHistoryBean elem1 = (PollHistoryBean)loadedHistory.get(0);
      PollHistory expect1 = (PollHistory) histories.get(0);
      PollHistory elem1 = (PollHistory) loadedHistory.get(0);
      assertEquals(expect1.type, elem1.type);
      assertEquals(expect1.lwrBound, elem1.lwrBound);
      assertEquals(expect1.uprBound, elem1.uprBound);
      assertEquals(expect1.status, elem1.status);
      assertEquals(expect1.startTime, elem1.startTime);
      assertEquals(expect1.duration, elem1.duration);
      // CASTOR: some Castor-tailored stuff here
      // List expectBeans = (List)expect1.getVoteBeans();
      // List elemBeans = (List)elem1.getVoteBeans();
      Iterator expectIter = (Iterator) expect1.getVotes();
      Iterator elemIter = (Iterator) elem1.getVotes();
      while (expectIter.hasNext() && elemIter.hasNext()) {
        Vote expectVote = (Vote) expectIter.next();
        Vote elemVote = (Vote) elemIter.next();
        assertEquals(
            expectVote.getVoterIdentity().getIdString(),
            elemVote.getVoterIdentity().getIdString());
        assertEquals(expectVote.isAgreeVote(), elemVote.isAgreeVote());
        assertEquals(expectVote.getChallengeString(), elemVote.getChallengeString());
        assertEquals(expectVote.getVerifierString(), elemVote.getVerifierString());
        assertEquals(expectVote.getHashString(), elemVote.getHashString());
      }
      // Both iterators must be exhausted; otherwise the two vote lists differ in
      // length and the pairwise loop above stopped early.
      assertFalse(expectIter.hasNext());
      assertFalse(elemIter.hasNext());
    } finally {
      // Restore the real clock even when an assertion above fails, so the
      // simulated time set at the top does not carry over into other tests.
      TimeBase.setReal();
    }
  }