private void deleteBlock(CachedUrl cu) throws IOException {
   log.info("deleting " + cu.getUrl());
   CachedUrlSetSpec cuss = new SingleNodeCachedUrlSetSpec(cu.getUrl());
   ArchivalUnit au = cu.getArchivalUnit();
   CachedUrlSet cus = au.makeCachedUrlSet(cuss);
   NodeManager nm = au.getPlugin().getDaemon().getNodeManager(au);
   nm.deleteNode(cus);
 }
  public void testFunctionalFromTarHierarchy() throws Exception {
    log.debug3("in testFromTarHierarchy");
    // load the tarballs
    InputStream file_input = null;
    try {
      file_input = getResourceAsStream(realTARFile_A);
      // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE);
      // uc.storeContent(file_input, tarHeader);
      UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

      file_input = getResourceAsStream(realTARFile_B);
      // uc = au.makeUrlCacher(TAR_B_BASE);
      // uc.storeContent(file_input, tarHeader);
      uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      IOUtil.safeClose(file_input);
    }

    CachedUrlSet cus = tarAu.getAuCachedUrlSet();
    for (CachedUrl cu : cus.getCuIterable()) {
      log.debug3("AU - cu is: " + cu.getUrl());
      cu.release();
    }

    // We need to start from the level of the ArticleMetadataExtractor
    MyListEmitter emitter = new MyListEmitter();
    ArticleMetadataExtractor amEx =
        new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

    Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.debug3("Metadata test - articlefiles " + af.toString());
      // CachedUrl cu = af.getFullTextCu();
      CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
      log.debug3("metadata cu is " + cu.getUrl());
      // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu);
      amEx.extract(MetadataTarget.Any(), af, emitter);
      List<ArticleMetadata> returnList = emitter.getAmList();

      assertNotNull(returnList);
      log.debug3("size of returnList is " + returnList.size());
      Iterator<ArticleMetadata> mdIt = returnList.iterator();
      ArticleMetadata mdRecord = null;
      while (mdIt.hasNext()) {
        mdRecord = (ArticleMetadata) mdIt.next();
        validateCompleteMetadataRecord(mdRecord);
      }
    }
  }
  public void testArticleCountAndType() throws Exception {
    int expCount = 28;
    PluginTestUtil.crawlSimAu(sau);
    String pat1 = "branch(\\d+)/(\\d+file\\.html)";
    String rep1 = "aps/journal/v123/n$1/full/$2";
    PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
    String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
    String rep2 = "aps/journal/v123/n$1/pdf/$2";
    PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);

    // Remove some URLs
    int deleted = 0;
    for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) it.next();
      if (cusn instanceof CachedUrl) {
        CachedUrl cu = (CachedUrl) cusn;
        String url = cu.getUrl();
        if (url.contains("/journal/")
            && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
          deleteBlock(cu);
          ++deleted;
        }
      }
    }
    assertEquals(8, deleted);

    Iterator<ArticleFiles> it = nau.getArticleIterator();
    int count = 0;
    int countHtmlOnly = 0;
    int countPdfOnly = 0;
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.info(af.toString());
      CachedUrl cu = af.getFullTextCu();
      String url = cu.getUrl();
      assertNotNull(cu);
      String contentType = cu.getContentType();
      log.debug("count " + count + " url " + url + " " + contentType);
      count++;
      if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == null) {
        ++countHtmlOnly;
      }
      if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == url) {
        ++countPdfOnly;
      }
    }
    log.debug("Article count is " + count);
    assertEquals(expCount, count);
    assertEquals(4, countHtmlOnly);
    assertEquals(4, countPdfOnly);
  }
  /* private support methods */
  private List<ArticleMetadata> setupContentForAU(
      ArchivalUnit au, String url, String content, boolean isHtmlExtractor)
      throws IOException, PluginException {
    FileMetadataExtractor me;

    InputStream input = null;
    CIProperties props = null;
    if (isHtmlExtractor) {
      input = IOUtils.toInputStream(content, "utf-8");
      props = getContentHtmlProperties();
      me =
          new BaseAtyponHtmlMetadataExtractorFactory()
              .createFileMetadataExtractor(MetadataTarget.Any(), "text/html");
    } else {
      input = IOUtils.toInputStream(content, "utf-8");
      props = getContentRisProperties();
      me =
          new BaseAtyponRisMetadataExtractorFactory()
              .createFileMetadataExtractor(MetadataTarget.Any(), "text/plain");
    }
    UrlData ud = new UrlData(input, props, url);
    UrlCacher uc = au.makeUrlCacher(ud);
    uc.storeContent();
    CachedUrl cu = uc.getCachedUrl();
    FileMetadataListExtractor mle = new FileMetadataListExtractor(me);
    return mle.extract(MetadataTarget.Any(), cu);
  }
 List<String> auUrls(ArchivalUnit au) {
   List<String> res = new ArrayList<String>();
   for (Iterator iter = au.getAuCachedUrlSet().contentHashIterator(); iter.hasNext(); ) {
     CachedUrlSetNode cusn = (CachedUrlSetNode) iter.next();
     if (cusn.hasContent()) {
       res.add(cusn.getUrl());
     }
   }
   return res;
 }
  public void setUp() throws Exception {
    super.setUp();
    setUpDiskSpace(); // you need this to have startService work properly...

    theDaemon = getMockLockssDaemon();
    theDaemon.getAlertManager();
    theDaemon.getPluginManager().setLoadablePluginsReady(true);
    theDaemon.setDaemonInited(true);
    theDaemon.getPluginManager().startService();
    theDaemon.getCrawlManager();

    // in this directory this is file "test_baseatypon.tdb" but it becomes xml
    ConfigurationUtil.addFromUrl(getResource("test_baseatypon.xml"));
    Tdb tdb = ConfigManager.getCurrentConfig().getTdb();

    TdbAu tdbau1 = tdb.getTdbAusLikeName(goodJournal + " Volume " + goodVolume).get(0);
    assertNotNull("Didn't find named TdbAu", tdbau1);
    bau1 = PluginTestUtil.createAndStartAu(tdbau1);
    assertNotNull(bau1);
    TypedEntryMap auConfig = bau1.getProperties();
    assertEquals(BASE_URL, auConfig.getString(BASE_URL_KEY));
  }
Beispiel #7
0
 public static V1Poll createCompletedPoll(
     LockssDaemon daemon,
     ArchivalUnit au,
     V1LcapMessage testmsg,
     int numAgree,
     int numDisagree,
     PollManager pollmanager)
     throws Exception {
   log.debug(
       "createCompletedPoll: au: "
           + au.toString()
           + " peer "
           + testmsg.getOriginatorId()
           + " votes "
           + numAgree
           + "/"
           + numDisagree);
   CachedUrlSetSpec cusSpec = null;
   if ((testmsg.getLwrBound() != null)
       && (testmsg.getLwrBound().equals(PollSpec.SINGLE_NODE_LWRBOUND))) {
     cusSpec = new SingleNodeCachedUrlSetSpec(testmsg.getTargetUrl());
   } else {
     cusSpec =
         new RangeCachedUrlSetSpec(
             testmsg.getTargetUrl(), testmsg.getLwrBound(), testmsg.getUprBound());
   }
   CachedUrlSet cus = au.makeCachedUrlSet(cusSpec);
   PollSpec spec = new PollSpec(cus, Poll.V1_CONTENT_POLL);
   ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
   V1Poll p = null;
   if (testmsg.isContentPoll()) {
     p =
         new V1ContentPoll(
             spec,
             pollmanager,
             testmsg.getOriginatorId(),
             testmsg.getChallenge(),
             testmsg.getDuration(),
             testmsg.getHashAlgorithm());
   } else if (testmsg.isNamePoll()) {
     p =
         new V1NamePoll(
             spec,
             pollmanager,
             testmsg.getOriginatorId(),
             testmsg.getChallenge(),
             testmsg.getDuration(),
             testmsg.getHashAlgorithm());
   } else if (testmsg.isVerifyPoll()) {
     p =
         new V1VerifyPoll(
             spec,
             pollmanager,
             testmsg.getOriginatorId(),
             testmsg.getChallenge(),
             testmsg.getDuration(),
             testmsg.getHashAlgorithm(),
             testmsg.getVerifier());
   }
   assertNotNull(p);
   p.setMessage(testmsg);
   p.m_tally.quorum = numAgree + numDisagree;
   p.m_tally.numAgree = numAgree;
   p.m_tally.numDisagree = numDisagree;
   p.m_tally.wtAgree = 2000;
   p.m_tally.wtDisagree = 200;
   p.m_tally.localEntries = makeEntries(1, 3);
   p.m_tally.votedEntries = makeEntries(1, 5);
   p.m_tally.votedEntries.remove(1);
   p.m_pollstate = V1Poll.PS_COMPLETE;
   p.m_callerID = testmsg.getOriginatorId();
   log.debug3("poll " + p.toString());
   p.m_tally.tallyVotes();
   return p;
 }