/**
   * Method that creates a simulated Cached URL from the source code provided by the goodContent
   * String. It then asserts that the metadata extracted, by using the
   * MetaPressRisMetadataExtractorFactory, match the metadata in the source code.
   *
   * @throws Exception
   */
  public void testExtractGoodRisContent() throws Exception {
    String goodContent = createGoodRisContent();
    log.debug3(goodContent);

    List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);

    assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME));
    assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE));
    assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE));
    assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE));
    assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN));
    Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
    for (String expAuth : goodAuthors) {
      assertEquals(expAuth, actAuthIter.next());
    }
    assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
    assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));

    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
    assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI));
    // This shouldn't get set. It will default later to fuill_text_cu
    assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL));
  }
Пример #2
0
  private void assertEqualMessages(V3LcapMessage a, V3LcapMessage b) throws Exception {
    assertTrue(a.getOriginatorId() == b.getOriginatorId());
    assertEquals(a.getOpcode(), b.getOpcode());
    assertEquals(a.getTargetUrl(), b.getTargetUrl());
    assertEquals(a.getArchivalId(), b.getArchivalId());
    assertEquals(a.getProtocolVersion(), b.getProtocolVersion());
    assertEquals(a.getPollerNonce(), b.getPollerNonce());
    assertEquals(a.getVoterNonce(), b.getVoterNonce());
    assertEquals(a.getVoterNonce2(), b.getVoterNonce2());
    assertEquals(a.getPluginVersion(), b.getPluginVersion());
    assertEquals(a.getHashAlgorithm(), b.getHashAlgorithm());
    assertEquals(a.isVoteComplete(), b.isVoteComplete());
    assertEquals(a.getRepairDataLength(), b.getRepairDataLength());
    assertEquals(a.getLastVoteBlockURL(), b.getLastVoteBlockURL());
    assertIsomorphic(a.getNominees(), b.getNominees());
    List aBlocks = new ArrayList();
    List bBlocks = new ArrayList();
    for (VoteBlocksIterator iter = a.getVoteBlockIterator(); iter.hasNext(); ) {
      aBlocks.add(iter.next());
    }
    for (VoteBlocksIterator iter = b.getVoteBlockIterator(); iter.hasNext(); ) {
      bBlocks.add(iter.next());
    }
    assertTrue(aBlocks.equals(bBlocks));

    //  TODO: Figure out how to test time.

  }
 public void testSimpleMainXML() throws Exception {
   log.debug3("testSimpleMainXML");
   String xml_url = TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.xml";
   List<ArticleMetadata> mdList =
       extractFromContent(xml_url, "text/xml", simpleMain, nocheck_mle, null);
   assertEquals(1, mdList.size());
   validateSingleMainMetadataRecord(mdList.get(0), "10.1016/j.jidx.2014.07.028", "article");
 }
  public void testFunctionalFromTarHierarchy() throws Exception {
    log.debug3("in testFromTarHierarchy");
    // load the tarballs
    InputStream file_input = null;
    try {
      file_input = getResourceAsStream(realTARFile_A);
      // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE);
      // uc.storeContent(file_input, tarHeader);
      UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

      file_input = getResourceAsStream(realTARFile_B);
      // uc = au.makeUrlCacher(TAR_B_BASE);
      // uc.storeContent(file_input, tarHeader);
      uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      IOUtil.safeClose(file_input);
    }

    CachedUrlSet cus = tarAu.getAuCachedUrlSet();
    for (CachedUrl cu : cus.getCuIterable()) {
      log.debug3("AU - cu is: " + cu.getUrl());
      cu.release();
    }

    // We need to start from the level of the ArticleMetadataExtractor
    MyListEmitter emitter = new MyListEmitter();
    ArticleMetadataExtractor amEx =
        new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

    Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.debug3("Metadata test - articlefiles " + af.toString());
      // CachedUrl cu = af.getFullTextCu();
      CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
      log.debug3("metadata cu is " + cu.getUrl());
      // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu);
      amEx.extract(MetadataTarget.Any(), af, emitter);
      List<ArticleMetadata> returnList = emitter.getAmList();

      assertNotNull(returnList);
      log.debug3("size of returnList is " + returnList.size());
      Iterator<ArticleMetadata> mdIt = returnList.iterator();
      ArticleMetadata mdRecord = null;
      while (mdIt.hasNext()) {
        mdRecord = (ArticleMetadata) mdIt.next();
        validateCompleteMetadataRecord(mdRecord);
      }
    }
  }
 List<String> auUrls(ArchivalUnit au) {
   List<String> res = new ArrayList<String>();
   for (Iterator iter = au.getAuCachedUrlSet().contentHashIterator(); iter.hasNext(); ) {
     CachedUrlSetNode cusn = (CachedUrlSetNode) iter.next();
     if (cusn.hasContent()) {
       res.add(cusn.getUrl());
     }
   }
   return res;
 }
Пример #6
0
 void assertPubKs(File file, String pass, List<String> hosts) throws Exception {
   KeyStore ks = loadKeyStore("jceks", file, pass);
   List aliases = ListUtil.fromIterator(new EnumerationIterator(ks.aliases()));
   assertEquals(hosts.size(), aliases.size());
   for (String host : hosts) {
     String alias = host + ".crt";
     Certificate cert = ks.getCertificate(alias);
     assertNotNull(cert);
     assertEquals("X.509", cert.getType());
   }
 }
  public void testDOIExtraction() throws Exception {

    List<ArticleMetadata> mdlist =
        setupContentForAU(bau1, ABS_URL, goodHtmlContentNoDOIorPublisher, true);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);
    // gets pulled from the URL if not set in the metadata
    assertEquals("10.1175/2010WCAS1063.1", md.get(MetadataField.FIELD_DOI));
    // gets set manually if not in the metadata
    // first it would try the TDB
    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
  }
Пример #8
0
 void assertPrivateKs(File file, String pass, String alias) throws Exception {
   KeyStore ks = loadKeyStore("jceks", file, alias);
   List aliases = ListUtil.fromIterator(new EnumerationIterator(ks.aliases()));
   assertEquals(2, aliases.size());
   Certificate cert = ks.getCertificate(alias + ".crt");
   assertNotNull(cert);
   assertEquals("X.509", cert.getType());
   assertTrue(ks.isKeyEntry(alias + ".key"));
   assertTrue(ks.isCertificateEntry(alias + ".crt"));
   Key key = ks.getKey(alias + ".key", pass.toCharArray());
   assertNotNull(key);
   assertEquals("RSA", key.getAlgorithm());
 }
Пример #9
0
 public List findExistingRepositoriesFor(String auid) {
   List res = null;
   for (Iterator iter = getRepositoryList().iterator(); iter.hasNext(); ) {
     String repoName = (String) iter.next();
     String path = LockssRepositoryImpl.getLocalRepositoryPath(repoName);
     if (LockssRepositoryImpl.doesAuDirExist(auid, path)) {
       if (res == null) {
         res = new ArrayList();
       }
       res.add(repoName);
     }
   }
   return res == null ? Collections.EMPTY_LIST : res;
 }
  public void testExtractGoodHtmlContent() throws Exception {

    List<ArticleMetadata> mdlist = setupContentForAU(bau1, ABS_URL, goodHtmlContent, true);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);
    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
    assertEquals(goodTitle, md.get(MetadataField.DC_FIELD_TITLE));
    assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
    assertEquals(goodFormat, md.get(MetadataField.DC_FIELD_FORMAT));
    assertEquals(goodType, md.get(MetadataField.DC_FIELD_TYPE));
    assertEquals(Arrays.asList(goodAuthors), md.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(goodAuthors[0], md.get(MetadataField.DC_FIELD_CREATOR));
  }
  public void testSimpleDatasetXML() throws Exception {
    log.debug3("testSimpleDatasetXML");
    String file_input = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));
    String xml_url = TAR_A_BASE + SUBDIR + "dataset.xml";

    List<ArticleMetadata> mdList =
        extractFromContent(xml_url, "text/xml", file_input, nocheck_mle, null);
    assertEquals(6, mdList.size());
    Iterator<ArticleMetadata> mdIt = mdList.iterator();
    ArticleMetadata mdRecord = null;
    while (mdIt.hasNext()) {
      mdRecord = (ArticleMetadata) mdIt.next();
      validateDatasetMetadataRecord(mdRecord);
    }
  }
 public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
   if (log.isDebug3()) log.debug3("emit(" + af + ", " + md + ")");
   if (md != null) {
     log.debug3("add " + md + " to amlist");
     amlst.add(md);
   }
   ;
 }
  public void testBaseUrlPath() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet cus1 = sau1.getAuCachedUrlSet();

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/some/path/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);
    CachedUrlSet cus2 = sau1.getAuCachedUrlSet();
    List urls1 = auUrls(sau1);
    List urls2 = auUrls(sau2);

    Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
    Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat1.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat2.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals(m1.group(1), m2.group(1));
    }
  }
 /**
  * Checks the consistency of the node, and continues with its children if it's consistent.
  *
  * @param node RepositoryNodeImpl the node to check
  */
 private void recurseConsistencyCheck(RepositoryNodeImpl node) {
   logger.debug2("Checking node '" + node.getNodeUrl() + "'...");
   // check consistency at each node
   // correct/deactivate as necessary
   // 'checkNodeConsistency()' will repair if possible
   if (node.checkNodeConsistency()) {
     logger.debug3("Node consistent; recursing on children...");
     List children = node.getNodeList(null, false);
     Iterator iter = children.iterator();
     while (iter.hasNext()) {
       RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next();
       recurseConsistencyCheck(child);
     }
   } else {
     logger.debug3("Node inconsistent; deactivating...");
     deactivateInconsistentNode(node);
   }
 }
  /**
   * Method that creates a simulated Cached URL from the source code provided by the goodContent
   * String. It then asserts that the metadata extracted, by using the
   * MetaPressRisMetadataExtractorFactory, match the metadata in the source code.
   *
   * @throws Exception
   */
  public void testExtractAlternateRisContent() throws Exception {
    String goodContent = createAlternateRisContent();
    log.debug3(goodContent);

    List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);

    Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
    for (String expAuth : goodAuthors) {
      assertEquals(expAuth, actAuthIter.next());
    }
    assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
    assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
  }
Пример #16
0
  public void testRequestMessageCreation() throws Exception {
    V3LcapMessage reqMsg =
        new V3LcapMessage(
            "ArchivalID_2",
            "key",
            "Plug42",
            m_testBytes,
            m_testBytes,
            V3LcapMessage.MSG_REPAIR_REQ,
            987654321,
            m_testID,
            tempDir,
            theDaemon);
    reqMsg.setTargetUrl("http://foo.com/");

    for (Iterator ix = m_testVoteBlocks.iterator(); ix.hasNext(); ) {
      reqMsg.addVoteBlock((VoteBlock) ix.next());
    }

    assertEquals(3, reqMsg.getProtocolVersion());
    assertEquals("Plug42", reqMsg.getPluginVersion());
    assertTrue(m_testID == reqMsg.getOriginatorId());
    assertEquals(V3LcapMessage.MSG_REPAIR_REQ, reqMsg.getOpcode());
    assertEquals("ArchivalID_2", reqMsg.getArchivalId());
    assertEquals("http://foo.com/", reqMsg.getTargetUrl());
    assertEquals(m_testBytes, reqMsg.getPollerNonce());
    assertEquals(m_testBytes, reqMsg.getVoterNonce());
    assertEquals(null, reqMsg.getVoterNonce2());
    List aBlocks = new ArrayList();
    List bBlocks = new ArrayList();
    for (VoteBlocksIterator iter = m_testMsg.getVoteBlockIterator(); iter.hasNext(); ) {
      aBlocks.add(iter.next());
    }
    for (VoteBlocksIterator iter = reqMsg.getVoteBlockIterator(); iter.hasNext(); ) {
      bBlocks.add(iter.next());
    }
    assertEquals(aBlocks, bBlocks);

    // Actual size of test vote blocks is unpredictable
    assertTrue(reqMsg.getEstimatedEncodedLength() > V3LcapMessage.EST_ENCODED_HEADER_LENGTH);
  }
Пример #17
0
 protected void recordExportFile(File file) {
   if (fileSet.add(file)) {
     fileList.add(file);
   }
 }
Пример #18
0
 protected void recordError(String msg) {
   log.error(msg);
   errors.add(msg);
 }
Пример #19
0
 protected void recordError(String msg, Throwable t) {
   log.error(msg, t);
   errors.add(msg + ": " + t.toString());
 }
Пример #20
0
 public void setConfig(
     Configuration config, Configuration oldConfig, Configuration.Differences changedKeys) {
   //  Build list of repositories from list of disk (fs) paths).  Needs to
   //  be generalized if ever another repository implementation.
   if (changedKeys.contains(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST)) {
     List lst = new ArrayList();
     String dspace = config.get(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, "");
     List paths = StringUtil.breakAt(dspace, ';');
     if (paths != null) {
       for (Iterator iter = paths.iterator(); iter.hasNext(); ) {
         lst.add("local:" + (String) iter.next());
       }
     }
     repoList = lst;
   }
   if (changedKeys.contains(PARAM_MAX_PER_AU_CACHE_SIZE)) {
     paramNodeCacheSize =
         config.getInt(PARAM_MAX_PER_AU_CACHE_SIZE, DEFAULT_MAX_PER_AU_CACHE_SIZE);
     for (Iterator iter = getDaemon().getAllLockssRepositories().iterator(); iter.hasNext(); ) {
       LockssRepository repo = (LockssRepository) iter.next();
       if (repo instanceof LockssRepositoryImpl) {
         LockssRepositoryImpl repoImpl = (LockssRepositoryImpl) repo;
         repoImpl.setNodeCacheSize(paramNodeCacheSize);
       }
     }
   }
   if (changedKeys.contains(PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE)) {
     paramSuspectVersionsCacheSize =
         config.getInt(
             PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE, DEFAULT_MAX_SUSPECT_VERSIONS_CACHE_SIZE);
     suspectVersionsCache.setMaxSize(paramSuspectVersionsCacheSize);
   }
   if (changedKeys.contains(GLOBAL_CACHE_PREFIX)) {
     paramIsGlobalNodeCache =
         config.getBoolean(PARAM_GLOBAL_CACHE_ENABLED, DEFAULT_GLOBAL_CACHE_ENABLED);
     if (paramIsGlobalNodeCache) {
       paramGlobalNodeCacheSize =
           config.getInt(PARAM_MAX_GLOBAL_CACHE_SIZE, DEFAULT_MAX_GLOBAL_CACHE_SIZE);
       log.debug("global node cache size: " + paramGlobalNodeCacheSize);
       globalNodeCache.setMaxSize(paramGlobalNodeCacheSize);
     }
   }
   if (changedKeys.contains(DISK_PREFIX)) {
     int minMB = config.getInt(PARAM_DISK_WARN_FRRE_MB, DEFAULT_DISK_WARN_FRRE_MB);
     double minPer =
         config.getPercentage(PARAM_DISK_WARN_FRRE_PERCENT, DEFAULT_DISK_WARN_FRRE_PERCENT);
     paramDFWarn = PlatformUtil.DF.makeThreshold(minMB, minPer);
     minMB = config.getInt(PARAM_DISK_FULL_FRRE_MB, DEFAULT_DISK_FULL_FRRE_MB);
     minPer = config.getPercentage(PARAM_DISK_FULL_FRRE_PERCENT, DEFAULT_DISK_FULL_FRRE_PERCENT);
     paramDFFull = PlatformUtil.DF.makeThreshold(minMB, minPer);
   }
   if (changedKeys.contains(PARAM_SIZE_CALC_MAX_LOAD)) {
     sizeCalcMaxLoad = config.getPercentage(PARAM_SIZE_CALC_MAX_LOAD, DEFAULT_SIZE_CALC_MAX_LOAD);
   }
   if (changedKeys.contains(PREFIX)) {
     maxUnusedDirSearch =
         config.getInt(PARAM_MAX_UNUSED_DIR_SEARCH, DEFAULT_MAX_UNUSED_DIR_SEARCH);
     isStatefulUnusedDirSearch =
         config.getBoolean(
             PARAM_IS_STATEFUL_UNUSED_DIR_SEARCH, DEFAULT_IS_STATEFUL_UNUSED_DIR_SEARCH);
     enableLongComponents =
         config.getBoolean(PARAM_ENABLE_LONG_COMPONENTS, DEFAULT_ENABLE_LONG_COMPONENTS);
     enableLongComponentsCompatibility =
         config.getBoolean(
             PARAM_ENABLE_LONG_COMPONENTS_COMPATIBILITY,
             DEFAULT_ENABLE_LONG_COMPONENTS_COMPATIBILITY);
     maxComponentLength = config.getInt(PARAM_MAX_COMPONENT_LENGTH, DEFAULT_MAX_COMPONENT_LENGTH);
     checkUnnormalized =
         (CheckUnnormalizedMode)
             config.getEnum(
                 CheckUnnormalizedMode.class,
                 PARAM_CHECK_UNNORMALIZED,
                 DEFAULT_CHECK_UNNORMALIZED);
   }
 }