/** * Method that creates a simulated Cached URL from the source code provided by the goodContent * String. It then asserts that the metadata extracted, by using the * MetaPressRisMetadataExtractorFactory, match the metadata in the source code. * * @throws Exception */ public void testExtractGoodRisContent() throws Exception { String goodContent = createGoodRisContent(); log.debug3(goodContent); List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME)); assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE)); assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE)); assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE)); assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN)); Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator(); for (String expAuth : goodAuthors) { assertEquals(expAuth, actAuthIter.next()); } assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE)); assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE)); assertEquals(goodDate, md.get(MetadataField.FIELD_DATE)); assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI)); // This shouldn't get set. It will default later to fuill_text_cu assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL)); }
private void assertEqualMessages(V3LcapMessage a, V3LcapMessage b) throws Exception { assertTrue(a.getOriginatorId() == b.getOriginatorId()); assertEquals(a.getOpcode(), b.getOpcode()); assertEquals(a.getTargetUrl(), b.getTargetUrl()); assertEquals(a.getArchivalId(), b.getArchivalId()); assertEquals(a.getProtocolVersion(), b.getProtocolVersion()); assertEquals(a.getPollerNonce(), b.getPollerNonce()); assertEquals(a.getVoterNonce(), b.getVoterNonce()); assertEquals(a.getVoterNonce2(), b.getVoterNonce2()); assertEquals(a.getPluginVersion(), b.getPluginVersion()); assertEquals(a.getHashAlgorithm(), b.getHashAlgorithm()); assertEquals(a.isVoteComplete(), b.isVoteComplete()); assertEquals(a.getRepairDataLength(), b.getRepairDataLength()); assertEquals(a.getLastVoteBlockURL(), b.getLastVoteBlockURL()); assertIsomorphic(a.getNominees(), b.getNominees()); List aBlocks = new ArrayList(); List bBlocks = new ArrayList(); for (VoteBlocksIterator iter = a.getVoteBlockIterator(); iter.hasNext(); ) { aBlocks.add(iter.next()); } for (VoteBlocksIterator iter = b.getVoteBlockIterator(); iter.hasNext(); ) { bBlocks.add(iter.next()); } assertTrue(aBlocks.equals(bBlocks)); // TODO: Figure out how to test time. }
/**
 * Extracts metadata from the {@code simpleMain} XML content presented under a {@code main.xml}
 * URL and checks that exactly one record comes back, which is then validated by
 * {@code validateSingleMainMetadataRecord}.
 *
 * @throws Exception if extraction fails
 */
public void testSimpleMainXML() throws Exception {
  log.debug3("testSimpleMainXML");

  final String mainXmlUrl =
      TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.xml";
  final List<ArticleMetadata> records =
      extractFromContent(mainXmlUrl, "text/xml", simpleMain, nocheck_mle, null);

  assertEquals(1, records.size());
  final ArticleMetadata onlyRecord = records.get(0);
  validateSingleMainMetadataRecord(onlyRecord, "10.1016/j.jidx.2014.07.028", "article");
}
public void testFunctionalFromTarHierarchy() throws Exception { log.debug3("in testFromTarHierarchy"); // load the tarballs InputStream file_input = null; try { file_input = getResourceAsStream(realTARFile_A); // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE); // uc.storeContent(file_input, tarHeader); UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE)); uc.storeContent(); IOUtil.safeClose(file_input); file_input = getResourceAsStream(realTARFile_B); // uc = au.makeUrlCacher(TAR_B_BASE); // uc.storeContent(file_input, tarHeader); uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE)); uc.storeContent(); IOUtil.safeClose(file_input); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { IOUtil.safeClose(file_input); } CachedUrlSet cus = tarAu.getAuCachedUrlSet(); for (CachedUrl cu : cus.getCuIterable()) { log.debug3("AU - cu is: " + cu.getUrl()); cu.release(); } // We need to start from the level of the ArticleMetadataExtractor MyListEmitter emitter = new MyListEmitter(); ArticleMetadataExtractor amEx = new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA); Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any()); while (it.hasNext()) { ArticleFiles af = it.next(); log.debug3("Metadata test - articlefiles " + af.toString()); // CachedUrl cu = af.getFullTextCu(); CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA); log.debug3("metadata cu is " + cu.getUrl()); // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu); amEx.extract(MetadataTarget.Any(), af, emitter); List<ArticleMetadata> returnList = emitter.getAmList(); assertNotNull(returnList); log.debug3("size of returnList is " + returnList.size()); Iterator<ArticleMetadata> mdIt = returnList.iterator(); ArticleMetadata mdRecord = null; while (mdIt.hasNext()) { mdRecord = (ArticleMetadata) mdIt.next(); validateCompleteMetadataRecord(mdRecord); } } }
/**
 * Collects the URLs of the nodes returned by the AU's content-hash iterator that report having
 * content, in iteration order.
 *
 * @param au the archival unit to walk
 * @return the URLs of nodes for which {@code hasContent()} is true
 */
List<String> auUrls(ArchivalUnit au) {
  final List<String> urls = new ArrayList<String>();
  final Iterator nodes = au.getAuCachedUrlSet().contentHashIterator();
  while (nodes.hasNext()) {
    final CachedUrlSetNode node = (CachedUrlSetNode) nodes.next();
    if (!node.hasContent()) {
      continue;
    }
    urls.add(node.getUrl());
  }
  return urls;
}
/**
 * Asserts that the JCEKS keystore in {@code file} holds exactly one alias per host and that each
 * host has an X.509 certificate stored under {@code <host>.crt}.
 *
 * @param file the keystore file
 * @param pass the password used to load the keystore
 * @param hosts the host names expected in the keystore
 * @throws Exception if the keystore cannot be loaded or read
 */
void assertPubKs(File file, String pass, List<String> hosts) throws Exception {
  final KeyStore store = loadKeyStore("jceks", file, pass);

  final List storedAliases = ListUtil.fromIterator(new EnumerationIterator(store.aliases()));
  assertEquals(hosts.size(), storedAliases.size());

  for (String host : hosts) {
    final Certificate hostCert = store.getCertificate(host + ".crt");
    assertNotNull(hostCert);
    assertEquals("X.509", hostCert.getType());
  }
}
public void testDOIExtraction() throws Exception { List<ArticleMetadata> mdlist = setupContentForAU(bau1, ABS_URL, goodHtmlContentNoDOIorPublisher, true); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); // gets pulled from the URL if not set in the metadata assertEquals("10.1175/2010WCAS1063.1", md.get(MetadataField.FIELD_DOI)); // gets set manually if not in the metadata // first it would try the TDB assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); }
/**
 * Asserts that the JCEKS keystore in {@code file} holds exactly two entries for {@code alias}:
 * an X.509 certificate under {@code <alias>.crt} and an RSA key under {@code <alias>.key}.
 *
 * <p>NOTE(review): the keystore itself is loaded with {@code alias} as its password, while
 * {@code pass} is used only to unlock the key entry. This looks intentional (separate store and
 * key passwords) but should be confirmed against the code that creates the keystore.
 *
 * @param file the keystore file
 * @param pass the password protecting the key entry
 * @param alias the entry-name prefix, also passed as the keystore password
 * @throws Exception if the keystore cannot be loaded or read
 */
void assertPrivateKs(File file, String pass, String alias) throws Exception {
  final String certAlias = alias + ".crt";
  final String keyAlias = alias + ".key";

  final KeyStore store = loadKeyStore("jceks", file, alias);
  final List storedAliases = ListUtil.fromIterator(new EnumerationIterator(store.aliases()));
  assertEquals(2, storedAliases.size());

  final Certificate cert = store.getCertificate(certAlias);
  assertNotNull(cert);
  assertEquals("X.509", cert.getType());

  assertTrue(store.isKeyEntry(keyAlias));
  assertTrue(store.isCertificateEntry(certAlias));

  final Key privateKey = store.getKey(keyAlias, pass.toCharArray());
  assertNotNull(privateKey);
  assertEquals("RSA", privateKey.getAlgorithm());
}
/**
 * Finds the repositories from {@code getRepositoryList()} whose local path already contains a
 * directory for the given AU.
 *
 * @param auid the AU id to look for
 * @return the names of matching repositories in list order, or {@code Collections.EMPTY_LIST}
 *     when none match
 */
public List findExistingRepositoriesFor(String auid) {
  // Allocated lazily so the no-match case can return the shared empty list.
  List matches = null;
  final Iterator repoNames = getRepositoryList().iterator();
  while (repoNames.hasNext()) {
    final String repoName = (String) repoNames.next();
    final String localPath = LockssRepositoryImpl.getLocalRepositoryPath(repoName);
    if (!LockssRepositoryImpl.doesAuDirExist(auid, localPath)) {
      continue;
    }
    if (matches == null) {
      matches = new ArrayList();
    }
    matches.add(repoName);
  }
  if (matches != null) {
    return matches;
  }
  return Collections.EMPTY_LIST;
}
/**
 * Checks metadata extraction for {@code goodHtmlContent}: the first record returned by
 * {@code setupContentForAU} must carry the expected publisher, title, date, format, type,
 * author list and creator.
 *
 * @throws Exception if content setup or extraction fails
 */
public void testExtractGoodHtmlContent() throws Exception {
  final List<ArticleMetadata> extracted =
      setupContentForAU(bau1, ABS_URL, goodHtmlContent, true);
  assertNotEmpty(extracted);

  final ArticleMetadata record = extracted.get(0);
  assertNotNull(record);

  assertEquals(goodPublisher, record.get(MetadataField.FIELD_PUBLISHER));
  assertEquals(goodTitle, record.get(MetadataField.DC_FIELD_TITLE));
  assertEquals(goodDate, record.get(MetadataField.FIELD_DATE));
  assertEquals(goodFormat, record.get(MetadataField.DC_FIELD_FORMAT));
  assertEquals(goodType, record.get(MetadataField.DC_FIELD_TYPE));

  // The full author list must match; the creator field is compared with the first author.
  assertEquals(Arrays.asList(goodAuthors), record.getList(MetadataField.FIELD_AUTHOR));
  assertEquals(goodAuthors[0], record.get(MetadataField.DC_FIELD_CREATOR));
}
/**
 * Extracts metadata from the {@code testDatasetFile} resource presented under a
 * {@code dataset.xml} URL, expects six records, and validates each with
 * {@code validateDatasetMetadataRecord}.
 *
 * @throws Exception if the resource cannot be read or extraction fails
 */
public void testSimpleDatasetXML() throws Exception {
  log.debug3("testSimpleDatasetXML");

  final String datasetXml = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));
  final String datasetUrl = TAR_A_BASE + SUBDIR + "dataset.xml";

  final List<ArticleMetadata> records =
      extractFromContent(datasetUrl, "text/xml", datasetXml, nocheck_mle, null);
  assertEquals(6, records.size());

  for (ArticleMetadata record : records) {
    validateDatasetMetadataRecord(record);
  }
}
/**
 * Appends a non-null metadata record to {@code amlst}; a null record is logged and otherwise
 * ignored.
 *
 * @param af the article files the record belongs to (used only for logging here)
 * @param md the record to collect, may be null
 */
public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
  if (log.isDebug3()) {
    log.debug3("emit(" + af + ", " + md + ")");
  }
  if (md == null) {
    return;
  }
  log.debug3("add " + md + " to amlist");
  amlst.add(md);
}
/**
 * Crawls two simulated AUs, one with the default configuration and one whose {@code base_url}
 * is {@code http://anotherhost.org/some/path/}, and checks that they hold the same number of
 * URLs and that corresponding URLs have identical paths once the base is stripped.
 *
 * <p>The original body also fetched each AU's cached URL set and listed each AU's URLs a
 * second time into locals that were never read. One of them read {@code sau1} where
 * {@code sau2} was evidently intended. Those unused locals and duplicate traversals are
 * removed.
 *
 * @throws Exception if AU setup, content creation or crawling fails
 */
public void testBaseUrlPath() throws Exception {
  // First AU: default simulated configuration.
  sau1 = setupSimAu(simAuConfig(tempDirPath));
  createContent(sau1);
  crawlContent(sau1);

  // Second AU: same content in a separate temp dir, served from a base URL with a path.
  tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
  Configuration config2 = simAuConfig(tempDirPath2);
  config2.put("base_url", "http://anotherhost.org/some/path/");
  SimulatedArchivalUnit sau2 = setupSimAu(config2);
  createContent(sau2);
  crawlContent(sau2);

  // Group 1 of each pattern captures the path that follows the AU's base URL.
  Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
  Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");

  List<String> l1 = auUrls(sau1);
  List<String> l2 = auUrls(sau2);
  assertEquals(l1.size(), l2.size());

  for (int ix = 0; ix < l1.size(); ix++) {
    Matcher m1 = pat1.matcher(l1.get(ix));
    assertTrue(m1.matches());
    Matcher m2 = pat2.matcher(l2.get(ix));
    assertTrue(m2.matches());
    assertEquals(m1.group(1), m2.group(1));
  }
}
/** * Checks the consistency of the node, and continues with its children if it's consistent. * * @param node RepositoryNodeImpl the node to check */ private void recurseConsistencyCheck(RepositoryNodeImpl node) { logger.debug2("Checking node '" + node.getNodeUrl() + "'..."); // check consistency at each node // correct/deactivate as necessary // 'checkNodeConsistency()' will repair if possible if (node.checkNodeConsistency()) { logger.debug3("Node consistent; recursing on children..."); List children = node.getNodeList(null, false); Iterator iter = children.iterator(); while (iter.hasNext()) { RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next(); recurseConsistencyCheck(child); } } else { logger.debug3("Node inconsistent; deactivating..."); deactivateInconsistentNode(node); } }
/**
 * Checks metadata extraction for the RIS text returned by {@code createAlternateRisContent()}.
 *
 * <p>The text is passed to {@code setupContentForAU} under {@code RIS_URL}; the first record of
 * the returned list must carry the expected authors, title, journal, date and publisher.
 *
 * @throws Exception if content setup or extraction fails
 */
public void testExtractAlternateRisContent() throws Exception {
  final String risText = createAlternateRisContent();
  log.debug3(risText);

  final List<ArticleMetadata> extracted = setupContentForAU(bau1, RIS_URL, risText, false);
  assertNotEmpty(extracted);
  final ArticleMetadata record = extracted.get(0);
  assertNotNull(record);

  // Step through the extracted authors alongside the expected ones.
  final Iterator<String> foundAuthors = record.getList(MetadataField.FIELD_AUTHOR).iterator();
  for (String wantedAuthor : goodAuthors) {
    assertEquals(wantedAuthor, foundAuthors.next());
  }

  assertEquals(goodTitle, record.get(MetadataField.FIELD_ARTICLE_TITLE));
  assertEquals(goodJournal, record.get(MetadataField.FIELD_PUBLICATION_TITLE));
  assertEquals(goodDate, record.get(MetadataField.FIELD_DATE));
  assertEquals(goodPublisher, record.get(MetadataField.FIELD_PUBLISHER));
}
public void testRequestMessageCreation() throws Exception { V3LcapMessage reqMsg = new V3LcapMessage( "ArchivalID_2", "key", "Plug42", m_testBytes, m_testBytes, V3LcapMessage.MSG_REPAIR_REQ, 987654321, m_testID, tempDir, theDaemon); reqMsg.setTargetUrl("http://foo.com/"); for (Iterator ix = m_testVoteBlocks.iterator(); ix.hasNext(); ) { reqMsg.addVoteBlock((VoteBlock) ix.next()); } assertEquals(3, reqMsg.getProtocolVersion()); assertEquals("Plug42", reqMsg.getPluginVersion()); assertTrue(m_testID == reqMsg.getOriginatorId()); assertEquals(V3LcapMessage.MSG_REPAIR_REQ, reqMsg.getOpcode()); assertEquals("ArchivalID_2", reqMsg.getArchivalId()); assertEquals("http://foo.com/", reqMsg.getTargetUrl()); assertEquals(m_testBytes, reqMsg.getPollerNonce()); assertEquals(m_testBytes, reqMsg.getVoterNonce()); assertEquals(null, reqMsg.getVoterNonce2()); List aBlocks = new ArrayList(); List bBlocks = new ArrayList(); for (VoteBlocksIterator iter = m_testMsg.getVoteBlockIterator(); iter.hasNext(); ) { aBlocks.add(iter.next()); } for (VoteBlocksIterator iter = reqMsg.getVoteBlockIterator(); iter.hasNext(); ) { bBlocks.add(iter.next()); } assertEquals(aBlocks, bBlocks); // Actual size of test vote blocks is unpredictable assertTrue(reqMsg.getEstimatedEncodedLength() > V3LcapMessage.EST_ENCODED_HEADER_LENGTH); }
/**
 * Appends {@code file} to {@code fileList} only when {@code fileSet.add(file)} returns true.
 *
 * <p>Presumably {@code fileSet} is a {@code Set} used to keep {@code fileList} free of
 * duplicates; confirm against the field declarations.
 *
 * @param file the export file to record
 */
protected void recordExportFile(File file) {
  final boolean newlyAdded = fileSet.add(file);
  if (!newlyAdded) {
    return;
  }
  fileList.add(file);
}
/**
 * Logs an error message and appends it to {@code errors}.
 *
 * @param msg the error message
 */
protected void recordError(String msg) {
  // Log first, then keep the message in the errors collection.
  log.error(msg);
  errors.add(msg);
}
/**
 * Logs an error message with its cause and appends {@code msg + ": " + t.toString()} to
 * {@code errors}.
 *
 * @param msg the error message
 * @param t the throwable that caused the error; dereferenced, so must not be null
 */
protected void recordError(String msg, Throwable t) {
  log.error(msg, t);
  final String entry = msg + ": " + t.toString();
  errors.add(entry);
}
public void setConfig( Configuration config, Configuration oldConfig, Configuration.Differences changedKeys) { // Build list of repositories from list of disk (fs) paths). Needs to // be generalized if ever another repository implementation. if (changedKeys.contains(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST)) { List lst = new ArrayList(); String dspace = config.get(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, ""); List paths = StringUtil.breakAt(dspace, ';'); if (paths != null) { for (Iterator iter = paths.iterator(); iter.hasNext(); ) { lst.add("local:" + (String) iter.next()); } } repoList = lst; } if (changedKeys.contains(PARAM_MAX_PER_AU_CACHE_SIZE)) { paramNodeCacheSize = config.getInt(PARAM_MAX_PER_AU_CACHE_SIZE, DEFAULT_MAX_PER_AU_CACHE_SIZE); for (Iterator iter = getDaemon().getAllLockssRepositories().iterator(); iter.hasNext(); ) { LockssRepository repo = (LockssRepository) iter.next(); if (repo instanceof LockssRepositoryImpl) { LockssRepositoryImpl repoImpl = (LockssRepositoryImpl) repo; repoImpl.setNodeCacheSize(paramNodeCacheSize); } } } if (changedKeys.contains(PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE)) { paramSuspectVersionsCacheSize = config.getInt( PARAM_MAX_SUSPECT_VERSIONS_CACHE_SIZE, DEFAULT_MAX_SUSPECT_VERSIONS_CACHE_SIZE); suspectVersionsCache.setMaxSize(paramSuspectVersionsCacheSize); } if (changedKeys.contains(GLOBAL_CACHE_PREFIX)) { paramIsGlobalNodeCache = config.getBoolean(PARAM_GLOBAL_CACHE_ENABLED, DEFAULT_GLOBAL_CACHE_ENABLED); if (paramIsGlobalNodeCache) { paramGlobalNodeCacheSize = config.getInt(PARAM_MAX_GLOBAL_CACHE_SIZE, DEFAULT_MAX_GLOBAL_CACHE_SIZE); log.debug("global node cache size: " + paramGlobalNodeCacheSize); globalNodeCache.setMaxSize(paramGlobalNodeCacheSize); } } if (changedKeys.contains(DISK_PREFIX)) { int minMB = config.getInt(PARAM_DISK_WARN_FRRE_MB, DEFAULT_DISK_WARN_FRRE_MB); double minPer = config.getPercentage(PARAM_DISK_WARN_FRRE_PERCENT, DEFAULT_DISK_WARN_FRRE_PERCENT); paramDFWarn = 
PlatformUtil.DF.makeThreshold(minMB, minPer); minMB = config.getInt(PARAM_DISK_FULL_FRRE_MB, DEFAULT_DISK_FULL_FRRE_MB); minPer = config.getPercentage(PARAM_DISK_FULL_FRRE_PERCENT, DEFAULT_DISK_FULL_FRRE_PERCENT); paramDFFull = PlatformUtil.DF.makeThreshold(minMB, minPer); } if (changedKeys.contains(PARAM_SIZE_CALC_MAX_LOAD)) { sizeCalcMaxLoad = config.getPercentage(PARAM_SIZE_CALC_MAX_LOAD, DEFAULT_SIZE_CALC_MAX_LOAD); } if (changedKeys.contains(PREFIX)) { maxUnusedDirSearch = config.getInt(PARAM_MAX_UNUSED_DIR_SEARCH, DEFAULT_MAX_UNUSED_DIR_SEARCH); isStatefulUnusedDirSearch = config.getBoolean( PARAM_IS_STATEFUL_UNUSED_DIR_SEARCH, DEFAULT_IS_STATEFUL_UNUSED_DIR_SEARCH); enableLongComponents = config.getBoolean(PARAM_ENABLE_LONG_COMPONENTS, DEFAULT_ENABLE_LONG_COMPONENTS); enableLongComponentsCompatibility = config.getBoolean( PARAM_ENABLE_LONG_COMPONENTS_COMPATIBILITY, DEFAULT_ENABLE_LONG_COMPONENTS_COMPATIBILITY); maxComponentLength = config.getInt(PARAM_MAX_COMPONENT_LENGTH, DEFAULT_MAX_COMPONENT_LENGTH); checkUnnormalized = (CheckUnnormalizedMode) config.getEnum( CheckUnnormalizedMode.class, PARAM_CHECK_UNNORMALIZED, DEFAULT_CHECK_UNNORMALIZED); } }