void setConfig(Configuration config) {
  log.debug("config: " + config);
  proxyHost = config.get(PARAM_PROXY_HOST);
  proxyPort = config.getInt(PARAM_PROXY_PORT, DEFAULT_PROXY_PORT);
  if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
    String http_proxy = System.getenv("http_proxy");
    if (!StringUtil.isNullString(http_proxy)) {
      try {
        HostPortParser hpp = new HostPortParser(http_proxy);
        proxyHost = hpp.getHost();
        proxyPort = hpp.getPort();
      } catch (HostPortParser.InvalidSpec e) {
        log.warning("Can't parse http_proxy environment var, ignoring: "
                    + http_proxy + ": " + e);
      }
    }
  }
  if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
    proxyHost = null;
  } else {
    log.info("Proxying through " + proxyHost + ":" + proxyPort);
  }
  userAgent = config.get(PARAM_USER_AGENT);
  if (StringUtil.isNullString(userAgent)) {
    userAgent = null;
  } else {
    log.debug("Setting User-Agent to " + userAgent);
  }
}
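// Illustrative sketch (not LOCKSS code): how the http_proxy fallback above
// behaves. HostPortParser is internal to the daemon, so this stand-in
// hand-parses a bare "host:port" spec; the value below is invented.
public class ProxySpecDemo {
  public static void main(String[] args) {
    String spec = "proxy.example.com:3128"; // hypothetical http_proxy value
    int colon = spec.lastIndexOf(':');
    if (colon > 0) {
      String host = spec.substring(0, colon);
      int port = Integer.parseInt(spec.substring(colon + 1));
      System.out.println("Proxying through " + host + ":" + port);
    } else {
      System.out.println("Can't parse http_proxy spec, ignoring: " + spec);
    }
  }
}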
public void export() {
  log.debug("export(" + au.getName() + ")");
  log.debug("dir: " + dir + ", pref: " + prefix + ", size: " + maxSize
            + ", ver: " + maxVersions + (compress ? ", (C)" : ""));
  checkArgs();
  try {
    start();
  } catch (IOException e) {
    recordError("Error opening file", e);
    return;
  }
  writeFiles();
  try {
    finish();
  } catch (IOException e) {
    if (!isDiskFull) {
      // If we already knew (and reported) disk full, also reporting it
      // as a close error is misleading.
      recordError("Error closing file", e);
    }
  }
}
// If there is a <title> element on the start page, use that as our AU name.
String recomputeRegName() {
  if (!isStarted()) {
    // This can get invoked (several times, mostly from logging) before
    // enough mechanism has started to make it possible to resolve the
    // CuUrl below.
    return null;
  }
  try {
    CachedUrl cu = makeCachedUrl(m_registryUrl);
    if (cu == null) return null;
    URL cuUrl = CuUrl.fromCu(cu);
    Parser parser = new Parser(cuUrl.toString());
    NodeList nodelst =
        parser.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class));
    Node[] nodes = nodelst.toNodeArray();
    recomputeRegName = false;
    if (nodes.length < 1) return null;
    // Get the first title found
    TitleTag tag = (TitleTag) nodes[0];
    if (tag == null) return null;
    return tag.getTitle();
  } catch (MalformedURLException e) {
    log.warning("recomputeRegName", e);
    return null;
  } catch (ParserException e) {
    if (e.getThrowable() instanceof FileNotFoundException) {
      log.warning("recomputeRegName: " + e.getThrowable().toString());
    } else {
      log.warning("recomputeRegName", e);
    }
    return null;
  }
}
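// Illustrative sketch (not LOCKSS code): the same org.htmlparser calls used
// in recomputeRegName(), applied to an in-memory string instead of a fetched
// URL. The HTML content is invented.
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TitleTag;
import org.htmlparser.util.NodeList;

public class TitleDemo {
  public static void main(String[] args) throws Exception {
    String html = "<html><head><title>Plugin registry</title></head></html>";
    Parser parser = Parser.createParser(html, "UTF-8");
    NodeList nl =
        parser.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class));
    Node[] nodes = nl.toNodeArray();
    if (nodes.length > 0) {
      // Prints "Plugin registry"
      System.out.println(((TitleTag) nodes[0]).getTitle());
    }
  }
}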
/** Create LockssKeystores from config subtree below {@link #PARAM_KEYSTORE} */
void configureKeyStores(Configuration config) {
  Configuration allKs = config.getConfigTree(PARAM_KEYSTORE);
  for (Iterator iter = allKs.nodeIterator(); iter.hasNext(); ) {
    String id = (String) iter.next();
    Configuration oneKs = allKs.getConfigTree(id);
    try {
      LockssKeyStore lk = createLockssKeyStore(oneKs);
      String name = lk.getName();
      if (name == null) {
        log.error("KeyStore definition missing name: " + oneKs);
        continue;
      }
      LockssKeyStore old = keystoreMap.get(name);
      if (old != null && !lk.equals(old)) {
        log.warning("Keystore " + name + " redefined. "
                    + "New definition may not take effect until daemon restart");
      }
      log.debug("Adding keystore " + name);
      keystoreMap.put(name, lk);
    } catch (Exception e) {
      log.error("Couldn't create keystore: " + oneKs, e);
    }
  }
}
public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
  if (log.isDebug3()) log.debug3("emit(" + af + ", " + md + ")");
  if (md != null) {
    log.debug3("add " + md + " to amlist");
    amlst.add(md);
  }
}
public void testFunctionalFromTarHierarchy() throws Exception {
  log.debug3("in testFunctionalFromTarHierarchy");
  // load the tarballs
  InputStream file_input = null;
  try {
    file_input = getResourceAsStream(realTARFile_A);
    UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);

    file_input = getResourceAsStream(realTARFile_B);
    uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);
  } catch (IOException e) {
    // If the tarballs can't be stored there is nothing to test
    fail("Couldn't load test tarball: " + e);
  } finally {
    IOUtil.safeClose(file_input);
  }

  CachedUrlSet cus = tarAu.getAuCachedUrlSet();
  for (CachedUrl cu : cus.getCuIterable()) {
    log.debug3("AU - cu is: " + cu.getUrl());
    cu.release();
  }

  // We need to start from the level of the ArticleMetadataExtractor
  MyListEmitter emitter = new MyListEmitter();
  ArticleMetadataExtractor amEx =
      new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

  Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
  while (it.hasNext()) {
    ArticleFiles af = it.next();
    log.debug3("Metadata test - articlefiles " + af.toString());
    CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
    log.debug3("metadata cu is " + cu.getUrl());
    amEx.extract(MetadataTarget.Any(), af, emitter);
    List<ArticleMetadata> returnList = emitter.getAmList();

    assertNotNull(returnList);
    log.debug3("size of returnList is " + returnList.size());
    for (ArticleMetadata mdRecord : returnList) {
      validateCompleteMetadataRecord(mdRecord);
    }
  }
}
public void testArticleCountAndType() throws Exception {
  int expCount = 28;
  PluginTestUtil.crawlSimAu(sau);
  String pat1 = "branch(\\d+)/(\\d+file\\.html)";
  String rep1 = "aps/journal/v123/n$1/full/$2";
  PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
  String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
  String rep2 = "aps/journal/v123/n$1/pdf/$2";
  PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);

  // Remove some URLs
  int deleted = 0;
  for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) {
    CachedUrlSetNode cusn = (CachedUrlSetNode) it.next();
    if (cusn instanceof CachedUrl) {
      CachedUrl cu = (CachedUrl) cusn;
      String url = cu.getUrl();
      if (url.contains("/journal/")
          && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
        deleteBlock(cu);
        ++deleted;
      }
    }
  }
  assertEquals(8, deleted);

  Iterator<ArticleFiles> it = nau.getArticleIterator();
  int count = 0;
  int countHtmlOnly = 0;
  int countPdfOnly = 0;
  while (it.hasNext()) {
    ArticleFiles af = it.next();
    log.info(af.toString());
    CachedUrl cu = af.getFullTextCu();
    assertNotNull(cu);
    String url = cu.getUrl();
    String contentType = cu.getContentType();
    log.debug("count " + count + " url " + url + " " + contentType);
    count++;
    if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == null) {
      ++countHtmlOnly;
    }
    if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == url) {
      ++countPdfOnly;
    }
  }
  log.debug("Article count is " + count);
  assertEquals(expCount, count);
  assertEquals(4, countHtmlOnly);
  assertEquals(4, countPdfOnly);
}
/*
 * When testing no-pdf-check basic XML parsing, you will get partial MD records
 * depending on whether the info comes from dataset.xml or from main.xml
 */
private void validateDatasetMetadataRecord(ArticleMetadata am) {
  log.debug3("validateDatasetMetadataRecord");
  String doi_val = am.get(MetadataField.FIELD_DOI);
  assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));

  log.debug3("doi val is: " + doi_val);
  // The dataset doesn't set this value; it will fall back to the main.xml value
  if (doi_val.equals("10.1016/S0140-1111(14)61865-1")) {
    assertEquals(null, am.get(MetadataField.FIELD_DATE));
  } else {
    assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
  }
  assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
}
private void checkSubstance(ArchivalUnit au) {
  SubstanceChecker subChecker = new SubstanceChecker(au);
  if (!subChecker.isEnabled()) {
    errMsg = "No substance patterns defined for plugin.";
    return;
  }
  AuState auState = AuUtil.getAuState(au);
  SubstanceChecker.State oldState = auState.getSubstanceState();
  SubstanceChecker.State newState = subChecker.findSubstance();
  String chtxt = (newState == oldState
                  ? "(unchanged)"
                  : "(was " + oldState.toString() + ")");
  switch (newState) {
    case Unknown:
      log.error("Shouldn't happen: SubstanceChecker returned Unknown");
      errMsg = "Error in SubstanceChecker; see log.";
      break;
    case Yes:
      statusMsg = "AU has substance " + chtxt + ": " + au.getName();
      auState.setSubstanceState(SubstanceChecker.State.Yes);
      break;
    case No:
      statusMsg = "AU has no substance " + chtxt + ": " + au.getName();
      auState.setSubstanceState(SubstanceChecker.State.No);
      break;
  }
}
/** Create a LockssKeyStore from a config subtree */
LockssKeyStore createLockssKeyStore(Configuration config) {
  log.debug2("Creating LockssKeyStore from config: " + config);
  String name = config.get(KEYSTORE_PARAM_NAME);
  LockssKeyStore lk = new LockssKeyStore(name);

  String file = config.get(KEYSTORE_PARAM_FILE);
  String resource = config.get(KEYSTORE_PARAM_RESOURCE);
  String url = config.get(KEYSTORE_PARAM_URL);
  if (!StringUtil.isNullString(file)) {
    lk.setLocation(file, LocationType.File);
  } else if (!StringUtil.isNullString(resource)) {
    lk.setLocation(resource, LocationType.Resource);
  } else if (!StringUtil.isNullString(url)) {
    lk.setLocation(url, LocationType.Url);
  }

  lk.setType(config.get(KEYSTORE_PARAM_TYPE, defaultKeyStoreType));
  lk.setProvider(config.get(KEYSTORE_PARAM_PROVIDER, defaultKeyStoreProvider));
  lk.setPassword(config.get(KEYSTORE_PARAM_PASSWORD));
  lk.setKeyPassword(config.get(KEYSTORE_PARAM_KEY_PASSWORD));
  lk.setKeyPasswordFile(config.get(KEYSTORE_PARAM_KEY_PASSWORD_FILE));
  lk.setMayCreate(config.getBoolean(KEYSTORE_PARAM_CREATE, DEFAULT_CREATE));
  return lk;
}
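// Hypothetical keystore definition of the sort configureKeyStores() iterates
// over (the param prefix and every value here are invented for illustration;
// the real names come from the KEYSTORE_PARAM_* constants). Exactly one of
// file/resource/url should be given; file is checked first, then resource,
// then url.
//
//   org.lockss.keyMgr.keystore.ks1.name=pollKeys
//   org.lockss.keyMgr.keystore.ks1.file=/var/lockss/keys/poll.jks
//   org.lockss.keyMgr.keystore.ks1.type=JKS
//   org.lockss.keyMgr.keystore.ks1.password=changeit
//   org.lockss.keyMgr.keystore.ks1.keyPassword=changeit
//   org.lockss.keyMgr.keystore.ks1.create=true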
protected void checkLeaf(SimulatedArchivalUnit sau) {
  log.debug("checkLeaf()");
  String parent = sau.getUrlRoot() + "/branch1";
  CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
  CachedUrlSet set = sau.makeCachedUrlSet(spec);
  Iterator setIt = set.contentHashIterator();
  ArrayList childL = new ArrayList(16);
  while (setIt.hasNext()) {
    childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
  }
  String[] expectedA = new String[] {
    parent,
    parent + "/001file.html",
    parent + "/001file.txt",
    parent + "/002file.html",
    parent + "/002file.txt",
    parent + "/branch1",
    parent + "/branch1/001file.html",
    parent + "/branch1/001file.txt",
    parent + "/branch1/002file.html",
    parent + "/branch1/002file.txt",
    parent + "/branch1/index.html",
    parent + "/branch2",
    parent + "/branch2/001file.html",
    parent + "/branch2/001file.txt",
    parent + "/branch2/002file.html",
    parent + "/branch2/002file.txt",
    parent + "/branch2/index.html",
    parent + "/index.html",
  };
  assertIsomorphic(expectedA, childL);
}
protected void checkRoot(SimulatedArchivalUnit sau) {
  log.debug("checkRoot()");
  CachedUrlSet set = sau.getAuCachedUrlSet();
  Iterator setIt = set.flatSetIterator();
  ArrayList childL = new ArrayList(1);
  CachedUrlSet cus = null;
  while (setIt.hasNext()) {
    cus = (CachedUrlSet) setIt.next();
    childL.add(cus.getUrl());
  }
  String urlRoot = sau.getUrlRoot();
  String[] expectedA = new String[1];
  expectedA[0] = urlRoot;
  assertIsomorphic(expectedA, childL);

  setIt = cus.flatSetIterator();
  childL = new ArrayList(7);
  while (setIt.hasNext()) {
    childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
  }
  expectedA = new String[] {
    urlRoot + "/001file.html",
    urlRoot + "/001file.txt",
    urlRoot + "/002file.html",
    urlRoot + "/002file.txt",
    urlRoot + "/branch1",
    urlRoot + "/branch2",
    urlRoot + "/index.html"
  };
  assertIsomorphic(expectedA, childL);
}
/**
 * Creates a simulated cached URL from the source code provided by the
 * goodContent string, then asserts that the metadata extracted by the
 * MetaPressRisMetadataExtractorFactory matches the metadata in the source.
 *
 * @throws Exception
 */
public void testExtractGoodRisContent() throws Exception {
  String goodContent = createGoodRisContent();
  log.debug3(goodContent);

  List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
  assertNotEmpty(mdlist);
  ArticleMetadata md = mdlist.get(0);
  assertNotNull(md);

  assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME));
  assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE));
  assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE));
  assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE));
  assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN));
  Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
  for (String expAuth : goodAuthors) {
    assertEquals(expAuth, actAuthIter.next());
  }
  assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
  assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
  assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
  assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
  assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI));
  // This shouldn't get set; it will default later to full_text_cu
  assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL));
}
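// For reference, a minimal RIS record of the kind createGoodRisContent()
// presumably builds (all values invented). Each line is a two-letter tag,
// two spaces, a dash, a space, then the value; "ER" ends the record.
static final String SAMPLE_RIS =
    "TY  - JOUR\n" +
    "AU  - Author, A.\n" +
    "AU  - Writer, B.\n" +
    "TI  - An Example Article Title\n" +
    "JF  - Journal of Examples\n" +
    "VL  - 12\n" +
    "IS  - 3\n" +
    "SP  - 101\n" +
    "EP  - 115\n" +
    "SN  - 1234-5678\n" +
    "PY  - 2014\n" +
    "PB  - Example Publisher\n" +
    "DO  - 10.1000/example.doi\n" +
    "ER  - \n";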
protected void checkContent(SimulatedArchivalUnit sau) throws IOException {
  log.debug("checkContent()");
  checkRoot(sau);
  checkLeaf(sau);
  checkStoredContent(sau);
  checkDepth(sau);
}
/*
 * You will have to tell it the DOI and the schema because those normally
 * come from the dataset
 */
private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) {
  log.debug3("validateSingleMainMetadataRecord");

  if ("simple-article".equals(schema)) {
    assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
  } else {
    assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
  }

  log.debug3("doi val is: " + doi_val);
  assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
  assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
  assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
  assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead));
  assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi));
  assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright));
}
/**
 * Returns the base url of the provided CachedUrl, checking to see if it's
 * the result of a redirect.
 */
public static String getBaseUrl(CachedUrl cu) {
  // See the comments in LockssResourceHandler.handleLockssRedirect();
  // this is the same logic.
  CIProperties props = cu.getProperties();
  if (props != null) {
    String redir = props.getProperty(CachedUrl.PROPERTY_CONTENT_URL);
    if (redir != null) {
      return redir;
    }
    String url = cu.getUrl();
    String nodeUrl = props.getProperty(CachedUrl.PROPERTY_NODE_URL);
    if (nodeUrl != null && !nodeUrl.equals(url)) {
      log.debug("getBaseUrl(" + url + "), nodeUrl: " + nodeUrl);
      if (dirNodeCheckSlash) {
        URI uri = new URI(url);
        if (!uri.getPath().endsWith("/")) {
          URI nodeUri = new URI(nodeUrl);
          if (nodeUri.getPath().endsWith("/")) {
            return nodeUrl;
          }
        }
      } else {
        return nodeUrl;
      }
    }
  }
  return cu.getUrl();
}
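// Standalone sketch (not LOCKSS code) of the trailing-slash test above,
// written against java.net.URI. A directory redirect typically rewrites
// ".../dir" to ".../dir/", and only in that case is nodeUrl preferred.
import java.net.URI;

public class SlashCheckDemo {
  public static void main(String[] args) throws Exception {
    URI url = new URI("http://www.example.com/dir");
    URI nodeUrl = new URI("http://www.example.com/dir/");
    boolean preferNodeUrl =
        !url.getPath().endsWith("/") && nodeUrl.getPath().endsWith("/");
    System.out.println(preferNodeUrl); // true
  }
}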
void stopThread() {
  if (sizeCalcThread != null) {
    log.debug2("Stopping thread");
    sizeCalcThread.stopSizeCalc();
    sizeCalcThread = null;
  }
}
protected void logParams() {
  Enumeration en = req.getParameterNames();
  while (en.hasMoreElements()) {
    String name = (String) en.nextElement();
    String[] vals;
    String dispval;
    if (StringUtil.indexOfIgnoreCase(name, "passw") >= 0) {
      dispval = req.getParameter(name).length() == 0 ? "" : "********";
    } else if (log.isDebug2() && (vals = req.getParameterValues(name)).length > 1) {
      dispval = StringUtil.separatedString(vals, ", ");
    } else {
      dispval = req.getParameter(name);
    }
    log.debug(name + " = " + dispval);
  }
}
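// Standalone sketch of the same masking rule: any parameter whose name
// contains "passw" (case-insensitive) is logged as asterisks. The parameter
// names and values here are invented.
import java.util.LinkedHashMap;
import java.util.Map;

public class ParamMaskDemo {
  public static void main(String[] args) {
    Map<String, String> params = new LinkedHashMap<>();
    params.put("user", "alice");
    params.put("Password", "s3cret");
    for (Map.Entry<String, String> e : params.entrySet()) {
      boolean sensitive = e.getKey().toLowerCase().contains("passw");
      String dispval =
          sensitive ? (e.getValue().isEmpty() ? "" : "********") : e.getValue();
      System.out.println(e.getKey() + " = " + dispval);
    }
  }
}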
private void deleteBlock(CachedUrl cu) throws IOException {
  log.info("deleting " + cu.getUrl());
  CachedUrlSetSpec cuss = new SingleNodeCachedUrlSetSpec(cu.getUrl());
  ArchivalUnit au = cu.getArchivalUnit();
  CachedUrlSet cus = au.makeCachedUrlSet(cuss);
  NodeManager nm = au.getPlugin().getDaemon().getNodeManager(au);
  nm.deleteNode(cus);
}
public void testSimpleMainXML() throws Exception {
  log.debug3("testSimpleMainXML");
  String xml_url = TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.xml";
  List<ArticleMetadata> mdList =
      extractFromContent(xml_url, "text/xml", simpleMain, nocheck_mle, null);
  assertEquals(1, mdList.size());
  validateSingleMainMetadataRecord(mdList.get(0), "10.1016/j.jidx.2014.07.028", "article");
}
/** Enqueue a size calculation for the node */
public void queueSizeCalc(RepositoryNode node) {
  synchronized (sizeCalcQueue) {
    if (sizeCalcQueue.add(node)) {
      log.debug2("Queue size calc: " + node);
      startOrKickThread();
    }
  }
}
protected String getHostIp() {
  try {
    IPAddr localHost = IPAddr.getLocalHost();
    return localHost.getHostAddress();
  } catch (UnknownHostException e) {
    log.error("getHostIp()", e);
    return "1.1.1.1";
  }
}
void startOrKickThread() {
  if (sizeCalcThread == null) {
    log.debug2("Starting thread");
    sizeCalcThread = new SizeCalcThread();
    sizeCalcThread.start();
    sizeCalcThread.waitRunning();
  }
  sizeCalcSem.give();
}
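// Generic sketch of the start-or-kick pattern used by queueSizeCalc() and
// startOrKickThread(): lazily start a single worker, then release a semaphore
// permit so an already-running worker wakes and drains the queue. All names
// here are invented.
import java.util.concurrent.Semaphore;

public class StartOrKickDemo {
  private Thread worker;
  private final Semaphore kick = new Semaphore(0);

  synchronized void startOrKick() {
    if (worker == null) {
      worker = new Thread(() -> {
        try {
          while (true) {
            kick.acquire(); // sleep until kicked
            System.out.println("draining work queue...");
          }
        } catch (InterruptedException ignored) {
          // exit quietly on shutdown
        }
      });
      worker.setDaemon(true);
      worker.start();
    }
    kick.release(); // wake the worker
  }

  public static void main(String[] args) throws Exception {
    StartOrKickDemo d = new StartOrKickDemo();
    d.startOrKick();
    d.startOrKick(); // prints twice, from one worker thread
    Thread.sleep(100);
  }
}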
public PlatformUtil.DF getRepositoryDF(String repoName) {
  String path = LockssRepositoryImpl.getLocalRepositoryPath(repoName);
  log.debug("path: " + path);
  return platInfo.getJavaDF(path);
}
/**
 * Checks the consistency of the node, and continues with its children if
 * it's consistent.
 *
 * @param node RepositoryNodeImpl the node to check
 */
private void recurseConsistencyCheck(RepositoryNodeImpl node) {
  logger.debug2("Checking node '" + node.getNodeUrl() + "'...");
  // check consistency at each node
  // correct/deactivate as necessary
  // 'checkNodeConsistency()' will repair if possible
  if (node.checkNodeConsistency()) {
    logger.debug3("Node consistent; recursing on children...");
    List children = node.getNodeList(null, false);
    Iterator iter = children.iterator();
    while (iter.hasNext()) {
      RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next();
      recurseConsistencyCheck(child);
    }
  } else {
    logger.debug3("Node inconsistent; deactivating...");
    deactivateInconsistentNode(node);
  }
}
private void forceV3Poll() {
  ArchivalUnit au = getAu();
  if (au == null) return;
  try {
    callV3ContentPoll(au);
  } catch (Exception e) {
    log.error("Can't start poll", e);
    errMsg = "Error: " + e.toString();
  }
}
protected void checkDepth(SimulatedArchivalUnit sau) {
  log.debug("checkDepth()");
  String URL_ROOT = sau.getUrlRoot();
  assertEquals(0, sau.getLinkDepth(URL_ROOT + "/index.html"));
  assertEquals(0, sau.getLinkDepth(URL_ROOT + "/"));
  assertEquals(1, sau.getLinkDepth(URL_ROOT + "/001file.html"));
  assertEquals(1, sau.getLinkDepth(URL_ROOT + "/branch1/index.html"));
  assertEquals(1, sau.getLinkDepth(URL_ROOT + "/branch1/"));
  assertEquals(2, sau.getLinkDepth(URL_ROOT + "/branch1/001file.html"));
}
private void doDisableMetadataIndexing() {
  ArchivalUnit au = getAu();
  if (au == null) return;
  try {
    disableMetadataIndexing(au, false);
  } catch (RuntimeException e) {
    log.error("Can't disable metadata indexing", e);
    errMsg = "Error: " + e.toString();
  }
}
private void forceReindexMetadata() {
  ArchivalUnit au = getAu();
  if (au == null) return;
  try {
    startReindexingMetadata(au, true);
  } catch (RuntimeException e) {
    log.error("Can't reindex metadata", e);
    errMsg = "Error: " + e.toString();
  }
}
private void doCheckSubstance() {
  ArchivalUnit au = getAu();
  if (au == null) return;
  try {
    checkSubstance(au);
  } catch (RuntimeException e) {
    log.error("Error in SubstanceChecker", e);
    errMsg = "Error in SubstanceChecker; see log.";
  }
}