private void initTestPolls() throws Exception {
  testV1polls = new V1Poll[testV1msg.length];
  for (int i = 0; i < testV1polls.length; i++) {
    log.debug3("initTestPolls: V1 " + i);
    BasePoll p = pollmanager.makePoll(testV1msg[i]);
    assertNotNull(p);
    assertNotNull(p.getMessage());
    log.debug("initTestPolls: V1 " + i + " returns " + p);
    assertTrue(p instanceof V1Poll);
    switch (i) {
      case 0:
        assertTrue(p instanceof V1NamePoll);
        break;
      case 1:
        assertTrue(p instanceof V1ContentPoll);
        break;
      case 2:
        assertTrue(p instanceof V1VerifyPoll);
        break;
    }
    testV1polls[i] = (V1Poll) p;
    assertNotNull(testV1polls[i]);
    log.debug3("initTestPolls: " + i + " " + p.toString());
  }
}
public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
  if (log.isDebug3()) log.debug3("emit(" + af + ", " + md + ")");
  if (md != null) {
    log.debug3("add " + md + " to amlist");
    amlst.add(md);
  }
}
public void testFunctionalFromTarHierarchy() throws Exception {
  log.debug3("in testFromTarHierarchy");
  // load the tarballs
  InputStream file_input = null;
  try {
    file_input = getResourceAsStream(realTARFile_A);
    // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE);
    // uc.storeContent(file_input, tarHeader);
    UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);

    file_input = getResourceAsStream(realTARFile_B);
    // uc = au.makeUrlCacher(TAR_B_BASE);
    // uc.storeContent(file_input, tarHeader);
    uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
    uc.storeContent();
    IOUtil.safeClose(file_input);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } finally {
    IOUtil.safeClose(file_input);
  }

  CachedUrlSet cus = tarAu.getAuCachedUrlSet();
  for (CachedUrl cu : cus.getCuIterable()) {
    log.debug3("AU - cu is: " + cu.getUrl());
    cu.release();
  }

  // We need to start from the level of the ArticleMetadataExtractor
  MyListEmitter emitter = new MyListEmitter();
  ArticleMetadataExtractor amEx =
      new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

  Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
  while (it.hasNext()) {
    ArticleFiles af = it.next();
    log.debug3("Metadata test - articlefiles " + af.toString());
    // CachedUrl cu = af.getFullTextCu();
    CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
    log.debug3("metadata cu is " + cu.getUrl());
    // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu);
    amEx.extract(MetadataTarget.Any(), af, emitter);
    List<ArticleMetadata> returnList = emitter.getAmList();

    assertNotNull(returnList);
    log.debug3("size of returnList is " + returnList.size());
    Iterator<ArticleMetadata> mdIt = returnList.iterator();
    ArticleMetadata mdRecord = null;
    while (mdIt.hasNext()) {
      mdRecord = (ArticleMetadata) mdIt.next();
      validateCompleteMetadataRecord(mdRecord);
    }
  }
}
public String getDefaultArticleMimeType() {
  String ret = definitionMap.getString(KEY_DEFAULT_ARTICLE_MIME_TYPE, null);
  log.debug3("DefaultArticleMimeType " + ret);
  if (ret == null) {
    ret = super.getDefaultArticleMimeType();
    log.debug3("DefaultArticleMimeType from super " + ret);
  }
  return ret;
}
/** test for method scheduleVote(..) */
public void testScheduleVote() {
  V1Poll p = testV1polls[1];
  assertTrue(p instanceof V1ContentPoll);
  log.debug3("testScheduleVote 1");
  p.scheduleVote();
  log.debug3("testScheduleVote 2");
  assertNotNull(p.m_voteTime);
  assertTrue(p.m_voteTime.getRemainingTime() < p.m_deadline.getRemainingTime());
  log.debug3("at end of testScheduleVote");
}
/*
 * When testing no-pdf-check basic XML parsing, you will get partial MD records
 * depending on whether the info comes from dataset.xml or from main.xml
 */
private void validateDatasetMetadataRecord(ArticleMetadata am) {
  log.debug3("validateDatasetMetadataRecord");
  String doi_val = am.get(MetadataField.FIELD_DOI);
  assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));

  log.debug3("doi val is: " + doi_val);
  // The dataset doesn't set this value; it will fail over to the main.xml value
  if (doi_val.equals("10.1016/S0140-1111(14)61865-1")) {
    assertEquals(null, am.get(MetadataField.FIELD_DATE));
  } else {
    assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
  }
  assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
}
/*
 * You will have to tell it the DOI and the schema because those normally come from dataset.xml
 */
private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) {
  log.debug3("validateSingleMainMetadataRecord");

  if ("simple-article".equals(schema)) {
    assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
  } else {
    assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
  }

  log.debug3("doi val is: " + doi_val);
  assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
  assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
  assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
  assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead));
  assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi));
  assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright));
}
/**
 * Method that creates a simulated Cached URL from the source code provided by the goodContent
 * String. It then asserts that the metadata extracted using the
 * MetaPressRisMetadataExtractorFactory matches the metadata in the source code.
 *
 * @throws Exception
 */
public void testExtractGoodRisContent() throws Exception {
  String goodContent = createGoodRisContent();
  log.debug3(goodContent);

  List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
  assertNotEmpty(mdlist);
  ArticleMetadata md = mdlist.get(0);
  assertNotNull(md);

  assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME));
  assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE));
  assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE));
  assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE));
  assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN));
  Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
  for (String expAuth : goodAuthors) {
    assertEquals(expAuth, actAuthIter.next());
  }
  assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
  assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
  assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
  assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
  assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI));
  // This shouldn't get set; it will default later to full_text_cu
  assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL));
}
/**
 * Finds the directory for this AU. If none found in the map, designates a new dir for it.
 *
 * @param auid AU id representing the au
 * @param repoRoot path to the root of the repository
 * @param create if true, designate a new directory when none is found in the map
 * @return the dir String
 */
static String getAuDir(String auid, String repoRoot, boolean create) {
  String repoCachePath = extendCacheLocation(repoRoot);
  LocalRepository localRepo = getLocalRepository(repoRoot);
  synchronized (localRepo) {
    Map aumap = localRepo.getAuMap();
    String auPathSlash = (String) aumap.get(auid);
    if (auPathSlash != null) {
      return auPathSlash;
    }
    if (!create) {
      return null;
    }
    logger.debug3("Creating new au directory for '" + auid + "'.");
    String auDir = localRepo.getPrevAuDir();
    for (int cnt = RepositoryManager.getMaxUnusedDirSearch(); cnt > 0; cnt--) {
      // loop through looking for an available dir
      auDir = getNextDirName(auDir);
      File testDir = new File(repoCachePath, auDir);
      if (logger.isDebug3()) logger.debug3("Probe for unused: " + testDir);
      if (!testDir.exists()) {
        if (RepositoryManager.isStatefulUnusedDirSearch()) {
          localRepo.setPrevAuDir(auDir);
        }
        String auPath = testDir.toString();
        logger.debug3("New au directory: " + auPath);
        auPathSlash = auPath + File.separator;
        // write the new au property file to the new dir
        // XXX this data should be backed up elsewhere to avoid single-point
        // corruption
        Properties idProps = new Properties();
        idProps.setProperty(AU_ID_PROP, auid);
        saveAuIdProperties(auPath, idProps);
        aumap.put(auid, auPathSlash);
        return auPathSlash;
      } else {
        if (logger.isDebug3()) {
          logger.debug3("Existing directory found at '" + auDir + "'. Checking next...");
        }
      }
    }
  }
  throw new RuntimeException(
      "Can't find unused repository dir after "
          + RepositoryManager.getMaxUnusedDirSearch()
          + " tries in "
          + repoCachePath);
}
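// The probe loop in getAuDir() above keeps generating candidate directory names until it
// finds one that does not yet exist on disk (or gives up after a bounded number of tries).
// A minimal standalone sketch of that idea, with purely illustrative names; this is not the
// LOCKSS repository API, just the probing pattern in isolation.
import java.io.File;

public class UnusedDirProbe {
  // Return the first "prefix + counter" directory under root that does not exist yet,
  // or null if none is found within maxTries probes.
  static File findUnusedDir(File root, String prefix, int maxTries) {
    for (int i = 0; i < maxTries; i++) {
      File candidate = new File(root, prefix + i);
      if (!candidate.exists()) {
        return candidate;
      }
    }
    return null; // caller treats null as "no unused dir found"
  }

  public static void main(String[] args) {
    System.out.println(findUnusedDir(new File("/tmp"), "au", 100));
  }
}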
public void testSimpleMainXML() throws Exception {
  log.debug3("testSimpleMainXML");
  String xml_url = TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.xml";
  List<ArticleMetadata> mdList =
      extractFromContent(xml_url, "text/xml", simpleMain, nocheck_mle, null);
  assertEquals(1, mdList.size());
  validateSingleMainMetadataRecord(mdList.get(0), "10.1016/j.jidx.2014.07.028", "article");
}
@Override
public ArticleMetadata extract(MetadataTarget target, CachedUrl cu) throws IOException {
  log.debug3("Metadata - cachedurl cu:" + cu.getUrl());
  ArticleMetadata am = super.extract(target, cu);
  am.cook(tagMap);
  return am;
} // extract
/**
 * Checks the consistency of the node, and continues with its children if it's consistent.
 *
 * @param node RepositoryNodeImpl the node to check
 */
private void recurseConsistencyCheck(RepositoryNodeImpl node) {
  logger.debug2("Checking node '" + node.getNodeUrl() + "'...");
  // check consistency at each node
  // correct/deactivate as necessary
  // 'checkNodeConsistency()' will repair if possible
  if (node.checkNodeConsistency()) {
    logger.debug3("Node consistent; recursing on children...");
    List children = node.getNodeList(null, false);
    Iterator iter = children.iterator();
    while (iter.hasNext()) {
      RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next();
      recurseConsistencyCheck(child);
    }
  } else {
    logger.debug3("Node inconsistent; deactivating...");
    deactivateInconsistentNode(node);
  }
}
/** test for method checkVote(..) */
public void testCheckVote() throws Exception {
  V1LcapMessage msg = null;
  log.debug3("starting testCheckVote");
  msg =
      V1LcapMessage.makeReplyMsg(
          testV1polls[0].getMessage(),
          ByteArray.makeRandomBytes(20),
          ByteArray.makeRandomBytes(20),
          null,
          V1LcapMessage.NAME_POLL_REP,
          testduration,
          testID);
  log.debug3("testCheckVote 2");
  V1Poll p = null;
  p = createCompletedPoll(theDaemon, testau, msg, 8, 2, pollmanager);
  assertTrue(p instanceof V1NamePoll);
  log.debug3("testCheckVote 3");
  assertNotNull(p);
  PeerIdentity id = msg.getOriginatorId();
  assertNotNull(id);
  assertNotNull(p.m_tally);
  int rep = p.m_tally.wtAgree + idmgr.getReputation(id);

  // good vote check
  p.checkVote(msg.getHashed(), new Vote(msg, false));
  assertEquals(9, p.m_tally.numAgree);
  assertEquals(2, p.m_tally.numDisagree);
  assertEquals(rep, p.m_tally.wtAgree);

  rep = p.m_tally.wtDisagree + idmgr.getReputation(id);

  // bad vote check
  p.checkVote(ByteArray.makeRandomBytes(20), new Vote(msg, false));
  assertEquals(9, p.m_tally.numAgree);
  assertEquals(3, p.m_tally.numDisagree);
  assertEquals(rep, p.m_tally.wtDisagree);
}
/**
 * Return the auid -> au-subdir-path mapping, enumerating the directories if necessary to
 * initialize the map.
 */
Map getAuMap() {
  if (auMap == null) {
    logger.debug3("Loading name map for '" + repoCacheFile + "'.");
    auMap = new HashMap();
    if (!repoCacheFile.exists()) {
      logger.debug3("Creating cache dir: '" + repoCacheFile + "'.");
      if (!repoCacheFile.mkdirs()) {
        logger.critical("Couldn't create directory, check owner/permissions: " + repoCacheFile);
        // return empty map
        return auMap;
      }
    } else {
      // read each dir's property file and store mapping auid -> dir
      File[] auDirs = repoCacheFile.listFiles();
      for (int ii = 0; ii < auDirs.length; ii++) {
        String dirName = auDirs[ii].getName();
        // if (dirName.compareTo(lastPluginDir) == 1) {
        //   // adjust the 'lastPluginDir' upwards if necessary
        //   lastPluginDir = dirName;
        // }
        String path = auDirs[ii].getAbsolutePath();
        Properties idProps = getAuIdProperties(path);
        if (idProps != null) {
          String auid = idProps.getProperty(AU_ID_PROP);
          StringBuilder sb = new StringBuilder(path.length() + File.separator.length());
          sb.append(path);
          sb.append(File.separator);
          auMap.put(auid, sb.toString());
          logger.debug3("Mapping to: " + auMap.get(auid) + ": " + auid);
        } else {
          logger.debug3("Not mapping " + path + ", no auid file.");
        }
      }
    }
  }
  return auMap;
}
private void handlePause(int entriesBetweenSleep) {
  if ((entriesBetweenSleep % sleepAfter) == 0) {
    long pauseTime =
        CurrentConfig.getTimeIntervalParam(PARAM_RETRY_PAUSE, DEFAULT_RETRY_PAUSE);
    Deadline pause = Deadline.in(pauseTime);
    logger.debug3("Sleeping for " + StringUtil.timeIntervalToString(pauseTime));
    while (!pause.expired()) {
      try {
        pause.sleep();
      } catch (InterruptedException ie) {
        // no action
      }
    }
  }
}
public MultiPartRequest getMultiPartRequest(int maxLen)
    throws FormDataTooLongException, IOException {
  if (req.getContentType() == null
      || !req.getContentType().startsWith("multipart/form-data")) {
    return null;
  }
  if (req.getContentLength() > maxLen) {
    throw new FormDataTooLongException(req.getContentLength() + " bytes, " + maxLen + " allowed");
  }
  MultiPartRequest multi = new MultiPartRequest(req);
  if (log.isDebug2()) {
    String[] parts = multi.getPartNames();
    log.debug3("Multipart request, " + parts.length + " parts");
    if (log.isDebug3()) {
      for (int p = 0; p < parts.length; p++) {
        String name = parts[p];
        String cont = multi.getString(parts[p]);
        log.debug3(name + ": " + cont);
      }
    }
  }
  multiReq = multi;
  return multi;
}
public void testSimpleDatasetXML() throws Exception {
  log.debug3("testSimpleDatasetXML");
  String file_input = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));
  String xml_url = TAR_A_BASE + SUBDIR + "dataset.xml";
  List<ArticleMetadata> mdList =
      extractFromContent(xml_url, "text/xml", file_input, nocheck_mle, null);
  assertEquals(6, mdList.size());
  Iterator<ArticleMetadata> mdIt = mdList.iterator();
  ArticleMetadata mdRecord = null;
  while (mdIt.hasNext()) {
    mdRecord = (ArticleMetadata) mdIt.next();
    validateDatasetMetadataRecord(mdRecord);
  }
}
/*
 * When testing a complete extraction out of the tarset, the MD record will be completely
 * filled in and pdf-existence will get established
 */
private void validateCompleteMetadataRecord(ArticleMetadata am) {
  log.debug3("validateCompleteMetadataRecord");
  String doi_val = am.get(MetadataField.FIELD_DOI);
  /* make sure we can pick up both types of xml article data */
  log.debug3("doi val is: " + doi_val);

  if ("JA 5.2.0 SIMPLE-ARTICLE"
      .equals(am.getRaw(ElsevierDatasetXmlSchemaHelper.dataset_dtd_metadata))) {
    log.debug3("simple-article");
    assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
  } else {
    assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
  }
  assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));
  assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
  assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
  assertEquals(accessUrlMap.get(doi_val), am.get(MetadataField.FIELD_ACCESS_URL));
  assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
  assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
  assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
  assertEquals("Elsevier", am.get(MetadataField.FIELD_PROVIDER));
  assertEquals("Elsevier", am.get(MetadataField.FIELD_PUBLISHER));
  log.debug3(am.ppString(2));
}
/**
 * Refill the buffer from the specified reader
 *
 * @param reader reader from which to refill the charBuffer
 * @return true if the reader has reached eof
 * @throws IllegalArgumentException if called with a null reader
 */
public boolean refillBuffer(Reader reader) throws IOException {
  if (reader == null) {
    throw new IllegalArgumentException("Called with null reader");
  }
  if (isTrace) {
    logger.debug3("Refilling buffer");
  }
  int maxRead;
  while ((maxRead = capacity - size) > 0) {
    // max chars to add to the end of array
    int maxEnd = (maxRead <= (capacity - tail) ? maxRead : (capacity - tail));
    // max chars to add to the beginning of array
    int maxStart = maxRead - maxEnd;
    if (maxStart > 0) {
      // We have room at the beginning and end. Using a temporary array
      // seems to be cheaper than calling read() twice
      if (preBuffer == null) {
        preBuffer = new char[capacity];
      }
      int charsRead = reader.read(preBuffer, 0, maxRead);
      if (charsRead == -1) {
        return true;
      }
      try {
        add(preBuffer, 0, charsRead);
      } catch (CharRing.RingFullException e) {
        logger.error("Overfilled a CharRing", e);
        throw new IOException("Overfilled a CharRing");
      }
    } else {
      // Adding only to the middle or end, read directly into char buffer
      int charsRead = reader.read(chars, tail, maxEnd);
      if (charsRead == -1) {
        return true;
      }
      tail = (tail + charsRead) % capacity;
      size += charsRead;
      if (charsRead < maxEnd) {
        continue;
      }
    }
  }
  return false;
}
/**
 * Method that creates a simulated Cached URL from the source code provided by the goodContent
 * String. It then asserts that the metadata extracted using the
 * MetaPressRisMetadataExtractorFactory matches the metadata in the source code.
 *
 * @throws Exception
 */
public void testExtractAlternateRisContent() throws Exception {
  String goodContent = createAlternateRisContent();
  log.debug3(goodContent);

  List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
  assertNotEmpty(mdlist);
  ArticleMetadata md = mdlist.get(0);
  assertNotNull(md);

  Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
  for (String expAuth : goodAuthors) {
    assertEquals(expAuth, actAuthIter.next());
  }
  assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
  assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
  assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
  assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
}
static void saveAuIdProperties(String location, Properties props) {
  // XXX these AU_ID_FILE entries need to be backed up elsewhere to avoid
  // single-point corruption
  File propDir = new File(location);
  if (!propDir.exists()) {
    logger.debug("Creating directory '" + propDir.getAbsolutePath() + "'");
    propDir.mkdirs();
  }
  File propFile = new File(propDir, AU_ID_FILE);
  try {
    logger.debug3("Saving au id properties at '" + location + "'.");
    OutputStream os = new BufferedOutputStream(new FileOutputStream(propFile));
    props.store(os, "ArchivalUnit id info");
    os.close();
    propFile.setReadOnly();
  } catch (IOException ioe) {
    logger.error("Couldn't write properties for " + propFile.getPath() + ".", ioe);
    throw new LockssRepository.RepositoryStateException("Couldn't write au id properties file.");
  }
}
protected CIProperties makeCIProperties(ArchiveRecordHeader elementHeader) throws IOException {
  CIProperties ret = new CIProperties();
  Set elementHeaderFieldKeys = elementHeader.getHeaderFieldKeys();
  for (Iterator i = elementHeaderFieldKeys.iterator(); i.hasNext(); ) {
    String key = (String) i.next();
    try {
      Object valueObject = elementHeader.getHeaderValue(key);
      if (valueObject == null) {
        logger.warning("Ignoring null value for key '" + key + "'.");
      } else {
        String value = valueObject.toString();
        logger.debug3(key + ": " + value);
        ret.put(key, value);
      }
    } catch (ClassCastException ex) {
      logger.error("makeCIProperties: " + key + " threw ", ex);
      throw new CacheException.ExploderException(ex);
    }
  }
  return (ret);
}
private void setMdTypeFact(Map factClassMap, String mdType, String factName) {
  log.debug3("Metadata type: " + mdType + " factory " + factName);
  FileMetadataExtractorFactory fact =
      (FileMetadataExtractorFactory) newAuxClass(factName, FileMetadataExtractorFactory.class);
  factClassMap.put(mdType, fact);
}
/*
 * The supporting methods
 */
private void setUpExpectedTarContent() {
  /* maps the DOIs in the metadata to the expected values */
  log.debug3("setUpExpectedTarContent");

  pubTitleMap = new HashMap<String, String>();
  pubTitleMap.put("10.1016/j.jidx.2014.07.028", "International Journal of XXX");
  pubTitleMap.put("10.1016/j.jidx2.2014.05.013", "Revista");
  pubTitleMap.put("10.1016/S1473-1111(14)70840-0", "The Journal");
  pubTitleMap.put("10.1016/S0140-1111(14)61865-1", "The Other Journal");
  pubTitleMap.put("10.1016/j.foo.2014.08.001", "Foo");
  pubTitleMap.put("10.1016/j.foo.2014.08.123", "Foo");

  dateMap = new HashMap<String, String>();
  dateMap.put("10.1016/j.jidx.2014.07.028", "2014-07-30");
  dateMap.put("10.1016/j.jidx2.2014.05.013", "2014-07-09");
  dateMap.put("10.1016/S1473-1111(14)70840-0", "2014-09-01");
  dateMap.put("10.1016/S0140-1111(14)61865-1", "2014"); // will get from main.xml as backup
  dateMap.put("10.1016/j.foo.2014.08.001", "2014-08-20");
  dateMap.put("10.1016/j.foo.2014.08.123", "2014-08-20");

  accessUrlMap = new HashMap<String, String>();
  accessUrlMap.put(
      "10.1016/j.jidx.2014.07.028",
      TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.pdf");
  accessUrlMap.put(
      "10.1016/j.jidx2.2014.05.013",
      TAR_A_BASE + SUBDIR + "00349356/v61i9/S0034935614001819/main.pdf");
  accessUrlMap.put(
      "10.1016/S1473-1111(14)70840-0",
      TAR_A_BASE + SUBDIR + "14733099/v14i10/S1473309914708400/main.pdf");
  accessUrlMap.put(
      "10.1016/S0140-1111(14)61865-1",
      TAR_B_BASE + SUBDIR + "01406736/v384sS1/S0140673614618651/main.pdf");
  accessUrlMap.put(
      "10.1016/j.foo.2014.08.001",
      TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514004151/main.pdf");
  accessUrlMap.put(
      "10.1016/j.foo.2014.08.123",
      TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514003856/main.pdf");

  ArrayList<String> goodAuthors = new ArrayList<String>();
  goodAuthors.add("Writer, Bob");
  goodAuthors.add("Q. Text, Samantha");
  ArrayList<String> simpleAuthors = new ArrayList<String>();
  simpleAuthors.add("Simple, Josh");
  ArrayList<String> extendedAuthors = new ArrayList<String>();
  extendedAuthors.add("Writer, Bob");
  extendedAuthors.add("Q. Text, Samantha");
  extendedAuthors.add("The COLLABORATIVE Investigators");

  authorMap = new HashMap<String, List<String>>();
  authorMap.put("10.1016/j.jidx.2014.07.028", goodAuthors);
  authorMap.put("10.1016/j.jidx2.2014.05.013", goodAuthors);
  authorMap.put("10.1016/S1473-1111(14)70840-0", extendedAuthors);
  authorMap.put("10.1016/S0140-1111(14)61865-1", simpleAuthors);
  authorMap.put("10.1016/j.foo.2014.08.001", goodAuthors);
  authorMap.put("10.1016/j.foo.2014.08.123", goodAuthors);

  volMap = new HashMap<String, String>();
  volMap.put("10.1016/j.jidx.2014.07.028", "64");
  volMap.put("10.1016/j.jidx2.2014.05.013", "61");
  volMap.put("10.1016/S1473-1111(14)70840-0", "14");
  volMap.put("10.1016/S0140-1111(14)61865-1", "384");
  volMap.put("10.1016/j.foo.2014.08.001", "242");
  volMap.put("10.1016/j.foo.2014.08.123", "242");

  issueMap = new HashMap<String, String>();
  issueMap.put("10.1016/j.jidx.2014.07.028", "C");
  issueMap.put("10.1016/j.jidx2.2014.05.013", "9");
  issueMap.put("10.1016/S1473-1111(14)70840-0", "10");
  issueMap.put("10.1016/S0140-1111(14)61865-1", "S1");
  issueMap.put("10.1016/j.foo.2014.08.001", "C");
  issueMap.put("10.1016/j.foo.2014.08.123", "C");
}
public static V1Poll createCompletedPoll(
    LockssDaemon daemon,
    ArchivalUnit au,
    V1LcapMessage testmsg,
    int numAgree,
    int numDisagree,
    PollManager pollmanager)
    throws Exception {
  log.debug(
      "createCompletedPoll: au: "
          + au.toString()
          + " peer "
          + testmsg.getOriginatorId()
          + " votes "
          + numAgree
          + "/"
          + numDisagree);
  CachedUrlSetSpec cusSpec = null;
  if ((testmsg.getLwrBound() != null)
      && (testmsg.getLwrBound().equals(PollSpec.SINGLE_NODE_LWRBOUND))) {
    cusSpec = new SingleNodeCachedUrlSetSpec(testmsg.getTargetUrl());
  } else {
    cusSpec =
        new RangeCachedUrlSetSpec(
            testmsg.getTargetUrl(), testmsg.getLwrBound(), testmsg.getUprBound());
  }
  CachedUrlSet cus = au.makeCachedUrlSet(cusSpec);
  PollSpec spec = new PollSpec(cus, Poll.V1_CONTENT_POLL);
  ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
  V1Poll p = null;
  if (testmsg.isContentPoll()) {
    p =
        new V1ContentPoll(
            spec,
            pollmanager,
            testmsg.getOriginatorId(),
            testmsg.getChallenge(),
            testmsg.getDuration(),
            testmsg.getHashAlgorithm());
  } else if (testmsg.isNamePoll()) {
    p =
        new V1NamePoll(
            spec,
            pollmanager,
            testmsg.getOriginatorId(),
            testmsg.getChallenge(),
            testmsg.getDuration(),
            testmsg.getHashAlgorithm());
  } else if (testmsg.isVerifyPoll()) {
    p =
        new V1VerifyPoll(
            spec,
            pollmanager,
            testmsg.getOriginatorId(),
            testmsg.getChallenge(),
            testmsg.getDuration(),
            testmsg.getHashAlgorithm(),
            testmsg.getVerifier());
  }
  assertNotNull(p);
  p.setMessage(testmsg);
  p.m_tally.quorum = numAgree + numDisagree;
  p.m_tally.numAgree = numAgree;
  p.m_tally.numDisagree = numDisagree;
  p.m_tally.wtAgree = 2000;
  p.m_tally.wtDisagree = 200;
  p.m_tally.localEntries = makeEntries(1, 3);
  p.m_tally.votedEntries = makeEntries(1, 5);
  p.m_tally.votedEntries.remove(1);
  p.m_pollstate = V1Poll.PS_COMPLETE;
  p.m_callerID = testmsg.getOriginatorId();
  log.debug3("poll " + p.toString());
  p.m_tally.tallyVotes();
  return p;
}
/** Explode the archive into its constituent elements */
public void explode() throws CacheException {
  CachedUrl cachedUrl = null;
  int goodEntries = 0;
  int badEntries = 0;
  int ignoredEntries = 0;
  int entriesBetweenSleep = 0;
  ArchiveReader arcReader = null;
  logger.info(
      (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
  try {
    if (storeArchive) {
      UrlCacher uc = au.makeUrlCacher(new UrlData(arcStream, arcProps, fetchUrl));
      BitSet bs = new BitSet();
      bs.set(UrlCacher.DONT_CLOSE_INPUT_STREAM_FLAG);
      uc.setFetchFlags(bs);
      uc.storeContent();
      archiveData.resetInputStream();
      arcStream = archiveData.input;
    }
    // Wrap it in an ArchiveReader
    logger.debug3("About to wrap stream");
    arcReader = wrapStream(fetchUrl, arcStream);
    logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
    // Explode it
    if (arcReader == null) {
      throw new CacheException.ExploderException("no WarcReader for " + origUrl);
    }
    ArchivalUnit au = crawlFacade.getAu();
    Set stemSet = new HashSet();
    logger.debug("Exploding " + fetchUrl);
    // Iterate through the elements in the WARC file, except the first
    Iterator i = arcReader.iterator();
    // Skip first record
    for (i.next(); i.hasNext(); ) {
      // XXX probably not necessary
      helper.pokeWDog();
      if ((++entriesBetweenSleep % sleepAfter) == 0) {
        long pauseTime =
            CurrentConfig.getTimeIntervalParam(PARAM_RETRY_PAUSE, DEFAULT_RETRY_PAUSE);
        Deadline pause = Deadline.in(pauseTime);
        logger.debug3("Sleeping for " + StringUtil.timeIntervalToString(pauseTime));
        while (!pause.expired()) {
          try {
            pause.sleep();
          } catch (InterruptedException ie) {
            // no action
          }
        }
      }
      ArchiveRecord element = (ArchiveRecord) i.next();
      // Each element is a URL to be cached in a suitable AU
      ArchiveRecordHeader elementHeader = element.getHeader();
      String elementUrl = elementHeader.getUrl();
      String elementMimeType = elementHeader.getMimetype();
      long elementLength = elementHeader.getLength();
      logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
      if (elementUrl.startsWith("http:")) {
        ArchiveEntry ae =
            new ArchiveEntry(
                elementUrl,
                elementLength,
                0, // XXX need to convert getDate string to long
                element, // ArchiveRecord extends InputStream
                this,
                fetchUrl);
        ae.setHeaderFields(makeCIProperties(elementHeader));
        long bytesStored = elementLength;
        logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
        try {
          helper.process(ae);
        } catch (PluginException ex) {
          throw new CacheException.ExploderException("helper.process() threw", ex);
        }
        if (ae.getBaseUrl() != null) {
          if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
            storeEntry(ae);
            handleAddText(ae);
            goodEntries++;
            crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
          } else {
            ignoredEntries++;
          }
        } else {
          badEntries++;
          logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
        }
      }
    }
  } catch (IOException ex) {
    throw new CacheException.ExploderException(ex);
  } finally {
    if (arcReader != null) {
      try {
        arcReader.close();
        arcReader = null;
      } catch (IOException ex) {
        throw new CacheException.ExploderException(ex);
      }
    }
    if (cachedUrl != null) {
      cachedUrl.release();
    }
    IOUtil.safeClose(arcStream);
  }
  if (badEntries == 0 && goodEntries > 0) {
    // Make it look like a new crawl finished on each AU to which
    // URLs were added.
    for (Iterator it = touchedAus.iterator(); it.hasNext(); ) {
      ArchivalUnit au = (ArchivalUnit) it.next();
      logger.debug3(archiveUrl + " touching " + au.toString());
      AuUtil.getDaemon(au).getNodeManager(au).newContentCrawlFinished();
    }
  } else {
    ArchivalUnit au = crawlFacade.getAu();
    String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
    throw new CacheException.UnretryableException(msg);
  }
}
/** Forward to real factory, set error handler */
public DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
  DocumentBuilder db = fact.newDocumentBuilder();
  log.debug3("Created builder: " + db);
  db.setErrorHandler(new MyErrorHandler());
  return db;
}
public LockssDocumentBuilderFactoryImpl() {
  // fact = new org.apache.crimson.jaxp.DocumentBuilderFactoryImpl();
  fact = new org.apache.xerces.jaxp.DocumentBuilderFactoryImpl();
  log.debug3("Created fact: " + fact);
}
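// The two members above delegate to a real JAXP factory and install an error handler on every
// builder they hand out. A self-contained sketch of the same pattern using only the standard
// JAXP/SAX APIs (not the LOCKSS wrapper or MyErrorHandler themselves); the System.err logging
// in the handler is an illustrative assumption.
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

public class ErrorHandlingBuilderExample {
  public static void main(String[] args) throws Exception {
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    // Report recoverable parse errors instead of letting them pass silently.
    db.setErrorHandler(new DefaultHandler() {
      @Override
      public void error(SAXParseException e) {
        System.err.println("XML error at line " + e.getLineNumber() + ": " + e.getMessage());
      }
    });
    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
    System.out.println("Parsed root element: " + doc.getDocumentElement().getTagName());
  }
}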
/** Common request handling. */
public void service(HttpServletRequest req, HttpServletResponse resp)
    throws ServletException, IOException {
  resetState();
  boolean success = false;
  HttpSession session = req.getSession(false);
  try {
    this.req = req;
    this.resp = resp;
    if (log.isDebug()) {
      logParams();
    }
    resp.setContentType("text/html");
    if (!mayPageBeCached()) {
      resp.setHeader("pragma", "no-cache");
      resp.setHeader("Cache-control", "no-cache");
    }
    reqURL = new URL(UrlUtil.getRequestURL(req));
    clientAddr = getLocalIPAddr();
    // check that current user has permission to run this servlet
    if (!isServletAllowed(myServletDescr())) {
      displayWarningInLieuOfPage("You are not authorized to use " + myServletDescr().heading);
      return;
    }
    // check whether servlet is disabled
    String reason = ServletUtil.servletDisabledReason(myServletDescr().getServletName());
    if (reason != null) {
      displayWarningInLieuOfPage("This function is disabled. " + reason);
      return;
    }
    if (session != null) {
      session.setAttribute(SESSION_KEY_RUNNING_SERVLET, getHeading());
      String reqHost = req.getRemoteHost();
      String forw = req.getHeader(HttpFields.__XForwardedFor);
      if (!StringUtil.isNullString(forw)) {
        reqHost += " (proxies for " + forw + ")";
      }
      session.setAttribute(SESSION_KEY_REQUEST_HOST, reqHost);
    }
    lockssHandleRequest();
    success = (errMsg == null);
  } catch (ServletException e) {
    log.error("Servlet threw", e);
    throw e;
  } catch (IOException e) {
    log.error("Servlet threw", e);
    throw e;
  } catch (RuntimeException e) {
    log.error("Servlet threw", e);
    throw e;
  } finally {
    if (session != null) {
      session.setAttribute(SESSION_KEY_RUNNING_SERVLET, null);
      session.setAttribute(LockssFormAuthenticator.__J_AUTH_ACTIVITY, TimeBase.nowMs());
    }
    if ("please".equalsIgnoreCase(req.getHeader("X-Lockss-Result"))) {
      log.debug3("X-Lockss-Result: " + (success ? "Ok" : "Fail"));
      resp.setHeader("X-Lockss-Result", success ? "Ok" : "Fail");
    }
    resetMyLocals();
    resetLocals();
  }
}
/** Explode the archive into its constituent elements */
public void explode() throws CacheException {
  int goodEntries = 0;
  int badEntries = 0;
  int entriesBetweenSleep = 0;
  ArchiveReader arcReader = null;
  logger.debug(
      (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
  try {
    // Wrap it in an ArchiveReader
    logger.debug3("About to wrap stream");
    arcReader = wrapStream(fetchUrl, arcStream);
    logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
    // Explode it
    if (arcReader == null) {
      throw new CacheException.ExploderException("no WarcReader for " + origUrl);
    }
    ArchivalUnit au = crawlFacade.getAu();
    logger.debug("Exploding " + fetchUrl);
    // Iterate through the elements in the WARC file, except the first
    Iterator<ArchiveRecord> iter = arcReader.iterator();
    // Skip first record
    if (iter.hasNext()) iter.next();
    while (iter.hasNext()) {
      helper.pokeWDog();
      // check need to pause
      handlePause(++entriesBetweenSleep);
      // handle each element in the archive
      ArchiveRecord element = iter.next();
      // Each element is a URL to be cached in our AU
      ArchiveRecordHeader elementHeader = element.getHeader();
      String elementUrl = elementHeader.getUrl();
      String elementMimeType = elementHeader.getMimetype();
      long elementLength = elementHeader.getLength();
      long elementDate;
      try {
        elementDate = ArchiveUtils.parse14DigitDate(elementHeader.getDate()).getTime();
      } catch (ParseException e) {
        elementDate = 0;
      }
      logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
      // add check to determine if this is a url which should be cached
      if (au.shouldBeCached(elementUrl) && elementUrl.startsWith("http:")) {
        ArchiveEntry ae =
            new ArchiveEntry(
                elementUrl,
                elementLength,
                elementDate,
                element, // ArchiveRecord extends InputStream
                this,
                fetchUrl);
        ae.setHeaderFields(makeCIProperties(elementHeader));
        long bytesStored = elementLength;
        logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
        try {
          helper.process(ae);
        } catch (PluginException ex) {
          throw new CacheException.ExploderException("helper.process() threw", ex);
        }
        if (ae.getBaseUrl() != null) {
          if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
            storeEntry(ae);
            handleAddText(ae);
            goodEntries++;
            // this needs to use the correct depth ? how
            CrawlUrlData cud = new CrawlUrlData(elementUrl, 0);
            crawlFacade.addToParseQueue(cud);
            crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
          }
        } else {
          badEntries++;
          logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
        }
      }
    }
  } catch (IOException ex) {
    throw new CacheException.ExploderException(ex);
  } finally {
    if (arcReader != null) {
      try {
        arcReader.close();
      } catch (IOException ex) {
        throw new CacheException.ExploderException(ex);
      }
    }
    IOUtil.safeClose(arcStream);
  }
  // report failed fetches
  if (badEntries != 0) {
    String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
    throw new CacheException.UnretryableException(msg);
  }
}