Example #1
 private void initTestPolls() throws Exception {
   testV1polls = new V1Poll[testV1msg.length];
   for (int i = 0; i < testV1polls.length; i++) {
     log.debug3("initTestPolls: V1 " + i);
     BasePoll p = pollmanager.makePoll(testV1msg[i]);
     assertNotNull(p);
     assertNotNull(p.getMessage());
     log.debug("initTestPolls: V1 " + i + " returns " + p);
     assertTrue(p instanceof V1Poll);
     switch (i) {
       case 0:
         assertTrue(p instanceof V1NamePoll);
         break;
       case 1:
         assertTrue(p instanceof V1ContentPoll);
         break;
       case 2:
         assertTrue(p instanceof V1VerifyPoll);
         break;
     }
     testV1polls[i] = (V1Poll) p;
     assertNotNull(testV1polls[i]);
     log.debug3("initTestPolls: " + i + " " + p.toString());
   }
 }
 public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
   if (log.isDebug3()) log.debug3("emit(" + af + ", " + md + ")");
   if (md != null) {
     log.debug3("add " + md + " to amlist");
     amlst.add(md);
   }
 }
  public void testFunctionalFromTarHierarchy() throws Exception {
    log.debug3("in testFromTarHierarchy");
    // load the tarballs
    InputStream file_input = null;
    try {
      file_input = getResourceAsStream(realTARFile_A);
      // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE);
      // uc.storeContent(file_input, tarHeader);
      UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

      file_input = getResourceAsStream(realTARFile_B);
      // uc = au.makeUrlCacher(TAR_B_BASE);
      // uc.storeContent(file_input, tarHeader);
      uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

     } catch (IOException e) {
       // fail the test rather than swallowing the setup error
       fail("Couldn't load tarball content: " + e);
    } finally {
      IOUtil.safeClose(file_input);
    }

    CachedUrlSet cus = tarAu.getAuCachedUrlSet();
    for (CachedUrl cu : cus.getCuIterable()) {
      log.debug3("AU - cu is: " + cu.getUrl());
      cu.release();
    }

    // We need to start from the level of the ArticleMetadataExtractor
    MyListEmitter emitter = new MyListEmitter();
    ArticleMetadataExtractor amEx =
        new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

    Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.debug3("Metadata test - articlefiles " + af.toString());
      // CachedUrl cu = af.getFullTextCu();
      CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
      log.debug3("metadata cu is " + cu.getUrl());
      // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu);
      amEx.extract(MetadataTarget.Any(), af, emitter);
      List<ArticleMetadata> returnList = emitter.getAmList();

      assertNotNull(returnList);
      log.debug3("size of returnList is " + returnList.size());
      Iterator<ArticleMetadata> mdIt = returnList.iterator();
      while (mdIt.hasNext()) {
        ArticleMetadata mdRecord = mdIt.next();
        validateCompleteMetadataRecord(mdRecord);
      }
    }
  }
Example #4
 public String getDefaultArticleMimeType() {
   String ret = definitionMap.getString(KEY_DEFAULT_ARTICLE_MIME_TYPE, null);
   log.debug3("DefaultArticleMimeType " + ret);
   if (ret == null) {
     ret = super.getDefaultArticleMimeType();
     log.debug3("DefaultArticleMimeType from super " + ret);
   }
   return ret;
 }
Example #5
 /** test for method scheduleVote(..) */
 public void testScheduleVote() {
   V1Poll p = testV1polls[1];
   assertTrue(p instanceof V1ContentPoll);
   log.debug3("testScheduleVote 1");
   p.scheduleVote();
   log.debug3("testScheduleVote 2");
   assertNotNull(p.m_voteTime);
   assertTrue(p.m_voteTime.getRemainingTime() < p.m_deadline.getRemainingTime());
   log.debug3("at end of testScheduleVote");
 }
  /*
   * When testing no-pdf-check basic XML parsing, you will get partial MD records
   * depending on whether the info comes from dataset.xml or from main.xml
   */
  private void validateDatasetMetadataRecord(ArticleMetadata am) {
    log.debug3("valideDatasetMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));

    log.debug3("doi val is: " + doi_val);
     // The dataset doesn't set this value; it falls back to the main.xml value
     if ("10.1016/S0140-1111(14)61865-1".equals(doi_val)) {
      assertEquals(null, am.get(MetadataField.FIELD_DATE));
    } else {
      assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    }
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
  }
  /*
    * You will have to tell it the DOI and the schema because those normally come from dataset.xml
   */
  private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) {
    log.debug3("valideSingleMainMetadatRecord");
    if ("simple-article".equals(schema)) {
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }

    log.debug3("doi val is: " + doi_val);
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead));
    assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi));
    assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright));
  }
  /**
    * Creates a simulated cached URL from the source code provided by the goodContent String, then
    * asserts that the metadata extracted by the MetaPressRisMetadataExtractorFactory matches the
    * metadata in the source code.
   *
   * @throws Exception
   */
  public void testExtractGoodRisContent() throws Exception {
    String goodContent = createGoodRisContent();
    log.debug3(goodContent);

    List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);

    assertEquals(goodVolume, md.get(MetadataField.FIELD_VOLUME));
    assertEquals(goodIssue, md.get(MetadataField.FIELD_ISSUE));
    assertEquals(goodStartPage, md.get(MetadataField.FIELD_START_PAGE));
    assertEquals(goodEndPage, md.get(MetadataField.FIELD_END_PAGE));
    assertEquals(goodIssn, md.get(MetadataField.FIELD_ISSN));
    Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
    for (String expAuth : goodAuthors) {
      assertEquals(expAuth, actAuthIter.next());
    }
    assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
    assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));

    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
    assertEquals(goodDOI, md.get(MetadataField.FIELD_DOI));
    // This shouldn't get set. It will default later to full_text_cu
    assertNotEquals(doiURL, md.get(MetadataField.FIELD_ACCESS_URL));
  }
 /**
  * Finds the directory for this AU. If none found in the map, designates a new dir for it.
  *
  * @param auid AU id representing the au
  * @param repoRoot path to the root of the repository
  * @param create if true, allocate and record a new directory when none is mapped
  * @return the dir String, or null if none is mapped and create is false
  */
 static String getAuDir(String auid, String repoRoot, boolean create) {
   String repoCachePath = extendCacheLocation(repoRoot);
   LocalRepository localRepo = getLocalRepository(repoRoot);
   synchronized (localRepo) {
     Map aumap = localRepo.getAuMap();
     String auPathSlash = (String) aumap.get(auid);
     if (auPathSlash != null) {
       return auPathSlash;
     }
     if (!create) {
       return null;
     }
     logger.debug3("Creating new au directory for '" + auid + "'.");
     String auDir = localRepo.getPrevAuDir();
     for (int cnt = RepositoryManager.getMaxUnusedDirSearch(); cnt > 0; cnt--) {
       // loop through looking for an available dir
       auDir = getNextDirName(auDir);
       File testDir = new File(repoCachePath, auDir);
       if (logger.isDebug3()) logger.debug3("Probe for unused: " + testDir);
       if (!testDir.exists()) {
         if (RepositoryManager.isStatefulUnusedDirSearch()) {
           localRepo.setPrevAuDir(auDir);
         }
         String auPath = testDir.toString();
         logger.debug3("New au directory: " + auPath);
         auPathSlash = auPath + File.separator;
         // write the new au property file to the new dir
         // XXX this data should be backed up elsewhere to avoid single-point
         // corruption
         Properties idProps = new Properties();
         idProps.setProperty(AU_ID_PROP, auid);
         saveAuIdProperties(auPath, idProps);
         aumap.put(auid, auPathSlash);
         return auPathSlash;
       } else {
         if (logger.isDebug3()) {
           logger.debug3("Existing directory found at '" + auDir + "'.  Checking next...");
         }
       }
     }
   }
   throw new RuntimeException(
       "Can't find unused repository dir after "
           + RepositoryManager.getMaxUnusedDirSearch()
           + " tries in "
           + repoCachePath);
 }
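
A brief usage sketch of the lookup-versus-create contract above (the auid and repoRoot values are hypothetical):

   // Lookup only: returns the mapped path, or null if this AU has no directory yet
   String auPath = getAuDir("org|lockss|plugin|SomePlugin&base_url~http%3A%2F%2Fexample", "/repo/root", false);
   if (auPath == null) {
     // Allocate the next free subdirectory, write its au id property file,
     // and record the auid -> path mapping
     auPath = getAuDir("org|lockss|plugin|SomePlugin&base_url~http%3A%2F%2Fexample", "/repo/root", true);
   }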
 public void testSimpleMainXML() throws Exception {
   log.debug3("testSimpleMainXML");
   String xml_url = TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.xml";
   List<ArticleMetadata> mdList =
       extractFromContent(xml_url, "text/xml", simpleMain, nocheck_mle, null);
   assertEquals(1, mdList.size());
   validateSingleMainMetadataRecord(mdList.get(0), "10.1016/j.jidx.2014.07.028", "article");
 }
    @Override
    public ArticleMetadata extract(MetadataTarget target, CachedUrl cu) throws IOException {

      log.debug3("Metadata - cachedurl cu:" + cu.getUrl());

      ArticleMetadata am = super.extract(target, cu);
      am.cook(tagMap);
      return am;
    } // extract
 /**
  * Checks the consistency of the node, and continues with its children if it's consistent.
  *
  * @param node RepositoryNodeImpl the node to check
  */
 private void recurseConsistencyCheck(RepositoryNodeImpl node) {
   logger.debug2("Checking node '" + node.getNodeUrl() + "'...");
   // check consistency at each node
   // correct/deactivate as necessary
   // 'checkNodeConsistency()' will repair if possible
   if (node.checkNodeConsistency()) {
     logger.debug3("Node consistent; recursing on children...");
     List children = node.getNodeList(null, false);
     Iterator iter = children.iterator();
     while (iter.hasNext()) {
       RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next();
       recurseConsistencyCheck(child);
     }
   } else {
     logger.debug3("Node inconsistent; deactivating...");
     deactivateInconsistentNode(node);
   }
 }
Example #13
  /** test for method checkVote(..) */
  public void testCheckVote() throws Exception {
     log.debug3("starting testCheckVote");
     V1LcapMessage msg =
        V1LcapMessage.makeReplyMsg(
            testV1polls[0].getMessage(),
            ByteArray.makeRandomBytes(20),
            ByteArray.makeRandomBytes(20),
            null,
            V1LcapMessage.NAME_POLL_REP,
            testduration,
            testID);
    log.debug3("testCheeckVote 2");
     V1Poll p = createCompletedPoll(theDaemon, testau, msg, 8, 2, pollmanager);
     assertNotNull(p);
     assertTrue(p instanceof V1NamePoll);
     log.debug3("testCheckVote 3");
    PeerIdentity id = msg.getOriginatorId();
    assertNotNull(id);
    assertNotNull(p.m_tally);
    int rep = p.m_tally.wtAgree + idmgr.getReputation(id);

    // good vote check

    p.checkVote(msg.getHashed(), new Vote(msg, false));
    assertEquals(9, p.m_tally.numAgree);
    assertEquals(2, p.m_tally.numDisagree);
    assertEquals(rep, p.m_tally.wtAgree);

    rep = p.m_tally.wtDisagree + idmgr.getReputation(id);

    // bad vote check
    p.checkVote(ByteArray.makeRandomBytes(20), new Vote(msg, false));
    assertEquals(9, p.m_tally.numAgree);
    assertEquals(3, p.m_tally.numDisagree);
    assertEquals(rep, p.m_tally.wtDisagree);
  }
    /**
     * Return the auid -> au-subdir-path mapping, enumerating the directories if necessary to
     * initialize the map.
     */
    Map getAuMap() {
      if (auMap == null) {
        logger.debug3("Loading name map for '" + repoCacheFile + "'.");
        auMap = new HashMap();
        if (!repoCacheFile.exists()) {
          logger.debug3("Creating cache dir:" + repoCacheFile + "'.");
          if (!repoCacheFile.mkdirs()) {
            logger.critical("Couldn't create directory, check owner/permissions: " + repoCacheFile);
            // return empty map
            return auMap;
          }
        } else {
          // read each dir's property file and store mapping auid -> dir
          File[] auDirs = repoCacheFile.listFiles();
          for (int ii = 0; ii < auDirs.length; ii++) {
            String dirName = auDirs[ii].getName();
            //       if (dirName.compareTo(lastPluginDir) == 1) {
            //         // adjust the 'lastPluginDir' upwards if necessary
            //         lastPluginDir = dirName;
            //       }

            String path = auDirs[ii].getAbsolutePath();
            Properties idProps = getAuIdProperties(path);
            if (idProps != null) {
              String auid = idProps.getProperty(AU_ID_PROP);
              StringBuilder sb = new StringBuilder(path.length() + File.separator.length());
              sb.append(path);
              sb.append(File.separator);
              auMap.put(auid, sb.toString());
              logger.debug3("Mapping to: " + auMap.get(auid) + ": " + auid);
            } else {
              logger.debug3("Not mapping " + path + ", no auid file.");
            }
          }
        }
      }
      return auMap;
    }
Example #15
 private void handlePause(int entriesBetweenSleep) {
   if ((entriesBetweenSleep % sleepAfter) == 0) {
     long pauseTime = CurrentConfig.getTimeIntervalParam(PARAM_RETRY_PAUSE, DEFAULT_RETRY_PAUSE);
     Deadline pause = Deadline.in(pauseTime);
     logger.debug3("Sleeping for " + StringUtil.timeIntervalToString(pauseTime));
     while (!pause.expired()) {
       try {
         pause.sleep();
       } catch (InterruptedException ie) {
         // no action
       }
     }
   }
 }
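
This helper factors out the sleep loop that appears inline in Example #26 below; Example #30 then calls it once per archive element:

   // inside the element-processing loop (see Example #30)
   handlePause(++entriesBetweenSleep);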
Example #16
 public MultiPartRequest getMultiPartRequest(int maxLen)
     throws FormDataTooLongException, IOException {
   if (req.getContentType() == null || !req.getContentType().startsWith("multipart/form-data")) {
     return null;
   }
   if (req.getContentLength() > maxLen) {
     throw new FormDataTooLongException(req.getContentLength() + " bytes, " + maxLen + " allowed");
   }
   MultiPartRequest multi = new MultiPartRequest(req);
   if (log.isDebug2()) {
     String[] parts = multi.getPartNames();
     log.debug3("Multipart request, " + parts.length + " parts");
     if (log.isDebug3()) {
       for (int p = 0; p < parts.length; p++) {
         String name = parts[p];
         String cont = multi.getString(parts[p]);
         log.debug3(name + ": " + cont);
       }
     }
   }
   multiReq = multi;
   return multi;
 }
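
A caller-side sketch of the contract above (MAX_UPLOAD_BYTES is an assumed constant, not part of the original):

   try {
     MultiPartRequest multi = getMultiPartRequest(MAX_UPLOAD_BYTES);
     if (multi == null) {
       // not a multipart/form-data request
     }
   } catch (FormDataTooLongException e) {
     // Content-Length exceeded maxLen; reject the upload
   } catch (IOException e) {
     // reading the request body failed
   }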
  public void testSimpleDatasetXML() throws Exception {
    log.debug3("testSimpleDatasetXML");
    String file_input = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));
    String xml_url = TAR_A_BASE + SUBDIR + "dataset.xml";

    List<ArticleMetadata> mdList =
        extractFromContent(xml_url, "text/xml", file_input, nocheck_mle, null);
    assertEquals(6, mdList.size());
     Iterator<ArticleMetadata> mdIt = mdList.iterator();
     while (mdIt.hasNext()) {
       ArticleMetadata mdRecord = mdIt.next();
      validateDatasetMetadataRecord(mdRecord);
    }
  }
  /*
    * When testing a complete extraction out of the tarset, the MD record will be completely
    * filled in and the existence of the PDF will be established
   */
  private void validateCompleteMetadataRecord(ArticleMetadata am) {
    log.debug3("valideCompleteMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    /* make sure we can pick up both types of xml article data */
    log.debug3("doi val is: " + doi_val);

    if ("JA 5.2.0 SIMPLE-ARTICLE"
        .equals(am.getRaw(ElsevierDatasetXmlSchemaHelper.dataset_dtd_metadata))) {
      log.debug3("simple-article");
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    assertEquals(accessUrlMap.get(doi_val), am.get(MetadataField.FIELD_ACCESS_URL));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PROVIDER));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PUBLISHER));
    log.debug3(am.ppString(2));
  }
Example #19
  /**
   * Refill the buffer from the specified reader
   *
   * @param reader reader from which to refill the charBuffer
    * @return true if the reader has reached eof
    * @throws IOException if the underlying read fails
    * @throws IllegalArgumentException if called with a null reader
   */
  public boolean refillBuffer(Reader reader) throws IOException {
    if (reader == null) {
      throw new IllegalArgumentException("Called with null reader");
    }
    if (isTrace) {
      logger.debug3("Refilling buffer");
    }
    int maxRead;
    while ((maxRead = capacity - size) > 0) {
      // max chars to add to the end of array
      int maxEnd = (maxRead <= (capacity - tail) ? maxRead : (capacity - tail));
      // max chars to add to the beginning of array
      int maxStart = maxRead - maxEnd;

      if (maxStart > 0) {
        // We have room at the beginning and end.  Using a temporary array
        // seems to be cheaper than calling read() twice
        if (preBuffer == null) {
          preBuffer = new char[capacity];
        }
        int charsRead = reader.read(preBuffer, 0, maxRead);
        if (charsRead == -1) {
          return true;
        }
        try {
          add(preBuffer, 0, charsRead);
        } catch (CharRing.RingFullException e) {
          logger.error("Overfilled a CharRing", e);
          throw new IOException("Overfilled a CharRing");
        }
      } else {
        // Adding only to the middle or end, read directly into char buffer
        int charsRead = reader.read(chars, tail, maxEnd);
        if (charsRead == -1) {
          return true;
        }
        tail = (tail + charsRead) % capacity;
        size += charsRead;
        if (charsRead < maxEnd) {
          continue;
        }
      }
    }
    return false;
  }
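
A minimal calling pattern implied by the return contract, assuming a CharRing-backed buffer named ring (hypothetical) and any java.io.Reader:

   Reader reader = new StringReader("text to buffer");
   boolean eof = false;
   while (!eof) {
     eof = ring.refillBuffer(reader); // true once the reader reaches EOF
     // consume characters from the ring here to free capacity before the next refill
   }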
  /**
    * Creates a simulated cached URL from the source code provided by the goodContent String, then
    * asserts that the metadata extracted by the MetaPressRisMetadataExtractorFactory matches the
    * metadata in the source code.
   *
   * @throws Exception
   */
  public void testExtractAlternateRisContent() throws Exception {
    String goodContent = createAlternateRisContent();
    log.debug3(goodContent);

    List<ArticleMetadata> mdlist = setupContentForAU(bau1, RIS_URL, goodContent, false);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);

    Iterator<String> actAuthIter = md.getList(MetadataField.FIELD_AUTHOR).iterator();
    for (String expAuth : goodAuthors) {
      assertEquals(expAuth, actAuthIter.next());
    }
    assertEquals(goodTitle, md.get(MetadataField.FIELD_ARTICLE_TITLE));
    assertEquals(goodJournal, md.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
  }
 static void saveAuIdProperties(String location, Properties props) {
   // XXX these AU_ID_FILE entries need to be backed up elsewhere to avoid
   // single-point corruption
   File propDir = new File(location);
   if (!propDir.exists()) {
     logger.debug("Creating directory '" + propDir.getAbsolutePath() + "'");
     propDir.mkdirs();
   }
   File propFile = new File(propDir, AU_ID_FILE);
   try {
     logger.debug3("Saving au id properties at '" + location + "'.");
      OutputStream os = new BufferedOutputStream(new FileOutputStream(propFile));
      try {
        props.store(os, "ArchivalUnit id info");
      } finally {
        // close in finally so a failed store() doesn't leak the stream
        IOUtil.safeClose(os);
      }
      propFile.setReadOnly();
   } catch (IOException ioe) {
     logger.error("Couldn't write properties for " + propFile.getPath() + ".", ioe);
     throw new LockssRepository.RepositoryStateException("Couldn't write au id properties file.");
   }
 }
Example #22
  protected CIProperties makeCIProperties(ArchiveRecordHeader elementHeader) throws IOException {
    CIProperties ret = new CIProperties();
    Set elementHeaderFieldKeys = elementHeader.getHeaderFieldKeys();
    for (Iterator i = elementHeaderFieldKeys.iterator(); i.hasNext(); ) {
      String key = (String) i.next();
      try {

        Object valueObject = elementHeader.getHeaderValue(key);

        if (valueObject == null) {
          logger.warning("Ignoring null value for key '" + key + "'.");
        } else {
          String value = valueObject.toString();
          logger.debug3(key + ": " + value);
          ret.put(key, value);
        }

      } catch (ClassCastException ex) {
        logger.error("makeCIProperties: " + key + " threw ", ex);
        throw new CacheException.ExploderException(ex);
      }
    }
    return ret;
  }
Example #23
 private void setMdTypeFact(Map factClassMap, String mdType, String factName) {
   log.debug3("Metadata type: " + mdType + " factory " + factName);
   FileMetadataExtractorFactory fact =
       (FileMetadataExtractorFactory) newAuxClass(factName, FileMetadataExtractorFactory.class);
   factClassMap.put(mdType, fact);
 }
  /*
   *  The supporting methods
   */
  private void setUpExpectedTarContent() {
    /* maps the DOIs in the metadata to the expected values */
    log.debug3("setUpExpectedTarContent");
    pubTitleMap = new HashMap<String, String>();
    {
      pubTitleMap.put("10.1016/j.jidx.2014.07.028", "International Journal of XXX");
      pubTitleMap.put("10.1016/j.jidx2.2014.05.013", "Revista");
      pubTitleMap.put("10.1016/S1473-1111(14)70840-0", "The Journal");
      pubTitleMap.put("10.1016/S0140-1111(14)61865-1", "The Other Journal");
      pubTitleMap.put("10.1016/j.foo.2014.08.001", "Foo");
      pubTitleMap.put("10.1016/j.foo.2014.08.123", "Foo");
    }

    dateMap = new HashMap<String, String>();
    {
      dateMap.put("10.1016/j.jidx.2014.07.028", "2014-07-30");
      dateMap.put("10.1016/j.jidx2.2014.05.013", "2014-07-09");
      dateMap.put("10.1016/S1473-1111(14)70840-0", "2014-09-01");
      dateMap.put("10.1016/S0140-1111(14)61865-1", "2014"); // will get from main.xml as backup
      dateMap.put("10.1016/j.foo.2014.08.001", "2014-08-20");
      dateMap.put("10.1016/j.foo.2014.08.123", "2014-08-20");
    }

    accessUrlMap = new HashMap<String, String>();
    {
      accessUrlMap.put(
          "10.1016/j.jidx.2014.07.028",
          TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.pdf");
      accessUrlMap.put(
          "10.1016/j.jidx2.2014.05.013",
          TAR_A_BASE + SUBDIR + "00349356/v61i9/S0034935614001819/main.pdf");
      accessUrlMap.put(
          "10.1016/S1473-1111(14)70840-0",
          TAR_A_BASE + SUBDIR + "14733099/v14i10/S1473309914708400/main.pdf");
      accessUrlMap.put(
          "10.1016/S0140-1111(14)61865-1",
          TAR_B_BASE + SUBDIR + "01406736/v384sS1/S0140673614618651/main.pdf");
      accessUrlMap.put(
          "10.1016/j.foo.2014.08.001",
          TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514004151/main.pdf");
      accessUrlMap.put(
          "10.1016/j.foo.2014.08.123",
          TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514003856/main.pdf");
    }

    ArrayList<String> goodAuthors = new ArrayList<String>();
    {
      goodAuthors.add("Writer, Bob");
      goodAuthors.add("Q. Text, Samantha");
    }
    ArrayList<String> simpleAuthors = new ArrayList<String>();
    {
      simpleAuthors.add("Simple, Josh");
    }
    ArrayList<String> extendedAuthors = new ArrayList<String>();
    {
      extendedAuthors.add("Writer, Bob");
      extendedAuthors.add("Q. Text, Samantha");
      extendedAuthors.add("The COLLABORATIVE Investigators");
    }

    authorMap = new HashMap<String, List<String>>();
    {
      authorMap.put("10.1016/j.jidx.2014.07.028", goodAuthors);
      authorMap.put("10.1016/j.jidx2.2014.05.013", goodAuthors);
      authorMap.put("10.1016/S1473-1111(14)70840-0", extendedAuthors);
      authorMap.put("10.1016/S0140-1111(14)61865-1", simpleAuthors);
      authorMap.put("10.1016/j.foo.2014.08.001", goodAuthors);
      authorMap.put("10.1016/j.foo.2014.08.123", goodAuthors);
    }

    volMap = new HashMap<String, String>();
    {
      volMap.put("10.1016/j.jidx.2014.07.028", "64");
      volMap.put("10.1016/j.jidx2.2014.05.013", "61");
      volMap.put("10.1016/S1473-1111(14)70840-0", "14");
      volMap.put("10.1016/S0140-1111(14)61865-1", "384");
      volMap.put("10.1016/j.foo.2014.08.001", "242");
      volMap.put("10.1016/j.foo.2014.08.123", "242");
    }

    issueMap = new HashMap<String, String>();
    {
      issueMap.put("10.1016/j.jidx.2014.07.028", "C");
      issueMap.put("10.1016/j.jidx2.2014.05.013", "9");
      issueMap.put("10.1016/S1473-1111(14)70840-0", "10");
      issueMap.put("10.1016/S0140-1111(14)61865-1", "S1");
      issueMap.put("10.1016/j.foo.2014.08.001", "C");
      issueMap.put("10.1016/j.foo.2014.08.123", "C");
    }
  }
Example #25
 public static V1Poll createCompletedPoll(
     LockssDaemon daemon,
     ArchivalUnit au,
     V1LcapMessage testmsg,
     int numAgree,
     int numDisagree,
     PollManager pollmanager)
     throws Exception {
   log.debug(
       "createCompletedPoll: au: "
           + au.toString()
           + " peer "
           + testmsg.getOriginatorId()
           + " votes "
           + numAgree
           + "/"
           + numDisagree);
   CachedUrlSetSpec cusSpec = null;
   if ((testmsg.getLwrBound() != null)
       && (testmsg.getLwrBound().equals(PollSpec.SINGLE_NODE_LWRBOUND))) {
     cusSpec = new SingleNodeCachedUrlSetSpec(testmsg.getTargetUrl());
   } else {
     cusSpec =
         new RangeCachedUrlSetSpec(
             testmsg.getTargetUrl(), testmsg.getLwrBound(), testmsg.getUprBound());
   }
   CachedUrlSet cus = au.makeCachedUrlSet(cusSpec);
   PollSpec spec = new PollSpec(cus, Poll.V1_CONTENT_POLL);
   ((MockCachedUrlSet) spec.getCachedUrlSet()).setHasContent(false);
   V1Poll p = null;
   if (testmsg.isContentPoll()) {
     p =
         new V1ContentPoll(
             spec,
             pollmanager,
             testmsg.getOriginatorId(),
             testmsg.getChallenge(),
             testmsg.getDuration(),
             testmsg.getHashAlgorithm());
   } else if (testmsg.isNamePoll()) {
     p =
         new V1NamePoll(
             spec,
             pollmanager,
             testmsg.getOriginatorId(),
             testmsg.getChallenge(),
             testmsg.getDuration(),
             testmsg.getHashAlgorithm());
   } else if (testmsg.isVerifyPoll()) {
     p =
         new V1VerifyPoll(
             spec,
             pollmanager,
             testmsg.getOriginatorId(),
             testmsg.getChallenge(),
             testmsg.getDuration(),
             testmsg.getHashAlgorithm(),
             testmsg.getVerifier());
   }
   assertNotNull(p);
   p.setMessage(testmsg);
   p.m_tally.quorum = numAgree + numDisagree;
   p.m_tally.numAgree = numAgree;
   p.m_tally.numDisagree = numDisagree;
   p.m_tally.wtAgree = 2000;
   p.m_tally.wtDisagree = 200;
   p.m_tally.localEntries = makeEntries(1, 3);
   p.m_tally.votedEntries = makeEntries(1, 5);
   p.m_tally.votedEntries.remove(1);
   p.m_pollstate = V1Poll.PS_COMPLETE;
   p.m_callerID = testmsg.getOriginatorId();
   log.debug3("poll " + p.toString());
   p.m_tally.tallyVotes();
   return p;
 }
Example #26
  /** Explode the archive into its constituent elements */
  public void explode() throws CacheException {
    CachedUrl cachedUrl = null;
    int goodEntries = 0;
    int badEntries = 0;
    int ignoredEntries = 0;
    int entriesBetweenSleep = 0;
    ArchiveReader arcReader = null;

    logger.info(
        (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
    try {
      if (storeArchive) {
        UrlCacher uc = au.makeUrlCacher(new UrlData(arcStream, arcProps, fetchUrl));
        BitSet bs = new BitSet();
        bs.set(UrlCacher.DONT_CLOSE_INPUT_STREAM_FLAG);
        uc.setFetchFlags(bs);
        uc.storeContent();
        archiveData.resetInputStream();
        arcStream = archiveData.input;
      }
      // Wrap it in an ArchiveReader
      logger.debug3("About to wrap stream");
      arcReader = wrapStream(fetchUrl, arcStream);
      logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
      // Explode it
      if (arcReader == null) {
        throw new CacheException.ExploderException("no WarcReader for " + origUrl);
      }
      ArchivalUnit au = crawlFacade.getAu();
      Set stemSet = new HashSet();
      logger.debug("Exploding " + fetchUrl);
      // Iterate through the elements in the WARC file, except the first
       Iterator i = arcReader.iterator();
       // Skip the first record, guarding against an empty archive
       if (i.hasNext()) i.next();
       while (i.hasNext()) {
        // XXX probably not necessary
        helper.pokeWDog();
        if ((++entriesBetweenSleep % sleepAfter) == 0) {
          long pauseTime =
              CurrentConfig.getTimeIntervalParam(PARAM_RETRY_PAUSE, DEFAULT_RETRY_PAUSE);
          Deadline pause = Deadline.in(pauseTime);
          logger.debug3("Sleeping for " + StringUtil.timeIntervalToString(pauseTime));
          while (!pause.expired()) {
            try {
              pause.sleep();
            } catch (InterruptedException ie) {
              // no action
            }
          }
        }
        ArchiveRecord element = (ArchiveRecord) i.next();
        // Each element is a URL to be cached in a suitable AU
        ArchiveRecordHeader elementHeader = element.getHeader();
        String elementUrl = elementHeader.getUrl();
        String elementMimeType = elementHeader.getMimetype();
        long elementLength = elementHeader.getLength();
        logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
        if (elementUrl.startsWith("http:")) {
          ArchiveEntry ae =
              new ArchiveEntry(
                  elementUrl,
                  elementLength,
                  0, // XXX need to convert getDate string to long
                  element, // ArchiveRecord extends InputStream
                  this,
                  fetchUrl);
          ae.setHeaderFields(makeCIProperties(elementHeader));
          long bytesStored = elementLength;
          logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
          try {
            helper.process(ae);
          } catch (PluginException ex) {
            throw new CacheException.ExploderException("helper.process() threw", ex);
          }
          if (ae.getBaseUrl() != null) {
            if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
              storeEntry(ae);
              handleAddText(ae);
              goodEntries++;
              crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
            } else {
              ignoredEntries++;
            }
          } else {
            badEntries++;
            logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
          }
        }
      }
    } catch (IOException ex) {
      throw new CacheException.ExploderException(ex);
    } finally {
       if (arcReader != null) {
         try {
           arcReader.close();
         } catch (IOException ex) {
           throw new CacheException.ExploderException(ex);
         }
       }
      if (cachedUrl != null) {
        cachedUrl.release();
      }
      IOUtil.safeClose(arcStream);
    }
    if (badEntries == 0 && goodEntries > 0) {
      // Make it look like a new crawl finished on each AU to which
      // URLs were added.
      for (Iterator it = touchedAus.iterator(); it.hasNext(); ) {
        ArchivalUnit au = (ArchivalUnit) it.next();
        logger.debug3(archiveUrl + " touching " + au.toString());
        AuUtil.getDaemon(au).getNodeManager(au).newContentCrawlFinished();
      }
    } else {
      ArchivalUnit au = crawlFacade.getAu();
      String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
      throw new CacheException.UnretryableException(msg);
    }
  }
 /** Forward to real factory, set error handler */
 public DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
   DocumentBuilder db = fact.newDocumentBuilder();
   log.debug3("Created builder: " + db);
   db.setErrorHandler(new MyErrorHandler());
   return db;
 }
 public LockssDocumentBuilderFactoryImpl() {
   //     fact = new org.apache.crimson.jaxp.DocumentBuilderFactoryImpl();
   fact = new org.apache.xerces.jaxp.DocumentBuilderFactoryImpl();
   log.debug3("Created fact: " + fact);
 }
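
A hedged sketch of typical use, so parse errors are reported through MyErrorHandler (standard JAXP calls; the XML input is hypothetical):

   DocumentBuilderFactory fact = new LockssDocumentBuilderFactoryImpl();
   DocumentBuilder db = fact.newDocumentBuilder(); // registers MyErrorHandler
   Document doc = db.parse(new InputSource(new StringReader("<root/>")));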
Example #29
  /** Common request handling. */
  public void service(HttpServletRequest req, HttpServletResponse resp)
      throws ServletException, IOException {
    resetState();
    boolean success = false;
    HttpSession session = req.getSession(false);
    try {
      this.req = req;
      this.resp = resp;
      if (log.isDebug()) {
        logParams();
      }
      resp.setContentType("text/html");

      if (!mayPageBeCached()) {
        resp.setHeader("pragma", "no-cache");
        resp.setHeader("Cache-control", "no-cache");
      }

      reqURL = new URL(UrlUtil.getRequestURL(req));
      clientAddr = getLocalIPAddr();

      // check that current user has permission to run this servlet
      if (!isServletAllowed(myServletDescr())) {
        displayWarningInLieuOfPage("You are not authorized to use " + myServletDescr().heading);
        return;
      }

      // check whether servlet is disabled
      String reason = ServletUtil.servletDisabledReason(myServletDescr().getServletName());
      if (reason != null) {
        displayWarningInLieuOfPage("This function is disabled. " + reason);
        return;
      }
      if (session != null) {
        session.setAttribute(SESSION_KEY_RUNNING_SERVLET, getHeading());
        String reqHost = req.getRemoteHost();
        String forw = req.getHeader(HttpFields.__XForwardedFor);
        if (!StringUtil.isNullString(forw)) {
          reqHost += " (proxies for " + forw + ")";
        }
        session.setAttribute(SESSION_KEY_REQUEST_HOST, reqHost);
      }
      lockssHandleRequest();
      success = (errMsg == null);
     } catch (ServletException | IOException | RuntimeException e) {
       log.error("Servlet threw", e);
       throw e;
    } finally {
      if (session != null) {
        session.setAttribute(SESSION_KEY_RUNNING_SERVLET, null);
        session.setAttribute(LockssFormAuthenticator.__J_AUTH_ACTIVITY, TimeBase.nowMs());
      }
      if ("please".equalsIgnoreCase(req.getHeader("X-Lockss-Result"))) {
        log.debug3("X-Lockss-Result: " + (success ? "Ok" : "Fail"));
        resp.setHeader("X-Lockss-Result", success ? "Ok" : "Fail");
      }
      resetMyLocals();
      resetLocals();
    }
  }
Example #30
  /** Explode the archive into its constituent elements */
  public void explode() throws CacheException {
    int goodEntries = 0;
    int badEntries = 0;
    int entriesBetweenSleep = 0;
    ArchiveReader arcReader = null;

    logger.debug(
        (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
    try {
      // Wrap it in an ArchiveReader
      logger.debug3("About to wrap stream");
      arcReader = wrapStream(fetchUrl, arcStream);
      logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
      // Explode it
      if (arcReader == null) {
        throw new CacheException.ExploderException("no WarcReader for " + origUrl);
      }
      ArchivalUnit au = crawlFacade.getAu();
      logger.debug("Exploding " + fetchUrl);
      // Iterate through the elements in the WARC file, except the first
      Iterator<ArchiveRecord> iter = arcReader.iterator();
      // Skip first record
      if (iter.hasNext()) iter.next();
      while (iter.hasNext()) {
        helper.pokeWDog();
        // check need to pause
        handlePause(++entriesBetweenSleep);
        // handle each element in the archive
        ArchiveRecord element = iter.next();
        // Each element is a URL to be cached in our AU
        ArchiveRecordHeader elementHeader = element.getHeader();
        String elementUrl = elementHeader.getUrl();
        String elementMimeType = elementHeader.getMimetype();
        long elementLength = elementHeader.getLength();
        long elementDate;
        try {
          elementDate = ArchiveUtils.parse14DigitDate(elementHeader.getDate()).getTime();
        } catch (ParseException e) {
          elementDate = 0;
        }
        logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
        // add check to determine if this is a url which should be cached
        if (au.shouldBeCached(elementUrl) && elementUrl.startsWith("http:")) {
          ArchiveEntry ae =
              new ArchiveEntry(
                  elementUrl,
                  elementLength,
                  elementDate,
                  element, // ArchiveRecord extends InputStream
                  this,
                  fetchUrl);
          ae.setHeaderFields(makeCIProperties(elementHeader));
          long bytesStored = elementLength;
          logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
          try {
            helper.process(ae);
          } catch (PluginException ex) {
            throw new CacheException.ExploderException("helper.process() threw", ex);
          }
          if (ae.getBaseUrl() != null) {
            if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
              storeEntry(ae);
              handleAddText(ae);
              goodEntries++;
              // XXX this needs to use the correct crawl depth; unclear how to determine it here
              CrawlUrlData cud = new CrawlUrlData(elementUrl, 0);
              crawlFacade.addToParseQueue(cud);
              crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
            }
          } else {
            badEntries++;
            logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
          }
        }
      }
    } catch (IOException ex) {
      throw new CacheException.ExploderException(ex);
    } finally {
      if (arcReader != null) {
        try {
          arcReader.close();
        } catch (IOException ex) {
          throw new CacheException.ExploderException(ex);
        }
      }
      IOUtil.safeClose(arcStream);
    }
    // report failed fetches
    if (badEntries != 0) {
      String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
      throw new CacheException.UnretryableException(msg);
    }
  }