public void testFindCorrectEntries() throws Exception {
   Set expected = new HashSet();
   for (String link : links) {
     expected.add(srcUrl + link);
   }
   assertEquals(expected, extractUrls(withLinks));
 }
Beispiel #2
0
 /**
  * Get a list of all the TdbAu.Ids for this Tdb
  *
  * @return a collection of TdbAu objects
  */
 public Set<TdbAu.Id> getAllTdbAuIds() {
   if (pluginIdTdbAuIdsMap == null) {
     return Collections.<TdbAu.Id>emptySet();
   }
   Set<TdbAu.Id> allAuIds = new HashSet<TdbAu.Id>();
   // For each plugin's AU set, add them all to the set.
   for (Collection<TdbAu.Id> auIds : pluginIdTdbAuIdsMap.values()) {
     allAuIds.addAll(auIds);
   }
   return allAuIds;
 }
 /** engqueue a size calculation for the node */
 public void queueSizeCalc(RepositoryNode node) {
   synchronized (sizeCalcQueue) {
     if (sizeCalcQueue.add(node)) {
       log.debug2("Queue size calc: " + node);
       startOrKickThread();
     }
   }
 }
  private void outputSummary(String baseUrl, Set fetched, Set toCrawl, long elapsedTime) {
    int fetchCount = fetched.size();
    outputMessage(
        "\n\nSummary for starting Url: " + baseUrl + " and depth: " + m_crawlDepth,
        TEST_SUMMARY_MESSAGE);
    outputMessage(
        "\nUrls fetched: " + fetchCount + "    Urls extracted: " + m_extracted.size(),
        PLAIN_MESSAGE);

    outputMessage("\nDepth  Fetched  Parsed  New URLs", PLAIN_MESSAGE);
    for (int depth = 1; depth <= m_crawlDepth; depth++) {
      PrintfFormat pf = new PrintfFormat("%5d  %7d  %6d  %8d");
      Integer[] args =
          new Integer[] {
            new Integer(depth),
            new Integer(depth_fetched[depth - 1]),
            new Integer(depth_parsed[depth - 1]),
            new Integer(depth_incl[depth - 1]),
          };
      String s = pf.sprintf(args);
      outputMessage(s, PLAIN_MESSAGE);
    }

    outputMessage("\nRemaining unfetched: " + toCrawl.size(), PLAIN_MESSAGE);
    if (false) {
      for (Iterator iter = toCrawl.iterator(); iter.hasNext(); ) {
        String url = (String) iter.next();
        outputMessage(url, PLAIN_MESSAGE);
      }
    }
    long secs = elapsedTime / Constants.SECOND;
    long fetchRate = 0;
    if (secs > 0) {
      fetchRate = fetchCount * 60 * Constants.SECOND / elapsedTime;
    }
    outputMessage(
        "\nElapsed Time: " + secs + " secs." + "    Fetch Rate: " + fetchRate + " p/m",
        PLAIN_MESSAGE);
  }
Beispiel #5
0
  protected CIProperties makeCIProperties(ArchiveRecordHeader elementHeader) throws IOException {
    CIProperties ret = new CIProperties();
    Set elementHeaderFieldKeys = elementHeader.getHeaderFieldKeys();
    for (Iterator i = elementHeaderFieldKeys.iterator(); i.hasNext(); ) {
      String key = (String) i.next();
      try {

        Object valueObject = elementHeader.getHeaderValue(key);

        if (valueObject == null) {
          logger.warning("Ignoring null value for key '" + key + "'.");
        } else {
          String value = valueObject.toString();
          logger.debug3(key + ": " + value);
          ret.put(key, value);
        }

      } catch (ClassCastException ex) {
        logger.error("makeCIProperties: " + key + " threw ", ex);
        throw new CacheException.ExploderException(ex);
      }
    }
    return (ret);
  }
 public void foundLink(String url) {
   foundUrls.add(url);
 }
  private Set outputUrlResults(String url, Set m_inclset, Set m_exclset) {
    Set new_incls = new TreeSet(CollectionUtils.subtract(m_inclset, m_reported));
    Set new_excls = new TreeSet(CollectionUtils.subtract(m_exclset, m_reported));
    if (!m_inclset.isEmpty()) {
      outputMessage(
          "\nIncluded Urls: ("
              + new_incls.size()
              + " new, "
              + (m_inclset.size() - new_incls.size())
              + " old)",
          URL_SUMMARY_MESSAGE);
      depth_incl[m_curDepth - 1] += new_incls.size();
    }
    for (Iterator it = new_incls.iterator(); it.hasNext(); ) {
      outputMessage(it.next().toString(), PLAIN_MESSAGE);
    }

    if (!m_exclset.isEmpty()) {
      outputMessage(
          "\nExcluded Urls: ("
              + new_excls.size()
              + " new, "
              + (m_exclset.size() - new_excls.size())
              + " old)",
          URL_SUMMARY_MESSAGE);
    }
    for (Iterator it = new_excls.iterator(); it.hasNext(); ) {
      outputMessage(it.next().toString(), PLAIN_MESSAGE);
    }
    m_reported.addAll(new_incls);
    m_reported.addAll(new_excls);

    if (m_outWriter != null) {
      try {
        m_outWriter.flush();
      } catch (IOException ex) {
      }
    }
    return new_incls;
  }
Beispiel #8
0
 protected void recordExportFile(File file) {
   if (fileSet.add(file)) {
     fileList.add(file);
   }
 }
 public int sizeCalcQueueLen() {
   synchronized (sizeCalcQueue) {
     return sizeCalcQueue.size();
   }
 }
Beispiel #10
0
  public void lockssHandleRequest() throws IOException {
    resetVars();
    boolean showForm = true;
    String action = getParameter(KEY_ACTION);

    if (!StringUtil.isNullString(action)) {

      formAuid = getParameter(KEY_AUID);
      formDepth = getParameter(KEY_REFETCH_DEPTH);

      UserAccount acct = getUserAccount();
      if (acct != null && !noAuditActions.contains(action)) {
        acct.auditableEvent("used debug panel action: " + action + " AU ID: " + formAuid);
      }
    }

    if (ACTION_MAIL_BACKUP.equals(action)) {
      doMailBackup();
    }
    if (ACTION_RELOAD_CONFIG.equals(action)) {
      doReloadConfig();
    }
    if (ACTION_SLEEP.equals(action)) {
      doSleep();
    }
    if (ACTION_THROW_IOEXCEPTION.equals(action)) {
      doThrow();
    }
    if (ACTION_START_V3_POLL.equals(action)) {
      doV3Poll();
    }
    if (ACTION_FORCE_START_V3_POLL.equals(action)) {
      forceV3Poll();
    }
    if (ACTION_START_CRAWL.equals(action)) {
      doCrawl(false, false);
    }
    if (ACTION_FORCE_START_CRAWL.equals(action)) {
      doCrawl(true, false);
    }
    if (ACTION_START_DEEP_CRAWL.equals(action)) {
      doCrawl(false, true);
    }
    if (ACTION_FORCE_START_DEEP_CRAWL.equals(action)) {
      doCrawl(true, true);
    }
    if (ACTION_CHECK_SUBSTANCE.equals(action)) {
      doCheckSubstance();
    }
    if (ACTION_CRAWL_PLUGINS.equals(action)) {
      crawlPluginRegistries();
    }
    if (ACTION_FIND_URL.equals(action)) {
      showForm = doFindUrl();
    }
    if (ACTION_REINDEX_METADATA.equals(action)) {
      doReindexMetadata();
    }
    if (ACTION_FORCE_REINDEX_METADATA.equals(action)) {
      forceReindexMetadata();
    }
    if (ACTION_DISABLE_METADATA_INDEXING.equals(action)) {
      doDisableMetadataIndexing();
    }
    if (showForm) {
      displayPage();
    }
  }