public void testFindCorrectEntries() throws Exception { Set expected = new HashSet(); for (String link : links) { expected.add(srcUrl + link); } assertEquals(expected, extractUrls(withLinks)); }
/** * Get a list of all the TdbAu.Ids for this Tdb * * @return a collection of TdbAu objects */ public Set<TdbAu.Id> getAllTdbAuIds() { if (pluginIdTdbAuIdsMap == null) { return Collections.<TdbAu.Id>emptySet(); } Set<TdbAu.Id> allAuIds = new HashSet<TdbAu.Id>(); // For each plugin's AU set, add them all to the set. for (Collection<TdbAu.Id> auIds : pluginIdTdbAuIdsMap.values()) { allAuIds.addAll(auIds); } return allAuIds; }
/** engqueue a size calculation for the node */ public void queueSizeCalc(RepositoryNode node) { synchronized (sizeCalcQueue) { if (sizeCalcQueue.add(node)) { log.debug2("Queue size calc: " + node); startOrKickThread(); } } }
private void outputSummary(String baseUrl, Set fetched, Set toCrawl, long elapsedTime) { int fetchCount = fetched.size(); outputMessage( "\n\nSummary for starting Url: " + baseUrl + " and depth: " + m_crawlDepth, TEST_SUMMARY_MESSAGE); outputMessage( "\nUrls fetched: " + fetchCount + " Urls extracted: " + m_extracted.size(), PLAIN_MESSAGE); outputMessage("\nDepth Fetched Parsed New URLs", PLAIN_MESSAGE); for (int depth = 1; depth <= m_crawlDepth; depth++) { PrintfFormat pf = new PrintfFormat("%5d %7d %6d %8d"); Integer[] args = new Integer[] { new Integer(depth), new Integer(depth_fetched[depth - 1]), new Integer(depth_parsed[depth - 1]), new Integer(depth_incl[depth - 1]), }; String s = pf.sprintf(args); outputMessage(s, PLAIN_MESSAGE); } outputMessage("\nRemaining unfetched: " + toCrawl.size(), PLAIN_MESSAGE); if (false) { for (Iterator iter = toCrawl.iterator(); iter.hasNext(); ) { String url = (String) iter.next(); outputMessage(url, PLAIN_MESSAGE); } } long secs = elapsedTime / Constants.SECOND; long fetchRate = 0; if (secs > 0) { fetchRate = fetchCount * 60 * Constants.SECOND / elapsedTime; } outputMessage( "\nElapsed Time: " + secs + " secs." + " Fetch Rate: " + fetchRate + " p/m", PLAIN_MESSAGE); }
protected CIProperties makeCIProperties(ArchiveRecordHeader elementHeader) throws IOException { CIProperties ret = new CIProperties(); Set elementHeaderFieldKeys = elementHeader.getHeaderFieldKeys(); for (Iterator i = elementHeaderFieldKeys.iterator(); i.hasNext(); ) { String key = (String) i.next(); try { Object valueObject = elementHeader.getHeaderValue(key); if (valueObject == null) { logger.warning("Ignoring null value for key '" + key + "'."); } else { String value = valueObject.toString(); logger.debug3(key + ": " + value); ret.put(key, value); } } catch (ClassCastException ex) { logger.error("makeCIProperties: " + key + " threw ", ex); throw new CacheException.ExploderException(ex); } } return (ret); }
public void foundLink(String url) { foundUrls.add(url); }
private Set outputUrlResults(String url, Set m_inclset, Set m_exclset) { Set new_incls = new TreeSet(CollectionUtils.subtract(m_inclset, m_reported)); Set new_excls = new TreeSet(CollectionUtils.subtract(m_exclset, m_reported)); if (!m_inclset.isEmpty()) { outputMessage( "\nIncluded Urls: (" + new_incls.size() + " new, " + (m_inclset.size() - new_incls.size()) + " old)", URL_SUMMARY_MESSAGE); depth_incl[m_curDepth - 1] += new_incls.size(); } for (Iterator it = new_incls.iterator(); it.hasNext(); ) { outputMessage(it.next().toString(), PLAIN_MESSAGE); } if (!m_exclset.isEmpty()) { outputMessage( "\nExcluded Urls: (" + new_excls.size() + " new, " + (m_exclset.size() - new_excls.size()) + " old)", URL_SUMMARY_MESSAGE); } for (Iterator it = new_excls.iterator(); it.hasNext(); ) { outputMessage(it.next().toString(), PLAIN_MESSAGE); } m_reported.addAll(new_incls); m_reported.addAll(new_excls); if (m_outWriter != null) { try { m_outWriter.flush(); } catch (IOException ex) { } } return new_incls; }
protected void recordExportFile(File file) { if (fileSet.add(file)) { fileList.add(file); } }
public int sizeCalcQueueLen() { synchronized (sizeCalcQueue) { return sizeCalcQueue.size(); } }
public void lockssHandleRequest() throws IOException { resetVars(); boolean showForm = true; String action = getParameter(KEY_ACTION); if (!StringUtil.isNullString(action)) { formAuid = getParameter(KEY_AUID); formDepth = getParameter(KEY_REFETCH_DEPTH); UserAccount acct = getUserAccount(); if (acct != null && !noAuditActions.contains(action)) { acct.auditableEvent("used debug panel action: " + action + " AU ID: " + formAuid); } } if (ACTION_MAIL_BACKUP.equals(action)) { doMailBackup(); } if (ACTION_RELOAD_CONFIG.equals(action)) { doReloadConfig(); } if (ACTION_SLEEP.equals(action)) { doSleep(); } if (ACTION_THROW_IOEXCEPTION.equals(action)) { doThrow(); } if (ACTION_START_V3_POLL.equals(action)) { doV3Poll(); } if (ACTION_FORCE_START_V3_POLL.equals(action)) { forceV3Poll(); } if (ACTION_START_CRAWL.equals(action)) { doCrawl(false, false); } if (ACTION_FORCE_START_CRAWL.equals(action)) { doCrawl(true, false); } if (ACTION_START_DEEP_CRAWL.equals(action)) { doCrawl(false, true); } if (ACTION_FORCE_START_DEEP_CRAWL.equals(action)) { doCrawl(true, true); } if (ACTION_CHECK_SUBSTANCE.equals(action)) { doCheckSubstance(); } if (ACTION_CRAWL_PLUGINS.equals(action)) { crawlPluginRegistries(); } if (ACTION_FIND_URL.equals(action)) { showForm = doFindUrl(); } if (ACTION_REINDEX_METADATA.equals(action)) { doReindexMetadata(); } if (ACTION_FORCE_REINDEX_METADATA.equals(action)) { forceReindexMetadata(); } if (ACTION_DISABLE_METADATA_INDEXING.equals(action)) { doDisableMetadataIndexing(); } if (showForm) { displayPage(); } }