public void loadAuConfigDescrs(Configuration config) throws ConfigurationException { super.loadAuConfigDescrs(config); this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey()); // Now we can construct a valid CC permission checker. m_permissionCheckers = // ListUtil.list(new CreativeCommonsPermissionChecker(m_registryUrl)); ListUtil.list(new CreativeCommonsPermissionChecker()); paramMap.putLong( KEY_AU_NEW_CONTENT_CRAWL_INTERVAL, CurrentConfig.getTimeIntervalParam( PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL)); if (log.isDebug2()) { log.debug2( "Setting Registry AU recrawl interval to " + StringUtil.timeIntervalToString( paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL))); } }
public class SingleCrawlStatusAccessor implements StatusAccessor { private static final String MIME_TYPE_NAME = "mime_type_name"; private static final String MIME_TYPE_NUM_URLS = "mime_type_num_urls"; private static final String MIMETYPES_URLS_KEY = "mime-type"; private static final String CRAWL_URLS_STATUS_ACCESSOR = CrawlManagerImpl.CRAWL_URLS_STATUS_TABLE; private List colDescsMimeTypes = ListUtil.fromArray( new ColumnDescriptor[] { new ColumnDescriptor(MIME_TYPE_NAME, "Mime Type", ColumnDescriptor.TYPE_STRING), new ColumnDescriptor( MIME_TYPE_NUM_URLS, "URLs Found", ColumnDescriptor.TYPE_INT, "Number of pages of that mime type fetched during this crawl"), }); private static final List statusSortRules = ListUtil.list(new StatusTable.SortRule(MIME_TYPE_NAME, true)); private CrawlManager.StatusSource statusSource; public SingleCrawlStatusAccessor(CrawlManager.StatusSource statusSource) { this.statusSource = statusSource; } public void populateTable(StatusTable table) throws StatusService.NoSuchTableException { if (table == null) { throw new IllegalArgumentException("Called with null table"); } else if (table.getKey() == null) { throw new IllegalArgumentException("SingleCrawlStatusAccessor requires a key"); } String key = table.getKey(); CrawlerStatus status = statusSource.getStatus().getCrawlerStatus(key); if (status == null) { throw new StatusService.NoSuchTableException( "Status info from that crawl is no longer available"); } table.setDefaultSortRules(statusSortRules); table.setColumnDescriptors(colDescsMimeTypes); table.setTitle(getTableTitle(status)); table.setRows(getRows(status, key)); table.setSummaryInfo(getSummaryInfo(status)); } private String getTableTitle(CrawlerStatus status) { return "Status of crawl of " + status.getAuName(); } /** iterate over the mime-types makeRow for each */ private List getRows(CrawlerStatus status, String key) { Collection mimeTypes = status.getMimeTypes(); List rows = new ArrayList(); if (mimeTypes != null) { String mimeType; for (Iterator it = mimeTypes.iterator(); it.hasNext(); ) { mimeType = (String) it.next(); rows.add(makeRow(status, mimeType, key)); } } return rows; } private Map makeRow(CrawlerStatus status, String mimeType, String key) { Map row = new HashMap(); row.put(MIME_TYPE_NAME, mimeType); row.put( MIME_TYPE_NUM_URLS, makeRefIfColl(status.getMimeTypeCtr(mimeType), key, MIMETYPES_URLS_KEY + ":" + mimeType)); return row; } /** Return a reference object to the table, displaying the value */ private Object makeRef(long value, String tableName, String key) { return new StatusTable.Reference(new Long(value), tableName, key); } /** If the UrlCounter has a collection, return a reference to it, else just the count */ Object makeRefIfColl(CrawlerStatus.UrlCount ctr, String crawlKey, String subkey) { if (ctr.hasCollection()) { return makeRef(ctr.getCount(), CRAWL_URLS_STATUS_ACCESSOR, crawlKey + "." + subkey); } return new Long(ctr.getCount()); } public String getDisplayName() { throw new UnsupportedOperationException("No generic name for MimeTypeStatusCrawler"); } public boolean requiresKey() { return true; } public static final String FOOT_NO_SUBSTANCE_CRAWL_STATUS = "Though the crawl finished successfully, no files containing substantial content were collected."; private List getSummaryInfo(CrawlerStatus status) { List res = new ArrayList(); StatusTable.SummaryInfo statusSi = new StatusTable.SummaryInfo( "Status", ColumnDescriptor.TYPE_STRING, status.getCrawlStatusMsg()); ArchivalUnit au = status.getAu(); if (au != null) { AuState aus = AuUtil.getAuState(au); if (status.getCrawlStatus() == Crawler.STATUS_SUCCESSFUL && aus.hasNoSubstance()) { statusSi.setValueFootnote(FOOT_NO_SUBSTANCE_CRAWL_STATUS); } } res.add(statusSi); String sources = StringUtil.separatedString(status.getSources()); res.add(new StatusTable.SummaryInfo("Source", ColumnDescriptor.TYPE_STRING, sources)); String startUrls = StringUtil.separatedString(status.getStartUrls()); res.add( new StatusTable.SummaryInfo("Starting Url(s)", ColumnDescriptor.TYPE_STRING, startUrls)); return res; } private void addIfNonZero(List res, String head, int val) { if (val != 0) { res.add(new StatusTable.SummaryInfo(head, ColumnDescriptor.TYPE_INT, new Long(val))); } } }