コード例 #1
0
  /**
   * Loads the AU configuration, then sets up the registry-specific state: the registry URL, the
   * permission checker list and the new-content crawl interval.
   *
   * @param config the configuration to read the base URL from
   * @throws ConfigurationException declared by this method; propagated from the superclass call
   */
  public void loadAuConfigDescrs(Configuration config) throws ConfigurationException {
    super.loadAuConfigDescrs(config);

    // The registry URL is whatever is configured under the base-URL parameter key.
    final String baseUrlKey = ConfigParamDescr.BASE_URL.getKey();
    this.m_registryUrl = config.get(baseUrlKey);

    // Now we can construct a valid CC permission checker. The no-argument constructor is used;
    // a variant taking m_registryUrl was previously disabled here.
    m_permissionCheckers = ListUtil.list(new CreativeCommonsPermissionChecker());

    // Record the recrawl interval, falling back to the default when the parameter is unset.
    final long crawlInterval =
        CurrentConfig.getTimeIntervalParam(
            PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL);
    paramMap.putLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL, crawlInterval);

    if (!log.isDebug2()) {
      return;
    }
    // Read the value back from paramMap so the log reflects what was actually stored.
    final String intervalText =
        StringUtil.timeIntervalToString(paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL));
    log.debug2("Setting Registry AU recrawl interval to " + intervalText);
  }
コード例 #2
0
/**
 * Status accessor for a single crawl. The table key selects the crawl; the resulting table has one
 * row per MIME type reported by the crawl's {@code CrawlerStatus}, plus summary lines for the crawl
 * status, its sources and its start URLs.
 */
public class SingleCrawlStatusAccessor implements StatusAccessor {

  /** Column name holding the MIME type string. */
  private static final String MIME_TYPE_NAME = "mime_type_name";

  /** Column name holding the per-MIME-type URL count (or a reference to the URL list). */
  private static final String MIME_TYPE_NUM_URLS = "mime_type_num_urls";

  /** Prefix of the sub-key used when referencing the URL list of one MIME type. */
  private static final String MIMETYPES_URLS_KEY = "mime-type";

  /** Name of the table that references built by this accessor point to. */
  private static final String CRAWL_URLS_STATUS_ACCESSOR = CrawlManagerImpl.CRAWL_URLS_STATUS_TABLE;

  /** Column descriptors of the MIME type table; never reassigned after construction. */
  private final List colDescsMimeTypes =
      ListUtil.fromArray(
          new ColumnDescriptor[] {
            new ColumnDescriptor(MIME_TYPE_NAME, "Mime Type", ColumnDescriptor.TYPE_STRING),
            new ColumnDescriptor(
                MIME_TYPE_NUM_URLS,
                "URLs Found",
                ColumnDescriptor.TYPE_INT,
                "Number of pages of that mime type fetched during this crawl"),
          });

  /** Default sort: by MIME type name. */
  private static final List statusSortRules =
      ListUtil.list(new StatusTable.SortRule(MIME_TYPE_NAME, true));

  private CrawlManager.StatusSource statusSource;

  /**
   * Creates an accessor backed by the given status source.
   *
   * @param statusSource source from which the crawler status is looked up on each request
   */
  public SingleCrawlStatusAccessor(CrawlManager.StatusSource statusSource) {
    this.statusSource = statusSource;
  }

  /**
   * Fills in sort rules, column descriptors, title, rows and summary info for the crawl named by
   * the table's key.
   *
   * @param table the table to populate; must be non-null and carry a non-null key
   * @throws IllegalArgumentException if {@code table} or its key is null
   * @throws StatusService.NoSuchTableException if no crawler status is found for the key
   */
  public void populateTable(StatusTable table) throws StatusService.NoSuchTableException {
    if (table == null) {
      throw new IllegalArgumentException("Called with null table");
    }
    String key = table.getKey();
    if (key == null) {
      throw new IllegalArgumentException("SingleCrawlStatusAccessor requires a key");
    }
    CrawlerStatus status = statusSource.getStatus().getCrawlerStatus(key);
    if (status == null) {
      throw new StatusService.NoSuchTableException(
          "Status info from that crawl is no longer available");
    }
    table.setDefaultSortRules(statusSortRules);
    table.setColumnDescriptors(colDescsMimeTypes);
    table.setTitle(getTableTitle(status));
    table.setRows(getRows(status, key));
    table.setSummaryInfo(getSummaryInfo(status));
  }

  /** Builds the table title from the AU name recorded in the crawl status. */
  private String getTableTitle(CrawlerStatus status) {
    return "Status of crawl of " + status.getAuName();
  }

  /**
   * Builds one row per MIME type reported by the status.
   *
   * @return the rows; empty when the status reports a null MIME type collection
   */
  private List getRows(CrawlerStatus status, String key) {
    Collection mimeTypes = status.getMimeTypes();
    List rows = new ArrayList();
    if (mimeTypes != null) {
      // Raw collection: elements are cast to String exactly as before.
      for (Object entry : mimeTypes) {
        rows.add(makeRow(status, (String) entry, key));
      }
    }
    return rows;
  }

  /** Builds the row for one MIME type: its name and its URL count (or a reference to the URLs). */
  private Map makeRow(CrawlerStatus status, String mimeType, String key) {
    Map row = new HashMap();
    row.put(MIME_TYPE_NAME, mimeType);
    row.put(
        MIME_TYPE_NUM_URLS,
        makeRefIfColl(status.getMimeTypeCtr(mimeType), key, MIMETYPES_URLS_KEY + ":" + mimeType));
    return row;
  }

  /** Return a reference object to the table, displaying the value */
  private Object makeRef(long value, String tableName, String key) {
    // Long.valueOf replaces the deprecated Long(long) constructor.
    return new StatusTable.Reference(Long.valueOf(value), tableName, key);
  }

  /**
   * If the UrlCounter has a collection, return a reference to it, else just the count.
   *
   * @param ctr the counter to display
   * @param crawlKey key of the crawl; forms the first part of the reference key
   * @param subkey appended to {@code crawlKey} after a {@code "."} to form the reference key
   * @return a {@code StatusTable.Reference} when the counter has a collection, otherwise a {@code
   *     Long} holding the count
   */
  Object makeRefIfColl(CrawlerStatus.UrlCount ctr, String crawlKey, String subkey) {
    if (ctr.hasCollection()) {
      return makeRef(ctr.getCount(), CRAWL_URLS_STATUS_ACCESSOR, crawlKey + "." + subkey);
    }
    return Long.valueOf(ctr.getCount());
  }

  /**
   * Always throws: this accessor has no generic display name.
   *
   * @throws UnsupportedOperationException always
   */
  public String getDisplayName() {
    throw new UnsupportedOperationException("No generic name for MimeTypeStatusCrawler");
  }

  /** This accessor always requires a key. */
  public boolean requiresKey() {
    return true;
  }

  /** Footnote attached to the status line of a successful crawl whose AU has no substance. */
  public static final String FOOT_NO_SUBSTANCE_CRAWL_STATUS =
      "Though the crawl finished successfully, no files containing substantial content were collected.";

  /**
   * Builds the summary lines: crawl status (with a footnote when the crawl succeeded but the AU
   * state reports no substance), sources, and start URLs.
   */
  private List getSummaryInfo(CrawlerStatus status) {
    List res = new ArrayList();
    StatusTable.SummaryInfo statusSi =
        new StatusTable.SummaryInfo(
            "Status", ColumnDescriptor.TYPE_STRING, status.getCrawlStatusMsg());
    ArchivalUnit au = status.getAu();
    if (au != null) {
      AuState aus = AuUtil.getAuState(au);
      if (status.getCrawlStatus() == Crawler.STATUS_SUCCESSFUL && aus.hasNoSubstance()) {
        statusSi.setValueFootnote(FOOT_NO_SUBSTANCE_CRAWL_STATUS);
      }
    }
    res.add(statusSi);
    String sources = StringUtil.separatedString(status.getSources());
    res.add(new StatusTable.SummaryInfo("Source", ColumnDescriptor.TYPE_STRING, sources));
    String startUrls = StringUtil.separatedString(status.getStartUrls());
    res.add(
        new StatusTable.SummaryInfo("Starting Url(s)", ColumnDescriptor.TYPE_STRING, startUrls));
    return res;
  }

  /** Appends an integer summary line for {@code head} only when {@code val} is non-zero. */
  private void addIfNonZero(List res, String head, int val) {
    if (val != 0) {
      res.add(
          new StatusTable.SummaryInfo(head, ColumnDescriptor.TYPE_INT, Long.valueOf((long) val)));
    }
  }
}