// If there is a <title> element on the start page, use that as our AU
 // name.
 String recomputeRegName() {
   // This can get invoked (several times, mostly from logging) before
   // enough mechanism has started to make it possible to resolve the CuUrl
   // below, so give up early in that case.
   if (!isStarted()) {
     return null;
   }
   try {
     CachedUrl cu = makeCachedUrl(m_registryUrl);
     if (cu == null) {
       return null;
     }
     String startPageLocation = CuUrl.fromCu(cu).toString();
     Parser parser = new Parser(startPageLocation);
     NodeClassFilter titleFilter = new NodeClassFilter(TitleTag.class);
     Node[] titleNodes = parser.extractAllNodesThatMatch(titleFilter).toNodeArray();
     // The page parsed cleanly, so no further recomputation is pending,
     // whether or not a title was actually found.
     recomputeRegName = false;
     if (titleNodes.length < 1) {
       return null;
     }
     // Only the first title found is of interest.
     TitleTag firstTitle = (TitleTag) titleNodes[0];
     return (firstTitle == null) ? null : firstTitle.getTitle();
   } catch (MalformedURLException e) {
     log.warning("recomputeRegName", e);
   } catch (ParserException e) {
     Throwable cause = e.getThrowable();
     if (cause instanceof FileNotFoundException) {
       // A missing file is reported without a stack trace.
       log.warning("recomputeRegName: " + cause.toString());
     } else {
       log.warning("recomputeRegName", e);
     }
   }
   // Reached only after one of the failures logged above.
   return null;
 }
  public void loadAuConfigDescrs(Configuration config) throws ConfigurationException {
    super.loadAuConfigDescrs(config);

    // The registry URL is the AU's configured base URL.
    this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey());

    // With the registry URL known, the CC permission checker list can be
    // built. Note the checker itself is constructed without the URL.
    m_permissionCheckers = ListUtil.list(new CreativeCommonsPermissionChecker());

    // Registry AUs take their recrawl interval from the registry-specific
    // parameter rather than from the AU configuration.
    long crawlInterval =
        CurrentConfig.getTimeIntervalParam(
            PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL);
    paramMap.putLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL, crawlInterval);

    if (log.isDebug2()) {
      // Report the value as actually stored in the param map.
      long storedInterval = paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL);
      log.debug2(
          "Setting Registry AU recrawl interval to "
              + StringUtil.timeIntervalToString(storedInterval));
    }
  }
/**
 * RegistryArchivalUnit: The Archival Unit class for {@link RegistryPlugin}. This archival unit
 * is defined by a base url, which is the url of the plugin registry page.
 *
 * @author Seth Morabito
 * @version 1.0
 */
public class RegistryArchivalUnit extends BaseArchivalUnit {
  protected static final Logger log = Logger.getLogger("RegistryArchivalUnit");

  /** The interval between recrawls of the loadable plugin registry AUs. */
  static final String PARAM_REGISTRY_CRAWL_INTERVAL = RegistryPlugin.PREFIX + "crawlInterval";

  static final long DEFAULT_REGISTRY_CRAWL_INTERVAL = Constants.DAY;

  /**
   * If "au", registry AUs will crawl in parallel using individual rate limiters; if "plugin"
   * they'll crawl sequentially using a shared rate limiter
   */
  static final String PARAM_REGISTRY_FETCH_RATE_LIMITER_SOURCE =
      RegistryPlugin.PREFIX + "fetchRateLimiterSource";

  static final String DEFAULT_REGISTRY_FETCH_RATE_LIMITER_SOURCE = "au";

  /** Limits fetch rate of registry crawls */
  static final String PARAM_REGISTRY_FETCH_RATE = RegistryPlugin.PREFIX + "fetchRate";

  static final String DEFAULT_REGISTRY_FETCH_RATE = "20/10s";

  /** Run polls on Plugin registry AUs */
  static final String PARAM_ENABLE_REGISTRY_POLLS = RegistryPlugin.PREFIX + "enablePolls";

  static final boolean DEFAULT_ENABLE_REGISTRY_POLLS = true;

  // Base URL of the registry page; set in loadAuConfigDescrs().
  private String m_registryUrl = null;
  private int m_maxRefetchDepth = NewContentCrawler.DEFAULT_MAX_CRAWL_DEPTH;
  private List m_permissionCheckers = null;
  // True when the cached name must be re-derived from the start page.
  private boolean recomputeRegName = true;
  private boolean enablePolls = DEFAULT_ENABLE_REGISTRY_POLLS;
  // Name taken from the start page's title, or null if none is known.
  private String regName = null;

  public RegistryArchivalUnit(RegistryPlugin plugin) {
    super(plugin);
  }

  // Called by RegistryPlugin iff any config below RegistryPlugin.PREFIX
  // has changed.  Refreshes the refetch depth, the fetch rate limiter and
  // the poll-enable flag.
  protected void setConfig(
      Configuration config, Configuration prevConfig, Configuration.Differences changedKeys) {
    m_maxRefetchDepth =
        config.getInt(
            NewContentCrawler.PARAM_MAX_CRAWL_DEPTH, NewContentCrawler.DEFAULT_MAX_CRAWL_DEPTH);

    fetchRateLimiter = recomputeFetchRateLimiter(fetchRateLimiter);

    enablePolls = config.getBoolean(PARAM_ENABLE_REGISTRY_POLLS, DEFAULT_ENABLE_REGISTRY_POLLS);
  }

  /**
   * Loads the AU configuration: records the registry (base) URL, builds the permission checker
   * list and stores the registry recrawl interval in the param map.
   *
   * @param config the AU configuration
   * @throws ConfigurationException propagated from the superclass implementation
   */
  public void loadAuConfigDescrs(Configuration config) throws ConfigurationException {
    super.loadAuConfigDescrs(config);

    // The registry URL is the AU's configured base URL.
    this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey());

    // With the registry URL known, the CC permission checker list can be
    // built. Note the checker itself is constructed without the URL.
    m_permissionCheckers = ListUtil.list(new CreativeCommonsPermissionChecker());

    // Registry AUs take their recrawl interval from the registry-specific
    // parameter rather than from the AU configuration.
    long crawlInterval =
        CurrentConfig.getTimeIntervalParam(
            PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL);
    paramMap.putLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL, crawlInterval);

    if (log.isDebug2()) {
      // Report the value as actually stored in the param map.
      long storedInterval = paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL);
      log.debug2(
          "Setting Registry AU recrawl interval to "
              + StringUtil.timeIntervalToString(storedInterval));
    }
  }

  /**
   * Builds the default name for this AU: a fixed description wrapping the registry base URL.
   *
   * @return a string of the form {@code Plugin registry at '<base URL>'}.
   */
  protected String makeName() {
    String quotedUrl = "'" + m_registryUrl + "'";
    return "Plugin registry at " + quotedUrl;
  }

  /**
   * Returns the AU name. The title of the registry start page is preferred (it is recomputed
   * first if flagged as stale); when no title is available the superclass name is used.
   *
   * @return the start page title if known, otherwise the superclass's name.
   */
  public String getName() {
    if (recomputeRegName) {
      regName = recomputeRegName();
    }
    return (regName != null) ? regName : super.getName();
  }

  /**
   * If there is a {@code <title>} element on the start page, use that as our AU name.
   *
   * <p>On a successful parse the recompute flag is cleared, whether or not a title was found. On
   * failure a warning is logged and the flag is left as it was.
   *
   * @return the first title found on the cached start page, or null if the AU is not yet
   *     started, there is no cached start page, no title was found, or parsing failed.
   */
  String recomputeRegName() {
    // This can get invoked (several times, mostly from logging) before
    // enough mechanism has started to make it possible to resolve the CuUrl
    // below, so give up early in that case.
    if (!isStarted()) {
      return null;
    }
    try {
      CachedUrl cu = makeCachedUrl(m_registryUrl);
      if (cu == null) {
        return null;
      }
      String startPageLocation = CuUrl.fromCu(cu).toString();
      Parser parser = new Parser(startPageLocation);
      NodeClassFilter titleFilter = new NodeClassFilter(TitleTag.class);
      Node[] titleNodes = parser.extractAllNodesThatMatch(titleFilter).toNodeArray();
      // The page parsed cleanly, so no further recomputation is pending,
      // whether or not a title was actually found.
      recomputeRegName = false;
      if (titleNodes.length < 1) {
        return null;
      }
      // Only the first title found is of interest.
      TitleTag firstTitle = (TitleTag) titleNodes[0];
      return (firstTitle == null) ? null : firstTitle.getTitle();
    } catch (MalformedURLException e) {
      log.warning("recomputeRegName", e);
    } catch (ParserException e) {
      Throwable cause = e.getThrowable();
      if (cause instanceof FileNotFoundException) {
        // A missing file is reported without a stack trace.
        log.warning("recomputeRegName: " + cause.toString());
      } else {
        log.warning("recomputeRegName", e);
      }
    }
    // Reached only after one of the failures logged above.
    return null;
  }

  /**
   * Tells whether this AU is started, i.e. whether the plugin manager can find an AU for this
   * AU's id.
   *
   * @return true iff the plugin manager returns a non-null AU for our id.
   */
  boolean isStarted() {
    Object registeredAu = getPlugin().getDaemon().getPluginManager().getAuFromId(getAuId());
    return registeredAu != null;
  }

  /**
   * return a string that points to the plugin registry page.
   *
   * @return the start URL for this registry, which is just the base URL.
   */
  protected String makeStartUrl() {
    return m_registryUrl;
  }

  /**
   * Call top level polls iff configured to do so.
   *
   * @param aus the AU state, passed through to the superclass
   * @return false when registry polls are disabled, otherwise the superclass's decision.
   */
  public boolean shouldCallTopLevelPoll(AuState aus) {
    return enablePolls && super.shouldCallTopLevelPoll(aus);
  }

  /**
   * Return a new CrawlSpec built from the new-content crawl URLs, the registry crawl rule and
   * the maximum refetch depth. The same URL list is supplied for the first two constructor
   * arguments (presumably start pages and permission pages; confirm against SpiderCrawlSpec).
   *
   * @return CrawlSpec
   */
  protected CrawlSpec makeCrawlSpec() throws LockssRegexpException {
    CrawlRule registryRule = makeRules();
    List crawlUrls = getNewContentCrawlUrls();
    return new SpiderCrawlSpec(crawlUrls, crawlUrls, registryRule, m_maxRefetchDepth, null, null);
  }

  /**
   * return the collection of crawl rules used to crawl and cache a list of Plugin JAR files.
   *
   * @return CrawlRule
   */
  protected CrawlRule makeRules() {
    return new RegistryRule();
  }

  // A cacher for the registry start page means that page may be about to
  // be refetched, so flag the name for recomputation.
  public UrlCacher makeUrlCacher(String url) {
    boolean isStartPage = url.equals(m_registryUrl);
    if (isStartPage) {
      recomputeRegName = true;
    }
    return super.makeUrlCacher(url);
  }

  /**
   * Chooses the fetch rate limiter for registry crawls, using the configured registry fetch
   * rate. With no limiter key the old limiter is passed to {@code RateLimiter.getRateLimiter};
   * with a key, a named limiter is looked up in the rate limiter pool.
   *
   * @param oldLimiter the limiter currently in use
   * @return the limiter to use from now on
   */
  protected RateLimiter recomputeFetchRateLimiter(RateLimiter oldLimiter) {
    String rate = CurrentConfig.getParam(PARAM_REGISTRY_FETCH_RATE, DEFAULT_REGISTRY_FETCH_RATE);
    Object limiterKey = getFetchRateLimiterKey();

    if (limiterKey != null) {
      RateLimiter.Pool pool = RateLimiter.getPool();
      return pool.findNamedRateLimiter(limiterKey, rate, DEFAULT_REGISTRY_FETCH_RATE);
    }
    return RateLimiter.getRateLimiter(oldLimiter, rate, DEFAULT_REGISTRY_FETCH_RATE);
  }

  /**
   * Reads the configured rate limiter source for registry AUs.
   *
   * @return the configured source, or the default ("au") if none is set.
   */
  protected String getFetchRateLimiterSource() {
    String source =
        CurrentConfig.getParam(
            PARAM_REGISTRY_FETCH_RATE_LIMITER_SOURCE, DEFAULT_REGISTRY_FETCH_RATE_LIMITER_SOURCE);
    return source;
  }

  // Registry AU crawl rule implementation: includes only the registry page
  // itself and anything ending in ".jar" (both compared case-insensitively).
  private class RegistryRule implements CrawlRule {
    public int match(String url) {
      boolean wanted =
          StringUtil.equalStringsIgnoreCase(url, m_registryUrl)
              || StringUtil.endsWithIgnoreCase(url, ".jar");
      return wanted ? CrawlRule.INCLUDE : CrawlRule.EXCLUDE;
    }
  }
}