예제 #1
0
  private boolean startCrawl(ArchivalUnit au, boolean force, boolean deep)
      throws CrawlManagerImpl.NotEligibleException {
    CrawlManagerImpl cmi = (CrawlManagerImpl) crawlMgr;
    if (force) {
      RateLimiter limit = cmi.getNewContentRateLimiter(au);
      if (!limit.isEventOk()) {
        limit.unevent();
      }
    }
    cmi.checkEligibleToQueueNewContentCrawl(au);
    String delayMsg = "";
    String deepMsg = "";
    try {
      cmi.checkEligibleForNewContentCrawl(au);
    } catch (CrawlManagerImpl.NotEligibleException e) {
      delayMsg = ", Start delayed due to: " + e.getMessage();
    }
    Configuration config = ConfigManager.getCurrentConfig();
    int pri = config.getInt(PARAM_CRAWL_PRIORITY, DEFAULT_CRAWL_PRIORITY);

    CrawlReq req;
    try {
      req = new CrawlReq(au);
      req.setPriority(pri);
      if (deep) {
        int d = Integer.parseInt(formDepth);
        if (d < 0) {
          errMsg = "Illegal refetch depth: " + d;
          return false;
        }
        req.setRefetchDepth(d);
        deepMsg = "Deep (" + req.getRefetchDepth() + ") ";
      }
    } catch (NumberFormatException e) {
      errMsg = "Illegal refetch depth: " + formDepth;
      return false;
    } catch (RuntimeException e) {
      log.error("Couldn't create CrawlReq: " + au, e);
      errMsg = "Couldn't create CrawlReq: " + e.toString();
      return false;
    }
    cmi.startNewContentCrawl(req, null);
    statusMsg = deepMsg + "Crawl requested for " + au.getName() + delayMsg;
    return true;
  }
예제 #2
0
 protected void initMimeMap() throws PluginException.InvalidDefinition {
   for (Iterator iter = definitionMap.entrySet().iterator(); iter.hasNext(); ) {
     Map.Entry ent = (Map.Entry) iter.next();
     String key = (String) ent.getKey();
     Object val = ent.getValue();
     if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " link extractor: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         LinkExtractorFactory fact =
             (LinkExtractorFactory) newAuxClass(factName, LinkExtractorFactory.class);
         mti.setLinkExtractorFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY)) {
       // XXX This clause must precede the one for SUFFIX_HASH_FILTER_FACTORY
       // XXX unless/until that key is changed to not be a terminal substring
       // XXX of this one
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " crawl filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setCrawlFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setHashFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT);
       if (val instanceof String) {
         String rate = (String) val;
         log.debug(mime + " fetch rate: " + rate);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         RateLimiter limit = mti.getFetchRateLimiter();
         if (limit != null) {
           limit.setRate(rate);
         } else {
           mti.setFetchRateLimiter(new RateLimiter(rate));
         }
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY);
       String factName = (String) val;
       log.debug(mime + " link rewriter: " + factName);
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       LinkRewriterFactory fact =
           (LinkRewriterFactory) newAuxClass(factName, LinkRewriterFactory.class);
       mti.setLinkRewriterFactory(fact);
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP);
       Map factNameMap = (Map) val;
       Map factClassMap = new HashMap();
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       for (Iterator it = factNameMap.keySet().iterator(); it.hasNext(); ) {
         String mdTypes = (String) it.next();
         String factName = (String) factNameMap.get(mdTypes);
         log.debug(mime + " (" + mdTypes + ") metadata extractor: " + factName);
         for (String mdType : (List<String>) StringUtil.breakAt(mdTypes, ";")) {
           setMdTypeFact(factClassMap, mdType, factName);
         }
       }
       mti.setFileMetadataExtractorFactoryMap(factClassMap);
     }
   }
 }