예제 #1
0
 void setConfig(Configuration config) {
   log.debug("config: " + config);
   proxyHost = config.get(PARAM_PROXY_HOST);
   proxyPort = config.getInt(PARAM_PROXY_PORT, DEFAULT_PROXY_PORT);
   if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
     String http_proxy = System.getenv("http_proxy");
     if (!StringUtil.isNullString(http_proxy)) {
       try {
         HostPortParser hpp = new HostPortParser(http_proxy);
         proxyHost = hpp.getHost();
         proxyPort = hpp.getPort();
       } catch (HostPortParser.InvalidSpec e) {
         log.warning("Can't parse http_proxy environment var, ignoring: " + http_proxy + ": " + e);
       }
     }
   }
   if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
     proxyHost = null;
   } else {
     log.info("Proxying through " + proxyHost + ":" + proxyPort);
   }
   userAgent = config.get(PARAM_USER_AGENT);
   if (StringUtil.isNullString(userAgent)) {
     userAgent = null;
   } else {
     log.debug("Setting User-Agent to " + userAgent);
   }
 }
예제 #2
0
  protected FilterRule constructFilterRule(String contentType) {
    String mimeType = HeaderUtil.getMimeTypeFromContentType(contentType);

    Object filter_el =
        definitionMap.getMapElement(mimeType + DefinableArchivalUnit.SUFFIX_FILTER_RULE);

    if (filter_el instanceof String) {
      log.debug("Loading filter " + filter_el);
      return (FilterRule) newAuxClass((String) filter_el, FilterRule.class);
    } else if (filter_el instanceof List) {
      if (((List) filter_el).size() > 0) {
        return new DefinableFilterRule((List) filter_el);
      }
    }
    return super.constructFilterRule(mimeType);
  }
예제 #3
0
 /** If in testing mode FOO, copy values from FOO_override map, if any, to main map */
 void processOverrides(TypedEntryMap map) {
   String testMode = getTestingMode();
   if (StringUtil.isNullString(testMode)) {
     return;
   }
   Object o = map.getMapElement(testMode + DefinableArchivalUnit.SUFFIX_OVERRIDE);
   if (o == null) {
     return;
   }
   if (o instanceof Map) {
     Map overrideMap = (Map) o;
     for (Map.Entry entry : (Set<Map.Entry>) overrideMap.entrySet()) {
       String key = (String) entry.getKey();
       Object val = entry.getValue();
       log.debug(getDefaultPluginName() + ": Overriding " + key + " with " + val);
       map.setMapElement(key, val);
     }
   }
 }
예제 #4
0
 private void callV3ContentPoll(ArchivalUnit au) throws PollManager.NotEligibleException {
   log.debug("Enqueuing a V3 Content Poll on " + au.getName());
   PollSpec spec = new PollSpec(au.getAuCachedUrlSet(), Poll.V3_POLL);
   pollManager.enqueueHighPriorityPoll(au, spec);
   statusMsg = "Enqueued V3 poll for " + au.getName();
 }
예제 #5
0
 protected void initMimeMap() throws PluginException.InvalidDefinition {
   for (Iterator iter = definitionMap.entrySet().iterator(); iter.hasNext(); ) {
     Map.Entry ent = (Map.Entry) iter.next();
     String key = (String) ent.getKey();
     Object val = ent.getValue();
     if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " link extractor: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         LinkExtractorFactory fact =
             (LinkExtractorFactory) newAuxClass(factName, LinkExtractorFactory.class);
         mti.setLinkExtractorFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY)) {
       // XXX This clause must precede the one for SUFFIX_HASH_FILTER_FACTORY
       // XXX unless/until that key is changed to not be a terminal substring
       // XXX of this one
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " crawl filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setCrawlFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setHashFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT);
       if (val instanceof String) {
         String rate = (String) val;
         log.debug(mime + " fetch rate: " + rate);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         RateLimiter limit = mti.getFetchRateLimiter();
         if (limit != null) {
           limit.setRate(rate);
         } else {
           mti.setFetchRateLimiter(new RateLimiter(rate));
         }
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY);
       String factName = (String) val;
       log.debug(mime + " link rewriter: " + factName);
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       LinkRewriterFactory fact =
           (LinkRewriterFactory) newAuxClass(factName, LinkRewriterFactory.class);
       mti.setLinkRewriterFactory(fact);
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP);
       Map factNameMap = (Map) val;
       Map factClassMap = new HashMap();
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       for (Iterator it = factNameMap.keySet().iterator(); it.hasNext(); ) {
         String mdTypes = (String) it.next();
         String factName = (String) factNameMap.get(mdTypes);
         log.debug(mime + " (" + mdTypes + ") metadata extractor: " + factName);
         for (String mdType : (List<String>) StringUtil.breakAt(mdTypes, ";")) {
           setMdTypeFact(factClassMap, mdType, factName);
         }
       }
       mti.setFileMetadataExtractorFactoryMap(factClassMap);
     }
   }
 }