Пример #1
0
 void setConfig(Configuration config) {
   log.debug("config: " + config);
   proxyHost = config.get(PARAM_PROXY_HOST);
   proxyPort = config.getInt(PARAM_PROXY_PORT, DEFAULT_PROXY_PORT);
   if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
     String http_proxy = System.getenv("http_proxy");
     if (!StringUtil.isNullString(http_proxy)) {
       try {
         HostPortParser hpp = new HostPortParser(http_proxy);
         proxyHost = hpp.getHost();
         proxyPort = hpp.getPort();
       } catch (HostPortParser.InvalidSpec e) {
         log.warning("Can't parse http_proxy environment var, ignoring: " + http_proxy + ": " + e);
       }
     }
   }
   if (StringUtil.isNullString(proxyHost) || proxyPort <= 0) {
     proxyHost = null;
   } else {
     log.info("Proxying through " + proxyHost + ":" + proxyPort);
   }
   userAgent = config.get(PARAM_USER_AGENT);
   if (StringUtil.isNullString(userAgent)) {
     userAgent = null;
   } else {
     log.debug("Setting User-Agent to " + userAgent);
   }
 }
Пример #2
0
 protected void initFeatureVersions() throws PluginException.InvalidDefinition {
   if (definitionMap.containsKey(KEY_PLUGIN_FEATURE_VERSION_MAP)) {
     Map<Plugin.Feature, String> map = new HashMap<Plugin.Feature, String>();
     Map<String, String> spec =
         (Map<String, String>) definitionMap.getMap(KEY_PLUGIN_FEATURE_VERSION_MAP);
     log.debug2("features: " + spec);
     for (Map.Entry<String, String> ent : spec.entrySet()) {
       try {
         // Prefix version string with feature name to create separate
         // namespace for each feature
         String key = ent.getKey();
         map.put(Plugin.Feature.valueOf(key), key + "_" + ent.getValue());
       } catch (RuntimeException e) {
         log.warning(
             getPluginName()
                 + " set unknown feature: "
                 + ent.getKey()
                 + " to version "
                 + ent.getValue(),
             e);
         throw new PluginException.InvalidDefinition("Unknown feature: " + ent.getKey(), e);
       }
     }
     featureVersion = map;
   } else {
     featureVersion = null;
   }
 }
Пример #3
0
 void checkParamAgreement(String key, PrintfContext context) {
   List<String> printfList = getElementList(key);
   if (printfList == null) {
     return;
   }
   for (String printf : printfList) {
     if (StringUtil.isNullString(printf)) {
       log.warning("Null printf string in " + key);
       continue;
     }
     PrintfUtil.PrintfData p_data = PrintfUtil.stringToPrintf(printf);
     Collection<String> p_args = p_data.getArguments();
     for (String arg : p_args) {
       ConfigParamDescr descr = findAuConfigDescr(arg);
       if (descr == null) {
         throw new PluginException.InvalidDefinition(
             "Not a declared parameter: " + arg + " in " + printf + " in " + getPluginName());
       }
       // ensure range and set params used only in legal context
       switch (context) {
         case Regexp:
         case Display:
           // everything is legal in a regexp or a display string
           break;
         case URL:
           // NUM_RANGE and SET legal because can enumerate.  Can't
           // enumerate RANGE
           switch (descr.getType()) {
             case ConfigParamDescr.TYPE_RANGE:
               throw new PluginException.InvalidDefinition(
                   "Range parameter ("
                       + arg
                       + ") used in illegal context in "
                       + getPluginName()
                       + ": "
                       + key
                       + ": "
                       + printf);
             default:
           }
       }
     }
   }
 }
 /*
  * This is comlicated. MOST AUs have articles that live below and issue level TOC
  * that is,
  * <blah>/<journal_id>/vol#/iss#/ is a toc with no relevant metadata
  * <blah>/<journal_id>/vol#/iss#/xxx is an article with metadata
  * (eg Economist Voice V1)
  * BUT
  * in some AUs there are issues with only 1 article, in which case
  * <blah>/<journal_id>/vol#/iss#/ is an abstract with metadata
  * (eg Rhodes Cook V4)
  * and a few AUs with a mixture
  * (eg Forum for Health Economics V5)
  * So to identify ALL articles, we'll also have to capture issue level items and then look
  * at the html and if it has article metadata in it, count it as an article.
  *
  */
 @Override
 protected ArticleFiles createArticleFiles(CachedUrl cu) {
   String url = cu.getUrl();
   Matcher mat = pattern.matcher(url);
   if (mat.find()) {
     // we matched, but could this pattern potentially be a toc?
     Matcher tocmat = TOC_pattern.matcher(url);
     // if we could be a TOC then we must have metadata to be considered an article
     if (tocmat.find()) {
       if (hasArticleMetadata(cu)) {
         return processUrl(cu, mat);
       }
     } else {
       // we're not a potential TOC, so treat this as an article without checking
       return processUrl(cu, mat);
     }
     return null; // this was a TOC, not an article
   }
   log.warning("Mismatch between article iterator factory and article iterator: " + url);
   return null;
 }