/**
 * Loads the proxy and User-Agent settings from the supplied configuration.
 *
 * <p>The proxy host and port are read from {@code PARAM_PROXY_HOST} and
 * {@code PARAM_PROXY_PORT} (defaulting to {@code DEFAULT_PROXY_PORT}). If
 * that does not yield a non-null-string host and a positive port, the
 * {@code http_proxy} environment variable is parsed with
 * {@link HostPortParser} as a fallback. If no usable host/port pair results,
 * {@code proxyHost} is set to {@code null}; {@code proxyPort} is left as is.
 *
 * <p>{@code userAgent} is set from {@code PARAM_USER_AGENT}, or to
 * {@code null} when {@code StringUtil.isNullString} reports the configured
 * value as a null string.
 *
 * @param config the configuration to read the settings from
 */
void setConfig(Configuration config) {
  log.debug("config: " + config);

  proxyHost = config.get(PARAM_PROXY_HOST);
  proxyPort = config.getInt(PARAM_PROXY_PORT, DEFAULT_PROXY_PORT);

  boolean configuredProxyUsable =
      !StringUtil.isNullString(proxyHost) && proxyPort > 0;
  if (!configuredProxyUsable) {
    // Nothing usable in the configuration; fall back to the environment.
    String envProxy = System.getenv("http_proxy");
    if (!StringUtil.isNullString(envProxy)) {
      try {
        HostPortParser parser = new HostPortParser(envProxy);
        proxyHost = parser.getHost();
        proxyPort = parser.getPort();
      } catch (HostPortParser.InvalidSpec e) {
        log.warning("Can't parse http_proxy environment var, ignoring: "
                    + envProxy + ": " + e);
      }
    }
  }

  // Re-check: the environment fallback may or may not have supplied values.
  if (!StringUtil.isNullString(proxyHost) && proxyPort > 0) {
    log.info("Proxying through " + proxyHost + ":" + proxyPort);
  } else {
    proxyHost = null;
  }

  userAgent = config.get(PARAM_USER_AGENT);
  if (!StringUtil.isNullString(userAgent)) {
    log.debug("Setting User-Agent to " + userAgent);
  } else {
    userAgent = null;
  }
}
protected void initFeatureVersions() throws PluginException.InvalidDefinition { if (definitionMap.containsKey(KEY_PLUGIN_FEATURE_VERSION_MAP)) { Map<Plugin.Feature, String> map = new HashMap<Plugin.Feature, String>(); Map<String, String> spec = (Map<String, String>) definitionMap.getMap(KEY_PLUGIN_FEATURE_VERSION_MAP); log.debug2("features: " + spec); for (Map.Entry<String, String> ent : spec.entrySet()) { try { // Prefix version string with feature name to create separate // namespace for each feature String key = ent.getKey(); map.put(Plugin.Feature.valueOf(key), key + "_" + ent.getValue()); } catch (RuntimeException e) { log.warning( getPluginName() + " set unknown feature: " + ent.getKey() + " to version " + ent.getValue(), e); throw new PluginException.InvalidDefinition("Unknown feature: " + ent.getKey(), e); } } featureVersion = map; } else { featureVersion = null; } }
void checkParamAgreement(String key, PrintfContext context) { List<String> printfList = getElementList(key); if (printfList == null) { return; } for (String printf : printfList) { if (StringUtil.isNullString(printf)) { log.warning("Null printf string in " + key); continue; } PrintfUtil.PrintfData p_data = PrintfUtil.stringToPrintf(printf); Collection<String> p_args = p_data.getArguments(); for (String arg : p_args) { ConfigParamDescr descr = findAuConfigDescr(arg); if (descr == null) { throw new PluginException.InvalidDefinition( "Not a declared parameter: " + arg + " in " + printf + " in " + getPluginName()); } // ensure range and set params used only in legal context switch (context) { case Regexp: case Display: // everything is legal in a regexp or a display string break; case URL: // NUM_RANGE and SET legal because can enumerate. Can't // enumerate RANGE switch (descr.getType()) { case ConfigParamDescr.TYPE_RANGE: throw new PluginException.InvalidDefinition( "Range parameter (" + arg + ") used in illegal context in " + getPluginName() + ": " + key + ": " + printf); default: } } } } }
/* * This is comlicated. MOST AUs have articles that live below and issue level TOC * that is, * <blah>/<journal_id>/vol#/iss#/ is a toc with no relevant metadata * <blah>/<journal_id>/vol#/iss#/xxx is an article with metadata * (eg Economist Voice V1) * BUT * in some AUs there are issues with only 1 article, in which case * <blah>/<journal_id>/vol#/iss#/ is an abstract with metadata * (eg Rhodes Cook V4) * and a few AUs with a mixture * (eg Forum for Health Economics V5) * So to identify ALL articles, we'll also have to capture issue level items and then look * at the html and if it has article metadata in it, count it as an article. * */ @Override protected ArticleFiles createArticleFiles(CachedUrl cu) { String url = cu.getUrl(); Matcher mat = pattern.matcher(url); if (mat.find()) { // we matched, but could this pattern potentially be a toc? Matcher tocmat = TOC_pattern.matcher(url); // if we could be a TOC then we must have metadata to be considered an article if (tocmat.find()) { if (hasArticleMetadata(cu)) { return processUrl(cu, mat); } } else { // we're not a potential TOC, so treat this as an article without checking return processUrl(cu, mat); } return null; // this was a TOC, not an article } log.warning("Mismatch between article iterator factory and article iterator: " + url); return null; }