private void buildUrlSets(String url) { try { outputMessage("\nFetching " + url, TEST_SUMMARY_MESSAGE); URL srcUrl = new URL(url); // URLConnection conn = srcUrl.openConnection(); // String type = conn.getContentType(); // type = conn.getHeaderField("content-type"); // InputStream istr = conn.getInputStream(); LockssUrlConnection conn = UrlUtil.openConnection(url, connectionPool); if (proxyHost != null) { conn.setProxy(proxyHost, proxyPort); } if (userAgent != null) { conn.setRequestProperty("user-agent", userAgent); } try { conn.execute(); int resp = conn.getResponseCode(); if (resp != 200) { outputMessage("Resp: " + resp + ": " + conn.getResponseMessage(), TEST_SUMMARY_MESSAGE); return; } depth_fetched[m_curDepth - 1]++; String cookies = conn.getResponseHeaderValue("Set-Cookie"); if (cookies != null) { outputMessage("Cookies: " + cookies, PLAIN_MESSAGE); } String type = conn.getResponseContentType(); if (type == null || !type.toLowerCase().startsWith("text/html")) { outputMessage("Type: " + type + ", not parsing", URL_SUMMARY_MESSAGE); return; } outputMessage("Type: " + type + ", extracting Urls", URL_SUMMARY_MESSAGE); InputStream istr = conn.getResponseInputStream(); InputStreamReader reader = new InputStreamReader(istr); // MyMockCachedUrl mcu = new MyMockCachedUrl(srcUrl.toString(), reader); GoslingHtmlLinkExtractor extractor = new GoslingHtmlLinkExtractor(); extractor.extractUrls(null, istr, null, srcUrl.toString(), new MyLinkExtractorCallback()); istr.close(); depth_parsed[m_curDepth - 1]++; } finally { conn.release(); } } catch (MalformedURLException murle) { murle.printStackTrace(); outputErrResults(url, "Malformed URL:" + murle.getMessage()); } catch (IOException ex) { ex.printStackTrace(); outputErrResults(url, "IOException: " + ex.getMessage()); } }
private ExternalizableMap loadMap(String extMapName, ClassLoader loader) throws FileNotFoundException { String first = null; String next = extMapName; List<String> urls = new ArrayList<String>(); ExternalizableMap res = null; while (next != null) { // convert the plugin class name to an xml file name String mapFile = next.replace('.', '/') + MAP_SUFFIX; URL url = loader.getResource(mapFile); if (url != null && urls.contains(url.toString())) { throw new PluginException.InvalidDefinition("Plugin inheritance loop: " + next); } // load into map ExternalizableMap oneMap = new ExternalizableMap(); oneMap.loadMapFromResource(mapFile, loader); urls.add(url.toString()); // apply overrides one plugin at a time in inheritance chain processOverrides(oneMap); if (res == null) { res = oneMap; } else { for (Map.Entry ent : oneMap.entrySet()) { String key = (String) ent.getKey(); Object val = ent.getValue(); if (!res.containsKey(key)) { res.setMapElement(key, val); } } } if (oneMap.containsKey(KEY_PLUGIN_PARENT)) { next = oneMap.getString(KEY_PLUGIN_PARENT); } else { next = null; } } loadedFromUrls = urls; return res; }