/** * Return the archive file type corresponding to the filename extension in the URL, or null if * none. */ public String getFromUrl(String url) throws MalformedURLException { if (StringUtil.endsWithIgnoreCase(url, ".tar.gz")) { return getExtMimeMap().get(".tar.gz"); } String ext = UrlUtil.getFileExtension(url).toLowerCase(); return getExtMimeMap().get("." + ext); }
/** * A method to remove any non-canonical '..' or '.' elements in the path, as well as protecting * against illegal path traversal. * * @param url the raw url * @return String the canonicalized url * @throws MalformedURLException */ public String canonicalizePath(String url) throws MalformedURLException { String canonUrl = UrlUtil.normalizeUrl(url, UrlUtil.PATH_TRAVERSAL_ACTION_THROW); // canonicalize "dir" and "dir/" // XXX if these are ever two separate nodes, this is wrong if (canonUrl.endsWith(UrlUtil.URL_PATH_SEPARATOR)) { canonUrl = canonUrl.substring(0, canonUrl.length() - 1); } return canonUrl; }
private void buildUrlSets(String url) { try { outputMessage("\nFetching " + url, TEST_SUMMARY_MESSAGE); URL srcUrl = new URL(url); // URLConnection conn = srcUrl.openConnection(); // String type = conn.getContentType(); // type = conn.getHeaderField("content-type"); // InputStream istr = conn.getInputStream(); LockssUrlConnection conn = UrlUtil.openConnection(url, connectionPool); if (proxyHost != null) { conn.setProxy(proxyHost, proxyPort); } if (userAgent != null) { conn.setRequestProperty("user-agent", userAgent); } try { conn.execute(); int resp = conn.getResponseCode(); if (resp != 200) { outputMessage("Resp: " + resp + ": " + conn.getResponseMessage(), TEST_SUMMARY_MESSAGE); return; } depth_fetched[m_curDepth - 1]++; String cookies = conn.getResponseHeaderValue("Set-Cookie"); if (cookies != null) { outputMessage("Cookies: " + cookies, PLAIN_MESSAGE); } String type = conn.getResponseContentType(); if (type == null || !type.toLowerCase().startsWith("text/html")) { outputMessage("Type: " + type + ", not parsing", URL_SUMMARY_MESSAGE); return; } outputMessage("Type: " + type + ", extracting Urls", URL_SUMMARY_MESSAGE); InputStream istr = conn.getResponseInputStream(); InputStreamReader reader = new InputStreamReader(istr); // MyMockCachedUrl mcu = new MyMockCachedUrl(srcUrl.toString(), reader); GoslingHtmlLinkExtractor extractor = new GoslingHtmlLinkExtractor(); extractor.extractUrls(null, istr, null, srcUrl.toString(), new MyLinkExtractorCallback()); istr.close(); depth_parsed[m_curDepth - 1]++; } finally { conn.release(); } } catch (MalformedURLException murle) { murle.printStackTrace(); outputErrResults(url, "Malformed URL:" + murle.getMessage()); } catch (IOException ex) { ex.printStackTrace(); outputErrResults(url, "IOException: " + ex.getMessage()); } }
public void foundLink(String url) { m_extracted.add(url); try { String normUrl = UrlUtil.normalizeUrl(url); if (BaseCrawler.isSupportedUrlProtocol(normUrl) && m_au.shouldBeCached(normUrl)) { m_incls.add(normUrl); } else { m_excls.add(normUrl); } } catch (MalformedURLException e) { m_excls.add(url); } }
protected String urlEncode(String param) { return UrlUtil.encodeUrl(param); }
/** Common request handling. */ public void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { resetState(); boolean success = false; HttpSession session = req.getSession(false); try { this.req = req; this.resp = resp; if (log.isDebug()) { logParams(); } resp.setContentType("text/html"); if (!mayPageBeCached()) { resp.setHeader("pragma", "no-cache"); resp.setHeader("Cache-control", "no-cache"); } reqURL = new URL(UrlUtil.getRequestURL(req)); clientAddr = getLocalIPAddr(); // check that current user has permission to run this servlet if (!isServletAllowed(myServletDescr())) { displayWarningInLieuOfPage("You are not authorized to use " + myServletDescr().heading); return; } // check whether servlet is disabled String reason = ServletUtil.servletDisabledReason(myServletDescr().getServletName()); if (reason != null) { displayWarningInLieuOfPage("This function is disabled. " + reason); return; } if (session != null) { session.setAttribute(SESSION_KEY_RUNNING_SERVLET, getHeading()); String reqHost = req.getRemoteHost(); String forw = req.getHeader(HttpFields.__XForwardedFor); if (!StringUtil.isNullString(forw)) { reqHost += " (proxies for " + forw + ")"; } session.setAttribute(SESSION_KEY_REQUEST_HOST, reqHost); } lockssHandleRequest(); success = (errMsg == null); } catch (ServletException e) { log.error("Servlet threw", e); throw e; } catch (IOException e) { log.error("Servlet threw", e); throw e; } catch (RuntimeException e) { log.error("Servlet threw", e); throw e; } finally { if (session != null) { session.setAttribute(SESSION_KEY_RUNNING_SERVLET, null); session.setAttribute(LockssFormAuthenticator.__J_AUTH_ACTIVITY, TimeBase.nowMs()); } if ("please".equalsIgnoreCase(req.getHeader("X-Lockss-Result"))) { log.debug3("X-Lockss-Result: " + (success ? "Ok" : "Fail")); resp.setHeader("X-Lockss-Result", success ? "Ok" : "Fail"); } resetMyLocals(); resetLocals(); } }