/**
 * Offers the given URL to every registered URL resolver, in list order, and returns the first
 * result that still carries information after clean-up.
 *
 * <p>Each non-null result is cleaned up in place before being inspected:
 * <ul>
 *   <li>{@code url} is reset to {@code null} when it is empty or when its quoted form equals the
 *       quoted form of the input URL (i.e. the resolver did not change it);</li>
 *   <li>{@code precoder} and {@code args} are reset to {@code null} when they are empty.</li>
 * </ul>
 * A result whose three fields are all {@code null} after clean-up is ignored and the next
 * resolver is tried.
 *
 * @param url the URL to resolve
 * @return the first useful (cleaned-up) result, or {@code null} if no resolver produced one
 */
public static URLResult resolveURL(String url) {
    final String quotedInput = quote(url);

    for (URLResolver candidate : urlResolvers) {
        URLResult result = candidate.urlResolve(url);
        if (result == null) {
            continue;
        }

        // Collapse "nothing useful" values to null so the checks below stay simple.
        boolean urlUnchanged =
                StringUtils.isEmpty(result.url) || quotedInput.equals(quote(result.url));
        if (urlUnchanged) {
            result.url = null;
        }
        if (result.precoder != null && result.precoder.isEmpty()) {
            result.precoder = null;
        }
        if (result.args != null && result.args.isEmpty()) {
            result.args = null;
        }

        // This resolver contributed nothing; let the next one have a go.
        if (result.url == null && result.precoder == null && result.args == null) {
            continue;
        }

        String message = ((ExternalListener) candidate).name() + " resolver:";
        if (result.url != null) {
            message += " url=" + result.url;
        }
        if (result.precoder != null) {
            message += " precoder=" + result.precoder;
        }
        if (result.args != null) {
            message += " args=" + result.args;
        }
        LOGGER.debug(message);
        return result;
    }

    return null;
}
public static void addURLResolver(URLResolver res) { if (urlResolvers.contains(res)) { return; } if (urlResolvers.isEmpty()) { urlResolvers.add(res); return; } String[] tmp = PMS.getConfiguration().getURLResolveOrder(); if (tmp.length == 0) { // no order at all, just add it urlResolvers.add(res); return; } int id = -1; for (int i = 0; i < tmp.length; i++) { if (tmp[i].equalsIgnoreCase(res.name())) { id = i; break; } } if (id == -1) { // no order here, just add it urlResolvers.add(res); return; } if (id > urlResolvers.size()) { // add it last urlResolvers.add(res); return; } urlResolvers.add(id, res); }
/**
 * Gets the canonical url, starting from a relative or absolute url found in a given context
 * (baseURL).
 *
 * <p>The url is first resolved against the base url, then canonicalized:
 * <ul>
 *   <li>the path is normalized ({@code "."} / {@code ".."} segments, repeated slashes, leading
 *       {@code "/../"}) and passed through {@code normalizePath};</li>
 *   <li>the query string is rebuilt from a sorted parameter map;</li>
 *   <li>scheme and host are lower-cased (locale-independently);</li>
 *   <li>the port is dropped when it is the default one for the scheme ({@code http}/80,
 *       {@code https}/443).</li>
 * </ul>
 *
 * @param url the url string defining the reference
 * @param baseURL the context in which this url was found; {@code null} is treated as an empty
 *     base
 * @return the canonical url, or {@code null} if the resolved url is not a valid URI/URL or has
 *     no scheme or host
 */
public static String getCanonicalURL(String url, String baseURL) {
    try {
        /* Build the absolute URL, from the url and the baseURL */
        String resolvedURL = URLResolver.resolveUrl(baseURL == null ? "" : baseURL, url);
        log.debug("Resolved URL: " + resolvedURL);
        URI canonicalURI = new URI(resolvedURL);

        /* Some checking. */
        if (canonicalURI.getScheme() == null) {
            throw new MalformedURLException(
                    "Protocol could not be reliably evaluated from uri: "
                            + canonicalURI
                            + " and base url: "
                            + baseURL);
        }
        if (canonicalURI.getHost() == null) {
            throw new MalformedURLException(
                    "Host could not be reliably evaluated from: " + canonicalURI);
        }

        /*
         * Normalize: no empty segments (i.e., "//"), no segments equal to ".", and no segments
         * equal to ".." that are preceded by a segment not equal to "..".
         */
        String path = canonicalURI.normalize().getRawPath();

        /* Convert '//' -> '/' (repeat, since "///" collapses to "//" on the first pass) */
        while (path.contains("//")) {
            path = path.replace("//", "/");
        }

        /* Drop starting '/../' (keeps the trailing slash of the prefix as the new leading one) */
        while (path.startsWith("/../")) {
            path = path.substring(3);
        }

        /* Trim */
        path = path.trim();

        /* Process parameters and sort them. */
        final SortedMap<String, String> params = createParameterMap(canonicalURI.getRawQuery());
        final String canonicalParams = canonicalize(params);
        final String queryString = canonicalParams.isEmpty() ? "" : "?" + canonicalParams;

        /* Add starting slash if needed */
        if (path.isEmpty()) {
            path = "/";
        }

        /*
         * Lowercasing protocol and host. Locale.ROOT is used so the result does not depend on
         * the platform locale (e.g. Turkish would turn "I" into a dotless "i").
         */
        String protocol = canonicalURI.getScheme().toLowerCase(java.util.Locale.ROOT);
        String host = canonicalURI.getHost().toLowerCase(java.util.Locale.ROOT);

        /*
         * Drop default port: http://example.com:80 -> http://example.com and
         * https://example.com:443 -> https://example.com. The check is per scheme: port 80 on
         * https (or any other scheme) is NOT the default and must be preserved.
         */
        int port = canonicalURI.getPort();
        boolean isDefaultPort =
                ("http".equals(protocol) && port == 80)
                        || ("https".equals(protocol) && port == 443);
        if (isDefaultPort) {
            port = -1;
        }

        String pathAndQueryString = normalizePath(path) + queryString;

        URL result = new URL(protocol, host, port, pathAndQueryString);
        return result.toExternalForm();

    } catch (MalformedURLException ex) {
        log.warn(
                "Error while Processing URL in the spidering process (on base "
                        + baseURL
                        + "): "
                        + ex.getMessage());
        return null;
    } catch (URISyntaxException ex) {
        log.warn(
                "Error while Processing URI in the spidering process (on base "
                        + baseURL
                        + "): "
                        + ex.getMessage());
        return null;
    }
}