/**
   * Asks each registered URL resolver, in list order, to resolve {@code url} and returns the first
   * result that still carries information after normalization.
   *
   * <p>Each non-null result is normalized in place before it is inspected:
   *
   * <ul>
   *   <li>{@code res.url} is cleared to {@code null} when it is empty or when its quoted form
   *       equals the quoted form of the input (i.e. the resolver did not change the URL);
   *   <li>{@code res.precoder} and {@code res.args} are cleared to {@code null} when empty.
   * </ul>
   *
   * A result with nothing left after normalization is skipped and the next resolver is tried.
   *
   * @param url the URL to resolve
   * @return the first normalized result with a url, precoder or args set, or {@code null} when no
   *     resolver produced one
   */
  public static URLResult resolveURL(String url) {
    String quotedUrl = quote(url);
    for (URLResolver resolver : urlResolvers) {
      URLResult res = resolver.urlResolve(url);
      if (res == null) {
        continue;
      }

      // Normalize "nothing changed" / empty values to null so they can be tested uniformly.
      if (StringUtils.isEmpty(res.url) || quotedUrl.equals(quote(res.url))) {
        res.url = null;
      }
      if (res.precoder != null && res.precoder.isEmpty()) {
        res.precoder = null;
      }
      if (res.args != null && res.args.isEmpty()) {
        res.args = null;
      }

      if (res.url == null && res.precoder == null && res.args == null) {
        // This resolver contributed nothing; give the next one a chance.
        continue;
      }

      // Only build the message when it will actually be emitted.
      if (LOGGER.isDebugEnabled()) {
        // name() is called directly on the resolver, as addURLResolver does; no cast is needed.
        LOGGER.debug(
            resolver.name()
                + " resolver:"
                + (res.url == null ? "" : " url=" + res.url)
                + (res.precoder == null ? "" : " precoder=" + res.precoder)
                + (res.args == null ? "" : " args=" + res.args));
      }
      return res;
    }
    return null;
  }
  public static void addURLResolver(URLResolver res) {
    if (urlResolvers.contains(res)) {
      return;
    }
    if (urlResolvers.isEmpty()) {
      urlResolvers.add(res);
      return;
    }

    String[] tmp = PMS.getConfiguration().getURLResolveOrder();
    if (tmp.length == 0) {
      // no order at all, just add it
      urlResolvers.add(res);
      return;
    }
    int id = -1;
    for (int i = 0; i < tmp.length; i++) {
      if (tmp[i].equalsIgnoreCase(res.name())) {
        id = i;
        break;
      }
    }

    if (id == -1) {
      // no order here, just add it
      urlResolvers.add(res);
      return;
    }
    if (id > urlResolvers.size()) {
      // add it last
      urlResolvers.add(res);
      return;
    }
    urlResolvers.add(id, res);
  }
Exemple #3
0
  /**
   * Gets the canonical url, starting from a relative or absolute url found in a given context
   * (baseURL).
   *
   * <p>The url is resolved against the base, then canonicalized: the path is normalized (no empty
   * segments, no "." segments, no leading "/../"), the query parameters are passed through
   * {@code createParameterMap} and {@code canonicalize}, the scheme and host are lower-cased, and
   * a port equal to the scheme's default port is dropped.
   *
   * @param url the url string defining the reference
   * @param baseURL the context in which this url was found; {@code null} is treated as an empty
   *     base
   * @return the canonical url, or {@code null} if the resolved url has no scheme or host, is not a
   *     valid URI, or cannot be turned into a URL (a warning is logged in these cases)
   */
  public static String getCanonicalURL(String url, String baseURL) {

    try {
      /* Build the absolute URL, from the url and the baseURL */
      String resolvedURL = URLResolver.resolveUrl(baseURL == null ? "" : baseURL, url);
      log.debug("Resolved URL: " + resolvedURL);
      URI canonicalURI = new URI(resolvedURL);

      /* Some checking. */
      if (canonicalURI.getScheme() == null) {
        throw new MalformedURLException(
            "Protocol could not be reliably evaluated from uri: "
                + canonicalURI
                + " and base url: "
                + baseURL);
      }
      if (canonicalURI.getHost() == null) {
        throw new MalformedURLException(
            "Host could not be reliably evaluated from: " + canonicalURI);
      }

      /*
       * Normalize: no empty segments (i.e., "//"), no segments equal to ".", and no segments equal to
       * ".." that are preceded by a segment not equal to "..".
       */
      String path = canonicalURI.normalize().getRawPath();

      /* Convert '//' -> '/'. Repeated because a single pass turns "///" into "//". */
      while (path.contains("//")) {
        path = path.replace("//", "/");
      }

      /* Drop starting '/../' (keeps the slash that follows the "..") */
      while (path.startsWith("/../")) {
        path = path.substring(3);
      }

      /* Trim */
      path = path.trim();

      /* Process parameters and sort them. */
      final SortedMap<String, String> params = createParameterMap(canonicalURI.getRawQuery());
      final String canonicalParams = canonicalize(params);
      final String queryString = canonicalParams.isEmpty() ? "" : "?" + canonicalParams;

      /* Add starting slash if needed */
      if (path.isEmpty()) {
        path = "/";
      }

      /*
       * Lowercasing protocol and host. Locale.ROOT keeps the result independent of the JVM's
       * default locale (e.g. a Turkish locale would map 'I' to a dotless 'ı').
       */
      String protocol = canonicalURI.getScheme().toLowerCase(java.util.Locale.ROOT);
      String host = canonicalURI.getHost().toLowerCase(java.util.Locale.ROOT);
      String pathAndQueryString = normalizePath(path) + queryString;

      /*
       * Drop default port: http://example.com:80 -> http://example.com, https://example.com:443 ->
       * https://example.com. The default is taken from the scheme, so a port that is not the
       * default for this scheme (e.g. https on port 80) is preserved.
       */
      int port = canonicalURI.getPort();
      URL result = new URL(protocol, host, port, pathAndQueryString);
      if (port != -1 && port == result.getDefaultPort()) {
        result = new URL(protocol, host, -1, pathAndQueryString);
      }
      return result.toExternalForm();

    } catch (MalformedURLException ex) {
      log.warn(
          "Error while Processing URL in the spidering process (on base "
              + baseURL
              + "): "
              + ex.getMessage());
      return null;
    } catch (URISyntaxException ex) {
      log.warn(
          "Error while Processing URI in the spidering process (on base "
              + baseURL
              + "): "
              + ex.getMessage());
      return null;
    }
  }