/**
 * Converts a URL string into the key form used for lookups.
 *
 * <p>Strings starting with {@code "dns:"} are returned untouched. Everything else is passed
 * through {@code canonicalize}, given a scheme (HTTP when none is detected) and at least one
 * {@code '/'}, run through every rule from {@code getProcessingRules()}, and finally rebuilt
 * as {@code hostBasename[:port]path[?query]}. The port is emitted only when it is set and
 * differs from the scheme's default port.
 *
 * @param urlString the URL to convert
 * @return the key for {@code urlString}; if the URI factory fails with a
 *     {@link StringIndexOutOfBoundsException}, the partially processed URL (scheme included)
 *     is returned instead after logging a warning
 * @throws URIException declared by the signature and not caught here, so failures reported by
 *     the URI helpers propagate to the caller
 */
public String urlStringToKey(final String urlString) throws URIException {

    // "dns:" entries are used as keys verbatim.
    if (urlString.startsWith("dns:")) {
      return urlString;
    }

    // Separate the scheme from the rest, falling back to HTTP when there is none.
    String remainder = canonicalize(urlString);
    String scheme = UrlOperations.urlToScheme(remainder);
    if (scheme == null) {
      scheme = UrlOperations.HTTP_SCHEME;
    } else {
      remainder = remainder.substring(scheme.length());
    }

    // Re-attach the scheme, appending a '/' when the remainder contains none.
    String working = scheme + remainder + (remainder.indexOf("/") == -1 ? "/" : "");

    // Custom rules: each one receives the output of the previous one.
    for (CanonicalizationRule customRule : getProcessingRules()) {
      working = customRule.processIfMatches(new CanonicalizationInput(working));
    }

    // Core rules.
    //
    // TODO: the UsableURI round trip below is a workaround: it was the only easy way found to
    // get correct unescaping out of UsableURI (possibly a bug there). It deserves a rework,
    // since building UsableURIs is not a cheap operation.

    // Step 1: unescape whatever can be unescaped by re-setting the decoded path.
    // Only StringIndexOutOfBoundsException is handled; URIException is left to propagate.
    final UsableURI unescaped;
    try {
      unescaped = UsableURIFactory.getInstance(working);
    } catch (StringIndexOutOfBoundsException e) {
      LOGGER.warning(e.getMessage() + ": " + working);
      return working;
    }
    unescaped.setPath(unescaped.getPath());

    // Step 2: build a second UsableURI from the result so the factory applies its URI fixup.
    final UsableURI fixed = UsableURIFactory.getInstance(unescaped.getURI());

    // Encode spaces as '+' rather than "%20" (only to match Alexa's canonicalization).
    String path = fixed.getEscapedPath().replace("%20", "+");

    // Collapse every run of consecutive '/' characters in the path to a single '/'.
    int doubledSlash = path.indexOf("//");
    while (doubledSlash != -1) {
      path = path.replace("//", "/");
      doubledSlash = path.indexOf("//");
    }

    // Note: a trailing '/' on the path is intentionally left in place for now.

    final StringBuilder key = new StringBuilder(working.length());
    key.append(fixed.getHostBasename());

    // Keep the port only when it is explicitly set and is not the scheme's default.
    final int defaultPort = UrlOperations.schemeToDefaultPort(scheme);
    final int port = fixed.getPort();
    if (port != -1 && port != defaultPort) {
      key.append(":").append(port);
    }

    key.append(path);

    final String query = fixed.getEscapedQuery();
    if (query != null) {
      key.append("?").append(query);
    }

    return key.toString();
  }