/**
   * A method to remove any non-canonical '..' or '.' elements in the path, as well as protecting
   * against illegal path traversal.
   *
   * @param url the raw url
   * @return String the canonicalized url
   * @throws MalformedURLException
   */
  public String canonicalizePath(String url) throws MalformedURLException {
    String canonUrl = UrlUtil.normalizeUrl(url, UrlUtil.PATH_TRAVERSAL_ACTION_THROW);
    // canonicalize "dir" and "dir/"
    // XXX if these are ever two separate nodes, this is wrong
    if (canonUrl.endsWith(UrlUtil.URL_PATH_SEPARATOR)) {
      canonUrl = canonUrl.substring(0, canonUrl.length() - 1);
    }

    return canonUrl;
  }
Ejemplo n.º 2
0
  private void buildUrlSets(String url) {

    try {
      outputMessage("\nFetching " + url, TEST_SUMMARY_MESSAGE);
      URL srcUrl = new URL(url);
      //       URLConnection conn = srcUrl.openConnection();
      //       String type = conn.getContentType();
      //       type = conn.getHeaderField("content-type");
      //       InputStream istr = conn.getInputStream();

      LockssUrlConnection conn = UrlUtil.openConnection(url, connectionPool);
      if (proxyHost != null) {
        conn.setProxy(proxyHost, proxyPort);
      }
      if (userAgent != null) {
        conn.setRequestProperty("user-agent", userAgent);
      }
      try {
        conn.execute();
        int resp = conn.getResponseCode();
        if (resp != 200) {
          outputMessage("Resp: " + resp + ": " + conn.getResponseMessage(), TEST_SUMMARY_MESSAGE);
          return;
        }
        depth_fetched[m_curDepth - 1]++;
        String cookies = conn.getResponseHeaderValue("Set-Cookie");
        if (cookies != null) {
          outputMessage("Cookies: " + cookies, PLAIN_MESSAGE);
        }
        String type = conn.getResponseContentType();
        if (type == null || !type.toLowerCase().startsWith("text/html")) {
          outputMessage("Type: " + type + ", not parsing", URL_SUMMARY_MESSAGE);
          return;
        }
        outputMessage("Type: " + type + ", extracting Urls", URL_SUMMARY_MESSAGE);
        InputStream istr = conn.getResponseInputStream();
        InputStreamReader reader = new InputStreamReader(istr);
        //       MyMockCachedUrl mcu = new MyMockCachedUrl(srcUrl.toString(), reader);
        GoslingHtmlLinkExtractor extractor = new GoslingHtmlLinkExtractor();
        extractor.extractUrls(null, istr, null, srcUrl.toString(), new MyLinkExtractorCallback());
        istr.close();
        depth_parsed[m_curDepth - 1]++;
      } finally {
        conn.release();
      }
    } catch (MalformedURLException murle) {
      murle.printStackTrace();
      outputErrResults(url, "Malformed URL:" + murle.getMessage());
    } catch (IOException ex) {
      ex.printStackTrace();
      outputErrResults(url, "IOException: " + ex.getMessage());
    }
  }
Ejemplo n.º 3
0
    public void foundLink(String url) {

      m_extracted.add(url);
      try {
        String normUrl = UrlUtil.normalizeUrl(url);
        if (BaseCrawler.isSupportedUrlProtocol(normUrl) && m_au.shouldBeCached(normUrl)) {
          m_incls.add(normUrl);
        } else {
          m_excls.add(normUrl);
        }
      } catch (MalformedURLException e) {
        m_excls.add(url);
      }
    }
Ejemplo n.º 4
0
 protected String urlEncode(String param) {
   return UrlUtil.encodeUrl(param);
 }
Ejemplo n.º 5
0
  /** Common request handling. */
  public void service(HttpServletRequest req, HttpServletResponse resp)
      throws ServletException, IOException {
    resetState();
    boolean success = false;
    HttpSession session = req.getSession(false);
    try {
      this.req = req;
      this.resp = resp;
      if (log.isDebug()) {
        logParams();
      }
      resp.setContentType("text/html");

      if (!mayPageBeCached()) {
        resp.setHeader("pragma", "no-cache");
        resp.setHeader("Cache-control", "no-cache");
      }

      reqURL = new URL(UrlUtil.getRequestURL(req));
      clientAddr = getLocalIPAddr();

      // check that current user has permission to run this servlet
      if (!isServletAllowed(myServletDescr())) {
        displayWarningInLieuOfPage("You are not authorized to use " + myServletDescr().heading);
        return;
      }

      // check whether servlet is disabled
      String reason = ServletUtil.servletDisabledReason(myServletDescr().getServletName());
      if (reason != null) {
        displayWarningInLieuOfPage("This function is disabled. " + reason);
        return;
      }
      if (session != null) {
        session.setAttribute(SESSION_KEY_RUNNING_SERVLET, getHeading());
        String reqHost = req.getRemoteHost();
        String forw = req.getHeader(HttpFields.__XForwardedFor);
        if (!StringUtil.isNullString(forw)) {
          reqHost += " (proxies for " + forw + ")";
        }
        session.setAttribute(SESSION_KEY_REQUEST_HOST, reqHost);
      }
      lockssHandleRequest();
      success = (errMsg == null);
    } catch (ServletException e) {
      log.error("Servlet threw", e);
      throw e;
    } catch (IOException e) {
      log.error("Servlet threw", e);
      throw e;
    } catch (RuntimeException e) {
      log.error("Servlet threw", e);
      throw e;
    } finally {
      if (session != null) {
        session.setAttribute(SESSION_KEY_RUNNING_SERVLET, null);
        session.setAttribute(LockssFormAuthenticator.__J_AUTH_ACTIVITY, TimeBase.nowMs());
      }
      if ("please".equalsIgnoreCase(req.getHeader("X-Lockss-Result"))) {
        log.debug3("X-Lockss-Result: " + (success ? "Ok" : "Fail"));
        resp.setHeader("X-Lockss-Result", success ? "Ok" : "Fail");
      }
      resetMyLocals();
      resetLocals();
    }
  }