示例#1
0
  /**
   * Parses and validates the {@code MainUrl} and {@code SecureUrl} properties.
   *
   * <p>The intent is fail-fast: if something is wrong with {@code MainUrl} at application
   * start-up, a {@link RuntimeException} is thrown here so that the container does not start.
   *
   * <p>When {@code SecureUrl} is not configured, it is derived from {@code MainUrl} by switching
   * a leading {@code http:} scheme to {@code https:}.
   *
   * @throws RuntimeException if either URL cannot be parsed, or if {@code MainUrl} is not an
   *     absolute URI or has no host; the underlying exception, if any, is attached as the cause
   */
  @PostConstruct
  public void init() {
    try {
      mainURI = new URI(properties.getProperty("MainUrl"), true, "UTF-8");
    } catch (Exception e) {
      // Attach the original exception as the cause so its stack trace is not lost.
      throw new RuntimeException(ERR_MSG + e.getMessage(), e);
    }
    if (!mainURI.isAbsoluteURI()) {
      throw new RuntimeException(ERR_MSG + "URI not absolute path");
    }

    try {
      if (mainURI.getHost() == null) {
        throw new RuntimeException(ERR_MSG + "bad URI host");
      }
    } catch (URIException e) {
      throw new RuntimeException(ERR_MSG + e.getMessage(), e);
    }

    try {
      // Rewrite only a leading "http:" scheme. A plain replaceFirst("http", "https") would turn
      // an already-secure "https://..." MainUrl into "httpss://...".
      String defaultSecureUrl = mainURI.toString().replaceFirst("(?i)^http:", "https:");
      secureURI = new URI(properties.getProperty("SecureUrl", defaultSecureUrl), true, "UTF-8");
    } catch (Exception e) {
      throw new RuntimeException(ERR_MSG + e.getMessage(), e);
    }
  }
示例#2
0
  /**
   * Decides whether {@code msg} should be scanned.
   *
   * <p>Every message qualifies when the scanner is configured to scan headers on all requests.
   * Otherwise a message qualifies only if it has a non-empty query string or a non-empty request
   * body; a request with neither is treated as a probable static page.
   *
   * @param msg the HTTP message under consideration
   * @return {@code true} if the message should be scanned
   */
  private boolean isValidMessageToScan(HttpMessage msg) {
    if (getScannerOptions().isScanHeadersAllRequests()) {
      return true;
    }

    // Cheap static-vs-dynamic heuristic, introduced because scanning everything was very slow.
    // A proper dynamic-page check at the parent plugin level (e.g. comparing content or looking
    // at error/redirect statuses) would be preferable to this trivial implementation.
    String queryString;
    try {
      queryString = msg.getRequestHeader().getURI().getQuery();
    } catch (URIException e) {
      log.error(e.getMessage(), e);
      queryString = null;
    }

    // At least one GET parameter is enough; failing that, fall back to the request body. With
    // neither present this is probably a static page or a page without parameters.
    boolean hasQuery = queryString != null && !queryString.isEmpty();
    return hasQuery || msg.getRequestBody().length() != 0;
  }
示例#3
0
 /**
  * Fetches an image from the network and decodes it into a {@link Bitmap}.
  *
  * <p>When an {@code HttpException} or {@code IOException} occurs the request is retried, pausing
  * one second between attempts, until {@code RETRY_TIME} attempts have failed.
  *
  * @param url address of the image; it is run through the URI encoder first and used unchanged
  *     if encoding fails
  * @return the result of {@code BitmapFactory.decodeStream} on the response body, or {@code null}
  *     if the response has no body stream
  * @throws AppException if the status code is not {@code HttpStatus.SC_OK}, or if the last
  *     attempt fails with a protocol or network error
  */
 public static Bitmap getBitmapByNet(String url) throws AppException {
   // Encode the address; keep the raw string if it cannot be parsed.
   URI uri = null;
   try {
     uri = new URI(url, false, "UTF-8");
   } catch (URIException e) {
     e.printStackTrace();
   }
   if (uri != null) {
     url = uri.toString();
   }

   Bitmap bitmap = null;
   int time = 0;
   do {
     // Scoped to one attempt so that the finally block never touches a null or stale method.
     GetMethod httpGet = null;
     try {
       HttpClient httpClient = HttpHelper.getHttpClient();
       httpGet = HttpHelper.getHttpGet(url, HttpHelper.getUserAgent());
       int statusCode = httpClient.executeMethod(httpGet);
       if (statusCode != HttpStatus.SC_OK) {
         throw AppException.http(statusCode);
       }
       InputStream inStream = httpGet.getResponseBodyAsStream();
       if (inStream != null) {
         try {
           bitmap = BitmapFactory.decodeStream(inStream);
         } finally {
           // Close even if decoding throws.
           inStream.close();
         }
       }
       break;
     } catch (HttpException e) {
       time++;
       if (time < RETRY_TIME) {
         try {
           Thread.sleep(1000);
         } catch (InterruptedException e1) {
           // Keep the interrupt visible to callers instead of silently discarding it.
           Thread.currentThread().interrupt();
         }
         continue;
       }
       // Fatal: the protocol may be wrong or the returned content may be malformed.
       e.printStackTrace();
       throw AppException.http(e);
     } catch (IOException e) {
       time++;
       if (time < RETRY_TIME) {
         try {
           Thread.sleep(1000);
         } catch (InterruptedException e1) {
           // Keep the interrupt visible to callers instead of silently discarding it.
           Thread.currentThread().interrupt();
         }
         continue;
       }
       // Network failure.
       e.printStackTrace();
       throw AppException.network(e);
     } finally {
       // Release the connection; httpGet is still null if set-up itself failed.
       if (httpGet != null) {
         httpGet.releaseConnection();
       }
     }
   } while (time < RETRY_TIME);
   return bitmap;
 }
示例#4
0
  /**
   * Builds a multi-line diagnostic dump of an HTTP method: its type and URI, the query string
   * and, for a POST, the body parameters and the string body.
   *
   * @param method the HTTP method to describe
   * @return the formatted dump; if the URI cannot be read, the dump ends with a line reporting
   *     the {@code URIException} message
   */
  private String getHttpInfoDumy(HttpMethod method) {
    NameValuePair[] params = null;
    String methodType = "GET";
    String reqBody = null;
    if (method instanceof PostMethod) {
      PostMethod post = (PostMethod) method;
      params = post.getParameters();
      methodType = "POST";
      // Only a StringRequestEntity exposes its content as text. The entity may be absent or of
      // another kind, so check the type rather than casting blindly (which would throw).
      Object entity = post.getRequestEntity();
      if (entity instanceof StringRequestEntity) {
        reqBody = ((StringRequestEntity) entity).getContent();
      }
    }

    StringBuilder sb = new StringBuilder();

    try {
      sb.append("#### getHttpInfoDumy ####");
      sb.append("\n## " + methodType + " [" + method.getURI() + "], hscd[" + this.hashCode() + "]");
    } catch (URIException e) {
      sb.append("\n## getParamsQueryStr- URIException " + e.getMessage() + "]");
      return sb.toString();
    }

    String queryString = method.getQueryString();
    if (queryString != null && queryString.length() > 0) {
      sb.append("\n" + "## queryString[" + queryString + "]");
    }

    if (params != null) {
      for (int i = 0; i < params.length; i++) {
        NameValuePair param = params[i];
        sb.append(
            "\n"
                + "## POST body param["
                + i
                + "], name["
                + param.getName()
                + "], value["
                + param.getValue()
                + "]");
      }
    }

    if (reqBody != null) {
      sb.append("\n" + "## POST body String [" + reqBody + "]");
    }

    sb.append("\n##########");

    return sb.toString();
  }
  /**
   * Copies the form items configured for {@code curi} into the given HTTP method.
   *
   * <p>For a {@code PostMethod} the items become the request body. For a {@code GetMethod} they
   * are appended to the query string the method already has.
   *
   * @param curi URI whose form items are looked up
   * @param http not used
   * @param method the method to populate
   * @param payload not used
   * @return {@code true} if the items were applied; {@code false} if there are no form items or
   *     the method is neither a POST nor a GET
   */
  public boolean populate(CrawlURI curi, HttpClient http, HttpMethod method, String payload) {
    Map formItems = null;
    try {
      formItems = getFormItems(curi);
    } catch (AttributeNotFoundException e1) {
      // Include the reason; previously the exception detail was dropped.
      logger.severe("Failed get of form items for " + curi + ": " + e1.getMessage());
    }
    if (formItems == null || formItems.size() <= 0) {
      try {
        logger.severe("No form items for " + method.getURI());
      } catch (URIException e) {
        logger.severe("No form items and exception getting uri: " + e.getMessage());
      }
      return false;
    }

    // Walk the entries directly rather than looking every key up a second time.
    NameValuePair[] data = new NameValuePair[formItems.size()];
    int index = 0;
    for (Iterator i = formItems.entrySet().iterator(); i.hasNext(); ) {
      Map.Entry entry = (Map.Entry) i.next();
      data[index++] = new NameValuePair((String) entry.getKey(), (String) entry.getValue());
    }

    boolean result = false;
    if (method instanceof PostMethod) {
      ((PostMethod) method).setRequestBody(data);
      result = true;
    } else if (method instanceof GetMethod) {
      // Let the method encode our pairs by setting them as the query, read the encoded form
      // back, then combine it with whatever query string was there before.
      HttpMethodBase hmb = (HttpMethodBase) method;
      String currentQuery = hmb.getQueryString();
      hmb.setQueryString(data);
      String newQuery = hmb.getQueryString();
      // Only join with '&' when there is an existing query; otherwise the result would start
      // with a stray '&'.
      boolean hasCurrentQuery = currentQuery != null && currentQuery.length() > 0;
      hmb.setQueryString(hasCurrentQuery ? currentQuery + "&" + newQuery : newQuery);
      result = true;
    } else {
      logger.severe("Unknown method type: " + method);
    }
    return result;
  }
 /**
  * Tests whether {@code curi} is the prerequisite URI configured for it.
  *
  * <p>The value returned by {@code getPrerequisite(curi)} is resolved against the URI of {@code
  * curi}; if the resolved URI equals that of {@code curi}, the {@code curi} is flagged as a
  * prerequisite (when not flagged already).
  *
  * @param curi the URI to test
  * @return {@code true} if {@code curi} matches its prerequisite URI; {@code false} if none is
  *     configured, the URIs differ, or the prerequisite cannot be resolved
  */
 public boolean isPrerequisite(final CrawlURI curi) {
   String curiStr = curi.getUURI().toString();
   String loginUri = getPrerequisite(curi);
   if (loginUri == null) {
     return false;
   }

   boolean matches = false;
   try {
     UURI resolved = UURIFactory.getInstance(curi.getUURI(), loginUri);
     matches = resolved != null && curiStr != null && resolved.toString().equals(curiStr);
     if (matches && !curi.isPrerequisite()) {
       curi.setPrerequisite(true);
       logger.fine(curi + " is prereq.");
     }
   } catch (URIException e) {
     logger.severe("Failed to uuri: " + curi + ", " + e.getMessage());
   }
   return matches;
 }
示例#7
0
  /**
   * Records the value of a response header as a link discovered from {@code curi}.
   *
   * <p>If {@code curi} carries an object under {@code URLInfo.ATTACH}, that object is also stored
   * on {@code curi} under the header value resolved against the URI of {@code curi}.
   *
   * @param curi the URI whose response contained the header
   * @param loc the header holding the link; nothing happens when it is {@code null}
   */
  protected void addHeaderLink(CrawlURI curi, Header loc) {
    // Nothing to add without a header.
    if (loc == null) {
      return;
    }
    // TODO: consider possibility of multiple headers
    try {
      // 302 redirects use a custom method to store the link
      // (modified by wuliufu, 2012-05-11).
      curi.createAndAddLocationLink(
          curi.getVia(), loc.getValue(), loc.getName() + ":", Link.REFER_HOP);

      boolean hasAttachment = curi.getObject(URLInfo.ATTACH) != null;
      if (hasAttachment) {
        UURI target = UURIFactory.getInstance(curi.getUURI(), loc.getValue());
        String message =
            "ParseHTTP: curi = "
                + curi.getUURI().toString()
                + "&& "
                + loc.getName()
                + "="
                + target.toString();
        logger.debug(message);
        // Carry the attachment over under the resolved target URI.
        curi.putObject(target.toString(), curi.getObject(URLInfo.ATTACH));
      }

      ++numberOfLinksExtracted;
    } catch (URIException e) {
      // A controller may be missing, e.g. when run by the extractor tool; log locally then.
      if (getController() == null) {
        logger.info(curi + ", " + loc.getValue() + ": " + e.getMessage());
      } else {
        getController().logUriError(e, curi.getUURI(), loc.getValue());
      }
    }
  }
示例#8
0
  /**
   * Worker loop: takes work units off the manager's queue and probes each URL until the manager
   * reports that no work is left, the stop flag is set, or the thread is interrupted.
   *
   * <p>For each unit a HEAD or GET request is sent (any other method sends nothing), then the
   * outcome is classified in one of three ways:
   *
   * <ul>
   *   <li>content analysis (GET only): the cleaned body is compared with the base case;
   *   <li>custom regex: a match against the raw response is treated as "not found";
   *   <li>otherwise the status code alone decides, and a HEAD hit is fetched again with GET so
   *       that the raw response can be reported.
   * </ul>
   */
  public void run() {

    queue = manager.workQueue;
    while (manager.hasWorkLeft()) {

      working = false;

      // if the stop signal has been given stop the thread
      if (stop) {
        return;
      }

      // block here while the pause button is pressed
      synchronized (this) {
        while (pleaseWait) {
          try {
            wait();
          } catch (InterruptedException e) {
            // restore the flag so the interruption stays visible after we return
            Thread.currentThread().interrupt();
            return;
          } catch (Exception e) {
            e.printStackTrace();
          }
        }
      }

      GetMethod httpget = null;
      HeadMethod httphead = null;

      try {

        work = (WorkUnit) queue.take();
        working = true;
        url = work.getWork();
        int code = 0;

        String responce = "";
        String rawResponce = "";

        // if the work is a head request
        if (work.getMethod().equalsIgnoreCase("HEAD")) {
          debugPrint("HEAD " + url.toString());

          httphead = new HeadMethod(url.toString());
          applyCustomHeaders(httphead);
          httphead.setFollowRedirects(Config.followRedirects);

          throttleRequests();

          // send the head request
          code = httpclient.executeMethod(httphead);
          debugPrint(code + " " + url.toString());
          httphead.releaseConnection();

        }
        // if we are doing a get request
        else if (work.getMethod().equalsIgnoreCase("GET")) {
          debugPrint("GET " + url.toString());
          httpget = new GetMethod(url.toString());
          applyCustomHeaders(httpget);
          httpget.setFollowRedirects(Config.followRedirects);

          throttleRequests();

          code = httpclient.executeMethod(httpget);
          debugPrint(code + " " + url.toString());

          // status line + headers, used when viewing the raw response; every body line is
          // prefixed with CRLF, so the first prefix doubles as the header/body separator
          String rawHeader = formatStatusAndHeaders(httpget);
          responce = readBodyLines(httpget);
          rawResponce = rawHeader + responce;

          // parse the html of what we have found
          if (Config.parseHTML && !work.getBaseCaseObj().isUseRegexInstead()) {
            queueHtmlIfText(httpget, responce);
          }

          // clean the response
          responce = FilterResponce.CleanResponce(responce, work);

          Thread.sleep(10);
          httpget.releaseConnection();
        } else {
          // There is no need to deal with requests other than HEAD or GET
        }

        // if we need to check against the base case
        if (work.getMethod().equalsIgnoreCase("GET")
            && work.getBaseCaseObj().useContentAnalysisMode()) {
          if (code == 200) {
            debugPrint("Base Case Check " + url.toString());

            // TODO move this option to the Adv options
            Pattern regexFindFile = Pattern.compile(".*file not found.*", Pattern.CASE_INSENSITIVE);

            Matcher m = regexFindFile.matcher(responce);

            // need to clean the base case of the item we are looking for
            String basecase =
                FilterResponce.removeItemCheckedFor(
                    work.getBaseCaseObj().getBaseCase(), work.getItemToCheck());

            if (m.find()) {
              // do nothing as we have a 404
            } else if (!responce.equalsIgnoreCase(basecase)) {
              // the response does not match the base case
              if (work.isDir()) {
                debugPrint("Found Dir (base case)" + url.toString());
                manager.foundDir(url, code, responce, basecase, rawResponce, work.getBaseCaseObj());
              } else {
                debugPrint("Found File (base case)" + url.toString());
                manager.foundFile(
                    url,
                    code,
                    responce,
                    work.getBaseCaseObj().getBaseCase(),
                    rawResponce,
                    work.getBaseCaseObj());
              }
            }
          } else if (code == 404 || code == 400) {
            // again do nothing as it is not there
          } else {
            if (work.isDir()) {
              debugPrint("Found Dir (base case)" + url.toString());
              manager.foundDir(
                  url,
                  code,
                  responce,
                  work.getBaseCaseObj().getBaseCase(),
                  rawResponce,
                  work.getBaseCaseObj());
            } else {
              debugPrint("Found File (base case)" + url.toString());
              manager.foundFile(
                  url,
                  code,
                  responce,
                  work.getBaseCaseObj().getBaseCase(),
                  rawResponce,
                  work.getBaseCaseObj());
            }
          }
        }
        // use the custom regex check instead
        else if (work.getBaseCaseObj().isUseRegexInstead()) {
          Pattern regexFindFile = Pattern.compile(work.getBaseCaseObj().getRegex());

          Matcher m = regexFindFile.matcher(rawResponce);
          if (m.find()) {
            // do nothing as we have a 404
            debugPrint("Regex matched so it's a 404, " + url.toString());
          } else {
            // httpget is null when the work unit was a HEAD request; there is no body to parse
            // in that case, and dereferencing it would kill the worker with an uncaught NPE
            if (Config.parseHTML && httpget != null) {
              queueHtmlIfText(httpget, rawResponce);
            }
            if (work.isDir()) {
              debugPrint("Found Dir (regex) " + url.toString());
              manager.foundDir(
                  url,
                  code,
                  responce,
                  work.getBaseCaseObj().getBaseCase(),
                  rawResponce,
                  work.getBaseCaseObj());
            } else {
              debugPrint("Found File (regex) " + url.toString());
              manager.foundFile(
                  url,
                  code,
                  responce,
                  work.getBaseCaseObj().getBaseCase(),
                  rawResponce,
                  work.getBaseCaseObj());
            }
          }

        }
        // just check the response code
        else {
          // if it is not the fail code, a 404 or a 400 then we have a possible hit
          // (code is still 0 when no request was sent)
          if (code != work.getBaseCaseObj().getFailCode()
              && code != 404
              && code != 0
              && code != 400) {
            if (work.getMethod().equalsIgnoreCase("HEAD")) {
              debugPrint("Getting responce via GET " + url.toString());

              httpget = new GetMethod(url.toString());
              // same header handling as the first request, so that a custom Host header is
              // applied as a virtual host here as well
              applyCustomHeaders(httpget);
              httpget.setFollowRedirects(Config.followRedirects);

              throttleRequests();

              int newCode = httpclient.executeMethod(httpget);

              // in some cases the second get can return a different result than the first
              // head request!
              if (newCode != code) {
                manager.foundError(
                    url,
                    "Return code for first HEAD, is different to the second GET: "
                        + code
                        + " - "
                        + newCode);
              }

              // status line and headers, followed by a blank line
              rawResponce = formatStatusAndHeaders(httpget) + "\r\n";

              if (httpget.getResponseContentLength() > 0) {
                // get the http body
                String tempResponce = readBodyLines(httpget);
                rawResponce = rawResponce + tempResponce;

                if (Config.parseHTML) {
                  queueHtmlIfText(httpget, tempResponce);
                }
              }

              httpget.releaseConnection();
            }

            if (work.isDir()) {
              manager.foundDir(url, code, rawResponce, work.getBaseCaseObj());
            } else {
              manager.foundFile(url, code, rawResponce, work.getBaseCaseObj());
            }
          }
        }

        manager.workDone();
        Thread.sleep(20);

      } catch (NoHttpResponseException e) {
        manager.foundError(url, "NoHttpResponseException " + e.getMessage());
        manager.workDone();
      } catch (ConnectTimeoutException e) {
        manager.foundError(url, "ConnectTimeoutException " + e.getMessage());
        manager.workDone();
      } catch (URIException e) {
        manager.foundError(url, "URIException " + e.getMessage());
        manager.workDone();
      } catch (IOException e) {
        manager.foundError(url, "IOException " + e.getMessage());
        manager.workDone();
      } catch (InterruptedException e) {
        manager.workDone();
        // restore the flag so the interruption stays visible after we return
        Thread.currentThread().interrupt();
        return;
      } catch (IllegalArgumentException e) {
        e.printStackTrace();
        manager.foundError(url, "IllegalArgumentException " + e.getMessage());
        manager.workDone();
      } finally {
        // releasing again after an in-line release is harmless and covers every error path
        if (httpget != null) {
          httpget.releaseConnection();
        }

        if (httphead != null) {
          httphead.releaseConnection();
        }
      }
    }
  }

  /** Prints a debug line tagged with this worker's id when {@code Config.debug} is set. */
  private void debugPrint(String message) {
    if (Config.debug) {
      System.out.println("DEBUG Worker[" + threadId + "]: " + message);
    }
  }

  /**
   * Copies the manager's custom HTTP headers onto a HEAD request. A header whose name starts with
   * "Host" has to be set as the virtual host instead of as a plain request header.
   */
  private void applyCustomHeaders(HeadMethod method) {
    Vector HTTPheaders = manager.getHTTPHeaders();
    for (int a = 0; a < HTTPheaders.size(); a++) {
      HTTPHeader httpHeader = (HTTPHeader) HTTPheaders.elementAt(a);
      if (httpHeader.getHeader().startsWith("Host")) {
        method.getParams().setVirtualHost(httpHeader.getValue());
      } else {
        method.setRequestHeader(httpHeader.getHeader(), httpHeader.getValue());
      }
    }
  }

  /**
   * Copies the manager's custom HTTP headers onto a GET request. A header whose name starts with
   * "Host" has to be set as the virtual host instead of as a plain request header. (Overloaded
   * per concrete method class so that no additional type has to be imported.)
   */
  private void applyCustomHeaders(GetMethod method) {
    Vector HTTPheaders = manager.getHTTPHeaders();
    for (int a = 0; a < HTTPheaders.size(); a++) {
      HTTPHeader httpHeader = (HTTPHeader) HTTPheaders.elementAt(a);
      if (httpHeader.getHeader().startsWith("Host")) {
        method.getParams().setVirtualHost(httpHeader.getValue());
      } else {
        method.setRequestHeader(httpHeader.getHeader(), httpHeader.getValue());
      }
    }
  }

  /**
   * When request limiting is enabled, sleeps in 100 ms steps until the overall rate (total done
   * divided by elapsed seconds) is no longer above the configured limit.
   */
  private void throttleRequests() throws InterruptedException {
    if (manager.isLimitRequests()) {
      while (manager.getTotalDone()
              / ((System.currentTimeMillis() - manager.getTimestarted()) / 1000.0)
          > manager.getLimitRequestsTo()) {
        Thread.sleep(100);
      }
    }
  }

  /** Renders the status line and every response header, each terminated by CRLF. */
  private String formatStatusAndHeaders(GetMethod method) {
    StringBuffer buf = new StringBuffer();
    buf.append(method.getStatusLine() + "\r\n");
    Header headers[] = method.getResponseHeaders();
    for (int a = 0; a < headers.length; a++) {
      buf.append(headers[a].getName() + ": " + headers[a].getValue() + "\r\n");
    }
    return buf.toString();
  }

  /**
   * Reads the response body line by line, prefixing every line with CRLF. Returns an empty string
   * when the response has no body stream.
   */
  private String readBodyLines(GetMethod method) throws IOException {
    java.io.InputStream body = method.getResponseBodyAsStream();
    if (body == null) {
      return "";
    }
    BufferedReader input = new BufferedReader(new InputStreamReader(body));
    try {
      StringBuffer buf = new StringBuffer();
      String line;
      while ((line = input.readLine()) != null) {
        buf.append("\r\n" + line);
      }
      return buf.toString();
    } finally {
      input.close();
    }
  }

  /** Queues {@code content} for HTML parsing when the response Content-Type starts with "text". */
  private void queueHtmlIfText(GetMethod method, String content) {
    Header contentType = method.getResponseHeader("Content-Type");
    if (contentType != null && contentType.getValue().startsWith("text")) {
      manager.addHTMLToParseQueue(new HTMLparseWorkUnit(content, work));
    }
  }
  /**
   * Command-line filter: reads delimited lines from standard input, canonicalizes the selected
   * columns with {@code AggressiveUrlCanonicalizer.urlStringToKey}, and prints each line to
   * standard output.
   *
   * <p>Options:
   *
   * <ul>
   *   <li>{@code -cdx}: lines starting with {@code CDX_PREFIX} are echoed unchanged;
   *   <li>{@code -f N}: 1-based column to canonicalize; may be repeated (default: column 1);
   *   <li>{@code -d DELIM}: field delimiter (default: one space). It is handed to {@code
   *       String.split} and is therefore interpreted as a regular expression.
   * </ul>
   *
   * <p>A column that is out of range for a line, or whose URL cannot be canonicalized, is
   * reported on standard error; the line is still printed with that column left as it was.
   *
   * @param args program arguments
   */
  public static void main(String[] args) {
    AggressiveUrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer();
    ArrayList<Integer> columns = new ArrayList<Integer>();

    boolean cdxPassThru = false;
    String delimiter = " ";
    int n = 0;
    while (n < args.length) {
      String arg = args[n];
      if (arg.equals("-cdx")) {
        cdxPassThru = true;
        n++;
        continue;
      }
      if (n == (args.length - 1)) {
        USAGE();
        // The option has no value; never read past the end of args, even if USAGE() returns.
        return;
      }
      String val = args[n + 1];
      if (arg.equals("-f")) {
        columns.add(Integer.valueOf(val));
      } else if (arg.equals("-d")) {
        delimiter = val;
      } else {
        USAGE();
      }
      n += 2;
    }
    // default to the first column in case none was specified:
    if (columns.size() == 0) {
      columns.add(Integer.valueOf(1));
    }

    // convert the 1-based columns to 0-based indexes:
    int[] cols = new int[columns.size()];
    for (int idx = 0; idx < columns.size(); idx++) {
      cols[idx] = columns.get(idx).intValue() - 1;
    }
    BufferedReader r = new BufferedReader(new InputStreamReader(System.in, ByteOp.UTF8));
    StringBuilder sb = new StringBuilder();
    String line = null;
    long lineNumber = 0;

    while (true) {
      try {
        line = r.readLine();
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
      }
      if (line == null) {
        break;
      }
      lineNumber++;
      if (cdxPassThru && line.startsWith(CDX_PREFIX)) {
        System.out.println(line);
        continue;
      }
      String parts[] = line.split(delimiter);
      for (int column : cols) {
        // "-f 0" (or a negative value) gives a negative index; treat it like any other
        // out-of-range column instead of failing with ArrayIndexOutOfBoundsException.
        if (column < 0 || column >= parts.length) {
          System.err.println("Invalid line " + lineNumber + " (" + line + ") skipped");
        } else {
          try {
            parts[column] = canonicalizer.urlStringToKey(parts[column]);
          } catch (URIException e) {
            printInvalidUrl(lineNumber, line, parts[column], e);
          } catch (StringIndexOutOfBoundsException e) {
            printInvalidUrl(lineNumber, line, parts[column], e);
          }
        }
      }
      sb.setLength(0);
      for (int i = 0; i < parts.length; i++) {
        sb.append(parts[i]);
        if (i < (parts.length - 1)) {
          sb.append(delimiter);
        }
      }
      System.out.println(sb.toString());
    }
  }

  /** Reports on standard error a column value that could not be canonicalized. */
  private static void printInvalidUrl(long lineNumber, String line, String value, Exception e) {
    System.err.println(
        "Invalid URL in line " + lineNumber + " (" + line + ") skipped (" + value + ")");
    e.printStackTrace();
  }
示例#10
0
  /**
   * Runs a CDX query and writes the outcome to {@code responseWriter}.
   *
   * <p>Steps, in order: infer {@code matchType} from wildcards in the url when none is given;
   * compute the url range; apply the access check; normalise the limit, sort and {@code
   * fastLatest} settings; create a paged or a bounded iterator; write the response. Note that
   * {@code query} is modified in place. The iterator, if one was created, is always closed.
   *
   * @param query the query to run; its fields are updated with the inferred settings
   * @param authToken credentials used for the access check
   * @param responseWriter receives the results, or an error message
   * @throws IOException declared by this method; {@code URIException} and {@code
   *     URISyntaxException} are caught here and reported through {@code responseWriter}
   */
  public void getCdx(CDXQuery query, AuthToken authToken, CDXWriter responseWriter)
      throws IOException {
    CloseableIterator<String> iter = null;

    try {
      // A leading "*." or a trailing "*" in the url is shorthand for a matchType.
      if (query.matchType == null) {
        String rawUrl = query.url;
        if (rawUrl.startsWith("*.")) {
          query.matchType = MatchType.domain;
          query.url = rawUrl.substring(2);
        } else if (rawUrl.endsWith("*")) {
          query.matchType = MatchType.prefix;
          query.url = rawUrl.substring(0, rawUrl.length() - 1);
        } else {
          query.matchType = MatchType.exact;
        }
      }

      // No filter is needed when the token may access every url.
      // (A previously disabled block returned early for domain/host matches without full
      // access, because the access check was considered too slow for them.)
      CDXAccessFilter accessChecker =
          authChecker.isAllUrlAccessAllowed(authToken)
              ? null
              : authChecker.createAccessFilter(authToken);

      String startEndUrl[] =
          urlSurtRangeComputer.determineRange(query.url, query.matchType, "", "");

      if (startEndUrl == null) {
        responseWriter.printError(
            "Sorry, matchType=" + query.matchType.name() + " is not supported by this server");
        return;
      }

      boolean urlExcluded =
          (accessChecker != null) && !accessChecker.includeUrl(startEndUrl[0], query.url);
      if (urlExcluded) {
        if (query.showNumPages) {
          // Default to 1 page even if no results
          responseWriter.printNumPages(1, false);
        }
        return;
      }

      // "last" (or limit -1) means: the single newest line.
      if (query.last || query.limit == -1) {
        query.limit = 1;
        query.setSort(SortType.reverse);
      }

      if (query.fastLatest == null) {
        // Optimize: default fastLatest to true for last line or closest sorted results.
        // NOTE(review): limit == -1 has already been rewritten to 1 just above, so the first
        // operand looks unreachable here - confirm the intended behaviour before changing it.
        query.fastLatest =
            (query.limit == -1) || (!query.closest.isEmpty() && (query.limit > 0));
      }

      final boolean pagedQuery = query.page >= 0 || query.showNumPages;
      int maxLimit;

      if (!pagedQuery) {
        // Non-paged merged query
        iter = createBoundedCdxIterator(startEndUrl, query, null, null);

        // TODO: apply collection-view filtering here. It should happen separately
        // from exclusion check. We'd need to parse CDX lines into CDXLine object
        // before passing it to writeCdxResponse(). Pass CDXFilter to getCdx()?
        // Pass CDX source object that encapsulates collection-view filtering?

        maxLimit = this.queryMaxLimit;
      } else {
        iter = createPagedCdxIterator(startEndUrl, query, authToken, responseWriter);

        if (iter == null) {
          return;
        }

        // Page size determines the max limit here
        maxLimit = Integer.MAX_VALUE;
      }

      writeCdxResponse(responseWriter, iter, maxLimit, query, authToken, accessChecker);

    } catch (URIException e) {
      responseWriter.printError(e.toString());
    } catch (URISyntaxException e) {
      responseWriter.printError(e.toString());
    } finally {
      if (iter != null) {
        iter.close();
      }
    }
  }