Пример #1
0
  /**
   * Command-line entry point: fetches a single URL with a freshly configured
   * {@link HttpProtocol} and prints the outcome to standard output.
   *
   * <p>Unless {@code protocol.skipRobots} is set, the robots rules for the URL are looked up
   * first and the verdict is printed. The URL is then fetched regardless of that verdict, and
   * the URL, the response metadata, the status code and the content length are printed, one
   * per line.
   *
   * @param args {@code args[0]} is the URL to fetch; {@code args[1]} is the configuration
   *     source handed to {@code ConfUtils.loadConf}
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   * @throws Exception if loading the configuration or fetching the URL fails
   */
  public static void main(String[] args) throws Exception {
    // Fail early with an explicit message instead of an ArrayIndexOutOfBoundsException
    // when the caller forgets one of the two required arguments.
    if (args.length < 2) {
      throw new IllegalArgumentException(
          "Expected arguments: <url> <conf>, but got " + args.length + " argument(s)");
    }

    String url = args[0];
    String confSource = args[1];

    HttpProtocol protocol = new HttpProtocol();
    Config conf = new Config();
    ConfUtils.loadConf(confSource, conf);
    protocol.configure(conf);

    if (!protocol.skipRobots) {
      BaseRobotRules rules = protocol.getRobotRules(url);
      System.out.println("is allowed : " + rules.isAllowed(url));
    }

    // The fetch starts with empty metadata; nothing is carried over from a previous request.
    Metadata md = new Metadata();
    ProtocolResponse response = protocol.getProtocolOutput(url, md);
    System.out.println(url);
    System.out.println(response.getMetadata());
    System.out.println(response.getStatusCode());
    System.out.println(response.getContent().length);
  }
Пример #2
0
  /**
   * Converts an {@link HttpResponse} into a {@link ProtocolResponse}.
   *
   * <p>Every response header is copied into a new {@link Metadata} instance under its
   * lower-cased name (using {@link Locale#ROOT}). The entity is read through
   * {@code HttpProtocol.toByteArray} with {@code maxContent} as its limit argument; when that
   * call reports the content as trimmed, the metadata key {@code http.trimmed} is set to
   * {@code "true"} and a warning with the resulting length is logged.
   *
   * @param response the HTTP response to convert
   * @return the content bytes, status code and header metadata of {@code response}
   * @throws ClientProtocolException declared by the overridden method
   * @throws IOException if reading the response entity fails
   */
  @Override
  public ProtocolResponse handleResponse(HttpResponse response)
      throws ClientProtocolException, IOException {
    final int statusCode = response.getStatusLine().getStatusCode();

    // Header names are normalised to lower case; values are stored untouched.
    final Metadata headers = new Metadata();
    for (HeaderIterator it = response.headerIterator(); it.hasNext(); ) {
      final Header current = it.nextHeader();
      final String key = current.getName().toLowerCase(Locale.ROOT);
      headers.addValue(key, current.getValue());
    }

    // Out-parameter: toByteArray flips this flag when it trims the content.
    final MutableBoolean wasTrimmed = new MutableBoolean();
    final byte[] content =
        HttpProtocol.toByteArray(response.getEntity(), maxContent, wasTrimmed);

    if (wasTrimmed.booleanValue()) {
      headers.setValue("http.trimmed", "true");
      LOG.warn("HTTP content trimmed to {}", content.length);
    }

    return new ProtocolResponse(content, statusCode, headers);
  }