/**
 * Command-line entry point for fetching a single URL with this protocol.
 *
 * <p>Loads the configuration file, configures a fresh {@code HttpProtocol},
 * optionally reports whether the robots rules allow the URL (skipped when
 * {@code protocol.skipRobots} is set), then fetches the URL and prints the
 * URL, the response metadata, the status code and the content length.
 *
 * @param args {@code args[0]} is the URL to fetch; {@code args[1]} is the
 *             configuration resource handed to {@code ConfUtils.loadConf}
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if loading the configuration or fetching the URL fails
 */
public static void main(String[] args) throws Exception {
    // Fail with an explicit usage message rather than an opaque
    // ArrayIndexOutOfBoundsException when arguments are missing.
    if (args == null || args.length < 2) {
        throw new IllegalArgumentException(
                "Usage: HttpProtocol <url> <config file>");
    }

    HttpProtocol protocol = new HttpProtocol();
    Config conf = new Config();

    String url = args[0];
    ConfUtils.loadConf(args[1], conf);
    protocol.configure(conf);

    if (!protocol.skipRobots) {
        BaseRobotRules rules = protocol.getRobotRules(url);
        System.out.println("is allowed : " + rules.isAllowed(url));
    }

    Metadata md = new Metadata();
    ProtocolResponse response = protocol.getProtocolOutput(url, md);

    System.out.println(url);
    System.out.println(response.getMetadata());
    System.out.println(response.getStatusCode());
    System.out.println(response.getContent().length);
}
/**
 * Converts an HTTP response into a {@code ProtocolResponse}.
 *
 * <p>Every response header is copied into a new {@code Metadata} instance
 * under its lower-cased name (using {@code Locale.ROOT}). The entity body is
 * read through {@code HttpProtocol.toByteArray} with {@code maxContent} as
 * the limit; if that call reports trimming, the metadata key
 * {@code http.trimmed} is set to {@code "true"} and a warning is logged.
 *
 * @param response the HTTP response to convert
 * @return a response holding the content bytes, the HTTP status code and
 *         the collected header metadata
 * @throws ClientProtocolException declared by the handler contract
 * @throws IOException if reading the entity content fails
 */
@Override
public ProtocolResponse handleResponse(HttpResponse response)
        throws ClientProtocolException, IOException {
    final int statusCode = response.getStatusLine().getStatusCode();

    // Copy headers; names are normalised to lower case, values kept as-is.
    final Metadata headers = new Metadata();
    for (HeaderIterator it = response.headerIterator(); it.hasNext();) {
        final Header h = it.nextHeader();
        final String key = h.getName().toLowerCase(Locale.ROOT);
        headers.addValue(key, h.getValue());
    }

    // The flag is passed to toByteArray, which presumably sets it when the
    // body exceeded maxContent and was cut short (helper not visible here).
    final MutableBoolean wasTrimmed = new MutableBoolean();
    final byte[] content =
            HttpProtocol.toByteArray(response.getEntity(), maxContent, wasTrimmed);

    if (wasTrimmed.booleanValue()) {
        headers.setValue("http.trimmed", "true");
        LOG.warn("HTTP content trimmed to {}", content.length);
    }

    return new ProtocolResponse(content, statusCode, headers);
}