public void download() throws WontFetchException { logger.debug("Downloading %s", pageInfo.getUrl()); HttpGet request = new HttpGet(pageInfo.getUrl()); request.setHeader("User-Agent", USER_AGENT); request.setHeader("Referer", pageInfo.getReferURL()); // request.setHeader("Accept-Encoding", "gzip"); request.setHeader("Connection", "keep-alive"); HttpResponse response; try { response = httpClient.execute(request); } catch (IOException e) { logger.error("Error communicating to server: " + pageInfo.getUrl()); throw new WontFetchException(); } int statusCode = response.getStatusLine().getStatusCode(); pageInfo.setHttpStatus(statusCode); if (statusCode == HttpStatus.SC_NOT_MODIFIED) { } else if (statusCode != HttpStatus.SC_OK) { } for (Header header : response.getAllHeaders()) { String name = header.getName(); String value = header.getValue(); pageInfo.getHeaders().put(name, value); } String contentType = pageInfo.getHeaders().get("Content-Type"); if (contentType != null && !contentType.matches("(application|text)/(xml|xhtml|html)(\\s*;.*)?")) { logger.error("Wrong content type: " + contentType); throw new WontFetchException(); } HttpEntity entity = response.getEntity(); try { String body = IOUtils.toString(entity.getContent(), crawler.getEncoding()); pageInfo.setContent(body); } catch (IOException e) { e.printStackTrace(); } }