protected String loadCustomPage(URL url) { logger.info("Loading custom page: " + url); CloseableHttpResponse response = null; int attempts = 0; int retry_ = (retry == 0) ? 1 : retry; boolean responseOk = false; while (attempts < retry_ && !responseOk) { if (attempts > 0) { try { logger.finest("Going to sleep for " + retryAfter + "ms " + url); Thread.sleep(retryAfter); } catch (InterruptedException e) { // ignore logger.severe("Failed to sleep for " + retryAfter + "ms " + url); } logger.info( "Retrying to get page after " + retryAfter + "ms : Attempt " + new Integer(attempts + 1) + "/" + retry + " " + url); } try { response = LecturaCrawlerSuite.getResponse(url, useProxy); } catch (Exception e) { logger.severe("Failed to retrieve response from custom page: " + e.getMessage()); } responseOk = (response != null && response.getStatusLine().getStatusCode() == 200); attempts++; } try { HttpEntity entity = response.getEntity(); // if the status code is not OK, report a problem if (response.getStatusLine().getStatusCode() != 200) { Header[] headers = response.getAllHeaders(); String headersStr = response.getStatusLine().toString(); for (int i = 0; i < headers.length; i++) { headersStr += " | " + headers[i]; } logger.warning("Status code not OK: [" + url.toString() + "] " + headersStr); logger.warning(EntityUtils.toString(entity)); } if (entity != null) { return EntityUtils.toString(entity); } else { return null; } } catch (Exception e) { logger.severe("Failed to load custom page: [" + url.toString() + "] " + e.getMessage()); return null; } finally { try { response.close(); } catch (IOException e) { logger.severe("Failed to close HTTP response: " + e.getMessage()); } } }
protected void loadPage(URL url, Parser parser) { // toto je prasarna jak kreten StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); // if testListing called this, perform no check if (!stackTraceElements[2].getMethodName().equals("testListing")) { if (parser.equals(listingParser)) { if (listingExists(url)) { currentListing = new Listing(); currentListing.setDuplicate(true); // avoid the request return; } } } CloseableHttpResponse response = null; int attempts = 0; int retry_ = (retry == 0) ? 1 : retry; boolean responseOk = false; while (attempts < retry_ && !responseOk) { if (attempts > 0) { // consume the entity, close the previous response -> release the resources (connection, // ...) if (response != null) { try { // this will consume the entity and release the resources automatically, e.g. connection EntityUtils.consume(response.getEntity()); } catch (IOException e) { logger.severe("Failed to consume the entity: " + e.getMessage()); } try { response.close(); } catch (IOException e) { logger.severe("Failed to close HTTP response: " + e.getMessage()); } } try { logger.finest("Going to sleep for " + retryAfter + "ms " + url); Thread.sleep(retryAfter); } catch (InterruptedException e) { // ignore logger.severe("Failed to sleep for " + retryAfter + "ms " + url); } logger.info( "Retrying to get page after " + retryAfter + "ms : Attempt " + new Integer(attempts + 1) + "/" + retry + " " + url); } try { response = LecturaCrawlerSuite.getResponse(url, useProxy); } catch (Exception e) { logger.severe("Failed to retrieve response: " + e.getMessage()); } responseOk = (response != null && response.getStatusLine().getStatusCode() == HttpStatus.SC_OK); attempts++; } try { HttpEntity entity = response.getEntity(); // if the status code is not OK, report a problem if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { Header[] headers = response.getAllHeaders(); String headersStr = response.getStatusLine().toString(); for (int i = 0; i < headers.length; i++) { headersStr += " | " + headers[i]; } logger.warning("Status code not OK: [" + url.toString() + "] " + headersStr); logger.warning(EntityUtils.toString(entity)); } if (entity != null) { Header[] headers = response.getHeaders("Content-Type"); Charset charset = Charset.forName("UTF-8"); String regexCharset = ".*?charset=(.*)$"; for (Header header : headers) { String val = header.getValue(); if (val.matches(regexCharset)) { try { charset = Charset.forName(val.replaceAll(regexCharset, "$1")); break; } catch (IllegalCharsetNameException e) { // ignore, it is already set to utf-8 } } } parser.parse(EntityUtils.toString(entity, charset)); } } catch (Exception e) { logger.severe("Failed to load page: [" + url + "] " + e.getMessage()); } finally { try { // this will consume the entity and release the resources automatically, e.g. connection EntityUtils.consume(response.getEntity()); } catch (IOException e) { logger.severe("Failed to consume the entity: " + e.getMessage()); } try { response.close(); } catch (IOException e) { logger.severe("Failed to close HTTP response: " + e.getMessage()); } } }