@Override public CrawlResultPojo crawl(UrlPojo urlPojo) { if (urlPojo == null) { return null; } CrawlResultPojo crawlResultPojo = new CrawlResultPojo(); CloseableHttpResponse response1 = null; BufferedReader br = null; try { HttpGet httpget = new HttpGet(urlPojo.getUrl()); response1 = httpclient.execute(httpget); HttpEntity entity = response1.getEntity(); InputStreamReader isr = new InputStreamReader(entity.getContent(), "utf-8"); br = new BufferedReader(isr); String line = null; StringBuilder stringBuilder = new StringBuilder(); while ((line = br.readLine()) != null) { stringBuilder.append(line + "\n"); } crawlResultPojo.setSuccess(true); crawlResultPojo.setPageContent(stringBuilder.toString()); return crawlResultPojo; } catch (Exception e) { e.printStackTrace(); crawlResultPojo.setSuccess(false); } finally { if (response1 != null) { try { response1.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } if (br != null) { try { br.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } } return crawlResultPojo; }
/**
 * Manual smoke test: POSTs a fixed set of form parameters to a hard-coded URL through
 * {@link #crawl4Post(UrlPojo)} and prints the returned page content to standard output.
 *
 * @param args ignored
 * @throws Exception declared for convenience; kept so the signature stays unchanged
 */
public static void main(String[] args) throws Exception {
    HttpClientCrawlerImpl httpClientCrawlerImpl = new HttpClientCrawlerImpl();

    String url = "http://www.wdzj.com/front_select-plat";
    UrlPojo urlPojo = new UrlPojo(url);

    // Form parameters sent with the POST request.
    Map<String, Object> paramsMap = new HashMap<String, Object>();
    paramsMap.put("currPage", 2);
    paramsMap.put("params", "");
    paramsMap.put("sort", 0);
    urlPojo.setParamsMap(paramsMap);

    CrawlResultPojo resultPojo = httpClientCrawlerImpl.crawl4Post(urlPojo);
    if (resultPojo != null) {
        System.out.println(resultPojo.getPageContent());
    }
}
/** 传入加入参数post 参数的url */ public CrawlResultPojo crawl4Post(UrlPojo urlPojo) { if (urlPojo == null) { return null; } CrawlResultPojo crawlResultPojo = new CrawlResultPojo(); CloseableHttpResponse response1 = null; BufferedReader br = null; try { RequestBuilder rb = RequestBuilder.post().setUri(new URI(urlPojo.getUrl())); /* HttpUriRequest urlRequest = RequestBuilder.post() .setUri(new URI(urlPojo.getUrl())) .addParameter("IDToken1", "username") .addParameter("IDToken2", "password") .build();*/ Map<String, Object> paramsMap = urlPojo.getParamsMap(); if (paramsMap != null) { for (Entry<String, Object> entry : paramsMap.entrySet()) { rb.addParameter(entry.getKey(), entry.getValue().toString()); } } HttpUriRequest httpRequst = rb.build(); response1 = httpclient.execute(httpRequst); HttpEntity entity = response1.getEntity(); InputStreamReader isr = new InputStreamReader(entity.getContent(), "utf-8"); br = new BufferedReader(isr); String line = null; StringBuilder stringBuilder = new StringBuilder(); while ((line = br.readLine()) != null) { stringBuilder.append(line + "\n"); } crawlResultPojo.setSuccess(true); crawlResultPojo.setPageContent(stringBuilder.toString()); return crawlResultPojo; } catch (Exception e) { e.printStackTrace(); crawlResultPojo.setSuccess(false); } finally { if (response1 != null) { try { response1.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } if (br != null) { try { br.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } } return crawlResultPojo; }