protected <T> ResponseEntity<T> getForEntity(String path, Class<T> responseType) throws IOException, OAuthCommunicationException, OAuthExpectationFailedException, OAuthMessageSignerException { // lot of this could be refactored out or made more general (for post requests, etc) HttpClientOAuthConsumer consumer = new HttpClientOAuthConsumer(consumerKey, secret); consumer.setSigningStrategy(new AuthorizationHeaderSigningStrategy()); String uri = constructUrl(path); HttpGet getRequest = new HttpGet(uri); consumer.sign(getRequest); HttpHeaders headers = new HttpHeaders(); for (Header header : getRequest.getAllHeaders()) { headers.add(header.getName(), header.getValue()); } ResponseEntity<T> entity = new TestRestTemplate().getForEntity(uri, responseType, headers); return entity; }
/** * 抓取目标url的内容 * * @date 2013-1-7 上午11:08:54 * @param toFetchURL * @return */ public FetchResult fetch(FetchRequest req) throws Exception { if (req.getHttpMethod() != null && !Http.Method.GET.equals(req.getHttpMethod())) { // 获取到URL后面的QueryParam String query = new URL(req.getUrl()).getQuery(); for (String q : query.split("\\&")) { String[] qv = q.split("="); String name = qv[0]; String val = qv[1]; List<Object> vals = req.getParams().get(name); if (vals == null) { vals = new ArrayList<Object>(); req.getParams().put(name, vals); } vals.add(val); } return request(req); } FetchResult fetchResult = new FetchResult(); HttpGet get = null; HttpEntity entity = null; String toFetchURL = req.getUrl(); try { get = new HttpGet(toFetchURL); // 设置请求GZIP压缩,注意,前面必须设置GZIP解压缩处理 get.addHeader("Accept-Encoding", "gzip"); for (Iterator<Entry<String, String>> it = headers.entrySet().iterator(); it.hasNext(); ) { Entry<String, String> entry = it.next(); get.addHeader(entry.getKey(), entry.getValue()); } // 同步信号量,在真正对服务端进行访问之前进行访问间隔的控制 // TODO 针对每个请求有一个delay的参数设置 synchronized (mutex) { // 获取当前时间 long now = (new Date()).getTime(); // 对同一个Host抓取时间间隔进行控制,若在设置的时限内则进行休眠 if (now - lastFetchTime < config.getPolitenessDelay()) Thread.sleep(config.getPolitenessDelay() - (now - lastFetchTime)); // 不断更新最后的抓取时间,注意,是针对HOST的,不是针对某个URL的 lastFetchTime = (new Date()).getTime(); } // 记录get请求信息 Header[] headers = get.getAllHeaders(); for (Header h : headers) { Map<String, List<String>> hs = req.getHeaders(); String key = h.getName(); List<String> val = hs.get(key); if (val == null) val = new ArrayList<String>(); val.add(h.getValue()); hs.put(key, val); } req.getCookies().putAll(this.cookies); fetchResult.setReq(req); // 执行get访问,获取服务端返回内容 HttpResponse response = httpClient.execute(get); headers = response.getAllHeaders(); for (Header h : headers) { Map<String, List<String>> hs = fetchResult.getHeaders(); String key = h.getName(); List<String> val = hs.get(key); if (val == null) val = new ArrayList<String>(); val.add(h.getValue()); hs.put(key, val); } // 设置已访问URL fetchResult.setFetchedUrl(toFetchURL); String uri = get.getURI().toString(); if (!uri.equals(toFetchURL)) if (!URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL)) fetchResult.setFetchedUrl(uri); entity = response.getEntity(); // 服务端返回的状态码 int statusCode = response.getStatusLine().getStatusCode(); if (statusCode != HttpStatus.SC_OK) { if (statusCode != HttpStatus.SC_NOT_FOUND) { Header locationHeader = response.getFirstHeader("Location"); // 如果是301、302跳转,获取跳转URL即可返回 if (locationHeader != null && (statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_MOVED_TEMPORARILY)) fetchResult.setMovedToUrl( URLCanonicalizer.getCanonicalURL(locationHeader.getValue(), toFetchURL)); } // 只要不是OK的除了设置跳转URL外设置statusCode即可返回 // 判断是否有忽略状态码的设置 if (this.site.getSkipStatusCode() != null && this.site.getSkipStatusCode().trim().length() > 0) { String[] scs = this.site.getSkipStatusCode().split(","); for (String code : scs) { int c = CommonUtil.toInt(code); // 忽略此状态码,依然解析entity if (statusCode == c) { assemPage(fetchResult, entity); break; } } } fetchResult.setStatusCode(statusCode); return fetchResult; } // 处理服务端返回的实体内容 if (entity != null) { fetchResult.setStatusCode(statusCode); assemPage(fetchResult, entity); return fetchResult; } } catch (Throwable e) { fetchResult.setFetchedUrl(e.toString()); fetchResult.setStatusCode(Status.INTERNAL_SERVER_ERROR.ordinal()); return fetchResult; } finally { try { if (entity == null && get != null) get.abort(); } catch (Exception e) { throw e; } } fetchResult.setStatusCode(Status.UNSPECIFIED_ERROR.ordinal()); return fetchResult; }