protected <T> ResponseEntity<T> getForEntity(String path, Class<T> responseType)
      throws IOException, OAuthCommunicationException, OAuthExpectationFailedException,
          OAuthMessageSignerException {
    // lot of this could be refactored out or made more general (for post requests, etc)
    HttpClientOAuthConsumer consumer = new HttpClientOAuthConsumer(consumerKey, secret);
    consumer.setSigningStrategy(new AuthorizationHeaderSigningStrategy());

    String uri = constructUrl(path);
    HttpGet getRequest = new HttpGet(uri);

    consumer.sign(getRequest);

    HttpHeaders headers = new HttpHeaders();
    for (Header header : getRequest.getAllHeaders()) {
      headers.add(header.getName(), header.getValue());
    }

    ResponseEntity<T> entity = new TestRestTemplate().getForEntity(uri, responseType, headers);

    return entity;
  }
Ejemplo n.º 2
0
  /**
   * 抓取目标url的内容
   *
   * @date 2013-1-7 上午11:08:54
   * @param toFetchURL
   * @return
   */
  public FetchResult fetch(FetchRequest req) throws Exception {
    if (req.getHttpMethod() != null && !Http.Method.GET.equals(req.getHttpMethod())) {
      // 获取到URL后面的QueryParam
      String query = new URL(req.getUrl()).getQuery();
      for (String q : query.split("\\&")) {
        String[] qv = q.split("=");
        String name = qv[0];
        String val = qv[1];
        List<Object> vals = req.getParams().get(name);
        if (vals == null) {
          vals = new ArrayList<Object>();
          req.getParams().put(name, vals);
        }

        vals.add(val);
      }

      return request(req);
    }
    FetchResult fetchResult = new FetchResult();
    HttpGet get = null;
    HttpEntity entity = null;
    String toFetchURL = req.getUrl();
    try {
      get = new HttpGet(toFetchURL);
      // 设置请求GZIP压缩,注意,前面必须设置GZIP解压缩处理
      get.addHeader("Accept-Encoding", "gzip");
      for (Iterator<Entry<String, String>> it = headers.entrySet().iterator(); it.hasNext(); ) {
        Entry<String, String> entry = it.next();
        get.addHeader(entry.getKey(), entry.getValue());
      }

      // 同步信号量,在真正对服务端进行访问之前进行访问间隔的控制
      // TODO 针对每个请求有一个delay的参数设置
      synchronized (mutex) {
        // 获取当前时间
        long now = (new Date()).getTime();
        // 对同一个Host抓取时间间隔进行控制,若在设置的时限内则进行休眠
        if (now - lastFetchTime < config.getPolitenessDelay())
          Thread.sleep(config.getPolitenessDelay() - (now - lastFetchTime));
        // 不断更新最后的抓取时间,注意,是针对HOST的,不是针对某个URL的
        lastFetchTime = (new Date()).getTime();
      }

      // 记录get请求信息
      Header[] headers = get.getAllHeaders();
      for (Header h : headers) {
        Map<String, List<String>> hs = req.getHeaders();
        String key = h.getName();
        List<String> val = hs.get(key);
        if (val == null) val = new ArrayList<String>();
        val.add(h.getValue());

        hs.put(key, val);
      }

      req.getCookies().putAll(this.cookies);

      fetchResult.setReq(req);
      // 执行get访问,获取服务端返回内容
      HttpResponse response = httpClient.execute(get);
      headers = response.getAllHeaders();
      for (Header h : headers) {
        Map<String, List<String>> hs = fetchResult.getHeaders();
        String key = h.getName();
        List<String> val = hs.get(key);
        if (val == null) val = new ArrayList<String>();
        val.add(h.getValue());

        hs.put(key, val);
      }
      // 设置已访问URL
      fetchResult.setFetchedUrl(toFetchURL);
      String uri = get.getURI().toString();
      if (!uri.equals(toFetchURL))
        if (!URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL))
          fetchResult.setFetchedUrl(uri);

      entity = response.getEntity();
      // 服务端返回的状态码
      int statusCode = response.getStatusLine().getStatusCode();
      if (statusCode != HttpStatus.SC_OK) {
        if (statusCode != HttpStatus.SC_NOT_FOUND) {
          Header locationHeader = response.getFirstHeader("Location");
          // 如果是301、302跳转,获取跳转URL即可返回
          if (locationHeader != null
              && (statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                  || statusCode == HttpStatus.SC_MOVED_TEMPORARILY))
            fetchResult.setMovedToUrl(
                URLCanonicalizer.getCanonicalURL(locationHeader.getValue(), toFetchURL));
        }
        // 只要不是OK的除了设置跳转URL外设置statusCode即可返回
        // 判断是否有忽略状态码的设置
        if (this.site.getSkipStatusCode() != null
            && this.site.getSkipStatusCode().trim().length() > 0) {
          String[] scs = this.site.getSkipStatusCode().split(",");
          for (String code : scs) {
            int c = CommonUtil.toInt(code);
            // 忽略此状态码,依然解析entity
            if (statusCode == c) {
              assemPage(fetchResult, entity);
              break;
            }
          }
        }
        fetchResult.setStatusCode(statusCode);
        return fetchResult;
      }

      // 处理服务端返回的实体内容
      if (entity != null) {
        fetchResult.setStatusCode(statusCode);
        assemPage(fetchResult, entity);
        return fetchResult;
      }
    } catch (Throwable e) {
      fetchResult.setFetchedUrl(e.toString());
      fetchResult.setStatusCode(Status.INTERNAL_SERVER_ERROR.ordinal());
      return fetchResult;
    } finally {
      try {
        if (entity == null && get != null) get.abort();
      } catch (Exception e) {
        throw e;
      }
    }

    fetchResult.setStatusCode(Status.UNSPECIFIED_ERROR.ordinal());
    return fetchResult;
  }