Пример #1
0
  /**
   * Downloads the Weibo home page of the given user and returns its text.
   *
   * <p>Issues a GET to {@code http://www.weibo.com/u/<uid>} using the shared {@code headers}
   * field, which this method updates in place (browser-like headers, a hard-coded session cookie
   * and the {@code Host}). The response text is passed through
   * {@code EncodeUtils.unicdoeToGB2312} and also echoed to standard output between start/end
   * markers.
   *
   * @param uid Weibo user id appended verbatim to the profile URL
   * @return the converted response text
   */
  public String searchCommentsByUid(String uid) {
    // NOTE(review): hard-coded session cookie captured from a browser; it is presumably expired
    // and should come from configuration or the login flow instead.
    final String sessionCookie =
        "SINAGLOBAL=8556698272004.724.1417744632425; myuid=5438576807; wvr=6; YF-Ugrow-G0=ad06784f6deda07eea88e095402e4243; SSOLoginState=1423150079; YF-V5-G0=32eb5467e9bfc8b60c2d771056535ac5; _s_tentry=www.weibo.com; Apache=6264929557219.147.1423150103832; ULV=1423150103842:18:2:2:6264929557219.147.1423150103832:1422769721265; ULOGIN_IMG=1423233797946; YF-Page-G0=82cdcdfb16327a659fbb60cc9368fb19; SUS=SID-2035860051-1423286223-GZ-jdkh4-c8ea11de0a42151313986e52f9aa6017; SUE=es%3D8701ff5aca59244ff1ff263cf985bee6%26ev%3Dv1%26es2%3D7995c9eb7455697c09fac4f7486e14eb%26rs0%3DTyXXIRjcEw%252BeS5PaVSM%252FhQjc2JGhKBOe3uFTgShiIUAbPFI2eKtrgxM2wIi9A1xndiTFFM72zY%252FDKYFXONrgkao5cRo%252FHkydV%252FnaQjNmXoeESu5gi6Iq0aX883NhGR0utBVNZb5XaIG3X6HMMfBJC%252B7pnVHogEo8eD6cx8nzN5c%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1423286223%26et%3D1423372623%26d%3Dc909%26i%3D6017%26us%3D1%26vf%3D0%26vt%3D0%26ac%3D0%26st%3D0%26uid%3D2035860051%26name%3Dshy_annan%2540126.com%26nick%3D%25E7%2594%25A8%25E6%2588%25B72035860051%26fmp%3D%26lcp%3D2013-08-18%252021%253A48%253A10; SUB=_2A2550e-fDeTxGeRO6FcZ9i7Mzj2IHXVap0ZXrDV8PUNbvtBuLWnTkW-gBGVORTA7J_lSZzAqzW6E50JjBQ..; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh7oKNCGYcNnhlC6eqqQbbl5JpX5KMt; SUHB=0M20OGRPiOKzyc; ALF=1454822222; UOR=www.ilehao.com,widget.weibo.com,login.sina.com.cn";
    final String profileUrl = "http://www.weibo.com/u/" + uid;

    // Browser-like request headers, written in the same order as before.
    headers.put(
        "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    headers.put("Cookie", sessionCookie);
    headers.put("Host", "www.weibo.com");

    HttpResponse httpResponse = HttpUtils.doGet(profileUrl, headers);
    String page = EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(httpResponse));

    // Debug dump of the fetched page.
    System.out.println("searchCommentsByUid start");
    System.out.println(page);
    System.out.println("searchCommentsByUid end");

    return page;
  }
Пример #2
0
  /**
   * Fetches one page of Weibo search results for the given keyword.
   *
   * <p>Issues a GET to {@code http://s.weibo.com/weibo/<keyword>&page=<pageNo>} using the shared
   * {@code headers} field, which this method updates in place (browser-like headers, a Referer, a
   * hard-coded session cookie and the {@code Host}). The response text is passed through
   * {@code EncodeUtils.unicdoeToGB2312} before being returned.
   *
   * <p>The keyword is inserted into the URL verbatim. The value that used to be hard-coded here
   * ({@code %25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6}, i.e. "宁波大学") was
   * URL-encoded twice, so callers presumably need to pass the keyword in that form — TODO confirm.
   *
   * @param keyword search term, already encoded as the search endpoint expects
   * @param pageNo result page number placed in the {@code page} query parameter
   * @return the converted response text
   */
  public String search(String keyword, int pageNo) {
    // Fix: the URL previously ignored `keyword` and always searched for one hard-coded term.
    String url = "http://s.weibo.com/weibo/" + keyword + "&page=" + pageNo;

    /* About uid: the uid is the user id; it can be used to look up a user's basic information
    such as follower and following counts.
    1) If the user has not set a custom domain, the uid is the digit string at the end of the
    user's Weibo address, e.g. for "http://t.sina.cn/12345678..." the uid is "12345678...".
    2) If the user has set a custom domain, hover over the follower-count link on the right side
    of the user's page and inspect the link address; the digit string in it is the uid, e.g.
    http://weibo.com/12345678.../fans?leftnav=1&wvr=4 gives the uid "12345678...".
    */
    // wvr is only a Weibo version number and has no effect on the request.
    // NOTE(review): hard-coded session cookie captured from a browser; it is presumably expired
    // and should come from configuration or the login flow instead.
    String cookieValue =
        "SINAGLOBAL=8556698272004.724.1417744632425; [email protected]; myuid=2283740497; wvr=6; [email protected]; _s_tentry=developer.51cto.com; SWB=usrmdinst_14; SUS=SID-5438576807-1419173757-GZ-lrze7-d8e1e3f082b428c12412c8ba30f0a6de; SUE=es%3D4cdfdd5d5f0f75141c092b32f89525a2%26ev%3Dv1%26es2%3D469e50c869315e57efeec3012c3bb6a8%26rs0%3DoWdG36CQ33LUEtKTvGn907Zy1mwFETvSVJsxeHEiaMPcKDB7pFxg596a2pLhFLJfQmswf4AvXYAkzTfemrYgWrz%252BQPustEA2wLNYufYpAZqFsGWanhTBq6elzB2yoZp41xcpy1WwXn1CuvzIzzEYpuILjHahkmJDQDQy6KaxlbA%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1419173757%26et%3D1419260157%26d%3Dc909%26i%3Da6de%26us%3D1%26vf%3D0%26vt%3D0%26ac%3D27%26st%3D0%26uid%3D5438576807%26name%3Dsm2014121904%2540126.com%26nick%3DSocialMedia%25E5%259B%259B%25E5%25A8%2583%26fmp%3D%26lcp%3D; SUB=_2A255kq8tDeTxGeNK6FoU9yjEyzuIHXVa6DVlrDV8PUNbvtBeLW3TkW-bMoi0G_bBfpbS3TMqcXg6zDWFLA..; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhGThsH46uNrx1VY0ApV0SR5JpX5KMt; ALF=1450709756; SSOLoginState=1419173757; WBStore=bc5ad8450c3f8a48|undefined; Apache=1027467835228.8901.1419173761694; ULV=1419173761704:6:6:1:1027467835228.8901.1419173761694:1418797827169; UOR=www.ilehao.com,widget.weibo.com,login.sina.com.cn; ULOGIN_IMG=14192385783486";
    headers.put(
        "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    // Referer imitates arriving from the SSO login redirect; kept exactly as captured.
    headers.put(
        "Referer",
        "http://login.sina.com.cn/sso/login.php?url=http%3A%2F%2Fs.weibo.com%2Fweibo%2F%2525E6%252583%2525A0%2525E6%252599%2525AE%26page%3D2&_rand=1419173756.6387&gateway=1&service=weibo&entry=miniblog&useticket=1&returntype=META");
    headers.put("Cookie", cookieValue);
    headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(responseText);
  }
Пример #3
0
 /**
  * Fetches the top-level categories of Weibo groups ("微群").
  *
  * <p>Sets the {@code Host} entry of the shared {@code headers} field to {@code q.weibo.com},
  * issues a GET to {@code http://q.weibo.com/} and returns the response text after passing it
  * through {@code EncodeUtils.unicdoeToGB2312}.
  *
  * @return the converted response text of the group home page
  */
 public String getGroupCategory() {
   headers.put("Host", "q.weibo.com");
   HttpResponse groupHome = HttpUtils.doGet("http://q.weibo.com/", headers);
   return EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(groupHome));
 }
Пример #4
0
  /**
   * Fetches one page of Weibo search results for a keyword within a custom time range.
   *
   * <p>Builds the URL
   * {@code http://s.weibo.com/weibo/<keyword>&page=<pageNo>&typeall=1&suball=1&timescope=custom:<fromdate>:<todate>&Refer=g},
   * issues a GET with the shared {@code headers} field (updated in place with browser-like
   * headers, a matching Referer and the {@code Host}) and returns the response text after passing
   * it through {@code EncodeUtils.unicdoeToGB2312}.
   *
   * <p>Whatever {@code Cookie} entry is already stored in {@code headers} is sent unchanged; this
   * method does not set one itself.
   *
   * <p>The keyword is inserted verbatim; the sample value used during testing was the
   * double-URL-encoded form of "宁波大学"
   * ({@code %25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6}), so callers presumably
   * need to pass it in that form — TODO confirm.
   *
   * @param keyword search term, already encoded as the search endpoint expects
   * @param pageNo result page number placed in the {@code page} query parameter
   * @param fromdate start of the time range, inserted verbatim into {@code timescope}
   * @param todate end of the time range, inserted verbatim into {@code timescope}
   * @return the converted response text
   */
  public String search(String keyword, int pageNo, String fromdate, String todate) {
    String searchBase = "http://s.weibo.com/weibo/" + keyword;
    String timeScope = "&typeall=1&suball=1&timescope=custom:" + fromdate + ":" + todate + "&Refer=g";

    // Final URL used to run the search on Sina Weibo.
    // StringBuilder replaces StringBuffer: the builder never leaves this method, so
    // synchronisation is unnecessary.
    String url =
        new StringBuilder(200)
            .append(searchBase)
            .append("&page=")
            .append(pageNo)
            .append(timeScope)
            .toString();

    headers.put(
        "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    // Fix: the Referer used to hard-code the test keyword; it now refers to the search page of the
    // keyword actually being queried.
    headers.put("Referer", searchBase + timeScope);
    // The previous get("Cookie")/put("Cookie", ...) round trip was removed: it changed nothing when
    // a cookie was present and inserted a null-valued Cookie entry when none was.
    headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(responseText);
  }
Пример #5
0
  /**
   * Fetches the Weibo groups that belong to a category.
   *
   * <p>Updates the shared {@code headers} field (Referer, Host, Content-Type and
   * {@code x-requested-with}) and POSTs the form fields {@code _t}, {@code page}, {@code id},
   * {@code sort} and {@code count} to {@code http://q.weibo.com/ajax/class/category}. The response
   * text is passed through {@code EncodeUtils.unicdoeToGB2312} before being returned.
   *
   * @param categroyID category id
   * @param pagenumber page number
   * @param sort ordering: 1 = by member count, 2 = by number of group posts, 3 = by creation time
   * @param count number of records per page
   * @return the converted response text
   */
  public String getGroupByCategroy(int categroyID, int pagenumber, int sort, int count) {
    final String endpoint = "http://q.weibo.com/ajax/class/category";

    headers.put("Referer", "http://q.weibo.com/class/category/?id=" + categroyID);
    headers.put("Host", "q.weibo.com");
    headers.put("Content-Type", "application/x-www-form-urlencoded");
    headers.put("x-requested-with", "XMLHttpRequest");

    // Form body of the AJAX request.
    Map<String, String> form = new HashMap<String, String>();
    form.put("_t", "0");
    form.put("page", String.valueOf(pagenumber));
    form.put("id", String.valueOf(categroyID));
    form.put("sort", String.valueOf(sort));
    form.put("count", String.valueOf(count));

    HttpResponse reply = HttpUtils.doPost(endpoint, headers, form);
    return EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(reply));
  }
Пример #6
0
 /**
  * Requests {@code Constant.personalHomePage} and stores the cookies from the response in the
  * shared {@code headers} field.
  *
  * <p>Browser-like headers and the {@code Host} are written to {@code headers} before the GET.
  * Afterwards the cookies returned by {@code HttpUtils.getResponseCookies} are serialised with
  * {@code HttpUtils.setCookie2String} and saved under the {@code Cookie} key, replacing any
  * previous value.
  */
 public void forwardToWeiboPage() {
   headers.put(
       "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
   headers.put("Accept-Language", "zh-CN");
   headers.put(
       "User-Agent",
       "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
   headers.put("Connection", "Keep-Alive");
   headers.put("Host", "s.weibo.com");

   HttpResponse homePage = HttpUtils.doGet(Constant.personalHomePage, headers);

   // The body is read and converted even though the text is not used afterwards; the calls are
   // kept so the sequence of operations on the response stays the same (reading the body may
   // matter to the underlying connection — TODO confirm).
   EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(homePage));

   // Carry the cookies from this response over to subsequent requests.
   headers.put("Cookie", HttpUtils.setCookie2String(HttpUtils.getResponseCookies(homePage)));
 }