Esempio n. 1
0
  /**
   * Manual smoke test for {@code CrawlerIndex}.
   *
   * <p>Runs, in order: a keyword search, a date-range search with a {@code null} keyword, a search
   * by user id, a lookup of a single weibo by id, and the expansion of that weibo's reposts. The
   * hit count and the first hit (serialized as JSON) are printed after each search.
   *
   * @param args unused
   * @throws IOException propagated from the index calls or from JSON serialization
   * @throws ParseException if one of the hard-coded date strings cannot be parsed
   */
  public static void main(String[] args) throws IOException, ParseException {
    final String datePattern = "yyyyMMdd HHmmss";
    final String rangeStart = "20150528 000000";
    final String rangeEnd = "20150529 000000";
    final String sortField = "repostcount";
    // Printed instead of the first hit when a search returns nothing; calling get(0) on an
    // empty list would abort the whole run with IndexOutOfBoundsException.
    final String noHits = "(no results)";

    ObjectMapper objectMapper = new ObjectMapper();
    CrawlerIndex crawlerIndex = new CrawlerIndex();

    // Search weibos by keyword (an empty keyword here).
    List<Weibo> weibos = Lists.newArrayList();
    int total =
        crawlerIndex.search(
            "",
            0,
            5,
            weibos,
            sortField,
            SortOrder.DESC,
            DateUtils.parseDate(rangeStart, datePattern),
            DateUtils.parseDate(rangeEnd, datePattern),
            true,
            0);
    System.out.println(total);
    System.out.println(
        weibos.isEmpty() ? noHits : objectMapper.writeValueAsString(weibos.get(0)));

    // Search weibos within a time range (no keyword).
    weibos = Lists.newArrayList();
    total =
        crawlerIndex.search(
            null,
            0,
            5,
            weibos,
            sortField,
            SortOrder.DESC,
            DateUtils.parseDate(rangeStart, datePattern),
            DateUtils.parseDate(rangeEnd, datePattern),
            true,
            0);
    System.out.println(total);
    System.out.println(
        weibos.isEmpty() ? noHits : objectMapper.writeValueAsString(weibos.get(0)));

    // Search weibos by user id.
    weibos = Lists.newArrayList();
    total =
        crawlerIndex.searchByUid(
            "2028810631",
            0,
            5,
            weibos,
            sortField,
            SortOrder.DESC,
            DateUtils.parseDate(rangeStart, datePattern),
            DateUtils.parseDate(rangeEnd, datePattern));
    System.out.println("按用户名搜索微博 " + total);
    System.out.println(
        weibos.isEmpty() ? noHits : objectMapper.writeValueAsString(weibos.get(0)));

    // Look a weibo up by its id.
    long id = 3847603911216922L;
    Weibo w = crawlerIndex.getWeibo(String.valueOf(id));
    System.out.println("!!!" + objectMapper.writeValueAsString(w));

    // Find its reposts.
    // NOTE(review): assumes getWeibo may return null for an unknown id — confirm; in that case
    // there is nothing to expand and the final line simply prints the serialized null.
    if (w != null) {
      crawlerIndex.findAllReposts(w);
    }
    System.out.println(objectMapper.writeValueAsString(w));
  }
Esempio n. 2
0
  /**
   * Builds the analysis result for a single weibo as a map of named sections.
   *
   * <p>Every top-level field of the JSON returned by {@code elasticSearch.analyseSingleWeibo} is
   * copied into the result. Depending on {@code mode}, further entries are added:
   *
   * <ul>
   *   <li>{@code "keyuser"} (mode 0 or 1): the weibos named by the keys of the {@code "top"}
   *       object, ordered by repost count, highest first;
   *   <li>{@code "senti"} and {@code "keyword"} (mode 0 or 2): a sentiment score and extracted
   *       keywords, falling back to {@code 0} and {@code KeyWordMapJson} when either step fails;
   *   <li>{@code "repostList"} (mode 0 or 2, and only when {@code elasticSearch} is a {@code
   *       CrawlerIndex}): the weibo with its reposts expanded.
   * </ul>
   *
   * <p>A {@code JSONException} raised anywhere in the process is printed and whatever has been
   * collected so far is returned.
   *
   * @param id id of the weibo to analyse; parsed with {@code Long.parseLong} when keywords are
   *     requested
   * @param mode 0, 1 or 2 as described above; any other value adds no optional entries
   * @return a {@code Map<String, Object>} holding the sections described above
   */
  public Object analyseSingleWeibo(String id, int mode) {
    boolean calcRepostList = (mode == 0) || (mode == 2);
    boolean calcKeyword = (mode == 0) || (mode == 2);
    boolean calcKeyUser = (mode == 0) || (mode == 1);
    Map<String, Object> rsMap = Maps.newHashMap();
    try {
      JSONObject json = elasticSearch.analyseSingleWeibo(id, calcRepostList || calcKeyword);

      // getNames returns null (not an empty array) for an object without keys.
      String[] names = JSONObject.getNames(json);
      if (names != null) {
        for (String name : names) {
          rsMap.put(name, json.get(name));
        }
      }

      // "time" is not used below; the lookup is kept so that a response lacking it is still
      // rejected through the JSONException handler, as before.
      json.get("time");
      JSONObject top = (JSONObject) json.get("top");
      JSONObject contents = (JSONObject) json.get("content");

      if (calcKeyUser) {
        rsMap.put("keyuser", collectKeyUsers(top));
      }

      if (calcKeyword) {
        Weibo w = findWeiboById(Long.parseLong(id));

        // Defaults, replaced below only when the corresponding step succeeds.
        rsMap.put("senti", 0);
        rsMap.put("keyword", KeyWordMapJson);
        // NOTE(review): `ok` is declared outside this method and is only read here; the original
        // re-assigned it to true while it was already true, which had no effect.
        if (ok) {
          try {
            int senti = (int) (10 * SentimentAnalysis.calcSentiScore(w.getContent()));
            rsMap.put("senti", senti);
          } catch (Exception ignored) {
            // Best effort: the default score of 0 stored above stays in place.
          }

          try {
            // Keyword extraction runs over the weibo's own text plus each entry of "content".
            StringBuilder sb = new StringBuilder(w.getContent());
            for (Iterator<?> iter = contents.keys(); iter.hasNext(); ) {
              String mid = iter.next().toString();
              // Keep only the part before the first "//" and skip very short fragments.
              String text = StringUtils.substringBefore(contents.get(mid).toString(), "//");
              if (text.length() > 5) {
                sb.append(text);
              }
            }

            String extracted = KeywordExtractor.extractKeyword(sb.toString());
            // The literal "0" is treated as "no usable result"; keep the default in that case.
            if (extracted != null && !extracted.equals("0")) {
              rsMap.put("keyword", JSON.parse(extracted));
            }
          } catch (Throwable e) {
            // Deliberately broad: keyword extraction is best effort and must not fail the call.
            e.printStackTrace();
          }
        }
      }

      if (calcRepostList && (elasticSearch instanceof CrawlerIndex)) {
        Weibo root = elasticSearch.getWeibo(id);
        // NOTE(review): assumes getWeibo may return null for an unknown id — confirm.
        if (root != null) {
          ((CrawlerIndex) elasticSearch).findAllReposts(root);
        }
        rsMap.put("repostList", root);
      }

    } catch (JSONException e) {
      e.printStackTrace();
    }
    return rsMap;
  }

  /**
   * Loads the weibo behind every key of {@code top} and returns them ordered by repost count,
   * highest first.
   *
   * <p>The list is sorted once after all weibos are collected; sorting before each insertion, as
   * the code previously did, left the last inserted element out of order.
   */
  private List<Weibo> collectKeyUsers(JSONObject top) throws JSONException {
    List<Weibo> keyUsers = Lists.newArrayList();
    for (Iterator<?> iter = top.keys(); iter.hasNext(); ) {
      String mid = iter.next().toString();
      // Keys may carry an "si" prefix in front of the numeric id.
      if (mid.startsWith("si")) {
        mid = mid.substring(2);
      }
      Weibo msg = findWeiboById(Long.parseLong(mid));
      // NOTE(review): assumes findWeiboById yields null for ids it cannot resolve — confirm.
      // Such entries are skipped because they cannot be ranked.
      if (msg != null) {
        keyUsers.add(msg);
      }
    }
    java.util.Collections.sort(
        keyUsers,
        new java.util.Comparator<Weibo>() {
          @Override
          public int compare(Weibo left, Weibo right) {
            if (left.getRepostCount() > right.getRepostCount()) {
              return -1;
            }
            return left.getRepostCount() < right.getRepostCount() ? 1 : 0;
          }
        });
    return keyUsers;
  }