Beispiel #1
0
 /**
  * Highlights the tokens of a search word inside the content of each weibo.
  *
  * <p>The word is tokenized with {@code IK.token} and every occurrence of any token in a weibo's
  * content is wrapped in {@code <b class='keyword'>...</b>}; the rewritten text is stored back
  * via {@code setContent}. Tokens are matched literally, so regex metacharacters in a token
  * cannot break or distort the pattern. If tokenizing yields nothing to match, the weibos are
  * left untouched.
  *
  * <p>NOTE(review): the content is not HTML-escaped here; if it can hold untrusted text the
  * caller presumably has to escape it before rendering — confirm.
  *
  * @param weibos weibos whose content is rewritten in place; entries with {@code null} content
  *     are skipped
  * @param word the search word to highlight
  */
 public void setHilight(List<Weibo> weibos, String word) {
   String ww = join(IK.token(word), "|");
   if (ww == null || ww.isEmpty()) {
     // An empty alternation would compile to "()", which matches at every position.
     return;
   }

   // Rebuild the alternation from literal-quoted tokens.
   StringBuilder alternation = new StringBuilder();
   for (String token : ww.split("\\|")) {
     if (token.isEmpty()) {
       continue;
     }
     if (alternation.length() > 0) {
       alternation.append('|');
     }
     alternation.append(Pattern.quote(token));
   }
   if (alternation.length() == 0) {
     return;
   }

   Pattern p = Pattern.compile("(" + alternation + ")");
   for (Weibo weibo : weibos) {
     String content = weibo.getContent();
     if (content == null) {
       continue;
     }
     weibo.setContent(p.matcher(content).replaceAll("<b class='keyword'>$1</b>"));
   }
 }
Beispiel #2
0
  /**
   * Loads a weibo by its numeric id.
   *
   * <p>The weibo is first fetched from {@code elasticSearch}. When the {@code changsha} flag is
   * set, that object is passed to {@code hbaseWeiboMsg.getHeadPic_changsha} and returned.
   * Otherwise the weibo is fetched again from {@code hbaseWeiboMsg} under a key made of the
   * release date formatted as {@code yyyyMMddHHmmss}, a dash, and the id.
   *
   * @param id the weibo id
   * @return the weibo obtained as described above
   */
  public Weibo findWeiboById(Long id) {
    Weibo indexed = elasticSearch.getWeibo(id.toString());

    if (!changsha) {
      String timestamp = DateFormatUtils.format(indexed.getReleaseDate(), "yyyyMMddHHmmss");
      String rowKey = timestamp + "-" + id.toString();
      return hbaseWeiboMsg.getWeibo(rowKey);
    }

    hbaseWeiboMsg.getHeadPic_changsha(indexed);
    return indexed;
  }
Beispiel #3
0
  /**
   * Builds the analysis result for a single weibo as a {@code Map<String, Object>}.
   *
   * <p>Every top-level entry of the JSON returned by {@code elasticSearch.analyseSingleWeibo} is
   * copied into the result. Depending on {@code mode}, further entries are added:
   *
   * <ul>
   *   <li>{@code "keyuser"} (mode 0 or 1): the weibos named by the keys of the JSON's
   *       {@code "top"} object, sorted by repost count, highest first;
   *   <li>{@code "senti"} (mode 0 or 2): ten times the score from
   *       {@code SentimentAnalysis.calcSentiScore}, truncated to an int; 0 if it cannot be
   *       computed;
   *   <li>{@code "keyword"} (mode 0 or 2): the parsed output of
   *       {@code KeywordExtractor.extractKeyword} over the weibo's content plus the entries of
   *       the JSON's {@code "content"} object; {@code KeyWordMapJson} if extraction fails;
   *   <li>{@code "repostList"} (mode 0 or 2, only when {@code elasticSearch} is a
   *       {@code CrawlerIndex}): the weibo after {@code findAllReposts} has been run on it.
   * </ul>
   *
   * <p>A {@code JSONException} is printed and the entries gathered so far are returned.
   *
   * @param id the weibo id, as a decimal string
   * @param mode selects which parts are computed (see above)
   * @return the result map
   */
  public Object analyseSingleWeibo(String id, int mode) {

    boolean calcRepostList = (mode == 0) || (mode == 2);
    boolean calcKeyword = (mode == 0) || (mode == 2);
    boolean calcKeyUser = (mode == 0) || (mode == 1);
    Map<String, Object> rsMap = Maps.newHashMap();
    try {

      JSONObject json = elasticSearch.analyseSingleWeibo(id, calcRepostList || calcKeyword);

      // getNames may return null instead of an empty array for an object without keys
      // (org.json behaves this way), so guard before iterating.
      String[] names = JSONObject.getNames(json);
      if (names != null) {
        for (String name : names) {
          rsMap.put(name, json.get(name));
        }
      }

      if (calcKeyUser) {
        // Looked up only in this branch so that modes which never read "top" do not fail
        // when it is absent.
        JSONObject top = (JSONObject) json.get("top");
        List<Weibo> keyuserArray = Lists.newArrayList();
        for (Iterator<?> iter = top.keys(); iter.hasNext(); ) {
          String mid = iter.next().toString();
          if (mid.startsWith("si")) {
            mid = mid.substring(2);
          }
          keyuserArray.add(findWeiboById(Long.parseLong(mid)));
        }

        // Sort once, after everything is collected, so that every element (including the
        // last one added) ends up in descending repost-count order.
        java.util.Collections.sort(
            keyuserArray,
            new java.util.Comparator<Weibo>() {
              @Override
              public int compare(Weibo a, Weibo b) {
                if (a.getRepostCount() > b.getRepostCount()) {
                  return -1;
                }
                if (a.getRepostCount() < b.getRepostCount()) {
                  return 1;
                }
                return 0;
              }
            });
        rsMap.put("keyuser", keyuserArray);
      }

      if (calcKeyword) {
        // Looked up only in this branch, for the same reason as "top" above.
        JSONObject contents = (JSONObject) json.get("content");
        Weibo w = findWeiboById(Long.parseLong(id));

        // Defaults; they stay in place when a step below fails or is skipped.
        rsMap.put("senti", 0);
        rsMap.put("keyword", KeyWordMapJson);
        if (ok) {
          // NOTE(review): `ok` is declared outside this method, and this method only ever
          // assigns true to it, so the guard never switches itself off here — confirm intent.
          ok = true;
          try {
            int senti = (int) (10 * SentimentAnalysis.calcSentiScore(w.getContent()));
            rsMap.put("senti", senti);
          } catch (Exception e) {
            // Best effort: fall back to the neutral default.
            rsMap.put("senti", 0);
          }

          try {
            // Text to analyse: the weibo's own content followed by each "content" entry,
            // cut at the first "//" and kept only when longer than 5 characters.
            StringBuilder sb = new StringBuilder(w.getContent());
            for (Iterator<?> iter = contents.keys(); iter.hasNext(); ) {
              String mid = iter.next().toString();
              String ww = StringUtils.substringBefore(contents.get(mid).toString(), "//");
              if (ww.length() > 5) {
                sb.append(ww);
              }
            }

            String map = KeywordExtractor.extractKeyword(sb.toString());

            // "0" is treated as "no result": keep the default instead of parsing it.
            if (map != null && !map.equals("0")) {
              rsMap.put("keyword", JSON.parse(map));
              ok = true;
            }
          } catch (Throwable e) {
            // Deliberately broad: keyword extraction is best effort and must not abort
            // the rest of the analysis.
            e.printStackTrace();
          }
        }
      }

      if (calcRepostList && (elasticSearch instanceof CrawlerIndex)) {
        Weibo ww = elasticSearch.getWeibo(id);
        ((CrawlerIndex) elasticSearch).findAllReposts(ww);

        rsMap.put("repostList", ww);
      }

    } catch (JSONException e) {
      e.printStackTrace();
    }
    return rsMap;
  }