/**
 * Highlights the search keywords inside the content of each weibo.
 *
 * <p>{@code word} is split into tokens with {@code IK.token(word)}; every occurrence of any
 * token in a weibo's content is wrapped in {@code <b class='keyword'>...</b>} and the result
 * is written back with {@code setContent}.
 *
 * <p>Tokens are matched literally. If tokenizing yields no usable token, the weibos are left
 * untouched. Weibos whose content is {@code null} are skipped.
 *
 * @param weibos the weibos whose content is rewritten in place
 * @param word   the raw search text to tokenize and highlight
 */
public void setHilight(List<Weibo> weibos, String word) {
    // Build the alternation by hand so each token can be quoted: a raw token containing a
    // regex metacharacter would otherwise change (or break) the pattern.
    // NOTE(review): assumes IK.token returns an array or an Iterable - confirm.
    StringBuilder alternation = new StringBuilder();
    for (Object token : IK.token(word)) {
        if (token == null) {
            continue;
        }
        String text = token.toString();
        if (text.isEmpty()) {
            continue;
        }
        if (alternation.length() > 0) {
            alternation.append('|');
        }
        alternation.append(Pattern.quote(text));
    }

    // An empty group "()" matches the empty string at every position and would inject an
    // empty <b> tag between all characters, so bail out when there is nothing to highlight.
    if (alternation.length() == 0) {
        return;
    }

    Pattern p = Pattern.compile("(" + alternation + ")");
    for (Weibo weibo : weibos) {
        String content = weibo.getContent();
        if (content == null) {
            continue;
        }
        weibo.setContent(p.matcher(content).replaceAll("<b class='keyword'>$1</b>"));
    }
}
public Weibo findWeiboById(Long id) { Weibo w = elasticSearch.getWeibo(id.toString()); if (changsha) { hbaseWeiboMsg.getHeadPic_changsha(w); } else { w = hbaseWeiboMsg.getWeibo( DateFormatUtils.format(w.getReleaseDate(), "yyyyMMddHHmmss") + "-" + id.toString()); } // mongoDBClient.findWeiboById(w); return w; }
public Object analyseSingleWeibo(String id, int mode) { boolean calcRepostList = (mode == 0) || (mode == 2); boolean calcKeyword = (mode == 0) || (mode == 2); boolean calcKeyUser = (mode == 0) || (mode == 1); boolean simple = mode == 1; Map<String, Object> rsMap = Maps.newHashMap(); try { JSONObject json = elasticSearch.analyseSingleWeibo(id, calcRepostList || calcKeyword); String[] names = JSONObject.getNames(json); for (String string : names) { rsMap.put(string, json.get(string)); } JSONObject time = (JSONObject) json.get("time"); JSONObject top = (JSONObject) json.get("top"); JSONObject contents = (JSONObject) json.get("content"); if (calcKeyUser) { List<Weibo> keyuserArray = Lists.newArrayList(); for (Iterator iter = top.keys(); iter.hasNext(); ) { String mid = iter.next().toString(); if (mid.startsWith("si")) { mid = mid.substring(2); } Weibo msg = findWeiboById(Long.parseLong(mid)); // BSONObject msg = mongoDBClient.findWeiboById(mid); for (int i = 0; i < keyuserArray.size(); i++) { int max = i; for (int j = i + 1; j < keyuserArray.size(); j++) { if (keyuserArray.get(j).getRepostCount() > keyuserArray.get(max).getRepostCount()) { max = j; } } if (max != i) { Weibo tmp = keyuserArray.get(max); keyuserArray.set(max, keyuserArray.get(i)); keyuserArray.set(i, tmp); } } keyuserArray.add(msg); } rsMap.put("keyuser", keyuserArray); } if (calcKeyword) { Weibo w = findWeiboById(Long.parseLong(id)); rsMap.put("senti", 0); rsMap.put("keyword", KeyWordMapJson); if (ok) { ok = true; try { int senti = (int) (10 * SentimentAnalysis.calcSentiScore(w.getContent())); rsMap.put("senti", senti); } catch (Exception e) { rsMap.put("senti", 0); } try { StringBuilder sb = new StringBuilder(w.getContent()); for (Iterator iter = contents.keys(); iter.hasNext(); ) { String mid = iter.next().toString(); String ww = contents.get(mid).toString(); ww = StringUtils.substringBefore(ww, "//"); if (ww.length() > 5) { sb.append(ww); } } String map = 
KeywordExtractor.extractKeyword(sb.toString()); if (map.equals("0")) { throw new RuntimeException(); } rsMap.put("keyword", JSON.parse(map)); ok = true; } catch (Throwable e) { e.printStackTrace(); } } } if (calcRepostList && (elasticSearch instanceof CrawlerIndex)) { Weibo ww = elasticSearch.getWeibo(id); ((CrawlerIndex) elasticSearch).findAllReposts(ww); rsMap.put("repostList", ww); } } catch (JSONException e) { e.printStackTrace(); } return rsMap; }