public static void main(String[] args) throws IOException, ParseException { ObjectMapper objectMapper = new ObjectMapper(); CrawlerIndex crawlerIndex = new CrawlerIndex(); List<Weibo> weibos = Lists.newArrayList(); // 关键词搜索微博 int total = crawlerIndex.search( "", 0, 5, weibos, "repostcount", SortOrder.DESC, DateUtils.parseDate("20150528 000000", "yyyyMMdd HHmmss"), DateUtils.parseDate("20150529 000000", "yyyyMMdd HHmmss"), true, 0); System.out.println(total); System.out.println(objectMapper.writeValueAsString(weibos.get(0))); // 搜索一段时间的微博 weibos = Lists.newArrayList(); total = crawlerIndex.search( null, 0, 5, weibos, "repostcount", SortOrder.DESC, DateUtils.parseDate("20150528 000000", "yyyyMMdd HHmmss"), DateUtils.parseDate("20150529 000000", "yyyyMMdd HHmmss"), true, 0); System.out.println(total); System.out.println(objectMapper.writeValueAsString(weibos.get(0))); // 按用户ID搜索微博 weibos = Lists.newArrayList(); total = crawlerIndex.searchByUid( "2028810631", 0, 5, weibos, "repostcount", SortOrder.DESC, DateUtils.parseDate("20150528 000000", "yyyyMMdd HHmmss"), DateUtils.parseDate("20150529 000000", "yyyyMMdd HHmmss")); System.out.println("按用户名搜索微博 " + total); System.out.println(objectMapper.writeValueAsString(weibos.get(0))); // 通过id找微博 Long id = 3847603911216922L; Weibo w = crawlerIndex.getWeibo(id.toString()); System.out.println("!!!" + objectMapper.writeValueAsString(w)); // 找转发 crawlerIndex.findAllReposts(w); // w.setReposts(Lists.<Weibo> newArrayList()); System.out.println(objectMapper.writeValueAsString(w)); }
public Object analyseSingleWeibo(String id, int mode) { boolean calcRepostList = (mode == 0) || (mode == 2); boolean calcKeyword = (mode == 0) || (mode == 2); boolean calcKeyUser = (mode == 0) || (mode == 1); boolean simple = mode == 1; Map<String, Object> rsMap = Maps.newHashMap(); try { JSONObject json = elasticSearch.analyseSingleWeibo(id, calcRepostList || calcKeyword); String[] names = JSONObject.getNames(json); for (String string : names) { rsMap.put(string, json.get(string)); } JSONObject time = (JSONObject) json.get("time"); JSONObject top = (JSONObject) json.get("top"); JSONObject contents = (JSONObject) json.get("content"); if (calcKeyUser) { List<Weibo> keyuserArray = Lists.newArrayList(); for (Iterator iter = top.keys(); iter.hasNext(); ) { String mid = iter.next().toString(); if (mid.startsWith("si")) { mid = mid.substring(2); } Weibo msg = findWeiboById(Long.parseLong(mid)); // BSONObject msg = mongoDBClient.findWeiboById(mid); for (int i = 0; i < keyuserArray.size(); i++) { int max = i; for (int j = i + 1; j < keyuserArray.size(); j++) { if (keyuserArray.get(j).getRepostCount() > keyuserArray.get(max).getRepostCount()) { max = j; } } if (max != i) { Weibo tmp = keyuserArray.get(max); keyuserArray.set(max, keyuserArray.get(i)); keyuserArray.set(i, tmp); } } keyuserArray.add(msg); } rsMap.put("keyuser", keyuserArray); } if (calcKeyword) { Weibo w = findWeiboById(Long.parseLong(id)); rsMap.put("senti", 0); rsMap.put("keyword", KeyWordMapJson); if (ok) { ok = true; try { int senti = (int) (10 * SentimentAnalysis.calcSentiScore(w.getContent())); rsMap.put("senti", senti); } catch (Exception e) { rsMap.put("senti", 0); } try { StringBuilder sb = new StringBuilder(w.getContent()); for (Iterator iter = contents.keys(); iter.hasNext(); ) { String mid = iter.next().toString(); String ww = contents.get(mid).toString(); ww = StringUtils.substringBefore(ww, "//"); if (ww.length() > 5) { sb.append(ww); } } String map = KeywordExtractor.extractKeyword(sb.toString()); if (map.equals("0")) { throw new RuntimeException(); } rsMap.put("keyword", JSON.parse(map)); ok = true; } catch (Throwable e) { e.printStackTrace(); } } } if (calcRepostList && (elasticSearch instanceof CrawlerIndex)) { Weibo ww = elasticSearch.getWeibo(id); ((CrawlerIndex) elasticSearch).findAllReposts(ww); rsMap.put("repostList", ww); } } catch (JSONException e) { e.printStackTrace(); } return rsMap; }