/**
 * Releases all external resources held by this worker.
 *
 * <p>Fix: the original closed {@code producer} and {@code consumer} only when
 * {@code conn} was non-null, leaking both Kafka clients if the Cassandra
 * connection was never established (e.g. {@code init} failed partway). Each
 * resource is now closed independently under its own null guard.
 */
@Override
public void onStop() {
    if (conn != null) {
        conn.close();
        conn = null;
    }
    if (producer != null) {
        producer.close();
    }
    if (consumer != null) {
        consumer.close();
    }
}
@Override public void run() { logger.info("begin to consume tweets"); controller.running(); while (true) { susSem.acquireUninterruptibly(); try { for (Entry<String, List<Status>> topicData : consumer.nextStatus().entrySet()) { try { // 在高速转发的情况下,这样可以减轻写入到kafka中的状态数据 Map<String, TimeSeriesUpdateState> states = new HashMap<String, TimeSeriesUpdateState>(); List<TimeSeriesUpdateState> segSignal = new ArrayList<TimeSeriesUpdateState>(); for (Status cur : topicData.getValue()) { tweetDao.putTweet(cur); if (cur.getRetweetedStatus() != null) { // rand.nextFloat() < 0.01 && if (tweetDao.getStatusByMid(cur.getRetweetedStatus().getMid()) == null) { tweetDao.putTweet(cur.getRetweetedStatus()); } tweetDao.putRtweet(cur); for (TimeSeriesUpdateState state : tweetDao.updateRtTimeSeries(cur)) { states.put(state.getMid(), state); } } else { // maybe it is the first tweet or an indication // for end of monitoring if (cur.getMid() != null) { SegState state = segDao.getSegState(cur.getMid()); if (state != null) { long updateDate = DateUtil.roundByHour(System.currentTimeMillis()); segSignal.add(new TimeSeriesUpdateState(state.mid, updateDate, true)); List<String> mids = tweetDao.getRtMids(state.mid); for (String rtMid : mids) { segSignal.add(new TimeSeriesUpdateState(rtMid, updateDate, true)); } } } } } for (TimeSeriesUpdateState state : segSignal) { states.put(state.mid, state); } for (TimeSeriesUpdateState state : states.values()) { logger.info("update time series " + state); producer.storeTsUpdateState(state); } } catch (Exception ex) { ex.printStackTrace(); } } } finally { susSem.release(); } } }
/**
 * Wires up all backing services before the consume loop starts.
 *
 * <p>Order matters: the Cassandra connection is established first because both
 * DAOs wrap it; the Kafka consumer and producer are opened afterwards, and the
 * performance-profile server is registered last.
 *
 * @param kafkaServers Kafka bootstrap server list
 * @param dbServers    Cassandra contact points
 * @param restart      passed through to the consumer's open() call
 * @throws Exception if any backing service fails to connect
 */
public void init(String kafkaServers, String dbServers, boolean restart) throws Exception {
    logger.info("connecting to cassandra");
    conn = new CassandraConn();
    conn.connect(dbServers);

    logger.info("create TweetDao");
    tweetDao = new TweetDao(conn);
    segDao = new SegStateDao(conn);

    logger.info("connecting to kafka");
    consumer = new TweetConsumer();
    consumer.open(
            Arrays.asList(KafkaTopics.RETWEET_TOPIC),
            KafkaTopics.RTSERIES_GROUP,
            kafkaServers,
            restart);
    producer = new TweetKafkaProducer(KafkaProducerFactory.createProducer(kafkaServers));

    MetricBasedPerfProfile.registerServer(controller);
}