Пример #1
0
  /**
   * Starts processing events.
   *
   * <p>Registers a twitter4j {@code StatusListener} on the stream, applies the OAuth consumer and
   * access-token credentials, and then either samples the stream (when no keywords are configured)
   * or filters it by the configured keywords. Every received tweet is forwarded to the channel
   * processor as an event whose body is the tweet's raw JSON and whose {@code timestamp} header is
   * the tweet's creation time in epoch milliseconds.
   */
  @Override
  public void start() {
    // The channel is the piece of Flume that sits between the Source and Sink,
    // and is used to process events.
    final ChannelProcessor channel = getChannelProcessor();

    // The StatusListener is a twitter4j API, which can be added to a Twitter
    // stream, and will execute methods every time a message comes in through
    // the stream.
    StatusListener listener =
        new StatusListener() {
          // The onStatus method is executed every time a new tweet comes in.
          public void onStatus(Status status) {
            logger.debug(status.getUser().getScreenName() + ": " + status.getText());

            // Each event gets its own header map. Sharing a single map across events would let a
            // later tweet overwrite the "timestamp" header of events that were already handed to
            // the channel but still reference the same map.
            Map<String, String> headers = new HashMap<String, String>();
            headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime()));

            // NOTE(review): getBytes() uses the platform default charset; confirm that matches
            // what downstream consumers of the event body expect.
            Event event =
                EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers);

            channel.processEvent(event);
          }

          // This listener ignores everything except new tweets and stream errors.
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {
            // Surface stream failures instead of dropping them silently.
            logger.error("Twitter stream error", ex);
          }

          public void onStallWarning(StallWarning warning) {}
        };

    logger.debug(
        "Setting up Twitter sample stream using consumer key {} and" + " access token {}",
        new String[] {consumerKey, accessToken});
    // Set up the stream's listener (defined above), and set any necessary
    // security information.
    twitterStream.addListener(listener);
    twitterStream.setOAuthConsumer(consumerKey, consumerSecret);
    AccessToken token = new AccessToken(accessToken, accessTokenSecret);
    twitterStream.setOAuthAccessToken(token);

    // Without keywords, sample the stream; otherwise track only the configured keywords.
    if (keywords.length == 0) {
      logger.debug("Starting up Twitter sampling...");
      twitterStream.sample();
    } else {
      logger.debug("Starting up Twitter filtering...");
      FilterQuery query = new FilterQuery().track(keywords);
      twitterStream.filter(query);
    }
    super.start();
  }
Пример #2
0
 /**
  * Starts sampling a Twitter stream with a {@code SampleStreamListener} attached.
  *
  * @param lifeTime value passed to the {@code SampleStreamListener} constructor
  * @return {@code false} when {@code accessConfigBuilder} is {@code null} (nothing is started);
  *     {@code true} after the listener was attached and {@code sample()} was called
  */
 public boolean startSample(long lifeTime) {
   if (accessConfigBuilder == null) {
     return false;
   }

   // The listener is created before the stream, in the same order as always.
   final StatusListener sampleListener = new SampleStreamListener(lifeTime);
   final TwitterStreamFactory streamFactory =
       new TwitterStreamFactory(accessConfigBuilder.build());
   final TwitterStream sampleStream = streamFactory.getInstance();

   sampleStream.addListener(sampleListener);
   sampleStream.sample();
   return true;
 }
Пример #3
0
  /**
   * Starts reading tweets.
   *
   * <p>Obtains a stream from a {@code TwitterStreamFactory} constructed with this object, using the
   * two hard-coded string arguments, and calls {@code sample()} on it.
   */
  public void readTweets() {
    // Create the stream reader.
    final TwitterStreamFactory streamFactory = new TwitterStreamFactory(this);
    final TwitterStream sampleStream = streamFactory.getInstance("mahateam", "mahateam1");

    // Start reading the Twitter stream.
    sampleStream.sample();
  }
Пример #4
0
  /**
   * Prepares the spout: stores the collector, creates a status queue with capacity 1000, and
   * starts a Twitter stream whose listener offers every received status to that queue.
   *
   * <p>The stream is created with JSON store enabled and authenticated with the consumer key and
   * secret and the access token and secret held by this object. When {@code keyWords} is empty
   * the stream is sampled; otherwise it is filtered by tracking {@code keyWords}.
   *
   * @param conf topology configuration (not read here)
   * @param context topology context (not read here)
   * @param collector collector stored for later use
   */
  @Override
  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    queue = new LinkedBlockingQueue<Status>(1000);

    _twitterStream =
        new TwitterStreamFactory(new ConfigurationBuilder().setJSONStoreEnabled(true).build())
            .getInstance();

    // Only new statuses are of interest; every other callback is a no-op.
    _twitterStream.addListener(
        new StatusListener() {
          @Override
          public void onStatus(Status tweet) {
            // offer() does not block; its result is ignored.
            queue.offer(tweet);
          }

          @Override
          public void onDeletionNotice(StatusDeletionNotice notice) {}

          @Override
          public void onTrackLimitationNotice(int limitedCount) {}

          @Override
          public void onScrubGeo(long userId, long upToStatusId) {}

          @Override
          public void onException(Exception error) {}

          @Override
          public void onStallWarning(StallWarning stallWarning) {}
        });

    _twitterStream.setOAuthConsumer(consumerKey, consumerSecret);
    _twitterStream.setOAuthAccessToken(new AccessToken(accessToken, accessTokenSecret));

    if (keyWords.length != 0) {
      _twitterStream.filter(new FilterQuery().track(keyWords));
      return;
    }
    _twitterStream.sample();
  }
Пример #5
0
  /**
   * Prepares the spout: stores the collector, creates a status queue with capacity 1000, builds
   * OAuth-authorized {@code twitterStream} and {@code twitter} instances, and starts the stream.
   *
   * <p>The stream listener offers every received status to the queue and ignores all other
   * callbacks. When no filter words are configured the stream is sampled; otherwise a single
   * filter is started that tracks {@code filterWords} restricted to {@code filterLanguages}.
   *
   * <p>This used to test the freshly created {@code filterQuery} against {@code null}, so the
   * sampling branch could never run, and {@code filter(...)} was invoked twice in a row.
   *
   * @param conf topology configuration (not read here)
   * @param context topology context (not read here)
   * @param collector collector stored for later use
   */
  @Override
  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    statuses = new LinkedBlockingQueue<Status>(1000);
    this.spoutOutputCollector = collector;

    ConfigurationBuilder configurationBuilder = new ConfigurationBuilder();
    configurationBuilder
        .setOAuthConsumerKey(consumerKey)
        .setOAuthConsumerSecret(consumerSecret)
        .setOAuthAccessToken(accessToken)
        .setOAuthAccessTokenSecret(accessTokenSecret);

    OAuthAuthorization authAuthorization = new OAuthAuthorization(configurationBuilder.build());

    twitterStream = new TwitterStreamFactory().getInstance(authAuthorization);
    twitterStream.addListener(
        new StatusListener() {
          @Override
          public void onStatus(Status status) {
            // offer() does not block; its result is ignored.
            statuses.offer(status);
          }

          @Override
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          @Override
          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          @Override
          public void onScrubGeo(long userId, long upToStatusId) {}

          @Override
          public void onStallWarning(StallWarning warning) {}

          @Override
          public void onException(Exception ex) {}
        });

    twitter = new TwitterFactory().getInstance(authAuthorization);
    filterQuery = new FilterQuery();

    if (filterWords == null || filterWords.length == 0) {
      // Nothing to track: fall back to the sample stream.
      // NOTE(review): filterLanguages is not applied in this branch - confirm that is acceptable.
      twitterStream.sample();
    } else {
      // Configure the query completely, then start the filtered stream exactly once.
      twitterStream.filter(filterQuery.track(filterWords).language(filterLanguages));
    }
  }
Пример #6
0
  /**
   * Main entry of this application.
   *
   * <p>Samples the Twitter stream and, for every tweet whose author name and mentioned screen
   * names match {@code ^[a-zA-Z0-9_]+$}, prints one {@code "author","mention"} line per mention to
   * standard output. Once more than 30000 mentioning tweets have been counted and the elapsed time
   * hits a new multiple of 15 seconds, a summary report is printed and the JVM exits.
   *
   * @param args command-line arguments (not used)
   * @throws TwitterException declared for the twitter4j stream setup
   */
  public static void main(String[] args) throws TwitterException {
    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();

    final DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    final String startDate = formatter.format(new Date());
    final TweetStats stats = new TweetStats();
    final long startTime = System.currentTimeMillis();
    // Names containing anything other than ASCII letters, digits and '_' are filtered out.
    final Pattern nameFilter = Pattern.compile("^[a-zA-Z0-9_]+$");

    StatusListener listener =
        new StatusListener() {
          public void onStatus(Status status) {
            long actualTime = System.currentTimeMillis();

            // The counters in stats are updated and read together, so guard them with one lock.
            synchronized (this) {
              stats.incrementTweetCount();
              long tweetCount = stats.getTweetCount();
              long timeElapsed = (actualTime - startTime) / 1000;
              long statET = stats.getElapsedTime();

              Extractor extractor = new Extractor();
              List<String> names = extractor.extractMentionedScreennames(status.getText());
              // Read before this tweet is counted, so the threshold below sees the prior total.
              long mentionCount = stats.getMentionCount();

              String userName = status.getUser().getName().toLowerCase();

              Matcher matcherUserName = nameFilter.matcher(userName);
              if (matcherUserName.find() && names.size() > 0) {
                // Mentioning one or more users in a tweet counts as a single action.
                stats.incrementMentionCount();
                for (String name : names) {
                  String mentionName = name.toLowerCase();
                  Matcher matcherMention = nameFilter.matcher(mentionName);
                  if (matcherMention.find()) {
                    System.out.println("\"" + userName + "\",\"" + mentionName + "\"");
                  }
                }
              }

              // Report only on a new multiple of 15 elapsed seconds, after the mention threshold.
              if (mentionCount > 30000
                  && timeElapsed > 0
                  && statET != timeElapsed
                  && timeElapsed % 15 == 0) {
                stats.setElapsedTime(timeElapsed);
                String report = "";
                report += " |Date: " + startDate;
                report += " |Tweet Count: " + tweetCount;
                report += " |Mention Cpunt: " + mentionCount;
                report += " |Elapsed Time (s): " + timeElapsed;
                // Convert before dividing; dividing two longs first would truncate the average.
                report += " |Avg. (Tweets per Second): " + ((double) tweetCount / timeElapsed);
                System.out.println(report);

                System.exit(0);
              }
            }
          }

          // Everything except new tweets and errors is ignored.
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {
            ex.printStackTrace();
          }
        };
    twitterStream.addListener(listener);
    twitterStream.sample();
  }
Пример #7
0
  /**
   * Samples the Twitter stream and writes tweet texts to numbered text files.
   *
   * <p>Tweets are written to {@code <totalCount / 100>.txt} in the working directory, so each file
   * collects 100 tweets. Newlines inside a tweet are replaced by spaces. The first write to a file
   * uses {@code println} and later writes use {@code append}, exactly as before.
   *
   * <p>The stream is sampled without any filter query.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String args[]) {
    ConfigurationBuilder configurationBuilder = new ConfigurationBuilder();
    configurationBuilder
        .setOAuthConsumerKey(consumerKey)
        .setOAuthConsumerSecret(consumerSecret)
        .setOAuthAccessToken(accessToken)
        .setOAuthAccessTokenSecret(accessTokenSecret)
        .setJSONStoreEnabled(true);

    twitterStream = new TwitterStreamFactory(configurationBuilder.build()).getInstance();

    StatusListener listener =
        new StatusListener() {

          @Override
          public void onException(Exception arg0) {
            System.out.println("Error occured: " + arg0.getMessage());
            arg0.printStackTrace();
          }

          @Override
          public void onTrackLimitationNotice(int arg0) {
            System.out.println("Track limitation notice for " + arg0);
          }

          @Override
          public void onStatus(Status status) {
            String filename = totalCount / 100 + ".txt";
            File f = new File(filename);
            boolean appendToExisting = f.exists() && !f.isDirectory();
            String mapstring = status.getText().replace("\n", " ") + "\n";

            PrintWriter out = null;
            try {
              // Append mode creates the file when it does not exist yet, so one open covers both
              // the "new file" and the "existing file" case.
              out = new PrintWriter(new FileOutputStream(f, true));
              if (appendToExisting) {
                out.append(mapstring);
              } else {
                out.println(mapstring);
              }
              // Count the tweet only when a writer could be opened for it.
              totalCount++;
            } catch (FileNotFoundException e) {
              // The file could not be opened; skip this tweet rather than dereference a null
              // writer.
              e.printStackTrace();
            } finally {
              if (out != null) {
                out.close();
              }
            }
          }

          @Override
          public void onStallWarning(StallWarning arg0) {}

          @Override
          public void onScrubGeo(long arg0, long arg1) {}

          @Override
          public void onDeletionNotice(StatusDeletionNotice arg0) {}
        };
    twitterStream.addListener(listener);

    twitterStream.sample();
  }
Пример #8
0
  /**
   * Samples the Twitter stream and writes one tab-separated line per tweet to a fixed file.
   *
   * <p>Each line holds the creation date, the author's screen name, the tweet text, the geo
   * location and the space-separated hashtags (or {@code null} when the tweet has none). The
   * writer is flushed after every tweet so that data is not lost in the buffer if the process is
   * stopped; nothing in this method closes the writer.
   *
   * @param args command-line arguments (not used)
   * @throws IOException if the output file cannot be opened
   * @throws TwitterException declared for the twitter4j stream setup
   * @throws FileNotFoundException declared for the output file
   */
  public static void main(String[] args)
      throws IOException, TwitterException, FileNotFoundException {
    // NOTE(review): OAuth credentials are hard-coded in source. They should be revoked and loaded
    // from configuration or the environment instead.
    ConfigurationBuilder cb = new ConfigurationBuilder();
    cb.setDebugEnabled(true)
        .setOAuthConsumerKey("vGsMAca82HjVYYm7wQlN5Q")
        .setOAuthConsumerSecret("6mpRAAlMI6sIWruQNpomBzN1kIfodHexYBrKWPKmsuM")
        .setOAuthAccessToken("776768857-hCnK0Eu3orKK6qP98W7LyTF29QXadxydL21gIuon")
        .setOAuthAccessTokenSecret("V3VAJgO2x1zWO2JMHsRRrWLzYOeXxmlittF1pBBNWA4");

    TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();
    System.out.println("time " + System.currentTimeMillis());

    StatusListener listener =
        new StatusListener() {
          final FileWriter fstream = new FileWriter("/Users/jacobportnoff/Desktop/week7SunJ.txt");
          final BufferedWriter out = new BufferedWriter(fstream);

          public void onStatus(Status status) {
            HashtagEntity[] hashtags = status.getHashtagEntities();
            String hashes;
            if (hashtags.length == 0) {
              hashes = "null";
            } else {
              StringBuilder joined = new StringBuilder();
              for (HashtagEntity hash : hashtags) {
                joined.append(hash.getText()).append(" ");
              }
              hashes = joined.toString();
            }
            String tweet =
                status.getCreatedAt()
                    + "\t"
                    + status.getUser().getScreenName()
                    + "\t"
                    + status.getText()
                    + "\t"
                    + status.getGeoLocation()
                    + "\t"
                    + hashes
                    + "\n";
            try {
              out.write(tweet);
              // Push the line to disk right away; the writer is never closed explicitly.
              out.flush();
            } catch (IOException e) {
              System.err.println("Error: " + e.getMessage());
            }
          }

          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {
            ex.printStackTrace();
          }
        };
    twitterStream.addListener(listener);
    twitterStream.sample();
  }
  /**
   * Creates a Kafka producer and a Twitter stream from the given context and starts forwarding
   * tweets.
   *
   * <p>Producer settings and Twitter OAuth credentials are read from {@code context}. Each received
   * tweet's raw JSON is sent to the {@code testing1} topic. The stream is filtered by the first
   * five entries of {@code Tweety.hashtags}. Any stream exception shuts the stream down.
   *
   * @param context configuration source for the producer and Twitter settings
   * @throws IOException declared by this method's signature
   */
  private void start(final Context context) throws IOException {

    // Producer properties
    Properties props = new Properties();
    props.put("metadata.broker.list", context.getString(TwitterSourceConstant.BROKER_LIST));
    props.put("serializer.class", context.getString(TwitterSourceConstant.SERIALIZER));
    props.put("partitioner.class", context.getString(TwitterSourceConstant.PARTITIONER));
    props.put("request.required.acks", context.getString(TwitterSourceConstant.REQUIRED_ACKS));

    ProducerConfig config = new ProducerConfig(props);

    final Producer<String, String> producer = new Producer<String, String>(config);

    // Twitter properties
    consumerKey = context.getString(TwitterSourceConstant.CONSUMER_KEY_KEY);
    consumerSecret = context.getString(TwitterSourceConstant.CONSUMER_SECRET_KEY);
    accessToken = context.getString(TwitterSourceConstant.ACCESS_TOKEN_KEY);
    accessTokenSecret = context.getString(TwitterSourceConstant.ACCESS_TOKEN_SECRET_KEY);

    ConfigurationBuilder cb = new ConfigurationBuilder();
    cb.setOAuthConsumerKey(consumerKey);
    cb.setOAuthConsumerSecret(consumerSecret);
    cb.setOAuthAccessToken(accessToken);
    cb.setOAuthAccessTokenSecret(accessTokenSecret);
    cb.setJSONStoreEnabled(true);
    cb.setIncludeEntitiesEnabled(true);
    // NOTE(review): proxy host and credentials are hard-coded in source. They should be rotated and
    // read from the context like the other settings.
    cb.setHttpProxyHost("proxy.tcs.com");
    cb.setHttpProxyPort(8080);
    cb.setHttpProxyUser("876216");
    cb.setHttpProxyPassword("Apple@123");
    twitterStream = new TwitterStreamFactory(cb.build()).getInstance();

    // Twitter listener
    StatusListener listener =
        new StatusListener() {
          // The onStatus method is executed every time a new tweet comes in.
          public void onStatus(Status status) {
            // Wrap the raw JSON of the tweet in a message for the "testing1" topic.
            System.out.println("Listening :");
            KeyedMessage<String, String> data =
                new KeyedMessage<String, String>(
                    "testing1", TwitterObjectFactory.getRawJSON(status));

            producer.send(data);
            System.out.println(data);
          }

          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {
            // Record why the stream is being shut down instead of discarding the exception.
            logger.error("Twitter stream error", ex);
            logger.info("ShutDown");
            twitterStream.shutdown();
          }

          public void onStallWarning(StallWarning warning) {}
        };

    // Bind the listener before the stream is started.
    twitterStream.addListener(listener);

    // Start only the filtered stream. A sample() call used to precede filter() here.
    // NOTE(review): presumably filter() supersedes a running sample() connection - confirm against
    // the twitter4j version in use.
    FilterQuery query =
        new FilterQuery()
            .track(
                Tweety.hashtags[0],
                Tweety.hashtags[1],
                Tweety.hashtags[2],
                Tweety.hashtags[3],
                Tweety.hashtags[4]);
    twitterStream.filter(query);
  }
  /**
   * Starts processing events.
   *
   * <p>Registers a twitter4j {@code StatusListener} on the stream and then either samples the
   * stream (when no keywords are configured) or filters it by the configured keywords. Every
   * received tweet is forwarded to the channel processor as an event whose body is the tweet's raw
   * JSON and whose {@code timestamp} header is the tweet's creation time in epoch milliseconds.
   * Stream errors are logged, and a stall warning above 95 percent pauses the listener for two
   * minutes.
   */
  @Override
  public void start() {
    // The channel is the piece of Flume that sits between the Source and Sink,
    // and is used to process events.
    final ChannelProcessor channel = getChannelProcessor();

    // The StatusListener is a twitter4j API, which can be added to a Twitter
    // stream, and will execute methods every time a message comes in through
    // the stream.
    StatusListener listener =
        new StatusListener() {
          // The onStatus method is executed every time a new tweet comes in.
          public void onStatus(Status status) {
            logger.debug(status.getUser().getScreenName() + ": " + status.getText());

            // Each event gets its own header map. Sharing a single map across events would let a
            // later tweet overwrite the "timestamp" header of events that were already handed to
            // the channel but still reference the same map.
            Map<String, String> headers = new HashMap<String, String>();
            headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime()));
            Event event =
                EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers);
            channel.processEvent(event);
          }

          // This listener ignores deletion, track-limitation and scrub-geo notices.
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {
            logger.error("Stream Error ", ex);
          }

          public void onStallWarning(StallWarning warning) {
            int percentFull = warning.getPercentFull();
            // The placeholder is required; without it the warning object is never printed.
            logger.warn("Stall Warning Received {}", warning);
            if (percentFull > 95) {
              logger.warn("Stallwarning Stream full more han 95 %. Going to wait for 2 minutes");
              try {
                // Two minutes in milliseconds. The previous literal "000" is octal zero, which
                // made the sleep a no-op.
                // NOTE(review): this blocks the thread that delivers the callback - confirm that
                // pausing here is the desired reaction to a stall warning.
                Thread.sleep(2 * 60 * 1000L);
              } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can observe the interruption.
                Thread.currentThread().interrupt();
                logger.warn("Interrupted while waiting after stall warning", e);
              }
            }
          }
        };

    logger.debug(
        "Setting up Twitter sample stream using consumer key {} and" + " access token {}",
        new String[] {consumerKey, accessToken});
    // Set up the stream's listener (defined above),
    twitterStream.addListener(listener);

    // Without keywords, sample the stream; otherwise track only the configured keywords.
    if (keywords.length == 0) {
      logger.debug("Starting up Twitter sampling...");
      twitterStream.sample();
    } else {
      logger.debug("Starting up Twitter filtering...");

      FilterQuery query = new FilterQuery().track(keywords);
      twitterStream.filter(query);
    }
    super.start();
  }