예제 #1
0
  /**
   * Writes a single tweet as one line to the cleaned-tweet output.
   *
   * <p>The line has the form {@code <tweet text> (<created date>)} and is followed by the
   * writer's line separator. The call is a no-op when either argument is {@code null}. No
   * cleaning is done here; the text is written exactly as the tweet model returns it.
   *
   * @param cleanTweetFileWriter destination writer; nothing is written when {@code null}
   * @param tweet tweet to persist; nothing is written when {@code null}. Its
   *     {@code getTweetText()} must not return {@code null} (the caller is expected to check).
   * @throws Exception if writing to {@code cleanTweetFileWriter} fails
   */
  private void persistCleantedTweet(BufferedWriter cleanTweetFileWriter, Tweet tweet)
      throws Exception {

    // Nothing to do without both a destination and a tweet.
    if (cleanTweetFileWriter == null || tweet == null) {
      return;
    }

    // One line per tweet: the text followed by the created date in parentheses.
    cleanTweetFileWriter.write(
        tweet.getTweetText().getTweetText() + " (" + tweet.getCreatedDate() + ")");
    cleanTweetFileWriter.newLine();
  }
예제 #2
0
  /**
   * Runs the tweet analysis over the input feed and writes both output files.
   *
   * <p>Reads the {@code tweet_input/tweets.txt} resource (resolved relative to this class) line
   * by line, parses each line with {@code TweetProcessor.parseTweet}, and for every tweet that
   * has text:
   *
   * <ol>
   *   <li>writes the tweet text and created date to {@code tweet_output/ft1.txt};
   *   <li>passes the tweet to {@code persistHashTagDegree} together with the writer for
   *       {@code tweet_output/ft2.txt};
   *   <li>counts it when its text reports {@code isHasUniCodes()}.
   * </ol>
   *
   * <p>If at least one such tweet was counted, a blank line and a summary line
   * ({@code "<n> tweets contained unicode."}) are appended to {@code ft1.txt}.
   *
   * <p>Any failure is reported on standard output (with a stack trace) and not rethrown. All
   * opened streams are closed before returning, each independently of the others.
   */
  public void performAndPersistTweetAnalysis() {

    // Holders for tweet input file and analysis output files
    BufferedReader tweetFeed = null;
    BufferedWriter cleanTweetWriter = null;
    BufferedWriter hashTagDegreeWriter = null;

    // Shared with persistHashTagDegree across iterations (presumably its sliding 60-second
    // window of tweets; this method never reads or modifies the list itself).
    List<Tweet> last60SecondsTweets = new ArrayList<Tweet>(20);

    // Number of tweets whose text reported isHasUniCodes()
    long tweetCount = 0;

    try {
      // Open input tweet file; fail with a descriptive message instead of a bare NPE when the
      // resource is missing.
      String tweetInputResource = "../../../tweet_input/tweets.txt";
      java.io.InputStream tweetInput = this.getClass().getResourceAsStream(tweetInputResource);
      if (tweetInput == null) {
        throw new IllegalStateException("Resource not found: " + tweetInputResource);
      }
      // NOTE(review): the reader and writers use the platform default charset, as before.
      tweetFeed = new BufferedReader(new InputStreamReader(tweetInput));

      // Open output files to persist cleaned tweet and hashtag degree.
      cleanTweetWriter =
          new BufferedWriter(
              new OutputStreamWriter(
                  new FileOutputStream(resolveResourceFile("../../../tweet_output/ft1.txt"))));
      hashTagDegreeWriter =
          new BufferedWriter(
              new OutputStreamWriter(
                  new FileOutputStream(resolveResourceFile("../../../tweet_output/ft2.txt"))));

      // Iterate through the tweets and process them
      String tweetText;
      while ((tweetText = tweetFeed.readLine()) != null) {
        // To get Tweet model from raw tweet
        Tweet tweet = TweetProcessor.parseTweet(tweetText);

        // Skip lines that yield no tweet or a tweet without text.
        if (tweet == null || tweet.getTweetText() == null) {
          continue;
        }

        // Persist tweet text with created date
        persistCleantedTweet(cleanTweetWriter, tweet);

        // Persist average degree of hashtag graph from tweet
        persistHashTagDegree(hashTagDegreeWriter, last60SecondsTweets, tweet);

        if (tweet.getTweetText().isHasUniCodes()) {
          tweetCount++;
        }
      }

      // To add unicode tweet text statistic to the file
      if (tweetCount > 0) {
        String statText = Long.toString(tweetCount) + " tweets contained unicode.";
        cleanTweetWriter.newLine();
        cleanTweetWriter.write(statText);
      }

    } catch (Exception ex) {

      System.out.println("Exception :" + ex.getMessage());
      ex.printStackTrace();
    } finally {

      // Close the writers first so their buffered output is flushed even if closing the reader
      // fails; each close is isolated so one failure cannot leak the remaining streams.
      closeAndReport(cleanTweetWriter);
      closeAndReport(hashTagDegreeWriter);
      closeAndReport(tweetFeed);
    }
  }

  /**
   * Resolves a resource path (relative to this class) to a file on the local file system.
   *
   * @param resourcePath resource path as accepted by {@code Class.getResource}
   * @return the file the resource URL points at
   * @throws Exception if the resource does not exist or its URL cannot be converted to a file
   *     (for example when it is not a {@code file:} URL)
   */
  private File resolveResourceFile(String resourcePath) throws Exception {
    java.net.URL resourceUrl = this.getClass().getResource(resourcePath);
    if (resourceUrl == null) {
      throw new IllegalStateException("Resource not found: " + resourcePath);
    }
    // Going through the URI keeps the leading "/" of absolute Unix paths and decodes
    // percent-escapes (e.g. "%20"), which stripping a fixed "file:/" prefix did not.
    return new File(resourceUrl.toURI());
  }

  /** Closes {@code resource} if it is non-null, printing (not propagating) any failure. */
  private static void closeAndReport(java.io.Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }