Example #1
  /**
   * Constructs a hashtag co-occurrence graph used to calculate the average degree.
   *
   * @param tweetList tweets whose hashtags form the graph nodes
   * @return map from each hashtag to the set of hashtags it appears with
   */
  private HashMap<String, Set<String>> getHashTagGraph(List<Tweet> tweetList) {

    HashMap<String, Set<String>> hashTagGraph = new HashMap<String, Set<String>>(20);

    // Iterate through each tweet's hashtags to construct the graph
    for (Tweet tweet : tweetList) {

      Set<String> hashTags = tweet.getHashTags();

      if (hashTags.size() >= 2) {
        for (String hashTag : hashTags) {
          for (String innerHashTag : hashTags) {
            if (!hashTag.equalsIgnoreCase(innerHashTag)) {
              Set<String> entries = hashTagGraph.get(hashTag);

              if (entries == null) {
                entries = new HashSet<String>();
              }

              entries.add(innerHashTag);

              hashTagGraph.put(hashTag, entries);
            }
          }
        }
      }
    }

    return hashTagGraph;
  }
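
The Javadoc above mentions that the graph is built "to calculate the degree", but that calculation is not part of this example. Below is a minimal sketch of how the average degree could be derived from the returned map; the helper name calculateAverageDegree is an assumption, not part of the original code:

  /**
   * Hypothetical helper: the average degree is the total number of directed edges
   * (the sum of the neighbour-set sizes) divided by the number of hashtag nodes.
   */
  private double calculateAverageDegree(HashMap<String, Set<String>> hashTagGraph) {
    if (hashTagGraph.isEmpty()) {
      return 0.0;
    }
    long edgeCount = 0;
    for (Set<String> neighbours : hashTagGraph.values()) {
      edgeCount += neighbours.size();
    }
    return (double) edgeCount / hashTagGraph.size();
  }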
Example #2
  /**
   * Persists a cleaned tweet (unicode escapes removed and non-ASCII characters replaced
   * during parsing) together with its created_at time as a single line in the output file.
   *
   * @param cleanTweetFileWriter writer for the cleaned-tweet output file
   * @param tweet parsed tweet whose text and created date are persisted
   * @throws Exception if writing to the output file fails
   */
  private void persistCleantedTweet(BufferedWriter cleanTweetFileWriter, Tweet tweet)
      throws Exception {

    if (cleanTweetFileWriter != null && tweet != null) {

      // Format the tweet text and created date
      String formattedTweetText =
          tweet.getTweetText().getTweetText() + " (" + tweet.getCreatedDate() + ")";

      // Persist formatted text into file and append new line.
      cleanTweetFileWriter.write(formattedTweetText);
      cleanTweetFileWriter.newLine();
    }
  }
Example #3
  /**
   * Builds a new list containing only the tweets whose created timestamp is within 60 seconds
   * of the latest tweet, then appends the latest tweet itself.
   *
   * @param tweetList current window of tweets
   * @param latestTweet most recently received tweet
   * @return tweets from the last 60 seconds, ending with latestTweet
   */
  private List<Tweet> getLast60SecondsTweets(List<Tweet> tweetList, Tweet latestTweet) {

    List<Tweet> last60SecondsTweetList = new ArrayList<Tweet>(20);

    for (Tweet tweet : tweetList) {

      // Keep tweets no more than 60 seconds (60000 ms) older than the latest tweet
      if (latestTweet.getTimeInMS() - tweet.getTimeInMS() <= 60000) {
        last60SecondsTweetList.add(tweet);
      }
    }

    last60SecondsTweetList.add(latestTweet);

    return last60SecondsTweetList;
  }
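
Since the method returns a new list rather than trimming its argument in place, the caller is presumably expected to replace its window on every tweet. A minimal usage sketch under that assumption follows; incomingTweets and window are hypothetical names, not identifiers from the original code:

    // Hypothetical calling pattern: rebuild the window for every incoming tweet
    // so it only ever holds tweets from the last 60 seconds.
    List<Tweet> window = new ArrayList<Tweet>(20);
    for (Tweet incoming : incomingTweets) {
      window = getLast60SecondsTweets(window, incoming);
      // 'window' now ends with 'incoming' and excludes anything older than 60 seconds
    }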
Example #4
  public void performAndPersistTweetAnalysis() {

    // Holders for tweet input file and analysis output files
    BufferedReader tweetFeed = null;
    BufferedWriter cleanTweetWriter = null;
    BufferedWriter hashTagDegreeWriter = null;

    // Holds the tweets from the last 60 seconds
    List<Tweet> last60SecondsTweets = new ArrayList<Tweet>(20);

    // Holds the count of tweets that contained unicode characters
    long tweetCount = 0;

    try {
      // Open input tweet file
      tweetFeed =
          new BufferedReader(
              new InputStreamReader(
                  this.getClass().getResourceAsStream("../../../tweet_input/tweets.txt")));

      // Strip the leading "file:/" (6 characters) from the resource URL to get a plain file path
      String cleanTweetFilePath =
          this.getClass().getResource("../../../tweet_output/ft1.txt").toString().substring(6);
      String degreeCalcFilePath =
          this.getClass().getResource("../../../tweet_output/ft2.txt").toString().substring(6);

      // Open output files to persist cleaned tweet and hashtag degree.
      cleanTweetWriter =
          new BufferedWriter(
              new OutputStreamWriter(new FileOutputStream(new File(cleanTweetFilePath))));
      hashTagDegreeWriter =
          new BufferedWriter(
              new OutputStreamWriter(new FileOutputStream(new File(degreeCalcFilePath))));

      // Hold the raw tweet line from the input file and its parsed model
      String tweetText = null;
      Tweet tweet = null;

      // Iterate through the tweets and process each one
      while ((tweetText = tweetFeed.readLine()) != null) {
        // To get Tweet model from raw tweet
        tweet = TweetProcessor.parseTweet(tweetText);
        if (tweet.getTweetText() != null) {
          // Persist cleaned tweet text with created date
          persistCleantedTweet(cleanTweetWriter, tweet);

          // Persist average degree of hashtag graph from tweet
          persistHashTagDegree(hashTagDegreeWriter, last60SecondsTweets, tweet);

          // The null check above already guarantees getTweetText() is non-null here
          if (tweet.getTweetText().isHasUniCodes()) {
            tweetCount++;
          }
        }
      }

      // Append the unicode tweet count statistic to the output file
      if (tweetCount > 0) {
        String statText = Long.toString(tweetCount) + " tweets contained unicode.";
        cleanTweetWriter.newLine();
        cleanTweetWriter.write(statText);
      }

    } catch (Exception ex) {

      System.out.println("Exception :" + ex.getMessage());
      ex.printStackTrace();
    } finally {

      // To close input/output file connections
      try {
        if (tweetFeed != null) {
          tweetFeed.close();
        }
        if (cleanTweetWriter != null) {
          cleanTweetWriter.close();
        }
        if (hashTagDegreeWriter != null) {
          hashTagDegreeWriter.close();
        }
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
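
The finally block above closes each stream by hand. Since Java 7, try-with-resources achieves the same with less room for error; here is a minimal sketch of that alternative, reusing the resource path and the cleanTweetFilePath / degreeCalcFilePath variables from the example (a suggested rewrite, not the original code):

    // Sketch: try-with-resources closes all three streams automatically,
    // even when an exception is thrown while processing tweets.
    try (BufferedReader feed =
            new BufferedReader(
                new InputStreamReader(
                    this.getClass().getResourceAsStream("../../../tweet_input/tweets.txt")));
        BufferedWriter cleanWriter = new BufferedWriter(new FileWriter(cleanTweetFilePath));
        BufferedWriter degreeWriter = new BufferedWriter(new FileWriter(degreeCalcFilePath))) {

      // ... same per-tweet processing as in performAndPersistTweetAnalysis ...

    } catch (Exception ex) {
      ex.printStackTrace();
    }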