/** * This method performed followint tasks 1. Extract tweet text from tweetline and created_at time * 2. Clean the tweet text by removing unicodes and replacing non-ascii characters 3. Persist * cleaned tweet text and time into file * * @param cleanTweetFileWriter * @param tweet * @throws Exception */ private void persistCleantedTweet(BufferedWriter cleanTweetFileWriter, Tweet tweet) throws Exception { if (cleanTweetFileWriter != null && tweet != null) { // Format the tweet text and created date String formattedTweetText = tweet.getTweetText().getTweetText() + " (" + tweet.getCreatedDate() + ")"; // Persist formatted text into file and append new line. cleanTweetFileWriter.write(formattedTweetText); cleanTweetFileWriter.newLine(); } }
public void performAndPersistTweetAnalysis() { // Holders for tweet input file and analysis output files BufferedReader tweetFeed = null; BufferedWriter cleanTweetWriter = null; BufferedWriter hashTagDegreeWriter = null; // To hold last 60 seconds tweets List<Tweet> last60SecondsTweets = new ArrayList<Tweet>(20); // To hold the count of tweets which has unicodes long tweetCount = 0; try { // Open input tweet file tweetFeed = new BufferedReader( new InputStreamReader( this.getClass().getResourceAsStream("../../../tweet_input/tweets.txt"))); // Performed substring to remove "file:/" from path String cleanTweetFilePath = this.getClass().getResource("../../../tweet_output/ft1.txt").toString().substring(6); String degreeCalcFilePath = this.getClass().getResource("../../../tweet_output/ft2.txt").toString().substring(6); // Open output files to persist cleaned tweet and hashtag degree. cleanTweetWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(new File(cleanTweetFilePath)))); hashTagDegreeWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(new File(degreeCalcFilePath)))); // To hold tweet from input file and model String tweetText = null; Tweet tweet = null; // Iterate through the tweets and process it while ((tweetText = tweetFeed.readLine()) != null) { // To get Tweet model from raw tweet tweet = TweetProcessor.parseTweet(tweetText); if (tweet.getTweetText() != null) { // Persist cleaned tweet text with created date persistCleantedTweet(cleanTweetWriter, tweet); // Persist average degree of hashtag graph from tweet persistHashTagDegree(hashTagDegreeWriter, last60SecondsTweets, tweet); if (tweet.getTweetText() != null && tweet.getTweetText().isHasUniCodes()) { tweetCount++; } } } // To add unicode tweet text statistic to the file if (tweetCount > 0) { String statText = Long.toString(tweetCount) + " tweets contained unicode."; cleanTweetWriter.newLine(); cleanTweetWriter.write(statText); } } catch (Exception ex) { System.out.println("Exception :" + ex.getMessage()); ex.printStackTrace(); } finally { // To close input/output file connections try { if (tweetFeed != null) { tweetFeed.close(); } if (cleanTweetWriter != null) { cleanTweetWriter.close(); } if (hashTagDegreeWriter != null) { hashTagDegreeWriter.close(); } } catch (Exception ex) { System.out.println(ex.getMessage()); } } }