public void filterRetweets(String inputFile, String outputFile) { ArrayList<String> tweets = Utility.readFile(inputFile); ArrayList<String> filteredTweets = new ArrayList<String>(); HashMap<String, Boolean> tweetsToBeAdded = new HashMap<String, Boolean>(); for (String tweet : tweets) { tweet = tweet.toLowerCase().trim(); if (tweet.contains("ã") || tweet.startsWith("rt") || tweet.startsWith("@") || tweet.startsWith("http") || tweet.equals("") || tweet.startsWith("\"rt")) continue; // what we have here are the tweets that survived. String tokens[] = tweet.split(Utility.space); StringBuilder sb = new StringBuilder(); for (int i = 0; i < tokens.length; i++) { String token = tokens[i]; token = Utility.processToken(token); if (token != null) { token = token.trim(); sb.append(token).append(Utility.space); } } String t = sb.toString().trim(); if (!tweetsToBeAdded.containsKey(t) && t.length() > 25) { tweetsToBeAdded.put(t, true); } } Iterator<Map.Entry<String, Boolean>> it = tweetsToBeAdded.entrySet().iterator(); while (it.hasNext()) { Map.Entry<String, Boolean> pair = (Map.Entry<String, Boolean>) it.next(); filteredTweets.add(pair.getKey()); } Utility.writeFile(outputFile, filteredTweets); }