public void filterRetweets2(String fileName) { ArrayList<String> tweets = Utility.readFile(fileName); ArrayList<String> filteredTweets = new ArrayList<String>(); HashMap<String, Boolean> tweetsToBeAdded = new HashMap<String, Boolean>(); for (String tweet : tweets) { tweet = tweet.toLowerCase().trim(); /*if(!Utility.isEnglish(tweet)) { System.out.println("Ignoring tweet: ["+tweet+"], since I don't think it's english..."); continue; }*/ /*if(tweet.contains("ã")||tweet.startsWith("@")||tweet.startsWith("http")||tweet.startsWith("rt") || tweet.equals("") ||tweet.startsWith("\"rt")) continue; //what we have here are the tweets that survived. String tokens [] = tweet.split(Utility.space); StringBuilder sb = new StringBuilder(); for(int i=0;i<tokens.length;i++) { String token = tokens[i]; token = Utility.processToken(token); if(token != null) { token = token.trim(); sb.append(token).append(Utility.space); } }*/ // String t = sb.toString().trim(); String t = tweet.toLowerCase().trim(); // if(!tweetsToBeAdded.containsKey(t) && t.length() >25) if (!tweet.contains("#sarcasm")) { tweetsToBeAdded.put(t, true); } } Iterator<Map.Entry<String, Boolean>> it = tweetsToBeAdded.entrySet().iterator(); while (it.hasNext()) { Map.Entry<String, Boolean> pair = (Map.Entry<String, Boolean>) it.next(); filteredTweets.add(pair.getKey()); } Utility.writeFile("files/RandomOutput", filteredTweets); }
/**
 * Tries to extract a repository id from the JSON file at {@code p}.
 *
 * <p>The file content is deserialized into a {@code SerializableModel}, wrapped in a
 * {@code Model}, and its repo id is read. The id is only accepted when the file's
 * name equals {@code escapeRepoName(repoId)} (case-insensitively).
 *
 * @param p path of the candidate JSON file
 * @return the repository id when the file could be read, parsed, and its name matches
 *         the escaped id; {@link Optional#empty()} in every other case
 */
private static Optional<String> getRepositoryIdFromJson(Path p) {
    try {
        // NOTE(review): readFile is treated as returning Optional<String> (the original
        // called .get() on it directly) -- presumably empty when the file can't be read.
        Optional<String> json = Utility.readFile(String.valueOf(p.toAbsolutePath()));
        if (!json.isPresent()) {
            // An unguarded get() on an empty Optional throws NoSuchElementException, which
            // the catch clause below does not cover; report and bail out instead.
            logger.error("Unable to load repository from " + p.getFileName());
            return Optional.empty();
        }

        SerializableModel serialized =
                new Gson().fromJson(json.get(), new TypeToken<SerializableModel>() {}.getType());
        String repoId = new Model(serialized).getRepoId();

        if (String.valueOf(p.getFileName()).equalsIgnoreCase(escapeRepoName(repoId))) {
            logger.info("Adding " + p.getFileName() + " to stored repository list. ");
            return Optional.of(repoId);
        }
    } catch (NullPointerException | JsonParseException e) {
        // Best-effort: a malformed or incomplete file is skipped, but keep the cause in the log.
        logger.error("Unable to load repository from " + p.getFileName(), e);
    }
    return Optional.empty();
}
public void filterRetweets(String inputFile, String outputFile) { ArrayList<String> tweets = Utility.readFile(inputFile); ArrayList<String> filteredTweets = new ArrayList<String>(); HashMap<String, Boolean> tweetsToBeAdded = new HashMap<String, Boolean>(); for (String tweet : tweets) { tweet = tweet.toLowerCase().trim(); if (tweet.contains("ã") || tweet.startsWith("rt") || tweet.startsWith("@") || tweet.startsWith("http") || tweet.equals("") || tweet.startsWith("\"rt")) continue; // what we have here are the tweets that survived. String tokens[] = tweet.split(Utility.space); StringBuilder sb = new StringBuilder(); for (int i = 0; i < tokens.length; i++) { String token = tokens[i]; token = Utility.processToken(token); if (token != null) { token = token.trim(); sb.append(token).append(Utility.space); } } String t = sb.toString().trim(); if (!tweetsToBeAdded.containsKey(t) && t.length() > 25) { tweetsToBeAdded.put(t, true); } } Iterator<Map.Entry<String, Boolean>> it = tweetsToBeAdded.entrySet().iterator(); while (it.hasNext()) { Map.Entry<String, Boolean> pair = (Map.Entry<String, Boolean>) it.next(); filteredTweets.add(pair.getKey()); } Utility.writeFile(outputFile, filteredTweets); }