/**
 * Returns the five most common terms of the tweetsOfEvent that belong to this event.
 *
 * @return A List of Strings with the most common terms, or null when the terms have not
 *     been generated yet.
 * @see generateCommonTerms generateCommonTerms() method.
 */
public final List<String> getCommonTerms() {
  if (commonTerms.isEmpty()) {
    // Nothing has been generated yet — warn the caller and point at the generator method.
    PrintUtilities.printWarningMessageln("No common terms have been calculated yet!");
    PrintUtilities.printInfoMessageln(
        "Run " + PeakFindingEvent.class + "." + "generateCommonTerms() method first.");
    return null;
  }
  return commonTerms;
}
/**
 * Returns the five most common terms as a single String.
 *
 * <p>Terms are separated by single spaces and the result keeps a trailing space, matching
 * the historical output format of this method.
 *
 * @return A String containing the five most common terms, or null when no common terms
 *     have been generated yet.
 * @see #getCommonTerms() getCommonTerms() method.
 */
public final String getCommonTermsAsString() {
  if (commonTerms.isEmpty()) {
    PrintUtilities.printWarningMessageln("No common terms have been calculated yet!");
    PrintUtilities.printInfoMessageln(
        "Run " + PeakFindingEvent.class + "." + "generateCommonTerms() method first.");
    return null;
  }
  // StringBuilder instead of repeated String concatenation in the loop (O(n^2) -> O(n)).
  StringBuilder commonTermsString = new StringBuilder();
  for (String term : commonTerms) {
    commonTermsString.append(term).append(" ");
  }
  return commonTermsString.toString();
}
/** * Method to retrieve and store historical tweets by collecting them with their ID. * * @param tweetIDs The IDs of the tweets that are going to be collected. * @param mongoDB A MongoHandler object. * @param config A configuration object. * @param event The ground truth event for which the tweets that are going to be collected, are * referring to. */ public final void retrieveTweetsById( List<String> tweetIDs, MongoHandler mongoDB, Config config, String event) { ConfigurationBuilder cb = getAuthorization(); Twitter twitter = new TwitterFactory(cb.build()).getInstance(); tweetIDs .stream() .forEach( (item) -> { try { // Get tweet and all its metadata and store it Status status = twitter.showStatus(Long.parseLong(item)); mongoDB.insertSingleTweetIntoMongoDB(status, event); } catch (TwitterException e) { PrintUtilities.printErrorMessageln("Failed to retrieve tweet with ID: " + item); Logger.getLogger(TweetsRetriever.class.getName()).log(Level.SEVERE, null, e); } }); }
/**
 * Runs the EDCoW event-detection pipeline: filters corpus terms by support thresholds,
 * processes each time window, sorts the detected events, and exports them via
 * {@code events.setFullList()}. Also records the execution time in whole seconds.
 */
@Override
public void apply() {
  long startTime = System.currentTimeMillis();
  // Absolute occurrence bounds derived from the relative support thresholds.
  double minTermOccur = minTermSupport * countCorpus; // Min support * Message count corpus
  double maxTermOccur = maxTermSupport * countCorpus; // Max support * Message count corpus
  // Number of windows in the [timeSliceA, timeSliceB) span, delta2 slices per window
  // (integer division truncates any partial trailing window).
  int windows = (timeSliceB - timeSliceA) / delta2;
  termDocMap = new HashMap<>();
  eventList = new LinkedList<>();
  PrintUtilities.printInfoMessageln("Calculating term frequencies...");
  List<String> terms = corpus.getTerms();
  for (int i = 0; i < terms.size(); i++) {
    String term = terms.get(i);
    // Single-character terms are skipped.
    // Stopwords check removed as they are already omitted when creating the dataset
    if (term.length() > 1) {
      Integer[] frequency = corpus.getDocumentsTermFrequency(i);
      // cf = collection frequency: total occurrences of the term across all documents.
      int cf = 0;
      for (int freq : frequency) {
        cf += freq;
      }
      // Keep only terms whose frequency lies strictly inside the support bounds.
      if (cf > minTermOccur && cf < maxTermOccur) {
        termDocMap.put(term, frequency);
      }
    }
  }
  PrintUtilities.printInfoMessageln("Calculating windows...");
  for (int i = 0; i < windows; i++) {
    PrintUtilities.printInfoMessageln("Calculating window " + (i + 1) + "\n");
    try {
      processWindow(i);
    } catch (Exception ex) {
      // A failing window is logged and skipped; remaining windows are still processed.
      Logger.getLogger(EDCoW.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
  Collections.sort(eventList);
  events = new EDCoWEvents();
  // Convert each detected event into its exportable form: keyword IDs plus the
  // "startDate,endDate" span and the IDs of the window covering that span.
  // NOTE(review): endSlice - 1 presumably makes the end date inclusive — confirm
  // against getDateFromTimeSlice semantics.
  eventList
      .stream()
      .forEach(
          (event) -> {
            events.list.add(
                new EDCoWEvent(
                    event.getKeywordsIDsAsString(),
                    corpus.getDateFromTimeSlice((int) event.startSlice)
                        + ","
                        + corpus.getDateFromTimeSlice((int) event.endSlice - 1),
                    corpus.getIDsOfWindowAsString(
                        corpus.getDateFromTimeSlice((int) event.startSlice),
                        corpus.getDateFromTimeSlice((int) event.endSlice - 1))));
          });
  events.setFullList();
  long endTime = System.currentTimeMillis();
  // Whole seconds; integer division truncates sub-second remainder.
  executionTime = (endTime - startTime) / 1000;
  PrintUtilities.printExecutionTime(
      startTime,
      endTime,
      EDCoW.class.getName(),
      Thread.currentThread().getStackTrace()[1].getMethodName());
}