예제 #1
0
 /**
  * Provides the most common terms stored for this event.
  *
  * <p>If the stored list is empty, a warning and a hint to run the generation step are printed
  * through {@code PrintUtilities} and {@code null} is returned instead of the empty list.
  *
  * @return The stored List of common terms, or {@code null} when that list is empty.
  * @see #generateCommonTerms generateCommonTerms() method.
  */
 public final List<String> getCommonTerms() {
   // Guard clause: nothing has been generated yet, so tell the user and bail out.
   if (commonTerms.isEmpty()) {
     PrintUtilities.printWarningMessageln("No common terms have been calculated yet!");
     final String hint =
         "Run " + PeakFindingEvent.class + "." + "generateCommonTerms() method first.";
     PrintUtilities.printInfoMessageln(hint);
     return null;
   }
   return commonTerms;
 }
예제 #2
0
 /**
  * Returns the most common terms as a single String.
  *
  * <p>Every term is followed by exactly one space, so a non-empty result always ends with a
  * trailing space. If no common terms are stored, a warning and a hint are printed through {@code
  * PrintUtilities} and {@code null} is returned.
  *
  * @return A String containing the common terms, each followed by a space, or {@code null} when
  *     the stored list is empty.
  * @see #getCommonTerms() getCommonTerms() method.
  */
 public final String getCommonTermsAsString() {
   if (commonTerms.isEmpty()) {
     PrintUtilities.printWarningMessageln("No common terms have been calculated yet!");
     PrintUtilities.printInfoMessageln(
         "Run " + PeakFindingEvent.class + "." + "generateCommonTerms() method first.");
     return null;
   }
   // A StringBuilder avoids creating a brand-new String on every loop iteration.
   StringBuilder commonTermsString = new StringBuilder();
   for (String term : commonTerms) {
     // The trailing space after the last term is kept on purpose: callers already receive it.
     commonTermsString.append(term).append(" ");
   }
   return commonTermsString.toString();
 }
예제 #3
0
  /**
   * Method to retrieve and store historical tweets by collecting them with their ID.
   *
   * <p>The IDs are processed one after the other. A failure on a single ID (an ID that is not a
   * valid long number, or an error raised by the Twitter call) is printed and logged, and the
   * method then continues with the next ID instead of aborting the whole batch.
   *
   * @param tweetIDs The IDs of the tweets that are going to be collected.
   * @param mongoDB A MongoHandler object; every retrieved tweet is stored through it.
   * @param config A configuration object. It is not referenced inside this method.
   * @param event The ground truth event for which the tweets that are going to be collected, are
   *     referring to.
   */
  public final void retrieveTweetsById(
      List<String> tweetIDs, MongoHandler mongoDB, Config config, String event) {

    ConfigurationBuilder cb = getAuthorization();
    Twitter twitter = new TwitterFactory(cb.build()).getInstance();

    for (String item : tweetIDs) {
      try {
        // Get tweet and all its metadata and store it
        Status status = twitter.showStatus(Long.parseLong(item));
        mongoDB.insertSingleTweetIntoMongoDB(status, event);
      } catch (NumberFormatException e) {
        // Long.parseLong rejects null or non-numeric IDs with an unchecked exception; handle it
        // per item so that one malformed ID does not stop the remaining IDs from being fetched.
        PrintUtilities.printErrorMessageln("Skipping malformed tweet ID: " + item);
        Logger.getLogger(TweetsRetriever.class.getName()).log(Level.SEVERE, null, e);
      } catch (TwitterException e) {
        PrintUtilities.printErrorMessageln("Failed to retrieve tweet with ID: " + item);
        Logger.getLogger(TweetsRetriever.class.getName()).log(Level.SEVERE, null, e);
      }
    }
  }
예제 #4
0
파일: EDCoW.java 프로젝트: Lefteris008/EvS
  /**
   * Runs the detection over the configured time-slice range and fills {@code events}.
   *
   * <p>The steps are executed in this order:
   *
   * <ol>
   *   <li>Keep every term longer than one character whose summed frequency lies strictly between
   *       the minimum and maximum occurrence bounds, and store it in {@code termDocMap}.
   *   <li>Call {@code processWindow} for every window; an exception raised by a window is logged
   *       and the next window is still processed.
   *   <li>Sort {@code eventList} and convert each entry into an {@code EDCoWEvent} that is added
   *       to {@code events.list}.
   *   <li>Store the elapsed time in whole seconds in {@code executionTime} and print it.
   * </ol>
   */
  @Override
  public void apply() {
    final long begunAt = System.currentTimeMillis();

    // Occurrence bounds: support ratio multiplied by the message count of the corpus.
    final double lowerBound = minTermSupport * countCorpus;
    final double upperBound = maxTermSupport * countCorpus;

    final int windows = (timeSliceB - timeSliceA) / delta2;
    termDocMap = new HashMap<>();
    eventList = new LinkedList<>();

    PrintUtilities.printInfoMessageln("Calculating term frequencies...");
    List<String> terms = corpus.getTerms();
    for (int termIndex = 0; termIndex < terms.size(); termIndex++) {
      String term = terms.get(termIndex);
      // Terms of a single character are skipped. No stopword check is done here because
      // stopwords are already omitted when the dataset is created.
      if (term.length() <= 1) {
        continue;
      }
      Integer[] frequency = corpus.getDocumentsTermFrequency(termIndex);
      int total = 0;
      for (int pos = 0; pos < frequency.length; pos++) {
        total += frequency[pos];
      }
      boolean withinSupport = total > lowerBound && total < upperBound;
      if (withinSupport) {
        termDocMap.put(term, frequency);
      }
    }

    PrintUtilities.printInfoMessageln("Calculating windows...");
    for (int window = 0; window < windows; window++) {
      PrintUtilities.printInfoMessageln("Calculating window " + (window + 1) + "\n");
      try {
        processWindow(window);
      } catch (Exception ex) {
        // A failing window is only logged so that the remaining windows are still processed.
        Logger.getLogger(EDCoW.class.getName()).log(Level.SEVERE, null, ex);
      }
    }

    Collections.sort(eventList);
    events = new EDCoWEvents();

    eventList.forEach(
        (event) -> {
          // The last slice passed on is one before the event's end slice.
          int firstSlice = (int) event.startSlice;
          int lastSlice = (int) event.endSlice - 1;
          events.list.add(
              new EDCoWEvent(
                  event.getKeywordsIDsAsString(),
                  corpus.getDateFromTimeSlice(firstSlice)
                      + ","
                      + corpus.getDateFromTimeSlice(lastSlice),
                  corpus.getIDsOfWindowAsString(
                      corpus.getDateFromTimeSlice(firstSlice),
                      corpus.getDateFromTimeSlice(lastSlice))));
        });

    events.setFullList();

    final long finishedAt = System.currentTimeMillis();
    // Integer division: the elapsed milliseconds are truncated to whole seconds.
    executionTime = (finishedAt - begunAt) / 1000;
    // NOTE(review): index 1 of the stack trace is expected to be this method's own frame, so
    // this call must stay directly inside apply() - confirm before moving it.
    String currentMethod = Thread.currentThread().getStackTrace()[1].getMethodName();
    PrintUtilities.printExecutionTime(begunAt, finishedAt, EDCoW.class.getName(), currentMethod);
  }