Ejemplo n.º 1
0
  /**
   * Counts the stemmed terms of every tweet in {@code tweetsOfEvent} and derives the event's term
   * collections from those counts. <br>
   * More formally, each tweet is tokenized with the stop-words handler looked up for the tweet's
   * language, the clean tokens and hashtags are passed through the stemmer obtained for that same
   * language, and the number of occurrences of every resulting stem is accumulated in a HashMap.
   * Afterwards {@code allTerms} is replaced by a set of all distinct stems and the counting map is
   * handed to {@code sortMapByValue}.
   */
  private void generateCommonTerms() {
    HashMap<String, Integer> stemCounts = new HashMap<>();

    tweetsOfEvent
        .stream()
        .forEach(
            tweet -> {
              String text = tweet.getText();
              Tokenizer tokenizer =
                  new Tokenizer(
                      corpus.getConfigHandler(),
                      text,
                      corpus
                          .getStopWordsHandlers()
                          .getSWHandlerAccordingToLanguage(
                              LangUtils.getLangISOFromString(tweet.getLanguage())));
              stemsHandler
                  .getStemsAsList(
                      tokenizer.getCleanTokensAndHashtags(),
                      Stemmers.getStemmer(LangUtils.getLangISOFromString(tweet.getLanguage())))
                  .stream()
                  // First occurrence stores 1, every later one adds 1 to the stored count.
                  .forEach(stem -> stemCounts.merge(stem, 1, Integer::sum));
            });

    // Snapshot the distinct stems before calling sortMapByValue: that call removes the entries
    // it selects from the map it is given.
    allTerms = new HashSet<>(stemCounts.keySet());
    sortMapByValue(stemCounts, stemsHandler);
  }
Ejemplo n.º 2
0
  /**
   * Selects the entries with the highest values from a map and appends the corresponding words
   * to {@code commonTerms}.
   *
   * <p>Despite its name, this method does not sort the map. It repeatedly picks the entry with
   * the greatest value (five times, or once per entry when the map holds fewer than five), adds
   * the result of {@code stemHandler.getOriginalWord(key)} to {@code commonTerms} and removes
   * that entry from the map. Words are therefore added in descending order of value; when several
   * entries share the greatest value, the one encountered first while iterating the map is taken.
   *
   * <p>Note that the supplied map is modified: every selected entry is removed from it.
   *
   * @param unsortedMap The Map to select from; the selected entries are removed from it.
   * @param stemHandler A StemUtils object, used to obtain the original word for each selected
   *     key.
   */
  public final void sortMapByValue(HashMap<String, Integer> unsortedMap, StemUtils stemHandler) {
    // Upper bound on how many entries are selected.
    final int maxTerms = 5;

    // Never run more rounds than there are entries, so every round finds a candidate.
    final int rounds = Math.min(unsortedMap.size(), maxTerms);
    for (int round = 0; round < rounds; round++) {
      Entry<String, Integer> best = null;
      for (Entry<String, Integer> candidate : unsortedMap.entrySet()) {
        // Strict comparison: on equal values the earlier entry in iteration order is kept.
        if (best == null || candidate.getValue() > best.getValue()) {
          best = candidate;
        }
      }

      // Read the key before removing the entry from the map.
      String bestKey = best.getKey();
      commonTerms.add(stemHandler.getOriginalWord(bestKey));
      // Drop the winner so the next round finds the next-greatest value.
      unsortedMap.remove(bestKey);
    }
  }