/**
   * Entry point that produces the vocabulary file from the review-tips data.
   *
   * <p>Steps, in order: call {@code FileUtils.makeDirectory} for the vocabulary output directory,
   * pass every file listed under the review-tips location to {@code updateVocabulary}, drop each
   * word whose value in {@code vocabDist} is at most {@code DEFAULT_WORD_COUNT_ALLOWED}, clear
   * {@code vocabDist}, and hand the surviving words (in natural {@code String} order) to
   * {@code writeToFile}.
   *
   * @param args command-line arguments; not read by this method
   * @throws IOException propagated from the file helpers invoked here
   */
  public static void main(String[] args) throws IOException {
    final Set<String> words = new HashSet<>();
    FileUtils.makeDirectory(ConfigConstant.TEMP_VOCABULARY_OUTPUT_DIR);

    // Feed each review-tips file to updateVocabulary together with the shared word set.
    // NOTE(review): presumably updateVocabulary also fills vocabDist - confirm in its definition.
    for (String tipFile :
        FileUtils.getAllFiles(ConfigConstant.TEMP_REVIEW_TIPS_ML_TOPIC_DATA_OUT_LOCATIONS)) {
      updateVocabulary(
          FileUtils.getFullPath(
              ConfigConstant.TEMP_REVIEW_TIPS_ML_TOPIC_DATA_OUT_LOCATIONS, tipFile),
          words);
    }

    // Discard every word whose recorded value does not exceed the allowed minimum.
    vocabDist.forEach(
        (term, occurrences) -> {
          final boolean tooRare = occurrences <= DEFAULT_WORD_COUNT_ALLOWED;
          if (tooRare) {
            words.remove(term);
          }
        });

    // The distribution is no longer needed once the filtering is done.
    vocabDist.clear();

    // Write the remaining words; the TreeSet copy puts them in sorted order.
    final TreeSet<String> sortedWords = new TreeSet<>(words);
    writeToFile(sortedWords, ConfigConstant.TEMP_RAW_ORIGINAL_VOCABULRAY_TEXT_LOCATIONS);
  }