예제 #1
0
 /**
  * Adds every entry returned by {@code Utils.getResourceList("stopwords-utils.txt")}
  * to {@code stopWordsSet}, then logs the resulting size of that set.
  */
 public void loadStopwords() {
   List<String> stopwordLines = Utils.getResourceList("stopwords-utils.txt");
   stopWordsSet.addAll(stopwordLines);
   logger.info("stopWordSize is {}", stopWordsSet.size());
 }
 /**
  * Builds a lexicon set from the lines returned by {@code Utils.getResourceList(filePath)}.
  *
  * <p>Each line is trimmed before it is inspected. Lines that are empty after trimming and
  * lines whose first non-whitespace character is {@code '#'} are skipped, so neither the
  * empty string nor an indented comment can end up as a lexicon entry.
  *
  * @param filePath resource path handed to {@code Utils.getResourceList}
  * @return a new mutable set containing the trimmed, non-empty, non-comment lines
  */
 private static Set<String> loadDictionarySet(String filePath) {
   Set<String> lexicons = new HashSet<>();
   for (String line : Utils.getResourceList(filePath)) {
     String entry = line.trim();
     // Trim first: checking the raw line would let "  # comment" through as an entry
     // and would store blank lines as "".
     if (entry.isEmpty() || entry.startsWith("#")) {
       continue;
     }
     lexicons.add(entry);
   }
   return lexicons;
 }
예제 #3
0
  /**
   * Returns the words with their counts as a sorted list, useful for inspecting
   * high- or low-frequency words (for example when deciding what to add to the stop words).
   *
   * <p>The counts are copied out of {@code wordCountPerUnit} into plain integers and the
   * resulting entries are ordered by {@code Utils.sortMapStringAndInteger}.
   *
   * @param flag forwarded unchanged to {@code Utils.sortMapStringAndInteger}
   *     (presumably selects the sort direction; confirm against that helper)
   * @return the list of word/count entries after sorting
   */
  public List<Entry<String, Integer>> showFrequencyWords(boolean flag) {
    // Snapshot the atomic counters as plain Integer values.
    Map<String, Integer> snapshot = new HashMap<>();
    for (Entry<String, AtomicInteger> counter : wordCountPerUnit.entrySet()) {
      snapshot.put(counter.getKey(), counter.getValue().get());
    }

    ArrayList<Entry<String, Integer>> sortedCounts = new ArrayList<>(snapshot.entrySet());
    Utils.sortMapStringAndInteger(sortedCounts, flag);
    return sortedCounts;
  }
 /**
  * Builds a key-to-weight map from the lines returned by
  * {@code Utils.getResourceList(filePath)}.
  *
  * <p>Each usable line has the form {@code key:value}: the text before the first
  * {@code ':'} becomes the key and the text after it is parsed with
  * {@link Double#parseDouble(String)}. Empty lines, lines starting with {@code '#'},
  * and lines without a {@code ':'} are skipped.
  *
  * @param filePath resource path handed to {@code Utils.getResourceList}
  * @return a new mutable map of the parsed entries
  * @throws NumberFormatException if the text after the first {@code ':'} on a
  *     non-comment line is not a parsable double
  */
 private static Map<String, Double> loadDictionaryMap(String filePath) {
   Map<String, Double> lexicons = new HashMap<>();
   for (String line : Utils.getResourceList(filePath)) {
     // Both conditions must hold for a line to be parsed. The previous check joined the
     // negated tests with ||, which is always true, so comment lines containing ':'
     // were handed to Double.parseDouble.
     if (line.isEmpty() || line.startsWith("#")) {
       continue;
     }
     int separator = line.indexOf(':');
     if (separator == -1) {
       continue;
     }
     String key = line.substring(0, separator);
     double weight = Double.parseDouble(line.substring(separator + 1));
     lexicons.put(key, weight);
   }
   return lexicons;
 }
예제 #5
0
 /**
  * Returns the entries of {@code posMap} as a list ordered by
  * {@code Utils.sortMapStringAndInteger}.
  *
  * <p>NOTE(review): the method name mentions "weight", but the code only reads
  * {@code posMap}; confirm what that map holds.
  *
  * @param flag forwarded unchanged to {@code Utils.sortMapStringAndInteger}
  *     (presumably selects the sort direction; confirm against that helper)
  * @return the list of {@code posMap} entries after sorting
  */
 public List<Entry<String, Integer>> showFrequencyWordsWithWeight(boolean flag) {
   ArrayList<Entry<String, Integer>> sortedEntries = new ArrayList<>();
   sortedEntries.addAll(posMap.entrySet());
   Utils.sortMapStringAndInteger(sortedEntries, flag);
   return sortedEntries;
 }
예제 #6
0
 /**
  * Returns the words sorted by weight.
  *
  * <p>The entries of {@code weightMap} are copied into a list, which is then ordered by
  * {@code Utils.sortMapStringAndDouble}.
  *
  * @param flag forwarded unchanged to {@code Utils.sortMapStringAndDouble}
  *     (presumably selects the sort direction; confirm against that helper)
  * @return the list of word/weight entries after sorting
  */
 public List<Entry<String, Double>> showWordsWeight(boolean flag) {
   ArrayList<Entry<String, Double>> sortedWeights = new ArrayList<>();
   sortedWeights.addAll(weightMap.entrySet());
   Utils.sortMapStringAndDouble(sortedWeights, flag);
   return sortedWeights;
 }