/**
 * Loads the stop-word resource {@code stopwords-utils.txt} and adds every line
 * of it to {@code stopWordsSet}.
 *
 * <p>Lines are added exactly as returned by {@code Utils.getResourceList}; no
 * trimming or comment filtering is applied here. The size of
 * {@code stopWordsSet} after loading is logged at info level.
 */
public void loadStopwords() {
    List<String> stopwordLines = Utils.getResourceList("stopwords-utils.txt");
    stopWordsSet.addAll(stopwordLines);
    logger.info("stopWordSize is {}", stopWordsSet.size());
}
/**
 * Reads a lexicon resource into a set of words.
 *
 * <p>Lines beginning with {@code #} are skipped. Every other line is trimmed
 * before it is stored, so a blank line contributes the empty string.
 *
 * @param filePath resource path handed to {@code Utils.getResourceList}
 * @return a new mutable set holding the trimmed, non-comment lines
 */
private static Set<String> loadDictionarySet(String filePath) {
    Set<String> words = new HashSet<String>();
    for (String rawLine : Utils.getResourceList(filePath)) {
        if (rawLine.startsWith("#")) {
            continue;
        }
        words.add(rawLine.trim());
    }
    return words;
}
/**
 * Returns the word counts held in {@code wordCountPerUnit} as a list of
 * entries ordered by frequency. Intended for inspecting high- or low-frequency
 * words so they can be added to the stop-word list.
 *
 * @param flag sort option forwarded unchanged to
 *             {@code Utils.sortMapStringAndInteger} (presumably the sort
 *             direction; confirm against that helper)
 * @return a new list of word/count entries, ordered by
 *         {@code Utils.sortMapStringAndInteger}
 */
public List<Entry<String, Integer>> showFrequencyWords(boolean flag) {
    // Snapshot the atomic counters into plain Integer values before sorting.
    Map<String, Integer> snapshot = new HashMap<>();
    for (Entry<String, AtomicInteger> counter : wordCountPerUnit.entrySet()) {
        snapshot.put(counter.getKey(), counter.getValue().get());
    }

    ArrayList<Entry<String, Integer>> sortedCounts = new ArrayList<>(snapshot.entrySet());
    Utils.sortMapStringAndInteger(sortedCounts, flag);
    return sortedCounts;
}
/**
 * Reads a weighted lexicon resource into a map from word to weight.
 *
 * <p>Each data line has the form {@code word:weight}. Empty lines, lines
 * starting with {@code #}, and lines that contain no {@code :} are skipped.
 * The text before the first colon becomes the key (stored as-is, untrimmed);
 * the text after it is parsed with {@link Double#parseDouble(String)}.
 *
 * @param filePath resource path handed to {@code Utils.getResourceList}
 * @return a new mutable map of the parsed entries; a later line with the same
 *         key overwrites an earlier one
 * @throws NumberFormatException if the text after the first colon of a data
 *         line is not a valid double
 */
private static Map<String, Double> loadDictionaryMap(String filePath) {
    Map<String, Double> lexicons = new HashMap<>();
    List<String> resourceList = Utils.getResourceList(filePath);
    for (String line : resourceList) {
        // A line is data only if it is neither empty nor a comment. Joining
        // the two negated checks with "||" would accept every line, so
        // comments containing ':' would be parsed as entries.
        if (line.isEmpty() || line.startsWith("#")) {
            continue;
        }
        int index = line.indexOf(":");
        if (index != -1) {
            lexicons.put(line.substring(0, index), Double.parseDouble(line.substring(index + 1)));
        }
    }
    return lexicons;
}
/**
 * Returns the entries of {@code posMap} as a list ordered by frequency.
 * Intended for inspecting high- or low-frequency words so they can be added
 * to the stop-word list.
 *
 * @param flag sort option forwarded unchanged to
 *             {@code Utils.sortMapStringAndInteger} (presumably the sort
 *             direction; confirm against that helper)
 * @return a new list holding the entries of {@code posMap}, ordered by
 *         {@code Utils.sortMapStringAndInteger}
 */
public List<Entry<String, Integer>> showFrequencyWordsWithWeight(boolean flag) {
    ArrayList<Entry<String, Integer>> sortedEntries = new ArrayList<>();
    sortedEntries.addAll(posMap.entrySet());
    Utils.sortMapStringAndInteger(sortedEntries, flag);
    return sortedEntries;
}
/**
 * Returns the entries of {@code weightMap} as a list ordered by weight.
 *
 * @param flag sort option forwarded unchanged to
 *             {@code Utils.sortMapStringAndDouble} (presumably the sort
 *             direction; confirm against that helper)
 * @return a new list holding the entries of {@code weightMap}, ordered by
 *         {@code Utils.sortMapStringAndDouble}
 */
public List<Entry<String, Double>> showWordsWeight(boolean flag) {
    ArrayList<Entry<String, Double>> sortedWeights = new ArrayList<>();
    sortedWeights.addAll(weightMap.entrySet());
    Utils.sortMapStringAndDouble(sortedWeights, flag);
    return sortedWeights;
}