/**
 * Loads the preposition dictionary.
 *
 * <p>Replaces {@code _PrepDict} with a new root segment, then adds every non-blank line
 * (trimmed) of the file {@code Dictionary.PATH_DIC_PREP}, located under the configured
 * dictionary root and decoded as UTF-8. Read and close failures are logged, not rethrown.
 *
 * @throws RuntimeException if the preposition dictionary file cannot be opened
 */
private void loadPrepDict() {
    _PrepDict = new DictSegment((char) 0);
    File dictFile = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);

    InputStream in;
    try {
        in = new FileInputStream(dictFile);
    } catch (FileNotFoundException notFound) {
        // The dictionary is mandatory: log the cause, then abort loading.
        logger.error("ik-analyzer", notFound);
        throw new RuntimeException("Preposition Dictionary not found!!!");
    }

    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), 512);
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            String word = line.trim();
            // Blank lines carry no entry.
            if (word.length() > 0) {
                _PrepDict.fillSegment(word.toCharArray());
            }
        }
    } catch (IOException readFailure) {
        logger.error("ik-analyzer", readFailure);
    } finally {
        try {
            in.close();
        } catch (IOException closeFailure) {
            logger.error("ik-analyzer", closeFailure);
        }
    }
}
/** 加载量词词典 */ private void loadQuantifierDict() { // 建立一个量词典实例 _QuantifierDict = new DictSegment((char) 0); // 读取量词词典文件 File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); InputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("ik-analyzer", e); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _QuantifierDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException ioe) { logger.error("Quantifier Dictionary loading exception."); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } }
/** 加载用户配置的扩展词典到主词库表 */ private void loadExtDict() { // 加载扩展词典配置 List<String> extDictFiles = configuration.getExtDictionarys(); if (extDictFiles != null) { InputStream is = null; for (String extDictName : extDictFiles) { // 读取扩展词典文件 logger.info("[Dict Loading]" + extDictName); File file = new File(configuration.getDictRoot(), extDictName); try { is = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("ik-analyzer", e); } // 如果找不到扩展的字典,则忽略 if (is == null) { continue; } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { // 加载扩展词典数据到主内存词典中 _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } } } }
/** * 加载填充词典片段 * * @param charArray * @param begin * @param length */ public synchronized void fillSegment(char[] charArray, int begin, int length) { // 获取字典表中的汉字对象 Character beginChar = new Character(charArray[begin]); Character keyChar = charMap.get(beginChar); // 字典中没有该字,则将其添加入字典 if (keyChar == null) { charMap.put(beginChar, beginChar); keyChar = beginChar; } // 搜索当前节点的存储,查询对应keyChar的keyChar,如果没有则创建 DictSegment ds = lookforSegment(keyChar); // 处理keyChar对应的segment if (length > 1) { // 词元还没有完全加入词典树 ds.fillSegment(charArray, begin + 1, length - 1); } else if (length == 1) { // 已经是词元的最后一个char,设置当前节点状态为1,表明一个完整的词 ds.nodeState = 1; } }
/** 加载主词典及扩展词典 */ private void loadMainDict() { // 建立一个主词典实例 _MainDict = new DictSegment((char) 0); // 读取主词典文件 File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); InputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException e) { e.printStackTrace(); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } // 加载扩展词典 this.loadExtDict(); }
/** 加载用户扩展的停止词词典 */ private void loadStopWordDict() { // 建立主词典实例 _StopWords = new DictSegment((char) 0); // 读取主词典文件 File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); InputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException e) { e.printStackTrace(); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } // 加载扩展停止词典 List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys(); if (extStopWordDictFiles != null) { is = null; for (String extStopWordDictName : extStopWordDictFiles) { logger.info("[Dict Loading]" + extStopWordDictName); // 读取扩展词典文件 file = new File(configuration.getDictRoot(), extStopWordDictName); try { is = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("ik-analyzer", e); } // 如果找不到扩展的字典,则忽略 if (is == null) { continue; } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { // 加载扩展停止词典数据到内存中 _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } } } }