/**
 * Loads the preposition dictionary into a fresh {@code _PrepDict}.
 *
 * <p>The file {@code Dictionary.PATH_DIC_PREP} is resolved against the
 * configured dictionary root and read as UTF-8. Every non-blank line is
 * trimmed and added as one entry. I/O failures while reading or closing are
 * logged and not rethrown.
 *
 * @throws RuntimeException if the dictionary file cannot be opened
 */
private void loadPrepDict() {
    _PrepDict = new DictSegment((char) 0);
    File prepFile = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);

    final InputStream stream;
    try {
        stream = new FileInputStream(prepFile);
    } catch (FileNotFoundException e) {
        logger.error("ik-analyzer", e);
        throw new RuntimeException("Preposition Dictionary not found!!!");
    }

    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"), 512);
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            String word = line.trim();
            // Blank lines carry no entry and are skipped.
            if (word.length() > 0) {
                _PrepDict.fillSegment(word.toCharArray());
            }
        }
    } catch (IOException e) {
        logger.error("ik-analyzer", e);
    } finally {
        try {
            stream.close();
        } catch (IOException e) {
            logger.error("ik-analyzer", e);
        }
    }
}
/** 加载量词词典 */ private void loadQuantifierDict() { // 建立一个量词典实例 _QuantifierDict = new DictSegment((char) 0); // 读取量词词典文件 File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); InputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("ik-analyzer", e); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _QuantifierDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException ioe) { logger.error("Quantifier Dictionary loading exception."); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } }
/** 加载用户配置的扩展词典到主词库表 */ private void loadExtDict() { // 加载扩展词典配置 List<String> extDictFiles = configuration.getExtDictionarys(); if (extDictFiles != null) { InputStream is = null; for (String extDictName : extDictFiles) { // 读取扩展词典文件 logger.info("[Dict Loading]" + extDictName); File file = new File(configuration.getDictRoot(), extDictName); try { is = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("ik-analyzer", e); } // 如果找不到扩展的字典,则忽略 if (is == null) { continue; } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { // 加载扩展词典数据到主内存词典中 _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } } } }
/** 加载主词典及扩展词典 */ private void loadMainDict() { // 建立一个主词典实例 _MainDict = new DictSegment((char) 0); // 读取主词典文件 File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); InputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException e) { e.printStackTrace(); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } // 加载扩展词典 this.loadExtDict(); }
/** 加载用户扩展的停止词词典 */ private void loadStopWordDict() { // 建立主词典实例 _StopWords = new DictSegment((char) 0); // 读取主词典文件 File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); InputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException e) { e.printStackTrace(); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } // 加载扩展停止词典 List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys(); if (extStopWordDictFiles != null) { is = null; for (String extStopWordDictName : extStopWordDictFiles) { logger.info("[Dict Loading]" + extStopWordDictName); // 读取扩展词典文件 file = new File(configuration.getDictRoot(), extStopWordDictName); try { is = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("ik-analyzer", e); } // 如果找不到扩展的字典,则忽略 if (is == null) { continue; } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { // 加载扩展停止词典数据到内存中 _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { logger.error("ik-analyzer", e); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { logger.error("ik-analyzer", e); } } } } }
/** * IK主分词器构造函数 * * @param input * @param isMaxWordLength 当为true时,分词器进行最大词长切分 */ public IKSegmentation(Reader input, boolean isMaxWordLength) { this.input = input; segmentBuff = new char[BUFF_SIZE]; context = new Context(segmentBuff, isMaxWordLength); segmenters = Configuration.loadSegmenter(); }