/**
   * Loads the preposition dictionary into a freshly created {@code _PrepDict} segment.
   *
   * <p>The file is {@code Dictionary.PATH_DIC_PREP} resolved against
   * {@code configuration.getDictRoot()}. It is read as UTF-8, one entry per line; each line is
   * trimmed and lines that are empty after trimming are skipped. I/O errors raised while reading
   * or closing the stream are logged and do not propagate.
   *
   * @throws RuntimeException if the dictionary file cannot be opened; the underlying
   *     {@link FileNotFoundException} is attached as the cause
   */
  private void loadPrepDict() {

    _PrepDict = new DictSegment((char) 0);
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);
    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      logger.error("ik-analyzer", e);
      // Same exception type and message as before, but with the cause chained so the
      // offending path is visible in the propagated stack trace, not only in the log.
      throw new RuntimeException("Preposition Dictionary not found!!!", e);
    }
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String trimmed = theWord.trim();
        if (trimmed.length() > 0) {
          _PrepDict.fillSegment(trimmed.toCharArray());
        }
      }
    } catch (IOException e) {
      logger.error("ik-analyzer", e);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }
  }
  /**
   * Loads the quantifier dictionary into a freshly created {@code _QuantifierDict} segment.
   *
   * <p>The file is {@code Dictionary.PATH_DIC_QUANTIFIER} resolved against
   * {@code configuration.getDictRoot()}. It is read as UTF-8, one entry per line; each line is
   * trimmed and lines that are empty after trimming are skipped. I/O errors raised while reading
   * or closing the stream are logged and do not propagate.
   *
   * @throws RuntimeException if the dictionary file cannot be opened; the underlying
   *     {@link FileNotFoundException} is attached as the cause
   */
  private void loadQuantifierDict() {
    // Start from an empty quantifier dictionary instance.
    _QuantifierDict = new DictSegment((char) 0);
    // Locate the quantifier dictionary file.
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      logger.error("ik-analyzer", e);
      // Fail fast with a descriptive error: carrying on with a null stream would only
      // surface as a bare NullPointerException from the InputStreamReader constructor.
      throw new RuntimeException("Quantifier Dictionary not found!!!", e);
    }
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String trimmed = theWord.trim();
        if (trimmed.length() > 0) {
          _QuantifierDict.fillSegment(trimmed.toCharArray());
        }
      }
    } catch (IOException ioe) {
      // Pass the exception along so the log shows why loading failed.
      logger.error("Quantifier Dictionary loading exception.", ioe);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }
  }
  /**
   * Loads the user-configured extension dictionaries into the main dictionary.
   *
   * <p>Every name returned by {@code configuration.getExtDictionarys()} is resolved against
   * {@code configuration.getDictRoot()} and read as UTF-8, one entry per line; each line is
   * trimmed, and non-empty entries are added to {@code _MainDict}. A file that cannot be opened
   * is logged and skipped. I/O errors raised while reading or closing a file are logged and do
   * not stop the remaining files from being processed. Does nothing when no extension
   * dictionaries are configured ({@code null} list).
   */
  private void loadExtDict() {
    // Fetch the configured extension dictionary names.
    List<String> extDictFiles = configuration.getExtDictionarys();
    if (extDictFiles == null) {
      return;
    }
    for (String extDictName : extDictFiles) {
      logger.info("[Dict Loading]" + extDictName);
      File file = new File(configuration.getDictRoot(), extDictName);

      // The stream is scoped to this iteration so that a stream left over from an earlier
      // file (e.g. one whose close() failed) can never be mistaken for the current one.
      InputStream is;
      try {
        is = new FileInputStream(file);
      } catch (FileNotFoundException e) {
        logger.error("ik-analyzer", e);
        // A missing extension dictionary is not fatal: skip it.
        continue;
      }

      try {
        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
        String theWord;
        while ((theWord = br.readLine()) != null) {
          String trimmed = theWord.trim();
          if (trimmed.length() > 0) {
            // Add the extension entry to the in-memory main dictionary.
            _MainDict.fillSegment(trimmed.toCharArray());
          }
        }
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      } finally {
        try {
          is.close();
        } catch (IOException e) {
          logger.error("ik-analyzer", e);
        }
      }
    }
  }
Example #4
0
  /**
   * Inserts the characters {@code charArray[begin]} through
   * {@code charArray[begin + length - 1]} into the dictionary tree below this segment.
   *
   * <p>This node handles the character at {@code begin}: it canonicalises the character through
   * {@code charMap}, obtains the child segment for it via {@code lookforSegment}, and then either
   * recurses into that child with the remaining characters or, on the last character, marks the
   * child as the end of a complete word ({@code nodeState = 1}).
   *
   * @param charArray characters of the word being inserted
   * @param begin index in {@code charArray} of the character handled by this node
   * @param length number of characters still to insert, counted from {@code begin}; a value
   *     less than or equal to zero inserts nothing
   */
  public synchronized void fillSegment(char[] charArray, int begin, int length) {
    // An empty range has no character to insert. Without this guard the lookup below would
    // either index past the array or create a child node that is never marked as a word.
    if (length <= 0) {
      return;
    }

    // Look up the canonical Character instance for the current char, registering it if absent.
    // Character.valueOf replaces the deprecated Character constructor.
    Character beginChar = Character.valueOf(charArray[begin]);
    Character keyChar = charMap.get(beginChar);
    if (keyChar == null) {
      charMap.put(beginChar, beginChar);
      keyChar = beginChar;
    }

    // Child segment for keyChar in this node's storage. NOTE(review): per the original comment,
    // lookforSegment creates the child when it does not exist yet; confirm against its source.
    DictSegment ds = lookforSegment(keyChar);
    if (length > 1) {
      // More characters remain: continue the insertion one level down.
      ds.fillSegment(charArray, begin + 1, length - 1);
    } else {
      // Last character of the word: flag the child as a complete word.
      ds.nodeState = 1;
    }
  }
  /**
   * Loads the main dictionary and then the extension dictionaries.
   *
   * <p>Creates a fresh {@code _MainDict} segment and fills it from
   * {@code Dictionary.PATH_DIC_MAIN} resolved against {@code configuration.getDictRoot()}. The
   * file is read as UTF-8, one entry per line; each line is trimmed and lines that are empty
   * after trimming are skipped. I/O errors raised while reading or closing the stream are logged
   * and do not propagate. Afterwards {@link #loadExtDict()} is invoked.
   *
   * @throws RuntimeException if the main dictionary file cannot be opened; the underlying
   *     {@link FileNotFoundException} is attached as the cause
   */
  private void loadMainDict() {
    // Start from an empty main dictionary instance.
    _MainDict = new DictSegment((char) 0);

    // Locate the main dictionary file.
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);

    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      logger.error("ik-analyzer", e);
      // Fail fast with a descriptive error: carrying on with a null stream would only
      // surface as a bare NullPointerException from the InputStreamReader constructor.
      throw new RuntimeException("Main Dictionary not found!!!", e);
    }

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String trimmed = theWord.trim();
        if (trimmed.length() > 0) {
          _MainDict.fillSegment(trimmed.toCharArray());
        }
      }
    } catch (IOException e) {
      logger.error("ik-analyzer", e);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }

    // Merge the user-configured extension dictionaries into the main dictionary.
    this.loadExtDict();
  }
  /**
   * Loads the stop-word dictionary and the user-configured extension stop-word dictionaries.
   *
   * <p>Creates a fresh {@code _StopWords} segment and fills it from
   * {@code Dictionary.PATH_DIC_STOP}, then from every name returned by
   * {@code configuration.getExtStopWordDictionarys()}; all paths are resolved against
   * {@code configuration.getDictRoot()}. An extension file that cannot be opened is logged and
   * skipped. I/O errors raised while reading or closing any file are logged and do not
   * propagate.
   *
   * @throws RuntimeException if the base stop-word dictionary file cannot be opened; the
   *     underlying {@link FileNotFoundException} is attached as the cause
   */
  private void loadStopWordDict() {
    // Start from an empty stop-word dictionary instance.
    _StopWords = new DictSegment((char) 0);

    // Locate the base stop-word dictionary file.
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);

    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      logger.error("ik-analyzer", e);
      // Fail fast with a descriptive error: carrying on with a null stream would only
      // surface as a bare NullPointerException from the InputStreamReader constructor.
      throw new RuntimeException("Stopword Dictionary not found!!!", e);
    }
    fillStopWordsFromStream(is);

    // Load the extension stop-word dictionaries, if any are configured.
    List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
    if (extStopWordDictFiles == null) {
      return;
    }
    for (String extStopWordDictName : extStopWordDictFiles) {
      logger.info("[Dict Loading]" + extStopWordDictName);

      File extFile = new File(configuration.getDictRoot(), extStopWordDictName);
      // The stream is scoped to this iteration so that a stream left over from an earlier
      // file (e.g. one whose close() failed) can never be mistaken for the current one.
      InputStream extStream;
      try {
        extStream = new FileInputStream(extFile);
      } catch (FileNotFoundException e) {
        logger.error("ik-analyzer", e);
        // A missing extension dictionary is not fatal: skip it.
        continue;
      }
      fillStopWordsFromStream(extStream);
    }
  }

  /**
   * Reads UTF-8 lines from {@code is}, adds every non-blank trimmed line to {@code _StopWords},
   * and closes the stream; I/O errors are logged rather than thrown.
   *
   * @param is open stream positioned at the start of a stop-word list; must not be null
   */
  private void fillStopWordsFromStream(InputStream is) {
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String trimmed = theWord.trim();
        if (trimmed.length() > 0) {
          _StopWords.fillSegment(trimmed.toCharArray());
        }
      }
    } catch (IOException e) {
      logger.error("ik-analyzer", e);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }
  }