/**
   * Loads the preposition dictionary file into a fresh {@code _PrepDict}.
   *
   * <p>The file is located under {@code configuration.getDictRoot()} at
   * {@code Dictionary.PATH_DIC_PREP} and decoded as UTF-8. Each non-blank line is trimmed and
   * added as one dictionary entry. I/O errors while reading are logged and end the load early.
   *
   * @throws RuntimeException if the dictionary file cannot be opened
   */
  private void loadPrepDict() {
    _PrepDict = new DictSegment((char) 0);
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);

    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      logger.error("ik-analyzer", e);
      // Same message as before, but chain the cause so the failing path reaches the caller.
      throw new RuntimeException("Preposition Dictionary not found!!!", e);
    }

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String entry = theWord.trim();
        if (!"".equals(entry)) {
          _PrepDict.fillSegment(entry.toCharArray());
        }
      }
    } catch (IOException e) {
      logger.error("ik-analyzer", e);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }
  }
  /**
   * Loads the quantifier dictionary file into a fresh {@code _QuantifierDict}.
   *
   * <p>The file is located under {@code configuration.getDictRoot()} at
   * {@code Dictionary.PATH_DIC_QUANTIFIER} and decoded as UTF-8. Each non-blank line is trimmed
   * and added as one dictionary entry. I/O errors while reading are logged and end the load early.
   *
   * @throws RuntimeException if the dictionary file cannot be opened
   */
  private void loadQuantifierDict() {
    // Create the quantifier dictionary instance.
    _QuantifierDict = new DictSegment((char) 0);
    // Locate the quantifier dictionary file.
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);

    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      logger.error("ik-analyzer", e);
      // Fail with a descriptive error instead of the NullPointerException that wrapping a
      // null stream in an InputStreamReader would otherwise raise.
      throw new RuntimeException("Quantifier Dictionary not found!!!", e);
    }

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String entry = theWord.trim();
        if (!"".equals(entry)) {
          _QuantifierDict.fillSegment(entry.toCharArray());
        }
      }
    } catch (IOException ioe) {
      // Pass the exception along so the log shows why loading failed.
      logger.error("Quantifier Dictionary loading exception.", ioe);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }
  }
  /**
   * Loads the user-configured extension dictionaries into the main dictionary.
   *
   * <p>Each name returned by {@code configuration.getExtDictionarys()} is resolved against
   * {@code configuration.getDictRoot()} and read as UTF-8; every non-blank line is trimmed and
   * added to {@code _MainDict}. A file that cannot be opened is logged and skipped. This method
   * does not create {@code _MainDict}; {@code loadMainDict()} does so before calling it.
   */
  private void loadExtDict() {
    // Fetch the configured extension dictionary names.
    List<String> extDictFiles = configuration.getExtDictionarys();
    if (extDictFiles == null) {
      return;
    }

    for (String extDictName : extDictFiles) {
      logger.info("[Dict Loading]" + extDictName);
      File file = new File(configuration.getDictRoot(), extDictName);

      // The stream is scoped to this iteration so a stream left over from an earlier file
      // (e.g. when its close() failed) can never be re-read in place of a missing one.
      InputStream is;
      try {
        is = new FileInputStream(file);
      } catch (FileNotFoundException e) {
        // A missing extension dictionary is ignored.
        logger.error("ik-analyzer", e);
        continue;
      }

      try {
        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
        String theWord;
        while ((theWord = br.readLine()) != null) {
          String entry = theWord.trim();
          if (!"".equals(entry)) {
            // Add the extension entry to the in-memory main dictionary.
            _MainDict.fillSegment(entry.toCharArray());
          }
        }
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      } finally {
        try {
          is.close();
        } catch (IOException e) {
          logger.error("ik-analyzer", e);
        }
      }
    }
  }
  /**
   * Loads the main dictionary and then the extension dictionaries.
   *
   * <p>A fresh {@code _MainDict} is created and filled from the file at
   * {@code Dictionary.PATH_DIC_MAIN} under {@code configuration.getDictRoot()}, decoded as UTF-8.
   * Each non-blank line is trimmed and added as one entry. I/O errors while reading are logged
   * and end the read early; {@code loadExtDict()} is called afterwards either way.
   *
   * @throws RuntimeException if the main dictionary file cannot be opened
   */
  private void loadMainDict() {
    // Create the main dictionary instance.
    _MainDict = new DictSegment((char) 0);

    // Locate the main dictionary file.
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);

    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      // Report through the logger like the sibling loaders, then fail with a descriptive
      // error instead of the NullPointerException a null stream would cause below.
      logger.error("ik-analyzer", e);
      throw new RuntimeException("Main Dictionary not found!!!", e);
    }

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String entry = theWord.trim();
        if (!"".equals(entry)) {
          _MainDict.fillSegment(entry.toCharArray());
        }
      }
    } catch (IOException e) {
      logger.error("ik-analyzer", e);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }

    // Load the extension dictionaries on top of the main dictionary.
    this.loadExtDict();
  }
  /**
   * Loads the stop-word dictionary: the built-in file first, then every configured extension.
   *
   * <p>A fresh {@code _StopWords} is created and filled from the file at
   * {@code Dictionary.PATH_DIC_STOP} under {@code configuration.getDictRoot()}. Each name
   * returned by {@code configuration.getExtStopWordDictionarys()} is then resolved against the
   * same root and added as well. An extension file that cannot be opened is logged and skipped.
   *
   * @throws RuntimeException if the built-in stop-word file cannot be opened
   */
  private void loadStopWordDict() {
    // Create the stop-word dictionary instance.
    _StopWords = new DictSegment((char) 0);

    // Locate the built-in stop-word file.
    File file = new File(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);

    InputStream is;
    try {
      is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      // Report through the logger like the sibling loaders, then fail with a descriptive
      // error instead of the NullPointerException a null stream would cause when read.
      logger.error("ik-analyzer", e);
      throw new RuntimeException("Stop Word Dictionary not found!!!", e);
    }
    readStopWordsAndClose(is);

    // Load the extension stop-word dictionaries.
    List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
    if (extStopWordDictFiles == null) {
      return;
    }

    for (String extStopWordDictName : extStopWordDictFiles) {
      logger.info("[Dict Loading]" + extStopWordDictName);

      File extFile = new File(configuration.getDictRoot(), extStopWordDictName);
      // The stream is scoped to this iteration so a stream left over from an earlier file
      // can never be re-read in place of a missing one.
      InputStream extStream;
      try {
        extStream = new FileInputStream(extFile);
      } catch (FileNotFoundException e) {
        // A missing extension dictionary is ignored.
        logger.error("ik-analyzer", e);
        continue;
      }
      readStopWordsAndClose(extStream);
    }
  }

  /**
   * Adds every non-blank, trimmed line of the given UTF-8 stream to {@code _StopWords} and then
   * closes the stream. I/O errors are logged rather than propagated.
   *
   * @param is open stream to read from; always closed before this method returns
   */
  private void readStopWordsAndClose(InputStream is) {
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
      String theWord;
      while ((theWord = br.readLine()) != null) {
        String entry = theWord.trim();
        if (!"".equals(entry)) {
          _StopWords.fillSegment(entry.toCharArray());
        }
      }
    } catch (IOException e) {
      logger.error("ik-analyzer", e);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        logger.error("ik-analyzer", e);
      }
    }
  }
Example #6
0
 /**
  * IK主分词器构造函数
  *
  * @param input
  * @param isMaxWordLength 当为true时,分词器进行最大词长切分
  */
 public IKSegmentation(Reader input, boolean isMaxWordLength) {
   this.input = input;
   segmentBuff = new char[BUFF_SIZE];
   context = new Context(segmentBuff, isMaxWordLength);
   segmenters = Configuration.loadSegmenter();
 }