/* (non-Javadoc)
   * @see org.apache.lucene.analysis.TokenStream#incrementToken()
   */
  @Override
  public boolean incrementToken() throws IOException {
    // Wipe every attribute left over from the previously emitted token.
    clearAttributes();
    skippedPositions = 0;

    final Lexeme current = _IKImplement.next();
    if (current == null) {
      // The segmenter has nothing more to hand out: signal end of stream.
      return false;
    }

    posIncrAtt.setPositionIncrement(skippedPositions + 1);

    // Copy the lexeme into the attributes: term text first, then its length.
    termAtt.append(current.getLexemeText());
    termAtt.setLength(current.getLength());

    // Offsets are passed through correctOffset() before being stored.
    offsetAtt.setOffset(
        correctOffset(current.getBeginPosition()), correctOffset(current.getEndPosition()));

    // Remember where this lexeme ended (raw, uncorrected position).
    endPosition = current.getEndPosition();

    // Record the lexeme's type string.
    typeAtt.setType(current.getLexemeTypeString());

    // A token was produced; the caller may ask for another one.
    return true;
  }
Ejemplo n.º 2
0
 /**
  * Splits {@code content} with an {@code IKSegmenter} and collects the lexeme texts that pass
  * two filters: {@code CharacterEncoding.isChinese} must accept the text, and the text must be
  * strictly longer than {@code skiplength}.
  *
  * <p>An {@link IOException} raised while segmenting is printed to standard error; whatever was
  * collected up to that point is still returned.
  *
  * @param content text to segment
  * @param skiplength words whose length is less than or equal to this value are dropped
  * @return the accepted words, in the order the segmenter produced them
  */
 public static LinkedList<String> segment(String content, int skiplength) {
   // The boolean flag is passed straight to IKSegmenter (presumably "smart" mode — confirm).
   IKSegmenter segmenter = new IKSegmenter(new StringReader(content), true);
   LinkedList<String> words = new LinkedList<String>();
   try {
     Lexeme current = segmenter.next();
     while (current != null) {
       String text = current.getLexemeText();
       // Keep only texts accepted by isChinese that are longer than the threshold.
       if (CharacterEncoding.isChinese(text) && text.length() > skiplength) {
         words.add(text);
       }
       current = segmenter.next();
     }
   } catch (IOException e) {
     e.printStackTrace();
   }
   return words;
 }
Ejemplo n.º 3
0
  /**
   * Segments {@code query} with an {@code IKSegmenter} and returns the text of every lexeme it
   * yields, in order.
   *
   * <p>Any exception thrown while building the segmenter or iterating over it is printed to
   * standard error, and the terms gathered so far (possibly none) are returned.
   *
   * @param query the query string to segment
   * @return the lexeme texts produced for {@code query}
   */
  public static List<String> segQuery(String query) {
    final List<String> terms = new ArrayList<String>();
    try {
      // The reader is built inside the try block so a failure here is reported the same way.
      final IKSegmenter segmenter = new IKSegmenter(new StringReader(query), true);
      for (Lexeme current = segmenter.next(); current != null; current = segmenter.next()) {
        terms.add(current.getLexemeText());
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
    return terms;
  }
Ejemplo n.º 4
0
 /**
  * Pulls the next lexeme from {@code aSeg} and returns its text.
  *
  * @return the lexeme text, or {@code null} when the segmenter returns no lexeme or an
  *     {@link IOException} occurs (the exception is printed to standard error)
  */
 @Override
 public String nextToken() {
   try {
     final Lexeme next = aSeg.next();
     if (next == null) {
       return null;
     }
     return next.lexemeText();
   } catch (IOException e) {
     e.printStackTrace();
     return null;
   }
 }
 /*
  * (non-Javadoc)
  * @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
  *
  * Resets this tokenizer: runs the superclass reset, hands the current `input` to the
  * IK segmenter, and zeroes the skipped-position counter.
  *
  * NOTE(review): the @see reference above names reset(java.io.Reader), but this override
  * takes no arguments — confirm the reference against the Lucene version in use.
  */
 @Override
 public void reset() throws IOException {
   // Superclass state is reset before the segmenter is touched.
   super.reset();
   // Re-point the segmenter at `input` (presumably the reader held by the base class — confirm).
   _IKImplement.reset(input);
   // Clear the skipped-position counter.
   skippedPositions = 0;
 }