/**
 * Resets this tokenizer so it can be used on a new input.
 *
 * <p>Delegates to the superclass first, then drops the current token iterator, empties the token
 * collector, and zeroes the input-length and offset counters.
 *
 * <p>NOTE(review): the {@code dissected} cursor and {@code beef} window used by
 * {@code incrementToken()} are not touched here - confirm that stale values cannot leak into
 * the next stream.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
 public void reset() throws IOException {
   super.reset();

   // Forget tokens produced for the previous input.
   tokenIteractor = null;
   tokenCollector.clear();

   // Restart the bookkeeping counters.
   inputLength = 0;
   offset = 0;
 }
  /**
   * Advances to the next token and publishes it through the term, offset and position-increment
   * attributes.
   *
   * <p>While the current token iterator is missing or exhausted, the method refills
   * {@code buffer} from {@code input} (keeping any characters the knife asked to see again),
   * hands the refreshed {@code beef} to {@code knife.dissect}, and takes a new iterator from
   * {@code tokenCollector}.
   *
   * @return {@code true} if a token was produced; {@code false} once the reader is exhausted and
   *     no buffered characters remain
   * @throws IOException if reading from the underlying input fails
   */
  @Override
  public boolean incrementToken() throws IOException {
    clearAttributes();

    // The current iterator is missing or exhausted: pull more characters and dissect again.
    while (tokenIteractor == null || !tokenIteractor.hasNext()) {
      int read = 0;
      // Characters still pending in the buffer before a refill; a negative value means that no
      // refill is needed for this round.
      int remainning = -1;
      if (tokenIteractor == null) {
        // Nothing has been dissected since construction or reset(): discard any cursor left over
        // from a previous stream so it is neither copied from nor added to the offset below.
        dissected = 0;
        remainning = 0;
      } else if (dissected >= beef.length()) {
        remainning = 0;
      } else if (dissected < 0) {
        // A negative cursor asks to re-read the buffer tail starting at index -dissected.
        remainning = bufferLength + dissected;
      }
      if (remainning >= 0) {
        if (remainning > 0) {
          // Move the not-yet-consumed tail to the front of the buffer.
          System.arraycopy(buffer, -dissected, buffer, 0, remainning);
        }
        read = input.read(buffer, remainning, bufferLength - remainning);
        if (read < 0) {
          if (remainning == 0) {
            // Reader exhausted and nothing left to dissect.
            return false;
          }
          // Reader exhausted but buffered characters are still pending: -1 is an end-of-stream
          // marker, not a count, so it must not shrink the pending region.
          read = 0;
        }
        inputLength += read;
        int charCount = remainning + read;
        if (charCount < bufferLength) {
          // Buffer not full: append a 0 char (presumably the end-of-input marker for the knife).
          buffer[charCount++] = 0;
        }
        // Expose the refreshed buffer window and let the knife dissect it from the start.
        beef.set(0, charCount);
        offset += Math.abs(dissected);
        dissected = 0;
      }
      dissected = knife.dissect(this, beef, dissected);
      tokenIteractor = tokenCollector.iterator();
    }

    // The loop only exits when the iterator has a token to hand out.
    Token token = tokenIteractor.next();
    termAtt.setEmpty();
    termAtt.append(token.charSequence());
    offsetAtt.setOffset(correctOffset(token.startOffset()), correctOffset(token.endOffset()));
    // A position increment counts token positions, not characters; use the default step of one
    // instead of the token's end offset.
    positionIncrementAttribute.setPositionIncrement(1);
    return true;
  }
Esempio n. 3
0
 /**
  * Returns the next token, refilling the buffer and dissecting it when needed.
  *
  * <p>While the current token iterator is missing or exhausted, the method refills
  * {@code buffer} from {@code input} (keeping any characters the knife asked to see again),
  * calls {@code knife.dissect} on the refreshed {@code beef}, and takes a new iterator from
  * {@code tokenCollector}.
  *
  * @return the next token, or {@code null} once the reader is exhausted and no buffered
  *     characters remain
  * @throws IOException if reading from the underlying input fails
  */
 @Override
 public Token next() throws IOException {
   while (tokenIteractor == null || !tokenIteractor.hasNext()) {
     int read = 0;
     // Characters still pending in the buffer before a refill; -1 means that no refill is
     // needed for this round.
     int remainning = -1;
     if (dissected >= beef.length()) {
       remainning = 0;
     } else if (dissected < 0) {
       // A negative cursor asks to re-read the buffer tail starting at index -dissected.
       remainning = bufferLength + dissected;
     }
     if (remainning != -1) {
       if (remainning > 0) {
         // Move the not-yet-consumed tail to the front of the buffer.
         System.arraycopy(buffer, -dissected, buffer, 0, remainning);
       }
       read = input.read(buffer, remainning, bufferLength - remainning);
       if (read < 0) {
         if (remainning == 0) {
           // Reader exhausted and nothing left to dissect.
           return null;
         }
         // Reader exhausted but buffered characters are still pending: -1 is an end-of-stream
         // marker, not a count, so it must not shrink the pending region or the offset.
         read = 0;
       }
       int charCount = remainning + read;
       if (charCount < bufferLength) {
         // Buffer not full: append a 0 char (presumably the end-of-input marker for the knife).
         buffer[charCount++] = 0;
       }
       // Expose the refreshed buffer window and let the knife dissect it from the start.
       beef.set(0, charCount);
       // The carried-over characters were already counted in offset; step back over them so
       // offset refers to buffer index 0 while dissecting.
       offset -= remainning;
       dissected = 0;
     }
     dissected = knife.dissect((Collector) this, beef, dissected);
     // Account for the characters read in this round.
     offset += read;
     tokenIteractor = tokenCollector.iterator();
   }
   // Hand out the next collected token.
   return tokenIteractor.next();
 }
Esempio n. 4
0
 /**
  * Forwards a word to the token collector, shifting both positions by this object's current
  * {@code offset} field.
  *
  * @param word the collected word
  * @param offset start position of the word, relative to this object's {@code offset} field
  * @param end end position of the word, relative to this object's {@code offset} field
  */
 public void collect(String word, int offset, int end) {
   final int base = this.offset;
   final int shiftedStart = base + offset;
   final int shiftedEnd = base + end;
   tokenCollector.collect(word, shiftedStart, shiftedEnd);
 }
 /**
  * Runs {@code LiteralExpressionTokenizer.processTextWithEscapeSequences} on {@code text} with a
  * fresh {@code TokenCollector} and asserts that the collected token texts equal
  * {@code expected}, in order.
  *
  * @param text the text to tokenize
  * @param expected the token texts expected, in order
  */
 private static void doTest(final String text, final String... expected) {
   final TokenCollector sink = new TokenCollector();
   LiteralExpressionTokenizer.processTextWithEscapeSequences(null, text, sink);
   assertOrderedEquals(sink.getTokenTexts(), expected);
 }