/**
 * Returns this tokenizer to its initial counting state so that it can be
 * used on a new input.
 *
 * <p>After delegating to the superclass, the running offset and the
 * consumed-input length are zeroed, the token collector is emptied and the
 * pending token iterator is discarded.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
    super.reset();

    // Counters that track the position inside the character stream.
    this.inputLength = 0;
    this.offset = 0;

    // Drop tokens left over from the previous input.
    // NOTE(review): 'dissected' and 'beef' are not reset here - confirm that
    // this is intended when the tokenizer instance is reused.
    this.tokenCollector.clear();
    this.tokenIteractor = null;
}
@Override public boolean incrementToken() throws IOException { clearAttributes(); // 已经穷尽tokensIteractor的Token对象,则继续请求reader流入数据 while (tokenIteractor == null || !tokenIteractor.hasNext()) { // System.out.println(dissected); int read = 0; int remainning = -1; // 重新从reader读入字符前,buffer中还剩下的字符数,负数表示当前暂不需要从reader中读入字符 if (dissected >= beef.length()) { remainning = 0; } else if (dissected < 0) { remainning = bufferLength + dissected; } if (remainning >= 0) { if (remainning > 0) { System.arraycopy(buffer, -dissected, buffer, 0, remainning); } read = input.read(buffer, remainning, bufferLength - remainning); inputLength += read; int charCount = remainning + read; if (charCount < 0) { // reader已尽,按接口next()要求返回null. return false; } if (charCount < bufferLength) { buffer[charCount++] = 0; } // 构造“牛”,并使用knife“解”之 beef.set(0, charCount); offset += Math.abs(dissected); // offset -= remainning; dissected = 0; } dissected = knife.dissect(this, beef, dissected); // offset += read;// !!! tokenIteractor = tokenCollector.iterator(); } if (tokenIteractor.hasNext()) { // 返回tokensIteractor下一个Token对象 Token token = tokenIteractor.next(); termAtt.setEmpty(); termAtt.append(token.charSequence()); offsetAtt.setOffset(correctOffset(token.startOffset()), correctOffset(token.endOffset())); positionIncrementAttribute.setPositionIncrement(token.endOffset()); return true; } return tokenIteractor.hasNext(); }
@Override public Token next() throws IOException { // while (tokenIteractor == null || !tokenIteractor.hasNext()) { int read = 0; int remainning = -1; // the remaining word in buffer."-1" means that // no word in buffer if (dissected >= beef.length()) { remainning = 0; } else if (dissected < 0) { remainning = bufferLength + dissected; } if (remainning != -1) { if (remainning > 0) { System.arraycopy(buffer, -dissected, buffer, 0, remainning); } read = input.read(buffer, remainning, bufferLength - remainning); int charCount = remainning + read; if (charCount < 0) { // the reader in complete. return return null; } if (charCount < bufferLength) { buffer[charCount++] = 0; } // construct the dictionary. use knife to analyzer beef.set(0, charCount); offset -= remainning; dissected = 0; } dissected = knife.dissect((Collector) this, beef, dissected); offset += read; // !!! tokenIteractor = tokenCollector.iterator(); } // return tokensIteractor's next Token return tokenIteractor.next(); }
/**
 * Forwards a word to the token collector, shifting its bounds by this
 * object's current {@code offset} field.
 *
 * @param word   the text of the collected word
 * @param offset start position of the word, before the shift is applied
 * @param end    end position of the word, before the shift is applied
 */
public void collect(String word, int offset, int end) {
    // The parameter 'offset' shadows the field, so read the field once.
    final int base = this.offset;
    final int shiftedStart = base + offset;
    final int shiftedEnd = base + end;
    tokenCollector.collect(word, shiftedStart, shiftedEnd);
}
/**
 * Runs {@code LiteralExpressionTokenizer.processTextWithEscapeSequences} on
 * the given text with a fresh collector and asserts that the collected token
 * texts equal {@code expected}, in order.
 *
 * @param text     the text to process
 * @param expected the token texts expected from the collector, in order
 */
private static void doTest(final String text, final String... expected) {
    TokenCollector tokens = new TokenCollector();

    // The first argument is passed as null, exactly as the test requires.
    LiteralExpressionTokenizer.processTextWithEscapeSequences(null, text, tokens);

    assertOrderedEquals(tokens.getTokenTexts(), expected);
}