/**
 * Produces path-prefix tokens for the buffered input, shortest first, all stacked at the
 * same position (only the first token of the stream gets a position increment of 1).
 * On the first call the entire input is read and every delimiter position is recorded;
 * subsequent calls replay successively longer prefixes from the buffered text.
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  if (delimitersCount == -1) {
    // First call: consume the whole input, recording each delimiter position and
    // substituting 'replacement' for the delimiter char in the buffered copy.
    int length = 0;
    delimiterPositions.add(0);
    while (true) {
      int c = input.read();
      if (c < 0) {
        break;
      }
      length++;
      if (c == delimiter) {
        delimiterPositions.add(length);
        resultToken.append(replacement);
      } else {
        resultToken.append((char) c);
      }
    }
    delimitersCount = delimiterPositions.size();
    if (delimiterPositions.get(delimitersCount - 1) < length) {
      // Input did not end with a delimiter: treat end-of-input as a final boundary.
      delimiterPositions.add(length);
      delimitersCount++;
    }
    if (resultTokenBuffer.length < resultToken.length()) {
      resultTokenBuffer = new char[resultToken.length()];
    }
    resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
    resultToken.setLength(0);
    // End position of every emitted token, after dropping 'skip' trailing segments.
    int idx = delimitersCount - 1 - skip;
    if (idx >= 0) {
      // otherwise its ok, because we will skip and return false
      endPosition = delimiterPositions.get(idx);
    }
    finalOffset = correctOffset(length);
    posAtt.setPositionIncrement(1);
  } else {
    // Subsequent tokens stack at the same position as the first.
    posAtt.setPositionIncrement(0);
  }
  // Emit one token per call until all non-skipped boundaries are used up.
  while (skipped < delimitersCount - skip - 1) {
    int start = delimiterPositions.get(skipped);
    termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
    offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
    skipped++;
    return true;
  }
  return false;
}
@Override public final boolean incrementToken() throws IOException { if (isMailto) { termAtt.setEmpty(); // return the scheme + the mail part isMailto = false; posIncrAtt.setPositionIncrement(0); termAtt.copyBuffer(termBuffer.array(), 0, termBuffer.position()); return true; } if (input.incrementToken()) { final String type = typeAtt.type(); if (type.equals(TupleTokenizer.getTokenTypes()[TupleTokenizer.URI]) && this.isMailtoScheme()) { this.updateBuffer(); termBuffer.put(termAtt.buffer(), 0, termAtt.length()); // return only the mail part posIncrAtt.setPositionIncrement(1); termAtt.copyBuffer(termBuffer.array(), 7, termBuffer.position() - 7); } return true; } return false; }
@Override public boolean incrementToken() throws IOException { if (!terms.isEmpty()) { char[] buffer = terms.poll(); termAttribute.setEmpty(); termAttribute.copyBuffer(buffer, 0, buffer.length); posIncAttr.setPositionIncrement(1); return true; } if (!input.incrementToken()) { return false; } else { final char term[] = termAttribute.buffer(); final int length = termAttribute.length(); int k = 0; for (; k < length; k++) { if (term[k] == tokenDelimiter) { break; } } LinkedList<CharBuffer> buffers = permuteTerms(term, 0, length); Iterator iter = buffers.iterator(); while (iter.hasNext()) { CharBuffer cb = (CharBuffer) iter.next(); terms.add(cb.array()); } // we return true and leave the original token unchanged return true; } }
@Override public final boolean incrementToken() throws IOException { if (!tokens.isEmpty()) { assert current != null; CompoundToken token = tokens.removeFirst(); restoreState(current); // keep all other attributes untouched termAtt.setEmpty().append(token.txt); offsetAtt.setOffset(token.startOffset, token.endOffset); posIncAtt.setPositionIncrement(0); return true; } current = null; // not really needed, but for safety if (input.incrementToken()) { // Only words longer than minWordSize get processed if (termAtt.length() >= this.minWordSize) { decompose(); // only capture the state if we really need it for producing new tokens if (!tokens.isEmpty()) { current = captureState(); } } // return original token: return true; } else { return false; } }
/*
 * (non-Javadoc)
 * @see org.apache.lucene.analysis.TokenStream#incrementToken()
 */
@Override
public boolean incrementToken() throws IOException {
  // Clear all token attributes before producing the next token.
  clearAttributes();
  skippedPositions = 0;
  Lexeme nextLexeme = _IKImplement.next();
  if (nextLexeme != null) {
    posIncrAtt.setPositionIncrement(skippedPositions + 1);
    // Convert the Lexeme into token attributes.
    // Set the term text.
    termAtt.append(nextLexeme.getLexemeText());
    // Set the term length.
    termAtt.setLength(nextLexeme.getLength());
    // Set the term offsets.
    offsetAtt.setOffset(
        correctOffset(nextLexeme.getBeginPosition()), correctOffset(nextLexeme.getEndPosition()));
    // Record the end position of the last token produced.
    endPosition = nextLexeme.getEndPosition();
    // Record the lexeme type.
    typeAtt.setType(nextLexeme.getLexemeTypeString());
    // Return true: another token is available.
    return true;
  }
  // Return false: all tokens have been produced.
  return false;
}
/* * (non-Javadoc) * * @see org.apache.lucene.analysis.TokenStream#next() */ @Override public final boolean incrementToken() throws IOException { clearAttributes(); skippedPositions = 0; while (true) { int tokenType = scanner.getNextToken(); if (tokenType == StandardTokenizerImpl.YYEOF) { return false; } if (scanner.yylength() <= maxTokenLength) { posIncrAtt.setPositionIncrement(skippedPositions + 1); scanner.getText(termAtt); final int start = scanner.yychar(); offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length())); typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]); return true; } else // When we skip a too-long term, we still increment the // position increment skippedPositions++; } }
/*
 * (non-Javadoc)
 *
 * @see org.apache.lucene.analysis.TokenStream#next()
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  skippedPositions = 0;
  while (true) {
    int tokenType = scanner.getNextToken();
    if (tokenType == StandardTokenizerInterface.YYEOF) {
      // end of input
      return false;
    }
    if (scanner.yylength() <= maxTokenLength) {
      posIncrAtt.setPositionIncrement(skippedPositions + 1);
      scanner.getText(termAtt);
      final int start = scanner.yychar();
      offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
      // This 'if' should be removed in the next release. For now, it converts
      // invalid acronyms to HOST. When removed, only the 'else' part should
      // remain.
      if (tokenType == StandardTokenizer.ACRONYM_DEP) {
        typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
        termAtt.setLength(termAtt.length() - 1); // remove extra '.'
      } else {
        typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
      }
      return true;
    } else
      // When we skip a too-long term, we still increment the
      // position increment
      skippedPositions++;
  }
}
@Override public final void end() throws IOException { super.end(); // set final offset int finalOffset = correctOffset(this.endPosition); offsetAtt.setOffset(finalOffset, finalOffset); posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); }
@Override public final void end() throws IOException { super.end(); // set final offset int finalOffset = correctOffset(scanner.yychar() + scanner.yylength()); offsetAtt.setOffset(finalOffset, finalOffset); // adjust any skipped tokens posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); }
/**
 * Emits the next token produced by the Viterbi lattice parse, pulling tokens from the
 * tail of the pending list and stacking tokens that share a position (increment 0).
 */
@Override
public boolean incrementToken() throws IOException {
  // parse() is able to return w/o producing any new
  // tokens, when the tokens it had produced were entirely
  // punctuation. So we loop here until we get a real
  // token or we end:
  while (pending.size() == 0) {
    if (end) {
      return false;
    }
    // Push Viterbi forward some more:
    parse();
  }
  // Tokens are consumed from the tail of the pending list.
  final Token token = pending.remove(pending.size() - 1);
  int position = token.getPosition();
  int length = token.getLength();
  clearAttributes();
  assert length > 0;
  // System.out.println("off=" + token.getOffset() + " len=" + length + " vs " +
  // token.getSurfaceForm().length);
  termAtt.copyBuffer(token.getSurfaceForm(), token.getOffset(), length);
  offsetAtt.setOffset(correctOffset(position), correctOffset(position + length));
  basicFormAtt.setToken(token);
  posAtt.setToken(token);
  readingAtt.setToken(token);
  inflectionAtt.setToken(token);
  if (token.getPosition() == lastTokenPos) {
    // Stacked token at the same position (e.g. an alternate path): keep its own length.
    posIncAtt.setPositionIncrement(0);
    posLengthAtt.setPositionLength(token.getPositionLength());
  } else {
    // Positions must advance monotonically.
    assert token.getPosition() > lastTokenPos;
    posIncAtt.setPositionIncrement(1);
    posLengthAtt.setPositionLength(1);
  }
  if (VERBOSE) {
    System.out.println(Thread.currentThread().getName() + ": incToken: return token=" + token);
  }
  lastTokenPos = token.getPosition();
  return true;
}
/**
 * Emits every configured prefix (plus separator) before delegating to the wrapped
 * stream. The first prefix gets a real position; the rest stack at increment 0.
 */
@Override
public boolean incrementToken() throws IOException {
  if (currentPrefix == null) {
    // First call: start iterating; the first prefix advances the position.
    currentPrefix = prefixes.iterator();
    assert currentPrefix.hasNext() : "one or more prefixes needed";
    posAttr.setPositionIncrement(1);
  } else if (currentPrefix.hasNext()) {
    // Additional prefixes stack at the same position.
    posAttr.setPositionIncrement(0);
  } else {
    // All prefixes emitted: hand off to the wrapped stream.
    return input.incrementToken();
  }
  termAttr.setEmpty();
  termAttr.append(currentPrefix.next());
  termAttr.append(separator);
  return true;
}
/**
 * Emits front edge n-grams of each input token, from minGram up to maxGram code points,
 * one gram per call. Only the first gram of a token carries the (accumulated) position
 * increment; the rest stack at increment 0.
 */
@Override
public final boolean incrementToken() throws IOException {
  while (true) {
    if (curTermBuffer == null) {
      // No term in progress: pull the next token and snapshot its state.
      if (!input.incrementToken()) {
        return false;
      } else {
        curTermBuffer = termAtt.buffer().clone();
        curTermLength = termAtt.length();
        curCodePointCount = charUtils.codePointCount(termAtt);
        curGramSize = minGram;
        tokStart = offsetAtt.startOffset();
        tokEnd = offsetAtt.endOffset();
        // Accumulate increments so positions skipped upstream are not lost.
        savePosIncr += posIncrAtt.getPositionIncrement();
        savePosLen = posLenAtt.getPositionLength();
      }
    }
    if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
      if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
        // grab gramSize chars from front or back
        clearAttributes();
        offsetAtt.setOffset(tokStart, tokEnd);
        // first ngram gets increment, others don't
        if (curGramSize == minGram) {
          posIncrAtt.setPositionIncrement(savePosIncr);
          savePosIncr = 0;
        } else {
          posIncrAtt.setPositionIncrement(0);
        }
        posLenAtt.setPositionLength(savePosLen);
        // Convert the code-point gram size into a char length (handles surrogate pairs).
        final int charLength = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
        termAtt.copyBuffer(curTermBuffer, 0, charLength);
        curGramSize++;
        return true;
      }
    }
    // Done with this term: move on to the next input token.
    curTermBuffer = null;
  }
}
/**
 * Skips stop words, folding the position increments of the dropped terms into the
 * next emitted token so that position information is preserved.
 */
public final boolean incrementToken() throws IOException {
  int skippedIncrement = 0;
  while (input.incrementToken()) {
    if (stopWords.contains(termAttr.termBuffer(), 0, termAttr.termLength())) {
      // Stop word: remember its increment for the next kept token.
      skippedIncrement += posIncrAttr.getPositionIncrement();
    } else {
      posIncrAttr.setPositionIncrement(posIncrAttr.getPositionIncrement() + skippedIncrement);
      return true;
    }
  }
  return false; // input exhausted
}
/** Test stream emitting "a" (pos 0), "b" (pos 1), then "c" and "d" stacked on "b". */
@Override
public boolean incrementToken() {
  clearAttributes();
  switch (upto) {
    case 0:
      posIncr.setPositionIncrement(1);
      term.setEmpty().append("a");
      break;
    case 1:
      posIncr.setPositionIncrement(1);
      term.setEmpty().append("b");
      break;
    case 2:
      posIncr.setPositionIncrement(0);
      term.setEmpty().append("c");
      break;
    case 3:
      posIncr.setPositionIncrement(0);
      term.setEmpty().append("d");
      break;
    default:
      return false; // all four tokens emitted
  }
  upto++;
  return true;
}
/**
 * Refills the character buffer from the reader and lets the "knife" dissect it into
 * tokens, then emits those tokens one per call.
 */
@Override
public boolean incrementToken() throws IOException {
  clearAttributes();
  // The token iterator is exhausted: keep reading from the reader and dissecting
  // until it yields more tokens.
  while (tokenIteractor == null || !tokenIteractor.hasNext()) {
    // System.out.println(dissected);
    int read = 0;
    int remainning = -1; // chars still left in the buffer before a refill; negative means no refill is needed yet
    if (dissected >= beef.length()) {
      remainning = 0;
    } else if (dissected < 0) {
      remainning = bufferLength + dissected;
    }
    if (remainning >= 0) {
      if (remainning > 0) {
        // Shift the undissected tail of the buffer to the front before refilling.
        System.arraycopy(buffer, -dissected, buffer, 0, remainning);
      }
      read = input.read(buffer, remainning, bufferLength - remainning);
      inputLength += read;
      int charCount = remainning + read;
      if (charCount < 0) {
        // Reader exhausted: per the next() contract, signal end of stream.
        return false;
      }
      if (charCount < bufferLength) {
        buffer[charCount++] = 0;
      }
      // Build the "beef" (raw text window) and let the knife dissect it.
      beef.set(0, charCount);
      offset += Math.abs(dissected);
      // offset -= remainning;
      dissected = 0;
    }
    dissected = knife.dissect(this, beef, dissected);
    // offset += read;// !!!
    tokenIteractor = tokenCollector.iterator();
  }
  if (tokenIteractor.hasNext()) {
    // Emit the iterator's next Token.
    Token token = tokenIteractor.next();
    termAtt.setEmpty();
    termAtt.append(token.charSequence());
    offsetAtt.setOffset(correctOffset(token.startOffset()), correctOffset(token.endOffset()));
    // NOTE(review): the position increment is set to the token's END OFFSET, not a
    // small positional delta (normally 1). This looks wrong — confirm intent before
    // relying on position data from this stream.
    positionIncrementAttribute.setPositionIncrement(token.endOffset());
    return true;
  }
  return tokenIteractor.hasNext();
}
/** Replays a fixed array of pre-built Tokens, copying every attribute of each onto this stream. */
@Override
public boolean incrementToken() throws IOException {
  if (index >= tokens.length) {
    return false;
  }
  clearAttributes();
  final Token token = tokens[index++];
  // Mirror each attribute of the stored Token onto this stream.
  termAtt.setEmpty().append(token);
  offsetAtt.setOffset(token.startOffset(), token.endOffset());
  posIncAtt.setPositionIncrement(token.getPositionIncrement());
  flagsAtt.setFlags(token.getFlags());
  typeAtt.setType(token.type());
  payloadAtt.setPayload(token.getPayload());
  return true;
}
/** Drops every "a" token, folding the dropped increments into the next kept token. */
@Override
public boolean incrementToken() throws IOException {
  for (;;) {
    if (!input.incrementToken()) {
      return false;
    }
    if ("a".equals(termAtt.toString())) {
      // Dropped token: remember its increment for the next kept one.
      pendingPosInc += posIncAtt.getPositionIncrement();
    } else {
      posIncAtt.setPositionIncrement(pendingPosInc + posIncAtt.getPositionIncrement());
      pendingPosInc = 0;
      return true;
    }
  }
}
/**
 * Builds the next shingle (token n-gram) from the sliding input window. Shingles built
 * at the same input position after the first stack at position increment 0; all-filler
 * shingles are suppressed.
 */
@Override
public boolean incrementToken() throws IOException {
  boolean tokenAvailable = false;
  int builtGramSize = 0;
  if (gramSize.atMinValue() || inputWindow.size() < gramSize.getValue()) {
    // Need a fresh window position: shift and start building a new shingle.
    shiftInputWindow();
    gramBuilder.setLength(0);
  } else {
    // Extend the shingle built on the previous call at this position.
    builtGramSize = gramSize.getPreviousValue();
  }
  if (inputWindow.size() >= gramSize.getValue()) {
    boolean isAllFiller = true;
    InputWindowToken nextToken = null;
    Iterator<InputWindowToken> iter = inputWindow.iterator();
    // Append window tokens (joined by tokenSeparator) until the target gram size is reached.
    for (int gramNum = 1; iter.hasNext() && builtGramSize < gramSize.getValue(); ++gramNum) {
      nextToken = iter.next();
      if (builtGramSize < gramNum) {
        if (builtGramSize > 0) {
          gramBuilder.append(tokenSeparator);
        }
        gramBuilder.append(nextToken.termAtt.buffer(), 0, nextToken.termAtt.length());
        ++builtGramSize;
      }
      if (isAllFiller && nextToken.isFiller) {
        // A shingle made only of filler tokens is never emitted; skip this size.
        if (gramNum == gramSize.getValue()) {
          gramSize.advance();
        }
      } else {
        isAllFiller = false;
      }
    }
    if (!isAllFiller && builtGramSize == gramSize.getValue()) {
      // Base the emitted token's attributes on the first token in the window.
      inputWindow.getFirst().attSource.copyTo(this);
      posIncrAtt.setPositionIncrement(isOutputHere ? 0 : 1);
      termAtt.setEmpty().append(gramBuilder);
      if (gramSize.getValue() > 1) {
        typeAtt.setType(tokenType);
        noShingleOutput = false;
      }
      offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset());
      posLenAtt.setPositionLength(builtGramSize);
      isOutputHere = true;
      gramSize.advance();
      tokenAvailable = true;
    }
  }
  return tokenAvailable;
}
@Override public boolean incrementToken() throws IOException { // return the first non-stop word found int skippedPositions = 0; while (input.incrementToken()) { if (!filter.run(termAtt.buffer(), 0, termAtt.length())) { if (enablePositionIncrements) { posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); } return true; } skippedPositions += posIncrAtt.getPositionIncrement(); } // reached EOS -- return false return false; }
@Override public final boolean incrementToken() throws IOException { if (addSynonym) { // inject our synonym clearAttributes(); termAtt.setEmpty().append("國"); posIncAtt.setPositionIncrement(0); addSynonym = false; return true; } if (input.incrementToken()) { addSynonym = termAtt.toString().equals("国"); return true; } else { return false; } }
@Override public boolean incrementToken() { if (upto < tokens.length) { final Token token = tokens[upto++]; // TODO: can we just capture/restoreState so // we get all attrs...? clearAttributes(); termAtt.setEmpty(); termAtt.append(token.toString()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); posLengthAtt.setPositionLength(token.getPositionLength()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); payloadAtt.setPayload(token.getPayload()); return true; } else { return false; } }
@Override public boolean incrementToken() throws IOException { if (tokens == null) { fillTokens(); } // System.out.println("graphTokenizer: incr upto=" + upto + " vs " + tokens.size()); if (upto == tokens.size()) { // System.out.println(" END @ " + tokens.size()); return false; } final Token t = tokens.get(upto++); // System.out.println(" return token=" + t); clearAttributes(); termAtt.append(t.toString()); offsetAtt.setOffset(t.startOffset(), t.endOffset()); posIncrAtt.setPositionIncrement(t.getPositionIncrement()); posLengthAtt.setPositionLength(t.getPositionLength()); return true; }
/**
 * Pops the next morpheme token off the queue and copies it onto this stream's
 * attributes. On the first token of a group, captures state (with an emptied term)
 * for later stacked tokens and preserves the incoming position increment.
 */
private void setAttributesFromQueue(boolean isFirst) {
  final KoreanToken iw = morphQueue.removeFirst();
  if (isFirst && !morphQueue.isEmpty()) {
    // our queue has more elements remaining (e.g. we decompounded)
    // capture state for those. We set the term attribute to be empty
    // so we save lots of array copying later.
    termAtt.setEmpty();
    currentState = captureState();
  }
  termAtt.setEmpty().append(iw.getTerm());
  offsetAtt.setOffset(iw.getOffset(), iw.getOffset() + iw.getLength());
  morphAtt.setToken(iw);
  // on the first Token we preserve incoming increment:
  if (!isFirst) {
    posIncrAtt.setPositionIncrement(iw.getPosInc());
  }
  // TODO: How to handle PositionLengthAttribute correctly?
}
/**
 * Generates a word/number part, updating the appropriate attributes
 *
 * @param isSingleWord {@code true} if the generation is occurring from a single word, {@code
 *     false} otherwise
 */
private void generatePart(boolean isSingleWord) {
  clearAttributes();
  // Copy just the current subword span out of the saved term buffer.
  termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
  int startOffset = savedStartOffset + iterator.current;
  int endOffset = savedStartOffset + iterator.end;
  if (hasIllegalOffsets) {
    // historically this filter did this regardless for 'isSingleWord',
    // but we must do a sanity check:
    if (isSingleWord && startOffset <= savedEndOffset) {
      offsetAttribute.setOffset(startOffset, savedEndOffset);
    } else {
      // Fall back to the original token's full offsets when the computed ones
      // would be inconsistent.
      offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
    }
  } else {
    offsetAttribute.setOffset(startOffset, endOffset);
  }
  posIncAttribute.setPositionIncrement(position(false));
  typeAttribute.setType(savedType);
}
/**
 * Emits the given characters as the current token: sets the term text (after optional
 * whitespace replacement), back-dates the start offset so the span ends at the current
 * end offset, and bumps the running position increment.
 */
private void emit(char[] token) {
  Log.debug("emit: " + new String(token));
  if (replaceWhitespaceWith != null) {
    token = replaceWhiteSpace(token);
  }
  final CharTermAttribute termAttr = getTermAttribute();
  termAttr.setEmpty();
  termAttr.append(new StringBuilder().append(token));
  final OffsetAttribute offAttr = getOffsetAttribute();
  if (offAttr != null && offAttr.endOffset() >= token.length) {
    // Start offset = end offset minus the token length.
    offAttr.setOffset(offAttr.endOffset() - token.length, offAttr.endOffset());
  }
  final PositionIncrementAttribute pia = getPositionIncrementAttribute();
  if (pia != null) {
    pia.setPositionIncrement(++positionIncr);
  }
  lastEmitted = token;
}
@Override public final boolean incrementToken() throws IOException { // initialise the numeric attribute if (!isInitialised) { final long value = parser.parseAndConvert(this.input); numericAtt.init(parser.getNumericType(), value, parser.getValueSize()); isInitialised = true; } // this will only clear all other attributes in this TokenStream this.clearAttributes(); // increment the shift and generate next token final boolean hasNext = numericAtt.incrementShift(termAtt); // set other attributes after the call to incrementShift since getShift // is undefined before first call typeAtt.setType( (numericAtt.getShift() == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC); posIncrAtt.setPositionIncrement((numericAtt.getShift() == 0) ? 1 : 0); return hasNext; }
/**
 * Word-delimiter main loop: pulls input tokens, splits them into subwords via the
 * iterator, and emits originals, parts, and concatenations according to the configured
 * flags, buffering and sorting stacked outputs as needed.
 */
@Override
public boolean incrementToken() throws IOException {
  while (true) {
    if (!hasSavedState) {
      // process a new input word
      if (!input.incrementToken()) {
        return false;
      }
      int termLength = termAttribute.length();
      char[] termBuffer = termAttribute.buffer();
      accumPosInc += posIncAttribute.getPositionIncrement();
      iterator.setText(termBuffer, termLength);
      iterator.next();
      // word of no delimiters, or protected word: just return it
      if ((iterator.current == 0 && iterator.end == termLength)
          || (protWords != null && protWords.contains(termBuffer, 0, termLength))) {
        posIncAttribute.setPositionIncrement(accumPosInc);
        accumPosInc = 0;
        first = false;
        return true;
      }
      // word of simply delimiters
      if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL)) {
        // if the posInc is 1, simply ignore it in the accumulation
        // TODO: proper hole adjustment (FilteringTokenFilter-like) instead of this previous
        // logic!
        if (posIncAttribute.getPositionIncrement() == 1 && !first) {
          accumPosInc--;
        }
        continue;
      }
      saveState();
      hasOutputToken = false;
      hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
      lastConcatCount = 0;
      if (has(PRESERVE_ORIGINAL)) {
        posIncAttribute.setPositionIncrement(accumPosInc);
        accumPosInc = 0;
        first = false;
        return true;
      }
    }
    // at the end of the string, output any concatenations
    if (iterator.end == WordDelimiterIterator.DONE) {
      if (!concat.isEmpty()) {
        if (flushConcatenation(concat)) {
          buffer();
          continue;
        }
      }
      if (!concatAll.isEmpty()) {
        // only if we haven't output this same combo above!
        if (concatAll.subwordCount > lastConcatCount) {
          concatAll.writeAndClear();
          buffer();
          continue;
        }
        concatAll.clear();
      }
      // Replay buffered (possibly sorted) stacked outputs one per call.
      if (bufferedPos < bufferedLen) {
        if (bufferedPos == 0) {
          sorter.sort(0, bufferedLen);
        }
        clearAttributes();
        restoreState(buffered[bufferedPos++]);
        if (first && posIncAttribute.getPositionIncrement() == 0) {
          // can easily happen with strange combinations (e.g. not outputting numbers,
          // but concat-all)
          posIncAttribute.setPositionIncrement(1);
        }
        first = false;
        return true;
      }
      // no saved concatenations, on to the next input word
      bufferedPos = bufferedLen = 0;
      hasSavedState = false;
      continue;
    }
    // word surrounded by delimiters: always output
    if (iterator.isSingleWord()) {
      generatePart(true);
      iterator.next();
      first = false;
      return true;
    }
    int wordType = iterator.type();
    // do we already have queued up incompatible concatenations?
    if (!concat.isEmpty() && (concat.type & wordType) == 0) {
      if (flushConcatenation(concat)) {
        hasOutputToken = false;
        buffer();
        continue;
      }
      hasOutputToken = false;
    }
    // add subwords depending upon options
    if (shouldConcatenate(wordType)) {
      if (concat.isEmpty()) {
        concat.type = wordType;
      }
      concatenate(concat);
    }
    // add all subwords (catenateAll)
    if (has(CATENATE_ALL)) {
      concatenate(concatAll);
    }
    // if we should output the word or number part
    if (shouldGenerateParts(wordType)) {
      generatePart(false);
      buffer();
    }
    iterator.next();
  }
}
/** Copies the given Token's offsets, position increment, and term text onto this stream's attributes. */
private void applyToken(Token token) {
  offsetAtt.setOffset(token.startOffset(), token.endOffset());
  posAtt.setPositionIncrement(token.getPositionIncrement());
  termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
}
/**
 * Sets the position increment on the given attribute source, adding the
 * PositionIncrementAttribute first if it is not already present.
 */
public static void setPositionIncrement(AttributeSource source, int posIncr) {
  source.addAttribute(PositionIncrementAttribute.class).setPositionIncrement(posIncr);
}
/**
 * Asserts that the given TokenStream produces exactly the expected terms, and — where
 * the corresponding expectation array is non-null — the expected offsets, types,
 * position increments, and final offset. Also verifies clearAttributes() is called
 * correctly by the stream for every token.
 */
public static void assertTokenStreamContents(
    TokenStream ts,
    String[] output,
    int startOffsets[],
    int endOffsets[],
    String types[],
    int posIncrements[],
    Integer finalOffset)
    throws IOException {
  assertNotNull(output);
  CheckClearAttributesAttribute checkClearAtt =
      (CheckClearAttributesAttribute) ts.addAttribute(CheckClearAttributesAttribute.class);
  assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class));
  TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
  // Optional attributes are only looked up when the caller supplied expectations.
  OffsetAttribute offsetAtt = null;
  if (startOffsets != null || endOffsets != null || finalOffset != null) {
    assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
    offsetAtt = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
  }
  TypeAttribute typeAtt = null;
  if (types != null) {
    assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class));
    typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
  }
  PositionIncrementAttribute posIncrAtt = null;
  if (posIncrements != null) {
    assertTrue(
        "has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
    posIncrAtt = (PositionIncrementAttribute) ts.getAttribute(PositionIncrementAttribute.class);
  }
  ts.reset();
  for (int i = 0; i < output.length; i++) {
    // extra safety to enforce, that the state is not preserved and also assign bogus values
    ts.clearAttributes();
    termAtt.setTermBuffer("bogusTerm");
    if (offsetAtt != null) offsetAtt.setOffset(14584724, 24683243);
    if (typeAtt != null) typeAtt.setType("bogusType");
    if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
    checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
    assertTrue("token " + i + " does not exist", ts.incrementToken());
    assertTrue(
        "clearAttributes() was not called correctly in TokenStream chain",
        checkClearAtt.getAndResetClearCalled());
    assertEquals("term " + i, output[i], termAtt.term());
    if (startOffsets != null)
      assertEquals("startOffset " + i, startOffsets[i], offsetAtt.startOffset());
    if (endOffsets != null) assertEquals("endOffset " + i, endOffsets[i], offsetAtt.endOffset());
    if (types != null) assertEquals("type " + i, types[i], typeAtt.type());
    if (posIncrements != null)
      assertEquals("posIncrement " + i, posIncrements[i], posIncrAtt.getPositionIncrement());
  }
  // The stream must report exhaustion exactly after the expected tokens.
  assertFalse("end of stream", ts.incrementToken());
  ts.end();
  if (finalOffset != null)
    assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
  ts.close();
}