@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  if (delimitersCount == -1) {
    int length = 0;
    delimiterPositions.add(0);
    while (true) {
      int c = input.read();
      if (c < 0) {
        break;
      }
      length++;
      if (c == delimiter) {
        delimiterPositions.add(length);
        resultToken.append(replacement);
      } else {
        resultToken.append((char) c);
      }
    }
    delimitersCount = delimiterPositions.size();
    if (delimiterPositions.get(delimitersCount - 1) < length) {
      delimiterPositions.add(length);
      delimitersCount++;
    }
    if (resultTokenBuffer.length < resultToken.length()) {
      resultTokenBuffer = new char[resultToken.length()];
    }
    resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
    resultToken.setLength(0);
    int idx = delimitersCount - 1 - skip;
    if (idx >= 0) { // otherwise it's OK, because we will skip and return false
      endPosition = delimiterPositions.get(idx);
    }
    finalOffset = correctOffset(length);
    posAtt.setPositionIncrement(1);
  } else {
    posAtt.setPositionIncrement(0);
  }
  // acts as an 'if': returns on the first iteration when a token remains
  while (skipped < delimitersCount - skip - 1) {
    int start = delimiterPositions.get(skipped);
    termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
    offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
    skipped++;
    return true;
  }
  return false;
}
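A hedged consumption sketch for the tokenizer above. It matches the shape of Lucene's ReversePathHierarchyTokenizer; assuming delimiter '/' and skip=0, the input "/a/b/c" would come back as "/a/b/c", "a/b/c", "b/c", "c", the first token with posIncr=1 and the rest stacked at the same position. The helper below is generic and assumes the usual java.io/java.util and Lucene analysis imports:

// Generic drain loop for any Tokenizer built around the method above:
static List<String> drain(Tokenizer t, String text) throws IOException {
  t.setReader(new StringReader(text));
  CharTermAttribute term = t.addAttribute(CharTermAttribute.class);
  List<String> out = new ArrayList<>();
  t.reset();
  while (t.incrementToken()) {
    out.add(term.toString());
  }
  t.end();
  t.close();
  return out;
}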
private String tokenizerToString(Tokenizer tokenizer) throws Exception {
  OffsetAttribute extOffset = tokenizer.addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute posIncrAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
  PositionLengthAttribute posLengthAtt = tokenizer.addAttribute(PositionLengthAttribute.class);
  CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
  TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
  SemanticClassAttribute semanticClass = tokenizer.addAttribute(SemanticClassAttribute.class);
  PartOfSpeechAttribute pos = tokenizer.addAttribute(PartOfSpeechAttribute.class);

  StringBuilder result = new StringBuilder();
  tokenizer.reset();
  while (tokenizer.incrementToken()) {
    result.append(new String(term.buffer(), 0, term.length())).append(":");
    result.append(type.type()).append(":");
    result.append(pos.partOfSpeech()).append(":");
    result.append(semanticClass.semanticClass()).append(":");
    result.append(posIncrAtt.getPositionIncrement()).append(":");
    result.append(posLengthAtt.getPositionLength()).append(":");
    result.append(extOffset.startOffset()).append(":");
    result.append(extOffset.endOffset());
    result.append(",");
  }
  tokenizer.end();
  return result.toString();
}
/**
 * Sugar: analyzes the text with the analyzer and separates by {@link
 * SynonymMap#WORD_SEPARATOR}. reuse and its chars must not be null.
 */
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    reuse.clear();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
      }
      reuse.grow(reuse.length() + length + 1); /* current + word + separator */
      int end = reuse.length();
      if (reuse.length() > 0) {
        reuse.setCharAt(end++, SynonymMap.WORD_SEPARATOR);
        reuse.setLength(reuse.length() + 1);
      }
      System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length);
      reuse.setLength(reuse.length() + length);
    }
    ts.end();
  }
  if (reuse.length() == 0) {
    throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
  }
  return reuse.get();
}
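A hedged usage sketch for analyze() above: assuming it lives on a SynonymMap.Builder-style helper ("builder" is a hypothetical instance name) whose analyzer splits on whitespace, multi-word input comes back joined by SynonymMap.WORD_SEPARATOR:

// Hypothetical caller; "builder" is an instance of the class defining analyze().
CharsRefBuilder reuse = new CharsRefBuilder();
CharsRef analyzed = builder.analyze("wi fi", reuse);
// With a whitespace analyzer the result is "wi" + SynonymMap.WORD_SEPARATOR + "fi";
// split it back with:
String[] words = analyzed.toString().split(String.valueOf(SynonymMap.WORD_SEPARATOR));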
@Override
public final boolean incrementToken() throws IOException {
  if (isMailto) {
    termAtt.setEmpty();
    // return the scheme + the mail part
    isMailto = false;
    posIncrAtt.setPositionIncrement(0);
    termAtt.copyBuffer(termBuffer.array(), 0, termBuffer.position());
    return true;
  }
  if (input.incrementToken()) {
    final String type = typeAtt.type();
    if (type.equals(TupleTokenizer.getTokenTypes()[TupleTokenizer.URI]) && this.isMailtoScheme()) {
      this.updateBuffer();
      termBuffer.put(termAtt.buffer(), 0, termAtt.length());
      // return only the mail part: skip the 7-char "mailto:" prefix
      posIncrAtt.setPositionIncrement(1);
      termAtt.copyBuffer(termBuffer.array(), 7, termBuffer.position() - 7);
    }
    return true;
  }
  return false;
}
@Override
public final void end() throws IOException {
  super.end();
  // set final offset
  int finalOffset = correctOffset(this.endPosition);
  offsetAtt.setOffset(finalOffset, finalOffset);
  posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
private static String[] groupTokens(Analyzer analyzer, String input) throws IOException {
  if (Resources.debug) {
    Resources.LOGGER.debug("TokenParser:" + input);
    Resources.LOGGER.debug("Analyzer:" + analyzer.getClass());
  }
  TokenStream tokenStream = analyzer.tokenStream("input", input);
  OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute positionIncrementAttribute =
      tokenStream.addAttribute(PositionIncrementAttribute.class);
  CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAttribute = tokenStream.addAttribute(TypeAttribute.class);
  tokenStream.reset();
  int position = 0;
  List<TermInfo> infos = new ArrayList<TermInfo>();
  while (tokenStream.incrementToken()) {
    int increment = positionIncrementAttribute.getPositionIncrement();
    if (increment > 0) {
      position = position + increment;
      if (Resources.debug) {
        Resources.LOGGER.debug(position + ":");
      }
    }
    int startOffset = offsetAttribute.startOffset();
    int endOffset = offsetAttribute.endOffset();
    String term = charTermAttribute.toString();
    TermInfo info = new TermInfo();
    info.setStart(startOffset);
    info.setEnd(endOffset);
    infos.add(info);
    if (Resources.debug) {
      Resources.LOGGER.debug(
          "[" + term + "]" + ":(" + startOffset + "-->" + endOffset + "):" + typeAttribute.type());
    }
  }
  tokenStream.end();
  tokenStream.close();

  Stack<TermInfo> tiStack = groupTokenInfos(infos);
  List<String> terms = new ArrayList<String>();
  while (!tiStack.isEmpty()) {
    TermInfo termInfo = tiStack.pop();
    if (termInfo.getEnd() <= input.length() && termInfo.getStart() >= 1) {
      String term = input.substring(termInfo.getStart(), termInfo.getEnd());
      terms.add(term);
    }
  }
  return terms.toArray(new String[] {});
}
@Override
public final void end() throws IOException {
  super.end();
  // set final offset
  int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
  offsetAtt.setOffset(finalOffset, finalOffset);
  // adjust any skipped tokens
  posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
  PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    System.out.println("posIncr=" + posIncr.getPositionIncrement());
  }
  stream.end(); // per the TokenStream contract, call end() before close()
  stream.close();
}
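For instance (a hedged example: it assumes a Lucene version where EnglishAnalyzer has a no-arg constructor and "the" is in its default stop set), the token after a removed stop word reports an increment greater than one:

displayPositionIncrements(new EnglishAnalyzer(), "over the lazy dog");
// expected output (approximate):
// posIncr=1   ("over")
// posIncr=2   ("lazy" -- the removed "the" leaves a position gap)
// posIncr=1   ("dog")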
public TermSubQueryFactory termToFactory(String fieldname, Term sourceTerm, FieldBoost boost)
    throws IOException {
  CacheKey cacheKey = null;
  if (termQueryCache != null) {
    cacheKey = new CacheKey(fieldname, sourceTerm);
    TermQueryCacheValue cacheValue = termQueryCache.get(cacheKey);
    if (cacheValue != null) {
      // The cache references factories with pre-analyzed terms, or cache entries without a
      // query factory if the term does not exist in the index. cacheValue.hasQuery() returns
      // true/false correspondingly.
      // Cache entries don't have a boost factor, it is only added later via the queryFactory.
      return (cacheValue.hasQuery()) ? new TermSubQueryFactory(cacheValue, boost) : null;
    }
  }
  LuceneQueryFactoryAndPRMSQuery root = null;
  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldname, new CharSequenceReader(sourceTerm));
    CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAttr = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    PositionSequence<org.apache.lucene.index.Term> sequence = new PositionSequence<>();
    while (ts.incrementToken()) {
      int inc = posIncAttr.getPositionIncrement();
      if (inc > 0 || sequence.isEmpty()) {
        sequence.nextPosition();
      }
      sequence.addElement(new org.apache.lucene.index.Term(fieldname, new BytesRef(termAttr)));
    }
    ts.end(); // complete the consumer workflow before closing
    root = positionSequenceToQueryFactoryAndPRMS(sequence);
  } finally {
    if (ts != null) {
      try {
        ts.close();
      } catch (IOException e) {
        // ignored: failure to close the consumed stream must not mask the result
      }
    }
  }
  putQueryFactoryAndPRMSQueryIntoCache(cacheKey, root);
  return root == null ? null : new TermSubQueryFactory(root, boost);
}
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
  CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
  TypeAttribute type = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payload = stream.addAttribute(PayloadAttribute.class);
  stream.reset(); // required before the first incrementToken()
  int position = 0;
  while (stream.incrementToken()) {
    int increment = posIncr.getPositionIncrement();
    if (increment > 0) {
      position = position + increment;
      System.out.println();
      System.out.print(position + ":");
    }
    BytesRef pl = payload.getPayload();
    if (pl != null) {
      System.out.print(
          "[" + term.toString() + ":" + offset.startOffset() + "->" + offset.endOffset()
              + ":" + type.type() + ":" + new String(pl.bytes) + "] ");
    } else {
      System.out.print(
          "[" + term.toString() + ":" + offset.startOffset() + "->" + offset.endOffset()
              + ":" + type.type() + "] ");
    }
  }
  stream.end();
  stream.close();
  System.out.println();
}
public final boolean incrementToken() throws IOException {
  int increment = 0;
  while (input.incrementToken()) {
    if (!stopWords.contains(termAttr.termBuffer(), 0, termAttr.termLength())) {
      posIncrAttr.setPositionIncrement(posIncrAttr.getPositionIncrement() + increment);
      return true;
    }
    increment += posIncrAttr.getPositionIncrement();
  }
  return false;
}
/**
 * Count position increments in a token stream. Package private for testing.
 *
 * @param analyzer analyzer to create token stream
 * @param fieldName field name to pass to analyzer
 * @param fieldValue field value to pass to analyzer
 * @return number of position increments in a token stream
 * @throws IOException if tokenStream throws it
 */
static int countPositions(Analyzer analyzer, String fieldName, String fieldValue) throws IOException {
  try (TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldValue)) {
    int count = 0;
    PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      count += position.getPositionIncrement();
    }
    tokenStream.end();
    count += position.getPositionIncrement();
    return count;
  }
}
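A hedged usage sketch: with a plain whitespace analyzer (an assumption; any chain without gap-introducing filters behaves the same), each token contributes an increment of 1, so the count equals the token count:

// "field" is an arbitrary field name; assumes a Lucene version where
// WhitespaceAnalyzer has a no-arg constructor.
int positions = countPositions(new WhitespaceAnalyzer(), "field", "quick brown fox");
// positions == 3; with a stop filter in the chain, dropped tokens would still
// be counted because their gaps surface in the surviving increments.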
@Override
public boolean incrementToken() throws IOException {
  while (true) {
    final boolean gotOne = input.incrementToken();
    if (!gotOne) {
      return false;
    } else if (termAtt.toString().equals("a")) {
      pendingPosInc += posIncAtt.getPositionIncrement();
    } else {
      posIncAtt.setPositionIncrement(pendingPosInc + posIncAtt.getPositionIncrement());
      pendingPosInc = 0;
      return true;
    }
  }
}
@Override
public boolean incrementToken() throws IOException {
  // return the first non-stop word found
  int skippedPositions = 0;
  while (input.incrementToken()) {
    if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
      if (enablePositionIncrements) {
        posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
      }
      return true;
    }
    skippedPositions += posIncrAtt.getPositionIncrement();
  }
  // reached EOS -- return false
  return false;
}
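A hedged behavior sketch for the filter above. Assume a hypothetical subclass named StopSkipFilter whose filter.run() matches only "quick", with enablePositionIncrements=true; the dropped token then shows up as a gap on the next surviving token (whitespaceMockTokenizer comes from Lucene's BaseTokenStreamTestCase in the test framework):

TokenStream ts = new StopSkipFilter(whitespaceMockTokenizer("the quick fox")); // hypothetical class
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute inc = ts.addAttribute(PositionIncrementAttribute.class);
ts.reset();
while (ts.incrementToken()) {
  System.out.println(term + " +" + inc.getPositionIncrement()); // prints: the +1, fox +2
}
ts.end();
ts.close();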
/*
 * (non-Javadoc)
 *
 * @see org.apache.lucene.analysis.TokenStream#next()
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  skippedPositions = 0;
  while (true) {
    int tokenType = scanner.getNextToken();
    if (tokenType == StandardTokenizerInterface.YYEOF) {
      return false;
    }
    if (scanner.yylength() <= maxTokenLength) {
      posIncrAtt.setPositionIncrement(skippedPositions + 1);
      scanner.getText(termAtt);
      final int start = scanner.yychar();
      offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
      // This 'if' should be removed in the next release. For now, it converts
      // invalid acronyms to HOST. When removed, only the 'else' part should
      // remain.
      if (tokenType == StandardTokenizer.ACRONYM_DEP) {
        typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
        termAtt.setLength(termAtt.length() - 1); // remove extra '.'
      } else {
        typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
      }
      return true;
    } else {
      // When we skip a too-long term, we still increment the position increment
      skippedPositions++;
    }
  }
}
@Override
public boolean incrementToken() throws IOException {
  if (!terms.isEmpty()) {
    char[] buffer = terms.poll();
    termAttribute.setEmpty();
    termAttribute.copyBuffer(buffer, 0, buffer.length);
    posIncAttr.setPositionIncrement(1);
    return true;
  }
  if (!input.incrementToken()) {
    return false;
  } else {
    final char[] term = termAttribute.buffer();
    final int length = termAttribute.length();
    int k = 0;
    for (; k < length; k++) {
      if (term[k] == tokenDelimiter) {
        break;
      }
    }
    LinkedList<CharBuffer> buffers = permuteTerms(term, 0, length);
    for (CharBuffer cb : buffers) {
      terms.add(cb.array());
    }
    // we return true and leave the original token unchanged
    return true;
  }
}
/*
 * (non-Javadoc)
 *
 * @see org.apache.lucene.analysis.TokenStream#next()
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  skippedPositions = 0;
  while (true) {
    int tokenType = scanner.getNextToken();
    if (tokenType == StandardTokenizerImpl.YYEOF) {
      return false;
    }
    if (scanner.yylength() <= maxTokenLength) {
      posIncrAtt.setPositionIncrement(skippedPositions + 1);
      scanner.getText(termAtt);
      final int start = scanner.yychar();
      offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
      typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
      return true;
    } else {
      // When we skip a too-long term, we still increment the position increment
      skippedPositions++;
    }
  }
}
@Override
public final boolean incrementToken() throws IOException {
  if (!tokens.isEmpty()) {
    assert current != null;
    CompoundToken token = tokens.removeFirst();
    restoreState(current); // keep all other attributes untouched
    termAtt.setEmpty().append(token.txt);
    offsetAtt.setOffset(token.startOffset, token.endOffset);
    posIncAtt.setPositionIncrement(0);
    return true;
  }
  current = null; // not really needed, but for safety
  if (input.incrementToken()) {
    // Only words longer than minWordSize get processed
    if (termAtt.length() >= this.minWordSize) {
      decompose();
      // only capture the state if we really need it for producing new tokens
      if (!tokens.isEmpty()) {
        current = captureState();
      }
    }
    // return original token:
    return true;
  } else {
    return false;
  }
}
/* (non-Javadoc)
 * @see org.apache.lucene.analysis.TokenStream#incrementToken()
 */
@Override
public boolean incrementToken() throws IOException {
  // clear all token attributes
  clearAttributes();
  skippedPositions = 0;
  Lexeme nextLexeme = _IKImplement.next();
  if (nextLexeme != null) {
    posIncrAtt.setPositionIncrement(skippedPositions + 1);
    // copy the Lexeme into the stream's attributes
    // set the token text
    termAtt.append(nextLexeme.getLexemeText());
    // set the token length
    termAtt.setLength(nextLexeme.getLength());
    // set the token offsets
    offsetAtt.setOffset(
        correctOffset(nextLexeme.getBeginPosition()), correctOffset(nextLexeme.getEndPosition()));
    // record the end position of the segmentation
    endPosition = nextLexeme.getEndPosition();
    // record the token type
    typeAtt.setType(nextLexeme.getLexemeTypeString());
    // return true to signal that another token was produced
    return true;
  }
  // return false to signal that the stream is exhausted
  return false;
}
private static String[] mmsegTokens(Analyzer analyzer, String input) throws IOException {
  if (Resources.debug) {
    Resources.LOGGER.debug("TokenParser:" + input);
    Resources.LOGGER.debug("Analyzer:" + analyzer.getClass());
  }
  TokenStream tokenStream = analyzer.tokenStream("input", input);
  OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute positionIncrementAttribute =
      tokenStream.addAttribute(PositionIncrementAttribute.class);
  CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAttribute = tokenStream.addAttribute(TypeAttribute.class);
  tokenStream.reset();
  int position = 0;
  List<String> tokens = new ArrayList<String>();
  while (tokenStream.incrementToken()) {
    int increment = positionIncrementAttribute.getPositionIncrement();
    if (increment > 0) {
      position = position + increment;
      if (Resources.debug) {
        Resources.LOGGER.debug(position + ":");
      }
    }
    int startOffset = offsetAttribute.startOffset();
    int endOffset = offsetAttribute.endOffset();
    String term = charTermAttribute.toString();
    tokens.add(term);
    if (Resources.debug) {
      Resources.LOGGER.debug(
          "[" + term + "]" + ":(" + startOffset + "-->" + endOffset + "):" + typeAttribute.type());
    }
  }
  tokenStream.end();
  tokenStream.close();
  return tokens.toArray(new String[] {});
}
@Override
public boolean incrementToken() throws IOException {
  // parse() is able to return w/o producing any new
  // tokens, when the tokens it had produced were entirely
  // punctuation. So we loop here until we get a real
  // token or we end:
  while (pending.size() == 0) {
    if (end) {
      return false;
    }
    // Push Viterbi forward some more:
    parse();
  }

  final Token token = pending.remove(pending.size() - 1);
  int position = token.getPosition();
  int length = token.getLength();
  clearAttributes();
  assert length > 0;
  // System.out.println("off=" + token.getOffset() + " len=" + length + " vs "
  //     + token.getSurfaceForm().length);
  termAtt.copyBuffer(token.getSurfaceForm(), token.getOffset(), length);
  offsetAtt.setOffset(correctOffset(position), correctOffset(position + length));
  basicFormAtt.setToken(token);
  posAtt.setToken(token);
  readingAtt.setToken(token);
  inflectionAtt.setToken(token);
  if (token.getPosition() == lastTokenPos) {
    posIncAtt.setPositionIncrement(0);
    posLengthAtt.setPositionLength(token.getPositionLength());
  } else {
    assert token.getPosition() > lastTokenPos;
    posIncAtt.setPositionIncrement(1);
    posLengthAtt.setPositionLength(1);
  }
  if (VERBOSE) {
    System.out.println(Thread.currentThread().getName() + ": incToken: return token=" + token);
  }
  lastTokenPos = token.getPosition();
  return true;
}
/* (non-Javadoc)
 * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
 */
public boolean isNewFragment() {
  boolean isNewFrag = false;
  int minFragLen = (int) ((1.0f - slop) * targetFragChars);
  int endOffset = offsetAtt.endOffset();

  // ** determine isNewFrag
  if (posIncAtt.getPositionIncrement() > incrementGapThreshold) {
    // large position gaps always imply new fragments
    isNewFrag = true;
  } else if (endOffset - currentOffset < minFragLen) {
    // we're not in our range of flexibility
    isNewFrag = false;
  } else if (targetOffset > 0) {
    // we've already decided on a target
    isNewFrag = endOffset > targetOffset;
  } else {
    // we might be able to do something
    int minOffset = currentOffset + minFragLen;
    int maxOffset = (int) (currentOffset + (1.0f + slop) * targetFragChars);
    int hotIndex;

    // look for a close hotspot
    hotIndex = Arrays.binarySearch(hotspots, endOffset);
    if (hotIndex < 0) {
      hotIndex = -hotIndex - 1; // binarySearch returns -(insertionPoint) - 1 on a miss
    }
    if (hotIndex >= hotspots.length) {
      // no more hotspots in this input stream
      targetOffset = currentOffset + targetFragChars;
    } else if (hotspots[hotIndex] > maxOffset) {
      // no hotspots within slop
      targetOffset = currentOffset + targetFragChars;
    } else {
      // try to find hotspot in slop
      int goal = hotspots[hotIndex];
      while (goal < minOffset) {
        hotIndex++;
        if (hotIndex >= hotspots.length) {
          // ran out of hotspots; force the default target below
          goal = Integer.MAX_VALUE;
          break;
        }
        goal = hotspots[hotIndex];
      }
      targetOffset = goal <= maxOffset ? goal : currentOffset + targetFragChars;
    }
    isNewFrag = endOffset > targetOffset;
  }

  // ** operate on isNewFrag
  if (isNewFrag) {
    currentNumFrags++;
    currentOffset = endOffset;
    targetOffset = -1;
  }
  return isNewFrag;
}
@Override
public boolean incrementToken() throws IOException {
  if (currentPrefix != null) {
    if (!currentPrefix.hasNext()) {
      return input.incrementToken();
    } else {
      posAttr.setPositionIncrement(0);
    }
  } else {
    currentPrefix = prefixes.iterator();
    posAttr.setPositionIncrement(1);
    assert currentPrefix.hasNext() : "one or more prefixes needed";
  }
  termAttr.setEmpty();
  termAttr.append(currentPrefix.next());
  termAttr.append(separator);
  return true;
}
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
  TokenStream stream = analyzer.tokenStream("contents",                    // #A
      new StringReader(text));
  stream.reset();

  CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);   // #B
  PositionIncrementAttribute posIncr =                                     // #B
      stream.addAttribute(PositionIncrementAttribute.class);               // #B
  OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);     // #B
  TypeAttribute type = stream.addAttribute(TypeAttribute.class);           // #B

  int position = 0;
  while (stream.incrementToken()) {                                        // #C
    int increment = posIncr.getPositionIncrement();                        // #D
    if (increment > 0) {                                                   // #D
      position = position + increment;                                     // #D
      System.out.println();                                                // #D
      System.out.print(position + ": ");                                   // #D
    }
    System.out.print("[" +                                                 // #E
        term + ":" +                                                       // #E
        offset.startOffset() + "->" +                                      // #E
        offset.endOffset() + ":" +                                         // #E
        type.type() + "] ");                                               // #E
  }
  stream.close();
  System.out.println();
}
@Override
public final boolean incrementToken() throws IOException {
  while (true) {
    if (curTermBuffer == null) {
      if (!input.incrementToken()) {
        return false;
      } else {
        curTermBuffer = termAtt.buffer().clone();
        curTermLength = termAtt.length();
        curCodePointCount = charUtils.codePointCount(termAtt);
        curGramSize = minGram;
        tokStart = offsetAtt.startOffset();
        tokEnd = offsetAtt.endOffset();
        savePosIncr += posIncrAtt.getPositionIncrement();
        savePosLen = posLenAtt.getPositionLength();
      }
    }
    if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
      if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
        // grab gramSize chars from front or back
        clearAttributes();
        offsetAtt.setOffset(tokStart, tokEnd);
        // first ngram gets increment, others don't
        if (curGramSize == minGram) {
          posIncrAtt.setPositionIncrement(savePosIncr);
          savePosIncr = 0;
        } else {
          posIncrAtt.setPositionIncrement(0);
        }
        posLenAtt.setPositionLength(savePosLen);
        final int charLength =
            charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
        termAtt.copyBuffer(curTermBuffer, 0, charLength);
        curGramSize++;
        return true;
      }
    }
    curTermBuffer = null;
  }
}
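A hedged expectation sketch for the filter above, using Lucene's test framework. "EdgeGramsFilter" is a hypothetical name for the enclosing class, assumed here to be configured with minGram=1 and maxGram=3:

// In a test extending BaseTokenStreamTestCase (lucene-test-framework):
TokenStream ts = new EdgeGramsFilter(whitespaceMockTokenizer("abc"), 1, 3); // hypothetical ctor
assertTokenStreamContents(ts, new String[] {"a", "ab", "abc"});
// All three grams keep the original token's offsets; only "a" (the minGram-sized
// gram) carries the saved position increment, while "ab" and "abc" stack with posIncr=0.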
/* (non-Javadoc)
 * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
 */
@Override
public boolean isNewFragment() {
  int endOffset = offsetAtt.endOffset();
  boolean isNewFrag =
      endOffset >= fragOffset + getFragmentSize()
          || posIncAtt.getPositionIncrement() > INCREMENT_THRESHOLD;
  if (isNewFrag) {
    fragOffset = endOffset;
  }
  return isNewFrag;
}
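A hedged usage sketch: a Fragmenter with an isNewFragment() like the one above is installed on a Highlighter from the lucene-highlighter module. "GapAwareFragmenter" is a hypothetical name for the class above; query, analyzer, and text are assumed to be in scope:

// The enclosing method must declare (or handle) IOException and
// InvalidTokenOffsetsException thrown by getBestFragment().
Highlighter highlighter = new Highlighter(new QueryScorer(query));
highlighter.setTextFragmenter(new GapAwareFragmenter());
// The fragmenter is consulted once per token; a large position increment
// (e.g. across a multi-valued field boundary) forces a fragment break:
String best = highlighter.getBestFragment(analyzer, "contents", text);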
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
  CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
  stream.reset(); // required before the first incrementToken()
  int position = 0;
  while (stream.incrementToken()) {
    int increment = posIncr.getPositionIncrement();
    if (increment > 0) {
      position = position + increment;
      System.out.println();
      System.out.print(position + ":");
    }
    System.out.print("[" + term.toString() + "] ");
  }
  stream.end();
  stream.close();
  System.out.println();
}
private void buffer() {
  if (bufferedLen == buffered.length) {
    int newSize = ArrayUtil.oversize(bufferedLen + 1, 8);
    buffered = Arrays.copyOf(buffered, newSize);
    startOff = Arrays.copyOf(startOff, newSize);
    posInc = Arrays.copyOf(posInc, newSize);
  }
  startOff[bufferedLen] = offsetAttribute.startOffset();
  posInc[bufferedLen] = posIncAttribute.getPositionIncrement();
  buffered[bufferedLen] = captureState();
  bufferedLen++;
}
@Override
public boolean incrementToken() {
  clearAttributes();
  if (upto == 4) {
    return false;
  }
  if (upto == 0) {
    posIncr.setPositionIncrement(1);
    term.setEmpty().append("a");
  } else if (upto == 1) {
    posIncr.setPositionIncrement(1);
    term.setEmpty().append("b");
  } else if (upto == 2) {
    posIncr.setPositionIncrement(0);
    term.setEmpty().append("c");
  } else {
    posIncr.setPositionIncrement(0);
    term.setEmpty().append("d");
  }
  upto++;
  return true;
}
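A hedged reading of the synthetic stream above: it emits four tokens over two positions, with "c" and "d" stacked on "b"'s position, the usual shape for testing synonym-style stacked tokens. In a test extending BaseTokenStreamTestCase this could be asserted as follows (assuming "stream" is an instance of the class above, and that the output-plus-increments overload is available in the test framework version in use):

assertTokenStreamContents(
    stream,
    new String[] {"a", "b", "c", "d"},
    new int[] {1, 1, 0, 0}); // "c" and "d" stack on "b"'s position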
private void emit(char[] token) {
  Log.debug("emit: " + new String(token));
  if (replaceWhitespaceWith != null) {
    token = replaceWhiteSpace(token);
  }
  CharTermAttribute termAttr = getTermAttribute();
  termAttr.setEmpty();
  termAttr.copyBuffer(token, 0, token.length);
  OffsetAttribute offAttr = getOffsetAttribute();
  if (offAttr != null && offAttr.endOffset() >= token.length) {
    int start = offAttr.endOffset() - token.length;
    offAttr.setOffset(start, offAttr.endOffset());
  }
  PositionIncrementAttribute pia = getPositionIncrementAttribute();
  if (pia != null) {
    pia.setPositionIncrement(++positionIncr);
  }
  lastEmitted = token;
}