@Override
public final boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
    // mark tokens found in the keyword set so downstream stemmers leave them unchanged
    if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) {
      keywordAttr.setKeyword(true);
    }
    return true;
  } else {
    return false;
  }
}
@Override
public boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
    if (!keywordAttr.isKeyword()) {
      // stem in place: the stemmer rewrites the term buffer and returns the new length
      final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
      termAtt.setLength(newlen);
    }
    return true;
  } else {
    return false;
  }
}
@Override
public boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
    if (!keywordAttr.isKeyword()) {
      // this stemmer increases word length by 1: worst case '*çom' -> '*ción'
      final int len = termAtt.length();
      final int newlen = stemmer.stem(termAtt.resizeBuffer(len + 1), len);
      termAtt.setLength(newlen);
    }
    return true;
  } else {
    return false;
  }
}
/** @return {@code true} for the next token in the stream, or {@code false} at end of stream */
@Override
public boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
    String term = termAtt.toString();
    if (!keywordAttr.isKeyword()) {
      String s = stemmer.stem(term);
      // If the term was not stemmed, don't waste time adjusting the token.
      if ((s != null) && !s.equals(term)) {
        termAtt.setEmpty().append(s);
      }
    }
    return true;
  } else {
    return false;
  }
}
/**
 * @return {@code true} if a token was added to the search/analysis stream, {@code false} otherwise
 * @throws IOException if reading from the input stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }
  Optional<CharSequence> lemma = lemmatizer.lemmatize(termAtt);
  // replace the term only when a lemma exists, the token is not a protected keyword,
  // and the lemma actually differs from the current term
  if (lemma.isPresent() && !keywordAttr.isKeyword() && !equalCharSequences(lemma.get(), termAtt)) {
    termAtt.setEmpty().append(lemma.get());
  }
  return true;
}
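The filters above only take effect when a keyword-marking filter runs ahead of the stemming or lemmatizing filter in the same analysis chain. The following is a minimal sketch of such a wiring using stock Lucene components (StandardTokenizer, SetKeywordMarkerFilter, PorterStemFilter); the analyzer name and the protected-term list are illustrative, and package locations for some classes differ between Lucene versions.

import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

// Illustrative analyzer: marks protected terms before stemming so the
// !keywordAttr.isKeyword() checks in the filters above skip them.
public final class ProtectedTermAnalyzer extends Analyzer {

  // terms that must never be stemmed (example values only)
  private final CharArraySet protectedTerms =
      new CharArraySet(Arrays.asList("lucene", "opennlp"), true);

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    // sets the KeywordAttribute for protected terms, as in the first snippet above
    TokenStream sink = new SetKeywordMarkerFilter(source, protectedTerms);
    // the stemmer leaves keyword-marked tokens unchanged
    sink = new PorterStemFilter(sink);
    return new TokenStreamComponents(source, sink);
  }
}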