/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the provided
 * {@link Reader}.
 *
 * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with {@link
 *     LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and {@link
 *     ArabicStemFilter}.
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  SavedStreams streams = (SavedStreams) getPreviousTokenStream();
  if (streams == null) {
    streams = new SavedStreams();
    streams.source = new ArabicLetterTokenizer(reader);
    streams.result = new LowerCaseFilter(streams.source);
    streams.result = new StopFilter(
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
        streams.result, stoptable);
    streams.result = new ArabicNormalizationFilter(streams.result);
    streams.result = new ArabicStemFilter(streams.result);
    setPreviousTokenStream(streams);
  } else {
    streams.source.reset(reader);
  }
  return streams.result;
}
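The method above refers to a SavedStreams holder that is not shown in the excerpt. In Lucene 2.9/3.0-era analyzers it is simply a per-thread container for the tokenizer and the top of the filter chain; a minimal sketch consistent with the streams.source / streams.result usage above (the exact field types are an assumption, not taken from the excerpt) could look like this:

// Hedged sketch: per-thread holder for the reusable chain. The field names match the
// usage above; declaring source as a plain Tokenizer rather than ArabicLetterTokenizer
// is an assumption, but Tokenizer is all that reset(Reader) requires.
private class SavedStreams {
  Tokenizer source;    // reset against each new Reader on reuse
  TokenStream result;  // top of the filter chain, returned to the caller
}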
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  class SavedStreams {
    StandardTokenizer tokenStream;
    TokenStream filteredTokenStream;
  }

  SavedStreams streams = (SavedStreams) getPreviousTokenStream();
  if (streams == null) {
    streams = new SavedStreams();
    setPreviousTokenStream(streams);
    streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
    streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
    streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
    streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
    streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
  } else {
    streams.tokenStream.reset(reader);
  }
  streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);
  return streams.filteredTokenStream;
}
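Both methods implement the same reuse contract: getPreviousTokenStream()/setPreviousTokenStream() stash the chain in thread-local storage, so each thread gets its own tokenizer and filters back on the next call and only the underlying Tokenizer is reset against the new Reader. A minimal sketch of how a caller might consume such a stream is below; it assumes a pre-4.0 Lucene API where reusableTokenStream and TermAttribute still exist, and the analyzer instance, the field name "body", and the demo class are placeholders, not part of the excerpts above.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class ReusableStreamDemo {
  // Prints each token produced for the given text. Repeated calls from the same
  // thread reuse the same tokenizer/filter chain held by the analyzer.
  static void printTokens(Analyzer analyzer, String text) throws IOException {
    TokenStream ts = analyzer.reusableTokenStream("body", new StringReader(text));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    ts.reset();                      // standard consumer workflow: reset, incrementToken, end, close
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
    ts.end();
    ts.close();
  }
}

Because the saved chain is keyed per thread, a stream returned this way must be fully consumed on the calling thread before the same analyzer is asked for another stream on that thread.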