Example #1
/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the provided
 * {@link Reader}.
 *
 * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with {@link
 *     LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and {@link
 *     ArabicStemFilter}.
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  SavedStreams streams = (SavedStreams) getPreviousTokenStream();
  if (streams == null) {
    // First call on this thread: build the full analysis chain and cache it.
    streams = new SavedStreams();
    streams.source = new ArabicLetterTokenizer(reader);
    streams.result = new LowerCaseFilter(streams.source);
    streams.result =
        new StopFilter(
            StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
            streams.result,
            stoptable);
    streams.result = new ArabicNormalizationFilter(streams.result);
    streams.result = new ArabicStemFilter(streams.result);
    setPreviousTokenStream(streams);
  } else {
    // Subsequent calls: reuse the cached chain; only the tokenizer needs the new reader.
    streams.source.reset(reader);
  }
  return streams.result;
}
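
For reference, here is a minimal consumption sketch against the Lucene 2.9/3.0 API used above. The analyzer instance, the field name "body", and the printTokens helper are illustrative, not part of the original example:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public static void printTokens(Analyzer analyzer, String text) throws IOException {
  // Obtain the (possibly reused) stream for this thread.
  TokenStream stream = analyzer.reusableTokenStream("body", new StringReader(text));
  TermAttribute term = stream.addAttribute(TermAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    System.out.println(term.term()); // one analyzed term per iteration
  }
  stream.end(); // records the final offset state
  // No close() here: the analyzer caches the chain for reuse on this thread.
}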
Example #2

@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  // Holder for the cached tokenizer and the top of the filter chain.
  class SavedStreams {
    StandardTokenizer tokenStream;
    TokenStream filteredTokenStream;
  }

  SavedStreams streams = (SavedStreams) getPreviousTokenStream();
  if (streams == null) {
    // First call on this thread: build and cache the chain.
    streams = new SavedStreams();
    setPreviousTokenStream(streams);
    streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
    streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
    streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
    streams.filteredTokenStream =
        new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
    streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
  } else {
    // Reuse the cached chain; only the tokenizer needs the new reader.
    streams.tokenStream.reset(reader);
  }
  // Applied on every call so a changed limit also affects reused streams.
  streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

  return streams.filteredTokenStream;
}
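
Both methods rely on the per-thread stream cache that the Lucene 2.9/3.x Analyzer base class provides: getPreviousTokenStream() and setPreviousTokenStream() read and write a thread-local slot (Lucene implements it with a CloseableThreadLocal), which is why a single analyzer instance can serve many indexing threads while each thread reuses its own tokenizer chain. A simplified sketch of that mechanism, assuming that behavior rather than quoting the Lucene source:

// Simplified sketch; the real Analyzer uses org.apache.lucene.util.CloseableThreadLocal.
// The class name SketchAnalyzer is hypothetical.
public abstract class SketchAnalyzer {
  private final ThreadLocal<Object> previousTokenStream = new ThreadLocal<Object>();

  protected Object getPreviousTokenStream() {
    return previousTokenStream.get(); // null the first time a thread asks
  }

  protected void setPreviousTokenStream(Object stream) {
    previousTokenStream.set(stream); // cached for this thread's next request
  }
}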