/**
  * Snowball-format counterpart of {@link #getWordSet(ResourceLoader, String, boolean)}: every
  * file named in {@code wordFiles} is opened through the loader and parsed by {@link
  * WordlistLoader#getSnowballWordSet(Reader, CharArraySet)} into one shared set.
  *
  * @param loader used to open each named resource
  * @param wordFiles file names, split by {@code splitFileNames}; each name is trimmed before it
  *     is opened
  * @param ignoreCase passed to the {@link CharArraySet} constructor
  * @return the set holding the words of all files, or {@code null} when {@code wordFiles} yields
  *     no file names
  * @throws IOException if a resource cannot be opened or read; the decoder is configured to
  *     report malformed or unmappable input rather than replace it
  */
 protected final CharArraySet getSnowballWordSet(
     ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException {
   final List<String> fileNames = splitFileNames(wordFiles);
   if (fileNames.isEmpty()) {
     // Nothing to load: callers get null, not an empty set.
     return null;
   }

   // Start small (ten slots per file): a default stopword list has around 35 entries, but
   // there is no need to reserve that much up front.
   final CharArraySet collected = new CharArraySet(fileNames.size() * 10, ignoreCase);
   for (String fileName : fileNames) {
     InputStream in = null;
     Reader utf8Reader = null;
     try {
       in = loader.openResource(fileName.trim());
       // Strict UTF-8: invalid byte sequences are reported as errors.
       CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder();
       strictUtf8.onMalformedInput(CodingErrorAction.REPORT);
       strictUtf8.onUnmappableCharacter(CodingErrorAction.REPORT);
       utf8Reader = new InputStreamReader(in, strictUtf8);
       WordlistLoader.getSnowballWordSet(utf8Reader, collected);
     } finally {
       // Best-effort close of whatever was opened.
       IOUtils.closeWhileHandlingException(utf8Reader, in);
     }
   }
   return collected;
 }
 // Example #2
 // 0
 /**
  * Builds an analyzer with the stop words from the given file.
  *
  * <p>The file is opened through {@code IOUtils.getDecodingReader} with {@code
  * IOUtils.CHARSET_UTF_8}, and the resulting reader is handed to {@link
  * WordlistLoader#getWordSet(Reader, Version)}; the word set it returns is passed on to another
  * constructor of this class.
  *
  * @see WordlistLoader#getWordSet(Reader, Version)
  * @param matchVersion Lucene version to match; see <a href="#version">above</a>
  * @param stopwords File to read stop words from
  * @throws IOException declared by this constructor; presumably raised when the file cannot be
  *     opened or read (confirm against {@code IOUtils} and {@code WordlistLoader})
  */
 public PhaidraAnalyzer(Version matchVersion, File stopwords) throws IOException {
   this(
       matchVersion,
       WordlistLoader.getWordSet(
           IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
 }
 // Example #3
 // 0
 /**
  * Builds an analyzer with the stop words from the given reader.
  *
  * <p>The reader is handed to {@link WordlistLoader#getWordSet(Reader, Version)} and the word
  * set it returns is passed on to another constructor of this class.
  *
  * @see WordlistLoader#getWordSet(Reader, Version)
  * @param matchVersion Lucene version to match; see <a href="#version">above</a>
  * @param stopwords Reader to read stop words from
  * @throws IOException declared by this constructor; presumably raised when reading from {@code
  *     stopwords} fails (confirm against {@code WordlistLoader})
  */
 public PhaidraAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
   this(matchVersion, WordlistLoader.getWordSet(stopwords, matchVersion));
 }
 /**
  * Reads the named resource as UTF-8 text and returns its lines.
  *
  * @param loader used to open the resource
  * @param resource name of the resource to open
  * @return the lines produced by {@code WordlistLoader.getLines} for the opened stream
  * @throws IOException if the resource cannot be opened or read
  */
 protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
   final InputStream resourceStream = loader.openResource(resource);
   return WordlistLoader.getLines(resourceStream, StandardCharsets.UTF_8);
 }
 /**
  * Builds an analyzer with the stop words from the given file.
  *
  * <p>The file is handed to {@link WordlistLoader#getWordSet(File)} and the word set it returns
  * is passed on to another constructor of this class. NOTE(review): no charset is specified at
  * this call site, so the encoding used to read the file is whatever {@code
  * WordlistLoader.getWordSet(File)} chooses; confirm before relying on non-ASCII stop words.
  *
  * @see WordlistLoader#getWordSet(File)
  * @param matchVersion See <a href="#version">above</a>
  * @param stopwordsFile File to load stop words from
  * @throws IOException declared by this constructor; presumably raised when the file cannot be
  *     opened or read (confirm against {@code WordlistLoader})
  */
 public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
   this(matchVersion, WordlistLoader.getWordSet(stopwordsFile));
 }