Java WordlistLoader Examples

Programming Language: Java

Class/Type: WordlistLoader

Examples at hotexamples.com: 5

Java WordlistLoader - 5 examples found. These are the top-rated real-world Java examples of WordlistLoader extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getWordSet(3)

getLines(1)

getSnowballWordSet(1)

Example #1

Show file

File: AbstractAnalysisFactory.java Project: PATRIC3/p3_solr

 /**
  * Loads a word set from one or more snowball-format word files. Apart from the input format this
  * mirrors {@link #getWordSet(ResourceLoader, String, boolean)}.
  *
  * @param loader used to open each named resource
  * @param wordFiles file-name list; it is split into individual names by {@code splitFileNames}
  * @param ignoreCase passed through to the {@code CharArraySet} constructor
  * @return the set filled from every listed file, or {@code null} when {@code wordFiles} yields
  *     no file names
  * @throws IOException if a resource cannot be opened or read; the UTF-8 decoder is configured to
  *     report malformed or unmappable input rather than replace it
  */
 protected final CharArraySet getSnowballWordSet(
     ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException {
   final List<String> fileNames = splitFileNames(wordFiles);
   if (fileNames.isEmpty()) {
     // Nothing to load: callers receive null, not an empty set.
     return null;
   }

   // default stopwords list has 35 or so words, but maybe don't make it that
   // big to start
   final CharArraySet wordSet = new CharArraySet(fileNames.size() * 10, ignoreCase);
   for (String fileName : fileNames) {
     InputStream in = null;
     Reader decodingReader = null;
     try {
       in = loader.openResource(fileName.trim());
       // A fresh strict decoder per file: bad bytes raise an error instead of being replaced.
       CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder();
       strictUtf8.onMalformedInput(CodingErrorAction.REPORT);
       strictUtf8.onUnmappableCharacter(CodingErrorAction.REPORT);
       decodingReader = new InputStreamReader(in, strictUtf8);
       WordlistLoader.getSnowballWordSet(decodingReader, wordSet);
     } finally {
       // Both may still be null here if opening the resource failed.
       IOUtils.closeWhileHandlingException(decodingReader, in);
     }
   }
   return wordSet;
 }

Example #2

Show file

File: PhaidraAnalyzer.java Project: phaidra/gsearch

 /**
  * Builds an analyzer with the stop words from the given file.
  *
  * <p>The file is opened with {@code IOUtils.getDecodingReader} using {@code
  * IOUtils.CHARSET_UTF_8}; the resulting reader is handed to {@link
  * WordlistLoader#getWordSet(Reader, Version)} and the loaded word set is forwarded, together
  * with {@code matchVersion}, to another constructor of this class.
  *
  * @param matchVersion Lucene version to match; see <a href="#version">above</a>
  * @param stopwords File to read stop words from
  * @throws IOException propagated from opening the file or loading the word set
  * @see WordlistLoader#getWordSet(Reader, Version)
  */
 public PhaidraAnalyzer(Version matchVersion, File stopwords) throws IOException {
   this(
       matchVersion,
       WordlistLoader.getWordSet(
           IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
 }

Example #3

Show file

File: PhaidraAnalyzer.java Project: phaidra/gsearch

 /**
  * Builds an analyzer with the stop words from the given reader.
  *
  * <p>The reader is handed to {@link WordlistLoader#getWordSet(Reader, Version)} and the loaded
  * word set is forwarded, together with {@code matchVersion}, to another constructor of this
  * class.
  *
  * @param matchVersion Lucene version to match; see <a href="#version">above</a>
  * @param stopwords Reader to read stop words from
  * @throws IOException propagated from loading the word set
  * @see WordlistLoader#getWordSet(Reader, Version)
  */
 public PhaidraAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
   this(matchVersion, WordlistLoader.getWordSet(stopwords, matchVersion));
 }

Example #4

Show file

File: AbstractAnalysisFactory.java Project: PATRIC3/p3_solr

 /**
  * Reads the lines of the named resource, treating its content as UTF-8.
  *
  * @param loader used to open the resource
  * @param resource name of the resource to open
  * @return the lines produced by {@code WordlistLoader.getLines} for that resource
  * @throws IOException if the resource cannot be opened or read
  */
 protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
   final List<String> lines =
       WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
   return lines;
 }

Example #5

Show file

File: StopAnalyzer.java Project: simplegeo/lucene-solr-3.1

 /**
  * Builds an analyzer with the stop words from the given file.
  *
  * <p>The file is loaded through {@link WordlistLoader#getWordSet(File)} and the resulting word
  * set is forwarded, together with {@code matchVersion}, to another constructor of this class.
  * NOTE(review): no charset is specified here, so decoding is decided by {@code
  * WordlistLoader.getWordSet(File)} - confirm which encoding it uses.
  *
  * @param matchVersion See <a href="#version">above</a>
  * @param stopwordsFile File to load stop words from
  * @throws IOException propagated from loading the word set
  * @see WordlistLoader#getWordSet(File)
  */
 public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
   this(matchVersion, WordlistLoader.getWordSet(stopwordsFile));
 }