/** * Count words in sentences. * * @param sentences The sentences. * @param stopWords Stop words. * @return Map of words to WordCountAndSentence objects. */ public static <W extends Comparable> Map<String, WordCountAndSentences> countWordsInSentences( List<List<W>> sentences, StopWords stopWords) { // Holds map between each word // and the word's count and appearance. Map<String, WordCountAndSentences> wordCounts = new TreeMap<String, WordCountAndSentences>(); // Note if we are filtering using // a stop word list. boolean checkStopWords = (stopWords != null); // Loop over sentences. for (int i = 0; i < sentences.size(); i++) { // Get next sentence. List<W> sentence = sentences.get(i); // Loop over words in sentence. for (int j = 0; j < sentence.size(); j++) { // Get next word. W word = sentence.get(j); // Get string version of word in // lower case. String lcWord = word.toString().toLowerCase(); // Ignore punctuation and symbols. if (CharUtils.isPunctuationOrSymbol(lcWord)) { } // Ignore stop words. else if (checkStopWords && stopWords.isStopWord(lcWord)) { } else { // Create/update count and appearance data // for this word. WordCountAndSentences wcs = wordCounts.get(lcWord); if (wcs == null) { wcs = new WordCountAndSentences(lcWord); wordCounts.put(lcWord, wcs); } wcs.count++; wcs.sentences.add(i); } } } return wordCounts; }
/**
 * Builds a text description of this object in the form
 * {@code SimpleClassName[worker: ..., messageProducer: ...]}.
 *
 * <p>A null field is rendered as the text {@code null}; string
 * concatenation performs that conversion, so no explicit null
 * check is required.
 *
 * @return The simple class name followed by the bracketed
 *         worker and messageProducer values.
 */
@Override
public String toString() {
    return this.getClass().getSimpleName()
        + "["
        + "worker: " + worker
        + ", "
        + "messageProducer: " + messageProducer
        + "]";
}
/**
 * Delegates to the wrapped object's {@code toString()}.
 *
 * @return Whatever {@code wrapped.toString()} returns.
 */
@Override
public String toString() {
    final String description = wrapped.toString();
    return description;
}