/**
   * Count words in sentences.
   *
   * <p>Each word is converted to a string and lower-cased using {@code Locale.ROOT}, so the
   * resulting keys do not depend on the JVM's default locale. Tokens for which
   * {@code CharUtils.isPunctuationOrSymbol} returns true are ignored, as are tokens that
   * {@code stopWords} reports as stop words. For every remaining token the matching
   * {@code WordCountAndSentences} entry has its {@code count} incremented and the zero-based
   * index of the containing sentence added to its {@code sentences} collection.
   *
   * @param sentences The sentences, each given as a list of word tokens.
   * @param stopWords Stop words to exclude; {@code null} disables stop word filtering.
   * @return Map from lower-cased word to its WordCountAndSentences object, sorted by word
   *     (the map is a {@code TreeMap}).
   */
  public static <W extends Comparable> Map<String, WordCountAndSentences> countWordsInSentences(
      List<List<W>> sentences, StopWords stopWords) {
    //	Holds map between each word
    //	and the word's count and appearance.

    Map<String, WordCountAndSentences> wordCounts = new TreeMap<String, WordCountAndSentences>();

    //	Zero-based index of the sentence currently
    //	being scanned; recorded with each word hit.

    int sentenceIndex = 0;

    for (List<W> sentence : sentences) {
      for (W word : sentence) {
        //	Lower-case with a fixed locale so that the
        //	map keys and the stop word lookup behave the
        //	same regardless of the default locale.

        String lcWord = word.toString().toLowerCase(java.util.Locale.ROOT);

        //	Ignore punctuation and symbols.

        if (CharUtils.isPunctuationOrSymbol(lcWord)) {
          continue;
        }

        //	Ignore stop words when a stop word list was supplied.

        if ((stopWords != null) && stopWords.isStopWord(lcWord)) {
          continue;
        }

        //	Create/update count and appearance data
        //	for this word.

        WordCountAndSentences wcs = wordCounts.get(lcWord);

        if (wcs == null) {
          wcs = new WordCountAndSentences(lcWord);
          wordCounts.put(lcWord, wcs);
        }

        wcs.count++;
        wcs.sentences.add(sentenceIndex);
      }

      sentenceIndex++;
    }

    return wordCounts;
  }
  /**
   * Summarize text by picking the first sentence in which each frequently used word appears.
   *
   * <p>Word counts are obtained from {@code WordCountUtils.countWordsInSentences} using a
   * {@code BuckleyAndSaltonStopWords} stop word list and placed in a {@code SortedArrayList}.
   * The words are then visited in that list's order; words for which
   * {@code CharUtils.isNumber} or {@code CharUtils.hasDigit} returns true are skipped. For each
   * remaining word the first entry of its sentence collection is added to the result, until
   * {@code summarySentences} distinct sentences have been collected or the words run out.
   *
   * @param sentences Tokenized sentences to summarize.
   * @param summarySentences Maximum number of sentences to return in the summary.
   * @return Indices into {@code sentences} of the selected summary sentences, in ascending
   *     order.
   */
  public <T extends Comparable> List<Integer> summarize(
      List<List<T>> sentences, int summarySentences) {
    //  Get word counts ignoring stop words.

    Map<String, WordCountAndSentences> wordCounts =
        WordCountUtils.countWordsInSentences(sentences, new BuckleyAndSaltonStopWords());

    //  Collect the counts into a sorted list.
    //  NOTE(review): presumably this orders entries by
    //  descending count via their natural ordering --
    //  confirm against WordCountAndSentences.compareTo.

    List<WordCountAndSentences> wcsData = new SortedArrayList<WordCountAndSentences>();

    for (WordCountAndSentences wordCount : wordCounts.values()) {
      wcsData.add(wordCount);
    }

    //  Holds summary sentence indices.  A sorted set
    //  drops duplicates and keeps the indices ascending.

    Set<Integer> summarySentencesSet = new TreeSet<Integer>();

    //  For each word, in sorted-list order, find the
    //  first sentence in which that word appears and
    //  add it to the summary sentences collection.
    //  NOTE(review): a 100-word cap used to be computed
    //  here but was never applied to the loop; the
    //  uncapped behavior is kept as-is.

    for (WordCountAndSentences wcs : wcsData) {
      if (summarySentencesSet.size() >= summarySentences) {
        break;
      }

      //  Skip numbers and words containing digits.

      String word = wcs.word.toString();

      if (CharUtils.isNumber(word) || CharUtils.hasDigit(word)) {
        continue;
      }

      Integer[] sentenceNumbers =
          (Integer[]) wcs.sentences.toArray(new Integer[wcs.sentences.size()]);

      summarySentencesSet.add(sentenceNumbers[0]);
    }

    //  Return indices of selected
    //  summary sentences.

    return new ArrayList<Integer>(summarySentencesSet);
  }
예제 #3
0
  /**
   * Writes a run of characters at the current cursor position and advances the cursor column.
   *
   * <p>While holding the terminal text buffer lock this calls {@code wrapLines()} and
   * {@code scrollY()}, writes the characters into the buffer at
   * ({@code myCursorX}, {@code myCursorY}) unless {@code length} is zero, adds
   * {@code CharUtils.getTextLength(chosenBuffer, start, length)} to {@code myCursorX}, and then
   * calls {@code finishText()}. The lock is released even if one of these steps throws.
   *
   * @param chosenBuffer source character array
   * @param start offset of the first character to write within {@code chosenBuffer}
   * @param length number of characters to write; zero skips the buffer write
   */
  private void writeCharacters(final char[] chosenBuffer, final int start, final int length) {
    final boolean nothingToWrite = (length == 0);

    myTerminalTextBuffer.lock();
    try {
      wrapLines();
      scrollY();

      if (!nothingToWrite) {
        myTerminalTextBuffer.writeBytes(myCursorX, myCursorY, chosenBuffer, start, length);
      }

      // The cursor is advanced by the computed text length rather than by the raw
      // character count.
      final int advance = CharUtils.getTextLength(chosenBuffer, start, length);
      myCursorX += advance;

      finishText();
    } finally {
      myTerminalTextBuffer.unlock();
    }
  }