예제 #1
0
  /**
   * Builds a single-row score matrix for the words that occur in the given file.
   *
   * <p>Each line of the file is passed through {@code InputHomogenization#transform()} and
   * tokenized with {@code tokenizerFactory}; every token is counted. The result has one row and
   * {@code currVocab.size()} columns. For each vocabulary entry that occurs at least once in the
   * file, the column at that entry's index is set to its count in {@code wordScores}; all other
   * columns keep whatever value the {@code DoubleMatrix} constructor gave them.
   *
   * @param file the file whose lines are tokenized and counted
   * @return a {@code 1 x currVocab.size()} matrix of word scores
   * @throws IllegalStateException if the file cannot be read
   */
  public DoubleMatrix getScoreMatrix(File file) {
    Counter<String> docWords = new Counter<String>();
    try {
      LineIterator iter = FileUtils.lineIterator(file);
      try {
        while (iter.hasNext()) {
          Tokenizer t =
              tokenizerFactory.create((new InputHomogenization(iter.nextLine()).transform()));
          while (t.hasMoreTokens()) {
            docWords.incrementCount(t.nextToken(), 1.0);
          }
        }
      } finally {
        // Always release the underlying reader, even when reading or tokenizing fails part-way.
        iter.close();
      }
    } catch (IOException e) {
      throw new IllegalStateException("Unable to read file", e);
    }
    DoubleMatrix ret = new DoubleMatrix(1, currVocab.size());

    for (int i = 0; i < currVocab.size(); i++) {
      // Resolve the vocabulary word once; it is used for both the presence check and the score.
      String word = currVocab.get(i).toString();
      if (docWords.getCount(word) > 0) {
        ret.put(i, wordScores.getCount(word));
      }
    }

    return ret;
  }
 /**
  * Looks up the entry stored in {@code wordIndex} at the given position and returns it as a
  * {@link String}. This method is {@code synchronized} on the enclosing instance.
  *
  * <p>NOTE(review): the result for an index with no entry (presumably {@code null}) is decided
  * by {@code wordIndex.get} — confirm against that implementation.
  *
  * @param index the position of the word to fetch
  * @return the entry found at {@code index}, cast to {@code String}
  */
 @Override
 public synchronized String wordAtIndex(int index) {
   final Object entry = wordIndex.get(index);
   return (String) entry;
 }
 /**
  * Looks up the entry stored in {@code wordIndex} at the given position and returns it as a
  * {@link String}.
  *
  * <p>NOTE(review): the result for an index with no entry (presumably {@code null}) is decided
  * by {@code wordIndex.get} — confirm against that implementation.
  *
  * @param index the position of the word to fetch
  * @return the entry found at {@code index}, cast to {@code String}
  */
 @Override
 public String wordAtIndex(int index) {
   final Object entry = wordIndex.get(index);
   return (String) entry;
 }