/**
 * Builds a row vector of word scores for the vocabulary words that occur in the given file.
 *
 * <p>Each line of the file is passed through {@code InputHomogenization}, tokenized with
 * {@code tokenizerFactory}, and every token is counted. The result has one column per entry
 * of {@code currVocab}: column {@code i} receives the {@code wordScores} count of vocabulary
 * entry {@code i} when that entry was seen at least once in the file; other columns are left
 * exactly as the {@code DoubleMatrix} constructor created them.
 *
 * @param file the document to score
 * @return a matrix constructed as {@code new DoubleMatrix(1, currVocab.size())} holding the scores
 * @throws IllegalStateException if the file cannot be read
 */
public DoubleMatrix getScoreMatrix(File file) {
    Counter<String> docWords = new Counter<String>();
    try {
        LineIterator iter = FileUtils.lineIterator(file);
        try {
            while (iter.hasNext()) {
                Tokenizer t = tokenizerFactory.create((new InputHomogenization(iter.nextLine()).transform()));
                while (t.hasMoreTokens()) {
                    docWords.incrementCount(t.nextToken(), 1.0);
                }
            }
        } finally {
            // Release the underlying reader even when reading or tokenizing fails part-way
            // through; previously the iterator was only closed on the success path.
            iter.close();
        }
    } catch (IOException e) {
        throw new IllegalStateException("Unable to read file", e);
    }

    DoubleMatrix ret = new DoubleMatrix(1, currVocab.size());
    for (int i = 0; i < currVocab.size(); i++) {
        // Resolve the vocabulary entry's string form once per column instead of twice.
        String word = currVocab.get(i).toString();
        if (docWords.getCount(word) > 0) {
            ret.put(i, wordScores.getCount(word));
        }
    }
    return ret;
}
/**
 * Looks up the word stored at the given position of the word index.
 *
 * <p>The value is fetched from {@code wordIndex} and cast to {@code String}. Whether a
 * missing index yields {@code null} depends on {@code wordIndex.get} (not visible here).
 * The method is {@code synchronized} on this instance.
 *
 * @param index the index of the word to get
 * @return the word at the given index, as returned by {@code wordIndex.get(index)}
 */
@Override
public synchronized String wordAtIndex(int index) {
    final Object entry = wordIndex.get(index);
    return (String) entry;
}
/**
 * Looks up the word stored at the given position of the word index.
 *
 * <p>The value is fetched from {@code wordIndex} and cast to {@code String}. Whether a
 * missing index yields {@code null} depends on {@code wordIndex.get} (not visible here).
 *
 * @param index the index of the word to get
 * @return the word at the given index, as returned by {@code wordIndex.get(index)}
 */
@Override
public String wordAtIndex(int index) {
    final Object entry = wordIndex.get(index);
    return (String) entry;
}