/**
 * Registers one document in the document-frequency statistics.
 *
 * <p>The tokens returned by {@code extractTokens(document)} are de-duplicated first, so
 * {@code docFreqMap.update} is called at most once per distinct token for this document.
 * Tokens of length 0 or 1 are ignored. {@code docCount} is incremented once per call,
 * whether or not any token qualified.
 *
 * @param document the text handed to {@code extractTokens}
 */
void addTokensToFreqMap(String document) {
    // Collapse repeated tokens: each token is reported once per document.
    HashSet<String> distinctTokens = new HashSet<String>(extractTokens(document));
    for (String token : distinctTokens) {
        if (token.length() <= 1) {
            continue; // single-character and empty tokens are not counted
        }
        docFreqMap.update(token);
    }
    docCount++;
}
/**
 * Computes a TF-IDF weight for each distinct token of {@code document} and adds the
 * weighted tokens to {@code bondedSet}.
 *
 * <p>The weight is {@code tf * ln(docCount / df)}, where {@code tf} is the number of times
 * the token occurs in this document (as counted by a local {@code CountingMap}) and
 * {@code df} is the value stored for the token in {@code docFreqMap}. Tokens of length 0 or 1
 * are ignored.
 *
 * <p>A token is skipped when it has no positive entry in {@code docFreqMap} or when its
 * {@code df} is below {@code minimalDocFrequency}. Skipping unknown tokens avoids a
 * {@code NullPointerException} (or a division by zero yielding an infinite weight) for
 * documents that were not previously registered in the frequency statistics.
 *
 * @param document the text handed to {@code extractTokens}
 */
void addToTfIdf(String document) {
    // Term frequencies for this document only.
    CountingMap termFreq = new CountingMap();
    for (String s : extractTokens(document)) {
        if (s.length() > 1) {
            termFreq.update(s);
        }
    }

    for (Map.Entry<String, Integer> entry : termFreq.entrySet()) {
        String token = entry.getKey();

        // The token is guaranteed to be in termFreq; the map that may lack it is docFreqMap.
        Integer df = docFreqMap.get(token);
        if (df == null || df <= 0 || df < minimalDocFrequency) {
            continue;
        }

        double idf = Math.log((double) docCount / df);
        double tfIdf = entry.getValue() * idf;
        bondedSet.add(new Token(token, tfIdf));
    }
}