コード例 #1
0
 /**
  * Records the distinct tokens of one document in the document-frequency map.
  *
  * <p>The tokens returned by {@code extractTokens} are de-duplicated through a
  * {@code HashSet} first, so {@code docFreqMap.update} is called at most once per
  * distinct token for this document. Tokens of length one or less are ignored.
  * {@code docCount} is incremented once per call, regardless of which tokens
  * were found.
  *
  * @param document text handed to {@code extractTokens}
  */
 void addTokensToFreqMap(String document) {
   HashSet<String> distinctTokens = new HashSet<String>(extractTokens(document));
   for (String token : distinctTokens) {
     // Skip empty and single-character tokens.
     if (token.length() <= 1) {
       continue;
     }
     docFreqMap.update(token);
   }
   docCount++;
 }
コード例 #2
0
  /**
   * Computes a TF-IDF weight for each token of {@code document} and adds the
   * result to {@code bondedSet}.
   *
   * <p>Tokens longer than one character are first tallied in a local
   * {@code CountingMap}. For every tallied token the document frequency is read
   * from {@code docFreqMap}; tokens that are absent from {@code docFreqMap} or
   * whose document frequency is below {@code minimalDocFrequency} are skipped.
   * For the remaining tokens the weight is
   * {@code termFrequency * ln(docCount / documentFrequency)}, and a
   * {@code new Token(token, weight)} is added to {@code bondedSet}.
   *
   * @param document text handed to {@code extractTokens}
   */
  void addToTfIdf(String document) {
    CountingMap termFreq = new CountingMap();

    for (String s : extractTokens(document)) {
      if (s.length() > 1) {
        termFreq.update(s);
      }
    }

    for (Map.Entry<String, Integer> entry : termFreq.entrySet()) {
      String token = entry.getKey();
      // The token always exists in termFreq (we are iterating its entries); the
      // map that may lack it is docFreqMap, e.g. when this document was never
      // passed through the frequency-map pass. Skip such tokens instead of
      // failing on the lookup below.
      if (!docFreqMap.containsKey(token)) {
        continue;
      }
      int df = docFreqMap.get(token);
      if (df < minimalDocFrequency) {
        continue;
      }
      // Term frequency comes straight from the entry; no second lookup needed.
      int tf = entry.getValue();
      double idf = Math.log((double) docCount / df);
      bondedSet.add(new Token(token, tf * idf));
    }
  }