コード例 #1
0
 /**
  * Folds every string produced by the iterator into the running corpus statistics.
  *
  * <p>Each string is converted with {@code asBagOfTokens}. Every token yielded by the bag's
  * iterator increments {@code totalTokenCount}; the first time a token is met within a bag, its
  * entry in {@code documentFrequency} goes up by one; each processed bag increments
  * {@code collectionSize}.
  *
  * @param i source of the strings to train on; read until {@code hasNext()} returns false
  */
 public void train(StringWrapperIterator i) {
   Set tokensInThisBag = new HashSet();
   while (i.hasNext()) {
     BagOfTokens currentBag = asBagOfTokens(i.nextStringWrapper());
     tokensInThisBag.clear();
     Iterator tokens = currentBag.tokenIterator();
     while (tokens.hasNext()) {
       totalTokenCount++;
       Token token = (Token) tokens.next();
       // add() returns false when this token was already counted for the current bag
       if (!tokensInThisBag.add(token)) {
         continue;
       }
       Integer previous = (Integer) documentFrequency.get(token);
       Integer updated;
       if (previous == null) {
         updated = ONE;
       } else if (previous == ONE) {
         // Reference comparison: presumably ONE/TWO/THREE are shared Integer constants
         // reused to avoid allocating small counts -- TODO confirm against their declaration.
         updated = TWO;
       } else if (previous == TWO) {
         updated = THREE;
       } else {
         // Any other Integer instance takes the general path.
         updated = new Integer(previous.intValue() + 1);
       }
       documentFrequency.put(token, updated);
     }
     collectionSize++;
   }
 }
コード例 #2
0
 /**
  * Iterates over the keys currently held in {@code documentFrequency}.
  *
  * @return an iterator obtained directly from the map's key set
  */
 public Iterator tokenIterator() {
   Set knownTokens = documentFrequency.keySet();
   return knownTokens.iterator();
 }
コード例 #3
0
 /**
  * Looks up the count stored for a token in {@code documentFrequency}.
  *
  * @param tok the token to look up
  * @return the stored count, or 0 when the map has no entry for {@code tok}
  */
 public int getDocumentFrequency(Token tok) {
   Integer stored = (Integer) documentFrequency.get(tok);
   if (stored != null) {
     return stored.intValue();
   }
   // no entry recorded for this token
   return 0;
 }