double product(TokenizedCharSequence thatDoc) { double sum = 0.0; for (String token : mTokenCounter.keySet()) { int count = thatDoc.mTokenCounter.getCount(token); if (count == 0) continue; // tf = sqrt(count); sum += tf1 * tf2 sum += Math.sqrt(count * mTokenCounter.getCount(token)); } return sum; }
static double length(ObjectToCounterMap<String> otc) { double sum = 0.0; for (Counter counter : otc.values()) { double count = counter.doubleValue(); sum += count; // tf =sqrt(count); sum += tf * tf } return Math.sqrt(sum); }
/**
 * Constructs a tokenized character sequence from the given string.
 *
 * <p>Stores the string in {@code mString} and its characters in
 * {@code mText}, tokenizes the characters with {@code TOKENIZER_FACTORY},
 * increments {@code mTokenCounter} once per lower-cased token, and finally
 * caches {@code length(mTokenCounter)} in {@code mLength}.
 *
 * @param input the text to tokenize
 * @throws IOException declared by this constructor's signature; presumably
 *         propagated from tokenization -- confirm against the tokenizer API
 */
public TokenizedCharSequence(String input) throws IOException {
    mString = input;
    mText = input.toCharArray();
    Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(mText, 0, mText.length);
    String token;
    while ((token = tokenizer.nextToken()) != null) {
        // Lower-case with a fixed locale so token keys do not depend on the
        // JVM's default locale (e.g. under a Turkish locale "I" would
        // otherwise become a dotless "ı" and fail to match "i").
        mTokenCounter.increment(token.toLowerCase(java.util.Locale.ROOT));
    }
    // Must run after all tokens are counted: the length is derived from the counts.
    mLength = length(mTokenCounter);
}