/** * @param file is the file to be classified * @return returns true if correctly classified otherwise false */ public boolean classify(String file) { String category = ""; double category_prob = 0; // Read document; DocumentI document = new Document(vocab); document.read_file(file, false); // Mapping for document Map<String, Integer> document_dictionary = document.get_words(); // Distinct words in vocabulary Set<String> vocab_distinct_words = vocab.distinct_words(); for (Entry<String, TextI> entry : categories.entrySet()) { double pv = (double) entry.getValue().num_docs() / (double) total_docs; double pwv = 1; double prevpwv = 1; for (String word : document_dictionary.keySet()) { if (vocab_distinct_words.contains(word)) { if ((pwv *= entry.getValue().pwv(word)) == 0) { pwv = prevpwv; break; } prevpwv = pwv; } } if (pwv * pv > category_prob) { category = entry.getKey(); category_prob = pwv * pv; } } String predicted_category = category.replace(this.directory + "\\", ""); String actual_category = document.getCategory(); if (actual_category.contains(predicted_category)) { return true; } else { return false; } }