public List<Entry<String, Double>> getClassificationProbabilities(BagOfWords queryBag) { checkFinishedTraining(); if (bagNotAlreadyAnalyzed(queryBag)) { List<Entry<String, Double>> classifications = new ArrayList<>(docClasses.size()); // Test only if the distances differ from the cosine similarity?! No they don't! List<Entry<String, Double>> distances = new ArrayList<>(docClasses.size()); Map<String, Double> preparedQueryBag = prepareQueryBagOfWords(queryBag); for (DocumentClass docClass : docClasses.values()) { Double cosinSimilarity = calculateCosinSimilairty(preparedQueryBag, docClass); classifications.add(new SimpleEntry<>(docClass.getName(), cosinSimilarity)); Double distance = calculateDistances(preparedQueryBag, docClass); distances.add(new SimpleEntry<>(docClass.getName(), distance)); } lastClassifiedBag = queryBag.hashCode(); lastClassificationResult = new ArrayList<>(classifications); // Order results. Not in own method while not clear if mor than one analyze method will be // used (ie. KNN plus cosineSimilarity) Collections.sort( lastClassificationResult, new Comparator<Entry<String, Double>>() { @Override public int compare(Entry<String, Double> o1, Entry<String, Double> o2) { return o2.getValue().compareTo(o1.getValue()); } }); // Ascending because the smallest is the most important one! Collections.sort( distances, new Comparator<Entry<String, Double>>() { @Override public int compare(Entry<String, Double> o1, Entry<String, Double> o2) { return o1.getValue().compareTo(o2.getValue()); } }); // System.out.println(lastClassificationResult); // System.out.println(distances); } return Collections.unmodifiableList(lastClassificationResult); }
/**
 * Tells whether the given bag differs from the one whose classification is currently cached.
 *
 * <p>Only hash codes are compared: two distinct bags that share a hash code are treated as the
 * same bag.
 *
 * @param bag the bag of words about to be classified
 * @return {@code true} if the bag's hash code differs from the cached one, {@code false}
 *     otherwise
 */
private boolean bagNotAlreadyAnalyzed(BagOfWords bag) {
  boolean matchesCachedBag = lastClassifiedBag == bag.hashCode();
  return !matchesCachedBag;
}