コード例 #1
0
  /**
   * Ranks every known document class by its cosine similarity to the given query bag.
   *
   * <p>The ranking of the most recent call is kept in {@code lastClassificationResult}. When
   * {@code bagNotAlreadyAnalyzed(queryBag)} reports that the bag was already analyzed, that cached
   * ranking is returned without being recomputed.
   *
   * <p>NOTE(review): {@code bagNotAlreadyAnalyzed} compares hash codes only, so two different bags
   * with equal hash codes would share one cached result -- confirm this is acceptable.
   *
   * @param queryBag the bag of words to classify
   * @return an unmodifiable view of (class name, cosine similarity) entries, ordered by similarity
   *     from highest to lowest
   */
  public List<Entry<String, Double>> getClassificationProbabilities(BagOfWords queryBag) {
    // Presumably rejects calls made before training has completed -- verify in the helper.
    checkFinishedTraining();
    if (bagNotAlreadyAnalyzed(queryBag)) {
      List<Entry<String, Double>> classifications = new ArrayList<>(docClasses.size());

      Map<String, Double> preparedQueryBag = prepareQueryBagOfWords(queryBag);

      for (DocumentClass docClass : docClasses.values()) {
        Double cosinSimilarity = calculateCosinSimilairty(preparedQueryBag, docClass);
        classifications.add(new SimpleEntry<>(docClass.getName(), cosinSimilarity));
      }

      // Order results, highest similarity first. Not in its own method while it is not clear
      // whether more than one analyze method will be used (i.e. KNN plus cosine similarity).
      Collections.sort(
          classifications,
          new Comparator<Entry<String, Double>>() {
            @Override
            public int compare(Entry<String, Double> o1, Entry<String, Double> o2) {
              return o2.getValue().compareTo(o1.getValue());
            }
          });

      // Publish the cache only after ranking succeeded, so a failure above can never leave the
      // bag marked as analyzed together with an unsorted or stale result.
      lastClassificationResult = new ArrayList<>(classifications);
      lastClassifiedBag = queryBag.hashCode();
    }
    return Collections.unmodifiableList(lastClassificationResult);
  }
コード例 #2
0
 /**
  * Tells whether the given bag differs from the one recorded in {@code lastClassifiedBag}.
  *
  * <p>Only hash codes are compared. NOTE(review): two distinct bags with the same hash code are
  * therefore treated as the same bag -- confirm this is acceptable for callers.
  *
  * @param bag the bag of words to check
  * @return {@code true} if the bag's hash code is not equal to {@code lastClassifiedBag}
  */
 private boolean bagNotAlreadyAnalyzed(BagOfWords bag) {
   final int candidateHash = bag.hashCode();
   final boolean matchesLastBag = (lastClassifiedBag == candidateHash);
   return !matchesLastBag;
 }