/**
   * Compares two sets of counts to see which items are interestingly over-represented in the first
   * set.
   *
   * <p>Every distinct element of {@code a} is handed to {@code compareAndAdd} for scoring. Elements
   * that occur only in {@code b} are handed over as well, but only when {@code threshold < 0}.
   *
   * @param a The first counts.
   * @param b The reference counts.
   * @param maxReturn The maximum number of items to return. Use {@code maxReturn >=
   *     a.elementSet().size()} (for example {@code Integer.MAX_VALUE}) to return all scores above
   *     the threshold.
   * @param threshold The minimum score for items to be returned. Use 0 to return all items more
   *     common in a than b. Use -Double.MAX_VALUE (not Double.MIN_VALUE !) to not use a threshold.
   * @return A list of scored items with their scores, ordered from highest to lowest score.
   * @throws IllegalArgumentException if {@code maxReturn} is negative.
   */
  public static <T> List<ScoredItem<T>> compareFrequencies(
      Multiset<T> a, Multiset<T> b, int maxReturn, double threshold) {
    int totalA = a.size();
    int totalB = b.size();

    Ordering<ScoredItem<T>> byScoreAscending =
        new Ordering<ScoredItem<T>>() {
          @Override
          public int compare(ScoredItem<T> tScoredItem, ScoredItem<T> tScoredItem1) {
            return Double.compare(tScoredItem.score, tScoredItem1.score);
          }
        };

    // The capacity is only an initial size hint, so bound it by the number of distinct items in a
    // rather than by maxReturn alone. Callers are told to pass a large maxReturn to get everything,
    // and "maxReturn + 1" overflows for Integer.MAX_VALUE (and eagerly allocates a huge array for
    // merely large values). The queue still grows on demand if items found only in b are added.
    // A negative maxReturn keeps producing a capacity below 1, which PriorityQueue rejects.
    int initialCapacity = Math.min(maxReturn, a.elementSet().size()) + 1;
    Queue<ScoredItem<T>> best =
        new PriorityQueue<ScoredItem<T>>(initialCapacity, byScoreAscending);

    for (T t : a.elementSet()) {
      compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t);
    }

    // if threshold >= 0 we only iterate through a because anything not there can't be as or more
    // common than in b.
    if (threshold < 0) {
      for (T t : b.elementSet()) {
        // only items missing from a need be scored
        if (a.count(t) == 0) {
          compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t);
        }
      }
    }

    // The heap iterates in no particular order, so copy it out and sort best-first.
    List<ScoredItem<T>> r = new ArrayList<ScoredItem<T>>(best);
    Collections.sort(r, byScoreAscending.reverse());
    return r;
  }
  /**
   * Demo: counts the space-separated words of a fixed sentence with a {@code LinkedHashMultiset}
   * and prints the multiset, its distinct words, a few per-word counts, and the two sizes.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // Split the sample sentence on single spaces; punctuation stays attached to the words.
    String sentence = "Hello World! Hello All! Hi World!";
    String[] tokens = sentence.split(" ");

    // Load every token into the multiset, duplicates included.
    Multiset<String> wordCounts = LinkedHashMultiset.create(Arrays.asList(tokens));

    // Whole multiset, in predictable iteration order: [Hello x 2, World! x 2, All!, Hi]
    System.out.println(wordCounts);
    // Distinct words only, same order: [Hello, World!, All!, Hi]
    System.out.println(wordCounts.elementSet());

    // Occurrence count per word; a word that never appeared counts as 0.
    int helloCount = wordCounts.count("Hello"); // 2
    int worldCount = wordCounts.count("World!"); // 2
    int allCount = wordCounts.count("All!"); // 1
    int hiCount = wordCounts.count("Hi"); // 1
    int emptyCount = wordCounts.count("Empty"); // 0
    System.out.println("Hello = " + helloCount);
    System.out.println("World = " + worldCount);
    System.out.println("All = " + allCount);
    System.out.println("Hi = " + hiCount);
    System.out.println("Empty = " + emptyCount);

    // Total number of words, duplicates included: 6
    int totalWords = wordCounts.size();
    System.out.println(totalWords);

    // Number of distinct words: 4
    int distinctWords = wordCounts.elementSet().size();
    System.out.println(distinctWords);
  }
 /**
  * Restricts a multiset of scores to the elements contained in a restriction set.
  *
  * <p>When a restriction applies, {@code scores} itself is not modified: a {@code HashMultiset}
  * copy is filtered with {@code retainAll} and returned, and the percentage of occurrences
  * dropped is logged at debug level. Without a restriction the same {@code scores} instance is
  * handed back, not a copy.
  *
  * @param scores the multiset to filter; must be non-null whenever a restriction applies
  * @param restrictionSet the elements to keep; {@code null} or empty means "no restriction"
  * @return intersection of set (Multiset<Integer>) and restrictionSet (if restrictionSet non-null
  *     & non-empty); otherwise return set
  */
 public static Multiset<Integer> intersect(
     final Multiset<Integer> scores, final Set<Integer> restrictionSet) {
   if (restrictionSet == null || restrictionSet.isEmpty()) {
     return scores;
   }

   int prevSize = scores.size();
   Multiset<Integer> intersection = HashMultiset.create(scores);
   intersection.retainAll(restrictionSet);

   // Nothing meaningful to report for an empty input (and it would divide by zero).
   if (prevSize != 0) {
     // Widen before multiplying: "removed * 100" overflows int once more than roughly 21 million
     // occurrences have been dropped, which would log a bogus percentage.
     long savedPercent = ((long) (prevSize - intersection.size()) * 100) / prevSize;
     log.debug("Size saving by retainAll = " + savedPercent + "%");
   }
   return intersection;
 }
Example #4
0
  /**
   * Demo of basic {@code Multiset} operations on a {@code HashMultiset}: adding elements, counting
   * one element, total size, the distinct-element view, iteration over every occurrence, iteration
   * over entries with their counts, and removing a fixed number of occurrences.
   *
   * @param args ignored
   */
  public static void main(String[] args) {

    // create a multiset collection
    Multiset<String> multiset = HashMultiset.create();

    // ten insertions in total: a x 2, b x 5, c x 2, d x 1
    for (String letter : new String[] {"a", "b", "c", "d", "a", "b", "c", "b", "b", "b"}) {
      multiset.add(letter);
    }

    // print the occurrence of an element
    System.out.println("Occurrence of 'b' : " + multiset.count("b"));

    // print the total size of the multiset
    System.out.println("Total Size : " + multiset.size());

    // get the distinct elements of the multiset as set
    Set<String> set = multiset.elementSet();

    // display the elements of the set
    System.out.println("Set [");
    for (String s : set) {
      System.out.println(s);
    }
    System.out.println("]");

    // display all the elements of the multiset, one line per occurrence
    System.out.println("MultiSet [");
    for (String element : multiset) {
      System.out.println(element);
    }
    System.out.println("]");

    // display the distinct elements of the multiset with their occurrence count
    System.out.println("MultiSet [");
    for (Multiset.Entry<String> entry : multiset.entrySet()) {
      System.out.println("Element: " + entry.getElement() + ", Occurrence(s): " + entry.getCount());
    }
    System.out.println("]");

    // remove two occurrences of "b"
    multiset.remove("b", 2);

    // print the occurrence of the element again, now that two were removed
    System.out.println("Occurrence of 'b' : " + multiset.count("b"));
  }
Example #5
0
 /**
  * Picks the frame variable out of a multiset of collection variables.
  *
  * @param collectionVariables the candidate variables; its {@code size()} counts every occurrence
  * @return the sole element when the multiset's size is exactly 1, otherwise {@code null}
  */
 private static Variable computeFrame(Multiset<Variable> collectionVariables) {
   // Anything other than exactly one occurrence is treated as "no frame".
   if (collectionVariables.size() != 1) {
     return null;
   }
   return collectionVariables.iterator().next();
 }