/** * Compares two sets of counts to see which items are interestingly over-represented in the first * set. * * @param a The first counts. * @param b The reference counts. * @param maxReturn The maximum number of items to return. Use maxReturn >= a.elementSet.size() to * return all scores above the threshold. * @param threshold The minimum score for items to be returned. Use 0 to return all items more * common in a than b. Use -Double.MAX_VALUE (not Double.MIN_VALUE !) to not use a threshold. * @return A list of scored items with their scores. */ public static <T> List<ScoredItem<T>> compareFrequencies( Multiset<T> a, Multiset<T> b, int maxReturn, double threshold) { int totalA = a.size(); int totalB = b.size(); Ordering<ScoredItem<T>> byScoreAscending = new Ordering<ScoredItem<T>>() { @Override public int compare(ScoredItem<T> tScoredItem, ScoredItem<T> tScoredItem1) { return Double.compare(tScoredItem.score, tScoredItem1.score); } }; Queue<ScoredItem<T>> best = new PriorityQueue<ScoredItem<T>>(maxReturn + 1, byScoreAscending); for (T t : a.elementSet()) { compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t); } // if threshold >= 0 we only iterate through a because anything not there can't be as or more // common than in b. if (threshold < 0) { for (T t : b.elementSet()) { // only items missing from a need be scored if (a.count(t) == 0) { compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t); } } } List<ScoredItem<T>> r = new ArrayList<ScoredItem<T>>(best); Collections.sort(r, byScoreAscending.reverse()); return r; }
public static void main(String[] args) { // Parse text to separate words String INPUT_TEXT = "Hello World! Hello All! Hi World!"; // Create Multiset Multiset<String> multiset = LinkedHashMultiset.create(Arrays.asList(INPUT_TEXT.split(" "))); // Print count words System.out.println( multiset); // print [Hello x 2, World! x 2, All!, Hi]- in predictable iteration order // Print all unique words System.out.println( multiset.elementSet()); // print [Hello, World!, All!, Hi] - in predictable iteration order // Print count occurrences of words System.out.println("Hello = " + multiset.count("Hello")); // print 2 System.out.println("World = " + multiset.count("World!")); // print 2 System.out.println("All = " + multiset.count("All!")); // print 1 System.out.println("Hi = " + multiset.count("Hi")); // print 1 System.out.println("Empty = " + multiset.count("Empty")); // print 0 // Print count all words System.out.println(multiset.size()); // print 6 // Print count unique words System.out.println(multiset.elementSet().size()); // print 4 }
/**
 * Restricts a multiset of scores to the elements contained in a restriction set.
 *
 * <p>When a restriction applies, the result is a new {@code HashMultiset} copy of {@code scores}
 * from which every element absent from {@code restrictionSet} has been removed via
 * {@code retainAll}; {@code scores} itself is not modified. The percentage by which the total
 * size shrank is logged at debug level (nothing is logged when {@code scores} is empty, since no
 * percentage can be computed).
 *
 * @param scores the multiset to restrict; must be non-null whenever {@code restrictionSet} is
 *     non-null and non-empty
 * @param restrictionSet the elements to keep; {@code null} or empty means "no restriction"
 * @return intersection of set (Multiset&lt;Integer&gt;) and restrictionSet (if restrictionSet
 *     non-null &amp; non-empty); otherwise the {@code scores} instance itself, not a copy
 */
public static Multiset<Integer> intersect(
    final Multiset<Integer> scores, final Set<Integer> restrictionSet) {
  if (restrictionSet == null || restrictionSet.isEmpty()) {
    return scores;
  }

  final int prevSize = scores.size();
  final Multiset<Integer> intersection = HashMultiset.create(scores);
  intersection.retainAll(restrictionSet);

  if (prevSize != 0) {
    // Multiply in long: (removed * 100) overflows int once more than ~21 million occurrences
    // were removed, which would log a bogus (possibly negative) percentage.
    final long savedPercent = (prevSize - intersection.size()) * 100L / prevSize;
    log.debug("Size saving by retainAll = " + savedPercent + "%");
  }
  return intersection;
}
public static void main(String args[]) { // create a multiset collection Multiset<String> multiset = HashMultiset.create(); multiset.add("a"); multiset.add("b"); multiset.add("c"); multiset.add("d"); multiset.add("a"); multiset.add("b"); multiset.add("c"); multiset.add("b"); multiset.add("b"); multiset.add("b"); // print the occurrence of an element System.out.println("Occurrence of 'b' : " + multiset.count("b")); // print the total size of the multiset System.out.println("Total Size : " + multiset.size()); // get the distinct elements of the multiset as set Set<String> set = multiset.elementSet(); // display the elements of the set System.out.println("Set ["); for (String s : set) { System.out.println(s); } System.out.println("]"); // display all the elements of the multiset using iterator Iterator<String> iterator = multiset.iterator(); System.out.println("MultiSet ["); while (iterator.hasNext()) { System.out.println(iterator.next()); } System.out.println("]"); // display the distinct elements of the multiset with their occurrence count System.out.println("MultiSet ["); for (Multiset.Entry<String> entry : multiset.entrySet()) { System.out.println("Element: " + entry.getElement() + ", Occurrence(s): " + entry.getCount()); } System.out.println("]"); // remove extra occurrences multiset.remove("b", 2); // print the occurrence of an element System.out.println("Occurence of 'b' : " + multiset.count("b")); }
/**
 * Returns the sole variable of the given multiset, if there is exactly one.
 *
 * <p>NOTE(review): {@code Multiset.size()} counts every occurrence, so a multiset holding one
 * variable several times also yields {@code null} - confirm this is intended.
 *
 * @param collectionVariables the candidate variables
 * @return the only element when the multiset's total size is 1, otherwise {@code null}
 */
private static Variable computeFrame(Multiset<Variable> collectionVariables) {
  if (collectionVariables.size() != 1) {
    return null;
  }
  return collectionVariables.iterator().next();
}