/**
 * Checks that every {@code ParallelIterate.groupBy} overload exercised here produces the same
 * grouping as the serial {@code groupBy} on a bag (duplicates kept) or on a set (duplicates dropped)
 * built from the same data.
 * <p>
 * The input is 10,000,000 Integers cycling through 0, 1, 2, 3, 4, shuffled in place, and grouped
 * by their String form. Each source/target combination is run twice: once with a trailing
 * {@code 100} argument (presumably a batch size -- confirm against ParallelIterate) and once
 * without it.
 */
@Test
public void groupBy()
{
    FastList<Integer> values = FastList.newWithNValues(
            10000000,
            new Function0<Integer>()
            {
                private int cursor;

                @Override
                public Integer value()
                {
                    // After handing out 0..3, emit 4 and restart the cycle.
                    if (this.cursor >= 4)
                    {
                        this.cursor = 0;
                        return Integer.valueOf(4);
                    }
                    int emitted = this.cursor;
                    this.cursor++;
                    return Integer.valueOf(emitted);
                }
            });
    values.shuffleThis();

    // Serial reference groupings.
    Multimap<String, Integer> expectedBag = values.toBag().groupBy(String::valueOf);
    Multimap<String, Integer> expectedSet = values.toSet().groupBy(String::valueOf);

    // List source, default target; copied into a bag multimap before comparing.
    Multimap<String, Integer> fromListWith100 =
            ParallelIterate.groupBy(values.toList(), String::valueOf, 100);
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(fromListWith100));

    Multimap<String, Integer> fromList =
            ParallelIterate.groupBy(values.toList(), String::valueOf);
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(fromList));

    // Set source into a SynchronizedPutUnifiedSetMultimap.
    Multimap<String, Integer> fromSetWith100 = ParallelIterate.groupBy(
            values.toSet(),
            String::valueOf,
            SynchronizedPutUnifiedSetMultimap.<String, Integer>newMultimap(),
            100);
    Assert.assertEquals(expectedSet, fromSetWith100);

    Multimap<String, Integer> fromSet = ParallelIterate.groupBy(
            values.toSet(),
            String::valueOf,
            SynchronizedPutUnifiedSetMultimap.<String, Integer>newMultimap());
    Assert.assertEquals(expectedSet, fromSet);

    // Sorted-set source into a SynchronizedPutUnifiedSetMultimap.
    Multimap<String, Integer> fromSortedSetWith100 = ParallelIterate.groupBy(
            values.toSortedSet(),
            String::valueOf,
            SynchronizedPutUnifiedSetMultimap.<String, Integer>newMultimap(),
            100);
    Assert.assertEquals(expectedSet, fromSortedSetWith100);

    Multimap<String, Integer> fromSortedSet = ParallelIterate.groupBy(
            values.toSortedSet(),
            String::valueOf,
            SynchronizedPutUnifiedSetMultimap.<String, Integer>newMultimap());
    Assert.assertEquals(expectedSet, fromSortedSet);

    // Bag source into a SynchronizedPutHashBagMultimap.
    Multimap<String, Integer> fromBagWith100 = ParallelIterate.groupBy(
            values.toBag(),
            String::valueOf,
            SynchronizedPutHashBagMultimap.<String, Integer>newMultimap(),
            100);
    Assert.assertEquals(expectedBag, fromBagWith100);

    Multimap<String, Integer> fromBag = ParallelIterate.groupBy(
            values.toBag(),
            String::valueOf,
            SynchronizedPutHashBagMultimap.<String, Integer>newMultimap());
    Assert.assertEquals(expectedBag, fromBag);

    // Immutable list and sorted list sources, default target.
    Multimap<String, Integer> fromImmutableList =
            ParallelIterate.groupBy(values.toList().toImmutable(), String::valueOf);
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(fromImmutableList));

    Multimap<String, Integer> fromSortedListWith100 =
            ParallelIterate.groupBy(values.toSortedList(), String::valueOf, 100);
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(fromSortedListWith100));

    Multimap<String, Integer> fromSortedList =
            ParallelIterate.groupBy(values.toSortedList(), String::valueOf);
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(fromSortedList));

    // Original FastList source into the MultiReader* targets.
    Multimap<String, Integer> intoMultiReaderListWith100 = ParallelIterate.groupBy(
            values,
            String::valueOf,
            MultiReaderFastListMultimap.<String, Integer>newMultimap(),
            100);
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(intoMultiReaderListWith100));

    Multimap<String, Integer> intoMultiReaderList = ParallelIterate.groupBy(
            values,
            String::valueOf,
            MultiReaderFastListMultimap.<String, Integer>newMultimap());
    Assert.assertEquals(expectedBag, HashBagMultimap.newMultimap(intoMultiReaderList));

    Multimap<String, Integer> intoMultiReaderBagWith100 = ParallelIterate.groupBy(
            values,
            String::valueOf,
            MultiReaderHashBagMultimap.<String, Integer>newMultimap(),
            100);
    Assert.assertEquals(expectedBag, intoMultiReaderBagWith100);

    Multimap<String, Integer> intoMultiReaderBag = ParallelIterate.groupBy(
            values,
            String::valueOf,
            MultiReaderHashBagMultimap.<String, Integer>newMultimap());
    Assert.assertEquals(expectedBag, intoMultiReaderBag);

    Multimap<String, Integer> intoMultiReaderSetWith100 = ParallelIterate.groupBy(
            values,
            String::valueOf,
            MultiReaderUnifiedSetMultimap.<String, Integer>newMultimap(),
            100);
    Assert.assertEquals(expectedSet, intoMultiReaderSetWith100);

    Multimap<String, Integer> intoMultiReaderSet = ParallelIterate.groupBy(
            values,
            String::valueOf,
            MultiReaderUnifiedSetMultimap.<String, Integer>newMultimap());
    Assert.assertEquals(expectedSet, intoMultiReaderSet);
}
/**
 * JMH throughput benchmarks that group a bag of random five-letter upper-case words by their
 * {@link Alphagram} and then report the groups holding at least {@link #SIZE_THRESHOLD} words,
 * largest group first.
 * <p>
 * Every benchmark runs the same pipeline (group, filter by size, sort descending by size, format
 * as {@code "<size>: <group>"}, assert each formatted string is non-empty) and differs only in the
 * library and execution strategy used.
 * <p>
 * {@code @Warmup} and {@code @Measurement} are declared on the class so that all benchmarks are
 * measured under the same iteration settings and their results are directly comparable.
 */
@State(Scope.Thread)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@Warmup(iterations = 20)
@Measurement(iterations = 10)
public class AnagramBagTest
{
    /** Number of random words generated for the input bag. */
    private static final int SIZE = 1_000_000;

    /** Value passed as the second argument of {@code asParallel} in {@link #parallel_lazy_gsc()}. */
    private static final int BATCH_SIZE = 10_000;

    /** Minimum number of words a group must contain to be reported. */
    private static final int SIZE_THRESHOLD = 10;

    // Locale.ROOT keeps the generated alphabet independent of the machine's default locale
    // (e.g. the Turkish dotted capital I). Fully qualified to avoid touching the file's imports.
    private final HashBag<String> gscWords = HashBag.newBag(
            FastList.newWithNValues(
                    SIZE,
                    () -> RandomStringUtils.randomAlphabetic(5).toUpperCase(java.util.Locale.ROOT)));

    /** Guava copy of {@link #gscWords}; it is the source for the two {@code *_jdk} benchmarks. */
    private final Multiset<String> guavaWords = HashMultiset.create(this.gscWords);

    /** Pool handed to {@code asParallel}; created in {@link #setUp()}, stopped in {@link #tearDown()}. */
    private ExecutorService executorService;

    /** Creates a fixed thread pool with one thread per available processor. */
    @Setup
    public void setUp()
    {
        this.executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
    }

    /**
     * Stops the pool and waits up to one second for its threads to finish.
     *
     * @throws InterruptedException if interrupted while waiting for termination
     */
    @TearDown
    public void tearDown() throws InterruptedException
    {
        this.executorService.shutdownNow();
        this.executorService.awaitTermination(1L, TimeUnit.SECONDS);
    }

    /** Serial grouping into a {@code FastListMultimap} via the bag's own {@code groupBy}. */
    @Benchmark
    public void serial_eager_gsc()
    {
        MutableListMultimap<Alphagram, String> groupBy =
                this.gscWords.groupBy(Alphagram::new, FastListMultimap.newMultimap());
        groupBy
                .multiValuesView()
                .select(iterable -> iterable.size() >= SIZE_THRESHOLD)
                .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size))
                .asReversed()
                .collect(iterable -> iterable.size() + ": " + iterable)
                .forEach(Procedures.cast(e -> Assert.assertFalse(e.isEmpty())));
    }

    /** Grouping through {@code ParallelIterate.groupBy}. */
    @Benchmark
    public void parallel_eager_gsc()
    {
        MutableMultimap<Alphagram, String> groupBy = ParallelIterate.groupBy(this.gscWords, Alphagram::new);
        groupBy
                .multiValuesView()
                .select(iterable -> iterable.size() >= SIZE_THRESHOLD)
                .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size))
                .asReversed()
                .collect(iterable -> iterable.size() + ": " + iterable)
                .forEach(Procedures.cast(e -> Assert.assertFalse(e.isEmpty())));
    }

    /** Grouping through the bag's {@code asParallel} view, using this class's executor and {@link #BATCH_SIZE}. */
    @Benchmark
    public void parallel_lazy_gsc()
    {
        ParallelUnsortedBag<String> parallelUnsortedBag =
                this.gscWords.asParallel(this.executorService, BATCH_SIZE);
        UnsortedBagMultimap<Alphagram, String> groupBy = parallelUnsortedBag.groupBy(Alphagram::new);
        groupBy
                .multiValuesView()
                .select(iterable -> iterable.size() >= SIZE_THRESHOLD)
                .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size))
                .asReversed()
                .collect(iterable -> iterable.size() + ": " + iterable)
                .forEach(Procedures.cast(e -> Assert.assertFalse(e.isEmpty())));
    }

    /** Grouping through {@code FJIterate.groupBy}. */
    @Benchmark
    public void parallel_eager_forkjoin_gsc()
    {
        MutableMultimap<Alphagram, String> groupBy = FJIterate.groupBy(this.gscWords, Alphagram::new);
        groupBy
                .multiValuesView()
                .select(iterable -> iterable.size() >= SIZE_THRESHOLD)
                .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size))
                .asReversed()
                .collect(iterable -> iterable.size() + ": " + iterable)
                .forEach(Procedures.cast(e -> Assert.assertFalse(e.isEmpty())));
    }

    /** Sequential {@code java.util.stream} pipeline over the Guava multiset. */
    @Benchmark
    public void serial_lazy_jdk()
    {
        Map<Alphagram, List<String>> groupBy =
                this.guavaWords.stream().collect(Collectors.groupingBy(Alphagram::new));
        groupBy
                .entrySet()
                .stream()
                .map(Map.Entry::getValue)
                .filter(list -> list.size() >= SIZE_THRESHOLD)
                .sorted(Comparator.<List<String>>comparingInt(List::size).reversed())
                .map(list -> list.size() + ": " + list)
                .forEach(e -> Assert.assertFalse(e.isEmpty()));
    }

    /** Parallel {@code java.util.stream} pipeline over the Guava multiset, for both grouping and reporting. */
    @Benchmark
    public void parallel_lazy_jdk()
    {
        Map<Alphagram, List<String>> groupBy =
                this.guavaWords.parallelStream().collect(Collectors.groupingBy(Alphagram::new));
        groupBy
                .entrySet()
                .parallelStream()
                .map(Map.Entry::getValue)
                .filter(list -> list.size() >= SIZE_THRESHOLD)
                .sorted(Comparator.<List<String>>comparingInt(List::size).reversed())
                .map(list -> list.size() + ": " + list)
                .forEach(e -> Assert.assertFalse(e.isEmpty()));
    }

    /**
     * Grouping key made of a word's characters in sorted order, so two words built from the same
     * characters (anagrams of each other) yield equal keys.
     */
    private static final class Alphagram
    {
        /** The word's characters, sorted ascending. */
        private final char[] key;

        private Alphagram(String string)
        {
            this.key = string.toCharArray();
            Arrays.sort(this.key);
        }

        @Override
        public boolean equals(Object o)
        {
            if (this == o)
            {
                return true;
            }
            if (o == null || this.getClass() != o.getClass())
            {
                return false;
            }
            Alphagram alphagram = (Alphagram) o;
            return Arrays.equals(this.key, alphagram.key);
        }

        @Override
        public int hashCode()
        {
            return Arrays.hashCode(this.key);
        }

        @Override
        public String toString()
        {
            return new String(this.key);
        }
    }
}