/**
 * Runs Parallel FPGrowth on the retail data twice through the top-level
 * {@code runPFPGrowth()} method, once with {@code paramsImpl1} and once with
 * {@code paramsImpl2}, and compares the two result sets.
 *
 * <p>Each pattern is keyed by the <em>set</em> of its items, so the order of items inside a
 * pattern is ignored. Per-pattern differences (spurious / invalid / matched / missing) are only
 * printed to standard output; the sole assertion is that both runs yield the same number of
 * distinct item sets.
 *
 * @throws Exception if either run or the reading of its output fails
 */
@Test
public void testParallelRetailVs() throws Exception {
  // Run #1 and index its patterns by item set.
  PFPGrowth.runPFPGrowth(paramsImpl1);
  List<Pair<String, TopKStringPatterns>> firstRun = PFPGrowth.readFrequentPattern(paramsImpl1);
  Map<Set<String>, Long> supportByItems1 = Maps.newHashMap();
  for (Pair<String, TopKStringPatterns> featurePatterns : firstRun) {
    for (Iterator<Pair<List<String>, Long>> it = featurePatterns.getSecond().iterator();
        it.hasNext();) {
      Pair<List<String>, Long> pattern = it.next();
      supportByItems1.put(new HashSet<String>(pattern.getFirst()), pattern.getSecond());
    }
  }

  // Run #2, indexed the same way; this run is treated as the "expected" side below.
  PFPGrowth.runPFPGrowth(paramsImpl2);
  List<Pair<String, TopKStringPatterns>> secondRun = PFPGrowth.readFrequentPattern(paramsImpl2);
  Map<Set<String>, Long> supportByItems2 = Maps.newHashMap();
  for (Pair<String, TopKStringPatterns> featurePatterns : secondRun) {
    for (Iterator<Pair<List<String>, Long>> it = featurePatterns.getSecond().iterator();
        it.hasNext();) {
      Pair<List<String>, Long> pattern = it.next();
      supportByItems2.put(new HashSet<String>(pattern.getFirst()), pattern.getSecond());
    }
  }

  // Report every item set of run #1 against run #2.
  for (Entry<Set<String>, Long> found : supportByItems1.entrySet()) {
    Set<String> items = found.getKey();
    Long actual = found.getValue();
    Long expected = supportByItems2.get(items);
    if (expected == null) {
      System.out.println("spurious (1): " + items + " with " + actual);
      continue;
    }
    if (expected.equals(actual)) {
      System.out.println("matched (1): " + items + ", with: " + expected);
    } else {
      System.out.println(
          "invalid (1): " + items + ", expected: " + expected + ", got: " + actual);
    }
  }

  // Report item sets that only run #2 produced.
  for (Entry<Set<String>, Long> wanted : supportByItems2.entrySet()) {
    Set<String> items = wanted.getKey();
    if (supportByItems1.get(items) == null) {
      System.out.println("missing (1): " + items + " with " + wanted.getValue());
    }
  }

  assertEquals(supportByItems2.size(), supportByItems1.size());
}
/**
 * Merges every {@link TransactionTree} received for one group into a single tree and mines the
 * top-K frequent patterns of that group from it.
 *
 * <p>When {@code useFP2} is set, mining is delegated to the {@code fpgrowth2.FPGrowthIds}
 * implementation using the {@code freqList} field; otherwise the classic {@link FPGrowth} is
 * used with a frequency list derived from the merged tree. In both cases results are handed to an
 * {@link IntegerStringOutputConverter} that wraps a collector writing to {@code context}.
 *
 * @param key group id; used to look up the group's members via
 *     {@code PFPGrowth.getGroupMembers}
 * @param values the transaction trees emitted for this group
 * @param context reducer context that receives the output and status updates
 * @throws IOException declared by the overridden method; presumably raised by the mining or
 *     output steps
 */
@Override
protected void reduce(IntWritable key, Iterable<TransactionTree> values, Context context)
    throws IOException {
  // Fold all incoming trees into one by re-adding each of their patterns.
  TransactionTree mergedTree = new TransactionTree();
  for (TransactionTree partial : values) {
    for (Pair<IntArrayList, Long> pattern : partial) {
      mergedTree.addPattern(pattern.getFirst(), pattern.getSecond());
    }
  }

  // Frequency list of the merged tree, ordered by CountDescendingPairComparator.
  // Note: only the classic (non-FP2) branch below consumes this list.
  List<Pair<Integer, Long>> localCounts = Lists.newArrayList();
  for (Entry<Integer, MutableLong> counted : mergedTree.generateFList().entrySet()) {
    localCounts.add(new Pair<Integer, Long>(counted.getKey(), counted.getValue().toLong()));
  }
  Collections.sort(localCounts, new CountDescendingPairComparator<Integer, Long>());

  if (!useFP2) {
    FPGrowth<Integer> miner = new FPGrowth<Integer>();
    // Arguments are built in the same order in which they are passed.
    IteratorAdapter transactions = new IteratorAdapter(mergedTree.iterator());
    HashSet<Integer> groupItems =
        new HashSet<Integer>(
            PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures).toList());
    IntegerStringOutputConverter output =
        new IntegerStringOutputConverter(
            new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(
                context),
            featureReverseMap);
    ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns> status =
        new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context);
    miner.generateTopKFrequentPatterns(
        transactions, localCounts, minSupport, maxHeapSize, groupItems, output, status);
  } else {
    org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds idMiner =
        new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds();
    idMiner.generateTopKFrequentPatterns(
        mergedTree.iterator(),
        freqList,
        minSupport,
        maxHeapSize,
        PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures),
        new IntegerStringOutputConverter(
            new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(
                context),
            featureReverseMap),
        new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context));
  }
}
/**
 * Initialises the reducer's state from the job configuration.
 *
 * <p>Reads the serialized parameters stored under {@code PFPGrowth.PFP_PARAMETERS} (empty string
 * if absent), fills {@code featureReverseMap} and {@code freqList} from
 * {@code PFPGrowth.readFList}, and then sets:
 * <ul>
 *   <li>{@code maxHeapSize} from {@code PFPGrowth.MAX_HEAPSIZE} (default {@code "50"})</li>
 *   <li>{@code minSupport} from {@code PFPGrowth.MIN_SUPPORT} (default {@code "3"})</li>
 *   <li>{@code maxPerGroup} from {@code PFPGrowth.MAX_PER_GROUP} (default {@code 0})</li>
 *   <li>{@code numFeatures} to the number of entries in {@code featureReverseMap}</li>
 *   <li>{@code useFP2} to whether {@code PFPGrowth.USE_FPG2} is exactly {@code "true"}</li>
 * </ul>
 *
 * @param context reducer context supplying the configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  String serializedParams = context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, "");
  Parameters pfpParams = new Parameters(serializedParams);

  // The i-th entry of featureReverseMap and of freqList come from the same f-list pair.
  for (Pair<String, Long> featureCount : PFPGrowth.readFList(context.getConfiguration())) {
    featureReverseMap.add(featureCount.getFirst());
    freqList.add(featureCount.getSecond());
  }

  maxHeapSize = Integer.parseInt(pfpParams.get(PFPGrowth.MAX_HEAPSIZE, "50"));
  minSupport = Integer.parseInt(pfpParams.get(PFPGrowth.MIN_SUPPORT, "3"));
  maxPerGroup = pfpParams.getInt(PFPGrowth.MAX_PER_GROUP, 0);
  numFeatures = featureReverseMap.size();
  // Case-sensitive match: only the literal "true" enables the FP2 implementation.
  useFP2 = "true".equals(pfpParams.get(PFPGrowth.USE_FPG2));
}