private <KIn, VIn, KOut, VOut> Map<KOut, List<VOut>> combineForLocalReduction( MapCombineCommand<KIn, VIn, KOut, VOut> mcc, CollectableCollector<KOut, VOut> collector) { String taskId = mcc.getTaskId(); Reducer<KOut, VOut> combiner = mcc.getCombiner(); Map<KOut, List<VOut>> result = null; if (combiner != null) { result = new HashMap<KOut, List<VOut>>(); log.tracef("For m/r task %s invoking combiner %s at %s", taskId, mcc, cdl.getAddress()); MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance(); long start = log.isTraceEnabled() ? timeService.time() : 0; try { Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName()); taskLifecycleService.onPreExecute(combiner, cache); Map<KOut, List<VOut>> collectedValues = collector.collectedValues(); for (Entry<KOut, List<VOut>> e : collectedValues.entrySet()) { VOut combined; List<VOut> list = e.getValue(); List<VOut> l = new LinkedList<VOut>(); if (list.size() > 1) { combined = combiner.reduce(e.getKey(), list.iterator()); } else { combined = list.get(0); } l.add(combined); result.put(e.getKey(), l); log.tracef( "For m/r task %s combined %s to %s at %s", taskId, e.getKey(), combined, cdl.getAddress()); } } finally { if (log.isTraceEnabled()) { log.tracef( "Combine for task %s took %s milliseconds", mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS)); } taskLifecycleService.onPostExecute(combiner); } } else { // Combiner not specified result = collector.collectedValues(); } return result; }
/**
 * Optionally combines the values gathered by {@code collector} and writes the outcome, one
 * {@code DeltaAwareList} per key, into the temporary cache of the map/reduce task.
 * <p>
 * The temporary cache is looked up under {@code DEFAULT_TMP_CACHE_CONFIGURATION_NAME} when the
 * command asks for composite intermediate keys, and under the task id otherwise. With composite
 * keys each entry is stored under an {@code IntermediateCompositeKey} built from the task id and
 * the key; otherwise the key itself is used.
 * <p>
 * When a combiner is present, every key with more than one collected value is reduced through
 * it, while other keys pass their first value through. Without a combiner the collected value
 * lists are stored as they are.
 *
 * @param mcc       command supplying the task id, the combiner, the composite-key flag and the
 *                  name of the cache handed to the lifecycle pre-execute callback
 * @param collector source of the collected key/value-list pairs
 * @return the keys that were written to the temporary cache
 * @throws IllegalStateException if the temporary cache lookup yields {@code null}
 * @throws CacheException        if anything fails while writing the keys of one node group
 * @throws Exception             declared by the signature; see the two cases above
 */
protected <KIn, VIn, KOut, VOut> Set<KOut> combine(
      MapCombineCommand<KIn, VIn, KOut, VOut> mcc, CollectableCollector<KOut, VOut> collector)
      throws Exception {
   final String taskId = mcc.getTaskId();
   final boolean compositeKeys = mcc.isEmitCompositeIntermediateKeys();
   final Reducer<KOut, VOut> combiner = mcc.getCombiner();
   final Set<KOut> mapPhaseKeys = new HashSet<KOut>();

   // Pick the cache name first so that the lookup itself is written only once.
   final String tmpCacheName = compositeKeys ? DEFAULT_TMP_CACHE_CONFIGURATION_NAME : taskId;
   final Cache<Object, DeltaAwareList<VOut>> tmpCache = cacheManager.getCache(tmpCacheName);
   if (tmpCache == null) {
      throw new IllegalStateException(
            "Temporary cache for MapReduceTask " + taskId + " not found on " + cdl.getAddress());
   }
   final DistributionManager dm = tmpCache.getAdvancedCache().getDistributionManager();

   if (combiner == null) {
      // No combiner: every key is stored together with its uncombined value list.
      final Map<KOut, List<VOut>> collected = collector.collectedValues();
      final Map<Address, List<KOut>> keysByNode =
            mapKeysToNodes(dm, taskId, collected.keySet(), compositeKeys);

      long migrationStart = 0;
      if (log.isTraceEnabled()) {
         migrationStart = timeService.time();
      }
      try {
         for (Entry<Address, List<KOut>> group : keysByNode.entrySet()) {
            final List<KOut> groupKeys = group.getValue();
            try {
               log.tracef(
                     "For m/r task %s migrating intermediate keys %s to %s",
                     taskId, groupKeys, group.getKey());
               for (KOut key : groupKeys) {
                  final DeltaAwareList<VOut> delta = new DeltaAwareList<VOut>(collected.get(key));
                  final Object cacheKey =
                        compositeKeys ? new IntermediateCompositeKey<KOut>(taskId, key) : key;
                  tmpCache.put(cacheKey, delta);
                  mapPhaseKeys.add(key);
               }
            } catch (Exception e) {
               throw new CacheException(
                     "Could not move intermediate keys/values for M/R task " + taskId, e);
            }
         }
      } finally {
         if (log.isTraceEnabled()) {
            log.tracef(
                  "Migrating keys for task %s took %s milliseconds (Migrated %s keys)",
                  mcc.getTaskId(),
                  timeService.timeDuration(migrationStart, TimeUnit.MILLISECONDS),
                  mapPhaseKeys.size());
         }
      }
      return mapPhaseKeys;
   }

   // Combiner present: first reduce the collected values down to one value per key ...
   final Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
   log.tracef("For m/r task %s invoking combiner %s at %s", taskId, mcc, cdl.getAddress());
   final MapReduceTaskLifecycleService lifecycle = MapReduceTaskLifecycleService.getInstance();
   final Map<KOut, VOut> combinedByKey = new ConcurrentHashMap<KOut, VOut>();

   long combineStart = 0;
   if (log.isTraceEnabled()) {
      combineStart = timeService.time();
   }
   try {
      lifecycle.onPreExecute(combiner, cache);
      for (Entry<KOut, List<VOut>> collected : collector.collectedValues().entrySet()) {
         final KOut key = collected.getKey();
         final List<VOut> values = collected.getValue();

         final VOut combined;
         if (values.size() > 1) {
            combined = combiner.reduce(key, values.iterator());
         } else {
            combined = values.get(0);
         }
         combinedByKey.put(key, combined);

         log.tracef(
               "For m/r task %s combined %s to %s at %s", taskId, key, combined, cdl.getAddress());
      }
   } finally {
      // Timing is reported first, then the post-execute callback runs, even on failure.
      if (log.isTraceEnabled()) {
         log.tracef(
               "Combine for task %s took %s milliseconds",
               mcc.getTaskId(),
               timeService.timeDuration(combineStart, TimeUnit.MILLISECONDS));
      }
      lifecycle.onPostExecute(combiner);
   }

   // ... then store each combined value in the temporary cache, group by group.
   final Map<Address, List<KOut>> keysByNode =
         mapKeysToNodes(dm, taskId, combinedByKey.keySet(), compositeKeys);

   long migrationStart = 0;
   if (log.isTraceEnabled()) {
      migrationStart = timeService.time();
   }
   try {
      for (Entry<Address, List<KOut>> group : keysByNode.entrySet()) {
         final List<KOut> groupKeys = group.getValue();
         try {
            log.tracef(
                  "For m/r task %s migrating intermediate keys %s to %s",
                  taskId, groupKeys, group.getKey());
            for (KOut key : groupKeys) {
               final DeltaAwareList<VOut> delta = new DeltaAwareList<VOut>(combinedByKey.get(key));
               final Object cacheKey =
                     compositeKeys ? new IntermediateCompositeKey<KOut>(taskId, key) : key;
               tmpCache.put(cacheKey, delta);
               mapPhaseKeys.add(key);
            }
         } catch (Exception e) {
            throw new CacheException(
                  "Could not move intermediate keys/values for M/R task " + taskId, e);
         }
      }
   } finally {
      if (log.isTraceEnabled()) {
         log.tracef(
               "Migrating keys for task %s took %s milliseconds (Migrated %s keys)",
               mcc.getTaskId(),
               timeService.timeDuration(migrationStart, TimeUnit.MILLISECONDS),
               mapPhaseKeys.size());
      }
   }
   return mapPhaseKeys;
}