private <KIn, VIn, KOut, VOut> Map<KOut, List<VOut>> combineForLocalReduction( MapCombineCommand<KIn, VIn, KOut, VOut> mcc, CollectableCollector<KOut, VOut> collector) { String taskId = mcc.getTaskId(); Reducer<KOut, VOut> combiner = mcc.getCombiner(); Map<KOut, List<VOut>> result = null; if (combiner != null) { result = new HashMap<KOut, List<VOut>>(); log.tracef("For m/r task %s invoking combiner %s at %s", taskId, mcc, cdl.getAddress()); MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance(); long start = log.isTraceEnabled() ? timeService.time() : 0; try { Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName()); taskLifecycleService.onPreExecute(combiner, cache); Map<KOut, List<VOut>> collectedValues = collector.collectedValues(); for (Entry<KOut, List<VOut>> e : collectedValues.entrySet()) { VOut combined; List<VOut> list = e.getValue(); List<VOut> l = new LinkedList<VOut>(); if (list.size() > 1) { combined = combiner.reduce(e.getKey(), list.iterator()); } else { combined = list.get(0); } l.add(combined); result.put(e.getKey(), l); log.tracef( "For m/r task %s combined %s to %s at %s", taskId, e.getKey(), combined, cdl.getAddress()); } } finally { if (log.isTraceEnabled()) { log.tracef( "Combine for task %s took %s milliseconds", mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS)); } taskLifecycleService.onPostExecute(combiner); } } else { // Combiner not specified result = collector.collectedValues(); } return result; }
/**
 * Optionally combines the collected map-phase values and writes them into the task's
 * temporary cache.
 * <p>
 * The temporary cache is looked up under {@code DEFAULT_TMP_CACHE_CONFIGURATION_NAME} when the
 * command emits composite intermediate keys, and under the task id otherwise. In the composite
 * case each entry is stored under an {@code IntermediateCompositeKey} built from the task id
 * and the key; otherwise it is stored under the key itself.
 * <p>
 * With a combiner, one combined value per key is stored; without one, the whole collected
 * list for the key is stored. Either way the value is wrapped in a {@code DeltaAwareList}.
 *
 * @param mcc       command supplying the task id, the combiner, the cache name and the
 *                  composite-key flag
 * @param collector collector holding the values emitted for each intermediate key
 * @return the keys that were put into the temporary cache
 * @throws IllegalStateException if the temporary cache lookup returns {@code null}
 * @throws CacheException        if anything fails while writing keys for one target address
 * @throws Exception             declared by the signature
 */
protected <KIn, VIn, KOut, VOut> Set<KOut> combine(
      MapCombineCommand<KIn, VIn, KOut, VOut> mcc,
      CollectableCollector<KOut, VOut> collector) throws Exception {

   String taskId = mcc.getTaskId();
   boolean emitCompositeIntermediateKeys = mcc.isEmitCompositeIntermediateKeys();
   Reducer<KOut, VOut> combiner = mcc.getCombiner();
   Set<KOut> mapPhaseKeys = new HashSet<KOut>();

   String tmpCacheName = emitCompositeIntermediateKeys ? DEFAULT_TMP_CACHE_CONFIGURATION_NAME : taskId;
   Cache<Object, DeltaAwareList<VOut>> tmpCache = cacheManager.getCache(tmpCacheName);
   if (tmpCache == null) {
      throw new IllegalStateException(
            "Temporary cache for MapReduceTask " + taskId + " not found on " + cdl.getAddress());
   }
   DistributionManager dm = tmpCache.getAdvancedCache().getDistributionManager();

   if (combiner == null) {
      // No combiner: every key is stored together with its full, uncombined value list.
      Map<KOut, List<VOut>> uncombined = collector.collectedValues();
      Map<Address, List<KOut>> keysByNode =
            mapKeysToNodes(dm, taskId, uncombined.keySet(), emitCompositeIntermediateKeys);

      long migrationStart = log.isTraceEnabled() ? timeService.time() : 0;
      try {
         for (Entry<Address, List<KOut>> nodeEntry : keysByNode.entrySet()) {
            List<KOut> keysForNode = nodeEntry.getValue();
            try {
               log.tracef("For m/r task %s migrating intermediate keys %s to %s",
                     taskId, keysForNode, nodeEntry.getKey());
               for (KOut key : keysForNode) {
                  List<VOut> values = uncombined.get(key);
                  DeltaAwareList<VOut> delta = new DeltaAwareList<VOut>(values);
                  Object targetKey = emitCompositeIntermediateKeys
                        ? new IntermediateCompositeKey<KOut>(taskId, key)
                        : key;
                  tmpCache.put(targetKey, delta);
                  mapPhaseKeys.add(key);
               }
            } catch (Exception failure) {
               throw new CacheException(
                     "Could not move intermediate keys/values for M/R task " + taskId, failure);
            }
         }
      } finally {
         if (log.isTraceEnabled()) {
            log.tracef("Migrating keys for task %s took %s milliseconds (Migrated %s keys)",
                  mcc.getTaskId(),
                  timeService.timeDuration(migrationStart, TimeUnit.MILLISECONDS),
                  mapPhaseKeys.size());
         }
      }
      return mapPhaseKeys;
   }

   // Combiner present: first reduce each key's values to a single value ...
   Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
   log.tracef("For m/r task %s invoking combiner %s at %s", taskId, mcc, cdl.getAddress());
   MapReduceTaskLifecycleService lifecycle = MapReduceTaskLifecycleService.getInstance();
   Map<KOut, VOut> combinedMap = new ConcurrentHashMap<KOut, VOut>();

   long combineStart = log.isTraceEnabled() ? timeService.time() : 0;
   try {
      lifecycle.onPreExecute(combiner, cache);
      Map<KOut, List<VOut>> gathered = collector.collectedValues();
      for (Entry<KOut, List<VOut>> collected : gathered.entrySet()) {
         KOut key = collected.getKey();
         List<VOut> values = collected.getValue();

         VOut combined;
         if (values.size() > 1) {
            combined = combiner.reduce(key, values.iterator());
         } else {
            // A lone value is passed through without invoking the combiner.
            combined = values.get(0);
         }
         combinedMap.put(key, combined);

         log.tracef("For m/r task %s combined %s to %s at %s",
               taskId, key, combined, cdl.getAddress());
      }
   } finally {
      if (log.isTraceEnabled()) {
         log.tracef("Combine for task %s took %s milliseconds",
               mcc.getTaskId(), timeService.timeDuration(combineStart, TimeUnit.MILLISECONDS));
      }
      lifecycle.onPostExecute(combiner);
   }

   // ... then store each combined value in the temporary cache.
   Map<Address, List<KOut>> keysByNode =
         mapKeysToNodes(dm, taskId, combinedMap.keySet(), emitCompositeIntermediateKeys);

   long migrationStart = log.isTraceEnabled() ? timeService.time() : 0;
   try {
      for (Entry<Address, List<KOut>> nodeEntry : keysByNode.entrySet()) {
         List<KOut> keysForNode = nodeEntry.getValue();
         try {
            log.tracef("For m/r task %s migrating intermediate keys %s to %s",
                  taskId, keysForNode, nodeEntry.getKey());
            for (KOut key : keysForNode) {
               VOut combinedValue = combinedMap.get(key);
               DeltaAwareList<VOut> delta = new DeltaAwareList<VOut>(combinedValue);
               Object targetKey = emitCompositeIntermediateKeys
                     ? new IntermediateCompositeKey<KOut>(taskId, key)
                     : key;
               tmpCache.put(targetKey, delta);
               mapPhaseKeys.add(key);
            }
         } catch (Exception failure) {
            throw new CacheException(
                  "Could not move intermediate keys/values for M/R task " + taskId, failure);
         }
      }
   } finally {
      if (log.isTraceEnabled()) {
         log.tracef("Migrating keys for task %s took %s milliseconds (Migrated %s keys)",
               mcc.getTaskId(),
               timeService.timeDuration(migrationStart, TimeUnit.MILLISECONDS),
               mapPhaseKeys.size());
      }
   }
   return mapPhaseKeys;
}
protected <KIn, VIn, KOut, VOut> CollectableCollector<KOut, VOut> map( MapCombineCommand<KIn, VIn, KOut, VOut> mcc) throws InterruptedException { Cache<KIn, VIn> cache = cacheManager.getCache(mcc.getCacheName()); Set<KIn> keys = mcc.getKeys(); Set<KIn> inputKeysCopy = null; Mapper<KIn, VIn, KOut, VOut> mapper = mcc.getMapper(); DistributionManager dm = cache.getAdvancedCache().getDistributionManager(); boolean inputKeysSpecified = keys != null && !keys.isEmpty(); Set<KIn> inputKeys = keys; if (!inputKeysSpecified) { inputKeys = filterLocalPrimaryOwner(cache.keySet(), dm); } else { inputKeysCopy = new HashSet<KIn>(keys); } // hook map function into lifecycle and execute it MapReduceTaskLifecycleService taskLifecycleService = MapReduceTaskLifecycleService.getInstance(); DefaultCollector<KOut, VOut> collector = new DefaultCollector<KOut, VOut>(); log.tracef("For m/r task %s invoking %s with input keys %s", mcc.getTaskId(), mcc, inputKeys); int interruptCount = 0; long start = log.isTraceEnabled() ? 
timeService.time() : 0; try { taskLifecycleService.onPreExecute(mapper, cache); for (KIn key : inputKeys) { if (checkInterrupt(interruptCount++) && Thread.currentThread().isInterrupted()) throw new InterruptedException(); VIn value = cache.get(key); mapper.map(key, value, collector); if (inputKeysSpecified) { inputKeysCopy.remove(key); } } Set<KIn> keysFromCacheLoader = null; if (inputKeysSpecified) { // load only specified remaining input keys - iff in CL and pinned to this primary owner keysFromCacheLoader = filterLocalPrimaryOwner(inputKeysCopy, dm); } else { // load everything from CL pinned to this primary owner keysFromCacheLoader = filterLocalPrimaryOwner(loadAllKeysFromCacheLoaderUsingFilter(inputKeys), dm); } log.tracef( "For m/r task %s cache loader input keys %s", mcc.getTaskId(), keysFromCacheLoader); interruptCount = 0; for (KIn key : keysFromCacheLoader) { if (checkInterrupt(interruptCount++) && Thread.currentThread().isInterrupted()) throw new InterruptedException(); VIn value = loadValueFromCacheLoader(key); if (value != null) { mapper.map(key, value, collector); } } } finally { if (log.isTraceEnabled()) { log.tracef( "Map phase for task %s took %s milliseconds", mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS)); } taskLifecycleService.onPostExecute(mapper); } return collector; }