コード例 #1
0
  private <KIn, VIn, KOut, VOut> Map<KOut, List<VOut>> combineForLocalReduction(
      MapCombineCommand<KIn, VIn, KOut, VOut> mcc, CollectableCollector<KOut, VOut> collector) {

    String taskId = mcc.getTaskId();
    Reducer<KOut, VOut> combiner = mcc.getCombiner();
    Map<KOut, List<VOut>> result = null;

    if (combiner != null) {
      result = new HashMap<KOut, List<VOut>>();
      log.tracef("For m/r task %s invoking combiner %s at %s", taskId, mcc, cdl.getAddress());
      MapReduceTaskLifecycleService taskLifecycleService =
          MapReduceTaskLifecycleService.getInstance();
      long start = log.isTraceEnabled() ? timeService.time() : 0;
      try {
        Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
        taskLifecycleService.onPreExecute(combiner, cache);
        Map<KOut, List<VOut>> collectedValues = collector.collectedValues();
        for (Entry<KOut, List<VOut>> e : collectedValues.entrySet()) {
          VOut combined;
          List<VOut> list = e.getValue();
          List<VOut> l = new LinkedList<VOut>();
          if (list.size() > 1) {
            combined = combiner.reduce(e.getKey(), list.iterator());
          } else {
            combined = list.get(0);
          }
          l.add(combined);
          result.put(e.getKey(), l);
          log.tracef(
              "For m/r task %s combined %s to %s at %s",
              taskId, e.getKey(), combined, cdl.getAddress());
        }
      } finally {
        if (log.isTraceEnabled()) {
          log.tracef(
              "Combine for task %s took %s milliseconds",
              mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS));
        }
        taskLifecycleService.onPostExecute(combiner);
      }
    } else {
      // Combiner not specified
      result = collector.collectedValues();
    }
    return result;
  }
コード例 #2
0
  protected <KIn, VIn, KOut, VOut> Set<KOut> combine(
      MapCombineCommand<KIn, VIn, KOut, VOut> mcc, CollectableCollector<KOut, VOut> collector)
      throws Exception {

    String taskId = mcc.getTaskId();
    boolean emitCompositeIntermediateKeys = mcc.isEmitCompositeIntermediateKeys();
    Reducer<KOut, VOut> combiner = mcc.getCombiner();
    Set<KOut> mapPhaseKeys = new HashSet<KOut>();
    Cache<Object, DeltaAwareList<VOut>> tmpCache = null;
    if (emitCompositeIntermediateKeys) {
      tmpCache = cacheManager.getCache(DEFAULT_TMP_CACHE_CONFIGURATION_NAME);
    } else {
      tmpCache = cacheManager.getCache(taskId);
    }
    if (tmpCache == null) {
      throw new IllegalStateException(
          "Temporary cache for MapReduceTask " + taskId + " not found on " + cdl.getAddress());
    }
    DistributionManager dm = tmpCache.getAdvancedCache().getDistributionManager();

    if (combiner != null) {
      Cache<?, ?> cache = cacheManager.getCache(mcc.getCacheName());
      log.tracef("For m/r task %s invoking combiner %s at %s", taskId, mcc, cdl.getAddress());
      MapReduceTaskLifecycleService taskLifecycleService =
          MapReduceTaskLifecycleService.getInstance();
      Map<KOut, VOut> combinedMap = new ConcurrentHashMap<KOut, VOut>();
      long start = log.isTraceEnabled() ? timeService.time() : 0;
      try {
        taskLifecycleService.onPreExecute(combiner, cache);
        Map<KOut, List<VOut>> collectedValues = collector.collectedValues();
        for (Entry<KOut, List<VOut>> e : collectedValues.entrySet()) {
          List<VOut> list = e.getValue();
          VOut combined;
          if (list.size() > 1) {
            combined = combiner.reduce(e.getKey(), list.iterator());
            combinedMap.put(e.getKey(), combined);
          } else {
            combined = list.get(0);
            combinedMap.put(e.getKey(), combined);
          }
          log.tracef(
              "For m/r task %s combined %s to %s at %s",
              taskId, e.getKey(), combined, cdl.getAddress());
        }
      } finally {
        if (log.isTraceEnabled()) {
          log.tracef(
              "Combine for task %s took %s milliseconds",
              mcc.getTaskId(), timeService.timeDuration(start, TimeUnit.MILLISECONDS));
        }
        taskLifecycleService.onPostExecute(combiner);
      }
      Map<Address, List<KOut>> keysToNodes =
          mapKeysToNodes(dm, taskId, combinedMap.keySet(), emitCompositeIntermediateKeys);

      start = log.isTraceEnabled() ? timeService.time() : 0;
      try {
        for (Entry<Address, List<KOut>> entry : keysToNodes.entrySet()) {
          List<KOut> keysHashedToAddress = entry.getValue();
          try {
            log.tracef(
                "For m/r task %s migrating intermediate keys %s to %s",
                taskId, keysHashedToAddress, entry.getKey());
            for (KOut key : keysHashedToAddress) {
              VOut value = combinedMap.get(key);
              DeltaAwareList<VOut> delta = new DeltaAwareList<VOut>(value);
              if (emitCompositeIntermediateKeys) {
                tmpCache.put(new IntermediateCompositeKey<KOut>(taskId, key), delta);
              } else {
                tmpCache.put(key, delta);
              }
              mapPhaseKeys.add(key);
            }
          } catch (Exception e) {
            throw new CacheException(
                "Could not move intermediate keys/values for M/R task " + taskId, e);
          }
        }
      } finally {
        if (log.isTraceEnabled()) {
          log.tracef(
              "Migrating keys for task %s took %s milliseconds (Migrated %s keys)",
              mcc.getTaskId(),
              timeService.timeDuration(start, TimeUnit.MILLISECONDS),
              mapPhaseKeys.size());
        }
      }
    } else {
      // Combiner not specified so lets insert each key/uncombined-List pair into tmp cache

      Map<KOut, List<VOut>> collectedValues = collector.collectedValues();
      Map<Address, List<KOut>> keysToNodes =
          mapKeysToNodes(dm, taskId, collectedValues.keySet(), emitCompositeIntermediateKeys);

      long start = log.isTraceEnabled() ? timeService.time() : 0;
      try {
        for (Entry<Address, List<KOut>> entry : keysToNodes.entrySet()) {
          List<KOut> keysHashedToAddress = entry.getValue();
          try {
            log.tracef(
                "For m/r task %s migrating intermediate keys %s to %s",
                taskId, keysHashedToAddress, entry.getKey());
            for (KOut key : keysHashedToAddress) {
              List<VOut> value = collectedValues.get(key);
              DeltaAwareList<VOut> delta = new DeltaAwareList<VOut>(value);
              if (emitCompositeIntermediateKeys) {
                tmpCache.put(new IntermediateCompositeKey<KOut>(taskId, key), delta);
              } else {
                tmpCache.put(key, delta);
              }
              mapPhaseKeys.add(key);
            }
          } catch (Exception e) {
            throw new CacheException(
                "Could not move intermediate keys/values for M/R task " + taskId, e);
          }
        }
      } finally {
        if (log.isTraceEnabled()) {
          log.tracef(
              "Migrating keys for task %s took %s milliseconds (Migrated %s keys)",
              mcc.getTaskId(),
              timeService.timeDuration(start, TimeUnit.MILLISECONDS),
              mapPhaseKeys.size());
        }
      }
    }
    return mapPhaseKeys;
  }