  @Override
  public Option<MapStatus> stop(boolean success) {
    try {
      // Update task metrics from accumulators (null in UnsafeShuffleWriterSuite)
      Map<String, Accumulator<Object>> internalAccumulators =
          taskContext.internalMetricsToAccumulators();
      if (internalAccumulators != null) {
        internalAccumulators
            .apply(InternalAccumulator.PEAK_EXECUTION_MEMORY())
            .add(getPeakMemoryUsedBytes());
      }

      if (stopping) {
        return Option.apply(null);
      } else {
        stopping = true;
        if (success) {
          if (mapStatus == null) {
            throw new IllegalStateException("Cannot call stop(true) without having called write()");
          }
          return Option.apply(mapStatus);
        } else {
          // The map task failed, so delete our output data.
          shuffleBlockResolver.removeDataByMap(shuffleId, mapId);
          return Option.apply(null);
        }
      }
    } finally {
      if (sorter != null) {
        // If sorter is non-null, then this implies that we called stop() in response to an error,
        // so we need to clean up memory and spill files created by the sorter
        sorter.cleanupResources();
      }
    }
  }
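For context, a minimal sketch of the write()/stop() contract this method enforces: stop(true) is only valid after a successful write(), while stop(false) takes the cleanup branch that deletes partial map output. The wrapper below is illustrative wiring under those assumptions, not Spark's actual ShuffleMapTask code.

import java.io.IOException;
import org.apache.spark.scheduler.MapStatus;
import scala.Option;
import scala.Product2;
import scala.collection.Iterator;

// Illustrative caller (assumed wiring; assumes same package as UnsafeShuffleWriter).
final class MapTaskSketch {
  static Option<MapStatus> runMapTask(
      UnsafeShuffleWriter<Object, Object> writer,
      Iterator<Product2<Object, Object>> records) throws IOException {
    boolean success = false;
    try {
      writer.write(records);     // sets mapStatus internally on success
      success = true;
      return writer.stop(true);  // returns Some(mapStatus) per the contract above
    } finally {
      if (!success) {
        writer.stop(false);      // failure path: delete partial map output
      }
    }
  }
}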
Example #2
  /** Sort and spill the current records in response to memory pressure. */
  @VisibleForTesting
  public void spill() throws IOException {
    logger.info(
        "Thread {} spilling sort data of {} to disk ({} {} so far)",
        Thread.currentThread().getId(),
        Utils.bytesToString(getMemoryUsage()),
        spillWriters.size(),
        spillWriters.size() > 1 ? "times" : "time");

    final UnsafeSorterSpillWriter spillWriter =
        new UnsafeSorterSpillWriter(
            blockManager, fileBufferSizeBytes, writeMetrics, sorter.numRecords());
    spillWriters.add(spillWriter);
    final UnsafeSorterIterator sortedRecords = sorter.getSortedIterator();
    while (sortedRecords.hasNext()) {
      sortedRecords.loadNext();
      final Object baseObject = sortedRecords.getBaseObject();
      final long baseOffset = sortedRecords.getBaseOffset();
      final int recordLength = sortedRecords.getRecordLength();
      spillWriter.write(baseObject, baseOffset, recordLength, sortedRecords.getKeyPrefix());
    }
    spillWriter.close();
    final long sorterMemoryUsage = sorter.getMemoryUsage();
    sorter = null;
    shuffleMemoryManager.release(sorterMemoryUsage);
    final long spillSize = freeMemory();
    taskContext.taskMetrics().incMemoryBytesSpilled(spillSize);
    initializeForWriting();
  }
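This is the spill half of a classic external sort: sort the in-memory records, stream them to disk as one sorted run, release the memory, and reset for more input. The eventual k-way merge of those runs happens elsewhere in the sorter; below is a self-contained sketch of that merge pattern using plain Java collections, not Spark's actual merge code (which streams records back from spill files and compares key prefixes).

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

// Illustrative k-way merge of sorted spill runs, using Lists of longs in place
// of Spark's spill-file readers.
final class RunMerge {
  private static final class Head {
    long value;
    final Iterator<Long> rest;
    Head(long value, Iterator<Long> rest) { this.value = value; this.rest = rest; }
  }

  static List<Long> mergeSortedRuns(List<List<Long>> runs) {
    PriorityQueue<Head> heap =
        new PriorityQueue<>((a, b) -> Long.compare(a.value, b.value));
    for (List<Long> run : runs) {
      Iterator<Long> it = run.iterator();
      if (it.hasNext()) heap.add(new Head(it.next(), it));
    }
    List<Long> merged = new ArrayList<>();
    while (!heap.isEmpty()) {
      Head h = heap.poll();      // smallest current head across all runs
      merged.add(h.value);
      if (h.rest.hasNext()) {    // refill the heap from the same run
        h.value = h.rest.next();
        heap.add(h);
      }
    }
    return merged;
  }
}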
  public UnsafeShuffleWriter(
      BlockManager blockManager,
      IndexShuffleBlockResolver shuffleBlockResolver,
      TaskMemoryManager memoryManager,
      ShuffleMemoryManager shuffleMemoryManager,
      UnsafeShuffleHandle<K, V> handle,
      int mapId,
      TaskContext taskContext,
      SparkConf sparkConf)
      throws IOException {
    final int numPartitions = handle.dependency().partitioner().numPartitions();
    if (numPartitions > UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS()) {
      throw new IllegalArgumentException(
          "UnsafeShuffleWriter can only be used for shuffles with at most "
              + UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS()
              + " reduce partitions");
    }
    this.blockManager = blockManager;
    this.shuffleBlockResolver = shuffleBlockResolver;
    this.memoryManager = memoryManager;
    this.shuffleMemoryManager = shuffleMemoryManager;
    this.mapId = mapId;
    final ShuffleDependency<K, V, V> dep = handle.dependency();
    this.shuffleId = dep.shuffleId();
    this.serializer = Serializer.getSerializer(dep.serializer()).newInstance();
    this.partitioner = dep.partitioner();
    this.writeMetrics = new ShuffleWriteMetrics();
    taskContext.taskMetrics().shuffleWriteMetrics_$eq(Option.apply(writeMetrics));
    this.taskContext = taskContext;
    this.sparkConf = sparkConf;
    this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true);
    open();
  }
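As a usage note, here is a hedged sketch of how this writer was typically enabled in the Spark 1.4/1.5 era. Treat the "tungsten-sort" manager alias as an assumption to verify against your Spark version; spark.file.transferTo is the same flag the constructor reads above.

import org.apache.spark.SparkConf;

final class UnsafeShuffleConf {
  // Illustrative configuration for the unsafe shuffle path.
  // The "tungsten-sort" alias is an assumption; verify per Spark version.
  static SparkConf build() {
    return new SparkConf()
        .setAppName("unsafe-shuffle-demo")
        .set("spark.shuffle.manager", "tungsten-sort") // select UnsafeShuffleManager
        .set("spark.file.transferTo", "true");         // flag read by the constructor above
  }
}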
  @Override
  public void configureOutput(JavaDStream<AggregateResult> aggregatedOutputs) throws IOException {
    aggregatedOutputs.foreachRDD(
        (aggregateResultRDD, time) -> {
          aggregateResultRDD.foreachPartition(
              aggregateResultIterator ->
                  writeToPubsubWithRetry(
                      time, TaskContext.getPartitionId(), aggregateResultIterator, 3));
          // This foreachRDD overload takes a Function2<JavaRDD<T>, Time, Void>,
          // hence the explicit null return.
          return null;
        });
  }
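writeToPubsubWithRetry is referenced but not shown in this example. Below is a hypothetical sketch of what such a helper might look like; the signature, the batch buffering, and the backoff policy are all assumptions, not the original implementation.

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.streaming.Time;

  // Hypothetical retry helper (assumed shape; the real method body is not shown).
  private void writeToPubsubWithRetry(
      Time time, int partitionId, Iterator<AggregateResult> results, int maxAttempts)
      throws IOException {
    List<AggregateResult> batch = new ArrayList<>();
    results.forEachRemaining(batch::add); // buffer once so a failed attempt can resend the batch
    for (int attempt = 1; ; attempt++) {
      try {
        publishBatch(time, partitionId, batch); // hypothetical publish call
        return;
      } catch (IOException e) {
        if (attempt >= maxAttempts) {
          throw e; // attempt budget exhausted; surface the last failure
        }
        try {
          Thread.sleep(1000L * attempt); // simple linear backoff between attempts
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
          throw new IOException("interrupted during retry backoff", ie);
        }
      }
    }
  }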