@Override
public Option<MapStatus> stop(boolean success) {
  try {
    // Update task metrics from accumulators (null in UnsafeShuffleWriterSuite)
    Map<String, Accumulator<Object>> internalAccumulators =
      taskContext.internalMetricsToAccumulators();
    if (internalAccumulators != null) {
      internalAccumulators
        .apply(InternalAccumulator.PEAK_EXECUTION_MEMORY())
        .add(getPeakMemoryUsedBytes());
    }

    if (stopping) {
      return Option.apply(null);
    } else {
      stopping = true;
      if (success) {
        if (mapStatus == null) {
          throw new IllegalStateException("Cannot call stop(true) without having called write()");
        }
        return Option.apply(mapStatus);
      } else {
        // The map task failed, so delete our output data.
        shuffleBlockResolver.removeDataByMap(shuffleId, mapId);
        return Option.apply(null);
      }
    }
  } finally {
    if (sorter != null) {
      // If sorter is non-null, then this implies that we called stop() in response to an error,
      // so we need to clean up memory and spill files created by the sorter.
      sorter.cleanupResources();
    }
  }
}
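// A minimal caller sketch (an assumption about usage, not the actual ShuffleMapTask code):
// the contract is that stop(true) may only follow a successful write(), while stop(false)
// on the error path removes the partial output and lets the finally block above clean up
// the sorter's memory and spill files.
static <K, V> MapStatus runMapTask(
    UnsafeShuffleWriter<K, V> writer,
    scala.collection.Iterator<scala.Product2<K, V>> records) throws IOException {
  boolean success = false;
  try {
    writer.write(records);
    success = true;
    return writer.stop(true).get();
  } finally {
    if (!success) {
      writer.stop(false);  // failure path: delete output data and release resources
    }
  }
}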
/** Sort and spill the current records in response to memory pressure. */
@VisibleForTesting
public void spill() throws IOException {
  logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)",
    Thread.currentThread().getId(),
    Utils.bytesToString(getMemoryUsage()),
    spillWriters.size(),
    spillWriters.size() > 1 ? "times" : "time");

  final UnsafeSorterSpillWriter spillWriter =
    new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics,
      sorter.numRecords());
  spillWriters.add(spillWriter);
  final UnsafeSorterIterator sortedRecords = sorter.getSortedIterator();
  while (sortedRecords.hasNext()) {
    sortedRecords.loadNext();
    final Object baseObject = sortedRecords.getBaseObject();
    final long baseOffset = sortedRecords.getBaseOffset();
    final int recordLength = sortedRecords.getRecordLength();
    spillWriter.write(baseObject, baseOffset, recordLength, sortedRecords.getKeyPrefix());
  }
  spillWriter.close();

  final long sorterMemoryUsage = sorter.getMemoryUsage();
  sorter = null;
  shuffleMemoryManager.release(sorterMemoryUsage);
  final long spillSize = freeMemory();
  taskContext.taskMetrics().incMemoryBytesSpilled(spillSize);

  initializeForWriting();
}
public UnsafeShuffleWriter(
    BlockManager blockManager,
    IndexShuffleBlockResolver shuffleBlockResolver,
    TaskMemoryManager memoryManager,
    ShuffleMemoryManager shuffleMemoryManager,
    UnsafeShuffleHandle<K, V> handle,
    int mapId,
    TaskContext taskContext,
    SparkConf sparkConf) throws IOException {
  final int numPartitions = handle.dependency().partitioner().numPartitions();
  if (numPartitions > UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS()) {
    throw new IllegalArgumentException(
      "UnsafeShuffleWriter can only be used for shuffles with at most " +
        UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS() + " reduce partitions");
  }
  this.blockManager = blockManager;
  this.shuffleBlockResolver = shuffleBlockResolver;
  this.memoryManager = memoryManager;
  this.shuffleMemoryManager = shuffleMemoryManager;
  this.mapId = mapId;
  final ShuffleDependency<K, V, V> dep = handle.dependency();
  this.shuffleId = dep.shuffleId();
  this.serializer = Serializer.getSerializer(dep.serializer()).newInstance();
  this.partitioner = dep.partitioner();
  this.writeMetrics = new ShuffleWriteMetrics();
  taskContext.taskMetrics().shuffleWriteMetrics_$eq(Option.apply(writeMetrics));
  this.taskContext = taskContext;
  this.sparkConf = sparkConf;
  this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true);
  open();
}
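// The constructor reads "spark.file.transferTo" to decide whether spill files are merged with
// NIO transferTo or with plain file streams. A minimal configuration sketch (the application
// name is illustrative):
SparkConf conf = new SparkConf()
  .setAppName("unsafe-shuffle-example")
  .set("spark.file.transferTo", "false");  // fall back to stream-based merging of spill files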
@Override
public void configureOutput(JavaDStream<AggregateResult> aggregatedOutputs) throws IOException {
  aggregatedOutputs.foreachRDD((aggregateResultRDD, time) -> {
    aggregateResultRDD.foreachPartition(aggregateResultIterator ->
      writeToPubsubWithRetry(time, TaskContext.getPartitionId(), aggregateResultIterator, 3));
    return null;
  });
}
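// writeToPubsubWithRetry is not shown above. The sketch below is a hypothetical, generic
// bounded-retry helper illustrating the kind of loop that call presumably wraps; the publish
// step is abstracted behind a standard java.util.concurrent.Callable, so no Pub/Sub client API
// is implied.
static <T> T withRetry(java.util.concurrent.Callable<T> action, int maxAttempts) throws Exception {
  Exception lastFailure = null;
  for (int attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return action.call();
    } catch (Exception e) {
      lastFailure = e;
      // Exponential backoff between attempts: 100 ms, 200 ms, 400 ms, ...
      Thread.sleep(100L << (attempt - 1));
    }
  }
  throw lastFailure;
}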