/**
 * Handles the degenerate case where serialization fails to fit in the in-memory buffer, so we
 * must spill the record from collect directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final Object key, final Object value, int partition)
    throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    // we don't run the combiner for a single record
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer = new IFile.Writer(conf, out, keyClass, valClass, codec,
            spilledRecordsCounter, null);

        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        if (numSpills > 0) {
          additionalSpillBytesWritten.increment(writer.getCompressedLength());
          numAdditionalSpills.increment(1);
          outputBytesWithOverheadCounter.setValue(0);
        } else {
          // Set this up for the first write only. Subsequent ones will be handled in the final
          // merge.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
        }

        // record offsets
        TezIndexRecord rec =
            new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(
              numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }

    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
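/*
 * Illustrative sketch only (not part of this class): the typical call site of
 * spillSingleRecord() in the Hadoop-derived sorters. When serializing a record into the
 * circular in-memory buffer fails because the record alone is larger than the buffer,
 * collect() gives up on buffering and writes that single record straight to its own
 * spill file. The exception name and surrounding statements below are assumptions for
 * illustration, not a verbatim excerpt:
 *
 *   try {
 *     keySerializer.serialize(key);    // may overflow the circular buffer
 *     valSerializer.serialize(value);
 *   } catch (MapBufferTooSmallException e) {
 *     LOG.info("Record too large for in-memory buffer: " + e.getMessage());
 *     spillSingleRecord(key, value, partition);
 *     return;
 *   }
 */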
private void mergeParts() throws IOException {
  // get the approximate size of the final output/index files
  long finalOutFileSize = 0;
  long finalIndexFileSize = 0;
  final Path[] filename = new Path[numSpills];
  final String taskIdentifier = outputContext.getUniqueIdentifier();

  for (int i = 0; i < numSpills; i++) {
    filename[i] = mapOutputFile.getSpillFile(i);
    finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
  }

  if (numSpills == 1) {
    // the spill is the final output
    sameVolRename(filename[0], mapOutputFile.getOutputFileForWriteInVolume(filename[0]));
    if (indexCacheList.size() == 0) {
      sameVolRename(
          mapOutputFile.getSpillIndexFile(0),
          mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]));
    } else {
      indexCacheList
          .get(0)
          .writeToFile(mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]), conf);
    }
    return;
  }

  // read in paged indices
  for (int i = indexCacheList.size(); i < numSpills; ++i) {
    Path indexFileName = mapOutputFile.getSpillIndexFile(i);
    indexCacheList.add(new TezSpillRecord(indexFileName, conf));
  }

  // make correction in the length to include the IFile header
  // lengths for each partition
  finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
  finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
  Path finalOutputFile = mapOutputFile.getOutputFileForWrite(finalOutFileSize);
  Path finalIndexFile = mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);

  // The output stream for the final single output file
  FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);

  if (numSpills == 0) {
    // TODO Change event generation to say there is no data rather than generating a dummy file
    // create dummy files
    TezSpillRecord sr = new TezSpillRecord(partitions);
    try {
      for (int i = 0; i < partitions; i++) {
        long segmentStart = finalOut.getPos();
        Writer writer = new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
        writer.close();

        TezIndexRecord rec =
            new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        // Covers the case of multiple spills.
        outputBytesWithOverheadCounter.increment(writer.getRawLength());
        sr.putIndex(rec, i);
      }
      sr.writeToFile(finalIndexFile, conf);
    } finally {
      finalOut.close();
    }
    return;
  } else {
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    for (int parts = 0; parts < partitions; parts++) {
      // create the segments to be merged
      List<Segment> segmentList = new ArrayList<Segment>(numSpills);
      for (int i = 0; i < numSpills; i++) {
        TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

        Segment s =
            new Segment(
                conf,
                rfs,
                filename[i],
                indexRecord.getStartOffset(),
                indexRecord.getPartLength(),
                codec,
                ifileReadAhead,
                ifileReadAheadLength,
                ifileBufferSize,
                true);
        segmentList.add(i, s);

        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "TaskIdentifier=" + taskIdentifier
                  + " Partition=" + parts
                  + " Spill=" + i
                  + " (" + indexRecord.getStartOffset()
                  + ", " + indexRecord.getRawLength()
                  + ", " + indexRecord.getPartLength() + ")");
        }
      }

      int mergeFactor =
          this.conf.getInt(
              TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
              TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
      // sort the segments only if there are intermediate merges
      boolean sortSegments = segmentList.size() > mergeFactor;

      // merge
      TezRawKeyValueIterator kvIter =
          TezMerger.merge(
              conf,
              rfs,
              keyClass,
              valClass,
              codec,
              segmentList,
              mergeFactor,
              new Path(taskIdentifier),
              (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(conf),
              nullProgressable,
              sortSegments,
              true,
              null,
              spilledRecordsCounter,
              additionalSpillBytesRead,
              null); // Not using any Progress in TezMerger. Should just work.

      // write merged output to disk
      long segmentStart = finalOut.getPos();
      Writer writer =
          new Writer(conf, finalOut, keyClass, valClass, codec, spilledRecordsCounter, null);
      if (combiner == null || numSpills < minSpillsForCombine) {
        TezMerger.writeFile(
            kvIter,
            writer,
            nullProgressable,
            TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
      } else {
        runCombineProcessor(kvIter, writer);
      }
      writer.close();

      // record offsets
      final TezIndexRecord rec =
          new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
      spillRec.putIndex(rec, parts);
    }

    spillRec.writeToFile(finalIndexFile, conf);
    finalOut.close();
    for (int i = 0; i < numSpills; i++) {
      rfs.delete(filename[i], true);
    }
  }
}
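/*
 * Illustrative sketch only (not part of this class): how the final index written by
 * mergeParts() is consumed. Each partition occupies one contiguous IFile segment in the
 * single merged output file, and its TezIndexRecord records the segment's start offset,
 * uncompressed (raw) length and on-disk (part) length. The reader below is a hypothetical
 * consumer; only the TezSpillRecord/TezIndexRecord accessors already used above are relied on:
 *
 *   TezSpillRecord index = new TezSpillRecord(finalIndexFile, conf);
 *   TezIndexRecord rec = index.getIndex(partitionId);
 *   FSDataInputStream in = rfs.open(finalOutputFile);
 *   in.seek(rec.getStartOffset());             // beginning of this partition's segment
 *   long bytesToServe = rec.getPartLength();   // on-disk length, including IFile header/checksum
 *   // ... stream 'bytesToServe' bytes from 'in' to the consumer ...
 */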
protected void spill(int mstart, int mend) throws IOException, InterruptedException {
  // approximate the length of the output file to be the length of the
  // buffer + header lengths for the partitions
  final long size =
      (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
          + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    int spindex = mstart;
    final InMemValBytes value = createInMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null);
        if (combiner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we have fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
            if (LOG.isDebugEnabled()) {
              LOG.debug("Running combine processor");
            }
            runCombineProcessor(kvIter, writer);
          }
        }

        // close the writer
        writer.close();
        if (numSpills > 0) {
          additionalSpillBytesWritten.increment(writer.getCompressedLength());
          numAdditionalSpills.increment(1);
          // Reset; the value will be set during the final merge.
          outputBytesWithOverheadCounter.setValue(0);
        } else {
          // Set this up for the first write only. Subsequent ones will be handled in the final
          // merge.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
        }

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(
              numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
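/*
 * Illustrative sketch only (not part of this class): the per-record metadata layout that
 * spill() walks through kvmeta. In the Hadoop-derived buffer each collected record
 * contributes a fixed number of metadata ints; the exact slot names below mirror the
 * accessors used above, but treating them as a worked example is an assumption, not a
 * verbatim excerpt:
 *
 *   int kvoff     = offsetFor(spindex);            // base of this record's metadata
 *   int partition = kvmeta.get(kvoff + PARTITION); // destination partition of the record
 *   int keystart  = kvmeta.get(kvoff + KEYSTART);  // key offset in kvbuffer
 *   int valstart  = kvmeta.get(kvoff + VALSTART);  // value offset in kvbuffer
 *   // key bytes   = kvbuffer[keystart .. valstart)
 *   // value bytes = kvbuffer[valstart .. valend), with valend resolved via getVBytesForOffset()
 *
 * Records are laid out in partition order within [mstart, mend), which is why the inner
 * while-loops above can advance spindex until the partition id changes.
 */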