@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId();
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();

  InMemoryMapOutput<K, V> mergedMapOutputs =
      unconditionalReserve(dummyMapId, mergeOutputSize, false);

  Writer<K, V> writer = new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());

  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter =
      Merger.merge(jobConf, rfs,
                   (Class<K>) jobConf.getMapOutputKeyClass(),
                   (Class<V>) jobConf.getMapOutputValueClass(),
                   inMemorySegments, inMemorySegments.size(),
                   new Path(reduceId.toString()),
                   (RawComparator<K>) jobConf.getOutputKeyComparator(),
                   reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId + " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
private void writeIndexRecord(FSDataOutputStream indexOut,
                              FSDataOutputStream out, long start,
                              Writer<K, V> writer) throws IOException {
  // when we write the offset/decompressed-length/compressed-length to
  // the final index file, we write longs for both compressed and
  // decompressed lengths. This helps us to reliably seek directly to
  // the offset/length for a partition when we start serving the
  // byte-ranges to the reduces. We probably waste some space in the
  // file by doing this as opposed to writing VLong but it helps us later on.
  // index record: <offset, raw-length, compressed-length>
  // StringBuffer sb = new StringBuffer();
  indexOut.writeLong(start);
  indexOut.writeLong(writer.getRawLength());
  long segmentLength = out.getPos() - start;
  indexOut.writeLong(segmentLength);
  LOG.info("Index: (" + start + ", " + writer.getRawLength() + ", " +
           segmentLength + ")");
}
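For illustration, the fixed-width record written above (three 8-byte longs per partition) is read back by seeking to partition * MAP_OUTPUT_INDEX_RECORD_LENGTH, exactly as mergeParts() does further down. A minimal sketch under that assumption; the readIndexRecord helper is hypothetical and not part of the original class:

// Hypothetical read-side counterpart of writeIndexRecord: seek to the
// fixed-width record for `partition` and read the three longs back.
private long[] readIndexRecord(FSDataInputStream indexIn, int partition)
    throws IOException {
  indexIn.seek((long) partition * MAP_OUTPUT_INDEX_RECORD_LENGTH);
  long segmentOffset = indexIn.readLong();     // offset of the partition in file.out
  long rawLength = indexIn.readLong();         // decompressed length
  long compressedLength = indexIn.readLong();  // bytes actually stored on disk
  return new long[] { segmentOffset, rawLength, compressedLength };
}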
public static <K extends Object, V extends Object> void writeFile(
    RawKeyValueIterator records, Writer<K, V> writer,
    Progressable progressable) throws IOException {
  long recordCtr = 0;
  while (records.next()) {
    writer.append(records.getKey(), records.getValue());
    if ((++recordCtr % PROGRESS_BAR) == 0) {
      progressable.progress();
    }
  }
}
public static <K extends Object, V extends Object> void writeFile(
    RawKeyValueIterator records, Writer<K, V> writer,
    Progressable progressable, Configuration conf) throws IOException {
  long progressBar = conf.getLong("mapred.merge.recordsBeforeProgress", 10000);
  long recordCtr = 0;
  while (records.next()) {
    writer.append(records.getKey(), records.getValue());
    if (((recordCtr++) % progressBar) == 0) {
      progressable.progress();
    }
  }
}
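The two writeFile overloads above differ only in where the progress interval comes from: the first uses the hard-coded PROGRESS_BAR constant, the second reads it from the job configuration. A minimal caller-side sketch, assuming a plain JobConf is in scope:

// Caller-side sketch (assumed, not from the original source): tune how many
// records are appended between progress() calls before invoking the
// Configuration-aware overload above.
JobConf conf = new JobConf();
conf.setLong("mapred.merge.recordsBeforeProgress", 10000L);
// ... then pass `conf` as the last argument to the second writeFile overload.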
private void mergeParts() throws IOException {
  // get the approximate size of the final output/index files
  long finalOutFileSize = 0;
  long finalIndexFileSize = 0;
  Path[] filename = new Path[numSpills];
  Path[] indexFileName = new Path[numSpills];
  FileSystem localFs = FileSystem.getLocal(job);

  for (int i = 0; i < numSpills; i++) {
    filename[i] = mapOutputFile.getSpillFile(getTaskID(), i);
    indexFileName[i] = mapOutputFile.getSpillIndexFile(getTaskID(), i);
    finalOutFileSize += localFs.getFileStatus(filename[i]).getLen();
  }

  if (numSpills == 1) {
    // the spill is the final output
    localFs.rename(filename[0],
        new Path(filename[0].getParent(), "file.out"));
    localFs.rename(indexFileName[0],
        new Path(indexFileName[0].getParent(), "file.out.index"));
    return;
  }

  // make correction in the length to include the sequence file header
  // lengths for each partition
  finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
  finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;

  Path finalOutputFile =
      mapOutputFile.getOutputFileForWrite(getTaskID(), finalOutFileSize);
  Path finalIndexFile =
      mapOutputFile.getOutputIndexFileForWrite(getTaskID(), finalIndexFileSize);

  // The output stream for the final single output file
  FSDataOutputStream finalOut = localFs.create(finalOutputFile, true, 4096);
  // The final index file output stream
  FSDataOutputStream finalIndexOut = localFs.create(finalIndexFile, true, 4096);

  if (numSpills == 0) {
    // create dummy files
    for (int i = 0; i < partitions; i++) {
      long segmentStart = finalOut.getPos();
      Writer<K, V> writer = new Writer<K, V>(job, finalOut, keyClass, valClass, codec);
      writer.close();
      writeIndexRecord(finalIndexOut, finalOut, segmentStart, writer);
    }
    finalOut.close();
    finalIndexOut.close();
    return;
  }
  {
    for (int parts = 0; parts < partitions; parts++) {
      // create the segments to be merged
      List<Segment<K, V>> segmentList = new ArrayList<Segment<K, V>>(numSpills);
      for (int i = 0; i < numSpills; i++) {
        FSDataInputStream indexIn = localFs.open(indexFileName[i]);
        indexIn.seek(parts * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        long segmentOffset = indexIn.readLong();
        long rawSegmentLength = indexIn.readLong();
        long segmentLength = indexIn.readLong();
        indexIn.close();
        Segment<K, V> s =
            new Segment<K, V>(job, localFs, filename[i], segmentOffset,
                              segmentLength, codec, true);
        segmentList.add(i, s);

        if (LOG.isDebugEnabled()) {
          LOG.debug("Index: (" + indexFileName[i] + ", " + segmentOffset + ", " +
                    rawSegmentLength + ", " + segmentLength + ")");
        }
      }

      // merge
      @SuppressWarnings("unchecked")
      RawKeyValueIterator kvIter =
          Merger.merge(job, localFs, keyClass, valClass, segmentList,
                       job.getInt("io.sort.factor", 100),
                       new Path(getTaskID().toString()),
                       job.getOutputKeyComparator(), reporter);

      // write merged output to disk
      long segmentStart = finalOut.getPos();
      Writer<K, V> writer = new Writer<K, V>(job, finalOut, keyClass, valClass, codec);
      if (null == combinerClass || job.getCombineOnceOnly() ||
          numSpills < minSpillsForCombine) {
        Merger.writeFile(kvIter, writer, reporter, job);
      } else {
        combineCollector.setWriter(writer);
        combineAndSpill(kvIter, combineInputCounter);
      }

      // close
      writer.close();

      // write index record
      writeIndexRecord(finalIndexOut, finalOut, segmentStart, writer);
    }
    finalOut.close();
    finalIndexOut.close();

    // cleanup
    for (int i = 0; i < numSpills; i++) {
      localFs.delete(filename[i], true);
      localFs.delete(indexFileName[i], true);
    }
  }
}
private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
    List<InMemoryMapOutput<K, V>> inMemoryMapOutputs,
    List<CompressAwarePath> onDiskMapOutputs) throws IOException {
  LOG.info("finalMerge called with " + inMemoryMapOutputs.size() +
           " in-memory map-outputs and " + onDiskMapOutputs.size() +
           " on-disk map-outputs");

  final float maxRedPer = job.getFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0f);
  if (maxRedPer > 1.0 || maxRedPer < 0.0) {
    throw new IOException(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT + " = " + maxRedPer);
  }
  int maxInMemReduce =
      (int) Math.min(Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE);

  // merge config params
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valueClass = (Class<V>) job.getMapOutputValueClass();
  boolean keepInputs = job.getKeepFailedTaskFiles();
  final Path tmpDir = new Path(reduceId.toString());
  final RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();

  // segments required to vacate memory
  List<Segment<K, V>> memDiskSegments = new ArrayList<Segment<K, V>>();
  long inMemToDiskBytes = 0;
  boolean mergePhaseFinished = false;
  if (inMemoryMapOutputs.size() > 0) {
    TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
    inMemToDiskBytes =
        createInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce);
    final int numMemDiskSegments = memDiskSegments.size();
    if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {
      // If we reach here, it implies that we have less than io.sort.factor
      // disk segments and this will be incremented by 1 (result of the
      // memory segments merge). Since this total would still be
      // <= io.sort.factor, we will not do any more intermediate merges,
      // the merge of all these disk segments would be directly fed to the
      // reduce method
      mergePhaseFinished = true;

      // must spill to disk, but can't retain in-mem for intermediate merge
      final Path outputPath =
          mapOutputFile.getInputFileForWrite(mapId, inMemToDiskBytes)
              .suffix(Task.MERGED_OUTPUT_PREFIX);
      final RawKeyValueIterator rIter =
          Merger.merge(job, fs, keyClass, valueClass, memDiskSegments,
                       numMemDiskSegments, tmpDir, comparator, reporter,
                       spilledRecordsCounter, null, mergePhase);
      Writer<K, V> writer =
          new Writer<K, V>(job, fs, outputPath, keyClass, valueClass, codec, null);
      try {
        Merger.writeFile(rIter, writer, reporter, job);
        writer.close();
        onDiskMapOutputs.add(new CompressAwarePath(outputPath,
            writer.getRawLength(), writer.getCompressedLength()));
        writer = null;
        // add to list of final disk outputs.
      } catch (IOException e) {
        if (null != outputPath) {
          try {
            fs.delete(outputPath, true);
          } catch (IOException ie) {
            // NOTHING
          }
        }
        throw e;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
      LOG.info("Merged " + numMemDiskSegments + " segments, " +
               inMemToDiskBytes + " bytes to disk to satisfy " +
               "reduce memory limit");
      inMemToDiskBytes = 0;
      memDiskSegments.clear();
    } else if (inMemToDiskBytes != 0) {
      LOG.info("Keeping " + numMemDiskSegments + " segments, " +
               inMemToDiskBytes + " bytes in memory for " +
               "intermediate, on-disk merge");
    }
  }

  // segments on disk
  List<Segment<K, V>> diskSegments = new ArrayList<Segment<K, V>>();
  long onDiskBytes = inMemToDiskBytes;
  long rawBytes = inMemToDiskBytes;
  CompressAwarePath[] onDisk =
      onDiskMapOutputs.toArray(new CompressAwarePath[onDiskMapOutputs.size()]);
  for (CompressAwarePath file : onDisk) {
    long fileLength = fs.getFileStatus(file).getLen();
    onDiskBytes += fileLength;
    rawBytes += (file.getRawDataLength() > 0) ? file.getRawDataLength() : fileLength;

    LOG.debug("Disk file: " + file + " Length is " + fileLength);
    diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
        (file.toString().endsWith(Task.MERGED_OUTPUT_PREFIX)
            ? null
            : mergedMapOutputsCounter),
        file.getRawDataLength()));
  }
  LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
  Collections.sort(diskSegments, new Comparator<Segment<K, V>>() {
    public int compare(Segment<K, V> o1, Segment<K, V> o2) {
      if (o1.getLength() == o2.getLength()) {
        return 0;
      }
      return o1.getLength() < o2.getLength() ? -1 : 1;
    }
  });

  // build final list of segments from merged backed by disk + in-mem
  List<Segment<K, V>> finalSegments = new ArrayList<Segment<K, V>>();
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
  LOG.info("Merging " + finalSegments.size() + " segments, " +
           inMemBytes + " bytes from memory into reduce");
  if (0 != onDiskBytes) {
    final int numInMemSegments = memDiskSegments.size();
    diskSegments.addAll(0, memDiskSegments);
    memDiskSegments.clear();
    // Pass mergePhase only if there are going to be intermediate
    // merges. See comment where mergePhaseFinished is being set
    Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
    RawKeyValueIterator diskMerge =
        Merger.merge(job, fs, keyClass, valueClass, codec, diskSegments,
                     ioSortFactor, numInMemSegments, tmpDir, comparator,
                     reporter, false, spilledRecordsCounter, null, thisPhase);
    diskSegments.clear();
    if (0 == finalSegments.size()) {
      return diskMerge;
    }
    finalSegments.add(new Segment<K, V>(
        new RawKVIteratorReader(diskMerge, onDiskBytes), true, rawBytes));
  }
  return Merger.merge(job, fs, keyClass, valueClass, finalSegments,
                      finalSegments.size(), tmpDir, comparator, reporter,
                      spilledRecordsCounter, null, null);
}
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }

  long approxOutputSize = 0;
  int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

  LOG.info("OnDiskMerger: We have " + inputs.size() +
           " map outputs on disk. Triggering merge...");

  // 1. Prepare the list of files to be merged.
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize +=
      ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath =
      localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
          approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  Writer<K, V> writer =
      new Writer<K, V>(jobConf, rfs, outputPath,
                       (Class<K>) jobConf.getMapOutputKeyClass(),
                       (Class<V>) jobConf.getMapOutputValueClass(),
                       codec, null);

  RawKeyValueIterator iter = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]),
                        true, ioSortFactor, tmpDir,
                        (RawComparator<K>) jobConf.getOutputKeyComparator(),
                        reporter, spilledRecordsCounter, null,
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId + " Finished merging " + inputs.size() +
           " map output files on disk of total-size " + approxOutputSize + "." +
           " Local output file is " + outputPath + " of size " +
           localFS.getFileStatus(outputPath).getLen());
}
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  // name this output file same as the name of the first file that is
  // there in the current list of inmem files (this is guaranteed to
  // be absent on the disk currently. So we don't overwrite a prev.
  // created spill). Also we need to create the output file now since
  // it is not guaranteed that this file will be present after merge
  // is called (we delete empty files as soon as we see them
  // in the merge method)

  // figure out the mapId
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath =
      mapOutputFile.getInputFileForWrite(mapTaskId, mergeOutputSize)
          .suffix(Task.MERGED_OUTPUT_PREFIX);

  Writer<K, V> writer =
      new Writer<K, V>(jobConf, rfs, outputPath,
                       (Class<K>) jobConf.getMapOutputKeyClass(),
                       (Class<V>) jobConf.getMapOutputValueClass(),
                       codec, null);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments +
             " segments...");

    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>) jobConf.getMapOutputKeyClass(),
                         (Class<V>) jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>) jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);

    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId + " Merge of the " + noInMemorySegments +
             " files in-memory complete." +
             " Local file is " + outputPath + " of size " +
             localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) {
    // make sure that we delete the ondisk file that we created
    // earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}
RawKeyValueIterator merge(Class<K> keyClass, Class<V> valueClass,
    int factor, int inMem, Path tmpDir) throws IOException {
  LOG.info("Merging " + segments.size() + " sorted segments");

  // create the MergeStreams from the sorted map created in the constructor
  // and dump the final output to a file
  int numSegments = segments.size();
  int origFactor = factor;
  int passNo = 1;
  do {
    // get the factor for this pass of merge. We assume in-memory segments
    // are the first entries in the segment list and that the pass factor
    // doesn't apply to them
    factor = getPassFactor(factor, passNo, numSegments - inMem);
    if (1 == passNo) {
      factor += inMem;
    }
    List<Segment<K, V>> segmentsToMerge = new ArrayList<Segment<K, V>>();
    int segmentsConsidered = 0;
    int numSegmentsToConsider = factor;
    while (true) {
      // extract the smallest 'factor' number of segments
      // Call cleanup on the empty segments (no key/value data)
      List<Segment<K, V>> mStream = getSegmentDescriptors(numSegmentsToConsider);
      for (Segment<K, V> segment : mStream) {
        // Initialize the segment at the last possible moment;
        // this helps in ensuring we don't use buffers until we need them
        segment.init();
        long startPos = segment.getPosition();
        boolean hasNext = segment.next();
        long endPos = segment.getPosition();
        totalBytesProcessed += endPos - startPos;
        mergeProgress.set(totalBytesProcessed * progPerByte);
        if (hasNext) {
          segmentsToMerge.add(segment);
          segmentsConsidered++;
        } else {
          segment.close();
          numSegments--; // we ignore this segment for the merge
        }
      }
      // if we have the desired number of segments
      // or looked at all available segments, we break
      if (segmentsConsidered == factor || segments.size() == 0) {
        break;
      }
      numSegmentsToConsider = factor - segmentsConsidered;
    }

    // feed the streams to the priority queue
    initialize(segmentsToMerge.size());
    clear();
    for (Segment<K, V> segment : segmentsToMerge) {
      put(segment);
    }

    // if the remaining segments can be merged in one pass, just return the
    // iterator, else do another single level merge
    if (numSegments <= factor) {
      // calculate the length of the remaining segments. Required for
      // calculating the merge progress
      long totalBytes = 0;
      for (int i = 0; i < segmentsToMerge.size(); i++) {
        totalBytes += segmentsToMerge.get(i).getLength();
      }
      if (totalBytes != 0) { // being paranoid
        progPerByte = 1.0f / (float) totalBytes;
      }
      if (totalBytes != 0) {
        mergeProgress.set(totalBytesProcessed * progPerByte);
      } else {
        mergeProgress.set(1.0f); // Last pass and no segments left - we're done
      }
      LOG.info("Down to the last merge-pass, with " + numSegments +
               " segments left of total size: " + totalBytes + " bytes");
      return this;
    } else {
      LOG.info("Merging " + segmentsToMerge.size() +
               " intermediate segments out of a total of " +
               (segments.size() + segmentsToMerge.size()));

      // we want to spread the creation of temp files on multiple disks if
      // available under the space constraints
      long approxOutputSize = 0;
      for (Segment<K, V> s : segmentsToMerge) {
        approxOutputSize +=
            s.getLength() + ChecksumFileSystem.getApproxChkSumLength(s.getLength());
      }
      Path tmpFilename = new Path(tmpDir, "intermediate").suffix("." + passNo);

      Path outputFile =
          lDirAlloc.getLocalPathForWrite(tmpFilename.toString(), approxOutputSize, conf);

      Writer<K, V> writer =
          new Writer<K, V>(conf, fs, outputFile, keyClass, valueClass, codec);
      writeFile(this, writer, reporter);
      writer.close();

      // we finished one single level merge; now clean up the priority queue
      this.close();

      // Add the newly created segment to the list of segments to be merged
      Segment<K, V> tempSegment = new Segment<K, V>(conf, fs, outputFile, codec, false);
      segments.add(tempSegment);
      numSegments = segments.size();
      Collections.sort(segments, segmentComparator);

      passNo++;
    }
    // we are worried about only the first pass merge factor. So reset the
    // factor to what it originally was
    factor = origFactor;
  } while (true);
}
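The pass factor used at the top of the loop is typically computed so that only the first pass merges fewer than `factor` segments, leaving every later pass a full fan-in. A sketch of that computation, stated as an assumption rather than a verbatim copy of the surrounding class; getPassFactor is the method referenced above:

// Assumed sketch of the first-pass factor calculation for an external merge:
// after the first pass merges (mod + 1) segments, the remaining count is
// congruent to 1 modulo (factor - 1), so all subsequent passes merge exactly
// `factor` segments.
private static int getPassFactor(int factor, int passNo, int numSegments) {
  if (passNo > 1 || numSegments <= factor || factor == 1) {
    return factor;
  }
  int mod = (numSegments - 1) % (factor - 1);
  if (mod == 0) {
    return factor;
  }
  return mod + 1;
}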