/**
 * Moves {@code reader} forward by one record, folds the bytes it consumed
 * into the merge progress, and then updates the priority queue.
 *
 * <p>If the reader still has a record, {@code adjustTop()} is called. If it is
 * exhausted, the top of the queue is popped and the reader is closed.
 *
 * @param reader the segment to advance (presumably the one currently at the
 *     top of the queue; confirm against the caller)
 * @throws IOException if reading the position, advancing, or closing the
 *     reader fails
 */
private void adjustPriorityQueue(Segment<K, V> reader) throws IOException {
  final long positionBefore = reader.getPosition();
  final boolean advanced = reader.next();
  final long bytesConsumed = reader.getPosition() - positionBefore;

  // Account for whatever the reader consumed, whether or not a record came back.
  totalBytesProcessed += bytesConsumed;
  mergeProgress.set(totalBytesProcessed * progPerByte);

  if (!advanced) {
    // Nothing left in this reader: drop the queue's top entry and release it.
    pop();
    reader.close();
    return;
  }

  adjustTop();
}
/**
 * Merges the sorted segments held in {@code segments}, running as many
 * intermediate passes as needed until the remaining segments fit in a single
 * pass, and then returns this object as the iterator over that final pass.
 *
 * <p>Each pass:
 * <ol>
 *   <li>computes the pass factor with {@code getPassFactor}; on the first pass
 *       the {@code inMem} in-memory segments are added on top of it;</li>
 *   <li>pulls segments via {@code getSegmentDescriptors}, initializes each one
 *       and advances it to its first record, closing and discarding segments
 *       that have no record;</li>
 *   <li>loads the surviving segments into the priority queue;</li>
 *   <li>if {@code numSegments <= factor}, recomputes {@code progPerByte} from
 *       the lengths of the segments being merged and returns {@code this};
 *       otherwise writes the merged output to a temporary file named
 *       {@code intermediate.<passNo>} under {@code tmpDir}, closes the queue,
 *       adds the new file to {@code segments} as a segment, re-sorts, and
 *       starts another pass.</li>
 * </ol>
 *
 * @param keyClass key class handed to the intermediate-file {@code Writer}
 * @param valueClass value class handed to the intermediate-file {@code Writer}
 * @param factor merge factor; adjusted per pass by {@code getPassFactor} and
 *     reset to its original value after every intermediate pass
 * @param inMem number of in-memory segments; these are assumed to be the first
 *     entries in the segment list and are exempt from the pass factor
 * @param tmpDir parent path used to name intermediate merge files
 * @return this object, with its priority queue loaded for the final merge pass
 * @throws IOException propagated from segment, allocator or writer operations
 */
RawKeyValueIterator merge(
    Class<K> keyClass, Class<V> valueClass, int factor, int inMem, Path tmpDir)
    throws IOException {
  LOG.info("Merging " + segments.size() + " sorted segments");

  // create the MergeStreams from the sorted map created in the constructor
  // and dump the final output to a file
  int numSegments = segments.size();
  int origFactor = factor;
  int passNo = 1;
  do {
    // get the factor for this pass of merge. We assume in-memory segments
    // are the first entries in the segment list and that the pass factor
    // doesn't apply to them
    factor = getPassFactor(factor, passNo, numSegments - inMem);
    if (1 == passNo) {
      factor += inMem;
    }
    List<Segment<K, V>> segmentsToMerge = new ArrayList<Segment<K, V>>();
    int segmentsConsidered = 0;
    int numSegmentsToConsider = factor;
    while (true) {
      // extract the smallest 'factor' number of segments
      // Call cleanup on the empty segments (no key/value data)
      // NOTE(review): presumably getSegmentDescriptors removes the returned
      // segments from 'segments'; the segments.size() == 0 exit check below
      // relies on that - confirm.
      List<Segment<K, V>> mStream = getSegmentDescriptors(numSegmentsToConsider);
      for (Segment<K, V> segment : mStream) {
        // Initialize the segment at the last possible moment;
        // this helps in ensuring we don't use buffers until we need them
        segment.init();
        // Advance to the first record and count the bytes that consumed
        // towards the merge progress.
        long startPos = segment.getPosition();
        boolean hasNext = segment.next();
        long endPos = segment.getPosition();
        totalBytesProcessed += endPos - startPos;
        mergeProgress.set(totalBytesProcessed * progPerByte);
        if (hasNext) {
          segmentsToMerge.add(segment);
          segmentsConsidered++;
        } else {
          segment.close();
          numSegments--; // we ignore this segment for the merge
        }
      }
      // if we have the desired number of segments
      // or looked at all available segments, we break
      if (segmentsConsidered == factor || segments.size() == 0) {
        break;
      }

      // Empty segments were dropped above; ask only for enough replacements
      // to fill the pass.
      numSegmentsToConsider = factor - segmentsConsidered;
    }

    // feed the streams to the priority queue
    initialize(segmentsToMerge.size());
    clear();
    for (Segment<K, V> segment : segmentsToMerge) {
      put(segment);
    }

    // if we have fewer segments remaining than the factor, then just return
    // the iterator, else do another single level merge
    if (numSegments <= factor) {
      // calculate the length of the remaining segments. Required for
      // calculating the merge progress
      long totalBytes = 0;
      for (int i = 0; i < segmentsToMerge.size(); i++) {
        totalBytes += segmentsToMerge.get(i).getLength();
      }
      if (totalBytes != 0) // being paranoid: avoid dividing by zero
        progPerByte = 1.0f / (float) totalBytes;

      if (totalBytes != 0)
        mergeProgress.set(totalBytesProcessed * progPerByte);
      else
        mergeProgress.set(1.0f); // Last pass and no segments left - we're done

      LOG.info(
          "Down to the last merge-pass, with "
              + numSegments
              + " segments left of total size: "
              + totalBytes
              + " bytes");
      return this;
    } else {
      LOG.info(
          "Merging "
              + segmentsToMerge.size()
              + " intermediate segments out of a total of "
              + (segments.size() + segmentsToMerge.size()));

      // we want to spread the creation of temp files on multiple disks if
      // available under the space constraints
      long approxOutputSize = 0;
      for (Segment<K, V> s : segmentsToMerge) {
        approxOutputSize +=
            s.getLength() + ChecksumFileSystem.getApproxChkSumLength(s.getLength());
      }
      Path tmpFilename = new Path(tmpDir, "intermediate").suffix("." + passNo);

      Path outputFile =
          lDirAlloc.getLocalPathForWrite(tmpFilename.toString(), approxOutputSize, conf);

      // NOTE(review): if writeFile throws, writer.close() is never reached,
      // so the writer is left open on that path.
      Writer<K, V> writer = new Writer<K, V>(conf, fs, outputFile, keyClass, valueClass, codec);
      writeFile(this, writer, reporter);
      writer.close();

      // we finished one single level merge; now clean up the priority
      // queue
      this.close();

      // Add the newly created segment to the list of segments to be merged
      // NOTE(review): the meaning of the trailing 'false' argument is not
      // visible here - confirm against the Segment constructor.
      Segment<K, V> tempSegment = new Segment<K, V>(conf, fs, outputFile, codec, false);
      segments.add(tempSegment);
      numSegments = segments.size();
      Collections.sort(segments, segmentComparator);

      passNo++;
    }
    // we are worried about only the first pass merge factor. So reset the
    // factor to what it originally was
    factor = origFactor;
  } while (true);
}
/**
 * Empties the priority queue, closing every segment popped from it.
 *
 * <p>Popping continues until {@code pop()} returns {@code null}.
 *
 * @throws IOException if closing a segment fails; segments still in the queue
 *     at that point are not closed by this call
 */
public void close() throws IOException {
  for (Segment<K, V> drained = pop(); drained != null; drained = pop()) {
    drained.close();
  }
}