Example #1
0
    /**
     * Merges a batch of in-memory map outputs into a single new in-memory map output.
     *
     * <p>Returns immediately when {@code inputs} is {@code null} or empty. Otherwise the inputs
     * are converted to segments, merged with the job's output key comparator, written into a
     * buffer obtained from {@code unconditionalReserve}, and the merged output is passed to
     * {@code closeInMemoryMergedFile}.
     *
     * @param inputs the in-memory map outputs to merge
     * @throws IOException if creating the segments, merging, or writing fails
     */
    @Override
    public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
      if (inputs == null || inputs.isEmpty()) {
        return;
      }

      // The first input's attempt id labels the merged output; it is read before the inputs are
      // handed to createInMemorySegments.
      final TaskAttemptID firstMapId = inputs.get(0).getMapId();
      final List<Segment<K, V>> segments = new ArrayList<Segment<K, V>>();
      final long totalSize = createInMemorySegments(inputs, segments, 0);
      final int segmentCount = segments.size();

      final InMemoryMapOutput<K, V> merged = unconditionalReserve(firstMapId, totalSize, false);
      final Writer<K, V> writer = new InMemoryWriter<K, V>(merged.getArrayStream());

      LOG.info(
          "Initiating Memory-to-Memory merge with "
              + segmentCount
              + " segments of total-size: "
              + totalSize);

      // Merge inputs, resolved in the same order the merge call consumes them.
      final Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
      final Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
      final Path mergeTmpDir = new Path(reduceId.toString());
      final RawComparator<K> keyComparator = (RawComparator<K>) jobConf.getOutputKeyComparator();

      final RawKeyValueIterator mergedRecords =
          Merger.merge(
              jobConf,
              rfs,
              keyClass,
              valueClass,
              segments,
              segmentCount,
              mergeTmpDir,
              keyComparator,
              reporter,
              null,
              null,
              null);
      Merger.writeFile(mergedRecords, writer, reporter, jobConf);
      writer.close();

      LOG.info(
          reduceId
              + " Memory-to-Memory merge of the "
              + segmentCount
              + " files in-memory complete.");

      // Register the merged result.
      closeInMemoryMergedFile(merged);
    }
Example #2
0
 /**
  * Appends one index record {@code <offset, raw-length, segment-length>} to the index stream
  * and logs it.
  *
  * <p>All three fields are written as fixed-width longs rather than variable-length longs. That
  * costs some space in the index file but lets a reader seek directly to the record for a
  * partition when byte ranges are served to the reduces.
  *
  * @param indexOut stream of the index file receiving the record
  * @param out data stream; its current position minus {@code start} is the segment length
  * @param start offset recorded as the first field of the record
  * @param writer writer whose raw length is recorded as the second field
  * @throws IOException if writing to {@code indexOut} or reading the position of {@code out}
  *     fails
  */
 private void writeIndexRecord(
     FSDataOutputStream indexOut, FSDataOutputStream out, long start, Writer<K, V> writer)
     throws IOException {
   indexOut.writeLong(start);

   final long rawLength = writer.getRawLength();
   indexOut.writeLong(rawLength);

   final long segmentLength = out.getPos() - start;
   indexOut.writeLong(segmentLength);

   LOG.info("Index: (" + start + ", " + rawLength + ", " + segmentLength + ")");
 }
Example #3
0
  /**
   * Drains {@code records} into {@code writer}, calling {@code progressable.progress()} after
   * every {@code PROGRESS_BAR}-th record written.
   *
   * @param records source of key/value pairs; consumed until {@code next()} returns false
   * @param writer destination for each key/value pair
   * @param progressable notified periodically while records are copied
   * @throws IOException if reading a record or appending it fails
   */
  public static <K extends Object, V extends Object> void writeFile(
      RawKeyValueIterator records, Writer<K, V> writer, Progressable progressable)
      throws IOException {
    // "written" is the 1-based ordinal of the record being copied in this iteration.
    for (long written = 1; records.next(); written++) {
      writer.append(records.getKey(), records.getValue());

      if (written % PROGRESS_BAR == 0) {
        progressable.progress();
      }
    }
  }
Example #4
0
  /**
   * Drains {@code records} into {@code writer}, reporting progress at a configurable interval.
   *
   * <p>The interval is read from {@code mapred.merge.recordsBeforeProgress} (default 10000).
   * Progress is reported after the first record and then after every interval-th record
   * thereafter. A configured interval of zero would make the remainder computation throw
   * {@link ArithmeticException}, so zero falls back to the default interval.
   *
   * @param records source of key/value pairs; consumed until {@code next()} returns false
   * @param writer destination for each key/value pair
   * @param progressable notified periodically while records are copied
   * @param conf configuration supplying the progress interval
   * @throws IOException if reading a record or appending it fails
   */
  public static <K extends Object, V extends Object> void writeFile(
      RawKeyValueIterator records,
      Writer<K, V> writer,
      Progressable progressable,
      Configuration conf)
      throws IOException {
    final long defaultRecordsBeforeProgress = 10000;
    long progressBar =
        conf.getLong("mapred.merge.recordsBeforeProgress", defaultRecordsBeforeProgress);
    if (progressBar == 0) {
      // "x % 0" throws ArithmeticException for longs; a zero interval is a misconfiguration,
      // so use the default instead of failing the merge.
      progressBar = defaultRecordsBeforeProgress;
    }

    long recordCtr = 0;
    while (records.next()) {
      writer.append(records.getKey(), records.getValue());

      // Post-increment: the very first record (counter 0) also triggers a progress report.
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
  }
Example #5
0
    /**
     * Combines all spill files of this task into one final output file and one final index file.
     *
     * <ul>
     *   <li>With exactly one spill, the spill file and its index are renamed to
     *       {@code file.out} and {@code file.out.index} and nothing is merged.
     *   <li>With no spills, an empty segment and an index record are written for every
     *       partition.
     *   <li>Otherwise, for each partition the matching segment of every spill is merged, the
     *       merged records are written (through the combiner when one is configured, combining
     *       more than once is allowed, and there are at least {@code minSpillsForCombine}
     *       spills), an index record is appended, and finally the spill files are deleted.
     * </ul>
     *
     * @throws IOException if any file-system access, merge, or write fails
     */
    private void mergeParts() throws IOException {
      // get the approximate size of the final output/index files
      long finalOutFileSize = 0;
      long finalIndexFileSize = 0;
      Path[] filename = new Path[numSpills];
      Path[] indexFileName = new Path[numSpills];
      FileSystem localFs = FileSystem.getLocal(job);

      for (int i = 0; i < numSpills; i++) {
        filename[i] = mapOutputFile.getSpillFile(getTaskID(), i);
        indexFileName[i] = mapOutputFile.getSpillIndexFile(getTaskID(), i);
        finalOutFileSize += localFs.getFileStatus(filename[i]).getLen();
      }

      if (numSpills == 1) { // the spill is the final output
        // NOTE(review): the boolean results of rename() are ignored here — confirm that a failed
        // rename is detected elsewhere.
        localFs.rename(filename[0], new Path(filename[0].getParent(), "file.out"));
        localFs.rename(indexFileName[0], new Path(indexFileName[0].getParent(), "file.out.index"));
        return;
      }
      // make correction in the length to include the sequence file header
      // lengths for each partition
      finalOutFileSize += partitions * APPROX_HEADER_LENGTH;

      finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;

      Path finalOutputFile = mapOutputFile.getOutputFileForWrite(getTaskID(), finalOutFileSize);
      Path finalIndexFile =
          mapOutputFile.getOutputIndexFileForWrite(getTaskID(), finalIndexFileSize);

      // The output stream for the final single output file
      FSDataOutputStream finalOut = localFs.create(finalOutputFile, true, 4096);

      // The final index file output stream
      FSDataOutputStream finalIndexOut = localFs.create(finalIndexFile, true, 4096);
      if (numSpills == 0) {
        // create dummy files: one empty segment and one index record per partition
        for (int i = 0; i < partitions; i++) {
          long segmentStart = finalOut.getPos();
          Writer<K, V> writer = new Writer<K, V>(job, finalOut, keyClass, valClass, codec);
          writer.close();
          writeIndexRecord(finalIndexOut, finalOut, segmentStart, writer);
        }
        finalOut.close();
        finalIndexOut.close();
        return;
      }

      for (int parts = 0; parts < partitions; parts++) {
        // create the segments to be merged
        List<Segment<K, V>> segmentList = new ArrayList<Segment<K, V>>(numSpills);
        for (int i = 0; i < numSpills; i++) {
          // Read this partition's <offset, raw-length, segment-length> record from the spill
          // index. The stream is closed in finally so a failed seek/read does not leak it.
          final long segmentOffset;
          final long rawSegmentLength;
          final long segmentLength;
          FSDataInputStream indexIn = localFs.open(indexFileName[i]);
          try {
            indexIn.seek(parts * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            segmentOffset = indexIn.readLong();
            rawSegmentLength = indexIn.readLong();
            segmentLength = indexIn.readLong();
          } finally {
            indexIn.close();
          }

          Segment<K, V> s =
              new Segment<K, V>(
                  job, localFs, filename[i], segmentOffset, segmentLength, codec, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            // Each field is separated by ", " so the offset and raw length stay distinguishable.
            LOG.debug(
                "Index: ("
                    + indexFileName[i]
                    + ", "
                    + segmentOffset
                    + ", "
                    + rawSegmentLength
                    + ", "
                    + segmentLength
                    + ")");
          }
        }

        // merge
        @SuppressWarnings("unchecked")
        RawKeyValueIterator kvIter =
            Merger.merge(
                job,
                localFs,
                keyClass,
                valClass,
                segmentList,
                job.getInt("io.sort.factor", 100),
                new Path(getTaskID().toString()),
                job.getOutputKeyComparator(),
                reporter);

        // write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer<K, V> writer = new Writer<K, V>(job, finalOut, keyClass, valClass, codec);
        if (null == combinerClass
            || job.getCombineOnceOnly()
            || numSpills < minSpillsForCombine) {
          Merger.writeFile(kvIter, writer, reporter, job);
        } else {
          combineCollector.setWriter(writer);
          combineAndSpill(kvIter, combineInputCounter);
        }

        // close
        writer.close();

        // write index record
        writeIndexRecord(finalIndexOut, finalOut, segmentStart, writer);
      }
      finalOut.close();
      finalIndexOut.close();
      // cleanup
      for (int i = 0; i < numSpills; i++) {
        localFs.delete(filename[i], true);
        localFs.delete(indexFileName[i], true);
      }
    }
Example #6
0
  /**
   * Builds the iterator that feeds the reduce from the remaining in-memory and on-disk map
   * outputs.
   *
   * <p>The method proceeds in three steps:
   *
   * <ol>
   *   <li>In-memory outputs selected by {@code createInMemorySegments} (called with the memory
   *       budget derived from {@code REDUCE_INPUT_BUFFER_PERCENT}) are either merged into one
   *       new on-disk file, when fewer than {@code ioSortFactor} disk outputs exist, or kept
   *       to be merged together with the disk segments.
   *   <li>Every on-disk output becomes a segment; the segments are sorted by length and, if
   *       there is any data, merged into a single iterator.
   *   <li>The remaining in-memory outputs are merged with that disk iterator (if any) and the
   *       resulting iterator is returned.
   * </ol>
   *
   * @param job job configuration
   * @param fs file system holding the on-disk map outputs
   * @param inMemoryMapOutputs map outputs currently held in memory
   * @param onDiskMapOutputs map outputs on disk; a file produced by step 1 is appended to it
   * @return iterator over all merged key/value pairs
   * @throws IOException if {@code REDUCE_INPUT_BUFFER_PERCENT} is outside [0, 1] or any
   *     merge, read, or write fails
   */
  private RawKeyValueIterator finalMerge(
      JobConf job,
      FileSystem fs,
      List<InMemoryMapOutput<K, V>> inMemoryMapOutputs,
      List<CompressAwarePath> onDiskMapOutputs)
      throws IOException {
    LOG.info(
        "finalMerge called with "
            + inMemoryMapOutputs.size()
            + " in-memory map-outputs and "
            + onDiskMapOutputs.size()
            + " on-disk map-outputs");

    final float maxRedPer = job.getFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0f);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
      // Separate the key from the offending value so the message is readable.
      throw new IOException(
          "Invalid value for " + MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT + ": " + maxRedPer);
    }
    int maxInMemReduce =
        (int) Math.min(Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE);

    // merge config params
    Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
    Class<V> valueClass = (Class<V>) job.getMapOutputValueClass();
    boolean keepInputs = job.getKeepFailedTaskFiles();
    final Path tmpDir = new Path(reduceId.toString());
    final RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();

    // segments required to vacate memory
    List<Segment<K, V>> memDiskSegments = new ArrayList<Segment<K, V>>();
    long inMemToDiskBytes = 0;
    boolean mergePhaseFinished = false;
    if (inMemoryMapOutputs.size() > 0) {
      // Read before createInMemorySegments is given the list, as the statement order requires.
      TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
      inMemToDiskBytes =
          createInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce);
      final int numMemDiskSegments = memDiskSegments.size();
      if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {

        // If we reach here, it implies that we have less than io.sort.factor
        // disk segments and this will be incremented by 1 (result of the
        // memory segments merge). Since this total would still be
        // <= io.sort.factor, we will not do any more intermediate merges,
        // the merge of all these disk segments would be directly fed to the
        // reduce method

        mergePhaseFinished = true;
        // must spill to disk, but can't retain in-mem for intermediate merge
        final Path outputPath =
            mapOutputFile
                .getInputFileForWrite(mapId, inMemToDiskBytes)
                .suffix(Task.MERGED_OUTPUT_PREFIX);
        final RawKeyValueIterator rIter =
            Merger.merge(
                job,
                fs,
                keyClass,
                valueClass,
                memDiskSegments,
                numMemDiskSegments,
                tmpDir,
                comparator,
                reporter,
                spilledRecordsCounter,
                null,
                mergePhase);
        Writer<K, V> writer =
            new Writer<K, V>(job, fs, outputPath, keyClass, valueClass, codec, null);
        try {
          Merger.writeFile(rIter, writer, reporter, job);
          writer.close();
          // add to list of final disk outputs.
          onDiskMapOutputs.add(
              new CompressAwarePath(
                  outputPath, writer.getRawLength(), writer.getCompressedLength()));
          // Marks success so the finally block does not close the writer again.
          writer = null;
        } catch (IOException e) {
          // outputPath is always non-null here, so the partial file can be removed directly.
          try {
            fs.delete(outputPath, true);
          } catch (IOException ignored) {
            // Best-effort cleanup: the original failure rethrown below is the one that matters.
          }
          throw e;
        } finally {
          if (null != writer) {
            writer.close();
          }
        }
        LOG.info(
            "Merged "
                + numMemDiskSegments
                + " segments, "
                + inMemToDiskBytes
                + " bytes to disk to satisfy "
                + "reduce memory limit");
        inMemToDiskBytes = 0;
        memDiskSegments.clear();
      } else if (inMemToDiskBytes != 0) {
        LOG.info(
            "Keeping "
                + numMemDiskSegments
                + " segments, "
                + inMemToDiskBytes
                + " bytes in memory for "
                + "intermediate, on-disk merge");
      }
    }

    // segments on disk
    List<Segment<K, V>> diskSegments = new ArrayList<Segment<K, V>>();
    long onDiskBytes = inMemToDiskBytes;
    long rawBytes = inMemToDiskBytes;
    CompressAwarePath[] onDisk =
        onDiskMapOutputs.toArray(new CompressAwarePath[onDiskMapOutputs.size()]);
    for (CompressAwarePath file : onDisk) {
      long fileLength = fs.getFileStatus(file).getLen();
      onDiskBytes += fileLength;
      // Fall back to the on-disk length when no positive raw length is recorded for the file.
      rawBytes += (file.getRawDataLength() > 0) ? file.getRawDataLength() : fileLength;

      LOG.debug("Disk file: " + file + " Length is " + fileLength);
      diskSegments.add(
          new Segment<K, V>(
              job,
              fs,
              file,
              codec,
              keepInputs,
              // Files that are themselves merge results are not given the merged-outputs counter.
              (file.toString().endsWith(Task.MERGED_OUTPUT_PREFIX)
                  ? null
                  : mergedMapOutputsCounter),
              file.getRawDataLength()));
    }
    LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
    // Order the disk segments by ascending length.
    Collections.sort(
        diskSegments,
        new Comparator<Segment<K, V>>() {
          public int compare(Segment<K, V> o1, Segment<K, V> o2) {
            if (o1.getLength() == o2.getLength()) {
              return 0;
            }
            return o1.getLength() < o2.getLength() ? -1 : 1;
          }
        });

    // build final list of segments from merged backed by disk + in-mem
    List<Segment<K, V>> finalSegments = new ArrayList<Segment<K, V>>();
    long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
    LOG.info(
        "Merging "
            + finalSegments.size()
            + " segments, "
            + inMemBytes
            + " bytes from memory into reduce");
    if (0 != onDiskBytes) {
      // In-memory segments kept for the intermediate merge go to the front of the list, and
      // their count is passed to the merge separately.
      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      // Pass mergePhase only if there is a going to be intermediate
      // merges. See comment where mergePhaseFinished is being set
      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
      RawKeyValueIterator diskMerge =
          Merger.merge(
              job,
              fs,
              keyClass,
              valueClass,
              codec,
              diskSegments,
              ioSortFactor,
              numInMemSegments,
              tmpDir,
              comparator,
              reporter,
              false,
              spilledRecordsCounter,
              null,
              thisPhase);
      diskSegments.clear();
      if (0 == finalSegments.size()) {
        // Nothing left in memory: the disk merge is the final result.
        return diskMerge;
      }
      // Wrap the disk merge as one more segment of the final in-memory merge.
      finalSegments.add(
          new Segment<K, V>(new RawKVIteratorReader(diskMerge, onDiskBytes), true, rawBytes));
    }
    return Merger.merge(
        job,
        fs,
        keyClass,
        valueClass,
        finalSegments,
        finalSegments.size(),
        tmpDir,
        comparator,
        reporter,
        spilledRecordsCounter,
        null,
        null);
  }
Example #7
0
    /**
     * Merges the given on-disk map outputs into one new on-disk file.
     *
     * <p>Returns immediately (after logging) when {@code inputs} is {@code null} or empty. The
     * output size is estimated as the sum of the input lengths plus the checksum length for that
     * total. On success the merged file is passed to {@code closeOnDiskFile}. If merging or
     * writing fails with an {@link IOException}, the writer is closed on a best-effort basis,
     * the partial output file is deleted, and the exception is rethrown.
     *
     * @param inputs on-disk map outputs to merge
     * @throws IOException if merging, writing, or a file-system call fails
     */
    @Override
    public void merge(List<CompressAwarePath> inputs) throws IOException {
      // sanity check
      if (inputs == null || inputs.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
      }

      long approxOutputSize = 0;
      int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

      LOG.info(
          "OnDiskMerger: We have  " + inputs.size() + " map outputs on disk. Triggering merge...");

      // 1. Prepare the list of files to be merged.
      for (CompressAwarePath file : inputs) {
        approxOutputSize += localFS.getFileStatus(file).getLen();
      }

      // add the checksum length
      approxOutputSize += ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

      // 2. Start the on-disk merge process
      Path outputPath =
          localDirAllocator
              .getLocalPathForWrite(inputs.get(0).toString(), approxOutputSize, jobConf)
              .suffix(Task.MERGED_OUTPUT_PREFIX);
      Writer<K, V> writer =
          new Writer<K, V>(
              jobConf,
              rfs,
              outputPath,
              (Class<K>) jobConf.getMapOutputKeyClass(),
              (Class<V>) jobConf.getMapOutputValueClass(),
              codec,
              null);
      RawKeyValueIterator iter = null;
      CompressAwarePath compressAwarePath;
      Path tmpDir = new Path(reduceId.toString());
      // Set just before the regular close() so the failure path never closes the writer twice.
      boolean writerCloseAttempted = false;
      try {
        iter =
            Merger.merge(
                jobConf,
                rfs,
                (Class<K>) jobConf.getMapOutputKeyClass(),
                (Class<V>) jobConf.getMapOutputValueClass(),
                codec,
                inputs.toArray(new Path[inputs.size()]),
                true,
                ioSortFactor,
                tmpDir,
                (RawComparator<K>) jobConf.getOutputKeyComparator(),
                reporter,
                spilledRecordsCounter,
                null,
                mergedMapOutputsCounter,
                null);

        Merger.writeFile(iter, writer, reporter, jobConf);
        writerCloseAttempted = true;
        writer.close();
        compressAwarePath =
            new CompressAwarePath(outputPath, writer.getRawLength(), writer.getCompressedLength());
      } catch (IOException e) {
        if (!writerCloseAttempted) {
          // Release the writer's resources before removing the partial output.
          try {
            writer.close();
          } catch (IOException ignored) {
            // Best effort: the original failure rethrown below is the one worth reporting.
          }
        }
        localFS.delete(outputPath, true);
        throw e;
      }

      closeOnDiskFile(compressAwarePath);

      LOG.info(
          reduceId
              + " Finished merging "
              + inputs.size()
              + " map output files on disk of total-size "
              + approxOutputSize
              + "."
              + " Local output file is "
              + outputPath
              + " of size "
              + localFS.getFileStatus(outputPath).getLen());
    }
Example #8
0
    /**
     * Merges the given in-memory map outputs into one new on-disk file.
     *
     * <p>Returns immediately when {@code inputs} is {@code null} or empty. The merged records
     * are written directly when no combiner is configured, otherwise they are run through the
     * combiner. On success the resulting file is passed to {@code closeOnDiskFile}. If an
     * {@link IOException} occurs, the writer is closed on a best-effort basis, the partial
     * output file is deleted, and the exception is rethrown.
     *
     * @param inputs in-memory map outputs to merge
     * @throws IOException if merging, combining, writing, or a file-system call fails
     */
    @Override
    public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
      if (inputs == null || inputs.size() == 0) {
        return;
      }

      // name this output file same as the name of the first file that is
      // there in the current list of inmem files (this is guaranteed to
      // be absent on the disk currently. So we don't overwrite a prev.
      // created spill). Also we need to create the output file now since
      // it is not guaranteed that this file will be present after merge
      // is called (we delete empty files as soon as we see them
      // in the merge method)

      // figure out the mapId
      TaskAttemptID mapId = inputs.get(0).getMapId();
      TaskID mapTaskId = mapId.getTaskID();

      List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
      long mergeOutputSize = createInMemorySegments(inputs, inMemorySegments, 0);
      int noInMemorySegments = inMemorySegments.size();

      Path outputPath =
          mapOutputFile
              .getInputFileForWrite(mapTaskId, mergeOutputSize)
              .suffix(Task.MERGED_OUTPUT_PREFIX);

      Writer<K, V> writer =
          new Writer<K, V>(
              jobConf,
              rfs,
              outputPath,
              (Class<K>) jobConf.getMapOutputKeyClass(),
              (Class<V>) jobConf.getMapOutputValueClass(),
              codec,
              null);

      RawKeyValueIterator rIter = null;
      CompressAwarePath compressAwarePath;
      // Set just before the regular close() so the failure path never closes the writer twice.
      boolean writerCloseAttempted = false;
      try {
        LOG.info("Initiating in-memory merge with " + noInMemorySegments + " segments...");

        rIter =
            Merger.merge(
                jobConf,
                rfs,
                (Class<K>) jobConf.getMapOutputKeyClass(),
                (Class<V>) jobConf.getMapOutputValueClass(),
                inMemorySegments,
                inMemorySegments.size(),
                new Path(reduceId.toString()),
                (RawComparator<K>) jobConf.getOutputKeyComparator(),
                reporter,
                spilledRecordsCounter,
                null,
                null);

        if (null == combinerClass) {
          Merger.writeFile(rIter, writer, reporter, jobConf);
        } else {
          combineCollector.setWriter(writer);
          combineAndSpill(rIter, reduceCombineInputCounter);
        }
        writerCloseAttempted = true;
        writer.close();
        compressAwarePath =
            new CompressAwarePath(outputPath, writer.getRawLength(), writer.getCompressedLength());

        LOG.info(
            reduceId
                + " Merge of the "
                + noInMemorySegments
                + " files in-memory complete."
                + " Local file is "
                + outputPath
                + " of size "
                + localFS.getFileStatus(outputPath).getLen());
      } catch (IOException e) {
        if (!writerCloseAttempted) {
          // Release the writer's resources before removing the partial output.
          try {
            writer.close();
          } catch (IOException ignored) {
            // Best effort: the original failure rethrown below is the one worth reporting.
          }
        }
        // make sure that we delete the on-disk file created for this merge
        localFS.delete(outputPath, true);
        throw e;
      }

      // Note the output of the merge
      closeOnDiskFile(compressAwarePath);
    }
Example #9
0
    /**
     * Merges the queued segments in one or more passes and returns an iterator over the result.
     *
     * <p>Each pass picks up to {@code factor} non-empty segments (as computed by
     * {@code getPassFactor}) and loads them into the priority queue. If no more segments remain
     * than the pass factor, this object is returned as the iterator of the final merge.
     * Otherwise the picked segments are merged into an intermediate file under {@code tmpDir},
     * that file is added back as a new segment, the segment list is re-sorted, and another pass
     * starts.
     *
     * @param keyClass key class used when writing intermediate files
     * @param valueClass value class used when writing intermediate files
     * @param factor maximum number of segments to merge in one pass
     * @param inMem number of in-memory segments; they are assumed to be the first entries of
     *     the segment list and are not subject to the first pass factor
     * @param tmpDir directory name under which intermediate files are allocated
     * @return this object, positioned to iterate over the final merge
     * @throws IOException if reading a segment or writing an intermediate file fails
     */
    RawKeyValueIterator merge(
        Class<K> keyClass, Class<V> valueClass, int factor, int inMem, Path tmpDir)
        throws IOException {
      LOG.info("Merging " + segments.size() + " sorted segments");

      // create the MergeStreams from the sorted map created in the constructor
      // and dump the final output to a file
      int numSegments = segments.size();
      int origFactor = factor;
      int passNo = 1;
      do {
        // get the factor for this pass of merge. We assume in-memory segments
        // are the first entries in the segment list and that the pass factor
        // doesn't apply to them
        factor = getPassFactor(factor, passNo, numSegments - inMem);
        if (1 == passNo) {
          // in-memory segments ride along with the first pass on top of the pass factor
          factor += inMem;
        }
        List<Segment<K, V>> segmentsToMerge = new ArrayList<Segment<K, V>>();
        int segmentsConsidered = 0;
        int numSegmentsToConsider = factor;
        while (true) {
          // extract the smallest 'factor' number of segments
          // Call cleanup on the empty segments (no key/value data)
          List<Segment<K, V>> mStream = getSegmentDescriptors(numSegmentsToConsider);
          for (Segment<K, V> segment : mStream) {
            // Initialize the segment at the last possible moment;
            // this helps in ensuring we don't use buffers until we need them
            segment.init();
            // account for the bytes consumed by reading the first record
            long startPos = segment.getPosition();
            boolean hasNext = segment.next();
            long endPos = segment.getPosition();
            totalBytesProcessed += endPos - startPos;
            mergeProgress.set(totalBytesProcessed * progPerByte);
            if (hasNext) {
              segmentsToMerge.add(segment);
              segmentsConsidered++;
            } else {
              segment.close();
              numSegments--; // we ignore this segment for the merge
            }
          }
          // if we have the desired number of segments
          // or looked at all available segments, we break
          if (segmentsConsidered == factor || segments.size() == 0) {
            break;
          }

          // empty segments were dropped; ask only for as many as are still missing
          numSegmentsToConsider = factor - segmentsConsidered;
        }

        // feed the streams to the priority queue
        initialize(segmentsToMerge.size());
        clear();
        for (Segment<K, V> segment : segmentsToMerge) {
          put(segment);
        }

        // if we have lesser number of segments remaining, then just return the
        // iterator, else do another single level merge
        if (numSegments <= factor) {
          // calculate the length of the remaining segments. Required for
          // calculating the merge progress
          long totalBytes = 0;
          for (int i = 0; i < segmentsToMerge.size(); i++) {
            totalBytes += segmentsToMerge.get(i).getLength();
          }
          // NOTE: the unbraced "if" below guards only the progPerByte assignment that follows it
          if (totalBytes != 0) // being paranoid
          progPerByte = 1.0f / (float) totalBytes;

          if (totalBytes != 0) mergeProgress.set(totalBytesProcessed * progPerByte);
          else mergeProgress.set(1.0f); // Last pass and no segments left - we're done

          LOG.info(
              "Down to the last merge-pass, with "
                  + numSegments
                  + " segments left of total size: "
                  + totalBytes
                  + " bytes");
          return this;
        } else {
          LOG.info(
              "Merging "
                  + segmentsToMerge.size()
                  + " intermediate segments out of a total of "
                  + (segments.size() + segmentsToMerge.size()));

          // we want to spread the creation of temp files on multiple disks if
          // available under the space constraints
          long approxOutputSize = 0;
          for (Segment<K, V> s : segmentsToMerge) {
            approxOutputSize +=
                s.getLength() + ChecksumFileSystem.getApproxChkSumLength(s.getLength());
          }
          // intermediate file name carries the pass number: <tmpDir>/intermediate.<passNo>
          Path tmpFilename = new Path(tmpDir, "intermediate").suffix("." + passNo);

          Path outputFile =
              lDirAlloc.getLocalPathForWrite(tmpFilename.toString(), approxOutputSize, conf);

          // drain this queue (the current pass) into the intermediate file
          Writer<K, V> writer = new Writer<K, V>(conf, fs, outputFile, keyClass, valueClass, codec);
          writeFile(this, writer, reporter);
          writer.close();

          // we finished one single level merge; now clean up the priority
          // queue
          this.close();

          // Add the newly create segment to the list of segments to be merged
          Segment<K, V> tempSegment = new Segment<K, V>(conf, fs, outputFile, codec, false);
          segments.add(tempSegment);
          numSegments = segments.size();
          Collections.sort(segments, segmentComparator);

          passNo++;
        }
        // we are worried about only the first pass merge factor. So reset the
        // factor to what it originally was
        factor = origFactor;
      } while (true);
    }