Beispiel #1
0
  /**
   * Flushes the sorter: spills whatever is still buffered, shuts down the spill thread and runs
   * the final merge.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Under {@code spillLock}, wait on {@code spillDone} until {@code spillInProgress} is
   *       false, then call {@code checkSpillException()}.
   *   <li>If {@code kvindex != kvend}, publish the new {@code kvend}/{@code bufend} and call
   *       {@code sortAndSpill()} on the calling thread.
   *   <li>Interrupt {@code spillThread} and join it.
   *   <li>Call {@code mergeParts()} and add the length of the file returned by
   *       {@code mapOutputFile.getOutputFile()} to {@code fileOutputByteCounter}.
   * </ol>
   *
   * @throws IOException if the calling thread is interrupted while waiting for the running spill
   *     or for the spill thread to exit (the thread's interrupt status is restored before the
   *     exception is thrown), or if any of the invoked spill/merge/file-status calls throws
   */
  @Override
  public void flush() throws IOException {
    LOG.info("Starting flush of map output");
    spillLock.lock();
    try {
      // Block until no spill is in progress.
      while (spillInProgress) {
        spillDone.await();
      }
      checkSpillException();

      // kvend scaled by 4; the log output below prints the same scaling next to the raw index.
      final int kvbend = 4 * kvend;
      if ((kvbend + METASIZE) % kvbuffer.length != equator - (equator % METASIZE)) {
        // spill finished
        resetSpill();
      }
      if (kvindex != kvend) {
        // Something is still buffered: mark the range and spill it from this thread.
        kvend = (kvindex + NMETA) % kvmeta.capacity();
        bufend = bufmark;
        if (LOG.isInfoEnabled()) {
          LOG.info("Sorting & Spilling map output");
          LOG.info("bufstart = " + bufstart + "; bufend = " + bufmark + "; bufvoid = " + bufvoid);
          LOG.info(
              "kvstart = "
                  + kvstart
                  + "("
                  + (kvstart * 4)
                  + "); kvend = "
                  + kvend
                  + "("
                  + (kvend * 4)
                  + "); length = "
                  + (distanceTo(kvend, kvstart, kvmeta.capacity()) + 1)
                  + "/"
                  + maxRec);
        }
        sortAndSpill();
      }
    } catch (InterruptedException e) {
      // await() clears the interrupt status when it throws; restore it so code further up the
      // stack can still observe the interruption after it has been translated to an IOException.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while waiting for the writer", e);
    } finally {
      spillLock.unlock();
    }
    assert !spillLock.isHeldByCurrentThread();
    // shut down spill thread and wait for it to exit. Since the preceding
    // ensures that it is finished with its work (and sortAndSpill did not
    // throw), we elect to use an interrupt instead of setting a flag.
    // Spilling simultaneously from this thread while the spill thread
    // finishes its work might be both a useful way to extend this and also
    // sufficient motivation for the latter approach.
    try {
      spillThread.interrupt();
      spillThread.join();
    } catch (InterruptedException e) {
      // join() also clears the interrupt status when it throws; restore it before reporting.
      Thread.currentThread().interrupt();
      throw new IOException("Spill failed", e);
    }
    // release sort buffer before the merge
    // FIXME
    // kvbuffer = null;
    mergeParts();
    Path outputPath = mapOutputFile.getOutputFile();
    fileOutputByteCounter.increment(rfs.getFileStatus(outputPath).getLen());
  }
Beispiel #2
0
      /**
       * Attempt to write a sequence of bytes to the collection buffer. This method will block if
       * the spill thread is running and it cannot write.
       *
       * <p>While holding the {@code spillLock} monitor the method checks whether the accounting
       * array ({@code kvoffsets}) and the byte buffer ({@code kvbuffer}) have room, starts a
       * {@code SpillThread} when a soft limit is exceeded or the bytes do not fit, and waits on
       * {@code spillLock} while a spill is outstanding and there is still no room. The actual copy
       * into {@code kvbuffer} happens after the {@code spillLock} monitor has been released (the
       * method itself is still {@code synchronized} on the enclosing instance).
       *
       * @param b array holding the bytes to write
       * @param off index in {@code b} of the first byte to copy
       * @param len number of bytes to copy
       * @throws MapBufferTooSmallException if no records are currently buffered and the bytes
       *     still do not fit into the collection buffer; all buffer indices are reset first.
       * @throws IOException if {@code sortSpillException} is set, or if the thread is interrupted
       *     while waiting on {@code spillLock}.
       */
      @Override
      public synchronized void write(byte b[], int off, int len) throws IOException {
        boolean kvfull = false;
        boolean buffull = false;
        boolean wrap = false;
        synchronized (spillLock) {
          do {
            // A recorded spill failure aborts the write.
            if (sortSpillException != null) {
              throw (IOException) new IOException("Spill failed").initCause(sortSpillException);
            }

            // sufficient accounting space?
            // kvnext is the slot after kvindex (circular); full when it would land on kvstart.
            final int kvnext = (kvindex + 1) % kvoffsets.length;
            kvfull = kvnext == kvstart;
            // sufficient buffer space?
            if (bufstart <= bufend && bufend <= bufindex) {
              // Write position is at or past the spill range: the bytes either fit before
              // bufvoid, or may be split across the end if tail space plus the space before
              // bufstart is larger than len.
              buffull = bufindex + len > bufvoid;
              wrap = (bufvoid - bufindex) + bufstart > len;
            } else {
              // bufindex <= bufstart <= bufend
              // bufend <= bufindex <= bufstart
              wrap = false;
              buffull = bufindex + len > bufstart;
            }

            if (kvstart == kvend) {
              // spill thread not running
              if (kvend != kvindex) {
                // we have records we can spill
                final boolean kvsoftlimit =
                    (kvnext > kvend)
                        ? kvnext - kvend > softRecordLimit
                        : kvend - kvnext <= kvoffsets.length - softRecordLimit;
                final boolean bufsoftlimit =
                    (bufindex > bufend)
                        ? bufindex - bufend > softBufferLimit
                        : bufend - bufindex < bufvoid - softBufferLimit;
                if (kvsoftlimit || bufsoftlimit || (buffull && !wrap)) {
                  // Note: the first log line reports only the two soft-limit flags, even when
                  // the spill was triggered by (buffull && !wrap).
                  LOG.info(
                      "Spilling map output: buffer full = "
                          + bufsoftlimit
                          + " and record full = "
                          + kvsoftlimit);
                  LOG.info(
                      "bufstart = "
                          + bufstart
                          + "; bufend = "
                          + bufmark
                          + "; bufvoid = "
                          + bufvoid);
                  LOG.info(
                      "kvstart = "
                          + kvstart
                          + "; kvend = "
                          + kvindex
                          + "; length = "
                          + kvoffsets.length);
                  // Publish the range to spill; kvstart != kvend from here on, which the wait
                  // loop below treats as "spill outstanding".
                  kvend = kvindex;
                  bufend = bufmark;
                  // TODO No need to recreate this thread every time
                  SpillThread t = new SpillThread();
                  t.setDaemon(true);
                  t.setName("SpillThread");
                  t.start();
                }
              } else if (buffull && !wrap) {
                // We have no buffered records, and this record is too large
                // to write into kvbuffer. We must spill it directly from
                // collect
                // size = bytes between bufend and bufindex (circular) plus this write.
                final int size =
                    ((bufend <= bufindex) ? bufindex - bufend : (bufvoid - bufend) + bufindex)
                        + len;
                // Reset every index so the buffer is empty again before signalling the caller.
                bufstart = bufend = bufindex = bufmark = 0;
                kvstart = kvend = kvindex = 0;
                bufvoid = kvbuffer.length;
                throw new MapBufferTooSmallException(size + " bytes");
              }
            }

            if (kvfull || (buffull && !wrap)) {
              // No room: wait until kvstart == kvend again, reporting progress on each wake-up.
              // Presumably SpillThread updates kvstart and notifies on spillLock when it
              // finishes - that code is not visible here; confirm.
              while (kvstart != kvend) {
                reporter.progress();
                try {
                  spillLock.wait();
                } catch (InterruptedException e) {
                  // NOTE(review): the interrupt status is not restored before the exception is
                  // wrapped; consider Thread.currentThread().interrupt() here.
                  throw (IOException)
                      new IOException("Buffer interrupted while waiting for the writer")
                          .initCause(e);
                }
              }
            }
            // Re-evaluate the space checks from scratch after a spill has completed.
          } while (kvfull || (buffull && !wrap));
        }
        // here, we know that we have sufficient space to write
        if (buffull) {
          // The loop only exits with buffull set when wrap is also set: fill the gap up to
          // bufvoid first, then continue writing the remainder from index 0.
          final int gaplen = bufvoid - bufindex;
          System.arraycopy(b, off, kvbuffer, bufindex, gaplen);
          len -= gaplen;
          off += gaplen;
          bufindex = 0;
        }
        System.arraycopy(b, off, kvbuffer, bufindex, len);
        bufindex += len;
      }
Beispiel #3
0
  /**
   * Creates the sorter: validates the sort configuration, allocates the collection buffer and
   * its int-typed metadata view, opens the key/value serializers and starts the spill thread,
   * returning only once that thread has reported that it is running.
   *
   * @param outputContext forwarded to the superclass; also supplies the destination vertex name
   *     used in the spill thread's name
   * @param conf forwarded to the superclass (settings are read back through {@code this.conf})
   * @param numOutputs forwarded to the superclass
   * @param initialMemoryAvailable forwarded to the superclass
   * @throws IOException if the spill percentage is outside {@code (0.0, 1.0]}, if
   *     {@code availableMemoryMb} is outside {@code [0, 2047]}, if the calling thread is
   *     interrupted while waiting for the spill thread to start (the interrupt status is restored
   *     before throwing), or if {@code sortSpillException} is set once the wait is over
   */
  public DefaultSorter(
      OutputContext outputContext, Configuration conf, int numOutputs, long initialMemoryAvailable)
      throws IOException {
    super(outputContext, conf, numOutputs, initialMemoryAvailable);
    // sanity checks
    final float spillper =
        this.conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT_DEFAULT);
    final int sortmb = this.availableMemoryMb;
    if (spillper > 1.0f || spillper <= 0.0f) {
      throw new IOException(
          "Invalid \""
              + TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT
              + "\": "
              + spillper);
    }
    // Only the low 11 bits may be set, i.e. 0..2047 MB. This rejects negative values and keeps
    // the "<< 20" below within the range of a positive int.
    // NOTE(review): 0 passes this check and yields a zero-length buffer - confirm whether a
    // caller or the superclass rules that out.
    if ((sortmb & 0x7FF) != sortmb) {
      throw new IOException(
          "Invalid \"" + TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB + "\": " + sortmb);
    }

    indexCacheMemoryLimit =
        this.conf.getInt(
            TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES,
            TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES_DEFAULT);

    // buffers and accounting
    int maxMemUsage = sortmb << 20;
    // Round down to a whole multiple of METASIZE.
    maxMemUsage -= maxMemUsage % METASIZE;
    kvbuffer = new byte[maxMemUsage];
    bufvoid = kvbuffer.length;
    // kvmeta is an int view over the same backing array as kvbuffer, in native byte order.
    kvmeta = ByteBuffer.wrap(kvbuffer).order(ByteOrder.nativeOrder()).asIntBuffer();
    setEquator(0);
    bufstart = bufend = bufindex = equator;
    // kvindex is not assigned in this constructor; presumably setEquator(0) initialises it.
    kvstart = kvend = kvindex;

    maxRec = kvmeta.capacity() / NMETA;
    softLimit = (int) (kvbuffer.length * spillper);
    bufferRemaining = softLimit;
    if (LOG.isInfoEnabled()) {
      LOG.info(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB + ": " + sortmb);
      LOG.info("soft limit at " + softLimit);
      LOG.info("bufstart = " + bufstart + "; bufvoid = " + bufvoid);
      LOG.info("kvstart = " + kvstart + "; length = " + maxRec);
    }

    // k/v serialization
    valSerializer.open(bb);
    keySerializer.open(bb);

    spillInProgress = false;
    minSpillsForCombine =
        this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 3);
    spillThread.setDaemon(true);
    // The closing bracket is appended after the vertex name has been cleaned, so it is not
    // passed through cleanVertexName() together with the name.
    spillThread.setName(
        "SpillThread ["
            + TezUtilsInternal.cleanVertexName(outputContext.getDestinationVertexName())
            + "]");
    spillLock.lock();
    try {
      spillThread.start();
      // Wait on spillDone until the spill thread has set spillThreadRunning.
      while (!spillThreadRunning) {
        spillDone.await();
      }
    } catch (InterruptedException e) {
      // await() clears the interrupt status when it throws; restore it so the caller can still
      // see that the thread was interrupted.
      Thread.currentThread().interrupt();
      throw new IOException("Spill thread failed to initialize", e);
    } finally {
      spillLock.unlock();
    }
    if (sortSpillException != null) {
      throw new IOException("Spill thread failed to initialize", sortSpillException);
    }
  }