示例#1
0
    /**
     * Serializes one map output record into the in-memory buffer and records its accounting
     * entry (partition, key start, value start).
     *
     * <p>Progress is reported first. The runtime classes of {@code key} and {@code value} are
     * then compared by identity against {@code keyClass} and {@code valClass}, so subclasses are
     * rejected. If serialization raises {@code MapBufferTooSmallException}, the record is handed
     * to {@code spillSingleRecord} instead of being buffered.
     *
     * @param key map output key; its class must be exactly {@code keyClass}
     * @param value map output value; its class must be exactly {@code valClass}
     * @throws IOException if the key or value class does not match, if a previous spill failed
     *     ({@code sortSpillException} is set; it is attached as the cause), if the partitioner
     *     returns a partition outside {@code [0, partitions)}, or if serialization fails
     */
    @SuppressWarnings("unchecked")
    public synchronized void collect(K key, V value) throws IOException {
      reporter.progress();
      if (key.getClass() != keyClass) {
        throw new IOException(
            "Type mismatch in key from map: expected "
                + keyClass.getName()
                + ", received "
                + key.getClass().getName());
      }
      if (value.getClass() != valClass) {
        throw new IOException(
            "Type mismatch in value from map: expected "
                + valClass.getName()
                + ", received "
                + value.getClass().getName());
      }
      if (sortSpillException != null) {
        // surface an earlier spill failure to the caller, keeping it as the cause
        throw new IOException("Spill failed", sortSpillException);
      }
      try {
        // serialize key bytes into buffer
        int keystart = bufindex;
        keySerializer.serialize(key);
        if (bufindex < keystart) {
          // wrapped the key; reset required
          bb.reset();
          keystart = 0;
        }
        // serialize value bytes into buffer
        int valstart = bufindex;
        valSerializer.serialize(value);
        int valend = bb.markRecord();
        // if the record end lies before the key start, the record wrapped past bufvoid:
        // count the tail [keystart, bufvoid) plus the head [0, valend)
        mapOutputByteCounter.increment(
            valend >= keystart ? valend - keystart : (bufvoid - keystart) + valend);

        if (keystart == bufindex) {
          // if emitted records make no writes, it's possible to wrap
          // accounting space without notice
          bb.write(new byte[0], 0, 0);
        }

        int partition = partitioner.getPartition(key, value, partitions);
        if (partition < 0 || partition >= partitions) {
          throw new IOException("Illegal partition for " + key + " (" + partition + ")");
        }
        mapOutputRecordCounter.increment(1);

        // update accounting info
        int ind = kvindex * ACCTSIZE;
        kvoffsets[kvindex] = ind;
        kvindices[ind + PARTITION] = partition;
        kvindices[ind + KEYSTART] = keystart;
        kvindices[ind + VALSTART] = valstart;
        // advance to the next accounting slot, wrapping at the end of kvoffsets
        kvindex = (kvindex + 1) % kvoffsets.length;
      } catch (MapBufferTooSmallException e) {
        // the record could not be buffered; spill it on its own and still count it
        LOG.info("Record too large for in-memory buffer: " + e.getMessage());
        spillSingleRecord(key, value);
        mapOutputRecordCounter.increment(1);
      }
    }
示例#2
0
  /**
   * Serialize the key, value to intermediate storage. When this method returns, kvindex must refer
   * to sufficient unused storage to store one METADATA.
   *
   * <p>The record is validated first (exact key/value class, partition range). Space for one
   * metadata entry is then deducted from {@code bufferRemaining}; when that budget is exhausted
   * the spill state is re-evaluated under {@code spillLock}. Finally the key and value are
   * serialized and the record's metadata is written at {@code kvindex}. If serialization raises
   * {@code MapBufferTooSmallException}, the record is passed to {@code spillSingleRecord}
   * instead.
   *
   * @param key map output key; its runtime class must be exactly {@code keyClass}
   * @param value map output value; its runtime class must be exactly {@code valClass}
   * @param partition target partition; must lie in {@code [0, partitions)}
   * @throws IOException if the key or value class does not match, if {@code partition} is out
   *     of range, or if a callee declared to throw it fails; {@code checkSpillException()}
   *     presumably reports an earlier spill failure here (its body is not visible in this
   *     excerpt)
   */
  synchronized void collect(Object key, Object value, final int partition) throws IOException {

    // Class identity check: a subclass of keyClass is rejected as well.
    if (key.getClass() != keyClass) {
      throw new IOException(
          "Type mismatch in key from map: expected "
              + keyClass.getName()
              + ", received "
              + key.getClass().getName());
    }
    if (value.getClass() != valClass) {
      throw new IOException(
          "Type mismatch in value from map: expected "
              + valClass.getName()
              + ", received "
              + value.getClass().getName());
    }
    if (partition < 0 || partition >= partitions) {
      throw new IOException(
          "Illegal partition for "
              + key
              + " ("
              + partition
              + ")"
              + ", TotalPartitions: "
              + partitions);
    }
    checkSpillException();
    // Charge this record's metadata against the remaining budget before writing anything.
    bufferRemaining -= METASIZE;
    if (bufferRemaining <= 0) {
      // start spill if the thread is not running and the soft limit has been
      // reached
      spillLock.lock();
      try {
        // NOTE: the loop condition is the constant false, so this body runs at most once. The
        // `continue` below jumps to that condition and therefore leaves the block; it does not
        // retry the checks.
        do {
          if (!spillInProgress) {
            // kvindex/kvend are scaled by 4 to obtain positions comparable with bufindex
            // (presumably int index -> byte offset; confirm against the kvmeta declaration)
            final int kvbidx = 4 * kvindex;
            final int kvbend = 4 * kvend;
            // serialized, unspilled bytes always lie between kvindex and
            // bufindex, crossing the equator. Note that any void space
            // created by a reset must be included in "used" bytes
            final int bUsed = distanceTo(kvbidx, bufindex);
            final boolean bufsoftlimit = bUsed >= softLimit;
            if ((kvbend + METASIZE) % kvbuffer.length != equator - (equator % METASIZE)) {
              // spill finished, reclaim space
              resetSpill();
              // new budget: the smaller of the free arc from bufindex to kvindex (less two
              // metadata entries) and the room left under the soft limit, less this record's
              // metadata
              bufferRemaining =
                  Math.min(distanceTo(bufindex, kvbidx) - 2 * METASIZE, softLimit - bUsed)
                      - METASIZE;
              continue;
            } else if (bufsoftlimit && kvindex != kvend) {
              // spill records, if any collected; check latter, as it may
              // be possible for metadata alignment to hit spill pcnt
              startSpill();
              // average serialized bytes per record so far, used to size the metadata share
              final int avgRec =
                  (int) (mapOutputByteCounter.getValue() / mapOutputRecordCounter.getValue());
              // leave at least half the split buffer for serialization data
              // ensure that kvindex >= bufindex
              final int distkvi = distanceTo(bufindex, kvbidx);
              // new equator: at least 2 * METASIZE - 1 past bufindex, at most half of the free
              // arc, wrapped to the buffer length
              final int newPos =
                  (bufindex
                          + Math.max(
                              2 * METASIZE - 1,
                              Math.min(distkvi / 2, distkvi / (METASIZE + avgRec) * METASIZE)))
                      % kvbuffer.length;
              setEquator(newPos);
              // serialization restarts at the new equator
              bufmark = bufindex = newPos;
              final int serBound = 4 * kvend;
              // bytes remaining before the lock must be held and limits
              // checked is the minimum of three arcs: the metadata space, the
              // serialization space, and the soft limit
              bufferRemaining =
                  Math.min(
                          // metadata max
                          distanceTo(bufend, newPos),
                          Math.min(
                              // serialization max
                              distanceTo(newPos, serBound),
                              // soft limit
                              softLimit))
                      - 2 * METASIZE;
            }
          }
        } while (false);
      } finally {
        // always release the lock, including when resetSpill/startSpill throw
        spillLock.unlock();
      }
    }

    try {
      // serialize key bytes into buffer
      int keystart = bufindex;
      keySerializer.serialize(key);
      if (bufindex < keystart) {
        // wrapped the key; must make contiguous
        bb.shiftBufferedKey();
        keystart = 0;
      }
      // serialize value bytes into buffer
      final int valstart = bufindex;
      valSerializer.serialize(value);
      // It's possible for records to have zero length, i.e. the serializer
      // will perform no writes. To ensure that the boundary conditions are
      // checked and that the kvindex invariant is maintained, perform a
      // zero-length write into the buffer. The logic monitoring this could be
      // moved into collect, but this is cleaner and inexpensive. For now, it
      // is acceptable.
      bb.write(b0, 0, 0);

      // the record must be marked after the preceding write, as the metadata
      // for this record are not yet written
      int valend = bb.markRecord();

      mapOutputRecordCounter.increment(1);
      // record size is measured from key start to value end, using the three-argument
      // distanceTo with bufvoid as the bound
      mapOutputByteCounter.increment(distanceTo(keystart, valend, bufvoid));

      // write accounting info
      kvmeta.put(kvindex + PARTITION, partition);
      kvmeta.put(kvindex + KEYSTART, keystart);
      kvmeta.put(kvindex + VALSTART, valstart);
      kvmeta.put(kvindex + VALLEN, distanceTo(valstart, valend));
      // advance kvindex
      // (moves backwards by NMETA, wrapping modulo kvmeta.capacity(); the arithmetic is done in
      // long so the intermediate sum cannot overflow int)
      kvindex = (int) (((long) kvindex - NMETA + kvmeta.capacity()) % kvmeta.capacity());
    } catch (MapBufferTooSmallException e) {
      // The record could not be held in the in-memory buffer (per the log message below); it is
      // passed to spillSingleRecord instead and still counted as one output record.
      LOG.info("Record too large for in-memory buffer: " + e.getMessage());
      spillSingleRecord(key, value, partition);
      mapOutputRecordCounter.increment(1);
      return;
    }
  }