Beispiel #1
0
    /**
     * Reads the next record from the wrapped reader and updates the input counters.
     *
     * <p>Progress is refreshed before the read. When a record is returned, the record counter is
     * bumped by one and the byte counter by the distance the reader position moved.
     *
     * @param key the key object handed to the wrapped reader
     * @param value the value object handed to the wrapped reader
     * @return whatever the wrapped reader's {@code next} returned
     * @throws IOException if the underlying read or position lookup fails
     */
    public synchronized boolean next(K key, V value) throws IOException {
      setProgress(getProgress());

      final long startOffset = getPos();
      if (!rawIn.next(key, value)) {
        // Nothing was read: leave the counters untouched.
        return false;
      }

      inputRecordCounter.increment(1);
      inputByteCounter.increment(getPos() - startOffset);
      return true;
    }
Beispiel #2
0
    /**
     * Handles the degenerate case where serialization fails to fit in the in-memory buffer, so we
     * must spill the record from collect directly to a spill file. Consider this "losing".
     *
     * <p>A new spill file and spill index file are created. One segment and one index record are
     * written per partition; only the segment belonging to the record's own partition receives the
     * record. {@code numSpills} is incremented once every segment has been written.
     *
     * @param key the key of the record being spilled
     * @param value the value of the record being spilled
     * @throws IOException if the partitioner returns a partition outside {@code [0, partitions)},
     *     or if creating, writing or closing the spill file or its index fails
     */
    @SuppressWarnings("unchecked")
    private void spillSingleRecord(final K key, final V value) throws IOException {
      // Widen before multiplying so the requested size cannot overflow int arithmetic.
      long size = kvbuffer.length + (long) partitions * APPROX_HEADER_LENGTH;
      FSDataOutputStream out = null;
      FSDataOutputStream indexOut = null;
      final int partition = partitioner.getPartition(key, value, partitions);
      // Reject an out-of-range partition up front: no iteration of the loop below would match
      // it, so the record would otherwise be dropped without any error being raised.
      if (partition < 0 || partition >= partitions) {
        throw new IOException("Illegal partition for " + key + " (" + partition + ")");
      }
      try {
        // create spill file
        Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(), numSpills, size);
        out = localFs.create(filename);
        // create spill index
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(
                getTaskID(), numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        indexOut = localFs.create(indexFilename);
        // every partition gets a segment; only the record's own partition gets data
        for (int i = 0; i < partitions; ++i) {
          IFile.Writer writer = null;
          // Set once writer.close() has returned normally, so the error path below does not
          // close the same writer a second time.
          boolean writerClosed = false;
          try {
            long segmentStart = out.getPos();
            // Create a new codec, don't care!
            writer = new IFile.Writer(job, out, keyClass, valClass, codec);

            if (i == partition) {
              if (job.getCombineOnceOnly()) {
                // the combiner is applied to the single record only when the job asks for it
                Reducer combiner = (Reducer) ReflectionUtils.newInstance(combinerClass, job);
                combineCollector.setWriter(writer);
                combiner.reduce(
                    key,
                    // one-shot iterator exposing just this record's value
                    new Iterator<V>() {
                      private boolean done = false;

                      public boolean hasNext() {
                        return !done;
                      }

                      public V next() {
                        if (done) throw new NoSuchElementException();
                        done = true;
                        return value;
                      }

                      public void remove() {
                        throw new UnsupportedOperationException();
                      }
                    },
                    combineCollector,
                    reporter);
              } else {
                final long recordStart = out.getPos();
                writer.append(key, value);
                // Note that our map byte count will not be accurate with
                // compression
                mapOutputByteCounter.increment(out.getPos() - recordStart);
              }
            }
            writer.close();
            writerClosed = true;

            // index record
            writeIndexRecord(indexOut, out, segmentStart, writer);
          } catch (IOException e) {
            if (null != writer && !writerClosed) {
              writer.close();
            }
            throw e;
          }
        }
        ++numSpills;
      } finally {
        // Close both streams even if closing the first one fails, so neither is leaked.
        try {
          if (out != null) {
            out.close();
          }
        } finally {
          if (indexOut != null) {
            indexOut.close();
          }
        }
      }
    }
Beispiel #3
0
    /**
     * Serializes one map output record into the in-memory buffer and records its accounting
     * entry (partition, key start, value start).
     *
     * <p>If serialization raises {@code MapBufferTooSmallException}, the record is handed to
     * {@code spillSingleRecord} instead and is still counted as an output record.
     *
     * @param key the record key; its runtime class must be exactly {@code keyClass}
     * @param value the record value; its runtime class must be exactly {@code valClass}
     * @throws IOException on a key/value type mismatch, if a previous spill failed, if the
     *     partitioner returns a partition outside {@code [0, partitions)}, or on I/O failure
     */
    @SuppressWarnings("unchecked")
    public synchronized void collect(K key, V value) throws IOException {
      reporter.progress();

      // The runtime types must match the configured map output classes exactly.
      final Class<?> actualKeyClass = key.getClass();
      if (actualKeyClass != keyClass) {
        final String keyMismatch =
            "Type mismatch in key from map: expected "
                + keyClass.getName()
                + ", recieved "
                + actualKeyClass.getName();
        throw new IOException(keyMismatch);
      }
      final Class<?> actualValueClass = value.getClass();
      if (actualValueClass != valClass) {
        final String valueMismatch =
            "Type mismatch in value from map: expected "
                + valClass.getName()
                + ", recieved "
                + actualValueClass.getName();
        throw new IOException(valueMismatch);
      }

      // Surface a recorded spill failure to the caller.
      if (sortSpillException != null) {
        final IOException spillFailure = new IOException("Spill failed");
        spillFailure.initCause(sortSpillException);
        throw spillFailure;
      }

      try {
        // Key bytes go into the buffer first.
        int keyStart = bufindex;
        keySerializer.serialize(key);
        if (keyStart > bufindex) {
          // wrapped the key; reset required
          bb.reset();
          keyStart = 0;
        }

        // Value bytes follow the key.
        final int valueStart = bufindex;
        valSerializer.serialize(value);
        final int recordEnd = bb.markRecord();

        // Bytes occupied by the record, allowing for the end lying before the key start.
        final long serializedBytes;
        if (recordEnd >= keyStart) {
          serializedBytes = recordEnd - keyStart;
        } else {
          serializedBytes = (bufvoid - keyStart) + recordEnd;
        }
        mapOutputByteCounter.increment(serializedBytes);

        if (bufindex == keyStart) {
          // if emitted records make no writes, it's possible to wrap
          // accounting space without notice
          bb.write(new byte[0], 0, 0);
        }

        final int targetPartition = partitioner.getPartition(key, value, partitions);
        final boolean partitionInRange = targetPartition >= 0 && targetPartition < partitions;
        if (!partitionInRange) {
          throw new IOException("Illegal partition for " + key + " (" + targetPartition + ")");
        }
        mapOutputRecordCounter.increment(1);

        // update accounting info
        final int acctBase = kvindex * ACCTSIZE;
        kvoffsets[kvindex] = acctBase;
        kvindices[acctBase + PARTITION] = targetPartition;
        kvindices[acctBase + KEYSTART] = keyStart;
        kvindices[acctBase + VALSTART] = valueStart;
        kvindex = (kvindex + 1) % kvoffsets.length;
      } catch (MapBufferTooSmallException tooLarge) {
        LOG.info("Record too large for in-memory buffer: " + tooLarge.getMessage());
        spillSingleRecord(key, value);
        mapOutputRecordCounter.increment(1);
      }
    }
Beispiel #4
0
 /**
  * Writes a single map output record straight to {@code out} and counts it.
  *
  * @param key the record key
  * @param value the record value
  * @throws IOException if writing the record fails
  */
 public void collect(K key, V value) throws IOException {
   // Report progress before the write.
   reporter.progress();

   out.write(key, value);

   // Count the record only after the write has returned normally.
   mapOutputRecordCounter.increment(1);
 }