/** * Attempt to write a sequence of bytes to the collection buffer. This method will block if * the spill thread is running and it cannot write. * * @throws MapBufferTooSmallException if record is too large to deserialize into the * collection buffer. */ @Override public synchronized void write(byte b[], int off, int len) throws IOException { boolean kvfull = false; boolean buffull = false; boolean wrap = false; synchronized (spillLock) { do { if (sortSpillException != null) { throw (IOException) new IOException("Spill failed").initCause(sortSpillException); } // sufficient accounting space? final int kvnext = (kvindex + 1) % kvoffsets.length; kvfull = kvnext == kvstart; // sufficient buffer space? if (bufstart <= bufend && bufend <= bufindex) { buffull = bufindex + len > bufvoid; wrap = (bufvoid - bufindex) + bufstart > len; } else { // bufindex <= bufstart <= bufend // bufend <= bufindex <= bufstart wrap = false; buffull = bufindex + len > bufstart; } if (kvstart == kvend) { // spill thread not running if (kvend != kvindex) { // we have records we can spill final boolean kvsoftlimit = (kvnext > kvend) ? kvnext - kvend > softRecordLimit : kvend - kvnext <= kvoffsets.length - softRecordLimit; final boolean bufsoftlimit = (bufindex > bufend) ? bufindex - bufend > softBufferLimit : bufend - bufindex < bufvoid - softBufferLimit; if (kvsoftlimit || bufsoftlimit || (buffull && !wrap)) { LOG.info( "Spilling map output: buffer full = " + bufsoftlimit + " and record full = " + kvsoftlimit); LOG.info( "bufstart = " + bufstart + "; bufend = " + bufmark + "; bufvoid = " + bufvoid); LOG.info( "kvstart = " + kvstart + "; kvend = " + kvindex + "; length = " + kvoffsets.length); kvend = kvindex; bufend = bufmark; // TODO No need to recreate this thread every time SpillThread t = new SpillThread(); t.setDaemon(true); t.setName("SpillThread"); t.start(); } } else if (buffull && !wrap) { // We have no buffered records, and this record is too large // to write into kvbuffer. We must spill it directly from // collect final int size = ((bufend <= bufindex) ? bufindex - bufend : (bufvoid - bufend) + bufindex) + len; bufstart = bufend = bufindex = bufmark = 0; kvstart = kvend = kvindex = 0; bufvoid = kvbuffer.length; throw new MapBufferTooSmallException(size + " bytes"); } } if (kvfull || (buffull && !wrap)) { while (kvstart != kvend) { reporter.progress(); try { spillLock.wait(); } catch (InterruptedException e) { throw (IOException) new IOException("Buffer interrupted while waiting for the writer") .initCause(e); } } } } while (kvfull || (buffull && !wrap)); } // here, we know that we have sufficient space to write if (buffull) { final int gaplen = bufvoid - bufindex; System.arraycopy(b, off, kvbuffer, bufindex, gaplen); len -= gaplen; off += gaplen; bufindex = 0; } System.arraycopy(b, off, kvbuffer, bufindex, len); bufindex += len; }
public DefaultSorter( OutputContext outputContext, Configuration conf, int numOutputs, long initialMemoryAvailable) throws IOException { super(outputContext, conf, numOutputs, initialMemoryAvailable); // sanity checks final float spillper = this.conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT_DEFAULT); final int sortmb = this.availableMemoryMb; if (spillper > (float) 1.0 || spillper <= (float) 0.0) { throw new IOException( "Invalid \"" + TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT + "\": " + spillper); } if ((sortmb & 0x7FF) != sortmb) { throw new IOException( "Invalid \"" + TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB + "\": " + sortmb); } indexCacheMemoryLimit = this.conf.getInt( TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES_DEFAULT); // buffers and accounting int maxMemUsage = sortmb << 20; maxMemUsage -= maxMemUsage % METASIZE; kvbuffer = new byte[maxMemUsage]; bufvoid = kvbuffer.length; kvmeta = ByteBuffer.wrap(kvbuffer).order(ByteOrder.nativeOrder()).asIntBuffer(); setEquator(0); bufstart = bufend = bufindex = equator; kvstart = kvend = kvindex; maxRec = kvmeta.capacity() / NMETA; softLimit = (int) (kvbuffer.length * spillper); bufferRemaining = softLimit; if (LOG.isInfoEnabled()) { LOG.info(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB + ": " + sortmb); LOG.info("soft limit at " + softLimit); LOG.info("bufstart = " + bufstart + "; bufvoid = " + bufvoid); LOG.info("kvstart = " + kvstart + "; length = " + maxRec); } // k/v serialization valSerializer.open(bb); keySerializer.open(bb); spillInProgress = false; minSpillsForCombine = this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 3); spillThread.setDaemon(true); spillThread.setName( "SpillThread [" + TezUtilsInternal.cleanVertexName(outputContext.getDestinationVertexName() + "]")); spillLock.lock(); try { spillThread.start(); while (!spillThreadRunning) { spillDone.await(); } } catch (InterruptedException e) { throw new IOException("Spill thread failed to initialize", e); } finally { spillLock.unlock(); } if (sortSpillException != null) { throw new IOException("Spill thread failed to initialize", sortSpillException); } }