/**
 * INTERNAL: Build and return the element described by a change set that was removed from the
 * collection.
 *
 * <p>Unless the changes are being merged into the distributed cache, the unit-of-work clone held
 * by the change set is first passed to {@code registerRemovedNewObjectIfRequired} on the merge
 * manager.
 *
 * @param changeSet the removed element's change set; must be an {@code ObjectChangeSet}
 * @param mergeManager the merge manager driving the current merge
 * @return the element produced by {@link #buildElementFromChangeSet(Object, MergeManager)}
 */
public Object buildRemovedElementFromChangeSet(Object changeSet, MergeManager mergeManager) {
  ObjectChangeSet removedChangeSet = (ObjectChangeSet) changeSet;
  boolean mergingIntoDistributedCache = mergeManager.shouldMergeChangesIntoDistributedCache();
  if (!mergingIntoDistributedCache) {
    Object removedClone = removedChangeSet.getUnitOfWorkClone();
    mergeManager.registerRemovedNewObjectIfRequired(removedClone);
  }
  return this.buildElementFromChangeSet(changeSet, mergeManager);
}
/**
 * INTERNAL: Build and return the element described by a change set that was added to the
 * collection.
 *
 * <p>When cascading applies, the change set is merged into its target first: the target version
 * of the source object for a distributed-cache merge, otherwise the unit-of-work clone.
 *
 * @param changeSet the added element's change set; must be an {@code ObjectChangeSet}
 * @param mergeManager the merge manager driving the current merge
 * @return the element produced by {@link #buildElementFromChangeSet(Object, MergeManager)}
 */
public Object buildAddedElementFromChangeSet(Object changeSet, MergeManager mergeManager) {
  ObjectChangeSet addedChangeSet = (ObjectChangeSet) changeSet;
  if (this.shouldMergeCascadeParts(mergeManager)) {
    Object mergeTarget =
        mergeManager.shouldMergeChangesIntoDistributedCache()
            ? addedChangeSet.getTargetVersionOfSourceObject(mergeManager.getSession(), true)
            : addedChangeSet.getUnitOfWorkClone();
    mergeManager.mergeChanges(mergeTarget, addedChangeSet);
  }
  return this.buildElementFromChangeSet(changeSet, mergeManager);
}
public void run() { try { while (!stopped && !Thread.currentThread().isInterrupted()) { MapHost host = null; try { // If merge is on, block merger.waitForInMemoryMerge(); // Get a host to shuffle from host = scheduler.getHost(); metrics.threadBusy(); // Shuffle copyFromHost(host); } finally { if (host != null) { scheduler.freeHost(host); metrics.threadFree(); } } } } catch (InterruptedException ie) { return; } catch (Throwable t) { shuffle.reportException(t); } }
/**
 * INTERNAL: Build and return the target-side element that corresponds to the given source
 * element.
 *
 * <p>When cascading applies, the element is merged first, using its change set from the unit of
 * work's change set when the merge session is a unit of work and such a change set exists.
 *
 * @param element the source element
 * @param mergeManager the merge manager driving the current merge
 * @return the target version of {@code element} as reported by the merge manager
 */
public Object buildElementFromElement(Object element, MergeManager mergeManager) {
  if (!this.shouldMergeCascadeParts(mergeManager)) {
    return mergeManager.getTargetVersionOfSourceObject(element);
  }

  ObjectChangeSet elementChangeSet = null;
  if (mergeManager.getSession().isUnitOfWork()) {
    UnitOfWorkImpl unitOfWork = (UnitOfWorkImpl) mergeManager.getSession();
    UnitOfWorkChangeSet unitOfWorkChanges =
        (UnitOfWorkChangeSet) unitOfWork.getUnitOfWorkChangeSet();
    if (unitOfWorkChanges != null) {
      elementChangeSet = (ObjectChangeSet) unitOfWorkChanges.getObjectChangeSetForClone(element);
    }
  }

  // The change set may legitimately still be null here; it is passed through as-is.
  Object elementToMerge = mergeManager.getObjectToMerge(element);
  mergeManager.mergeChanges(elementToMerge, elementChangeSet);
  return mergeManager.getTargetVersionOfSourceObject(element);
}
/** * Merge changes from the source to the target object. Make the necessary removals and adds and * map key modifications. */ private void mergeChangesIntoObjectWithoutOrder( Object target, ChangeRecord changeRecord, Object source, MergeManager mergeManager) { EISCollectionChangeRecord sdkChangeRecord = (EISCollectionChangeRecord) changeRecord; ContainerPolicy cp = this.getContainerPolicy(); AbstractSession session = mergeManager.getSession(); Object targetCollection = null; if (sdkChangeRecord.getOwner().isNew()) { targetCollection = cp.containerInstance(sdkChangeRecord.getAdds().size()); } else { targetCollection = this.getRealCollectionAttributeValueFromObject(target, session); } Vector removes = sdkChangeRecord.getRemoves(); Vector adds = sdkChangeRecord.getAdds(); Vector changedMapKeys = sdkChangeRecord.getChangedMapKeys(); synchronized (targetCollection) { for (Enumeration stream = removes.elements(); stream.hasMoreElements(); ) { Object removeElement = this.buildRemovedElementFromChangeSet(stream.nextElement(), mergeManager); Object targetElement = null; for (Object iter = cp.iteratorFor(targetCollection); cp.hasNext(iter); ) { targetElement = cp.next(iter, session); if (this.compareElements(targetElement, removeElement, session)) { break; // matching element found - skip the rest of them } } if (targetElement != null) { // a matching element was found, remove it cp.removeFrom(targetElement, targetCollection, session); } } for (Enumeration stream = adds.elements(); stream.hasMoreElements(); ) { Object addElement = this.buildAddedElementFromChangeSet(stream.nextElement(), mergeManager); cp.addInto(addElement, targetCollection, session); } for (Enumeration stream = changedMapKeys.elements(); stream.hasMoreElements(); ) { Object changedMapKeyElement = this.buildAddedElementFromChangeSet(stream.nextElement(), mergeManager); Object originalElement = ((UnitOfWorkImpl) session).getOriginalVersionOfObject(changedMapKeyElement); cp.removeFrom(originalElement, 
targetCollection, session); cp.addInto(changedMapKeyElement, targetCollection, session); } } // reset the attribute to allow for set method to re-morph changes if the collection is not // being stored directly this.setRealAttributeValueInObject(target, targetCollection); }
/** * INTERNAL: Merge changes from the source to the target object. Simply replace the entire target * collection. */ public void mergeIntoObject( Object target, boolean isTargetUnInitialized, Object source, MergeManager mergeManager) { ContainerPolicy cp = this.getContainerPolicy(); AbstractSession session = mergeManager.getSession(); Object sourceCollection = this.getRealCollectionAttributeValueFromObject(source, session); Object targetCollection = cp.containerInstance(cp.sizeFor(sourceCollection)); for (Object iter = cp.iteratorFor(sourceCollection); cp.hasNext(iter); ) { Object targetElement = this.buildElementFromElement(cp.next(iter, session), mergeManager); cp.addInto(targetElement, targetCollection, session); } // reset the attribute to allow for set method to re-morph changes if the collection is not // being stored directly this.setRealAttributeValueInObject(target, targetCollection); }
/** * Merge changes from the source to the target object. Simply replace the entire target * collection. */ private void mergeChangesIntoObjectWithOrder( Object target, ChangeRecord changeRecord, Object source, MergeManager mergeManager) { ContainerPolicy cp = this.getContainerPolicy(); AbstractSession session = mergeManager.getSession(); Vector changes = ((EISOrderedCollectionChangeRecord) changeRecord).getNewCollection(); Object targetCollection = cp.containerInstance(changes.size()); for (Enumeration stream = changes.elements(); stream.hasMoreElements(); ) { Object targetElement = this.buildAddedElementFromChangeSet(stream.nextElement(), mergeManager); cp.addInto(targetElement, targetCollection, session); } // reset the attribute to allow for set method to re-morph changes if the collection is not // being stored directly this.setRealAttributeValueInObject(target, targetCollection); }
/**
 * Build and return the element for a change set: the target version of the change set's source
 * object in the merge manager's session.
 *
 * @param changeSet the element's change set; must be an {@code ObjectChangeSet}
 * @param mergeManager the merge manager supplying the session
 * @return the target version of the source object
 */
protected Object buildElementFromChangeSet(Object changeSet, MergeManager mergeManager) {
  ObjectChangeSet objectChangeSet = (ObjectChangeSet) changeSet;
  return objectChangeSet.getTargetVersionOfSourceObject(mergeManager.getSession());
}
private InputAttemptIdentifier[] copyMapOutput( MapHost host, DataInputStream input, Set<InputAttemptIdentifier> remaining) { MapOutput mapOutput = null; InputAttemptIdentifier srcAttemptId = null; long decompressedLength = -1; long compressedLength = -1; try { long startTime = System.currentTimeMillis(); int forReduce = -1; // Read the shuffle header try { ShuffleHeader header = new ShuffleHeader(); header.readFields(input); srcAttemptId = scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce); compressedLength = header.compressedLength; decompressedLength = header.uncompressedLength; forReduce = header.forReduce; } catch (IllegalArgumentException e) { badIdErrs.increment(1); LOG.warn("Invalid map id ", e); // Don't know which one was bad, so consider all of them as bad return remaining.toArray(new InputAttemptIdentifier[remaining.size()]); } // Do some basic sanity verification if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, srcAttemptId)) { return new InputAttemptIdentifier[] {srcAttemptId}; } if (LOG.isDebugEnabled()) { LOG.debug( "header: " + srcAttemptId + ", len: " + compressedLength + ", decomp len: " + decompressedLength); } // Get the location for the map output - either in-memory or on-disk mapOutput = merger.reserve(srcAttemptId, decompressedLength, id); // Check if we can shuffle *now* ... if (mapOutput.getType() == Type.WAIT) { LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ..."); // Not an error but wait to process data. return EMPTY_ATTEMPT_ID_ARRAY; } // Go! 
LOG.info( "fetcher#" + id + " about to shuffle output of map " + mapOutput.getAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getType()); if (mapOutput.getType() == Type.MEMORY) { shuffleToMemory(host, mapOutput, input, (int) decompressedLength, (int) compressedLength); } else { shuffleToDisk(host, mapOutput, input, compressedLength); } // Inform the shuffle scheduler long endTime = System.currentTimeMillis(); scheduler.copySucceeded(srcAttemptId, host, compressedLength, endTime - startTime, mapOutput); // Note successful shuffle remaining.remove(srcAttemptId); metrics.successFetch(); return null; } catch (IOException ioe) { ioErrs.increment(1); if (srcAttemptId == null || mapOutput == null) { LOG.info( "fetcher#" + id + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if (srcAttemptId == null) { return remaining.toArray(new InputAttemptIdentifier[remaining.size()]); } else { return new InputAttemptIdentifier[] {srcAttemptId}; } } LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostName(), ioe); // Inform the shuffle-scheduler mapOutput.abort(); metrics.failedFetch(); return new InputAttemptIdentifier[] {srcAttemptId}; } }
@Test(timeout = 10000) public void testConfigs() throws IOException { long maxTaskMem = 8192 * 1024 * 1024l; // Test Shuffle fetch buffer and post merge buffer percentage Configuration conf = new TezConfiguration(defaultConf); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.8f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f); Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) == 6871947776l); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.5f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f); Assert.assertTrue( MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.9f); Assert.assertTrue( MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.1f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.1f); Assert.assertTrue( MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) < Integer.MAX_VALUE); try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 2.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -2.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 1.4f); 
MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong post merge buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, -1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong post merge buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong shuffle fetch buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong shuffle fetch buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } // test post merge mem limit conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.8f); FileSystem localFs = FileSystem.getLocal(conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString(), maxTaskMem); ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class); long initialMemoryAvailable = (long) (maxTaskMem * 0.8); MergeManager mergeManager = new MergeManager( conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1); Assert.assertTrue(mergeManager.postMergeMemLimit > Integer.MAX_VALUE); initialMemoryAvailable = 200 * 1024 * 1024l; // initial mem < memlimit mergeManager = new MergeManager( 
conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1); Assert.assertTrue(mergeManager.postMergeMemLimit == initialMemoryAvailable); }
void testLocalDiskMergeMultipleTasks(boolean interruptInMiddle) throws IOException, InterruptedException { Configuration conf = new TezConfiguration(defaultConf); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName()); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName()); Path localDir = new Path(workDir, "local"); Path srcDir = new Path(workDir, "srcData"); localFs.mkdirs(localDir); localFs.mkdirs(srcDir); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString()); FileSystem localFs = FileSystem.getLocal(conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString()); InputContext t1inputContext = createMockInputContext(UUID.randomUUID().toString()); ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class); ExceptionReporter t1exceptionReporter = mock(ExceptionReporter.class); MergeManager t0mergeManagerReal = new MergeManager( conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, 2000000, null, false, -1); MergeManager t0mergeManager = spy(t0mergeManagerReal); t0mergeManager.configureAndStart(); MergeManager t1mergeManagerReal = new MergeManager( conf, localFs, localDirAllocator, t1inputContext, null, null, null, null, t1exceptionReporter, 2000000, null, false, -1); MergeManager t1mergeManager = spy(t1mergeManagerReal); // Partition 0 Keys 0-2, Partition 1 Keys 3-5 SrcFileInfo src1Info = createFile( conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 0); // Partition 0 Keys 6-8, Partition 1 Keys 9-11 SrcFileInfo src2Info = createFile( conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 6); // Simulating Task 0 fetches partition 0. 
(targetIndex = 0,1) // Simulating Task 1 fetches partition 1. (targetIndex = 0,1) InputAttemptIdentifier t0Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName()); InputAttemptIdentifier t0Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName()); InputAttemptIdentifier t1Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName()); InputAttemptIdentifier t1Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName()); MapOutput t0MapOutput0 = getMapOutputForDirectDiskFetch( t0Identifier0, src1Info.path, src1Info.indexedRecords[0], t0mergeManager); MapOutput t0MapOutput1 = getMapOutputForDirectDiskFetch( t0Identifier1, src2Info.path, src2Info.indexedRecords[0], t0mergeManager); MapOutput t1MapOutput0 = getMapOutputForDirectDiskFetch( t1Identifier0, src1Info.path, src1Info.indexedRecords[1], t1mergeManager); MapOutput t1MapOutput1 = getMapOutputForDirectDiskFetch( t1Identifier1, src2Info.path, src2Info.indexedRecords[1], t1mergeManager); t0MapOutput0.commit(); t0MapOutput1.commit(); verify(t0mergeManager).closeOnDiskFile(t0MapOutput0.getOutputPath()); verify(t0mergeManager).closeOnDiskFile(t0MapOutput1.getOutputPath()); // Run the OnDiskMerge via MergeManager // Simulate the thread invocation - remove files, and invoke merge List<FileChunk> t0MergeFiles = new LinkedList<FileChunk>(); t0MergeFiles.addAll(t0mergeManager.onDiskMapOutputs); t0mergeManager.onDiskMapOutputs.clear(); if (!interruptInMiddle) { t0mergeManager.onDiskMerger.merge(t0MergeFiles); Assert.assertEquals(1, t0mergeManager.onDiskMapOutputs.size()); } else { doAnswer( new Answer() { @Override public Object answer(InvocationOnMock invocationOnMock) throws Throwable { // Simulate artificial delay so that interrupting thread can get a chance Thread.sleep(2000); return invocationOnMock.callRealMethod(); } }) .when(t0mergeManager) .closeOnDiskFile(any(FileChunk.class)); // Start Interrupting thread Thread interruptingThread = new Thread(new 
InterruptingThread(t0mergeManager.onDiskMerger)); interruptingThread.start(); try { Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); } // Will be interrupted in the middle by interruptingThread. t0mergeManager.onDiskMerger.startMerge(Sets.newHashSet(t0MergeFiles)); t0mergeManager.onDiskMerger.waitForMerge(); Assert.assertNotEquals(1, t0mergeManager.onDiskMapOutputs.size()); } if (!interruptInMiddle) { t1MapOutput0.commit(); t1MapOutput1.commit(); verify(t1mergeManager).closeOnDiskFile(t1MapOutput0.getOutputPath()); verify(t1mergeManager).closeOnDiskFile(t1MapOutput1.getOutputPath()); // Run the OnDiskMerge via MergeManager // Simulate the thread invocation - remove files, and invoke merge List<FileChunk> t1MergeFiles = new LinkedList<FileChunk>(); t1MergeFiles.addAll(t1mergeManager.onDiskMapOutputs); t1mergeManager.onDiskMapOutputs.clear(); t1mergeManager.onDiskMerger.merge(t1MergeFiles); Assert.assertEquals(1, t1mergeManager.onDiskMapOutputs.size()); Assert.assertNotEquals( t0mergeManager.onDiskMapOutputs.iterator().next().getPath(), t1mergeManager.onDiskMapOutputs.iterator().next().getPath()); Assert.assertTrue( t0mergeManager .onDiskMapOutputs .iterator() .next() .getPath() .toString() .contains(t0inputContext.getUniqueIdentifier())); Assert.assertTrue( t1mergeManager .onDiskMapOutputs .iterator() .next() .getPath() .toString() .contains(t1inputContext.getUniqueIdentifier())); } }
@Test(timeout = 60000l) public void testIntermediateMemoryMerge() throws Throwable { Configuration conf = new TezConfiguration(defaultConf); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName()); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName()); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 3); Path localDir = new Path(workDir, "local"); Path srcDir = new Path(workDir, "srcData"); localFs.mkdirs(localDir); localFs.mkdirs(srcDir); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString()); FileSystem localFs = FileSystem.getLocal(conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); InputContext inputContext = createMockInputContext(UUID.randomUUID().toString()); ExceptionReporter exceptionReporter = mock(ExceptionReporter.class); MergeManager mergeManager = new MergeManager( conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1); mergeManager.configureAndStart(); assertEquals(0, mergeManager.getUsedMemory()); assertEquals(0, mergeManager.getCommitMemory()); /** * Test #1 - Have 4 segments where all of them can fit into memory. - After 3 segment commits, * it would trigger mem-to-mem merge. - All of them can be merged in memory. 
*/ byte[] data1 = generateDataBySize(conf, 10); byte[] data2 = generateDataBySize(conf, 20); byte[] data3 = generateDataBySize(conf, 200); byte[] data4 = generateDataBySize(conf, 20000); MapOutput mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0); MapOutput mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0); MapOutput mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0); MapOutput mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0); assertEquals(MapOutput.Type.MEMORY, mo1.getType()); assertEquals(MapOutput.Type.MEMORY, mo2.getType()); assertEquals(MapOutput.Type.MEMORY, mo3.getType()); assertEquals(MapOutput.Type.MEMORY, mo4.getType()); assertEquals(0, mergeManager.getCommitMemory()); // size should be ~20230. assertEquals( data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory()); System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length); System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length); System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length); System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length); // Committing 3 segments should trigger mem-to-mem merge mo1.commit(); mo2.commit(); mo3.commit(); mo4.commit(); // Wait for mem-to-mem to complete mergeManager.waitForMemToMemMerge(); assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size()); assertEquals(1, mergeManager.inMemoryMapOutputs.size()); mergeManager.close(true); /** * Test #2 - Have 4 segments where all of them can fit into memory, but one of them would be big * enough that it can not be fit in memory during mem-to-mem merging. * * <p>- After 3 segment commits, it would trigger mem-to-mem merge. - Smaller segments which can * be fit in additional memory allocated gets merged. 
*/ mergeManager = new MergeManager( conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1); mergeManager.configureAndStart(); // Single shuffle limit is 25% of 2000000 data1 = generateDataBySize(conf, 10); data2 = generateDataBySize(conf, 400000); data3 = generateDataBySize(conf, 400000); data4 = generateDataBySize(conf, 400000); mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0); mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0); mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0); mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0); assertEquals(MapOutput.Type.MEMORY, mo1.getType()); assertEquals(MapOutput.Type.MEMORY, mo2.getType()); assertEquals(MapOutput.Type.MEMORY, mo3.getType()); assertEquals(MapOutput.Type.MEMORY, mo4.getType()); assertEquals(0, mergeManager.getCommitMemory()); assertEquals( data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory()); System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length); System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length); System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length); System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length); // Committing 3 segments should trigger mem-to-mem merge mo1.commit(); mo2.commit(); mo3.commit(); mo4.commit(); // Wait for mem-to-mem to complete mergeManager.waitForMemToMemMerge(); /** * Already all segments are in memory which is around 120000. It would not be able to allocate * more than 800000 for mem-to-mem. So it would pick up only 2 small segments which can be * accomodated within 800000. */ assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size()); assertEquals(2, mergeManager.inMemoryMapOutputs.size()); mergeManager.close(true); /** * Test #3 - Set number of segments for merging to 4. 
- Have 4 in-memory segments of size 400000 * each - Committing 4 segments would trigger mem-to-mem - But none of them can be merged as * there is no enough head room for merging in memory. */ mergeManager = new MergeManager( conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1); mergeManager.configureAndStart(); // Single shuffle limit is 25% of 2000000 data1 = generateDataBySize(conf, 400000); data2 = generateDataBySize(conf, 400000); data3 = generateDataBySize(conf, 400000); data4 = generateDataBySize(conf, 400000); mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0); mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0); mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0); mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0); assertEquals(MapOutput.Type.MEMORY, mo1.getType()); assertEquals(MapOutput.Type.MEMORY, mo2.getType()); assertEquals(MapOutput.Type.MEMORY, mo3.getType()); assertEquals(MapOutput.Type.MEMORY, mo4.getType()); assertEquals(0, mergeManager.getCommitMemory()); assertEquals( data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory()); System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length); System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length); System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length); System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length); // Committing 3 segments should trigger mem-to-mem merge mo1.commit(); mo2.commit(); mo3.commit(); mo4.commit(); // Wait for mem-to-mem to complete mergeManager.waitForMemToMemMerge(); // None of them can be merged as new mem needed for mem-to-mem can't // accomodate any segements assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size()); assertEquals(4, mergeManager.inMemoryMapOutputs.size()); mergeManager.close(true); /** 
* Test #4 - Set number of segments for merging to 4. - Have 4 in-memory segments of size * {490000,490000,490000,230000} - Committing 4 segments would trigger mem-to-mem - But only * 300000 can fit into memory. This should not be merged as there is no point in merging single * segment. It should be added back to the inMemorySegments */ conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4); mergeManager = new MergeManager( conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1); mergeManager.configureAndStart(); // Single shuffle limit is 25% of 2000000 data1 = generateDataBySize(conf, 490000); data2 = generateDataBySize(conf, 490000); data3 = generateDataBySize(conf, 490000); data4 = generateDataBySize(conf, 230000); mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0); mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0); mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0); mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0); assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000)); assertEquals(MapOutput.Type.MEMORY, mo1.getType()); assertEquals(MapOutput.Type.MEMORY, mo2.getType()); assertEquals(MapOutput.Type.MEMORY, mo3.getType()); assertEquals(MapOutput.Type.MEMORY, mo4.getType()); assertEquals(0, mergeManager.getCommitMemory()); assertEquals( data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory()); System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length); System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length); System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length); System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length); // Committing 4 segments should trigger mem-to-mem merge mo1.commit(); mo2.commit(); mo3.commit(); mo4.commit(); // 4 segments 
were there originally in inMemoryMapOutput. int numberOfMapOutputs = 4; // Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit // into memory, it should return early mergeManager.waitForMemToMemMerge(); // Check if inMemorySegment has got the MapOutput back for merging later assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size()); mergeManager.close(true); /** * Test #5 - Same to #4, but calling mergeManager.close(false) and confirm that final merge * doesn't occur. */ conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4); mergeManager = new MergeManager( conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1); mergeManager.configureAndStart(); // Single shuffle limit is 25% of 2000000 data1 = generateDataBySize(conf, 490000); data2 = generateDataBySize(conf, 490000); data3 = generateDataBySize(conf, 490000); data4 = generateDataBySize(conf, 230000); mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0); mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0); mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0); mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0); assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000)); assertEquals(MapOutput.Type.MEMORY, mo1.getType()); assertEquals(MapOutput.Type.MEMORY, mo2.getType()); assertEquals(MapOutput.Type.MEMORY, mo3.getType()); assertEquals(MapOutput.Type.MEMORY, mo4.getType()); assertEquals(0, mergeManager.getCommitMemory()); assertEquals( data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory()); System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length); System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length); System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length); 
System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length); // Committing 4 segments should trigger mem-to-mem merge mo1.commit(); mo2.commit(); mo3.commit(); mo4.commit(); // 4 segments were there originally in inMemoryMapOutput. numberOfMapOutputs = 4; // Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit // into memory, it should return early mergeManager.waitForMemToMemMerge(); // Check if inMemorySegment has got the MapOutput back for merging later assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size()); Assert.assertNull(mergeManager.close(false)); Assert.assertFalse(mergeManager.isMergeComplete()); }
/**
 * Checks used/committed memory accounting when mem-to-mem merging is enabled with a
 * two-segment threshold: two in-memory outputs are reserved, filled, committed one at a
 * time (second one first), and the counters are asserted after each step and again after
 * {@code waitForMemToMemMerge()} returns.
 */
@Test(timeout = 20000)
public void testIntermediateMemoryMergeAccounting() throws Exception {
  final String intWritableName = IntWritable.class.getName();

  Configuration conf = new TezConfiguration(defaultConf);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, intWritableName);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, intWritableName);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 2);

  // The directories are created through the `localFs` member declared outside this
  // method; the file system handed to the MergeManager below is obtained separately.
  Path localDir = new Path(workDir, "local");
  Path srcDir = new Path(workDir, "srcData");
  localFs.mkdirs(localDir);
  localFs.mkdirs(srcDir);
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

  FileSystem mergeFs = FileSystem.getLocal(conf);
  LocalDirAllocator dirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext mockContext = createMockInputContext(UUID.randomUUID().toString());
  ExceptionReporter reporter = mock(ExceptionReporter.class);

  MergeManager manager =
      new MergeManager(
          conf,
          mergeFs,
          dirAllocator,
          mockContext,
          null,
          null,
          null,
          null,
          reporter,
          2000000,
          null,
          false,
          -1);
  manager.configureAndStart();

  // Nothing reserved or committed yet.
  assertEquals(0, manager.getUsedMemory());
  assertEquals(0, manager.getCommitMemory());

  byte[] firstPayload = generateData(conf, 10);
  byte[] secondPayload = generateData(conf, 20);
  int combinedLength = firstPayload.length + secondPayload.length;

  MapOutput firstMapOutput =
      manager.reserve(null, firstPayload.length, firstPayload.length, 0);
  MapOutput secondMapOutput =
      manager.reserve(null, secondPayload.length, secondPayload.length, 0);

  // Both reservations must land in memory and count as used, but not yet committed.
  assertEquals(MapOutput.Type.MEMORY, firstMapOutput.getType());
  assertEquals(MapOutput.Type.MEMORY, secondMapOutput.getType());
  assertEquals(0, manager.getCommitMemory());
  assertEquals(combinedLength, manager.getUsedMemory());

  System.arraycopy(firstPayload, 0, firstMapOutput.getMemory(), 0, firstPayload.length);
  System.arraycopy(secondPayload, 0, secondMapOutput.getMemory(), 0, secondPayload.length);

  // Commit the second output first: only its bytes become committed.
  secondMapOutput.commit();
  assertEquals(secondPayload.length, manager.getCommitMemory());
  assertEquals(combinedLength, manager.getUsedMemory());

  // Committing the other output reaches the configured segment count of 2.
  firstMapOutput.commit();
  manager.waitForMemToMemMerge();

  // After the wait, both counters are expected to equal the combined payload size.
  assertEquals(combinedLength, manager.getCommitMemory());
  assertEquals(combinedLength, manager.getUsedMemory());
}
/**
 * Walks a MergeManager through reserve/abort and reserve/close/release, asserting the
 * used and committed memory counters after every step.
 */
@Test(timeout = 10000)
public void testReservationAccounting() throws IOException {
  Configuration conf = new TezConfiguration(defaultConf);
  FileSystem localFs = FileSystem.getLocal(conf);
  InputContext mockContext = createMockInputContext(UUID.randomUUID().toString());
  ExceptionReporter reporter = mock(ExceptionReporter.class);

  MergeManager manager =
      new MergeManager(
          conf,
          localFs,
          null,
          mockContext,
          null,
          null,
          null,
          null,
          reporter,
          2000000,
          null,
          false,
          -1);
  manager.configureAndStart();

  // Fresh manager: both counters start at zero.
  assertEquals(0, manager.getUsedMemory());
  assertEquals(0, manager.getCommitMemory());

  // A 1-byte reservation counts as used only.
  MapOutput abortedOutput = manager.reserve(null, 1, 1, 0);
  assertEquals(1, manager.getUsedMemory());
  assertEquals(0, manager.getCommitMemory());

  // Aborting the reservation brings used memory back to zero.
  abortedOutput.abort();
  assertEquals(0, manager.getUsedMemory());
  assertEquals(0, manager.getCommitMemory());

  // A 2-byte reservation closed as an in-memory file shows up in both counters.
  MapOutput closedOutput = manager.reserve(null, 2, 2, 0);
  manager.closeInMemoryFile(closedOutput);
  assertEquals(2, manager.getUsedMemory());
  assertEquals(2, manager.getCommitMemory());

  // Releasing the committed bytes zeroes both counters again.
  manager.releaseCommittedMemory(2);
  assertEquals(0, manager.getUsedMemory());
  assertEquals(0, manager.getCommitMemory());
}
private TaskAttemptID[] copyMapOutput( MapHost host, DataInputStream input, Set<TaskAttemptID> remaining) { MapOutput<K, V> mapOutput = null; TaskAttemptID mapId = null; long decompressedLength = -1; long compressedLength = -1; try { long startTime = System.currentTimeMillis(); int forReduce = -1; // Read the shuffle header try { ShuffleHeader header = new ShuffleHeader(); header.readFields(input); mapId = TaskAttemptID.forName(header.mapId); compressedLength = header.compressedLength; decompressedLength = header.uncompressedLength; forReduce = header.forReduce; } catch (IllegalArgumentException e) { badIdErrs.increment(1); LOG.warn("Invalid map id ", e); // Don't know which one was bad, so consider all of them as bad return remaining.toArray(new TaskAttemptID[remaining.size()]); } InputStream is = input; is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength); compressedLength -= CryptoUtils.cryptoPadding(jobConf); decompressedLength -= CryptoUtils.cryptoPadding(jobConf); // Do some basic sanity verification if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId)) { return new TaskAttemptID[] {mapId}; } if (LOG.isDebugEnabled()) { LOG.debug( "header: " + mapId + ", len: " + compressedLength + ", decomp len: " + decompressedLength); } // Get the location for the map output - either in-memory or on-disk try { mapOutput = merger.reserve(mapId, decompressedLength, id); } catch (IOException ioe) { // kill this reduce attempt ioErrs.increment(1); scheduler.reportLocalError(ioe); return EMPTY_ATTEMPT_ID_ARRAY; } // Check if we can shuffle *now* ... if (mapOutput == null) { LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ..."); // Not an error but wait to process data. return EMPTY_ATTEMPT_ID_ARRAY; } // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError // on decompression failures. Catching and re-throwing as IOException // to allow fetch failure logic to be processed try { // Go! 
LOG.info( "fetcher#" + id + " about to shuffle output of map " + mapOutput.getMapId() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getDescription()); mapOutput.shuffle(host, is, compressedLength, decompressedLength, metrics, reporter); } catch (java.lang.InternalError e) { LOG.warn("Failed to shuffle for fetcher#" + id, e); throw new IOException(e); } // Inform the shuffle scheduler long endTime = System.currentTimeMillis(); scheduler.copySucceeded(mapId, host, compressedLength, endTime - startTime, mapOutput); // Note successful shuffle remaining.remove(mapId); metrics.successFetch(); return null; } catch (IOException ioe) { ioErrs.increment(1); if (mapId == null || mapOutput == null) { LOG.info( "fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if (mapId == null) { return remaining.toArray(new TaskAttemptID[remaining.size()]); } else { return new TaskAttemptID[] {mapId}; } } LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe); // Inform the shuffle-scheduler mapOutput.abort(); metrics.failedFetch(); return new TaskAttemptID[] {mapId}; } }