/**
   * INTERNAL: Build and return the element described by a change set that is being removed from
   * the collection.
   *
   * <p>Unless the merge manager reports that changes are being merged into the distributed cache,
   * the change set's unit-of-work clone is first passed to {@code
   * registerRemovedNewObjectIfRequired}.
   *
   * @param changeSet the change set of the removed element; must be an {@code ObjectChangeSet}
   * @param mergeManager the merge manager driving the current merge
   * @return the element produced by {@code buildElementFromChangeSet}
   */
  public Object buildRemovedElementFromChangeSet(Object changeSet, MergeManager mergeManager) {
    // Cast up front so an unexpected change set type fails before the merge manager is consulted.
    ObjectChangeSet removedChangeSet = (ObjectChangeSet) changeSet;

    if (mergeManager.shouldMergeChangesIntoDistributedCache()) {
      return this.buildElementFromChangeSet(changeSet, mergeManager);
    }

    Object removedClone = removedChangeSet.getUnitOfWorkClone();
    mergeManager.registerRemovedNewObjectIfRequired(removedClone);
    return this.buildElementFromChangeSet(changeSet, mergeManager);
  }
  /**
   * INTERNAL: Build and return the element described by a change set that is being added to the
   * collection.
   *
   * <p>When cascaded parts are to be merged, the change set is first merged into a target object:
   * the target version of the source object when merging into the distributed cache, otherwise the
   * change set's unit-of-work clone.
   *
   * @param changeSet the change set of the added element; must be an {@code ObjectChangeSet}
   * @param mergeManager the merge manager driving the current merge
   * @return the element produced by {@code buildElementFromChangeSet}
   */
  public Object buildAddedElementFromChangeSet(Object changeSet, MergeManager mergeManager) {
    ObjectChangeSet addedChangeSet = (ObjectChangeSet) changeSet;

    if (!this.shouldMergeCascadeParts(mergeManager)) {
      return this.buildElementFromChangeSet(changeSet, mergeManager);
    }

    Object mergeTarget =
        mergeManager.shouldMergeChangesIntoDistributedCache()
            ? addedChangeSet.getTargetVersionOfSourceObject(mergeManager.getSession(), true)
            : addedChangeSet.getUnitOfWorkClone();
    mergeManager.mergeChanges(mergeTarget, addedChangeSet);

    return this.buildElementFromChangeSet(changeSet, mergeManager);
  }
  /**
   * Fetch loop: until stopped or interrupted, waits for any in-memory merge to finish, obtains a
   * host from the scheduler, copies from it, and always hands the host back to the scheduler.
   *
   * <p>An {@link InterruptedException} ends the loop quietly; the thread's interrupt status is
   * restored so that whoever runs this method can still observe the interruption. Any other
   * throwable is reported through {@code shuffle.reportException}.
   */
  public void run() {
    try {
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        MapHost host = null;
        try {
          // If merge is on, block
          merger.waitForInMemoryMerge();

          // Get a host to shuffle from
          host = scheduler.getHost();
          metrics.threadBusy();

          // Shuffle
          copyFromHost(host);
        } finally {
          // Only release what was actually acquired; host stays null if we never got one.
          if (host != null) {
            scheduler.freeHost(host);
            metrics.threadFree();
          }
        }
      }
    } catch (InterruptedException ie) {
      // Catching InterruptedException clears the interrupt flag; set it again before leaving so
      // the interruption is not silently lost.
      Thread.currentThread().interrupt();
    } catch (Throwable t) {
      shuffle.reportException(t);
    }
  }
  /**
   * INTERNAL: Build and return the target-side element that corresponds to the given source
   * element.
   *
   * <p>When cascaded parts are to be merged, the element is merged first. If the merge session is
   * a unit of work that has a change set, the element's object change set is looked up and passed
   * along; otherwise {@code null} is passed as the change set.
   *
   * @param element the source element
   * @param mergeManager the merge manager driving the current merge
   * @return the merge manager's target version of {@code element}
   */
  public Object buildElementFromElement(Object element, MergeManager mergeManager) {
    if (!this.shouldMergeCascadeParts(mergeManager)) {
      return mergeManager.getTargetVersionOfSourceObject(element);
    }

    ObjectChangeSet elementChangeSet = null;
    if (mergeManager.getSession().isUnitOfWork()) {
      UnitOfWorkImpl unitOfWork = (UnitOfWorkImpl) mergeManager.getSession();
      UnitOfWorkChangeSet pendingChanges =
          (UnitOfWorkChangeSet) unitOfWork.getUnitOfWorkChangeSet();
      if (pendingChanges != null) {
        elementChangeSet = (ObjectChangeSet) pendingChanges.getObjectChangeSetForClone(element);
      }
    }

    Object elementToMerge = mergeManager.getObjectToMerge(element);
    mergeManager.mergeChanges(elementToMerge, elementChangeSet);

    return mergeManager.getTargetVersionOfSourceObject(element);
  }
  /**
   * Merge changes from the source to the target object for an unordered collection: apply the
   * removals, then the additions, then the map key modifications recorded in the change record.
   *
   * <p>If the change record's owner is new, a fresh container is created; otherwise the target's
   * current collection is modified in place. In both cases the resulting collection is set back on
   * the target at the end.
   *
   * @param target the object whose collection attribute is updated
   * @param changeRecord the change record; must be an {@code EISCollectionChangeRecord}
   * @param source the source object (not read by this method)
   * @param mergeManager the merge manager driving the current merge
   */
  private void mergeChangesIntoObjectWithoutOrder(
      Object target, ChangeRecord changeRecord, Object source, MergeManager mergeManager) {
    EISCollectionChangeRecord sdkChangeRecord = (EISCollectionChangeRecord) changeRecord;
    ContainerPolicy cp = this.getContainerPolicy();
    AbstractSession session = mergeManager.getSession();

    Object targetCollection = null;
    if (sdkChangeRecord.getOwner().isNew()) {
      targetCollection = cp.containerInstance(sdkChangeRecord.getAdds().size());
    } else {
      targetCollection = this.getRealCollectionAttributeValueFromObject(target, session);
    }

    Vector removes = sdkChangeRecord.getRemoves();
    Vector adds = sdkChangeRecord.getAdds();
    Vector changedMapKeys = sdkChangeRecord.getChangedMapKeys();

    synchronized (targetCollection) {
      for (Enumeration stream = removes.elements(); stream.hasMoreElements(); ) {
        Object removeElement =
            this.buildRemovedElementFromChangeSet(stream.nextElement(), mergeManager);

        // Track the match separately from the iteration variable: if nothing matches, the last
        // element visited must NOT be mistaken for a match and removed.
        Object matchingElement = null;
        for (Object iter = cp.iteratorFor(targetCollection); cp.hasNext(iter); ) {
          Object candidate = cp.next(iter, session);
          if (this.compareElements(candidate, removeElement, session)) {
            matchingElement = candidate;
            break; // matching element found - skip the rest of them
          }
        }
        if (matchingElement != null) {
          // a matching element was found, remove it
          cp.removeFrom(matchingElement, targetCollection, session);
        }
      }

      for (Enumeration stream = adds.elements(); stream.hasMoreElements(); ) {
        Object addElement = this.buildAddedElementFromChangeSet(stream.nextElement(), mergeManager);
        cp.addInto(addElement, targetCollection, session);
      }

      for (Enumeration stream = changedMapKeys.elements(); stream.hasMoreElements(); ) {
        Object changedMapKeyElement =
            this.buildAddedElementFromChangeSet(stream.nextElement(), mergeManager);
        // NOTE(review): this cast assumes the merge session is a unit of work whenever changed
        // map keys are present - confirm against callers.
        Object originalElement =
            ((UnitOfWorkImpl) session).getOriginalVersionOfObject(changedMapKeyElement);
        // Re-insert the element so the container can pick up its new key.
        cp.removeFrom(originalElement, targetCollection, session);
        cp.addInto(changedMapKeyElement, targetCollection, session);
      }
    }

    // reset the attribute to allow for set method to re-morph changes if the collection is not
    // being stored directly
    this.setRealAttributeValueInObject(target, targetCollection);
  }
  /**
   * INTERNAL: Merge the source object's collection into the target object by rebuilding it: every
   * source element is converted with {@code buildElementFromElement} and added to a brand-new
   * container, which then replaces the target's collection attribute.
   *
   * @param target the object whose collection attribute is replaced
   * @param isTargetUnInitialized not read by this method
   * @param source the object whose collection is copied from
   * @param mergeManager the merge manager driving the current merge
   */
  public void mergeIntoObject(
      Object target, boolean isTargetUnInitialized, Object source, MergeManager mergeManager) {
    final ContainerPolicy policy = this.getContainerPolicy();
    final AbstractSession mergeSession = mergeManager.getSession();

    final Object sourceElements =
        this.getRealCollectionAttributeValueFromObject(source, mergeSession);
    final Object rebuiltCollection = policy.containerInstance(policy.sizeFor(sourceElements));

    Object cursor = policy.iteratorFor(sourceElements);
    while (policy.hasNext(cursor)) {
      Object sourceElement = policy.next(cursor, mergeSession);
      Object rebuiltElement = this.buildElementFromElement(sourceElement, mergeManager);
      policy.addInto(rebuiltElement, rebuiltCollection, mergeSession);
    }

    // Go through the attribute setter so that a set method can re-morph the collection when it
    // is not stored directly.
    this.setRealAttributeValueInObject(target, rebuiltCollection);
  }
  /**
   * Merge changes from the source to the target object for an ordered collection. The target's
   * collection is replaced wholesale by a new container filled, in order, from the change
   * record's new collection.
   *
   * @param target the object whose collection attribute is replaced
   * @param changeRecord the change record; must be an {@code EISOrderedCollectionChangeRecord}
   * @param source the source object (not read by this method)
   * @param mergeManager the merge manager driving the current merge
   */
  private void mergeChangesIntoObjectWithOrder(
      Object target, ChangeRecord changeRecord, Object source, MergeManager mergeManager) {
    final ContainerPolicy policy = this.getContainerPolicy();
    final AbstractSession mergeSession = mergeManager.getSession();

    EISOrderedCollectionChangeRecord orderedRecord =
        (EISOrderedCollectionChangeRecord) changeRecord;
    Vector newCollection = orderedRecord.getNewCollection();
    Object rebuiltCollection = policy.containerInstance(newCollection.size());

    Enumeration pending = newCollection.elements();
    while (pending.hasMoreElements()) {
      Object elementChangeSet = pending.nextElement();
      Object rebuiltElement = this.buildAddedElementFromChangeSet(elementChangeSet, mergeManager);
      policy.addInto(rebuiltElement, rebuiltCollection, mergeSession);
    }

    // Go through the attribute setter so that a set method can re-morph the collection when it
    // is not stored directly.
    this.setRealAttributeValueInObject(target, rebuiltCollection);
  }
 /**
  * Build and return the element for the given change set by asking it for the target version of
  * its source object in the merge manager's session.
  *
  * @param changeSet the element's change set; must be an {@code ObjectChangeSet}
  * @param mergeManager the merge manager whose session is used for the lookup
  * @return the target version of the change set's source object
  */
 protected Object buildElementFromChangeSet(Object changeSet, MergeManager mergeManager) {
   ObjectChangeSet elementChangeSet = (ObjectChangeSet) changeSet;
   return elementChangeSet.getTargetVersionOfSourceObject(mergeManager.getSession());
 }
  /**
   * Reads one map output (shuffle header followed by its data) from {@code input} and stores it
   * in the location reserved by the merger, either in memory or on disk.
   *
   * @param host the host the data is being fetched from
   * @param input stream positioned at the next shuffle header
   * @param remaining the attempts still expected from this host; the fetched attempt is removed
   *     from this set on success
   * @return {@code null} when the output was copied successfully; {@code
   *     EMPTY_ATTEMPT_ID_ARRAY} when the merger asked the fetcher to wait; otherwise the attempts
   *     to treat as failed - the single attempt identified from the header, or every entry of
   *     {@code remaining} when the failing attempt could not be identified
   */
  private InputAttemptIdentifier[] copyMapOutput(
      MapHost host, DataInputStream input, Set<InputAttemptIdentifier> remaining) {
    MapOutput mapOutput = null;
    InputAttemptIdentifier srcAttemptId = null;
    // -1 marks "not read yet"; these values are only used for logging if the header read fails.
    long decompressedLength = -1;
    long compressedLength = -1;

    try {
      long startTime = System.currentTimeMillis();
      int forReduce = -1;
      // Read the shuffle header
      try {
        ShuffleHeader header = new ShuffleHeader();
        header.readFields(input);
        srcAttemptId = scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce);
        compressedLength = header.compressedLength;
        decompressedLength = header.uncompressedLength;
        forReduce = header.forReduce;
      } catch (IllegalArgumentException e) {
        badIdErrs.increment(1);
        LOG.warn("Invalid map id ", e);
        // Don't know which one was bad, so consider all of them as bad
        return remaining.toArray(new InputAttemptIdentifier[remaining.size()]);
      }

      // Do some basic sanity verification
      if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, srcAttemptId)) {
        return new InputAttemptIdentifier[] {srcAttemptId};
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "header: "
                + srcAttemptId
                + ", len: "
                + compressedLength
                + ", decomp len: "
                + decompressedLength);
      }

      // Get the location for the map output - either in-memory or on-disk
      mapOutput = merger.reserve(srcAttemptId, decompressedLength, id);

      // Check if we can shuffle *now* ...
      if (mapOutput.getType() == Type.WAIT) {
        LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ...");
        // Not an error but wait to process data.
        return EMPTY_ATTEMPT_ID_ARRAY;
      }

      // Go!
      LOG.info(
          "fetcher#"
              + id
              + " about to shuffle output of map "
              + mapOutput.getAttemptIdentifier()
              + " decomp: "
              + decompressedLength
              + " len: "
              + compressedLength
              + " to "
              + mapOutput.getType());
      if (mapOutput.getType() == Type.MEMORY) {
        // NOTE(review): both lengths are narrowed to int here; presumably the merger only
        // reserves in-memory outputs that fit in an int - confirm.
        shuffleToMemory(host, mapOutput, input, (int) decompressedLength, (int) compressedLength);
      } else {
        shuffleToDisk(host, mapOutput, input, compressedLength);
      }

      // Inform the shuffle scheduler
      long endTime = System.currentTimeMillis();
      scheduler.copySucceeded(srcAttemptId, host, compressedLength, endTime - startTime, mapOutput);
      // Note successful shuffle
      remaining.remove(srcAttemptId);
      metrics.successFetch();
      return null;
    } catch (IOException ioe) {
      ioErrs.increment(1);
      // Either the header was never identified (srcAttemptId == null) or no output location had
      // been reserved yet (mapOutput == null), so there is no reserved output to abort.
      if (srcAttemptId == null || mapOutput == null) {
        LOG.info(
            "fetcher#"
                + id
                + " failed to read map header"
                + srcAttemptId
                + " decomp: "
                + decompressedLength
                + ", "
                + compressedLength,
            ioe);
        if (srcAttemptId == null) {
          // The failing attempt is unknown, so report everything still outstanding.
          return remaining.toArray(new InputAttemptIdentifier[remaining.size()]);
        } else {
          return new InputAttemptIdentifier[] {srcAttemptId};
        }
      }

      LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostName(), ioe);

      // Inform the shuffle-scheduler
      mapOutput.abort();
      metrics.failedFetch();
      return new InputAttemptIdentifier[] {srcAttemptId};
    }
  }
  /**
   * Verifies how {@code MergeManager} interprets the shuffle fetch buffer and post-merge buffer
   * percentages:
   *
   * <ul>
   *   <li>{@code getInitialMemoryRequirement} returns the expected value for valid percentages,
   *       including values above {@code Integer.MAX_VALUE};
   *   <li>it throws {@link IllegalArgumentException} when a configured percentage is out of range;
   *   <li>{@code postMergeMemLimit} can exceed {@code Integer.MAX_VALUE}, and equals the initially
   *       available memory in the second, small-memory scenario.
   * </ul>
   *
   * <p>The configuration object is mutated step by step, so the order of the checks matters.
   */
  @Test(timeout = 10000)
  public void testConfigs() throws IOException {
    // The long suffix on the last factor promotes the product to long (8 GiB).
    long maxTaskMem = 8192 * 1024 * 1024L;

    // Test Shuffle fetch buffer and post merge buffer percentage
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.8f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f);
    Assert.assertEquals(
        6871947776L, MergeManager.getInitialMemoryRequirement(conf, maxTaskMem));

    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.5f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f);
    Assert.assertTrue(
        MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE);

    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.9f);
    Assert.assertTrue(
        MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE);

    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.1f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.1f);
    Assert.assertTrue(
        MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) < Integer.MAX_VALUE);

    // Out-of-range percentages must be rejected. Each catch block is intentionally empty:
    // reaching it is the passing outcome.
    try {
      conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 2.4f);
      MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
      Assert.fail("Should have thrown wrong buffer percent configuration exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }

    try {
      conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -2.4f);
      MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
      Assert.fail("Should have thrown wrong buffer percent configuration exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }

    try {
      conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 1.4f);
      MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
      Assert.fail("Should have thrown wrong post merge buffer percent configuration exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }

    try {
      conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, -1.4f);
      MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
      Assert.fail("Should have thrown wrong post merge buffer percent configuration exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }

    try {
      conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 1.4f);
      MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
      Assert.fail("Should have thrown wrong shuffle fetch buffer percent configuration exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }

    try {
      conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -1.4f);
      MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
      Assert.fail("Should have thrown wrong shuffle fetch buffer percent configuration exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }

    // test post merge mem limit
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.8f);
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator =
        new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString(), maxTaskMem);
    ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
    long initialMemoryAvailable = (long) (maxTaskMem * 0.8);
    MergeManager mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            t0inputContext,
            null,
            null,
            null,
            null,
            t0exceptionReporter,
            initialMemoryAvailable,
            null,
            false,
            -1);
    Assert.assertTrue(mergeManager.postMergeMemLimit > Integer.MAX_VALUE);

    initialMemoryAvailable = 200 * 1024 * 1024L; // initial mem < memlimit
    mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            t0inputContext,
            null,
            null,
            null,
            null,
            t0exceptionReporter,
            initialMemoryAvailable,
            null,
            false,
            -1);
    Assert.assertEquals(initialMemoryAvailable, mergeManager.postMergeMemLimit);
  }
  /**
   * Exercises on-disk merging for two simulated tasks (t0 and t1) that share the same local
   * directory configuration but have distinct input contexts.
   *
   * <p>Two source files are created and each task gets map outputs pointing at one partition of
   * those files. The t0 outputs are committed and merged. Depending on the flag, the merge is
   * either run directly, or started while a second thread (an {@code InterruptingThread} built
   * around the on-disk merger) runs alongside it.
   *
   * <p>In the uninterrupted case the same is repeated for t1, and the test asserts that the two
   * merged outputs live at different paths, each containing its own input context's unique
   * identifier.
   *
   * @param interruptInMiddle when {@code true}, take the interrupted path and assert that the t0
   *     merge did not leave exactly one on-disk output; the t1 checks are skipped
   */
  void testLocalDiskMergeMultipleTasks(boolean interruptInMiddle)
      throws IOException, InterruptedException {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());

    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    // NOTE: at this point `localFs` resolves to a declaration outside this method (presumably a
    // field of the test class); the local variable of the same name is only declared further
    // down and shadows it from there on. Do not reorder these statements.
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);

    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator =
        new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString());
    InputContext t1inputContext = createMockInputContext(UUID.randomUUID().toString());

    ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
    ExceptionReporter t1exceptionReporter = mock(ExceptionReporter.class);

    // Each manager is wrapped in a Mockito spy so calls to closeOnDiskFile can be verified (and,
    // for t0 in the interrupt case, delayed).
    MergeManager t0mergeManagerReal =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            t0inputContext,
            null,
            null,
            null,
            null,
            t0exceptionReporter,
            2000000,
            null,
            false,
            -1);
    MergeManager t0mergeManager = spy(t0mergeManagerReal);
    t0mergeManager.configureAndStart();

    // Unlike t0, configureAndStart() is not called on the t1 manager in this method.
    MergeManager t1mergeManagerReal =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            t1inputContext,
            null,
            null,
            null,
            null,
            t1exceptionReporter,
            2000000,
            null,
            false,
            -1);
    MergeManager t1mergeManager = spy(t1mergeManagerReal);

    // Partition 0 Keys 0-2, Partition 1 Keys 3-5
    SrcFileInfo src1Info =
        createFile(
            conf,
            localFs,
            new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"),
            2,
            3,
            0);
    // Partition 0 Keys 6-8, Partition 1 Keys 9-11
    SrcFileInfo src2Info =
        createFile(
            conf,
            localFs,
            new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"),
            2,
            3,
            6);

    // Simulating Task 0 fetches partition 0. (targetIndex = 0,1)

    // Simulating Task 1 fetches partition 1. (targetIndex = 0,1)

    InputAttemptIdentifier t0Identifier0 =
        new InputAttemptIdentifier(0, 0, src1Info.path.getName());
    InputAttemptIdentifier t0Identifier1 =
        new InputAttemptIdentifier(1, 0, src2Info.path.getName());

    InputAttemptIdentifier t1Identifier0 =
        new InputAttemptIdentifier(0, 0, src1Info.path.getName());
    InputAttemptIdentifier t1Identifier1 =
        new InputAttemptIdentifier(1, 0, src2Info.path.getName());

    // t0 reads index record 0 of each source file, t1 reads index record 1.
    MapOutput t0MapOutput0 =
        getMapOutputForDirectDiskFetch(
            t0Identifier0, src1Info.path, src1Info.indexedRecords[0], t0mergeManager);
    MapOutput t0MapOutput1 =
        getMapOutputForDirectDiskFetch(
            t0Identifier1, src2Info.path, src2Info.indexedRecords[0], t0mergeManager);

    MapOutput t1MapOutput0 =
        getMapOutputForDirectDiskFetch(
            t1Identifier0, src1Info.path, src1Info.indexedRecords[1], t1mergeManager);
    MapOutput t1MapOutput1 =
        getMapOutputForDirectDiskFetch(
            t1Identifier1, src2Info.path, src2Info.indexedRecords[1], t1mergeManager);

    t0MapOutput0.commit();
    t0MapOutput1.commit();
    verify(t0mergeManager).closeOnDiskFile(t0MapOutput0.getOutputPath());
    verify(t0mergeManager).closeOnDiskFile(t0MapOutput1.getOutputPath());
    // Run the OnDiskMerge via MergeManager
    // Simulate the thread invocation - remove files, and invoke merge
    List<FileChunk> t0MergeFiles = new LinkedList<FileChunk>();
    t0MergeFiles.addAll(t0mergeManager.onDiskMapOutputs);
    t0mergeManager.onDiskMapOutputs.clear();

    if (!interruptInMiddle) {
      // Direct merge: the two committed outputs must collapse into a single on-disk output.
      t0mergeManager.onDiskMerger.merge(t0MergeFiles);
      Assert.assertEquals(1, t0mergeManager.onDiskMapOutputs.size());
    } else {

      // Stub closeOnDiskFile on the spy to sleep for two seconds before delegating to the real
      // method.
      doAnswer(
              new Answer() {
                @Override
                public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
                  // Simulate artificial delay so that interrupting thread can get a chance
                  Thread.sleep(2000);
                  return invocationOnMock.callRealMethod();
                }
              })
          .when(t0mergeManager)
          .closeOnDiskFile(any(FileChunk.class));

      // Start Interrupting thread
      Thread interruptingThread = new Thread(new InterruptingThread(t0mergeManager.onDiskMerger));
      interruptingThread.start();
      // Wait one second before starting the merge. An interrupt of this test thread during the
      // sleep is only printed, not propagated.
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }

      // Will be interrupted in the middle by interruptingThread.
      t0mergeManager.onDiskMerger.startMerge(Sets.newHashSet(t0MergeFiles));
      t0mergeManager.onDiskMerger.waitForMerge();
      // The interrupted merge must not have produced the single merged output.
      Assert.assertNotEquals(1, t0mergeManager.onDiskMapOutputs.size());
    }

    if (!interruptInMiddle) {
      t1MapOutput0.commit();
      t1MapOutput1.commit();
      verify(t1mergeManager).closeOnDiskFile(t1MapOutput0.getOutputPath());
      verify(t1mergeManager).closeOnDiskFile(t1MapOutput1.getOutputPath());
      // Run the OnDiskMerge via MergeManager
      // Simulate the thread invocation - remove files, and invoke merge
      List<FileChunk> t1MergeFiles = new LinkedList<FileChunk>();
      t1MergeFiles.addAll(t1mergeManager.onDiskMapOutputs);
      t1mergeManager.onDiskMapOutputs.clear();
      t1mergeManager.onDiskMerger.merge(t1MergeFiles);
      Assert.assertEquals(1, t1mergeManager.onDiskMapOutputs.size());

      // The two tasks must not have written their merged output to the same path.
      Assert.assertNotEquals(
          t0mergeManager.onDiskMapOutputs.iterator().next().getPath(),
          t1mergeManager.onDiskMapOutputs.iterator().next().getPath());

      // Each merged output path must contain the owning task's unique identifier.
      Assert.assertTrue(
          t0mergeManager
              .onDiskMapOutputs
              .iterator()
              .next()
              .getPath()
              .toString()
              .contains(t0inputContext.getUniqueIdentifier()));
      Assert.assertTrue(
          t1mergeManager
              .onDiskMapOutputs
              .iterator()
              .next()
              .getPath()
              .toString()
              .contains(t1inputContext.getUniqueIdentifier()));
    }
  }
  @Test(timeout = 60000l)
  public void testIntermediateMemoryMerge() throws Throwable {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 3);

    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);

    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator =
        new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());

    ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);

    MergeManager mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            inputContext,
            null,
            null,
            null,
            null,
            exceptionReporter,
            2000000,
            null,
            false,
            -1);
    mergeManager.configureAndStart();

    assertEquals(0, mergeManager.getUsedMemory());
    assertEquals(0, mergeManager.getCommitMemory());

    /**
     * Test #1 - Have 4 segments where all of them can fit into memory. - After 3 segment commits,
     * it would trigger mem-to-mem merge. - All of them can be merged in memory.
     */
    byte[] data1 = generateDataBySize(conf, 10);
    byte[] data2 = generateDataBySize(conf, 20);
    byte[] data3 = generateDataBySize(conf, 200);
    byte[] data4 = generateDataBySize(conf, 20000);

    MapOutput mo1 =
        mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0);
    MapOutput mo2 =
        mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0);
    MapOutput mo3 =
        mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0);
    MapOutput mo4 =
        mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0);

    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());

    // size should be ~20230.
    assertEquals(
        data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());

    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);

    // Committing 3 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();

    // Wait for mem-to-mem to complete
    mergeManager.waitForMemToMemMerge();

    assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size());
    assertEquals(1, mergeManager.inMemoryMapOutputs.size());

    mergeManager.close(true);

    /**
     * Test #2 - Have 4 segments where all of them can fit into memory, but one of them would be big
     * enough that it can not be fit in memory during mem-to-mem merging.
     *
     * <p>- After 3 segment commits, it would trigger mem-to-mem merge. - Smaller segments which can
     * be fit in additional memory allocated gets merged.
     */
    mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            inputContext,
            null,
            null,
            null,
            null,
            exceptionReporter,
            2000000,
            null,
            false,
            -1);
    mergeManager.configureAndStart();

    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 10);
    data2 = generateDataBySize(conf, 400000);
    data3 = generateDataBySize(conf, 400000);
    data4 = generateDataBySize(conf, 400000);

    mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0);

    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());

    assertEquals(
        data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());

    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);

    // Committing 3 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();

    // Wait for mem-to-mem to complete
    mergeManager.waitForMemToMemMerge();

    /**
     * Already all segments are in memory which is around 120000. It would not be able to allocate
     * more than 800000 for mem-to-mem. So it would pick up only 2 small segments which can be
     * accomodated within 800000.
     */
    assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size());
    assertEquals(2, mergeManager.inMemoryMapOutputs.size());

    mergeManager.close(true);

    /**
     * Test #3 - Set number of segments for merging to 4. - Have 4 in-memory segments of size 400000
     * each - Committing 4 segments would trigger mem-to-mem - But none of them can be merged as
     * there is no enough head room for merging in memory.
     */
    mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            inputContext,
            null,
            null,
            null,
            null,
            exceptionReporter,
            2000000,
            null,
            false,
            -1);
    mergeManager.configureAndStart();

    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 400000);
    data2 = generateDataBySize(conf, 400000);
    data3 = generateDataBySize(conf, 400000);
    data4 = generateDataBySize(conf, 400000);

    mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0);

    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());

    assertEquals(
        data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());

    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);

    // Committing 3 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();

    // Wait for mem-to-mem to complete
    mergeManager.waitForMemToMemMerge();

    // None of them can be merged as new mem needed for mem-to-mem can't
    // accomodate any segements
    assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
    assertEquals(4, mergeManager.inMemoryMapOutputs.size());

    mergeManager.close(true);

    /**
     * Test #4 - Set number of segments for merging to 4. - Have 4 in-memory segments of size
     * {490000,490000,490000,230000} - Committing 4 segments would trigger mem-to-mem - But only
     * 300000 can fit into memory. This should not be merged as there is no point in merging single
     * segment. It should be added back to the inMemorySegments
     */
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4);
    mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            inputContext,
            null,
            null,
            null,
            null,
            exceptionReporter,
            2000000,
            null,
            false,
            -1);
    mergeManager.configureAndStart();

    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 490000);
    data2 = generateDataBySize(conf, 490000);
    data3 = generateDataBySize(conf, 490000);
    data4 = generateDataBySize(conf, 230000);

    mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0);

    assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000));

    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());

    assertEquals(
        data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());

    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);

    // Committing 4 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();

    // 4 segments were there originally in inMemoryMapOutput.
    int numberOfMapOutputs = 4;

    // Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit
    // into memory, it should return early
    mergeManager.waitForMemToMemMerge();

    // Check if inMemorySegment has got the MapOutput back for merging later
    assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size());

    mergeManager.close(true);

    /**
     * Test #5 - Same as #4, but calls mergeManager.close(false) and confirms that the final merge
     * doesn't occur.
     */
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4);
    mergeManager =
        new MergeManager(
            conf,
            localFs,
            localDirAllocator,
            inputContext,
            null,
            null,
            null,
            null,
            exceptionReporter,
            2000000,
            null,
            false,
            -1);
    mergeManager.configureAndStart();

    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 490000);
    data2 = generateDataBySize(conf, 490000);
    data3 = generateDataBySize(conf, 490000);
    data4 = generateDataBySize(conf, 230000);

    mo1 = mergeManager.reserve(new InputAttemptIdentifier(0, 0), data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(new InputAttemptIdentifier(1, 0), data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(new InputAttemptIdentifier(2, 0), data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(new InputAttemptIdentifier(3, 0), data4.length, data4.length, 0);

    assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000));

    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());

    assertEquals(
        data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());

    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);

    // Committing 4 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();

    // 4 segments were there originally in inMemoryMapOutput.
    numberOfMapOutputs = 4;

    // Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit
    // into memory, it should return early
    mergeManager.waitForMemToMemMerge();

    // Check if inMemorySegment has got the MapOutput back for merging later
    assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size());

    Assert.assertNull(mergeManager.close(false));
    Assert.assertFalse(mergeManager.isMergeComplete());
  }
  /**
   * Reserves and commits two in-memory map outputs with mem-to-mem merging enabled at a threshold
   * of two segments, and checks the manager's used/committed memory counters before the commits,
   * between them, and after waiting for the mem-to-mem merge.
   */
  @Test(timeout = 20000)
  public void testIntermediateMemoryMergeAccounting() throws Exception {
    final String intWritableName = IntWritable.class.getName();

    Configuration config = new TezConfiguration(defaultConf);
    config.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    config.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, intWritableName);
    config.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, intWritableName);
    config.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    config.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 2);

    // Create the scratch directories through the localFs declared outside this method.
    Path localRoot = new Path(workDir, "local");
    localFs.mkdirs(localRoot);
    localFs.mkdirs(new Path(workDir, "srcData"));
    config.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localRoot.toString());

    // The manager itself gets a file system resolved from the finished configuration.
    FileSystem fs = FileSystem.getLocal(config);
    LocalDirAllocator dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext context = createMockInputContext(UUID.randomUUID().toString());

    MergeManager manager =
        new MergeManager(
            config,
            fs,
            dirAllocator,
            context,
            null,
            null,
            null,
            null,
            mock(ExceptionReporter.class),
            2000000,
            null,
            false,
            -1);
    manager.configureAndStart();

    // A freshly started manager accounts for no memory at all.
    assertEquals(0, manager.getUsedMemory());
    assertEquals(0, manager.getCommitMemory());

    byte[] firstPayload = generateData(config, 10);
    byte[] secondPayload = generateData(config, 20);
    final int reservedBytes = firstPayload.length + secondPayload.length;

    MapOutput first = manager.reserve(null, firstPayload.length, firstPayload.length, 0);
    MapOutput second = manager.reserve(null, secondPayload.length, secondPayload.length, 0);

    // Both reservations are in-memory: counted as used, nothing committed so far.
    assertEquals(MapOutput.Type.MEMORY, first.getType());
    assertEquals(MapOutput.Type.MEMORY, second.getType());
    assertEquals(0, manager.getCommitMemory());
    assertEquals(reservedBytes, manager.getUsedMemory());

    System.arraycopy(firstPayload, 0, first.getMemory(), 0, firstPayload.length);
    System.arraycopy(secondPayload, 0, second.getMemory(), 0, secondPayload.length);

    // Commit the second output first; only its bytes should show up as committed.
    second.commit();
    assertEquals(secondPayload.length, manager.getCommitMemory());
    assertEquals(reservedBytes, manager.getUsedMemory());

    // The second commit reaches the configured segment count of 2.
    first.commit();
    manager.waitForMemToMemMerge();

    // After the wait, both counters equal the total of the two payloads.
    assertEquals(reservedBytes, manager.getCommitMemory());
    assertEquals(reservedBytes, manager.getUsedMemory());
  }
 /**
  * Walks one reservation through abort and another through closeInMemoryFile followed by
  * releaseCommittedMemory, checking the used/committed memory counters after every step.
  */
 @Test(timeout = 10000)
 public void testReservationAccounting() throws IOException {
   Configuration config = new TezConfiguration(defaultConf);
   FileSystem fs = FileSystem.getLocal(config);
   InputContext context = createMockInputContext(UUID.randomUUID().toString());
   ExceptionReporter reporter = mock(ExceptionReporter.class);

   MergeManager manager =
       new MergeManager(
           config, fs, null, context, null, null, null, null, reporter, 2000000, null, false, -1);
   manager.configureAndStart();

   // Nothing is accounted for right after start-up.
   assertEquals(0, manager.getUsedMemory());
   assertEquals(0, manager.getCommitMemory());

   // A 1-byte reservation counts as used but not committed ...
   MapOutput reservation = manager.reserve(null, 1, 1, 0);
   assertEquals(1, manager.getUsedMemory());
   assertEquals(0, manager.getCommitMemory());

   // ... and aborting it brings both counters back to zero.
   reservation.abort();
   assertEquals(0, manager.getUsedMemory());
   assertEquals(0, manager.getCommitMemory());

   // Closing a 2-byte reservation as an in-memory file makes it both used and committed.
   reservation = manager.reserve(null, 2, 2, 0);
   manager.closeInMemoryFile(reservation);
   assertEquals(2, manager.getUsedMemory());
   assertEquals(2, manager.getCommitMemory());

   // Releasing those 2 committed bytes empties both counters again.
   manager.releaseCommittedMemory(2);
   assertEquals(0, manager.getUsedMemory());
   assertEquals(0, manager.getCommitMemory());
 }
// Example #15
// 0
  /**
   * Reads a single map output from {@code input} and passes it to the merger.
   *
   * <p>Steps, in order: read the shuffle header, wrap the stream through {@code CryptoUtils},
   * run the sanity check on the lengths, reserve a {@code MapOutput} from the merger, shuffle the
   * data into it, and report the successful copy to the scheduler.
   *
   * @param host the host the data is being copied from
   * @param input stream from which the shuffle header and then the map output are read
   * @param remaining map attempts still outstanding; the copied attempt is removed on success
   * @return {@code null} when the copy succeeded. Otherwise an array of attempt ids:
   *     <ul>
   *       <li>every entry of {@code remaining} when the map id could not be determined (invalid
   *           id in the header, or an {@code IOException} before the id was read);
   *       <li>only the current map id when the sanity check fails or an {@code IOException}
   *           occurs after the id is known;
   *       <li>{@code EMPTY_ATTEMPT_ID_ARRAY} when the merger's reserve call throws or returns
   *           {@code null} (logged as status WAIT).
   *     </ul>
   */
  private TaskAttemptID[] copyMapOutput(
      MapHost host, DataInputStream input, Set<TaskAttemptID> remaining) {
    MapOutput<K, V> mapOutput = null;
    TaskAttemptID mapId = null;
    long decompressedLength = -1;
    long compressedLength = -1;

    try {
      long startTime = System.currentTimeMillis();
      int forReduce = -1;
      // Read the shuffle header
      try {
        ShuffleHeader header = new ShuffleHeader();
        header.readFields(input);
        mapId = TaskAttemptID.forName(header.mapId);
        compressedLength = header.compressedLength;
        decompressedLength = header.uncompressedLength;
        forReduce = header.forReduce;
      } catch (IllegalArgumentException e) {
        badIdErrs.increment(1);
        LOG.warn("Invalid map id ", e);
        // Don't know which one was bad, so consider all of them as bad
        return remaining.toArray(new TaskAttemptID[remaining.size()]);
      }

      // Wrap the stream first, then take the crypto padding off both lengths.
      InputStream is = input;
      is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength);
      compressedLength -= CryptoUtils.cryptoPadding(jobConf);
      decompressedLength -= CryptoUtils.cryptoPadding(jobConf);

      // Do some basic sanity verification
      if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId)) {
        return new TaskAttemptID[] {mapId};
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "header: "
                + mapId
                + ", len: "
                + compressedLength
                + ", decomp len: "
                + decompressedLength);
      }

      // Get the location for the map output - either in-memory or on-disk
      try {
        mapOutput = merger.reserve(mapId, decompressedLength, id);
      } catch (IOException ioe) {
        // kill this reduce attempt
        ioErrs.increment(1);
        scheduler.reportLocalError(ioe);
        return EMPTY_ATTEMPT_ID_ARRAY;
      }

      // Check if we can shuffle *now* ...
      if (mapOutput == null) {
        LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
        // Not an error but wait to process data.
        return EMPTY_ATTEMPT_ID_ARRAY;
      }

      // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError
      // on decompression failures. Catching and re-throwing as IOException
      // to allow fetch failure logic to be processed
      try {
        // Go!
        LOG.info(
            "fetcher#"
                + id
                + " about to shuffle output of map "
                + mapOutput.getMapId()
                + " decomp: "
                + decompressedLength
                + " len: "
                + compressedLength
                + " to "
                + mapOutput.getDescription());
        mapOutput.shuffle(host, is, compressedLength, decompressedLength, metrics, reporter);
      } catch (java.lang.InternalError e) {
        LOG.warn("Failed to shuffle for fetcher#" + id, e);
        throw new IOException(e);
      }

      // Inform the shuffle scheduler
      long endTime = System.currentTimeMillis();
      scheduler.copySucceeded(mapId, host, compressedLength, endTime - startTime, mapOutput);
      // Note successful shuffle
      remaining.remove(mapId);
      metrics.successFetch();
      return null;
    } catch (IOException ioe) {
      ioErrs.increment(1);
      // No map id or no reserved output yet: the failure happened before any data was shuffled,
      // so there is no MapOutput to abort.
      if (mapId == null || mapOutput == null) {
        // The trailing space keeps the attempt id from running into the word "header".
        LOG.info(
            "fetcher#"
                + id
                + " failed to read map header "
                + mapId
                + " decomp: "
                + decompressedLength
                + ", "
                + compressedLength,
            ioe);
        if (mapId == null) {
          return remaining.toArray(new TaskAttemptID[remaining.size()]);
        } else {
          return new TaskAttemptID[] {mapId};
        }
      }

      LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe);

      // Inform the shuffle-scheduler
      mapOutput.abort();
      metrics.failedFetch();
      return new TaskAttemptID[] {mapId};
    }
  }