Example #1
0
    public VertexManagerPluginDescriptor build() {
      VertexManagerPluginDescriptor desc =
          VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName());

      try {
        return desc.setUserPayload(TezUtils.createUserPayloadFromConf(this.conf));
      } catch (IOException e) {
        throw new TezUncheckedException(e);
      }
    }
  public static void main(String[] args) {
    try {
      Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
      String containerIdStr = System.getenv(Environment.CONTAINER_ID.name());
      String nodeHostString = System.getenv(Environment.NM_HOST.name());
      String nodePortString = System.getenv(Environment.NM_PORT.name());
      String nodeHttpPortString = System.getenv(Environment.NM_HTTP_PORT.name());
      String appSubmitTimeStr = System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV);

      validateInputParam(appSubmitTimeStr, ApplicationConstants.APP_SUBMIT_TIME_ENV);

      ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
      ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId();

      long appSubmitTime = Long.parseLong(appSubmitTimeStr);

      Configuration conf = new Configuration(new YarnConfiguration());
      TezUtils.addUserSpecifiedTezConfiguration(conf);

      String jobUserName = System.getenv(ApplicationConstants.Environment.USER.name());

      // Do not automatically close FileSystem objects so that in case of
      // SIGTERM I have a chance to write out the job history. I'll be closing
      // the objects myself.
      conf.setBoolean("fs.automatic.close", false);

      // Command line options
      Options opts = new Options();
      opts.addOption(
          TezConstants.TEZ_SESSION_MODE_CLI_OPTION,
          false,
          "Run Tez Application Master in Session mode");

      CommandLine cliParser = new GnuParser().parse(opts, args);

      DAGAppMaster appMaster =
          new DAGAppMaster(
              applicationAttemptId,
              containerId,
              nodeHostString,
              Integer.parseInt(nodePortString),
              Integer.parseInt(nodeHttpPortString),
              appSubmitTime,
              cliParser.hasOption(TezConstants.TEZ_SESSION_MODE_CLI_OPTION));
      ShutdownHookManager.get()
          .addShutdownHook(new DAGAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY);

      initAndStartAppMaster(appMaster, conf, jobUserName);

    } catch (Throwable t) {
      LOG.fatal("Error starting DAGAppMaster", t);
      System.exit(1);
    }
  }
  public ShuffleManager(
      TezInputContext inputContext,
      Configuration conf,
      int numInputs,
      int bufferSize,
      boolean ifileReadAheadEnabled,
      int ifileReadAheadLength,
      CompressionCodec codec,
      FetchedInputAllocator inputAllocator)
      throws IOException {
    this.inputContext = inputContext;
    this.numInputs = numInputs;

    this.shuffledInputsCounter =
        inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
    this.failedShufflesCounter =
        inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS);
    this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES);
    this.decompressedDataSizeCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED);
    this.bytesShuffledToDiskCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK);
    this.bytesShuffledToMemCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM);

    this.ifileBufferSize = bufferSize;
    this.ifileReadAhead = ifileReadAheadEnabled;
    this.ifileReadAheadLength = ifileReadAheadLength;
    this.codec = codec;
    this.inputManager = inputAllocator;

    this.srcNameTrimmed = TezUtils.cleanVertexName(inputContext.getSourceVertexName());

    completedInputSet =
        Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs));
    completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs);
    knownSrcHosts = new ConcurrentHashMap<String, InputHost>();
    pendingHosts = new LinkedBlockingQueue<InputHost>();
    obsoletedInputs =
        Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>());
    runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>());

    int maxConfiguredFetchers =
        conf.getInt(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);

    this.numFetchers = Math.min(maxConfiguredFetchers, numInputs);

    ExecutorService fetcherRawExecutor =
        Executors.newFixedThreadPool(
            numFetchers,
            new ThreadFactoryBuilder()
                .setDaemon(true)
                .setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d")
                .build());
    this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor);

    ExecutorService schedulerRawExecutor =
        Executors.newFixedThreadPool(
            1,
            new ThreadFactoryBuilder()
                .setDaemon(true)
                .setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]")
                .build());
    this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor);

    this.startTime = System.currentTimeMillis();
    this.lastProgressTime = startTime;

    this.shuffleSecret =
        ShuffleUtils.getJobTokenSecretFromTokenBytes(
            inputContext.getServiceConsumerMetaData(
                TezConfiguration.TEZ_SHUFFLE_HANDLER_SERVICE_ID));
    httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf);
    LOG.info(
        this.getClass().getSimpleName()
            + " : numInputs="
            + numInputs
            + ", compressionCodec="
            + (codec == null ? "NoCompressionCodec" : codec.getClass().getName())
            + ", numFetchers="
            + numFetchers
            + ", ifileBufferSize="
            + ifileBufferSize
            + ", ifileReadAheadEnabled="
            + ifileReadAhead
            + ", ifileReadAheadLength="
            + ifileReadAheadLength
            + ", "
            + httpConnectionParams.toString());
  }
  @Override
  public void initialize(VertexManagerPluginContext context) {
    Configuration conf;
    try {
      conf = TezUtils.createConfFromUserPayload(context.getUserPayload());
    } catch (IOException e) {
      throw new TezUncheckedException(e);
    }

    this.context = context;

    this.slowStartMinSrcCompletionFraction =
        conf.getFloat(
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION,
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT);
    this.slowStartMaxSrcCompletionFraction =
        conf.getFloat(
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION,
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT);

    if (slowStartMinSrcCompletionFraction < 0
        || slowStartMaxSrcCompletionFraction < slowStartMinSrcCompletionFraction) {
      throw new IllegalArgumentException(
          "Invalid values for slowStartMinSrcCompletionFraction"
              + "/slowStartMaxSrcCompletionFraction. Min cannot be < 0 and "
              + "max cannot be < min.");
    }

    enableAutoParallelism =
        conf.getBoolean(
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT);
    desiredTaskInputDataSize =
        conf.getLong(
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT);
    minTaskParallelism =
        conf.getInt(
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
            ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT);
    LOG.info(
        "Shuffle Vertex Manager: settings"
            + " minFrac:"
            + slowStartMinSrcCompletionFraction
            + " maxFrac:"
            + slowStartMaxSrcCompletionFraction
            + " auto:"
            + enableAutoParallelism
            + " desiredTaskIput:"
            + desiredTaskInputDataSize
            + " minTasks:"
            + minTaskParallelism);

    Map<String, EdgeProperty> inputs = context.getInputVertexEdgeProperties();
    for (Map.Entry<String, EdgeProperty> entry : inputs.entrySet()) {
      if (entry.getValue().getDataMovementType() == DataMovementType.SCATTER_GATHER) {
        String vertex = entry.getKey();
        bipartiteSources.put(vertex, new HashSet<Integer>());
      }
    }
    if (bipartiteSources.isEmpty()) {
      throw new TezUncheckedException("Atleast 1 bipartite source should exist");
    }
    // dont track the source tasks here since those tasks may themselves be
    // dynamically changed as the DAG progresses.

  }
Example #5
0
  @Override
  public void initialize() {
    Configuration conf;
    try {
      conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException e) {
      throw new TezUncheckedException(e);
    }

    this.slowStartMinSrcCompletionFraction =
        conf.getFloat(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT);
    float defaultSlowStartMaxSrcFraction =
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT;
    if (slowStartMinSrcCompletionFraction > defaultSlowStartMaxSrcFraction) {
      defaultSlowStartMaxSrcFraction = slowStartMinSrcCompletionFraction;
    }
    this.slowStartMaxSrcCompletionFraction =
        conf.getFloat(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION,
            defaultSlowStartMaxSrcFraction);

    if (slowStartMinSrcCompletionFraction < 0
        || slowStartMaxSrcCompletionFraction > 1
        || slowStartMaxSrcCompletionFraction < slowStartMinSrcCompletionFraction) {
      throw new IllegalArgumentException(
          "Invalid values for slowStartMinSrcCompletionFraction"
              + "/slowStartMaxSrcCompletionFraction. Min cannot be < 0, max cannot be > 1,"
              + " and max cannot be < min.");
    }

    enableAutoParallelism =
        conf.getBoolean(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT);
    desiredTaskInputDataSize =
        conf.getLong(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT);
    minTaskParallelism =
        Math.max(
            1,
            conf.getInt(
                ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
                ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT));
    LOG.info(
        "Shuffle Vertex Manager: settings"
            + " minFrac:"
            + slowStartMinSrcCompletionFraction
            + " maxFrac:"
            + slowStartMaxSrcCompletionFraction
            + " auto:"
            + enableAutoParallelism
            + " desiredTaskIput:"
            + desiredTaskInputDataSize
            + " minTasks:"
            + minTaskParallelism);

    updatePendingTasks();
    if (enableAutoParallelism) {
      getContext().vertexReconfigurationPlanned();
    }
    // dont track the source tasks here since those tasks may themselves be
    // dynamically changed as the DAG progresses.

  }
  @Test
  public void testReduceProcessor() throws Exception {
    final String dagName = "mrdag0";
    String mapVertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    String reduceVertexName = MultiStageMRConfigUtil.getFinalReduceVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateVertexConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

    jobConf.set(
        MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
        new Path(workDir, "localized-resources").toUri().toString());
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

    InputSpec mapInputSpec =
        new InputSpec(
            "NullSrcVertex",
            new InputDescriptor(MRInputLegacy.class.getName())
                .setUserPayload(MRHelpers.createMRInputPayload(jobConf, null)),
            1);
    OutputSpec mapOutputSpec =
        new OutputSpec(
            "NullDestVertex",
            new OutputDescriptor(LocalOnFileSorterOutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);
    // Run a map
    LogicalIOProcessorRuntimeTask mapTask =
        MapUtils.createLogicalTask(
            localFs,
            workDir,
            jobConf,
            0,
            mapInput,
            new TestUmbilical(),
            dagName,
            mapVertexName,
            Collections.singletonList(mapInputSpec),
            Collections.singletonList(mapOutputSpec));

    mapTask.initialize();
    mapTask.run();
    mapTask.close();

    LOG.info("Starting reduce...");

    Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>();

    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(
        MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
        new Path(workDir, "localized-resources").toUri().toString());
    FileOutputFormat.setOutputPath(jobConf, new Path(workDir, "output"));
    ProcessorDescriptor reduceProcessorDesc =
        new ProcessorDescriptor(ReduceProcessor.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));

    InputSpec reduceInputSpec =
        new InputSpec(
            mapVertexName,
            new InputDescriptor(LocalMergedInput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);
    OutputSpec reduceOutputSpec =
        new OutputSpec(
            "NullDestinationVertex",
            new OutputDescriptor(MROutputLegacy.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);

    // Now run a reduce
    TaskSpec taskSpec =
        new TaskSpec(
            TezTestUtils.getMockTaskAttemptId(0, 1, 0, 0),
            dagName,
            reduceVertexName,
            reduceProcessorDesc,
            Collections.singletonList(reduceInputSpec),
            Collections.singletonList(reduceOutputSpec),
            null);

    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    serviceConsumerMetadata.put(
        ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ShuffleUtils.convertJobTokenToBytes(shuffleToken));

    LogicalIOProcessorRuntimeTask task =
        new LogicalIOProcessorRuntimeTask(
            taskSpec,
            0,
            jobConf,
            new String[] {workDir.toString()},
            new TestUmbilical(),
            serviceConsumerMetadata,
            HashMultimap.<String, String>create());

    task.initialize();
    task.run();
    task.close();

    // MRTask mrTask = (MRTask)t.getProcessor();
    // TODO NEWTEZ Verify the partitioner has not been created
    // Likely not applicable anymore.
    // Assert.assertNull(mrTask.getPartitioner());

    // Only a task commit happens, hence the data is still in the temporary directory.
    Path reduceOutputDir =
        new Path(
            new Path(workDir, "output"),
            "_temporary/0/" + IDConverter.toMRTaskIdForOutput(TezTestUtils.getMockTaskId(0, 1, 0)));

    Path reduceOutputFile = new Path(reduceOutputDir, "part-v001-o000-00000");

    SequenceFile.Reader reader = new SequenceFile.Reader(localFs, reduceOutputFile, jobConf);

    LongWritable key = new LongWritable();
    Text value = new Text();
    long prev = Long.MIN_VALUE;
    while (reader.next(key, value)) {
      if (prev != Long.MIN_VALUE) {
        Assert.assertTrue(prev < key.get());
        prev = key.get();
      }
    }

    reader.close();
  }
  @Test(timeout = 5000)
  public void testTaskAttemptFailedKilled() throws IOException, TezException {
    ApplicationId appId = ApplicationId.newInstance(1000, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    Credentials credentials = new Credentials();
    AppContext appContext = mock(AppContext.class);
    EventHandler eventHandler = mock(EventHandler.class);
    DAG dag = mock(DAG.class);
    AMContainerMap amContainerMap = mock(AMContainerMap.class);
    Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
    doReturn(eventHandler).when(appContext).getEventHandler();
    doReturn(dag).when(appContext).getCurrentDAG();
    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
    doReturn(credentials).when(appContext).getAppCredentials();
    doReturn(appAcls).when(appContext).getApplicationACLs();
    doReturn(amContainerMap).when(appContext).getAllContainers();
    NodeId nodeId = NodeId.newInstance("localhost", 0);
    AMContainer amContainer = mock(AMContainer.class);
    Container container = mock(Container.class);
    doReturn(nodeId).when(container).getNodeId();
    doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
    doReturn(container).when(amContainer).getContainer();

    Configuration conf = new TezConfiguration();
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(conf);
    TaskCommunicatorManager taskAttemptListener =
        new TaskCommunicatorManager(
            appContext,
            mock(TaskHeartbeatHandler.class),
            mock(ContainerHeartbeatHandler.class),
            Lists.newArrayList(
                new NamedEntityDescriptor(TezConstants.getTezYarnServicePluginName(), null)
                    .setUserPayload(userPayload)));

    TaskSpec taskSpec1 = mock(TaskSpec.class);
    TezTaskAttemptID taskAttemptId1 = mock(TezTaskAttemptID.class);
    doReturn(taskAttemptId1).when(taskSpec1).getTaskAttemptID();
    AMContainerTask amContainerTask1 = new AMContainerTask(taskSpec1, null, null, false, 10);

    TaskSpec taskSpec2 = mock(TaskSpec.class);
    TezTaskAttemptID taskAttemptId2 = mock(TezTaskAttemptID.class);
    doReturn(taskAttemptId2).when(taskSpec2).getTaskAttemptID();
    AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec2, null, null, false, 10);

    ContainerId containerId1 = createContainerId(appId, 1);
    taskAttemptListener.registerRunningContainer(containerId1, 0);
    taskAttemptListener.registerTaskAttempt(amContainerTask1, containerId1, 0);
    ContainerId containerId2 = createContainerId(appId, 2);
    taskAttemptListener.registerRunningContainer(containerId2, 0);
    taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId2, 0);

    taskAttemptListener.taskFailed(
        taskAttemptId1, TaskAttemptEndReason.COMMUNICATION_ERROR, "Diagnostics1");
    taskAttemptListener.taskKilled(
        taskAttemptId2, TaskAttemptEndReason.EXECUTOR_BUSY, "Diagnostics2");

    ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
    verify(eventHandler, times(2)).handle(argumentCaptor.capture());
    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
    assertTrue(argumentCaptor.getAllValues().get(1) instanceof TaskAttemptEventAttemptKilled);
    TaskAttemptEventAttemptFailed failedEvent =
        (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
    TaskAttemptEventAttemptKilled killedEvent =
        (TaskAttemptEventAttemptKilled) argumentCaptor.getAllValues().get(1);

    assertEquals("Diagnostics1", failedEvent.getDiagnosticInfo());
    assertEquals(
        TaskAttemptTerminationCause.COMMUNICATION_ERROR, failedEvent.getTerminationCause());

    assertEquals("Diagnostics2", killedEvent.getDiagnosticInfo());
    assertEquals(TaskAttemptTerminationCause.SERVICE_BUSY, killedEvent.getTerminationCause());
    // TODO TEZ-2003. Verify unregistration from the registered list
  }