/** Given two vertices a, b update their configurations to be used in an Edge a-b */ public void updateConfigurationForEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w) throws IOException { // Tez needs to setup output subsequent input pairs correctly MultiStageMRConfToTezTranslator.translateVertexConfToTez(wConf, vConf); // update payloads (configuration for the vertices might have changed) v.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(vConf)); w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf)); }
/* * Helper function to create Vertex for given ReduceWork. */ private Vertex createVertex( JobConf conf, ReduceWork reduceWork, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx) throws Exception { // set up operator plan Utilities.setReduceWork(conf, reduceWork, mrScratchDir, false); // create the directories FileSinkOperators need Utilities.createTmpDirs(conf, reduceWork); // Call once here, will be updated when we find edges MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null); // create the vertex Vertex reducer = new Vertex( reduceWork.getName(), new ProcessorDescriptor(ReduceTezProcessor.class.getName()) .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)), reduceWork.getNumReduceTasks(), getContainerResource(conf)); Map<String, String> environment = new HashMap<String, String>(); MRHelpers.updateEnvironmentForMRTasks(conf, environment, false); reducer.setTaskEnvironment(environment); reducer.setJavaOpts(getContainerJavaOpts(conf)); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); localResources.put(getBaseName(appJarLr), appJarLr); for (LocalResource lr : additionalLr) { localResources.put(getBaseName(lr), lr); } reducer.setTaskLocalResources(localResources); return reducer; }
/*
 * Helper function to create Vertex from MapWork.
 *
 * Serializes the map-side operator plan into conf, chooses the input format
 * and split-generation strategy (AM-side vs. client-side; Tez-grouped vs.
 * plain), then builds the Vertex with its processor payload, environment,
 * JVM options, MRInput and local resources.
 *
 * NOTE(review): the order of operations matters — all conf mutations
 * (setClass/setBoolean below) must happen before createUserPayloadFromConf
 * serializes the conf into the vertex/input payloads.
 */
private Vertex createVertex(JobConf conf, MapWork mapWork, LocalResource appJarLr,
    List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx,
    TezWork tezWork) throws Exception {

  Path tezDir = getTezDir(mrScratchDir);

  // set up the operator plan
  Utilities.setMapWork(conf, mapWork, mrScratchDir, false);

  // create the directories FileSinkOperators need
  Utilities.createTmpDirs(conf, mapWork);

  // Tez ask us to call this even if there's no preceding vertex
  MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

  // finally create the vertex
  Vertex map = null;

  // use tez to combine splits
  boolean useTezGroupedSplits = false;

  // numTasks stays -1 when splits are generated in the AM — presumably the
  // task count is then determined at run time; TODO confirm against Tez docs.
  int numTasks = -1;
  Class amSplitGeneratorClass = null;
  InputSplitInfo inputSplitInfo = null;
  Class inputFormatClass = conf.getClass("mapred.input.format.class",
      InputFormat.class);

  // A CUSTOM_EDGE from any parent means this vertex receives custom input.
  boolean vertexHasCustomInput = false;
  if (tezWork != null) {
    for (BaseWork baseWork : tezWork.getParents(mapWork)) {
      if (tezWork.getEdgeType(baseWork, mapWork) == EdgeType.CUSTOM_EDGE) {
        vertexHasCustomInput = true;
      }
    }
  }
  if (vertexHasCustomInput) {
    useTezGroupedSplits = false;
    // grouping happens in execution phase. Setting the class to TezGroupedSplitsInputFormat
    // here would cause pre-mature grouping which would be incorrect.
    inputFormatClass = HiveInputFormat.class;
    conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
    // this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
  } else {
    // we'll set up tez to combine spits for us iff the input format
    // is HiveInputFormat
    if (inputFormatClass == HiveInputFormat.class) {
      useTezGroupedSplits = true;
      conf.setClass("mapred.input.format.class",
          TezGroupedSplitsInputFormat.class, InputFormat.class);
    }
  }

  if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
    // if we're generating the splits in the AM, we just need to set
    // the correct plugin.
    amSplitGeneratorClass = MRInputAMSplitGenerator.class;
  } else {
    // client side split generation means we have to compute them now
    inputSplitInfo = MRHelpers.generateInputSplits(conf,
        new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_")));
    numTasks = inputSplitInfo.getNumTasks();
  }

  // Serialize the (now fully mutated) conf; it is reused for the MRInput payload.
  byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf);
  map = new Vertex(mapWork.getName(),
      new ProcessorDescriptor(MapTezProcessor.class.getName()).
          setUserPayload(serializedConf),
      numTasks, getContainerResource(conf));
  Map<String, String> environment = new HashMap<String, String>();
  MRHelpers.updateEnvironmentForMRTasks(conf, environment, true);
  map.setTaskEnvironment(environment);
  map.setJavaOpts(getContainerJavaOpts(conf));

  // The map work is expected to carry exactly one alias.
  assert mapWork.getAliasToWork().keySet().size() == 1;

  String alias = mapWork.getAliasToWork().keySet().iterator().next();

  // Build the MRInput payload; with grouped splits the grouping wraps
  // HiveInputFormat, otherwise the plain payload is used.
  byte[] mrInput = null;
  if (useTezGroupedSplits) {
    mrInput = MRHelpers.createMRInputPayloadWithGrouping(serializedConf,
        HiveInputFormat.class.getName());
  } else {
    mrInput = MRHelpers.createMRInputPayload(serializedConf, null);
  }
  map.addInput(alias,
      new InputDescriptor(MRInputLegacy.class.getName()).
          setUserPayload(mrInput),
      amSplitGeneratorClass);

  // Localize the application jar plus any additional resources.
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(getBaseName(appJarLr), appJarLr);
  for (LocalResource lr : additionalLr) {
    localResources.put(getBaseName(lr), lr);
  }

  if (inputSplitInfo != null) {
    // only relevant for client-side split generation
    // NOTE(review): uses FileSystem.get(conf) rather than the 'fs' parameter
    // (which is unused here) — verify whether they are intended to differ.
    map.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    MRHelpers.updateLocalResourcesForInputSplits(FileSystem.get(conf),
        inputSplitInfo, localResources);
  }

  map.setTaskLocalResources(localResources);
  return map;
}