Beispiel #1
0
 private LocalResource createLocalResource(String name) {
   LocalResource lr =
       LocalResource.newInstance(
           URL.newInstance(null, "localhost", 2321, name),
           LocalResourceType.FILE,
           LocalResourceVisibility.APPLICATION,
           1,
           1000000);
   return lr;
 }
  public DAG createDAG(
      FileSystem remoteFs,
      Configuration conf,
      Path remoteStagingDir,
      int numMapper,
      int numReducer,
      int iReduceStagesCount,
      int numIReducer,
      long mapSleepTime,
      int mapSleepCount,
      long reduceSleepTime,
      int reduceSleepCount,
      long iReduceSleepTime,
      int iReduceSleepCount,
      boolean writeSplitsToDFS,
      boolean generateSplitsInAM)
      throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
      mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateVertexConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
      intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
      for (int i = 1; i <= iReduceStagesCount; ++i) {
        JobConf iReduceStageConf = new JobConf(conf);
        iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
        iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
        iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
        iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
        iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        iReduceStageConf.set(
            MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

        MRHelpers.translateVertexConfToTez(iReduceStageConf);
        intermediateReduceStageConfs[i - 1] = iReduceStageConf;
      }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
      finalReduceConf = new JobConf(conf);
      finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
      finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
      finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
      finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
      finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
      finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
      finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

      MRHelpers.translateVertexConfToTez(finalReduceConf);
    }

    MRHelpers.doJobClientMagic(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
      for (int i = 0; i < iReduceStagesCount; ++i) {
        MRHelpers.doJobClientMagic(intermediateReduceStageConfs[i]);
      }
    }
    if (numReducer > 0) {
      MRHelpers.doJobClientMagic(finalReduceConf);
    }

    InputSplitInfo inputSplitInfo = null;
    if (!generateSplitsInAM) {
      if (writeSplitsToDFS) {
        LOG.info("Writing splits to DFS");
        try {
          inputSplitInfo = MRHelpers.generateInputSplits(mapStageConf, remoteStagingDir);
        } catch (InterruptedException e) {
          throw new TezUncheckedException("Could not generate input splits", e);
        } catch (ClassNotFoundException e) {
          throw new TezUncheckedException("Failed to generate input splits", e);
        }
      } else {
        try {
          LOG.info("Creating in-mem splits");
          inputSplitInfo = MRHelpers.generateInputSplitsToMem(mapStageConf);
        } catch (ClassNotFoundException e) {
          throw new TezUncheckedException("Could not generate input splits", e);
        } catch (InterruptedException e) {
          throw new TezUncheckedException("Could not generate input splits", e);
        }
      }
      if (inputSplitInfo.getCredentials() != null) {
        this.credentials.addAll(inputSplitInfo.getCredentials());
      }
    }

    DAG dag = new DAG("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
      throw new TezUncheckedException(
          "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] {remoteJarPath}, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc =
        LocalResource.newInstance(
            ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION,
            jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    byte[] mapInputPayload = null;
    byte[] mapUserPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    if (writeSplitsToDFS || generateSplitsInAM) {
      mapInputPayload = MRHelpers.createMRInputPayload(mapUserPayload, null);
    } else {
      mapInputPayload =
          MRHelpers.createMRInputPayload(mapUserPayload, inputSplitInfo.getSplitsProto());
    }
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Vertex mapVertex =
        new Vertex(
            "map",
            new ProcessorDescriptor(MapProcessor.class.getName()).setUserPayload(mapUserPayload),
            numTasks,
            MRHelpers.getMapResource(mapStageConf));
    if (!generateSplitsInAM) {
      mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    }

    if (writeSplitsToDFS) {
      Map<String, LocalResource> mapLocalResources = new HashMap<String, LocalResource>();
      mapLocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(remoteFs, inputSplitInfo, mapLocalResources);
      mapVertex.setTaskLocalFiles(mapLocalResources);
    } else {
      mapVertex.setTaskLocalFiles(commonLocalResources);
    }

    if (generateSplitsInAM) {
      MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputAMSplitGenerator.class);
    } else {
      if (writeSplitsToDFS) {
        MRHelpers.addMRInput(mapVertex, mapInputPayload, null);
      } else {
        MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputSplitDistributor.class);
      }
    }
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
      for (int i = 0; i < iReduceStagesCount; ++i) {
        Configuration iconf = intermediateReduceStageConfs[i];
        byte[] iReduceUserPayload = MRHelpers.createUserPayloadFromConf(iconf);
        Vertex ivertex =
            new Vertex(
                "ireduce" + (i + 1),
                new ProcessorDescriptor(ReduceProcessor.class.getName())
                    .setUserPayload(iReduceUserPayload),
                numIReducer,
                MRHelpers.getReduceResource(iconf));
        ivertex.setTaskLocalFiles(commonLocalResources);
        vertices.add(ivertex);
      }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
      byte[] reducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
      finalReduceVertex =
          new Vertex(
              "reduce",
              new ProcessorDescriptor(ReduceProcessor.class.getName())
                  .setUserPayload(reducePayload),
              numReducer,
              MRHelpers.getReduceResource(finalReduceConf));
      finalReduceVertex.setTaskLocalFiles(commonLocalResources);
      MRHelpers.addMROutputLegacy(finalReduceVertex, reducePayload);
      vertices.add(finalReduceVertex);
    } else {
      // Map only job
      MRHelpers.addMROutputLegacy(mapVertex, mapUserPayload);
    }

    Configuration partitionerConf = new Configuration(false);
    partitionerConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfigurer edgeConf =
        OrderedPartitionedKVEdgeConfigurer.newBuilder(
                IntWritable.class.getName(),
                IntWritable.class.getName(),
                MRPartitioner.class.getName(),
                partitionerConf)
            .configureInput()
            .useLegacyInput()
            .done()
            .build();

    for (int i = 0; i < vertices.size(); ++i) {
      dag.addVertex(vertices.get(i));
      if (i != 0) {
        dag.addEdge(
            new Edge(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
      }
    }

    return dag;
  }