Пример #1
0
  /**
   * Setup output format appropriately
   *
   * @param job - Job handle
   * @throws IOException - Exception if any
   */
  private void configureOutputFormat(Job job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    Path targetPath = inputOptions.getTargetPath();
    targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration));

    if (inputOptions.shouldAtomicCommit()) {
      Path workDir = inputOptions.getAtomicWorkPath();
      if (workDir == null) {
        workDir = targetPath.getParent();
      }
      workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt());
      FileSystem workFS = workDir.getFileSystem(configuration);
      FileSystem targetFS = targetPath.getFileSystem(configuration);
      if (!DistCpUtils.compareFs(targetFS, workFS)) {
        throw new IllegalArgumentException(
            "Work path "
                + workDir
                + " and target path "
                + targetPath
                + " are in different file system");
      }
      CopyOutputFormat.setWorkingDirectory(job, workDir);
    } else {
      CopyOutputFormat.setWorkingDirectory(job, targetPath);
    }
    CopyOutputFormat.setCommitDirectory(job, targetPath);

    Path counterFilePath = inputOptions.getOutPutDirectory();
    if (counterFilePath == null) {
      LOG.error("Output directory is null for distcp");
    } else {
      LOG.info("DistCp output directory path: " + counterFilePath);
      CopyOutputFormat.setOutputPath(job, counterFilePath);
    }
  }
Пример #2
0
  /**
   * Create Job object for submitting it, with all the configuration
   *
   * @return Reference to job object.
   * @throws IOException - Exception if any
   */
  protected Job createJob() throws IOException {
    String jobName = "distcp";
    String userChosenName = getConf().get("mapred.job.name");
    if (userChosenName != null) jobName += ": " + userChosenName;
    Job job = new Job(getConf(), jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);

    job.setMapperClass(CopyMapper.class);
    job.setReducerClass(Reducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);
    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration()
        .set(DistCpConstants.CONF_LABEL_NUM_MAPS, String.valueOf(inputOptions.getMaxMaps()));

    if (inputOptions.getSslConfigurationFile() != null) {
      setupSSLConfig(job.getConfiguration());
    }

    inputOptions.appendToConf(job.getConfiguration());
    return job;
  }