/**
 * Configures the working directory, commit directory and output path of the
 * copy job through {@link CopyOutputFormat}.
 *
 * <p>The target path taken from the input options is first qualified against
 * its own file system. When atomic commit is requested, the working directory
 * is a child of the configured atomic work path (or of the target's parent if
 * no work path is configured) named
 * {@code WIP_PREFIX + <target name> + rand.nextInt()}; it is rejected unless
 * {@code DistCpUtils.compareFs} accepts the target and work file systems.
 * Without atomic commit the working directory is the target itself. The commit
 * directory is always the qualified target.
 *
 * <p>The job output path is set only when the input options supply an output
 * directory; otherwise an error is logged and the method returns normally.
 *
 * @param job - Job handle
 * @throws IOException - Exception if any
 * @throws IllegalArgumentException if the work path and the target path are
 *         reported to be on different file systems
 */
private void configureOutputFormat(Job job) throws IOException {
  final Configuration conf = job.getConfiguration();

  final Path rawTarget = inputOptions.getTargetPath();
  final Path qualifiedTarget = rawTarget.makeQualified(rawTarget.getFileSystem(conf));

  // Default: copy straight into the target. Atomic commit swaps in a staging dir.
  Path workingDir = qualifiedTarget;
  if (inputOptions.shouldAtomicCommit()) {
    Path stagingParent = inputOptions.getAtomicWorkPath();
    if (stagingParent == null) {
      // NOTE(review): presumably getParent() can be null for a root target,
      // which would make the Path constructor below fail — confirm with callers.
      stagingParent = qualifiedTarget.getParent();
    }
    final String stagingName = WIP_PREFIX + qualifiedTarget.getName() + rand.nextInt();
    workingDir = new Path(stagingParent, stagingName);

    final FileSystem stagingFs = workingDir.getFileSystem(conf);
    final FileSystem destinationFs = qualifiedTarget.getFileSystem(conf);
    final boolean sameFileSystem = DistCpUtils.compareFs(destinationFs, stagingFs);
    if (!sameFileSystem) {
      throw new IllegalArgumentException(
          "Work path " + workingDir + " and target path " + qualifiedTarget
              + " are in different file system");
    }
  }
  CopyOutputFormat.setWorkingDirectory(job, workingDir);
  CopyOutputFormat.setCommitDirectory(job, qualifiedTarget);

  final Path outputDir = inputOptions.getOutPutDirectory();
  if (outputDir == null) {
    // Only logged; the output path is left unset in this case.
    LOG.error("Output directory is null for distcp");
    return;
  }
  LOG.info("DistCp output directory path: " + outputDir);
  CopyOutputFormat.setOutputPath(job, outputDir);
}
/**
 * Builds the fully configured {@link Job} that is about to be submitted.
 *
 * <p>The job is named {@code "distcp"}, followed by {@code ": "} and the value
 * of {@code mapred.job.name} when that property is present in the
 * configuration. The input format comes from
 * {@code DistCpUtils.getStrategy}, the mapper is {@link CopyMapper}, the
 * reducer is the base {@code Reducer} class, and both map output and job
 * output use {@code NullWritable} keys with {@code Text} values written
 * through {@link CopyOutputFormat}.
 *
 * <p>On the job's configuration this method sets
 * {@code mapred.map.tasks.speculative.execution} to {@code "false"}, stores
 * the maximum map count under {@code DistCpConstants.CONF_LABEL_NUM_MAPS},
 * calls {@code setupSSLConfig} when an SSL configuration file is given, and
 * finally lets the input options append themselves to the configuration.
 *
 * @return Reference to job object.
 * @throws IOException - Exception if any
 */
protected Job createJob() throws IOException {
  final String baseName = "distcp";
  final String userChosenName = getConf().get("mapred.job.name");
  final String jobName =
      (userChosenName == null) ? baseName : baseName + ": " + userChosenName;

  final Job job = new Job(getConf(), jobName);

  // Input side and jar lookup.
  job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
  job.setJarByClass(CopyMapper.class);

  // Working/commit/output directories.
  configureOutputFormat(job);

  // Map and reduce stages with their key/value types.
  job.setMapperClass(CopyMapper.class);
  job.setReducerClass(Reducer.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(CopyOutputFormat.class);

  // Remaining settings all go onto the job's own configuration.
  final Configuration jobConf = job.getConfiguration();
  jobConf.set("mapred.map.tasks.speculative.execution", "false");
  jobConf.set(DistCpConstants.CONF_LABEL_NUM_MAPS, String.valueOf(inputOptions.getMaxMaps()));

  final boolean sslRequested = inputOptions.getSslConfigurationFile() != null;
  if (sslRequested) {
    setupSSLConfig(jobConf);
  }

  inputOptions.appendToConf(jobConf);
  return job;
}