/**
 * Drops a success marker file into every target directory matched by the replicated include path.
 *
 * <p>The part of the configured {@code falcon.include.path} that follows the single source path is
 * passed through {@code getFixedPath}, appended to the target path, and expanded as a glob on the
 * target file system. For each match, an empty file named {@code EntityUtil.SUCCEEDED_FILE_NAME}
 * is created beneath it. When nothing matches, only an informational message is logged.
 *
 * @param options DistCp options supplying the target path and the source path list
 * @throws IOException propagated from the file system lookups and file creation
 */
private void executePostProcessing(DistCpOptions options) throws IOException {
  Path targetPath = options.getTargetPath();
  FileSystem fs = targetPath.getFileSystem(getConf());

  List<Path> inPaths = options.getSourcePaths();
  assert inPaths.size() == 1 : "Source paths more than 1 can't be handled";

  String sourceRoot = inPaths.get(0).toString();
  // NOTE(review): Path construction fails if "falcon.include.path" is not set in the configuration.
  String includeRoot = new Path(getConf().get("falcon.include.path")).toString();

  // startsWith keeps the check itself from throwing when the include path is shorter than the
  // source path, so the assertion message is what surfaces.
  assert includeRoot.startsWith(sourceRoot) : "Source path is not a subset of include path";

  String relativePath = includeRoot.substring(sourceRoot.length());
  String fixedPath = getFixedPath(relativePath);
  Path targetGlob = new Path(targetPath.toString() + "/" + fixedPath);

  FileStatus[] files = fs.globStatus(targetGlob);
  // A glob may yield either null or an empty array when nothing matches; report both the same way.
  if (files == null || files.length == 0) {
    LOG.info("No files present in path: " + targetGlob.toString());
    return;
  }

  for (FileStatus file : files) {
    Path marker = new Path(file.getPath(), EntityUtil.SUCCEEDED_FILE_NAME);
    // Create and immediately close: the marker carries no content.
    fs.create(marker).close();
    LOG.info("Created " + marker);
  }
}
@Test public void testDeleteMissing() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl( taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); fs.rename(new Path(targetBaseAdd), new Path(targetBase)); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.setSyncFolder(true); options.setDeleteMissing(true); options.appendToConf(conf); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) { Assert.fail("Source and target folders are not in sync"); } // Test for idempotent commit committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) { Assert.fail("Source and target folders are not in sync"); } } catch (Throwable e) { LOG.error("Exception encountered while testing for delete missing", 
e); Assert.fail("Delete missing failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false"); } }
/**
 * Builds the DistCp options from the parsed command line.
 *
 * <p>Reads the comma-separated {@code sourcePaths} option, the {@code targetPath} option and the
 * {@code maxMaps} option; the resulting options have sync-folder and blocking mode switched on.
 *
 * @param cmd parsed command line carrying the three options above
 * @return DistCp options configured from {@code cmd}
 */
protected DistCpOptions getDistCpOptions(CommandLine cmd) {
  String rawSources = cmd.getOptionValue("sourcePaths").trim();
  List<Path> sources = getPaths(rawSources.split(","));

  Path target = new Path(cmd.getOptionValue("targetPath").trim());

  DistCpOptions result = new DistCpOptions(sources, target);
  result.setSyncFolder(true);
  result.setBlocking(true);

  int maxMaps = Integer.parseInt(cmd.getOptionValue("maxMaps"));
  result.setMaxMaps(maxMaps);
  return result;
}
@Test public void testPreserveStatus() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl( taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); FsPermission sourcePerm = new FsPermission((short) 511); FsPermission initialPerm = new FsPermission((short) 448); sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm); targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.preserve(FileAttribute.PERMISSION); options.appendToConf(conf); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); committer.commitJob(jobContext); if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) { Assert.fail("Permission don't match"); } // Test for idempotent commit committer.commitJob(jobContext); if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) { Assert.fail("Permission don't match"); } } catch (IOException e) { LOG.error("Exception encountered while testing for preserve status", e); Assert.fail("Preserve status failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); } }
@Override public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst); options.setSyncFolder(true); options.setSkipCRC(true); options.preserve(FileAttribute.BLOCKSIZE); // Creates the command-line parameters for distcp String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()}; try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }