private void testAndVerify(int numCreatedModified) throws Exception { SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2"); System.out.println(report); DistCpSync distCpSync = new DistCpSync(options, conf); // do the sync Assert.assertTrue(distCpSync.sync()); // make sure the source path has been updated to the snapshot path final Path spath = new Path(source, HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); Assert.assertEquals(spath, options.getSourcePaths().get(0)); // build copy listing final Path listingPath = new Path("/tmp/META/fileList.seq"); CopyListing listing = new SimpleCopyListing(conf, new Credentials(), distCpSync); listing.buildListing(listingPath, options); Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath); CopyMapper copyMapper = new CopyMapper(); StubContext stubContext = new StubContext(conf, null, 0); Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext(); // Enable append context.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true); copyMapper.setup(context); for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) { copyMapper.map(entry.getKey(), entry.getValue(), context); } // verify that we only list modified and created files/directories Assert.assertEquals(numCreatedModified, copyListing.size()); // verify the source and target now has the same structure verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false); }
/** Test the basic functionality. */ @Test public void testSync() throws Exception { initData(source); initData(target); enableAndCreateFirstSnapshot(); // make changes under source int numCreatedModified = changeData(source); dfs.createSnapshot(source, "s2"); // before sync, make some further changes on source. this should not affect // the later distcp since we're copying (s2-s1) to target final Path toDelete = new Path(source, "foo/d1/foo/f1"); dfs.delete(toDelete, true); final Path newdir = new Path(source, "foo/d1/foo/newdir"); dfs.mkdirs(newdir); SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2"); System.out.println(report); DistCpSync distCpSync = new DistCpSync(options, conf); // do the sync Assert.assertTrue(distCpSync.sync()); // make sure the source path has been updated to the snapshot path final Path spath = new Path(source, HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); Assert.assertEquals(spath, options.getSourcePaths().get(0)); // build copy listing final Path listingPath = new Path("/tmp/META/fileList.seq"); CopyListing listing = new SimpleCopyListing(conf, new Credentials(), distCpSync); listing.buildListing(listingPath, options); Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath); CopyMapper copyMapper = new CopyMapper(); StubContext stubContext = new StubContext(conf, null, 0); Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext(); // Enable append context.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true); copyMapper.setup(context); for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) { copyMapper.map(entry.getKey(), entry.getValue(), context); } // verify that we only list modified and created files/directories Assert.assertEquals(numCreatedModified, copyListing.size()); // verify that we only copied new appended data of f2 and the new file f1 Assert.assertEquals( BLOCK_SIZE * 3, 
stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue()); // verify the source and target now has the same structure verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false); }
/**
 * Builds the input file listing for the given job by obtaining a copy listing
 * implementation and asking it to write the listing to the file listing path.
 * The job's configuration and credentials are handed to that implementation;
 * per the existing contract this is also where delegation tokens for each
 * path are added to the job's credential store (not visible in this method).
 *
 * @param job - Handle to job
 * @return the path where the copy listing was created
 * @throws IOException - If any
 */
protected Path createInputFileListing(Job job) throws IOException {
  final Path listingFile = getFileListingPath();

  final CopyListing lister = CopyListing.getCopyListing(
      job.getConfiguration(), job.getCredentials(), inputOptions);
  lister.buildListing(listingFile, inputOptions);

  // report how much the listing covers
  final String pathCountMessage =
      "Number of paths considered for copy: " + lister.getNumberOfPaths();
  LOG.info(pathCountMessage);

  final String byteCountMessage =
      "Number of bytes considered for copy: " + lister.getBytesToCopy()
          + " (Actual number of bytes copied depends on whether any files are "
          + "skipped or overwritten.)";
  LOG.info(byteCountMessage);

  return listingFile;
}