/**
   * Runs a snapshot-diff based sync from {@code source} and checks the outcome.
   *
   * <p>Steps, in order: print the diff report between snapshots {@code s1} and {@code s2}, run
   * {@code DistCpSync.sync()}, confirm that the first source path in {@code options} now points
   * at the {@code s2} snapshot directory, build a copy listing, push every listing entry through
   * a {@code CopyMapper} with append enabled, check the listing size, and finally compare the
   * snapshot tree with {@code target}.
   *
   * @param numCreatedModified expected number of entries in the generated copy listing
   * @throws Exception if any file system, listing or mapper call fails
   */
  private void testAndVerify(int numCreatedModified) throws Exception {
    System.out.println(dfs.getSnapshotDiffReport(source, "s1", "s2"));

    // run the sync; it has to report success
    DistCpSync syncer = new DistCpSync(options, conf);
    boolean synced = syncer.sync();
    Assert.assertTrue(synced);

    // the sync is expected to have rewritten the source path to the s2 snapshot path
    String snapshotSuffix = HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2";
    Path snapshotRoot = new Path(source, snapshotSuffix);
    Assert.assertEquals(snapshotRoot, options.getSourcePaths().get(0));

    // write the copy listing and read it back
    Path listingFile = new Path("/tmp/META/fileList.seq");
    CopyListing lister = new SimpleCopyListing(conf, new Credentials(), syncer);
    lister.buildListing(listingFile, options);
    Map<Text, CopyListingFileStatus> entries = getListing(listingFile);

    // drive the mapper by hand over each listing entry, with append switched on
    CopyMapper mapper = new CopyMapper();
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext =
        new StubContext(conf, null, 0).getContext();
    mapperContext.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true);
    mapper.setup(mapperContext);
    for (Map.Entry<Text, CopyListingFileStatus> item : entries.entrySet()) {
      Text key = item.getKey();
      CopyListingFileStatus status = item.getValue();
      mapper.map(key, status, mapperContext);
    }

    // only modified and created files/directories should have been listed
    Assert.assertEquals(numCreatedModified, entries.size());

    // source snapshot and target must now have the same structure
    verifyCopy(dfs.getFileStatus(snapshotRoot), dfs.getFileStatus(target), false);
  }
  /**
   * Tests the basic sync functionality.
   *
   * <p>Snapshot {@code s2} is taken after {@code changeData(source)}; the source tree is then
   * modified again (one delete, one mkdir) before the sync runs. The test asserts that the copy
   * listing holds exactly the number of entries reported by {@code changeData}, that the mapper
   * counted {@code BLOCK_SIZE * 3} copied bytes, and that the {@code s2} snapshot tree matches
   * {@code target}.
   *
   * @throws Exception if any file system, listing or mapper call fails
   */
  @Test
  public void testSync() throws Exception {
    initData(source);
    initData(target);
    enableAndCreateFirstSnapshot();

    // change the source tree, then capture the result as snapshot s2
    int expectedEntries = changeData(source);
    dfs.createSnapshot(source, "s2");

    // further changes made on source after s2 was taken. these should not affect
    // the later distcp since we're copying (s2-s1) to target
    Path removedFile = new Path(source, "foo/d1/foo/f1");
    dfs.delete(removedFile, true);
    Path addedDir = new Path(source, "foo/d1/foo/newdir");
    dfs.mkdirs(addedDir);

    System.out.println(dfs.getSnapshotDiffReport(source, "s1", "s2"));

    // run the sync; it has to report success
    DistCpSync syncer = new DistCpSync(options, conf);
    boolean synced = syncer.sync();
    Assert.assertTrue(synced);

    // the sync is expected to have rewritten the source path to the s2 snapshot path
    String snapshotSuffix = HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2";
    Path snapshotRoot = new Path(source, snapshotSuffix);
    Assert.assertEquals(snapshotRoot, options.getSourcePaths().get(0));

    // write the copy listing and read it back
    Path listingFile = new Path("/tmp/META/fileList.seq");
    CopyListing lister = new SimpleCopyListing(conf, new Credentials(), syncer);
    lister.buildListing(listingFile, options);
    Map<Text, CopyListingFileStatus> entries = getListing(listingFile);

    // drive the mapper by hand over each listing entry, with append switched on
    CopyMapper mapper = new CopyMapper();
    StubContext stub = new StubContext(conf, null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext = stub.getContext();
    mapperContext.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true);
    mapper.setup(mapperContext);
    for (Map.Entry<Text, CopyListingFileStatus> item : entries.entrySet()) {
      Text key = item.getKey();
      CopyListingFileStatus status = item.getValue();
      mapper.map(key, status, mapperContext);
    }

    // only modified and created files/directories should have been listed
    Assert.assertEquals(expectedEntries, entries.size());

    // only the newly appended data of f2 and the new file f1 should have been copied
    long bytesCopied =
        stub.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue();
    Assert.assertEquals(BLOCK_SIZE * 3, bytesCopied);

    // source snapshot and target must now have the same structure
    verifyCopy(dfs.getFileStatus(snapshotRoot), dfs.getFileStatus(target), false);
  }
Esempio n. 3
0
 /**
  * Builds the input file listing for a copy job.
  *
  * <p>A {@code CopyListing} implementation is obtained from the job's configuration and
  * credentials together with {@code inputOptions}, and is asked to write the listing to the path
  * returned by {@code getFileListingPath()}. The number of paths and bytes considered for the
  * copy are logged afterwards.
  *
  * @param job handle to the job whose configuration and credentials are used
  * @return the path where the copy listing was written
  * @throws IOException if obtaining the listing path or building the listing fails
  */
 protected Path createInputFileListing(Job job) throws IOException {
   final Path listingFile = getFileListingPath();
   final CopyListing lister =
       CopyListing.getCopyListing(job.getConfiguration(), job.getCredentials(), inputOptions);
   lister.buildListing(listingFile, inputOptions);

   final String pathSummary = "Number of paths considered for copy: " + lister.getNumberOfPaths();
   LOG.info(pathSummary);

   // the byte total is the amount considered, not necessarily the amount copied
   final String byteSummary =
       "Number of bytes considered for copy: "
           + lister.getBytesToCopy()
           + " (Actual number of bytes copied depends on whether any files are "
           + "skipped or overwritten.)";
   LOG.info(byteSummary);

   return listingFile;
 }