/**
 * Creates an {@code EntityUtil.SUCCEEDED_FILE_NAME} marker under every target location that
 * corresponds to the configured include path.
 *
 * <p>The part of the {@code falcon.include.path} setting that follows the single source path is
 * passed through {@code getFixedPath}, appended to the target path, and glob-expanded on the
 * target file system. A marker file is created (empty) under each match.
 *
 * @param options DistCp options supplying the target path and the source path list, which is
 *     expected to contain exactly one entry
 * @throws IOException if the target file system cannot be obtained, globbed, or written to
 */
private void executePostProcessing(DistCpOptions options) throws IOException {
    Path targetPath = options.getTargetPath();
    FileSystem fs = targetPath.getFileSystem(getConf());
    List<Path> inPaths = options.getSourcePaths();
    assert inPaths.size() == 1 : "Source paths more than 1 can't be handled";

    String sourcePrefix = inPaths.get(0).toString();
    String includePath = new Path(getConf().get("falcon.include.path")).toString();
    // startsWith instead of substring + equals: an include path shorter than the source path
    // now fails the assertion with its message rather than a StringIndexOutOfBoundsException.
    assert includePath.startsWith(sourcePrefix) : "Source path is not a subset of include path";

    String relativePath = includePath.substring(sourcePrefix.length());
    String fixedPath = getFixedPath(relativePath);

    Path globPath = new Path(targetPath.toString() + "/" + fixedPath);
    FileStatus[] files = fs.globStatus(globPath);
    // A glob that matches nothing may come back as an empty array rather than null;
    // report both cases instead of silently doing nothing for the empty one.
    if (files == null || files.length == 0) {
      LOG.info("No files present in path: " + globPath.toString());
      return;
    }

    for (FileStatus file : files) {
      Path marker = new Path(file.getPath(), EntityUtil.SUCCEEDED_FILE_NAME);
      // Create the marker as an empty file; closing immediately flushes it.
      fs.create(marker).close();
      LOG.info("Created " + marker);
    }
  }
예제 #2
0
  /**
   * Commits a job with sync-folder and delete-missing enabled and checks that the source and
   * target trees are in sync in both directions, then commits a second time to check that the
   * commit is idempotent.
   *
   * <p>The target tree is prepared by renaming a second generated tree onto the target base
   * (presumably so the target holds entries the source lacks — confirm against
   * {@code TestDistCpUtils.createTestSetup}).
   */
  @Test
  public void testDeleteMissing() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
      fs = FileSystem.get(conf);
      String sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      String targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      fs.rename(new Path(targetBaseAdd), new Path(targetBase));

      DistCpOptions options =
          new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
      options.setSyncFolder(true);
      options.setDeleteMissing(true);
      options.appendToConf(conf);

      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
      listing.buildListing(listingFile, options);

      // The committer reads both the work path and the final path from the configuration.
      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)
          || !TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
        Assert.fail("Source and target folders are not in sync");
      }

      // Test for idempotent commit
      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)
          || !TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
    } catch (AssertionError e) {
      // Let the specific assertion message surface instead of having the catch-all below
      // replace it with the generic "Delete missing failure".
      throw e;
    } catch (Throwable e) {
      LOG.error("Exception encountered while testing for delete missing", e);
      Assert.fail("Delete missing failure");
    } finally {
      TestDistCpUtils.delete(fs, "/tmp1");
      // Reset the flag that options.appendToConf wrote into the configuration.
      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
    }
  }
  /**
   * Builds the DistCp options from the parsed command line.
   *
   * <p>Reads the {@code sourcePaths} option (comma-separated), the {@code targetPath} option and
   * the {@code maxMaps} option. Sync-folder and blocking mode are always switched on.
   *
   * @param cmd parsed command line carrying the three options above
   * @return options configured for the requested copy
   */
  protected DistCpOptions getDistCpOptions(CommandLine cmd) {
    String rawSources = cmd.getOptionValue("sourcePaths").trim();
    List<Path> sources = getPaths(rawSources.split(","));
    Path target = new Path(cmd.getOptionValue("targetPath").trim());

    DistCpOptions result = new DistCpOptions(sources, target);
    result.setSyncFolder(true);
    result.setBlocking(true);

    int maxMaps = Integer.parseInt(cmd.getOptionValue("maxMaps"));
    result.setMaxMaps(maxMaps);
    return result;
  }
예제 #4
0
  /**
   * Commits a job with permission preservation requested and checks that directories under the
   * target base end up with the source permission (0777) rather than the permission the target
   * was created with (0700). The commit is run twice to check that it is idempotent.
   */
  @Test
  public void testPreserveStatus() {
    TaskAttemptContext attemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            attemptContext.getConfiguration(), attemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, attemptContext);
      fs = FileSystem.get(conf);

      // Octal spellings of the permission bits: rwxrwxrwx and rwx------.
      FsPermission sourcePerm = new FsPermission((short) 0777);
      FsPermission initialPerm = new FsPermission((short) 0700);
      String sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
      String targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

      Path sourceRoot = new Path(sourceBase);
      DistCpOptions options = new DistCpOptions(Arrays.asList(sourceRoot), new Path("/out"));
      options.preserve(FileAttribute.PERMISSION);
      options.appendToConf(conf);

      Path listingFile = new Path("/tmp1/" + rand.nextLong());
      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
      listing.buildListing(listingFile, options);

      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

      // First pass is the real commit; the second pass tests for an idempotent commit.
      for (int pass = 0; pass < 2; pass++) {
        committer.commitJob(jobContext);
        boolean permissionsMatch = checkDirectoryPermissions(fs, targetBase, sourcePerm);
        if (!permissionsMatch) {
          Assert.fail("Permission don't match");
        }
      }
    } catch (IOException e) {
      LOG.error("Exception encountered while testing for preserve status", e);
      Assert.fail("Preserve status failure");
    } finally {
      TestDistCpUtils.delete(fs, "/tmp1");
    }
  }
예제 #5
-1
  /**
   * Copies {@code src} to {@code dst} by running DistCp in update mode with CRC checking
   * skipped.
   *
   * <p>The {@code mapred.mapper.new-api} flag is switched on in {@code conf} for the duration of
   * the run and then put back to the value it had on entry ({@code false} if it was unset), so
   * the caller's configuration is not left altered.
   *
   * @param src source path to copy from
   * @param dst destination path to copy to
   * @param conf configuration handed to DistCp; temporarily modified as described above
   * @return {@code true} if DistCp exited with status 0, {@code false} otherwise
   * @throws IOException if DistCp could not be created or run; the original failure is the cause
   */
  @Override
  public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {

    DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
    options.setSyncFolder(true);
    options.setSkipCRC(true);
    options.preserve(FileAttribute.BLOCKSIZE);

    // Creates the command-line parameters for distcp
    String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};

    // Remember the caller's setting so the finally block restores it instead of forcing false.
    final String newApiKey = "mapred.mapper.new-api";
    final boolean previousNewApi = conf.getBoolean(newApiKey, false);

    try {
      conf.setBoolean(newApiKey, true);
      DistCp distcp = new DistCp(conf, options);

      // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue
      // added by HADOOP-10459
      return distcp.run(params) == 0;
    } catch (Exception e) {
      throw new IOException("Cannot execute DistCp process: " + e, e);
    } finally {
      conf.setBoolean(newApiKey, previousNewApi);
    }
  }