@Override public int run(String[] args) throws Exception { CommandLine cmd = getCommand(args); DistCpOptions options = getDistCpOptions(cmd); Configuration conf = this.getConf(); // inject wf configs Path confPath = new Path("file:///" + System.getProperty("oozie.action.conf.xml")); LOG.info(confPath + " found conf ? " + confPath.getFileSystem(conf).exists(confPath)); conf.addResource(confPath); String falconFeedStorageType = cmd.getOptionValue("falconFeedStorageType").trim(); Storage.TYPE feedStorageType = Storage.TYPE.valueOf(falconFeedStorageType); DistCp distCp = (feedStorageType == Storage.TYPE.FILESYSTEM) ? new CustomReplicator(conf, options) : new DistCp(conf, options); LOG.info("Started DistCp"); distCp.execute(); if (feedStorageType == Storage.TYPE.FILESYSTEM) { executePostProcessing(options); // this only applies for FileSystem Storage. } LOG.info("Completed DistCp"); return 0; }
/**
 * Logs the given arguments, parses them into DistCp options and executes a DistCp job.
 *
 * @param config     configuration passed to the DistCp constructor
 * @param parsedArgs DistCp command-line arguments, parsed by {@code OptionsParser.parse}
 * @throws HadoopException wrapping any exception raised while logging, parsing or executing
 */
private static void invokeCopy(Configuration config, String[] parsedArgs) {
    try {
        String renderedArgs = StringUtils.arrayToCommaDelimitedString(parsedArgs);
        log.info("Running DistCp with arguments [" + renderedArgs + "]");

        org.apache.hadoop.tools.DistCp copyJob =
                new org.apache.hadoop.tools.DistCp(config, OptionsParser.parse(parsedArgs));
        copyJob.execute();
    } catch (Exception e) {
        // Every failure is surfaced as the unchecked project exception, keeping the cause.
        throw new HadoopException("Error running DistCp job", e);
    }
}
/**
 * Delegates to {@code DistCp.updatePermissions} when {@code preserve_status} is set;
 * does nothing otherwise.
 *
 * @param src status of the source file
 * @param dst status of the destination file
 * @throws IOException if the delegated update fails
 */
private void updatePermissions(FileStatus src, FileStatus dst) throws IOException {
    if (!preserve_status) {
        return;
    }
    DistCp.updatePermissions(src, dst, preseved, destFileSys);
}
@Override public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst); options.setSyncFolder(true); options.setSkipCRC(true); options.preserve(FileAttribute.BLOCKSIZE); // Creates the command-line parameters for distcp String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()}; try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }