private String downloadResource(String value, boolean convertToUnix) {
  if (value.matches("(" + getMatchingSchemaAsRegex() + ")://.*")) {
    getConsole().printInfo("converting to local " + value);
    File resourceDir = new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
    String destinationName = new Path(value).getName();
    File destinationFile = new File(resourceDir, destinationName);
    if (resourceDir.exists() && !resourceDir.isDirectory()) {
      throw new RuntimeException(
          "The resource directory is not a directory; resourceDir is set to " + resourceDir);
    }
    if (!resourceDir.exists() && !resourceDir.mkdirs()) {
      throw new RuntimeException("Couldn't create directory " + resourceDir);
    }
    try {
      FileSystem fs = FileSystem.get(new URI(value), conf);
      fs.copyToLocalFile(new Path(value), new Path(destinationFile.getCanonicalPath()));
      value = destinationFile.getCanonicalPath();
      if (convertToUnix && DosToUnix.isWindowsScript(destinationFile)) {
        try {
          DosToUnix.convertWindowsScriptToUnix(destinationFile);
        } catch (Exception e) {
          throw new RuntimeException("Caught exception while converting to unix line endings", e);
        }
      }
    } catch (Exception e) {
      throw new RuntimeException("Failed to read external resource " + value, e);
    }
  }
  return value;
}
private void stats() throws Exception {
  CliOptionsParser.StatsSequenceCommandOptions statsSequenceCommandOptions =
      sequenceCommandOptions.statsSequenceCommandOptions;

  // get input parameters
  String input = statsSequenceCommandOptions.input;
  String output = statsSequenceCommandOptions.output;
  int kvalue = statsSequenceCommandOptions.kmers;

  // prepare the HDFS output folder
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  String outHdfsDirname = Long.toString(new Date().getTime());

  // run MapReduce job to compute stats
  ReadStatsMR.run(input, outHdfsDirname, kvalue);

  // post-processing
  Path outFile = new Path(outHdfsDirname + "/part-r-00000");
  try {
    if (!fs.exists(outFile)) {
      logger.error("Stats results file not found: {}", outFile.getName());
    } else {
      String outRawFileName = output + "/stats.json";
      fs.copyToLocalFile(outFile, new Path(outRawFileName));
      // Utils.parseStatsFile(outRawFileName, out);
    }
  } finally {
    // clean up the temporary HDFS folder even if the copy fails
    // (the original caught IOException only to rethrow it unchanged)
    fs.delete(new Path(outHdfsDirname), true);
  }
}
// Download a file from HDFS to the local filesystem
public void download(String remote, String local) throws IOException {
  Path path = new Path(remote);
  FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
  fs.copyToLocalFile(path, new Path(local));
  log.debug("download: from " + remote + " to " + local);
  fs.close();
}
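A note on the snippet above: by default copyToLocalFile writes through the checksummed local filesystem, so it leaves a ".crc" sidecar next to the downloaded file, and closing the FileSystem closes the shared cached instance. A minimal sketch of the four-argument overload that writes through the raw local filesystem instead; the hdfsPath and conf fields are assumed from the surrounding class:

// Minimal sketch: download without the ".crc" sidecar by using the
// four-argument overload (delSrc=false, useRawLocalFileSystem=true).
// "hdfsPath" and "conf" are assumed fields, as in download() above.
public void downloadRaw(String remote, String local) throws IOException {
  FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
  try {
    fs.copyToLocalFile(false, new Path(remote), new Path(local), true);
  } finally {
    fs.close(); // note: closes the (possibly shared) cached FileSystem
  }
}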
public static void main(String[] args) throws Exception {
  final String sitePath = "/tmp/scale-site.conf";
  final String testPath = "/tmp/scale-test.conf";
  Opts opts = new Opts();
  opts.parseArgs(Run.class.getName(), args);

  Configuration conf = CachedConfiguration.getInstance();
  FileSystem fs = FileSystem.get(conf);
  fs.copyToLocalFile(new Path("/accumulo-scale/conf/site.conf"), new Path(sitePath));
  fs.copyToLocalFile(
      new Path(String.format("/accumulo-scale/conf/%s.conf", opts.testId)), new Path(testPath));

  // load configuration file properties
  // (try-with-resources so both streams are closed; the original leaked the second one)
  Properties scaleProps = new Properties();
  Properties testProps = new Properties();
  try (FileInputStream siteIn = new FileInputStream(sitePath);
      FileInputStream testIn = new FileInputStream(testPath)) {
    scaleProps.load(siteIn);
    testProps.load(testIn);
  } catch (Exception e) {
    System.out.println("Problem loading config file");
    e.printStackTrace();
  }

  ScaleTest test =
      (ScaleTest)
          Class.forName(String.format("accumulo.test.scalability.%s", opts.testId)).newInstance();
  test.init(scaleProps, testProps, opts.numTabletServers);

  if (opts.action.equalsIgnoreCase("setup")) {
    test.setup();
  } else if (opts.action.equalsIgnoreCase("client")) {
    InetAddress addr = InetAddress.getLocalHost();
    String host = addr.getHostName();
    fs.createNewFile(new Path("/accumulo-scale/clients/" + host));
    test.client();
    fs.copyFromLocalFile(new Path("/tmp/scale.out"), new Path("/accumulo-scale/results/" + host));
  } else if (opts.action.equalsIgnoreCase("teardown")) {
    test.teardown();
  }
}
// the method which actually copies the caches locally and unjars/unzips them
// and does chmod for the files
private static Path localizeCache(
    Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive)
    throws IOException {
  FileSystem fs = getFileSystem(cache, conf);
  FileSystem localFs = FileSystem.getLocal(conf);
  Path parchive = null;
  if (isArchive) {
    parchive =
        new Path(cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName()));
  } else {
    parchive = cacheStatus.localizedLoadPath;
  }
  if (!localFs.mkdirs(parchive.getParent())) {
    throw new IOException(
        "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString());
  }
  String cacheId = cache.getPath();
  fs.copyToLocalFile(new Path(cacheId), parchive);
  if (isArchive) {
    String tmpArchive = parchive.toString().toLowerCase();
    File srcFile = new File(parchive.toString());
    File destDir = new File(parchive.getParent().toString());
    if (tmpArchive.endsWith(".jar")) {
      RunJar.unJar(srcFile, destDir);
    } else if (tmpArchive.endsWith(".zip")) {
      FileUtil.unZip(srcFile, destDir);
    } else if (isTarFile(tmpArchive)) {
      FileUtil.unTar(srcFile, destDir);
    }
    // else do nothing: the file is copied into the directory as-is
  }
  long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString()));
  cacheStatus.size = cacheSize;
  addCacheInfoUpdate(cacheStatus);

  // do chmod here
  try {
    // Setting recursive permission to grant everyone read and execute
    Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir);
    LOG.info("Doing chmod on localdir: " + localDir);
    FileUtil.chmod(localDir.toString(), "ugo+rx", true);
  } catch (InterruptedException e) {
    LOG.warn("Exception in chmod: " + e.toString());
  }

  // update cacheStatus to reflect the newly cached file
  cacheStatus.mtime = getTimestamp(conf, cache);
  return cacheStatus.localizedLoadPath;
}
public boolean copy(Path source, String dest) {
  try {
    if (!fileSystem.exists(source)) {
      logger.warn("File " + source + " does not exist");
      return false;
    }
    fileSystem.copyToLocalFile(source, new Path(dest));
    return true;
  } catch (IOException e) {
    logger.error(e.getMessage(), e);
  }
  return false;
}
public void copyToLocalFile(boolean delSrc, Path src, Path dst) {
  IOException te = null;
  int cnt = 0;
  while (cnt < MAX_TRY) {
    try {
      mFs.copyToLocalFile(delSrc, src, dst);
    } catch (IOException e) {
      cnt++;
      LOG.error(cnt + " : " + e.getMessage(), e);
      te = e;
      continue;
    }
    return;
  }
  CommonUtils.runtimeException(te);
}
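The wrapper above retries immediately on failure. For transient network or DataNode errors, a short pause between attempts is often gentler on the cluster; a hypothetical variant (MAX_TRY, mFs, and LOG are assumed from the same surrounding class):

// Hypothetical variant of the retry wrapper above that sleeps briefly
// between attempts; MAX_TRY, mFs, and LOG are assumed from the class.
public void copyToLocalFileWithBackoff(boolean delSrc, Path src, Path dst) throws IOException {
  IOException last = null;
  for (int attempt = 1; attempt <= MAX_TRY; attempt++) {
    try {
      mFs.copyToLocalFile(delSrc, src, dst);
      return;
    } catch (IOException e) {
      last = e;
      LOG.error("attempt " + attempt + " failed: " + e.getMessage(), e);
      try {
        Thread.sleep(1000L * attempt); // linear backoff between attempts
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
        break;
      }
    }
  }
  throw last;
}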
private void getSegmentFiles(String pathStr, File dir, FileSystem fs) throws IOException {
  if (!dir.exists() && !dir.mkdirs()) {
    throw new IOException(dir + " does not exist and creation failed");
  }
  final File tmpDownloadFile = File.createTempFile("dataSegment", ".zip");
  if (tmpDownloadFile.exists() && !tmpDownloadFile.delete()) {
    logger.warn("Couldn't clear out temporary file [%s]", tmpDownloadFile);
  }
  try {
    final Path inPath = new Path(pathStr);
    fs.copyToLocalFile(false, inPath, new Path(tmpDownloadFile.toURI()));
    long size = 0L;
    try (final ZipInputStream zipInputStream =
        new ZipInputStream(new BufferedInputStream(new FileInputStream(tmpDownloadFile)))) {
      final byte[] buffer = new byte[1 << 13];
      for (ZipEntry entry = zipInputStream.getNextEntry();
          entry != null;
          entry = zipInputStream.getNextEntry()) {
        final String fileName = entry.getName();
        try (final FileOutputStream fos =
            new FileOutputStream(dir.getAbsolutePath() + File.separator + fileName)) {
          for (int len = zipInputStream.read(buffer);
              len >= 0;
              len = zipInputStream.read(buffer)) {
            size += len;
            fos.write(buffer, 0, len);
          }
        }
      }
    }
  } finally {
    if (tmpDownloadFile.exists() && !tmpDownloadFile.delete()) {
      logger.warn("Temporary download file could not be deleted [%s]", tmpDownloadFile);
    }
  }
}
public void copyDirectoryToLocal(Path src, Path dest) throws Exception {
  FileSystem srcFs = src.getFileSystem(conf);
  FileSystem destFs = dest.getFileSystem(conf);
  if (srcFs.exists(src)) {
    FileStatus[] files = srcFs.listStatus(src);
    for (FileStatus file : files) {
      String name = file.getPath().getName();
      Path dfsPath = file.getPath();
      Path localPath = new Path(dest, name);
      if (file.isDir()) {
        // recurse into subdirectories, creating them locally as needed
        if (!destFs.exists(localPath)) {
          destFs.mkdirs(localPath);
        }
        copyDirectoryToLocal(dfsPath, localPath);
      } else {
        srcFs.copyToLocalFile(dfsPath, localPath);
      }
    }
  }
}
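For comparison, Hadoop's FileUtil.copy already recurses into directories, so a hand-rolled walk like the one above can usually collapse to a single call. A sketch under the same assumptions (a conf field on the surrounding class):

// Alternative sketch: FileUtil.copy recurses over directories itself.
// The "conf" field is assumed from the surrounding class, as above.
public void copyDirectoryToLocalSimple(Path src, Path dest) throws IOException {
  FileSystem srcFs = src.getFileSystem(conf);
  FileSystem destFs = dest.getFileSystem(conf);
  FileUtil.copy(srcFs, src, destFs, dest, false /* deleteSource */, conf);
}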
/** Copies the job file to the working directory of the process that will be started. */
@SuppressWarnings("deprecation")
private void localizeTaskConfiguration(
    TaskTracker tracker, JobConf ttConf, String workDir, Task t, JobID jobID) throws IOException {
  Path jobFile = new Path(t.getJobFile());
  FileSystem systemFS = tracker.systemFS;
  this.localizedJobFile = new Path(workDir, jobID + ".xml");
  LOG.info("Localizing CJT configuration from " + jobFile + " to " + localizedJobFile);
  systemFS.copyToLocalFile(jobFile, localizedJobFile);
  JobConf localJobConf = new JobConf(localizedJobFile);
  boolean modified = Task.saveStaticResolutions(localJobConf);
  if (modified) {
    FSDataOutputStream out =
        new FSDataOutputStream(new FileOutputStream(localizedJobFile.toUri().getPath()));
    try {
      localJobConf.writeXml(out);
    } finally {
      // close in a finally block; the original only closed the stream on failure
      out.close();
    }
  }
  // Add the values from the job conf to the configuration of this runner
  this.conf.addResource(localizedJobFile);
}
private String downloadResource(String value, boolean convertToUnix) {
  if (canDownloadResource(value)) {
    getConsole().printInfo("converting to local " + value);
    File resourceDir = new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
    String destinationName = new Path(value).getName();
    File destinationFile = new File(resourceDir, destinationName);
    if (resourceDir.exists() && !resourceDir.isDirectory()) {
      throw new RuntimeException(
          "The resource directory is not a directory; resourceDir is set to " + resourceDir);
    }
    if (!resourceDir.exists() && !resourceDir.mkdirs()) {
      throw new RuntimeException("Couldn't create directory " + resourceDir);
    }
    try {
      FileSystem fs = FileSystem.get(new URI(value), conf);
      fs.copyToLocalFile(new Path(value), new Path(destinationFile.getCanonicalPath()));
      value = destinationFile.getCanonicalPath();

      // add "execute" permission to downloaded resource file (needed when loading dll file)
      FileUtil.chmod(value, "ugo+rx", true);

      if (convertToUnix && DosToUnix.isWindowsScript(destinationFile)) {
        try {
          DosToUnix.convertWindowsScriptToUnix(destinationFile);
        } catch (Exception e) {
          throw new RuntimeException(
              "Caught exception while converting file " + destinationFile + " to unix line endings",
              e);
        }
      }
    } catch (Exception e) {
      throw new RuntimeException("Failed to read external resource " + value, e);
    }
  }
  return value;
}
// Video splits are converted to the target format here...
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  try {
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);
    String st = value.toString().trim();
    System.out.println("job2:mapInp:-" + st);
    String[] fmt = st.split(" #!# \\*");
    String[] lst = fmt[0].split(" #!# ");
    String out = "", dlt = "";
    int flag = 1;
    for (String st1 : lst) {
      Pattern x = Pattern.compile("(.*)/(.*)");
      Matcher xm = x.matcher(st1);
      String prefixPath = "", fnm = "", inpExt = "";
      while (xm.find()) {
        prefixPath = xm.group(1);
        fnm = xm.group(2);
      }
      String[] tmpArr = fnm.split("\\.");
      fnm = tmpArr[0];
      inpExt = tmpArr[1];
      // pull the split out of HDFS onto the local filesystem for transcoding
      hdfs.copyToLocalFile(true, new Path(st1), new Path("/home/" + fnm + "." + inpExt));
      String fname = "/home/" + fnm;
      if (flag == 1) {
        flag = 0;
        out += prefixPath + "/" + fnm + "." + fmt[1];
      } else {
        out += " #!# " + prefixPath + "/" + fnm + "." + fmt[1];
      }
      if (fmt[1].equals("mpg") || fmt[1].equals("mpeg") || fmt[1].equals("mp4")) {
        Process p =
            Runtime.getRuntime()
                .exec(
                    "mencoder -of mpeg -ovc lavc -lavcopts vcodec=mpeg1video -oac copy "
                        + "/home/" + fnm + "." + inpExt + " -o " + fname + "." + fmt[1]);
        // drain mencoder's output so the process doesn't block on a full pipe
        // (BufferedReader replaces the deprecated DataInputStream.readLine)
        BufferedReader lsIn = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while (lsIn.readLine() != null) {}
        p.destroy();
        dlt += " /home/" + fnm + "." + inpExt;
      } else if (fmt[1].equals("avi")) {
        Process p =
            Runtime.getRuntime()
                .exec(
                    "mencoder -ovc lavc -oac mp3lame -o " + fname + "." + fmt[1] + " "
                        + "/home/" + fnm + "." + inpExt);
        BufferedReader lsIn = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while (lsIn.readLine() != null) {}
        p.destroy();
        dlt += " /home/" + fnm + "." + inpExt;
      } else {
        // TBD
        System.out.println("Unsupported target format!");
      }
      hdfs.copyFromLocalFile(
          true,
          true,
          new Path(fname + "." + fmt[1]),
          new Path(prefixPath + "/" + fnm + "." + fmt[1]));
    }
    // delete the local files after use (dlt already starts with a space)
    Runtime rt1 = Runtime.getRuntime();
    String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt};
    Process pr1 = rt1.exec(cmd1);
    pr1.waitFor();
    System.out.println("Job2 mapOut:" + out);
    context.write(new Text(lst[0]), new Text(out));
    System.out.println(out);
  } catch (IOException e) {
    System.out.println("exception happened - here's what I know: ");
    e.printStackTrace();
    System.exit(-1);
  }
}
private void run(String[] args) throws Exception {
  LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
  LlapOptions options = optionsProcessor.processOptions(args);
  if (options == null) {
    // help
    return;
  }

  Path tmpDir = new Path(options.getDirectory());

  if (conf == null) {
    throw new Exception("Cannot load any configuration to run command");
  }

  FileSystem fs = FileSystem.get(conf);
  FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();

  // needed so that the file is actually loaded into configuration.
  for (String f : NEEDED_CONFIGS) {
    conf.addResource(f);
    if (conf.getResource(f) == null) {
      throw new Exception("Unable to find required config file: " + f);
    }
  }
  for (String f : OPTIONAL_CONFIGS) {
    conf.addResource(f);
  }
  conf.reloadConfiguration();

  if (options.getName() != null) {
    // update service registry configs - caveat: this has nothing to do with the actual settings
    // as read by the AM
    // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
    // instances
    conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
  }

  if (options.getSize() != -1) {
    if (options.getCache() != -1) {
      Preconditions.checkArgument(
          options.getCache() < options.getSize(),
          "Cache has to be smaller than the container sizing");
    }
    if (options.getXmx() != -1) {
      Preconditions.checkArgument(
          options.getXmx() < options.getSize(),
          "Working memory has to be smaller than the container sizing");
    }
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) {
      Preconditions.checkArgument(
          options.getXmx() + options.getCache() < options.getSize(),
          "Working memory + cache has to be smaller than the container sizing");
    }
  }

  final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
  if (options.getSize() != -1) {
    final long containerSize = options.getSize() / (1024 * 1024);
    Preconditions.checkArgument(
        containerSize >= minAlloc,
        "Container size should be greater than minimum allocation(%s)",
        minAlloc + "m");
    conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
  }

  if (options.getExecutors() != -1) {
    conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
    // TODO: vcpu settings - possibly when DRFA works right
  }

  if (options.getCache() != -1) {
    conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache());
  }

  if (options.getXmx() != -1) {
    // Needs more explanation here
    // Xmx is not the max heap value in JDK8
    // You need to subtract 50% of the survivor fraction from this, to get actual usable memory
    // before it goes into GC
    conf.setLong(
        ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
        (long) (options.getXmx()) / (1024 * 1024));
  }

  for (Entry<Object, Object> props : options.getConfig().entrySet()) {
    conf.set((String) props.getKey(), (String) props.getValue());
  }

  URL logger = conf.getResource("llap-daemon-log4j2.properties");
  if (null == logger) {
    throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
  }

  Path home = new Path(System.getenv("HIVE_HOME"));
  Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin");

  if (!lfs.exists(home)) {
    throw new Exception("Unable to find HIVE_HOME: " + home);
  } else if (!lfs.exists(scripts)) {
    LOG.warn("Unable to find llap scripts: " + scripts);
  }

  Path libDir = new Path(tmpDir, "lib");

  String tezLibs = conf.get("tez.lib.uris");
  if (tezLibs == null) {
    LOG.warn("Missing tez.lib.uris in tez-site.xml");
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Copying tez libs from " + tezLibs);
  }
  lfs.mkdirs(libDir);
  fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
  CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true);
  lfs.delete(new Path(libDir, "tez.tar.gz"), false);

  lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir);
  lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), libDir);

  // copy default aux classes (json/hbase)
  for (String className : DEFAULT_AUX_CLASSES) {
    localizeJarForClass(lfs, libDir, className, false);
  }

  if (options.getIsHBase()) {
    try {
      localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
      Job fakeJob = new Job(new JobConf()); // HBase API is convoluted.
      TableMapReduceUtil.addDependencyJars(fakeJob);
      Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
      for (String jarPath : hbaseJars) {
        if (!jarPath.isEmpty()) {
          lfs.copyFromLocalFile(new Path(jarPath), libDir);
        }
      }
    } catch (Throwable t) {
      String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
      LOG.error(err);
      System.err.println(err);
      throw new RuntimeException(t);
    }
  }

  String auxJars = options.getAuxJars();
  if (auxJars != null && !auxJars.isEmpty()) {
    // TODO: transitive dependencies warning?
    String[] jarPaths = auxJars.split(",");
    for (String jarPath : jarPaths) {
      if (!jarPath.isEmpty()) {
        lfs.copyFromLocalFile(new Path(jarPath), libDir);
      }
    }
  }

  Path confPath = new Path(tmpDir, "conf");
  lfs.mkdirs(confPath);

  for (String f : NEEDED_CONFIGS) {
    copyConfig(options, lfs, confPath, f);
  }
  for (String f : OPTIONAL_CONFIGS) {
    try {
      copyConfig(options, lfs, confPath, f);
    } catch (Throwable t) {
      LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
    }
  }
  lfs.copyFromLocalFile(new Path(logger.toString()), confPath);

  // extract configs for processing by the python fragments in Slider
  JSONObject configs = new JSONObject();

  configs.put(
      ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB));
  configs.put(
      HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
      HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE));
  configs.put(
      HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname,
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT));
  configs.put(
      ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));
  configs.put(
      ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));
  configs.put(
      ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS));
  configs.put(
      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
      conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));
  configs.put(
      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
      conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1));

  FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json"));
  OutputStreamWriter w = new OutputStreamWriter(os);
  configs.write(w);
  w.close();
  os.close();
  lfs.close();
  fs.close();

  if (LOG.isDebugEnabled()) {
    LOG.debug("Exiting successfully");
  }
}
// merge the converted files here
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  System.out.println("I'm in Job2 reduce");
  Configuration config = new Configuration();
  FileSystem hdfs = FileSystem.get(config);
  try {
    String out = "";
    for (Text t : values) {
      out = t.toString().trim();
      System.out.println("job2:redInp:-" + out);
      break;
    }
    String[] outl = out.split(" #!# ");
    Pattern x = Pattern.compile("(.*)/(.*)\\.(.*)");
    Matcher xm = x.matcher(outl[0]);
    String prefixPath = "", fnm = "", ext = "";
    while (xm.find()) {
      prefixPath = xm.group(1);
      fnm = xm.group(2);
      ext = xm.group(3);
    }
    String foutname = fnm.split("_")[0] + "." + ext;
    String query = "mencoder -oac copy -ovc copy";
    int cnt = 0;
    for (String st : outl) {
      cnt++;
      String localSplit = "/home/" + fnm.split("_")[0] + "_" + cnt + "." + ext;
      hdfs.copyToLocalFile(true, new Path(st), new Path(localSplit));
      query += " " + localSplit;
    }
    query += " -o " + "/home/" + foutname;
    // the query for merging the video files is executed here
    Process p2 = Runtime.getRuntime().exec(query);
    // drain mencoder's output so the process doesn't block on a full pipe
    // (BufferedReader replaces the deprecated DataInputStream.readLine)
    BufferedReader lsIn = new BufferedReader(new InputStreamReader(p2.getInputStream()));
    while (lsIn.readLine() != null) {}
    p2.destroy();
    hdfs.copyFromLocalFile(
        true, true, new Path("/home/" + foutname), new Path(prefixPath + "/" + foutname));
    cnt = 0;
    String dlt1 = "";
    for (String st3 : outl) {
      cnt++;
      dlt1 += " " + "/home/" + fnm.split("_")[0] + "_" + cnt + "." + ext;
    }
    // delete the local split files after use (dlt1 already starts with a space)
    Runtime rt1 = Runtime.getRuntime();
    String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt1};
    Process pr1 = rt1.exec(cmd1);
    pr1.waitFor();
    context.write(new Text(""), new Text(prefixPath + "/" + foutname));
  } catch (IOException e) {
    System.out.println("exception happened - here's what I know: ");
    e.printStackTrace();
    System.exit(-1);
  }
}
/**
 * Download a file using the copyToLocalFile convenience method.
 *
 * @throws Exception
 * @throws IOException
 */
@Test
public void download2() throws Exception, IOException {
  fs.copyToLocalFile(new Path("/hadoop"), new Path("/Users/lixiongcheng/Downloads/hadoop"));
}
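A test like this needs an initialized fs handle. A hypothetical @Before setup; the NameNode URI and user below are placeholder assumptions, not values from the original test:

// Hypothetical @Before setup for the test above; the NameNode URI and the
// "hadoop" user are placeholder assumptions, not values from the original.
private FileSystem fs;

@Before
public void setUp() throws Exception {
  Configuration conf = new Configuration();
  fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf, "hadoop");
}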
public static void main(String[] args)
    throws InterruptedException, IOException, ClassNotFoundException {
  // BSP job configuration
  HamaConfiguration conf = new HamaConfiguration();
  final int CLUSTER_COUNT = 3;
  conf.setInt("bsp.local.tasks.maximum", CLUSTER_COUNT);

  BSPJob bsp = new BSPJob(conf, KMeansCluster.class);
  // Set the job name
  bsp.setJobName("K Means Clustering");
  bsp.setBspClass(ClusterBSP.class);

  // Set the task size as a number of GroomServer
  BSPJobClient jobClient = new BSPJobClient(conf);
  ClusterStatus cluster = jobClient.getClusterStatus(true);
  System.out.println("Grooms are: " + cluster.getActiveGroomNames());

  // Choose one as a master
  for (String peerName : cluster.getActiveGroomNames().values()) {
    System.out.println("Master Peer:" + peerName);
    conf.set(KMeansCluster.CONF_MASTER_TASK, peerName);
    break;
  }

  System.out.println("Setting number of tasks / clusters to:" + CLUSTER_COUNT);
  bsp.setNumBspTask(CLUSTER_COUNT);

  FileSystem fileSys = FileSystem.get(conf);
  final long jobTime = System.currentTimeMillis();
  final String srcFileName = "/tmp/kmeans_" + jobTime + "/random-data-in";
  final String fileOutputDir = "/tmp/kmeans_" + jobTime + "/output";
  final Path srcFilePath = new Path(srcFileName);
  final int numPoints = 1000;
  final int range = 100;
  new SphereRandomPointGenerator(CLUSTER_COUNT, 5)
      .generateSourceFile(fileSys, srcFilePath, numPoints, range, 0, 3);

  conf.set(KMeansCluster.CONF_FILE_SOURCE, srcFilePath.toString());
  conf.set(KMeansCluster.CONF_FILE_OUT, fileOutputDir);

  System.out.println("Starting job");
  if (bsp.waitForCompletion(true)) {
    System.out.println("Done!");
  }

  String localOut = "/tmp/" + jobTime + "/local/";
  fileSys.copyToLocalFile(new Path(fileOutputDir), new Path(localOut));
  // fileSys.listStatus()
  System.out.println("Output in: " + new Path(localOut));
}
// Run Pig Locally
public void runPigLocal(
    Map<String, String> params,
    String out,
    String tmp,
    final boolean quiet,
    final boolean silent,
    Configuration conf,
    String queue_name,
    String additional_jars,
    File pig_tmp,
    ArrayList<String> D_options,
    String PIG_DIR,
    FileSystem fs)
    throws IllegalArgumentException, IOException {
  // Create a temp file on local disk to hold the data to sort
  final File local_tmp = Files.createTempDir();
  local_tmp.deleteOnExit();
  Runtime.getRuntime()
      .addShutdownHook(
          new Thread(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    logConsole(quiet, silent, warn, "Deleting tmp files in local tmp");
                    delete(local_tmp);
                  } catch (IOException e) {
                    e.printStackTrace();
                  }
                }
              }));

  // Set input parameter for pig job
  params.put("tmpdir", local_tmp.toString() + "/" + tmp);

  // Check for an out of '-', meaning write to stdout
  String pigout;
  if (out.equals("-")) {
    params.put("out", local_tmp + "/" + tmp + "/final");
    pigout = local_tmp + "/" + tmp + "/final";
  } else {
    params.put("out", local_tmp + "/" + StringEscapeUtils.escapeJava(out));
    pigout = StringEscapeUtils.escapeJava(out);
  }

  // Copy the tmp folder from HDFS to the local tmp directory, and delete the remote folder
  fs.copyToLocalFile(true, new Path(tmp), new Path(local_tmp + "/" + tmp));

  try {
    logConsole(quiet, silent, info, "Running PIG Command");
    conf.set("mapred.job.queue.name", queue_name);
    conf.set("pig.additional.jars", additional_jars);
    conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
    conf.set("pig.logfile", pig_tmp.toString());
    conf.set("hadoopversion", "23");
    // PIG temp directory set to be able to delete all temp files/directories
    conf.set("pig.temp.dir", local_tmp.getAbsolutePath());

    // Set the output separator for logdriver
    String DEFAULT_OUTPUT_SEPARATOR = "\t";
    Charset UTF_8 = Charset.forName("UTF-8");
    String outputSeparator =
        conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      System.err.println(
          ";******************** The output separator must be a single byte in UTF-8. ******************** ");
      System.exit(1);
    }
    conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

    dOpts(D_options, silent, out, conf);

    PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
    UserGroupInformation.setConfiguration(new Configuration(false));
    pigServer.registerScript(PIG_DIR + "/formatAndSortLocal.pg", params);
  } catch (Exception e) {
    e.printStackTrace();
    System.exit(1);
  }

  logConsole(quiet, silent, warn, "PIG Job Completed.");

  if (out.equals("-")) {
    System.out.println(";#################### DATA RESULTS ####################");
    try {
      File results = new File(pigout);
      String[] resultList = results.list();
      // Find the files in the directory, open them, and print out the results
      for (int i = 0; i < resultList.length; i++) {
        if (resultList[i].contains("part-") && !resultList[i].contains(".crc")) {
          BufferedReader br =
              new BufferedReader(new FileReader(new File(pigout + "/" + resultList[i])));
          String line = br.readLine();
          while (line != null) {
            System.out.println(line);
            line = br.readLine();
          }
          br.close();
        }
      }
      System.out.println(";#################### END OF RESULTS ####################");
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(1);
    }
  } else {
    fs.copyFromLocalFile(
        new Path(local_tmp + "/" + StringEscapeUtils.escapeJava(out)), new Path(pigout));
    System.out.println(
        ";#################### Done. Search results are in " + pigout + " ####################");
  }
}
public int run(String[] args) throws Exception {
  // printUsage();
  /*
   * SETUP
   */
  Configuration argConf = getConf();
  Hashtable<String, String> confArg = new Hashtable<String, String>();
  setup(confArg, argConf);
  Date currentTime = new Date();
  Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
  Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
  Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
  logger.info("Running GeStore");

  // ZooKeeper setup
  Configuration config = HBaseConfiguration.create();
  zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
  zkInstance =
      new ZooKeeper(
          ZKConfig.getZKQuorumServersString(config),
          config.getInt("zookeeper.session.timeout", -1),
          zkWatcher);

  if (!confArg.get("task_id").isEmpty()) {
    confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
  }

  String lockRequest = confArg.get("file_id");
  if (!confArg.get("run_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
  if (!confArg.get("task_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

  // Get type of movement
  toFrom type_move = checkArgs(confArg);
  if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
    List<String> arguments = new ArrayList<String>();
    arguments.add("-Dinput=" + confArg.get("local_path"));
    arguments.add("-Dtable=" + confArg.get("file_id"));
    arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
    arguments.add("-Dtype=" + confArg.get("format"));
    arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
    arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
    arguments.add("-Drun_id=" + confArg.get("run_id"));
    if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
    if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
    if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
    String lockName = lock(lockRequest);
    String[] argumentString = arguments.toArray(new String[arguments.size()]);
    adddb.main(argumentString);
    unlock(lockName);
    System.exit(0);
  }

  // Database registration
  dbutil db_util = new dbutil(config);
  db_util.register_database(confArg.get("db_name_files"), true);
  db_util.register_database(confArg.get("db_name_runs"), true);
  db_util.register_database(confArg.get("db_name_updates"), true);
  FileSystem hdfs = FileSystem.get(config);
  FileSystem localFS = FileSystem.getLocal(config);

  // Get source type
  confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  confArg.put(
      "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  if (!confArg.get("source").equals("local")
      && type_move == toFrom.REMOTE2LOCAL
      && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
    confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
  }

  /*
   * Get previous timestamp
   */
  Get run_id_get = new Get(confArg.get("run_id").getBytes());
  Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
  KeyValue run_file_prev =
      run_get.getColumnLatest(
          "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
  String last_timestamp = new String("0");
  if (null != run_file_prev && !confArg.get("source").equals("local")) {
    long last_timestamp_real = run_file_prev.getTimestamp();
    Long current_timestamp = new Long(confArg.get("timestamp_real"));
    if ((current_timestamp - last_timestamp_real) > 36000) {
      last_timestamp = new String(run_file_prev.getValue());
      Integer lastTimestamp = new Integer(last_timestamp);
      lastTimestamp += 1;
      last_timestamp = lastTimestamp.toString();
      logger.info("Last timestamp: " + last_timestamp + " End date: " + endDate);
      Date last_run = new Date(run_file_prev.getTimestamp());
      if (last_run.before(endDate) && !full_run) {
        confArg.put("timestamp_start", last_timestamp);
      }
    }
  }

  Integer tse = new Integer(confArg.get("timestamp_stop"));
  Integer tss = new Integer(confArg.get("timestamp_start"));
  if (tss > tse) {
    logger.info("No new version of requested file.");
    return 0;
  }

  /*
   * Generate file
   */
  String lockName = lock(lockRequest);

  Get file_id_get = new Get(confArg.get("file_id").getBytes());
  Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
  if (!file_get.isEmpty()) {
    boolean found =
        hasFile(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg));
    if (confArg.get("source").equals("fullfile")) {
      found = false;
    }
    String filenames_put =
        getFileNames(
            db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
    // Filename not found in file database
    if (!found && type_move == toFrom.REMOTE2LOCAL) {
      if (!confArg.get("source").equals("local")) {
        // Generate intermediate file
        if (getFile(hdfs, confArg, db_util) == null) {
          unlock(lockName);
          return 1;
        }
        // Put generated file into file database
        if (!confArg.get("format").equals("fullfile")) {
          putFileEntry(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              confArg.get("full_file_name"),
              confArg.get("source"));
        }
      } else {
        logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
        unlock(lockName);
        return 1;
      }
    }
  } else {
    if (type_move == toFrom.REMOTE2LOCAL) {
      logger.warn("Remote file not found, and cannot be generated.");
      unlock(lockName);
      return 1;
    }
  }

  /*
   * Copy file
   * Update tables
   */
  if (type_move == toFrom.LOCAL2REMOTE) {
    if (!confArg.get("format").equals("fullfile")) {
      putFileEntry(
          db_util,
          hdfs,
          confArg.get("db_name_files"),
          confArg.get("file_id"),
          getFullPath(confArg),
          confArg.get("source"));
    }
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
  } else if (type_move == toFrom.REMOTE2LOCAL) {
    FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    unlock(lockName);
    for (FileStatus file : files) {
      Path cur_file = file.getPath();
      Path cur_local_path =
          new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
      String suffix = getSuffix(getFileName(confArg), cur_file.getName());
      if (suffix.length() > 0) {
        cur_local_path = cur_local_path.suffix(new String("." + suffix));
      }
      if (confArg.get("copy").equals("true")) {
        String crc = hdfs.getFileChecksum(cur_file).toString();
        if (checksumLocalTest(cur_local_path, crc)) {
          continue;
        } else {
          hdfs.copyToLocalFile(cur_file, cur_local_path);
          writeChecksum(cur_local_path, crc);
        }
      } else {
        System.out.println(cur_local_path + "\t" + cur_file);
      }
    }
  }
  unlock(lockName);
  return 0;
}
/**
 * Method to move files from HDFS to the local filesystem.
 *
 * <p>localPath: path on the machine's local filesystem
 * preservePath: path under which existing local files are preserved
 * fs: FileSystem object for HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * <p>Returns the date of the last file backed up if the size limit was reached, else zero.
 */
public long backupFiles(
    String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList, long size) {
  Path fsPath;
  long tmpSize = 0;
  long tmpDate = 0;

  // Start iterating over all paths
  for (Path hdfsPath : pathList) {
    try {
      long nFileSize = fs.getContentSummary(hdfsPath).getLength();
      tmpSize = tmpSize + nFileSize;

      if ((tmpSize <= size) || (size == 0)) {
        FileStatus stat = fs.getFileStatus(hdfsPath);

        System.err.println(
            "File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                + "perms: " + stat.getOwner() + "/" + stat.getGroup()
                + ", " + stat.getPermission().toString());

        tmpDate = stat.getModificationTime() / 1000;

        String sFsPath = localPath + hdfsPath.toUri().getPath();
        fsPath = new Path(sFsPath);

        File f = new File(sFsPath);

        // COMMENTED OUT: until a few backup cycles run
        // and the mtime gets in fact set on all copied
        // files.
        //
        // ignore it if the file exists and has the same mtime
        // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime()) {
        //   System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
        //   continue;
        // }

        if (false == m_bDryRun) {
          // check if we need to back up the local file
          // (not directory), if it already exists.
          if (f.exists() && f.isFile()) {
            // ignore files with substrings in the
            // no-preserve file
            if (true == doPreserveFile(sFsPath)) {
              // move it to the backup path
              String sNewPath = preservePath + hdfsPath.toUri().getPath();
              File newFile = new File(sNewPath);

              // create directory structure for new file?
              if (false == newFile.getParentFile().exists()) {
                if (false == newFile.getParentFile().mkdirs()) {
                  System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                  System.exit(1);
                }
              }

              // rename existing file to new location
              if (false == f.renameTo(newFile)) {
                System.err.println(
                    "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                System.exit(1);
              }

              System.out.println("preserved " + f.toString() + " into " + newFile.toString());
            } else {
              System.out.println("skipped preservation of " + f.toString());
            }
          }

          // copy from hdfs to the local filesystem
          fs.copyToLocalFile(hdfsPath, fsPath);

          // set the mtime to match the hdfs file
          f.setLastModified(stat.getModificationTime());

          // compare checksums on both files
          compareChecksums(fs, hdfsPath, sFsPath);
        }

        // don't print the progress after every file -- go
        // by at least 1% increments
        long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
        if (nPercentDone > m_nLastPercentBytesDone) {
          System.out.println(
              "progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                  + "% done, tstamp=" + tmpDate);
          m_nLastPercentBytesDone = nPercentDone;
        }

        if (m_nSleepSeconds > 0) {
          try {
            Thread.sleep(1000 * m_nSleepSeconds);
          } catch (Exception e2) {
            // ignore
          }
        }
      } else {
        return tmpDate;
      }
    } catch (IOException e) {
      System.err.println("FATAL ERROR: Something wrong with the file");
      System.err.println(e);
      System.out.println(tmpDate);
      System.exit(1);
      return 0;
    }
  }

  return 0;
}
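compareChecksums above is an external helper, and it cannot simply diff fs.getFileChecksum results: LocalFileSystem generally returns null there, and HDFS checksums depend on block layout. One hedged way to implement such a check is to stream both copies through the same digest, as in this sketch (standard JDK and Hadoop APIs only):

// Hypothetical implementation of a checksum comparison like the
// compareChecksums(...) helper used above: since the local filesystem has no
// checksum comparable to HDFS's, both copies are streamed through MD5.
static boolean sameMd5(FileSystem fs, Path hdfsPath, String localPath) throws IOException {
  try {
    byte[] a = md5Of(fs.open(hdfsPath));
    byte[] b = md5Of(new FileInputStream(localPath));
    return java.util.Arrays.equals(a, b);
  } catch (java.security.NoSuchAlgorithmException e) {
    throw new IOException(e);
  }
}

static byte[] md5Of(InputStream in) throws IOException, java.security.NoSuchAlgorithmException {
  java.security.MessageDigest md = java.security.MessageDigest.getInstance("MD5");
  try (InputStream s = in) {
    byte[] buf = new byte[8192];
    for (int n = s.read(buf); n >= 0; n = s.read(buf)) {
      md.update(buf, 0, n);
    }
  }
  return md.digest();
}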
// The input video files are split into chunks of 64MB here...
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  System.out.println("job1:mapInp:-" + line);
  String[] info = line.split(" ");
  info[0] = info[0].trim();
  info[1] = info[1].trim();
  String lstfnames = "", fname = "";
  try {
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);
    String prefixPath = "", fnm = "";
    Pattern x = Pattern.compile("(.*)/(.*)");
    Matcher xm = x.matcher(info[0]);
    while (xm.find()) {
      prefixPath = xm.group(1);
      fnm = xm.group(2);
    }
    // dst is the path of the file on the local system
    String dst = "/home/" + fnm;
    hdfs.copyToLocalFile(new Path(info[0]), new Path(dst));

    // probe the file with ffmpeg to find its duration
    Process p = Runtime.getRuntime().exec("ffmpeg -i " + dst);
    String s;
    BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
    Pattern D = Pattern.compile("Duration:[ ]*([0-9]+):([0-9]+):([0-9]+)");
    long time = 0; // "time" is the duration of the input video file
    long sps = 0; // "sps" is the duration in seconds of each video split
    while ((s = stdError.readLine()) != null) {
      Matcher md = D.matcher(s);
      while (md.find()) {
        time =
            Long.parseLong(md.group(1)) * 3600
                + Long.parseLong(md.group(2)) * 60
                + Long.parseLong(md.group(3));
      }
    }

    Process p1 = Runtime.getRuntime().exec("du -s " + dst);
    BufferedReader stdInput1 = new BufferedReader(new InputStreamReader(p1.getInputStream()));
    String s1 = "", size = ""; // "size" is the size of the input video file
    while ((s1 = stdInput1.readLine()) != null) {
      String s11[] = s1.split("\t");
      size = s11[0];
    }
    sps = (64 * 1024) * time / (Long.parseLong(size)); // chunk size is 64MB

    String hr, min, sc;
    hr = Long.toString(sps / 3600);
    min = Long.toString((sps % 3600) / 60);
    sc = Long.toString(sps % 60);
    if (hr.length() < 2) hr = "0" + hr;
    if (min.length() < 2) min = "0" + min;
    if (sc.length() < 2) sc = "0" + sc;
    String splt = hr + ":" + min + ":" + sc;

    // build the query to split the input video file
    String query = "mencoder -oac copy -ovc copy -ss ";
    String app = "", inpExt = "";
    Pattern xx = Pattern.compile("(.*)\\.(.*)");
    Matcher xxm = xx.matcher(dst);
    while (xxm.find()) {
      fname = xxm.group(1);
      inpExt = xxm.group(2);
    }
    String[] tmpArr = fname.split("/");
    String hdfsFname = "";
    long stSrt = 0;
    int cnt = 0;
    while (true) {
      if (stSrt > time) break;
      if (stSrt + sps > time) {
        long t = time - stSrt;
        hr = Long.toString(t / 3600);
        min = Long.toString((t % 3600) / 60);
        sc = Long.toString(t % 60);
        if (hr.length() < 2) hr = "0" + hr;
        if (min.length() < 2) min = "0" + min;
        if (sc.length() < 2) sc = "0" + sc;
        splt = hr + ":" + min + ":" + sc;
      }
      cnt++;
      hr = Long.toString(stSrt / 3600);
      min = Long.toString((stSrt % 3600) / 60);
      sc = Long.toString(stSrt % 60);
      if (hr.length() < 2) hr = "0" + hr;
      if (min.length() < 2) min = "0" + min;
      if (sc.length() < 2) sc = "0" + sc;
      app =
          hr + ":" + min + ":" + sc + " -endPos " + splt + " " + dst + " -o "
              + fname + "_" + Integer.toString(cnt) + "." + inpExt;
      Process p2 = Runtime.getRuntime().exec(query + app);
      // drain mencoder's output so the process doesn't block on a full pipe
      // (BufferedReader replaces the deprecated DataInputStream.readLine)
      BufferedReader lsIn = new BufferedReader(new InputStreamReader(p2.getInputStream()));
      while (lsIn.readLine() != null) {}
      p2.destroy();
      String[] tmpArr1 = fnm.split("\\.");
      hdfs.copyFromLocalFile(
          true,
          true,
          new Path(fname + "_" + Integer.toString(cnt) + "." + inpExt),
          new Path(prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt));
      lstfnames += prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt + " #!# ";
      stSrt += sps;
    }
    // delete the local file after use
    Runtime rt1 = Runtime.getRuntime();
    String[] cmd1 = {"/bin/bash", "-c", "rm " + dst};
    Process pr1 = rt1.exec(cmd1);
    pr1.waitFor();
    lstfnames += "*" + info[1];
    // "fname" is the input video file name with its extension (e.g. ".avi") removed;
    // "lstfnames" concatenates the names of all the video splits
    context.write(new Text(fname), new Text(lstfnames));
    System.out.println("lstfnames : " + lstfnames);
  } catch (IOException e) {
    System.out.println("exception happened - here's what I know: ");
    e.printStackTrace();
    System.exit(-1);
  }
}