// the method which actually copies the caches locally and unjars/unzips them // and does chmod for the files private static Path localizeCache( Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive) throws IOException { FileSystem fs = getFileSystem(cache, conf); FileSystem localFs = FileSystem.getLocal(conf); Path parchive = null; if (isArchive) { parchive = new Path( cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName())); } else { parchive = cacheStatus.localizedLoadPath; } if (!localFs.mkdirs(parchive.getParent())) { throw new IOException( "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString()); } String cacheId = cache.getPath(); fs.copyToLocalFile(new Path(cacheId), parchive); if (isArchive) { String tmpArchive = parchive.toString().toLowerCase(); File srcFile = new File(parchive.toString()); File destDir = new File(parchive.getParent().toString()); if (tmpArchive.endsWith(".jar")) { RunJar.unJar(srcFile, destDir); } else if (tmpArchive.endsWith(".zip")) { FileUtil.unZip(srcFile, destDir); } else if (isTarFile(tmpArchive)) { FileUtil.unTar(srcFile, destDir); } // else will not do anyhting // and copy the file into the dir as it is } long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString())); cacheStatus.size = cacheSize; addCacheInfoUpdate(cacheStatus); // do chmod here try { // Setting recursive permission to grant everyone read and execute Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir); LOG.info("Doing chmod on localdir :" + localDir); FileUtil.chmod(localDir.toString(), "ugo+rx", true); } catch (InterruptedException e) { LOG.warn("Exception in chmod" + e.toString()); } // update cacheStatus to reflect the newly cached file cacheStatus.mtime = getTimestamp(conf, cache); return cacheStatus.localizedLoadPath; }
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset) throws IOException { RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false); Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>(); while (rit.hasNext()) { Path path = rit.next().getPath(); String filename = path.toString().substring(path.getParent().toString().length(), path.toString().length()); if (filename.startsWith("/part-")) { long filesize = fs.getFileStatus(path).getLen(); if (offset < filesize) { FSDataInputStream handle = fs.open(path); if (offset > 0) { handle.seek(offset); } fileHandleList.add(handle); } offset -= filesize; } } if (fileHandleList.size() == 1) return fileHandleList.get(0); else if (fileHandleList.size() > 1) { Enumeration<FSDataInputStream> enu = fileHandleList.elements(); return new SequenceInputStream(enu); } else { System.err.println("Error, no source file loaded. run genSeedDataset.sh fisrt!"); return null; } }
public int run(String[] args) throws Exception { Configuration conf = getConf(); conf.setLong("mapreduce.task.timeout", 10000 * 60 * 60); Path train_file = new Path(args[0]); Path test_file = new Path(args[1]); conf.set("train_file", train_file.getParent().getName()); Job job = new Job(conf, "MapTestID"); job.setJarByClass(MapTestID.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(MapTestID.MapTestIDMap.class); job.setReducerClass(MapTestID.MapTestIDReduce.class); job.setNumReduceTasks(300); FileInputFormat.addInputPath(job, train_file); FileInputFormat.addInputPath(job, test_file); FileOutputFormat.setOutputPath(job, new Path(args[2])); return job.waitForCompletion(true) ? 0 : 1; }
public static String absolutePath(Path p) { if (p == null) return ""; StringBuilder sb = new StringBuilder(); Path parentPath = p.getParent(); if (parentPath == null) return "/"; sb.append(absolutePath(parentPath)); if (sb.length() > 1) sb.append("/"); sb.append(p.getName()); return sb.toString(); }
/** * open a file for writing * * @param hdfsPath !null path - * @return !null stream */ @Override public OutputStream openFileForWrite(final Path src) { if (isRunningAsUser()) { return super.openFileForWrite(src); } if (true) throw new UnsupportedOperationException("Fix This"); // ToDo final FileSystem fs = getDFS(); try { Path parent = src.getParent(); guaranteeDirectory(parent); return FileSystem.create(fs, src, FULL_FILE_ACCESS); } catch (IOException e) { throw new RuntimeException(e); } }