private static void finalize( Configuration conf, JobConf jobconf, final Path destPath, String presevedAttributes) throws IOException { if (presevedAttributes == null) { return; } EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes); if (!preseved.contains(FileAttribute.USER) && !preseved.contains(FileAttribute.GROUP) && !preseved.contains(FileAttribute.PERMISSION)) { return; } FileSystem dstfs = destPath.getFileSystem(conf); Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL)); SequenceFile.Reader in = null; try { in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf); Text dsttext = new Text(); FilePair pair = new FilePair(); for (; in.next(dsttext, pair); ) { Path absdst = new Path(destPath, pair.output); updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs); } } finally { checkAndClose(in); } }
/** Check whether the file list have duplication. */ private static void checkDuplication(FileSystem fs, Path file, Path sorted, Configuration conf) throws IOException { SequenceFile.Reader in = null; try { SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, new Text.Comparator(), Text.class, Text.class, conf); sorter.sort(file, sorted); in = new SequenceFile.Reader(fs, sorted, conf); Text prevdst = null, curdst = new Text(); Text prevsrc = null, cursrc = new Text(); for (; in.next(curdst, cursrc); ) { if (prevdst != null && curdst.equals(prevdst)) { throw new DuplicationException( "Invalid input, there are duplicated files in the sources: " + prevsrc + ", " + cursrc); } prevdst = curdst; curdst = new Text(); prevsrc = cursrc; cursrc = new Text(); } } finally { checkAndClose(in); } }
public void configure(JobConf conf) { numberOfCenters = Integer.valueOf(conf.get("numberOfCenters")); centersDirectory = conf.get("centersReadDirectory"); try { Configuration c = new Configuration(); FileSystem fs = FileSystem.get(c); for (int index = 0; index < numberOfCenters; ++index) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c); LongWritable key = new LongWritable(); Point value = new Point(); reader.next(key, value); Point center = (Point) value; centers.add(center); reader.close(); } } catch (IOException e) { // do nothing // I hope this doesn't happen System.out.println("well, damn."); e.printStackTrace(); } }
/** * Produce splits such that each is no greater than the quotient of the total size and the * number of splits requested. * * @param job The handle to the JobConf object * @param numSplits Number of splits requested */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { int cnfiles = job.getInt(SRC_COUNT_LABEL, -1); long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1); String srcfilelist = job.get(SRC_LIST_LABEL, ""); if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) { throw new RuntimeException( "Invalid metadata: #files(" + cnfiles + ") total_size(" + cbsize + ") listuri(" + srcfilelist + ")"); } Path src = new Path(srcfilelist); FileSystem fs = src.getFileSystem(job); FileStatus srcst = fs.getFileStatus(src); ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits); LongWritable key = new LongWritable(); FilePair value = new FilePair(); final long targetsize = cbsize / numSplits; long pos = 0L; long last = 0L; long acc = 0L; long cbrem = srcst.getLen(); SequenceFile.Reader sl = null; try { sl = new SequenceFile.Reader(fs, src, job); for (; sl.next(key, value); last = sl.getPosition()) { // if adding this split would put this split past the target size, // cut the last split and put this next file in the next split. if (acc + key.get() > targetsize && acc != 0) { long splitsize = last - pos; splits.add(new FileSplit(src, pos, splitsize, (String[]) null)); cbrem -= splitsize; pos = last; acc = 0L; } acc += key.get(); } } finally { checkAndClose(sl); } if (cbrem != 0) { splits.add(new FileSplit(src, pos, cbrem, (String[]) null)); } return splits.toArray(new FileSplit[splits.size()]); }
@Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration conf = context.getConfiguration(); Path centroids = new Path(conf.get(CENTERS_CONF_KEY)); FileSystem fs = FileSystem.get(conf); SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids, conf); Centroid key = new Centroid(); IntWritable value = new IntWritable(); while (reader.next(key, value)) { Centroid clusterCenter = new Centroid(key); centers.add(clusterCenter); } reader.close(); }
/** Delete the dst files/dirs which do not exist in src */ private static void deleteNonexisting( FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf) throws IOException { if (!dstroot.isDir()) { throw new IOException( "dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); } // write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); final SequenceFile.Writer writer = SequenceFile.createWriter( jobfs, jobconf, dstlsr, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE); try { // do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for (lsrstack.push(dstroot); !lsrstack.isEmpty(); ) { final FileStatus status = lsrstack.pop(); if (status.isDir()) { for (FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), child); lsrstack.push(child); } } } } finally { checkAndClose(writer); } // sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter( jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf); sorter.sort(dstlsr, sortedlsr); // compare lsr list and dst list SequenceFile.Reader lsrin = null; SequenceFile.Reader dstin = null; try { lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf); dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf); // compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final FileStatus lsrstatus = new FileStatus(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final FsShell shell = new FsShell(conf); final String[] shellargs = {"-rmr", null}; boolean hasnext = dstin.next(dstpath, dstfrom); for (; lsrin.next(lsrpath, lsrstatus); ) { int dst_cmp_lsr = dstpath.compareTo(lsrpath); for (; hasnext && dst_cmp_lsr < 0; ) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { // lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { // lsrpath does not exist, delete it String s = new Path(dstroot.getPath(), lsrpath.toString()).toString(); if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) { shellargs[1] = s; int r = 0; try { r = shell.run(shellargs); } catch (Exception e) { throw new IOException("Exception from shell.", e); } if (r != 0) { throw new IOException( "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r); } } } } } finally { checkAndClose(lsrin); checkAndClose(dstin); } }