public void map(Text key, LongWritable value,
                OutputCollector<Text, LongWritable> collector,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  random.setSeed(seed);
  reporter.setStatus("opening " + name);

  DataInputStream in = new DataInputStream(fs.open(new Path(DATA_DIR, name)));

  long read = 0;
  try {
    while (read < size) {
      long remains = size - read;
      int n = (remains <= buffer.length) ? (int) remains : buffer.length;
      in.readFully(buffer, 0, n);
      read += n;

      // regenerate the expected contents from the same per-file seed the writer used
      if (fastCheck) {
        Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE));
      } else {
        random.nextBytes(check);
      }

      // zero-pad the tails so a short final read still compares whole buffers
      if (n != buffer.length) {
        Arrays.fill(buffer, n, buffer.length, (byte) 0);
        Arrays.fill(check, n, check.length, (byte) 0);
      }
      assertTrue(Arrays.equals(buffer, check));

      reporter.setStatus("reading " + name + "@" + read + "/" + size);
    }
  } finally {
    in.close();
  }

  collector.collect(new Text("bytes"), new LongWritable(read));
  reporter.setStatus("read " + name);
}
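// Illustrative sketch, not part of the original tests: the read/write mappers in
// this listing consume (file name, byte count) records from a control
// SequenceFile, and each file name doubles as the java.util.Random seed that
// ties the writer's data to the reader's expected bytes. A control file could be
// produced roughly like this; fs, conf, and the size parameters are assumed to
// be configured the way the surrounding test configures them.
private static void createControlFile(FileSystem fs, Configuration conf,
                                      Path controlFile, long totalBytes,
                                      int numFiles, long masterSeed) throws IOException {
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, controlFile, Text.class, LongWritable.class);
  Random random = new Random(masterSeed);
  long maxSize = (totalBytes / numFiles) * 2 + 1;
  long written = 0;
  try {
    while (written < totalBytes) {
      // the name is a random long rendered as text; map() parses it back as the seed
      long fileSeed = random.nextLong();
      long size = Math.abs(random.nextLong() % maxSize) + 1;
      writer.append(new Text(Long.toString(fileSeed)), new LongWritable(size));
      written += size;
    }
  } finally {
    writer.close();
  }
}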
public void map(Text key, Text val,
                OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
  long acc = 0L;
  long recs = 0;
  final int keydiff = keymax - keymin;
  final int valdiff = valmax - valmin;
  for (long i = 0L; acc < bytesToWrite; ++i) {
    int recacc = 0;
    recacc += generateSentence(key, keymin + (0 == keydiff ? 0 : r.nextInt(keydiff)));
    recacc += generateSentence(val, valmin + (0 == valdiff ? 0 : r.nextInt(valdiff)));
    output.collect(key, val);
    ++recs;
    acc += recacc;
    reporter.incrCounter(Counters.BYTES_WRITTEN, recacc);
    reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
    reporter.setStatus(acc + "/" + (bytesToWrite - acc) + " bytes");
  }
  reporter.setStatus("Wrote " + recs + " records");
}
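// generateSentence(...) is referenced above but not shown here. One plausible
// implementation, assuming a shared word table `words`, a reusable StringBuilder
// `sentence`, and the Random `r` used in map(): it fills the Text with the
// requested number of random words and returns the number of characters written,
// which map() accumulates toward bytesToWrite.
private int generateSentence(Text txt, int numWords) {
  sentence.setLength(0);
  for (int i = 0; i < numWords; ++i) {
    sentence.append(words[r.nextInt(words.length)]);
    sentence.append(' ');
  }
  txt.set(sentence.toString());
  return sentence.length();
}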
public void map(Text key, LongWritable value,
                OutputCollector<K, LongWritable> collector,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  if (size == 0) return;

  reporter.setStatus("opening " + name);

  FSDataInputStream in = fs.open(new Path(DATA_DIR, name));

  try {
    for (int i = 0; i < SEEKS_PER_FILE; i++) {
      // generate a random position
      long position = Math.abs(random.nextLong()) % size;

      // seek file to that position
      reporter.setStatus("seeking " + name);
      in.seek(position);
      byte b = in.readByte();

      // check that byte matches
      byte checkByte = 0;
      // advance random state to that position
      random.setSeed(seed);
      for (int p = 0; p <= position; p += check.length) {
        reporter.setStatus("generating data for " + name);
        if (fastCheck) {
          checkByte = (byte) random.nextInt(Byte.MAX_VALUE);
        } else {
          random.nextBytes(check);
          checkByte = check[(int) (position % check.length)];
        }
      }
      assertEquals(b, checkByte);
    }
  } finally {
    in.close();
  }
}
public void map(Text key, LongWritable value,
                OutputCollector<Text, LongWritable> collector,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  random.setSeed(seed);
  reporter.setStatus("creating " + name);

  // write to temp file initially to permit parallel execution
  Path tempFile = new Path(DATA_DIR, name + suffix);
  OutputStream out = fs.create(tempFile);

  long written = 0;
  try {
    while (written < size) {
      if (fastCheck) {
        Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
      } else {
        random.nextBytes(buffer);
      }
      long remains = size - written;
      int length = (remains <= buffer.length) ? (int) remains : buffer.length;
      out.write(buffer, 0, length);
      written += length;
      reporter.setStatus("writing " + name + "@" + written + "/" + size);
    }
  } finally {
    out.close();
  }

  // rename to final location
  fs.rename(tempFile, new Path(DATA_DIR, name));

  collector.collect(new Text("bytes"), new LongWritable(written));
  reporter.setStatus("wrote " + name);
}
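// Hedged sketch of how a mapper like the one above could be wired into a job
// with the old org.apache.hadoop.mapred API; WriteMapper stands in for the
// enclosing mapper class, and controlDir/writeDir are assumed paths. The control
// file supplies the (name, size) records, and LongSumReducer totals the "bytes"
// counts the mappers emit.
private static void runWriteJob(Configuration conf, Path controlDir, Path writeDir)
    throws IOException {
  JobConf job = new JobConf(conf, WriteMapper.class);
  job.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(job, controlDir);
  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, writeDir);
  JobClient.runJob(job);
}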
private boolean validate(String str, Reporter reporter) {
  String[] parts = str.split("\t");
  if (parts.length != 6) {
    if (parts.length < 6) {
      reporter.incrCounter(LineCounters.TOO_FEW_TABS, 1);
    } else {
      reporter.incrCounter(LineCounters.TOO_MANY_TABS, 1);
    }
    reporter.incrCounter(LineCounters.BAD_LINES, 1);

    // report progress once for every 10 bad lines seen so far
    long badLines = reporter.getCounter(LineCounters.BAD_LINES).getCounter();
    if (badLines % 10 == 0) {
      reporter.setStatus("Got " + badLines + " bad lines.");
      System.err.println("Read another 10 bad lines.");
    }
    return false;
  }
  return true;
}
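// The LineCounters enum used by validate() is not shown; a minimal version and
// one way its totals could be read back from a finished job (old mapred API)
// might look like this. The enum constants are the only names beyond what
// validate() already references.
enum LineCounters {
  TOO_FEW_TABS,
  TOO_MANY_TABS,
  BAD_LINES
}

private static void printBadLineCounts(JobConf conf) throws IOException {
  RunningJob running = JobClient.runJob(conf);
  Counters counters = running.getCounters();
  // Counters.getCounter(Enum) returns the value aggregated across all tasks
  System.out.println("bad lines: " + counters.getCounter(LineCounters.BAD_LINES));
  System.out.println("too few tabs: " + counters.getCounter(LineCounters.TOO_FEW_TABS));
  System.out.println("too many tabs: " + counters.getCounter(LineCounters.TOO_MANY_TABS));
}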
public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
  totalSize *= MEGA;

  // create instance of local filesystem
  FileSystem localFS = FileSystem.getLocal(fsConfig);

  try {
    // native runtime
    Runtime runTime = Runtime.getRuntime();

    // copy the dso and executable from dfs
    synchronized (this) {
      localFS.delete(HDFS_TEST_DIR, true);
      if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
        throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
      }
    }

    synchronized (this) {
      if (!localFS.exists(HDFS_SHLIB)) {
        if (!FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig)) {
          throw new IOException("Failed to copy " + HDFS_SHLIB + " to local filesystem");
        }
        String chmodCmd = CHMOD + " a+x " + HDFS_SHLIB;
        Process process = runTime.exec(chmodCmd);
        int exitStatus = process.waitFor();
        if (exitStatus != 0) {
          throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
        }
      }
    }

    synchronized (this) {
      if (!localFS.exists(HDFS_READ)) {
        if (!FileUtil.copy(fs, HDFS_READ, localFS, HDFS_READ, false, fsConfig)) {
          throw new IOException("Failed to copy " + HDFS_READ + " to local filesystem");
        }
        String chmodCmd = CHMOD + " a+x " + HDFS_READ;
        Process process = runTime.exec(chmodCmd);
        int exitStatus = process.waitFor();
        if (exitStatus != 0) {
          throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
        }
      }
    }

    // exec the C program
    Path inFile = new Path(DATA_DIR, name);
    String readCmd = HDFS_READ + " " + inFile + " " + totalSize + " " + bufferSize;
    Process process = runTime.exec(readCmd, null, new File(HDFS_TEST_DIR.toString()));
    int exitStatus = process.waitFor();
    if (exitStatus != 0) {
      throw new IOException(HDFS_READ + ": Failed with exitStatus: " + exitStatus);
    }
  } catch (InterruptedException interruptedException) {
    reporter.setStatus(interruptedException.toString());
  } finally {
    localFS.close();
  }
  return Long.valueOf(totalSize);
}
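// Hedged side note, not taken from the original test: Runtime.exec(...) followed
// by waitFor(), as above, can stall if the child process writes enough to stdout
// or stderr to fill the pipe buffers. A small helper that merges and drains the
// child's output before waiting avoids that; the method name is illustrative.
private static int execAndWait(String... command) throws IOException, InterruptedException {
  ProcessBuilder builder = new ProcessBuilder(command);
  builder.redirectErrorStream(true); // fold stderr into stdout so one drain loop suffices
  Process process = builder.start();
  byte[] scratch = new byte[4096];
  java.io.InputStream out = process.getInputStream();
  while (out.read(scratch) != -1) {
    // discard the child's output; only the exit status matters here
  }
  return process.waitFor();
}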
/**
 * Copy a file to a destination.
 *
 * @param srcstat src path and metadata
 * @param relativedst dst path, relative to the destination root
 * @param outc collector for skip/error records
 * @param reporter reporter for counters and status
 */
private void copy(FileStatus srcstat, Path relativedst,
                  OutputCollector<WritableComparable<?>, Text> outc,
                  Reporter reporter) throws IOException {
  Path absdst = new Path(destPath, relativedst);
  int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
  assert totfiles >= 0 : "Invalid file count " + totfiles;

  // if a directory, ensure created even if empty
  if (srcstat.isDir()) {
    if (destFileSys.exists(absdst)) {
      if (!destFileSys.getFileStatus(absdst).isDir()) {
        throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
      }
    } else if (!destFileSys.mkdirs(absdst)) {
      throw new IOException("Failed to mkdirs " + absdst);
    }
    // TODO: when modification times can be set, directories should be
    // emitted to reducers so they might be preserved. Also, mkdirs does
    // not currently return an error when the directory already exists;
    // if this changes, all directory work might as well be done in reduce
    return;
  }

  if (destFileSys.exists(absdst) && !overwrite && !needsUpdate(srcstat, destFileSys, absdst)) {
    outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
    ++skipcount;
    reporter.incrCounter(Counter.SKIP, 1);
    updateStatus(reporter);
    return;
  }

  Path tmpfile = new Path(job.get(TMP_DIR_LABEL), relativedst);
  long cbcopied = 0L;
  FSDataInputStream in = null;
  FSDataOutputStream out = null;
  try {
    // open src file
    try {
      in = srcstat.getPath().getFileSystem(job).open(srcstat.getPath());
    } catch (IOException e) {
      LOG.error("Failed to open src file " + srcstat.getPath() + ", ignoring it and returning");
      in = null;
      return;
    }
    reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
    // open tmp file
    out = create(tmpfile, reporter, srcstat);
    // copy file
    for (int cbread; (cbread = in.read(buffer)) >= 0; ) {
      out.write(buffer, 0, cbread);
      cbcopied += cbread;
      reporter.setStatus(
          String.format("%.2f ", cbcopied * 100.0 / srcstat.getLen())
              + absdst + " [ " + StringUtils.humanReadableInt(cbcopied)
              + " / " + StringUtils.humanReadableInt(srcstat.getLen()) + " ]");
    }
  } finally {
    checkAndClose(in);
    checkAndClose(out);
  }

  if (cbcopied != srcstat.getLen()) {
    if (srcstat.getLen() == 0 && cbcopied > 0) {
      LOG.info("most likely a WAL file corruption: " + srcstat.getPath());
    } else {
      throw new IOException(
          "File size not matched: copied " + bytesString(cbcopied)
              + " to tmpfile (=" + tmpfile + ") but expected "
              + bytesString(srcstat.getLen()) + " from " + srcstat.getPath());
    }
  } else {
    if (totfiles == 1) {
      // Copying a single file; use dst path provided by user as destination
      // rather than destination directory, if a file
      Path dstparent = absdst.getParent();
      if (!(destFileSys.exists(dstparent) && destFileSys.getFileStatus(dstparent).isDir())) {
        absdst = dstparent;
      }
    }
    if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDir()) {
      throw new IOException(absdst + " is a directory");
    }
    if (!destFileSys.mkdirs(absdst.getParent())) {
      throw new IOException("Failed to create parent dir: " + absdst.getParent());
    }
    rename(tmpfile, absdst);

    FileStatus dststat = destFileSys.getFileStatus(absdst);
    if (dststat.getLen() != srcstat.getLen()) {
      destFileSys.delete(absdst, false);
      throw new IOException(
          "File size not matched: copied " + bytesString(dststat.getLen())
              + " to dst (=" + absdst + ") but expected "
              + bytesString(srcstat.getLen()) + " from " + srcstat.getPath());
    }
    updatePermissions(srcstat, dststat);
  }

  // report at least once for each file
  ++copycount;
  reporter.incrCounter(Counter.BYTESCOPIED, cbcopied);
  reporter.incrCounter(Counter.COPY, 1);
  updateStatus(reporter);
}
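// Hedged sketch of the checkAndClose(...) helper invoked in the finally block of
// copy() above; the real implementation may differ, but the intent is a
// null-tolerant close that logs failures instead of letting them mask an
// exception already in flight.
private static boolean checkAndClose(java.io.Closeable io) {
  if (io != null) {
    try {
      io.close();
    } catch (IOException e) {
      LOG.warn("Failed to close " + io, e);
      return false;
    }
  }
  return true;
}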
private void updateStatus(Reporter reporter) {
  reporter.setStatus(getCountString());
}
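// getCountString() is not shown in this listing. A plausible version, inferred
// from the copycount and skipcount fields that copy() increments (the actual
// string may be formatted differently):
private String getCountString() {
  return "Copied: " + copycount + " Skipped: " + skipcount;
}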