public void inject(Path crawlDb, Path urlDir) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: starting at " + sdf.format(start));
    LOG.info("Injector: crawlDb: " + crawlDb);
    LOG.info("Injector: urlDir: " + urlDir);
  }

  Path tempDir =
      new Path(
          getConf().get("mapred.temp.dir", ".")
              + "/inject-temp-"
              + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // map text input file to a <url,CrawlDatum> file
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Converting injected urls to crawl db entries.");
  }
  JobConf sortJob = new NutchJob(getConf());
  sortJob.setJobName("inject " + urlDir);
  FileInputFormat.addInputPath(sortJob, urlDir);
  sortJob.setMapperClass(InjectMapper.class);
  FileOutputFormat.setOutputPath(sortJob, tempDir);
  sortJob.setOutputFormat(SequenceFileOutputFormat.class);
  sortJob.setOutputKeyClass(Text.class);
  sortJob.setOutputValueClass(CrawlDatum.class);
  sortJob.setLong("injector.current.time", System.currentTimeMillis());
  RunningJob mapJob = JobClient.runJob(sortJob);

  long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
  long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
  LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
  LOG.info(
      "Injector: total number of urls injected after normalization and filtering: "
          + urlsInjected);

  // merge with existing crawl db
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Merging injected urls into crawl db.");
  }
  JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
  FileInputFormat.addInputPath(mergeJob, tempDir);
  mergeJob.setReducerClass(InjectReducer.class);
  JobClient.runJob(mergeJob);
  CrawlDb.install(mergeJob, crawlDb);

  // clean up
  FileSystem fs = FileSystem.get(getConf());
  fs.delete(tempDir, true);

  long end = System.currentTimeMillis();
  LOG.info(
      "Injector: finished at "
          + sdf.format(end)
          + ", elapsed: "
          + TimingUtil.elapsedTime(start, end));
}
@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("UFO count");

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: avro UFO counter <in> <out>");
    System.exit(2);
  }

  FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
  Path outputPath = new Path(otherArgs[1]);
  FileOutputFormat.setOutputPath(conf, outputPath);
  // delete any previous output; the two-argument form avoids the deprecated delete(Path)
  outputPath.getFileSystem(conf).delete(outputPath, true);

  Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
  AvroJob.setInputSchema(conf, input_schema);
  AvroJob.setMapOutputSchema(
      conf,
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
  AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
  AvroJob.setMapperClass(conf, AvroRecordMapper.class);
  AvroJob.setReducerClass(conf, AvroRecordReducer.class);
  conf.setInputFormat(AvroInputFormat.class);

  JobClient.runJob(conf);
  return 0;
}
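// A hedged sketch of what the AvroRecordMapper wired up above might look like, using the old
// org.apache.avro.mapred API (AvroMapper, AvroCollector, Pair) together with GenericRecord and
// org.apache.hadoop.mapred.Reporter. The "shape" field name is an assumption for illustration
// only; the real ufo.avsc schema is not shown in this collection. It emits (shape, 1) pairs
// matching the STRING/LONG map-output schema configured in run().
public static class AvroRecordMapper extends AvroMapper<GenericRecord, Pair<CharSequence, Long>> {
  @Override
  public void map(GenericRecord record, AvroCollector<Pair<CharSequence, Long>> collector,
      Reporter reporter) throws IOException {
    Object shape = record.get("shape"); // hypothetical field name
    if (shape != null) {
      collector.collect(new Pair<CharSequence, Long>(shape.toString(), 1L));
    }
  }
}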
public void configure(JobConf job) {
  this.jobconf = job;
  String cassConfig;

  // Get the cached files
  try {
    localFiles = DistributedCache.getLocalCacheFiles(job);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  cassConfig = localFiles[0].getParent().toString();

  System.setProperty("storage-config", cassConfig);

  try {
    StorageService.instance.initClient();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  try {
    Thread.sleep(10 * 1000);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
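// For context: configure() above assumes the Cassandra storage configuration file is already in
// the DistributedCache, so localFiles[0] resolves to it. A minimal sketch of the submission side,
// assuming a hypothetical HDFS path for storage-conf.xml and the same old
// org.apache.hadoop.filecache.DistributedCache API used above.
private static void addCassandraConfigToCache(JobConf job) throws URISyntaxException {
  // Hypothetical location of the Cassandra storage configuration in HDFS.
  DistributedCache.addCacheFile(new URI("/user/hadoop/cassandra/storage-conf.xml"), job);
}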
public void configure(JobConf job) {
  this.jobConf = job;
  urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
  interval = jobConf.getInt("db.fetch.interval.default", 2592000);
  filters = new URLFilters(jobConf);
  scfilters = new ScoringFilters(jobConf);
  scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
  curTime = job.getLong("injector.current.time", System.currentTimeMillis());
}
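// A hedged sketch (not Nutch's actual InjectMapper.map) of how the fields initialised above could
// be used: normalize and filter each seed URL, drop rejected ones, and emit a CrawlDatum seeded
// with the configured interval, score and timestamp. The counter names match the ones read back
// in inject(); error handling is simplified to a single catch.
public void map(WritableComparable key, Text value, OutputCollector<Text, CrawlDatum> output,
    Reporter reporter) throws IOException {
  String url = value.toString().trim();
  try {
    url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
    url = filters.filter(url); // null means the URL was rejected
  } catch (Exception e) {
    url = null;
  }
  if (url == null) {
    reporter.incrCounter("injector", "urls_filtered", 1);
    return;
  }
  Text urlText = new Text(url);
  CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED, interval);
  datum.setFetchTime(curTime);
  datum.setScore(scoreInjected);
  try {
    scfilters.injectedScore(urlText, datum); // let scoring plugins adjust the initial score
  } catch (Exception e) {
    datum.setScore(scoreInjected);
  }
  reporter.incrCounter("injector", "urls_injected", 1);
  output.collect(urlText, datum);
}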
public void testInputFormat() {
  try {
    JobConf conf = new JobConf();
    String TMP_DIR = System.getProperty("test.build.data", "/tmp");
    Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
    SequenceFile.Writer sfw =
        SequenceFile.createWriter(
            FileSystem.getLocal(conf),
            conf,
            filename,
            ChukwaArchiveKey.class,
            ChunkImpl.class,
            SequenceFile.CompressionType.NONE,
            Reporter.NULL);

    StringBuilder buf = new StringBuilder();
    int offsets[] = new int[lines.length];
    for (int i = 0; i < lines.length; ++i) {
      buf.append(lines[i]);
      buf.append("\n");
      offsets[i] = buf.length() - 1;
    }
    ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
    ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
    val.setRecordOffsets(offsets);
    sfw.append(key, val);
    sfw.append(key, val); // write it twice
    sfw.close();

    long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
    InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
    ChukwaInputFormat in = new ChukwaInputFormat();
    RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

    LongWritable l = r.createKey();
    Text line = r.createValue();
    for (int i = 0; i < lines.length * 2; ++i) {
      boolean succeeded = r.next(l, line);
      assertTrue(succeeded);
      assertEquals(i, l.get());
      assertEquals(lines[i % lines.length], line.toString());
      System.out.println("read line: " + l.get() + " " + line);
    }
    boolean succeeded = r.next(l, line);
    assertFalse(succeeded);
  } catch (IOException e) {
    e.printStackTrace();
    fail("IO exception " + e);
  }
}
public static boolean stopIteration(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path preFile = new Path("preX/Result");
  Path curFile = new Path("curX/part-00000");
  if (!(fs.exists(preFile) && fs.exists(curFile))) {
    System.exit(1);
  }

  boolean stop = true;
  String line1, line2;
  FSDataInputStream in1 = fs.open(preFile);
  FSDataInputStream in2 = fs.open(curFile);
  InputStreamReader isr1 = new InputStreamReader(in1);
  InputStreamReader isr2 = new InputStreamReader(in2);
  BufferedReader br1 = new BufferedReader(isr1);
  BufferedReader br2 = new BufferedReader(isr2);

  // stop only if every element of the previous and current vectors differs by at most eps
  while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) {
    String[] str1 = line1.split("\\s+");
    String[] str2 = line2.split("\\s+");
    double preElem = Double.parseDouble(str1[1]);
    double curElem = Double.parseDouble(str2[1]);
    if (Math.abs(preElem - curElem) > eps) {
      stop = false;
      break;
    }
  }

  if (!stop) {
    // keep iterating: the current result becomes the previous result for the next round
    fs.delete(preFile, true);
    if (!fs.rename(curFile, preFile)) {
      System.exit(1);
    }
  }
  return stop;
}
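// Usage sketch: stopIteration() is meant for an iterative job whose driver keeps launching rounds
// until the previous (preX/) and current (curX/) result vectors agree within eps. The
// runSingleIteration() helper, the method name, and the iteration cap are hypothetical.
public static void iterateUntilConverged(Configuration conf) throws IOException {
  final int maxIterations = 100; // assumed safety bound so a non-converging job still terminates
  for (int i = 0; i < maxIterations; i++) {
    runSingleIteration(conf); // hypothetical: runs one MapReduce pass that writes curX/part-00000
    if (stopIteration(conf)) {
      break; // converged: no element changed by more than eps
    }
  }
}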
public static void main(String[] args) throws Exception {
  int megaBytes = 10;
  int files = 100;
  boolean noRead = false;
  boolean noWrite = false;
  boolean noSeek = false;
  boolean fastCheck = false;
  long seed = new Random().nextLong();

  String usage =
      "Usage: TestFileSystem -files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]";

  if (args.length == 0) {
    System.err.println(usage);
    System.exit(-1);
  }
  for (int i = 0; i < args.length; i++) { // parse command line
    if (args[i].equals("-files")) {
      files = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-megaBytes")) {
      megaBytes = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-noread")) {
      noRead = true;
    } else if (args[i].equals("-nowrite")) {
      noWrite = true;
    } else if (args[i].equals("-noseek")) {
      noSeek = true;
    } else if (args[i].equals("-fastcheck")) {
      fastCheck = true;
    }
  }
  LOG.info("seed = " + seed);
  LOG.info("files = " + files);
  LOG.info("megaBytes = " + megaBytes);

  FileSystem fs = FileSystem.get(conf);

  if (!noWrite) {
    createControlFile(fs, megaBytes * MEGA, files, seed);
    writeTest(fs, fastCheck);
  }
  if (!noRead) {
    readTest(fs, fastCheck);
  }
  if (!noSeek) {
    seekTest(fs, fastCheck);
  }
}
public int run(String[] args) throws Exception {
  if (args.length < 4) {
    System.out.println(
        "ERROR: please enter args: input output type(text|seq) splitChar(9=\t)");
    return JobClient.SUCCESS;
  }
  String input = args[0];
  String output = args[1];
  String type = args[2];
  String splitChar = args[3];

  JobConf config = new JobConf(getConf(), getClass());
  config.set("user.split", splitChar);
  config.setJobName("File Filter -" + System.currentTimeMillis());
  config.setNumReduceTasks(10);
  config.setReducerClass(IdentityReducer.class);
  config.setMapperClass(FileTestMapper.class);
  if ("text".equals(type)) {
    config.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(config, new Path(input));
  } else {
    config.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(config, new Path(input));
  }
  config.setMapOutputKeyClass(Text.class);
  config.setMapOutputValueClass(Text.class);
  config.setOutputKeyClass(Text.class);
  config.setOutputValueClass(Text.class);

  // skip the job if the output path already exists
  FileSystem fs = FileSystem.get(config);
  Path outputPath = new Path(output);
  FileOutputFormat.setOutputPath(config, outputPath);
  if (!fs.exists(outputPath)) {
    JobClient.runJob(config);
  } else {
    System.out.println("You have already finished this job today! " + outputPath);
  }
  return JobClient.SUCCESS;
}
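// A hedged sketch of the FileTestMapper referenced above; the real class is not shown in this
// collection, so the behaviour below (treating "user.split" as a split pattern and emitting the
// first field as key and the second as value) is an assumption for illustration. It also assumes
// the TextInputFormat branch, i.e. LongWritable/Text input records.
public static class FileTestMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, Text> {

  private String split = "\t";

  @Override
  public void configure(JobConf job) {
    split = job.get("user.split", split);
  }

  @Override
  public void map(LongWritable key, Text value, OutputCollector<Text, Text> output,
      Reporter reporter) throws IOException {
    String[] fields = value.toString().split(split);
    if (fields.length >= 2) {
      output.collect(new Text(fields[0]), new Text(fields[1]));
    }
  }
}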
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    args = new String[] {DateStringUtils.now()};
    System.out.println(
        "ERROR: please enter a date, e.g. 20101010! Falling back to default => "
            + DateStringUtils.now());
  }

  JobConf config = new JobConf(getConf(), getClass());
  config.set("user.args", Utils.asString(args));
  config.setJobName(getClass() + "-" + System.currentTimeMillis());
  config.setNumReduceTasks(100);
  config.setMapperClass(getClass());
  config.setReducerClass(getClass());
  config.setInputFormat(getInputFormat());
  config.setMapOutputKeyClass(Text.class);
  config.setMapOutputValueClass(Text.class);

  // add input paths
  for (String path : getInputPath(args)) {
    if (TextInputFormat.class.equals(getInputFormat())) {
      TextInputFormat.addInputPath(config, new Path(path));
    } else if (SequenceFileInputFormat.class.equals(getInputFormat())) {
      SequenceFileInputFormat.addInputPath(config, new Path(path));
    }
  }
  config.setOutputKeyClass(Text.class);
  config.setOutputValueClass(Text.class);

  // skip the job if the output path already exists
  FileSystem fs = FileSystem.get(config);
  Path outputPath = new Path(getOutputPath(args));
  FileOutputFormat.setOutputPath(config, outputPath);
  if (!fs.exists(outputPath)) {
    JobClient.runJob(config);
  } else {
    System.out.println("You have already finished this job today! " + outputPath);
  }
  return JobClient.SUCCESS;
}
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      // part files are named "part-xxxxx"; skip past "part-" to get the partition number
      partition = Integer.parseInt(inputFile.substring(inputFile.lastIndexOf("part") + 5));
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
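// Why configure() recovers the partition number and reduce count: the sort-output side can then
// re-check that every key it reads really hashes to the partition of the part file it came from.
// A minimal, hypothetical helper showing that check (the real sort validator does more than this):
private void checkKeyBelongsHere(WritableComparable sortKey, Writable sortValue) {
  int expected = partitioner.getPartition(sortKey, sortValue, noSortReducers);
  if (expected != partition) {
    throw new IllegalStateException(
        "Key hashes to partition " + expected + " but was read from partition " + partition);
  }
}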
public static void main(String[] args) throws IOException {
  if (args.length != 2) {
    System.err.println("Usage: OldMaxTemperature <input path> <output path>");
    System.exit(-1);
  }

  JobConf conf = new JobConf(OldMaxTemperature.class);
  conf.setJobName("Max temperature");

  FileInputFormat.addInputPath(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  conf.setMapperClass(OldMaxTemperatureMapper.class);
  conf.setReducerClass(OldMaxTemperatureReducer.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  JobClient.runJob(conf);
}
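// A hedged sketch of the kind of mapper this driver expects (not necessarily the exact
// OldMaxTemperatureMapper it references): it pulls a year and an air temperature out of each
// fixed-width NCDC record and emits (year, temperature). The character offsets and the
// missing-value sentinel are assumptions for illustration.
public static class OldMaxTemperatureMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private static final int MISSING = 9999; // assumed sentinel for "no reading"

  @Override
  public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
      Reporter reporter) throws IOException {
    String line = value.toString();
    String year = line.substring(15, 19); // assumed offset of the year field
    int airTemperature = Integer.parseInt(line.substring(87, 92).trim()); // assumed offset
    if (airTemperature != MISSING) {
      output.collect(new Text(year), new IntWritable(airTemperature));
    }
  }
}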
public static Message createMessage(
    String keyspace, byte[] key, String columnFamily, List<ColumnFamily> columnFamilies) {
  ColumnFamily baseColumnFamily;
  DataOutputBuffer bufOut = new DataOutputBuffer();
  RowMutation rm;
  Message message;
  Column column;

  /* Get the first column family from list, this is just to get past validation */
  baseColumnFamily =
      new ColumnFamily(
          ColumnFamilyType.Standard,
          DatabaseDescriptor.getComparator(keyspace, columnFamily),
          DatabaseDescriptor.getSubComparator(keyspace, columnFamily),
          CFMetaData.getId(keyspace, columnFamily));

  for (ColumnFamily cf : columnFamilies) {
    bufOut.reset();
    ColumnFamily.serializer().serializeWithIndexes(cf, bufOut);
    byte[] data = new byte[bufOut.getLength()];
    System.arraycopy(bufOut.getData(), 0, data, 0, bufOut.getLength());

    column = new Column(FBUtilities.toByteBuffer(cf.id()), ByteBuffer.wrap(data), 0);
    baseColumnFamily.addColumn(column);
  }
  rm = new RowMutation(keyspace, ByteBuffer.wrap(key));
  rm.add(baseColumnFamily);

  try {
    /* Make message */
    message = rm.makeRowMutationMessage(StorageService.Verb.BINARY, MessagingService.version_);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return message;
}
/** * Distributed i/o benchmark. * * <p>This test writes into or reads from a specified number of files. File size is specified as a * parameter to the test. Each file is accessed in a separate map task. * * <p>The reducer collects the following statistics: * * <ul> * <li>number of tasks completed * <li>number of bytes written/read * <li>execution time * <li>io rate * <li>io rate squared * </ul> * * Finally, the following information is appended to a local file * * <ul> * <li>read or write test * <li>date and time the test finished * <li>number of files * <li>total number of bytes processed * <li>throughput in mb/sec (total number of bytes / sum of processing times) * <li>average i/o rate in mb/sec per file * <li>standard i/o rate deviation * </ul> */ @Ignore public class DFSCIOTest extends TestCase { // Constants private static final Log LOG = LogFactory.getLog(DFSCIOTest.class); private static final int TEST_TYPE_READ = 0; private static final int TEST_TYPE_WRITE = 1; private static final int TEST_TYPE_CLEANUP = 2; private static final int DEFAULT_BUFFER_SIZE = 1000000; private static final String BASE_FILE_NAME = "test_io_"; private static final String DEFAULT_RES_FILE_NAME = "DFSCIOTest_results.log"; private static Configuration fsConfig = new Configuration(); private static final long MEGA = 0x100000; private static String TEST_ROOT_DIR = System.getProperty("test.build.data", "/benchmarks/DFSCIOTest"); private static Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control"); private static Path WRITE_DIR = new Path(TEST_ROOT_DIR, "io_write"); private static Path READ_DIR = new Path(TEST_ROOT_DIR, "io_read"); private static Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data"); private static Path HDFS_TEST_DIR = new Path("/tmp/DFSCIOTest"); private static String HDFS_LIB_VERSION = System.getProperty("libhdfs.version", "1"); private static String CHMOD = new String("chmod"); private static Path HDFS_SHLIB = new Path(HDFS_TEST_DIR + "/libhdfs.so." + HDFS_LIB_VERSION); private static Path HDFS_READ = new Path(HDFS_TEST_DIR + "/hdfs_read"); private static Path HDFS_WRITE = new Path(HDFS_TEST_DIR + "/hdfs_write"); /** * Run the test with default parameters. * * @throws Exception */ public void testIOs() throws Exception { testIOs(10, 10); } /** * Run the test with the specified parameters. * * @param fileSize file size * @param nrFiles number of files * @throws IOException */ public static void testIOs(int fileSize, int nrFiles) throws IOException { FileSystem fs = FileSystem.get(fsConfig); createControlFile(fs, fileSize, nrFiles); writeTest(fs); readTest(fs); } private static void createControlFile( FileSystem fs, int fileSize, // in MB int nrFiles) throws IOException { LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files"); fs.delete(CONTROL_DIR, true); for (int i = 0; i < nrFiles; i++) { String name = getFileName(i); Path controlFile = new Path(CONTROL_DIR, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter( fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE); writer.append(new Text(name), new LongWritable(fileSize)); } catch (Exception e) { throw new IOException(e.getLocalizedMessage()); } finally { if (writer != null) writer.close(); writer = null; } } LOG.info("created control files for: " + nrFiles + " files"); } private static String getFileName(int fIdx) { return BASE_FILE_NAME + Integer.toString(fIdx); } /** * Write/Read mapper base class. 
* * <p>Collects the following statistics per task: * * <ul> * <li>number of tasks completed * <li>number of bytes written/read * <li>execution time * <li>i/o rate * <li>i/o rate squared * </ul> */ private abstract static class IOStatMapper extends IOMapperBase<Long> { IOStatMapper() {} void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize) throws IOException { long totalSize = objSize.longValue(); float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA); LOG.info("Number of bytes processed = " + totalSize); LOG.info("Exec time = " + execTime); LOG.info("IO rate = " + ioRateMbSec); output.collect( new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"), new Text(String.valueOf(1))); output.collect( new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"), new Text(String.valueOf(totalSize))); output.collect( new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"), new Text(String.valueOf(execTime))); output.collect( new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"), new Text(String.valueOf(ioRateMbSec * 1000))); output.collect( new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"), new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000))); } } /** Write mapper class. */ public static class WriteMapper extends IOStatMapper { public WriteMapper() { super(); for (int i = 0; i < bufferSize; i++) buffer[i] = (byte) ('0' + i % 50); } public Long doIO(Reporter reporter, String name, long totalSize) throws IOException { // create file totalSize *= MEGA; // create instance of local filesystem FileSystem localFS = FileSystem.getLocal(fsConfig); try { // native runtime Runtime runTime = Runtime.getRuntime(); // copy the dso and executable from dfs and chmod them synchronized (this) { localFS.delete(HDFS_TEST_DIR, true); if (!(localFS.mkdirs(HDFS_TEST_DIR))) { throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem"); } } synchronized (this) { if (!localFS.exists(HDFS_SHLIB)) { FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig); String chmodCmd = new String(CHMOD + " a+x " + HDFS_SHLIB); Process process = runTime.exec(chmodCmd); int exitStatus = process.waitFor(); if (exitStatus != 0) { throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus); } } } synchronized (this) { if (!localFS.exists(HDFS_WRITE)) { FileUtil.copy(fs, HDFS_WRITE, localFS, HDFS_WRITE, false, fsConfig); String chmodCmd = new String(CHMOD + " a+x " + HDFS_WRITE); Process process = runTime.exec(chmodCmd); int exitStatus = process.waitFor(); if (exitStatus != 0) { throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus); } } } // exec the C program Path outFile = new Path(DATA_DIR, name); String writeCmd = new String(HDFS_WRITE + " " + outFile + " " + totalSize + " " + bufferSize); Process process = runTime.exec(writeCmd, null, new File(HDFS_TEST_DIR.toString())); int exitStatus = process.waitFor(); if (exitStatus != 0) { throw new IOException(writeCmd + ": Failed with exitStatus: " + exitStatus); } } catch (InterruptedException interruptedException) { reporter.setStatus(interruptedException.toString()); } finally { localFS.close(); } return new Long(totalSize); } } private static void writeTest(FileSystem fs) throws IOException { fs.delete(DATA_DIR, true); fs.delete(WRITE_DIR, true); runIOTest(WriteMapper.class, WRITE_DIR); } private static void runIOTest(Class<? 
extends Mapper> mapperClass, Path outputDir) throws IOException { JobConf job = new JobConf(fsConfig, DFSCIOTest.class); FileInputFormat.setInputPaths(job, CONTROL_DIR); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(mapperClass); job.setReducerClass(AccumulatingReducer.class); FileOutputFormat.setOutputPath(job, outputDir); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(1); JobClient.runJob(job); } /** Read mapper class. */ public static class ReadMapper extends IOStatMapper { public ReadMapper() { super(); } public Long doIO(Reporter reporter, String name, long totalSize) throws IOException { totalSize *= MEGA; // create instance of local filesystem FileSystem localFS = FileSystem.getLocal(fsConfig); try { // native runtime Runtime runTime = Runtime.getRuntime(); // copy the dso and executable from dfs synchronized (this) { localFS.delete(HDFS_TEST_DIR, true); if (!(localFS.mkdirs(HDFS_TEST_DIR))) { throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem"); } } synchronized (this) { if (!localFS.exists(HDFS_SHLIB)) { if (!FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig)) { throw new IOException("Failed to copy " + HDFS_SHLIB + " to local filesystem"); } String chmodCmd = new String(CHMOD + " a+x " + HDFS_SHLIB); Process process = runTime.exec(chmodCmd); int exitStatus = process.waitFor(); if (exitStatus != 0) { throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus); } } } synchronized (this) { if (!localFS.exists(HDFS_READ)) { if (!FileUtil.copy(fs, HDFS_READ, localFS, HDFS_READ, false, fsConfig)) { throw new IOException("Failed to copy " + HDFS_READ + " to local filesystem"); } String chmodCmd = new String(CHMOD + " a+x " + HDFS_READ); Process process = runTime.exec(chmodCmd); int exitStatus = process.waitFor(); if (exitStatus != 0) { throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus); } } } // exec the C program Path inFile = new Path(DATA_DIR, name); String readCmd = new String(HDFS_READ + " " + inFile + " " + totalSize + " " + bufferSize); Process process = runTime.exec(readCmd, null, new File(HDFS_TEST_DIR.toString())); int exitStatus = process.waitFor(); if (exitStatus != 0) { throw new IOException(HDFS_READ + ": Failed with exitStatus: " + exitStatus); } } catch (InterruptedException interruptedException) { reporter.setStatus(interruptedException.toString()); } finally { localFS.close(); } return new Long(totalSize); } } private static void readTest(FileSystem fs) throws IOException { fs.delete(READ_DIR, true); runIOTest(ReadMapper.class, READ_DIR); } private static void sequentialTest(FileSystem fs, int testType, int fileSize, int nrFiles) throws Exception { IOStatMapper ioer = null; if (testType == TEST_TYPE_READ) ioer = new ReadMapper(); else if (testType == TEST_TYPE_WRITE) ioer = new WriteMapper(); else return; for (int i = 0; i < nrFiles; i++) ioer.doIO(Reporter.NULL, BASE_FILE_NAME + Integer.toString(i), MEGA * fileSize); } public static void main(String[] args) { int testType = TEST_TYPE_READ; int bufferSize = DEFAULT_BUFFER_SIZE; int fileSize = 1; int nrFiles = 1; String resFileName = DEFAULT_RES_FILE_NAME; boolean isSequential = false; String version = "DFSCIOTest.0.0.1"; String usage = "Usage: DFSCIOTest -read | -write | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] "; System.out.println(version); if (args.length == 0) { System.err.println(usage); System.exit(-1); } for (int i 
= 0; i < args.length; i++) { // parse command line if (args[i].startsWith("-r")) { testType = TEST_TYPE_READ; } else if (args[i].startsWith("-w")) { testType = TEST_TYPE_WRITE; } else if (args[i].startsWith("-clean")) { testType = TEST_TYPE_CLEANUP; } else if (args[i].startsWith("-seq")) { isSequential = true; } else if (args[i].equals("-nrFiles")) { nrFiles = Integer.parseInt(args[++i]); } else if (args[i].equals("-fileSize")) { fileSize = Integer.parseInt(args[++i]); } else if (args[i].equals("-bufferSize")) { bufferSize = Integer.parseInt(args[++i]); } else if (args[i].equals("-resFile")) { resFileName = args[++i]; } } LOG.info("nrFiles = " + nrFiles); LOG.info("fileSize (MB) = " + fileSize); LOG.info("bufferSize = " + bufferSize); try { fsConfig.setInt("test.io.file.buffer.size", bufferSize); FileSystem fs = FileSystem.get(fsConfig); if (testType != TEST_TYPE_CLEANUP) { fs.delete(HDFS_TEST_DIR, true); if (!fs.mkdirs(HDFS_TEST_DIR)) { throw new IOException("Mkdirs failed to create " + HDFS_TEST_DIR.toString()); } // Copy the executables over to the remote filesystem String hadoopHome = System.getenv("HADOOP_PREFIX"); fs.copyFromLocalFile( new Path(hadoopHome + "/libhdfs/libhdfs.so." + HDFS_LIB_VERSION), HDFS_SHLIB); fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_read"), HDFS_READ); fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_write"), HDFS_WRITE); } if (isSequential) { long tStart = System.currentTimeMillis(); sequentialTest(fs, testType, fileSize, nrFiles); long execTime = System.currentTimeMillis() - tStart; String resultLine = "Seq Test exec time sec: " + (float) execTime / 1000; LOG.info(resultLine); return; } if (testType == TEST_TYPE_CLEANUP) { cleanup(fs); return; } createControlFile(fs, fileSize, nrFiles); long tStart = System.currentTimeMillis(); if (testType == TEST_TYPE_WRITE) writeTest(fs); if (testType == TEST_TYPE_READ) readTest(fs); long execTime = System.currentTimeMillis() - tStart; analyzeResult(fs, testType, execTime, resFileName); } catch (Exception e) { System.err.print(e.getLocalizedMessage()); System.exit(-1); } } private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName) throws IOException { Path reduceFile; if (testType == TEST_TYPE_WRITE) reduceFile = new Path(WRITE_DIR, "part-00000"); else reduceFile = new Path(READ_DIR, "part-00000"); DataInputStream in; in = new DataInputStream(fs.open(reduceFile)); BufferedReader lines; lines = new BufferedReader(new InputStreamReader(in)); long tasks = 0; long size = 0; long time = 0; float rate = 0; float sqrate = 0; String line; while ((line = lines.readLine()) != null) { StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%"); String attr = tokens.nextToken(); if (attr.endsWith(":tasks")) tasks = Long.parseLong(tokens.nextToken()); else if (attr.endsWith(":size")) size = Long.parseLong(tokens.nextToken()); else if (attr.endsWith(":time")) time = Long.parseLong(tokens.nextToken()); else if (attr.endsWith(":rate")) rate = Float.parseFloat(tokens.nextToken()); else if (attr.endsWith(":sqrate")) sqrate = Float.parseFloat(tokens.nextToken()); } double med = rate / 1000 / tasks; double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med)); String resultLines[] = { "----- DFSCIOTest ----- : " + ((testType == TEST_TYPE_WRITE) ? "write" : (testType == TEST_TYPE_READ) ? 
"read" : "unknown"), " Date & time: " + new Date(System.currentTimeMillis()), " Number of files: " + tasks, "Total MBytes processed: " + size / MEGA, " Throughput mb/sec: " + size * 1000.0 / (time * MEGA), "Average IO rate mb/sec: " + med, " Std IO rate deviation: " + stdDev, " Test exec time sec: " + (float) execTime / 1000, "" }; PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true)); for (int i = 0; i < resultLines.length; i++) { LOG.info(resultLines[i]); res.println(resultLines[i]); } } private static void cleanup(FileSystem fs) throws Exception { LOG.info("Cleaning up test files"); fs.delete(new Path(TEST_ROOT_DIR), true); fs.delete(HDFS_TEST_DIR, true); } }
private static byte[] pair(BytesWritable a, BytesWritable b) {
  byte[] pairData = new byte[a.getLength() + b.getLength()];
  System.arraycopy(a.getBytes(), 0, pairData, 0, a.getLength());
  System.arraycopy(b.getBytes(), 0, pairData, a.getLength(), b.getLength());
  return pairData;
}
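// Usage sketch: pair() copies only the valid bytes of each writable (getLength(), not the padded
// backing array returned by getBytes()), so the result is the exact byte-level concatenation of
// a and b. The method name and values below are illustrative.
private static void pairExample() {
  BytesWritable left = new BytesWritable(new byte[] {1, 2, 3});
  BytesWritable right = new BytesWritable(new byte[] {4, 5});
  byte[] joined = pair(left, right);
  System.out.println("joined length = " + joined.length); // prints 5
}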
public class TestFileSystem extends TestCase { private static final Log LOG = FileSystem.LOG; private static Configuration conf = new Configuration(); private static int BUFFER_SIZE = conf.getInt("io.file.buffer.size", 4096); private static final long MEGA = 1024 * 1024; private static final int SEEKS_PER_FILE = 4; private static String ROOT = System.getProperty("test.build.data", "fs_test"); private static Path CONTROL_DIR = new Path(ROOT, "fs_control"); private static Path WRITE_DIR = new Path(ROOT, "fs_write"); private static Path READ_DIR = new Path(ROOT, "fs_read"); private static Path DATA_DIR = new Path(ROOT, "fs_data"); public void testFs() throws Exception { testFs(10 * MEGA, 100, 0); } public static void testFs(long megaBytes, int numFiles, long seed) throws Exception { FileSystem fs = FileSystem.get(conf); if (seed == 0) seed = new Random().nextLong(); LOG.info("seed = " + seed); createControlFile(fs, megaBytes, numFiles, seed); writeTest(fs, false); readTest(fs, false); seekTest(fs, false); fs.delete(CONTROL_DIR, true); fs.delete(DATA_DIR, true); fs.delete(WRITE_DIR, true); fs.delete(READ_DIR, true); } public static void testCommandFormat() throws Exception { // This should go to TestFsShell.java when it is added. CommandFormat cf; cf = new CommandFormat("copyToLocal", 2, 2, "crc", "ignoreCrc"); assertEquals(cf.parse(new String[] {"-get", "file", "-"}, 1).get(1), "-"); try { cf.parse(new String[] {"-get", "file", "-ignoreCrc", "/foo"}, 1); fail("Expected parsing to fail as it should stop at first non-option"); } catch (Exception e) { // Expected } cf = new CommandFormat("tail", 1, 1, "f"); assertEquals(cf.parse(new String[] {"-tail", "fileName"}, 1).get(0), "fileName"); assertEquals(cf.parse(new String[] {"-tail", "-f", "fileName"}, 1).get(0), "fileName"); cf = new CommandFormat("setrep", 2, 2, "R", "w"); assertEquals(cf.parse(new String[] {"-setrep", "-R", "2", "/foo/bar"}, 1).get(1), "/foo/bar"); cf = new CommandFormat("put", 2, 10000); assertEquals(cf.parse(new String[] {"-put", "-", "dest"}, 1).get(1), "dest"); } public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed) throws Exception { LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files"); Path controlFile = new Path(CONTROL_DIR, "files"); fs.delete(controlFile, true); Random random = new Random(seed); SequenceFile.Writer writer = SequenceFile.createWriter( fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE); long totalSize = 0; long maxSize = ((megaBytes / numFiles) * 2) + 1; try { while (totalSize < megaBytes) { Text name = new Text(Long.toString(random.nextLong())); long size = random.nextLong(); if (size < 0) size = -size; size = size % maxSize; // LOG.info(" adding: name="+name+" size="+size); writer.append(name, new LongWritable(size)); totalSize += size; } } finally { writer.close(); } LOG.info("created control file for: " + totalSize + " bytes"); } public static class WriteMapper extends Configured implements Mapper<Text, LongWritable, Text, LongWritable> { private Random random = new Random(); private byte[] buffer = new byte[BUFFER_SIZE]; private FileSystem fs; private boolean fastCheck; // a random suffix per task private String suffix = "-" + random.nextLong(); { try { fs = FileSystem.get(conf); } catch (IOException e) { throw new RuntimeException(e); } } public WriteMapper() { super(null); } public WriteMapper(Configuration conf) { super(conf); } public void configure(JobConf job) { setConf(job); fastCheck = 
job.getBoolean("fs.test.fastCheck", false); } public void map( Text key, LongWritable value, OutputCollector<Text, LongWritable> collector, Reporter reporter) throws IOException { String name = key.toString(); long size = value.get(); long seed = Long.parseLong(name); random.setSeed(seed); reporter.setStatus("creating " + name); // write to temp file initially to permit parallel execution Path tempFile = new Path(DATA_DIR, name + suffix); OutputStream out = fs.create(tempFile); long written = 0; try { while (written < size) { if (fastCheck) { Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE)); } else { random.nextBytes(buffer); } long remains = size - written; int length = (remains <= buffer.length) ? (int) remains : buffer.length; out.write(buffer, 0, length); written += length; reporter.setStatus("writing " + name + "@" + written + "/" + size); } } finally { out.close(); } // rename to final location fs.rename(tempFile, new Path(DATA_DIR, name)); collector.collect(new Text("bytes"), new LongWritable(written)); reporter.setStatus("wrote " + name); } public void close() {} } public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception { fs.delete(DATA_DIR, true); fs.delete(WRITE_DIR, true); JobConf job = new JobConf(conf, TestFileSystem.class); job.setBoolean("fs.test.fastCheck", fastCheck); FileInputFormat.setInputPaths(job, CONTROL_DIR); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(WriteMapper.class); job.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(job, WRITE_DIR); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setNumReduceTasks(1); JobClient.runJob(job); } public static class ReadMapper extends Configured implements Mapper<Text, LongWritable, Text, LongWritable> { private Random random = new Random(); private byte[] buffer = new byte[BUFFER_SIZE]; private byte[] check = new byte[BUFFER_SIZE]; private FileSystem fs; private boolean fastCheck; { try { fs = FileSystem.get(conf); } catch (IOException e) { throw new RuntimeException(e); } } public ReadMapper() { super(null); } public ReadMapper(Configuration conf) { super(conf); } public void configure(JobConf job) { setConf(job); fastCheck = job.getBoolean("fs.test.fastCheck", false); } public void map( Text key, LongWritable value, OutputCollector<Text, LongWritable> collector, Reporter reporter) throws IOException { String name = key.toString(); long size = value.get(); long seed = Long.parseLong(name); random.setSeed(seed); reporter.setStatus("opening " + name); DataInputStream in = new DataInputStream(fs.open(new Path(DATA_DIR, name))); long read = 0; try { while (read < size) { long remains = size - read; int n = (remains <= buffer.length) ? 
(int) remains : buffer.length; in.readFully(buffer, 0, n); read += n; if (fastCheck) { Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE)); } else { random.nextBytes(check); } if (n != buffer.length) { Arrays.fill(buffer, n, buffer.length, (byte) 0); Arrays.fill(check, n, check.length, (byte) 0); } assertTrue(Arrays.equals(buffer, check)); reporter.setStatus("reading " + name + "@" + read + "/" + size); } } finally { in.close(); } collector.collect(new Text("bytes"), new LongWritable(read)); reporter.setStatus("read " + name); } public void close() {} } public static void readTest(FileSystem fs, boolean fastCheck) throws Exception { fs.delete(READ_DIR, true); JobConf job = new JobConf(conf, TestFileSystem.class); job.setBoolean("fs.test.fastCheck", fastCheck); FileInputFormat.setInputPaths(job, CONTROL_DIR); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(ReadMapper.class); job.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(job, READ_DIR); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setNumReduceTasks(1); JobClient.runJob(job); } public static class SeekMapper<K> extends Configured implements Mapper<Text, LongWritable, K, LongWritable> { private Random random = new Random(); private byte[] check = new byte[BUFFER_SIZE]; private FileSystem fs; private boolean fastCheck; { try { fs = FileSystem.get(conf); } catch (IOException e) { throw new RuntimeException(e); } } public SeekMapper() { super(null); } public SeekMapper(Configuration conf) { super(conf); } public void configure(JobConf job) { setConf(job); fastCheck = job.getBoolean("fs.test.fastCheck", false); } public void map( Text key, LongWritable value, OutputCollector<K, LongWritable> collector, Reporter reporter) throws IOException { String name = key.toString(); long size = value.get(); long seed = Long.parseLong(name); if (size == 0) return; reporter.setStatus("opening " + name); FSDataInputStream in = fs.open(new Path(DATA_DIR, name)); try { for (int i = 0; i < SEEKS_PER_FILE; i++) { // generate a random position long position = Math.abs(random.nextLong()) % size; // seek file to that position reporter.setStatus("seeking " + name); in.seek(position); byte b = in.readByte(); // check that byte matches byte checkByte = 0; // advance random state to that position random.setSeed(seed); for (int p = 0; p <= position; p += check.length) { reporter.setStatus("generating data for " + name); if (fastCheck) { checkByte = (byte) random.nextInt(Byte.MAX_VALUE); } else { random.nextBytes(check); checkByte = check[(int) (position % check.length)]; } } assertEquals(b, checkByte); } } finally { in.close(); } } public void close() {} } public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception { fs.delete(READ_DIR, true); JobConf job = new JobConf(conf, TestFileSystem.class); job.setBoolean("fs.test.fastCheck", fastCheck); FileInputFormat.setInputPaths(job, CONTROL_DIR); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(SeekMapper.class); job.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(job, READ_DIR); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setNumReduceTasks(1); JobClient.runJob(job); } public static void main(String[] args) throws Exception { int megaBytes = 10; int files = 100; boolean noRead = false; boolean noWrite = false; boolean noSeek = false; boolean fastCheck = false; long seed = new Random().nextLong(); String usage = "Usage: TestFileSystem 
-files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]"; if (args.length == 0) { System.err.println(usage); System.exit(-1); } for (int i = 0; i < args.length; i++) { // parse command line if (args[i].equals("-files")) { files = Integer.parseInt(args[++i]); } else if (args[i].equals("-megaBytes")) { megaBytes = Integer.parseInt(args[++i]); } else if (args[i].equals("-noread")) { noRead = true; } else if (args[i].equals("-nowrite")) { noWrite = true; } else if (args[i].equals("-noseek")) { noSeek = true; } else if (args[i].equals("-fastcheck")) { fastCheck = true; } } LOG.info("seed = " + seed); LOG.info("files = " + files); LOG.info("megaBytes = " + megaBytes); FileSystem fs = FileSystem.get(conf); if (!noWrite) { createControlFile(fs, megaBytes * MEGA, files, seed); writeTest(fs, fastCheck); } if (!noRead) { readTest(fs, fastCheck); } if (!noSeek) { seekTest(fs, fastCheck); } } public void testFsCache() throws Exception { { long now = System.currentTimeMillis(); String[] users = new String[] {"foo", "bar"}; final Configuration conf = new Configuration(); FileSystem[] fs = new FileSystem[users.length]; for (int i = 0; i < users.length; i++) { UserGroupInformation ugi = UserGroupInformation.createRemoteUser(users[i]); fs[i] = ugi.doAs( new PrivilegedExceptionAction<FileSystem>() { public FileSystem run() throws IOException { return FileSystem.get(conf); } }); for (int j = 0; j < i; j++) { assertFalse(fs[j] == fs[i]); } } FileSystem.closeAll(); } { try { runTestCache(HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT); } catch (java.net.BindException be) { LOG.warn( "Cannot test HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT (=" + HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT + ")", be); } runTestCache(0); } } static void runTestCache(int port) throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).nameNodePort(port).numDataNodes(2).build(); URI uri = cluster.getFileSystem().getUri(); LOG.info("uri=" + uri); { FileSystem fs = FileSystem.get(uri, new Configuration()); checkPath(cluster, fs); for (int i = 0; i < 100; i++) { assertTrue(fs == FileSystem.get(uri, new Configuration())); } } if (port == HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT) { // test explicit default port URI uri2 = new URI( uri.getScheme(), uri.getUserInfo(), uri.getHost(), HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT, uri.getPath(), uri.getQuery(), uri.getFragment()); LOG.info("uri2=" + uri2); FileSystem fs = FileSystem.get(uri2, conf); checkPath(cluster, fs); for (int i = 0; i < 100; i++) { assertTrue(fs == FileSystem.get(uri2, new Configuration())); } } } finally { if (cluster != null) cluster.shutdown(); } } static void checkPath(MiniDFSCluster cluster, FileSystem fileSys) throws IOException { InetSocketAddress add = cluster.getNameNode().getNameNodeAddress(); // Test upper/lower case fileSys.checkPath( new Path("hdfs://" + StringUtils.toUpperCase(add.getHostName()) + ":" + add.getPort())); } public void testFsClose() throws Exception { { Configuration conf = new Configuration(); new Path("file:///").getFileSystem(conf); FileSystem.closeAll(); } } public void testFsShutdownHook() throws Exception { final Set<FileSystem> closed = Collections.synchronizedSet(new HashSet<FileSystem>()); Configuration conf = new Configuration(); Configuration confNoAuto = new Configuration(); conf.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class); confNoAuto.setClass("fs.test.impl", 
TestShutdownFileSystem.class, FileSystem.class); confNoAuto.setBoolean("fs.automatic.close", false); TestShutdownFileSystem fsWithAuto = (TestShutdownFileSystem) (new Path("test://a/").getFileSystem(conf)); TestShutdownFileSystem fsWithoutAuto = (TestShutdownFileSystem) (new Path("test://b/").getFileSystem(confNoAuto)); fsWithAuto.setClosedSet(closed); fsWithoutAuto.setClosedSet(closed); // Different URIs should result in different FS instances assertNotSame(fsWithAuto, fsWithoutAuto); FileSystem.CACHE.closeAll(true); assertEquals(1, closed.size()); assertTrue(closed.contains(fsWithAuto)); closed.clear(); FileSystem.closeAll(); assertEquals(1, closed.size()); assertTrue(closed.contains(fsWithoutAuto)); } public void testCacheKeysAreCaseInsensitive() throws Exception { Configuration conf = new Configuration(); // check basic equality FileSystem.Cache.Key lowercaseCachekey1 = new FileSystem.Cache.Key(new URI("hdfs://localhost:12345/"), conf); FileSystem.Cache.Key lowercaseCachekey2 = new FileSystem.Cache.Key(new URI("hdfs://localhost:12345/"), conf); assertEquals(lowercaseCachekey1, lowercaseCachekey2); // check insensitive equality FileSystem.Cache.Key uppercaseCachekey = new FileSystem.Cache.Key(new URI("HDFS://Localhost:12345/"), conf); assertEquals(lowercaseCachekey2, uppercaseCachekey); // check behaviour with collections List<FileSystem.Cache.Key> list = new ArrayList<FileSystem.Cache.Key>(); list.add(uppercaseCachekey); assertTrue(list.contains(uppercaseCachekey)); assertTrue(list.contains(lowercaseCachekey2)); Set<FileSystem.Cache.Key> set = new HashSet<FileSystem.Cache.Key>(); set.add(uppercaseCachekey); assertTrue(set.contains(uppercaseCachekey)); assertTrue(set.contains(lowercaseCachekey2)); Map<FileSystem.Cache.Key, String> map = new HashMap<FileSystem.Cache.Key, String>(); map.put(uppercaseCachekey, ""); assertTrue(map.containsKey(uppercaseCachekey)); assertTrue(map.containsKey(lowercaseCachekey2)); } public static void testFsUniqueness(long megaBytes, int numFiles, long seed) throws Exception { // multiple invocations of FileSystem.get return the same object. FileSystem fs1 = FileSystem.get(conf); FileSystem fs2 = FileSystem.get(conf); assertTrue(fs1 == fs2); // multiple invocations of FileSystem.newInstance return different objects fs1 = FileSystem.newInstance(conf); fs2 = FileSystem.newInstance(conf); assertTrue(fs1 != fs2 && !fs1.equals(fs2)); fs1.close(); fs2.close(); } public static class TestShutdownFileSystem extends RawLocalFileSystem { private Set<FileSystem> closedSet; public void setClosedSet(Set<FileSystem> closedSet) { this.closedSet = closedSet; } public void close() throws IOException { if (closedSet != null) { closedSet.add(this); } super.close(); } } }
public static void main(String[] args) throws Exception {
  int res = ToolRunner.run(new Configuration(), new FileTest(), args);
  System.exit(res);
}

public static void main(String[] args) throws Exception {
  int res = ToolRunner.run(NutchConfiguration.create(), new Injector(), args);
  System.exit(res);
}

public static void main(String[] args) throws Exception {
  JobConf job = new JobConf(DistCp.class);
  DistCp distcp = new DistCp(job);
  int res = ToolRunner.run(distcp, args);
  System.exit(res);
}

public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new MaxTemperatureDriver(), args);
  System.exit(exitCode);
}

public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new SortByTemperatureUsingTotalOrderPartitioner(), args);
  System.exit(exitCode);
}

static void printUsage() {
  System.err.println(
      "sortvalidate [-m <maps>] [-r <reduces>] [-deep] "
          + "-sortInput <sort-input-dir> -sortOutput <sort-output-dir>");
  System.exit(1);
}

public static void main(String[] args) throws Exception {
  int res = ToolRunner.run(new Configuration(), new UserViewMuliHostStepThreeGroup(), args);
  System.exit(res);
}

public static void main(String[] args) throws Exception {
  int res = ToolRunner.run(new Configuration(), new XiangLi1_exercise3(), args);
  System.exit(res);
}

public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new PartitionByStationUsingMultipleOutputs(), args);
  System.exit(exitCode);
}
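// All of the entry points above follow the same ToolRunner pattern: the driver implements Tool,
// ToolRunner parses the generic Hadoop options, and main() exits with run()'s return code. A
// minimal, hypothetical driver showing the shape of run() these mains rely on; the class and job
// names are illustrative, and no mapper/reducer is set, so Hadoop's identity classes are used.
public class ExampleDriver extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: ExampleDriver <input> <output>");
      return 1;
    }
    JobConf job = new JobConf(getConf(), ExampleDriver.class);
    job.setJobName("example");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    JobClient.runJob(job);
    return 0;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new ExampleDriver(), args));
  }
}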
public class ElephantRecordWriter
    implements RecordWriter<IntWritable, ElephantRecordWritable>, Closeable {

  FileSystem fileSystem;
  Args args;
  Map<Integer, Persistence> lps = new HashMap<Integer, Persistence>();
  Progressable progressable;
  LocalElephantManager localManager;

  int numWritten = 0;
  long lastCheckpoint = System.currentTimeMillis();

  public ElephantRecordWriter(Configuration conf, Args args, Progressable progressable)
      throws IOException {
    fileSystem = Utils.getFS(args.outputDirHdfs, conf);
    this.args = args;
    this.progressable = progressable;
    localManager =
        new LocalElephantManager(fileSystem, args.spec, LocalElephantManager.getTmpDirs(conf));
  }

  private Persistence retrieveShard(int shardIdx) throws IOException {
    Persistence lp = null;
    if (lps.containsKey(shardIdx)) {
      lp = lps.get(shardIdx);
    } else {
      String localShard = localManager.downloadRemoteShard("" + shardIdx, null);
      Coordinator fact = args.spec.getCoordinator();
      lp = fact.openPersistenceForAppend(localShard, args.spec.getPersistenceOptions());
      lps.put(shardIdx, lp);
      progress();
    }
    return lp;
  }

  public void write(IntWritable shard, ElephantRecordWritable carrier) throws IOException {
    Persistence lp = retrieveShard(shard.get());
    NewKeyValDocument doc = new NewKeyValDocument(carrier.key, carrier.value);
    lp.index(doc);
    bumpProgress();
  }

  public void bumpProgress() {
    numWritten++;
    if (numWritten % 25000 == 0) {
      long now = System.currentTimeMillis();
      long delta = now - lastCheckpoint;
      lastCheckpoint = now;
      LOG.info("Wrote last 25000 records in " + delta + " ms");
      localManager.progress();
    }
  }

  public void close() throws IOException {
    close(null);
  }

  public void close(Reporter reporter) throws IOException {
    for (Integer shard : lps.keySet()) {
      String lpDir = localManager.localTmpDir("" + shard);
      LOG.info("Closing LP for shard " + shard + " at " + lpDir);
      lps.get(shard).close();
      LOG.info("Closed LP for shard " + shard + " at " + lpDir);
      progress();

      String remoteDir = args.outputDirHdfs + "/" + shard;

      // Do all this stuff to ensure that S3 actually does delete
      int deleteAttempt = 4;
      while (fileSystem.exists(new Path(remoteDir)) && deleteAttempt > 0) {
        LOG.info("Deleting existing shard " + shard + " at " + remoteDir);
        fileSystem.delete(new Path(remoteDir), true);
        --deleteAttempt;
      }
      if (fileSystem.exists(new Path(remoteDir)) && deleteAttempt == 0) {
        throw new IOException(
            "Failed to delete shard " + shard + " at " + remoteDir + " after "
                + deleteAttempt + " attempts!");
      } else {
        LOG.info("Deleted existing shard " + shard + " at " + remoteDir);
      }

      LOG.info("Copying " + lpDir + " to " + remoteDir);
      fileSystem.copyFromLocalFile(new Path(lpDir), new Path(remoteDir));
      LOG.info("Copied " + lpDir + " to " + remoteDir);
      progress();
    }
    localManager.cleanup();
  }

  private void progress() {
    if (progressable != null) progressable.progress();
  }
}
static void printUsage() {
  System.out.println("kmeans [-m <maps>] [-r <reduces>] <input> <output>");
  System.exit(1);
}