/**
 * Round-trips a chunk through a local sequence file and {@code ChukwaInputFormat}.
 *
 * <p>Every entry of {@code lines} is appended, newline-terminated, to a single chunk whose record
 * offsets point at each terminating newline. The chunk is written twice, so the reader must
 * return {@code lines.length * 2} records keyed 0, 1, 2, ... and then report end of input.
 */
public void testInputFormat() {

    SequenceFile.Writer sfw = null;
    RecordReader<LongWritable, Text> r = null;
    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        // offset of the newline that terminates record i
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();
      sfw = null; // closed successfully; nothing left for the finally block

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

      r.close();
      r = null; // closed successfully; nothing left for the finally block

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    } finally {
      // Best-effort cleanup on the failure path; a close error must not hide the real failure.
      if (r != null) {
        try {
          r.close();
        } catch (IOException ignored) {
          // nothing useful can be done here
        }
      }
      if (sfw != null) {
        try {
          sfw.close();
        } catch (IOException ignored) {
          // nothing useful can be done here
        }
      }
    }
  }
// Example #2
/**
 * Distributed i/o benchmark.
 *
 * <p>This test writes into or reads from a specified number of files. File size is specified as a
 * parameter to the test. Each file is accessed in a separate map task.
 *
 * <p>The reducer collects the following statistics:
 *
 * <ul>
 *   <li>number of tasks completed
 *   <li>number of bytes written/read
 *   <li>execution time
 *   <li>io rate
 *   <li>io rate squared
 * </ul>
 *
 * Finally, the following information is appended to a local file
 *
 * <ul>
 *   <li>read or write test
 *   <li>date and time the test finished
 *   <li>number of files
 *   <li>total number of bytes processed
 *   <li>throughput in mb/sec (total number of bytes / sum of processing times)
 *   <li>average i/o rate in mb/sec per file
 *   <li>standard i/o rate deviation
 * </ul>
 */
@Ignore
public class DFSCIOTest extends TestCase {
  // Constants
  private static final Log LOG = LogFactory.getLog(DFSCIOTest.class);
  // Test modes selected on the command line (see main).
  private static final int TEST_TYPE_READ = 0;
  private static final int TEST_TYPE_WRITE = 1;
  private static final int TEST_TYPE_CLEANUP = 2;
  // Buffer size main() uses when -bufferSize is not given.
  private static final int DEFAULT_BUFFER_SIZE = 1000000;
  // Prefix of every data file name; the file index is appended (see getFileName).
  private static final String BASE_FILE_NAME = "test_io_";
  // Local file main() appends the result summary to when -resFile is not given.
  private static final String DEFAULT_RES_FILE_NAME = "DFSCIOTest_results.log";

  // Configuration shared by the driver and the mapper classes in this file.
  private static Configuration fsConfig = new Configuration();
  // Number of bytes in one megabyte (2^20).
  private static final long MEGA = 0x100000;
  // Root of the benchmark's working directories on the tested filesystem.
  private static String TEST_ROOT_DIR =
      System.getProperty("test.build.data", "/benchmarks/DFSCIOTest");
  // One control file per data file is written here and used as the job input.
  private static Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control");
  // Job output directories of the write and read runs.
  private static Path WRITE_DIR = new Path(TEST_ROOT_DIR, "io_write");
  private static Path READ_DIR = new Path(TEST_ROOT_DIR, "io_read");
  // Directory holding the data files written and read by the native programs.
  private static Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data");

  // Directory used for the native binaries: main() uploads them here on the tested filesystem
  // and the mappers copy them to the same path on the local filesystem.
  private static Path HDFS_TEST_DIR = new Path("/tmp/DFSCIOTest");
  // Version suffix of the libhdfs shared library file name.
  private static String HDFS_LIB_VERSION = System.getProperty("libhdfs.version", "1");
  // Name of the command used to mark the copied binaries executable.
  private static String CHMOD = new String("chmod");
  private static Path HDFS_SHLIB = new Path(HDFS_TEST_DIR + "/libhdfs.so." + HDFS_LIB_VERSION);
  private static Path HDFS_READ = new Path(HDFS_TEST_DIR + "/hdfs_read");
  private static Path HDFS_WRITE = new Path(HDFS_TEST_DIR + "/hdfs_write");

  /**
   * Runs the benchmark with its default parameters: ten files of 10 MB each.
   *
   * @throws Exception if any stage of the benchmark fails
   */
  public void testIOs() throws Exception {
    final int defaultFileSizeMb = 10;
    final int defaultFileCount = 10;
    testIOs(defaultFileSizeMb, defaultFileCount);
  }

  /**
   * Creates the control files, then runs the write test followed by the read test on the
   * filesystem obtained from the shared configuration.
   *
   * @param fileSize size of each file, in megabytes
   * @param nrFiles number of files
   * @throws IOException if creating the control files or running either job fails
   */
  public static void testIOs(int fileSize, int nrFiles) throws IOException {
    final FileSystem targetFs = FileSystem.get(fsConfig);

    createControlFile(targetFs, fileSize, nrFiles);
    // The read test consumes the files produced by the write test, so the order matters.
    writeTest(targetFs);
    readTest(targetFs);
  }

  /**
   * Recreates the control directory with one single-record sequence file per data file.
   *
   * <p>Each record maps the data file name to the requested file size.
   *
   * @param fs filesystem on which the control files are created
   * @param fileSize size of each data file, in megabytes
   * @param nrFiles number of control files to create
   * @throws IOException if a control file cannot be written; the original failure is attached
   *     as the cause
   */
  private static void createControlFile(
      FileSystem fs,
      int fileSize, // in MB
      int nrFiles)
      throws IOException {
    LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

    fs.delete(CONTROL_DIR, true);

    for (int i = 0; i < nrFiles; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer =
            SequenceFile.createWriter(
                fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(fileSize));
      } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new IOException(e.getLocalizedMessage(), e);
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    }
    LOG.info("created control files for: " + nrFiles + " files");
  }

  /** Returns the data file name for the given index: the base name followed by the index. */
  private static String getFileName(int fIdx) {
    return BASE_FILE_NAME + fIdx;
  }

  /**
   * Common base of the write and read mappers.
   *
   * <p>For each task it emits five statistics: a task count of one, the number of bytes
   * processed, the execution time, the i/o rate and the squared i/o rate. Both rate values are
   * emitted multiplied by 1000.
   */
  private abstract static class IOStatMapper extends IOMapperBase<Long> {
    IOStatMapper() {}

    /**
     * Logs and emits the statistics of one finished task.
     *
     * @param output collector receiving the statistics
     * @param name file name handled by the task (not used here)
     * @param execTime execution time of the task
     * @param objSize number of bytes the task processed
     * @throws IOException if the collector fails
     */
    void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
        throws IOException {
      final long bytesProcessed = objSize.longValue();
      final float rateMbPerSec = (float) bytesProcessed * 1000 / (execTime * MEGA);
      LOG.info("Number of bytes processed = " + bytesProcessed);
      LOG.info("Exec time = " + execTime);
      LOG.info("IO rate = " + rateMbPerSec);

      emit(output, AccumulatingReducer.VALUE_TYPE_LONG + "tasks", String.valueOf(1));
      emit(output, AccumulatingReducer.VALUE_TYPE_LONG + "size", String.valueOf(bytesProcessed));
      emit(output, AccumulatingReducer.VALUE_TYPE_LONG + "time", String.valueOf(execTime));
      emit(
          output,
          AccumulatingReducer.VALUE_TYPE_FLOAT + "rate",
          String.valueOf(rateMbPerSec * 1000));
      emit(
          output,
          AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate",
          String.valueOf(rateMbPerSec * rateMbPerSec * 1000));
    }

    /** Wraps a key/value pair in {@code Text} objects and hands it to the collector. */
    private static void emit(OutputCollector<Text, Text> output, String key, String value)
        throws IOException {
      output.collect(new Text(key), new Text(value));
    }
  }

  /**
   * Write mapper class.
   *
   * <p>Each call to {@link #doIO} recreates the local test directory, copies the libhdfs shared
   * library and the {@code hdfs_write} program into it, and runs {@code hdfs_write} to produce
   * one data file.
   */
  public static class WriteMapper extends IOStatMapper {

    public WriteMapper() {
      super();
      for (int i = 0; i < bufferSize; i++) {
        buffer[i] = (byte) ('0' + i % 50);
      }
    }

    /**
     * Writes one data file by running the native {@code hdfs_write} program.
     *
     * @param reporter receives the interruption as a status message if the thread is interrupted
     * @param name name of the data file, created under the data directory
     * @param totalSize file size in megabytes
     * @return the file size in bytes
     * @throws IOException if the local directory cannot be created, a binary cannot be copied,
     *     or a launched process exits with a non-zero status
     */
    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      // create file
      totalSize *= MEGA;

      // create instance of local filesystem
      FileSystem localFS = FileSystem.getLocal(fsConfig);

      try {
        // copy the dso and executable from dfs and chmod them
        synchronized (this) {
          localFS.delete(HDFS_TEST_DIR, true);
          if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
            throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
          }
        }

        synchronized (this) {
          installExecutable(localFS, HDFS_SHLIB);
        }

        synchronized (this) {
          installExecutable(localFS, HDFS_WRITE);
        }

        // exec the C program
        Path outFile = new Path(DATA_DIR, name);
        String writeCmd = HDFS_WRITE + " " + outFile + " " + totalSize + " " + bufferSize;
        Process process =
            Runtime.getRuntime().exec(writeCmd, null, new File(HDFS_TEST_DIR.toString()));
        int exitStatus = process.waitFor();
        if (exitStatus != 0) {
          throw new IOException(writeCmd + ": Failed with exitStatus: " + exitStatus);
        }
      } catch (InterruptedException interruptedException) {
        // Restore the interrupt status so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        reporter.setStatus(interruptedException.toString());
      } finally {
        localFS.close();
      }
      return Long.valueOf(totalSize);
    }

    /**
     * Copies {@code file} to the same path on the local filesystem, unless it is already there,
     * and marks the copy executable for all users.
     */
    private void installExecutable(FileSystem localFS, Path file)
        throws IOException, InterruptedException {
      if (localFS.exists(file)) {
        return;
      }
      if (!FileUtil.copy(fs, file, localFS, file, false, fsConfig)) {
        throw new IOException("Failed to copy " + file + " to local filesystem");
      }

      String chmodCmd = CHMOD + " a+x " + file;
      Process process = Runtime.getRuntime().exec(chmodCmd);
      int exitStatus = process.waitFor();
      if (exitStatus != 0) {
        throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
      }
    }
  }

  /** Removes the data and write-result directories, then runs the write job. */
  private static void writeTest(FileSystem fs) throws IOException {
    for (Path stale : new Path[] {DATA_DIR, WRITE_DIR}) {
      fs.delete(stale, true);
    }

    runIOTest(WriteMapper.class, WRITE_DIR);
  }

  /**
   * Configures and runs one map-reduce job over the control files.
   *
   * @param mapperClass mapper performing the i/o
   * @param outputDir directory receiving the output of the single reduce task
   * @throws IOException if the job fails
   */
  private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
      throws IOException {
    final JobConf ioJob = new JobConf(fsConfig, DFSCIOTest.class);

    // input: the control files
    ioJob.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(ioJob, CONTROL_DIR);

    // processing: the given mapper feeding a single accumulating reducer
    ioJob.setMapperClass(mapperClass);
    ioJob.setReducerClass(AccumulatingReducer.class);
    ioJob.setNumReduceTasks(1);

    // output: text key/value pairs
    ioJob.setOutputKeyClass(Text.class);
    ioJob.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(ioJob, outputDir);

    JobClient.runJob(ioJob);
  }

  /**
   * Read mapper class.
   *
   * <p>Each call to {@link #doIO} recreates the local test directory, copies the libhdfs shared
   * library and the {@code hdfs_read} program into it, and runs {@code hdfs_read} on one data
   * file.
   */
  public static class ReadMapper extends IOStatMapper {

    public ReadMapper() {
      super();
    }

    /**
     * Reads one data file by running the native {@code hdfs_read} program.
     *
     * @param reporter receives the interruption as a status message if the thread is interrupted
     * @param name name of the data file under the data directory
     * @param totalSize file size in megabytes
     * @return the file size in bytes
     * @throws IOException if the local directory cannot be created, a binary cannot be copied,
     *     or a launched process exits with a non-zero status
     */
    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      totalSize *= MEGA;

      // create instance of local filesystem
      FileSystem localFS = FileSystem.getLocal(fsConfig);

      try {
        // copy the dso and executable from dfs
        synchronized (this) {
          localFS.delete(HDFS_TEST_DIR, true);
          if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
            throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
          }
        }

        synchronized (this) {
          installExecutable(localFS, HDFS_SHLIB);
        }

        synchronized (this) {
          installExecutable(localFS, HDFS_READ);
        }

        // exec the C program
        Path inFile = new Path(DATA_DIR, name);
        String readCmd = HDFS_READ + " " + inFile + " " + totalSize + " " + bufferSize;
        Process process =
            Runtime.getRuntime().exec(readCmd, null, new File(HDFS_TEST_DIR.toString()));
        int exitStatus = process.waitFor();

        if (exitStatus != 0) {
          // Report the full command, as the write mapper does.
          throw new IOException(readCmd + ": Failed with exitStatus: " + exitStatus);
        }
      } catch (InterruptedException interruptedException) {
        // Restore the interrupt status so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        reporter.setStatus(interruptedException.toString());
      } finally {
        localFS.close();
      }
      return Long.valueOf(totalSize);
    }

    /**
     * Copies {@code file} to the same path on the local filesystem, unless it is already there,
     * and marks the copy executable for all users.
     */
    private void installExecutable(FileSystem localFS, Path file)
        throws IOException, InterruptedException {
      if (localFS.exists(file)) {
        return;
      }
      if (!FileUtil.copy(fs, file, localFS, file, false, fsConfig)) {
        throw new IOException("Failed to copy " + file + " to local filesystem");
      }

      String chmodCmd = CHMOD + " a+x " + file;
      Process process = Runtime.getRuntime().exec(chmodCmd);
      int exitStatus = process.waitFor();
      if (exitStatus != 0) {
        throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
      }
    }
  }

  /** Removes the read-result directory, then runs the read job. */
  private static void readTest(FileSystem fs) throws IOException {
    final Path resultDir = READ_DIR;
    fs.delete(resultDir, true);
    runIOTest(ReadMapper.class, resultDir);
  }

  /**
   * Runs the read or write i/o for every file in sequence, in this JVM, without a job.
   *
   * @param fs unused
   * @param testType {@code TEST_TYPE_READ} or {@code TEST_TYPE_WRITE}; any other value is a no-op
   * @param fileSize size of each file, in megabytes
   * @param nrFiles number of files
   * @throws Exception if the i/o for any file fails
   */
  private static void sequentialTest(FileSystem fs, int testType, int fileSize, int nrFiles)
      throws Exception {
    IOStatMapper ioer;
    if (testType == TEST_TYPE_READ) {
      ioer = new ReadMapper();
    } else if (testType == TEST_TYPE_WRITE) {
      ioer = new WriteMapper();
    } else {
      return;
    }
    for (int i = 0; i < nrFiles; i++) {
      // doIO takes the size in megabytes and converts it to bytes itself, so the value must
      // not be pre-multiplied by MEGA here.
      ioer.doIO(Reporter.NULL, getFileName(i), fileSize);
    }
  }

  /**
   * Command-line entry point of the benchmark.
   *
   * <p>Parses the options, uploads the native binaries found under {@code $HADOOP_PREFIX} to the
   * tested filesystem (except in cleanup mode), then runs the selected test and appends the
   * result summary to the result file. Any failure is printed to standard error and the JVM
   * exits with status -1.
   *
   * @param args command-line options; see the usage string
   */
  public static void main(String[] args) {
    int testType = TEST_TYPE_READ;
    int bufferSize = DEFAULT_BUFFER_SIZE;
    int fileSize = 1;
    int nrFiles = 1;
    String resFileName = DEFAULT_RES_FILE_NAME;
    boolean isSequential = false;

    String version = "DFSCIOTest.0.0.1";
    String usage =
        "Usage: DFSCIOTest -read | -write | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] ";

    System.out.println(version);
    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
      String arg = args[i];
      // Options that take a value are matched by exact name first; otherwise "-resFile" would
      // be caught by the "-r" prefix check below and treated as "-read".
      boolean takesValue =
          arg.equals("-nrFiles")
              || arg.equals("-fileSize")
              || arg.equals("-bufferSize")
              || arg.equals("-resFile");
      if (takesValue) {
        if (i + 1 >= args.length) {
          System.err.println("Missing value for option " + arg);
          System.err.println(usage);
          System.exit(-1);
        }
        String value = args[++i];
        if (arg.equals("-nrFiles")) {
          nrFiles = Integer.parseInt(value);
        } else if (arg.equals("-fileSize")) {
          fileSize = Integer.parseInt(value);
        } else if (arg.equals("-bufferSize")) {
          bufferSize = Integer.parseInt(value);
        } else {
          resFileName = value;
        }
      } else if (arg.startsWith("-r")) {
        testType = TEST_TYPE_READ;
      } else if (arg.startsWith("-w")) {
        testType = TEST_TYPE_WRITE;
      } else if (arg.startsWith("-clean")) {
        testType = TEST_TYPE_CLEANUP;
      } else if (arg.startsWith("-seq")) {
        isSequential = true;
      }
    }

    LOG.info("nrFiles = " + nrFiles);
    LOG.info("fileSize (MB) = " + fileSize);
    LOG.info("bufferSize = " + bufferSize);

    try {
      fsConfig.setInt("test.io.file.buffer.size", bufferSize);
      FileSystem fs = FileSystem.get(fsConfig);

      if (testType != TEST_TYPE_CLEANUP) {
        fs.delete(HDFS_TEST_DIR, true);
        if (!fs.mkdirs(HDFS_TEST_DIR)) {
          throw new IOException("Mkdirs failed to create " + HDFS_TEST_DIR.toString());
        }

        // Copy the executables over to the remote filesystem
        String hadoopHome = System.getenv("HADOOP_PREFIX");
        if (hadoopHome == null) {
          // Without this check the copies below would look under the literal path "null/libhdfs".
          throw new IOException("HADOOP_PREFIX environment variable is not set");
        }
        fs.copyFromLocalFile(
            new Path(hadoopHome + "/libhdfs/libhdfs.so." + HDFS_LIB_VERSION), HDFS_SHLIB);
        fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_read"), HDFS_READ);
        fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_write"), HDFS_WRITE);
      }

      if (isSequential) {
        long tStart = System.currentTimeMillis();
        sequentialTest(fs, testType, fileSize, nrFiles);
        long execTime = System.currentTimeMillis() - tStart;
        String resultLine = "Seq Test exec time sec: " + (float) execTime / 1000;
        LOG.info(resultLine);
        return;
      }
      if (testType == TEST_TYPE_CLEANUP) {
        cleanup(fs);
        return;
      }
      createControlFile(fs, fileSize, nrFiles);
      long tStart = System.currentTimeMillis();
      if (testType == TEST_TYPE_WRITE) {
        writeTest(fs);
      }
      if (testType == TEST_TYPE_READ) {
        readTest(fs);
      }
      long execTime = System.currentTimeMillis() - tStart;

      analyzeResult(fs, testType, execTime, resFileName);
    } catch (Exception e) {
      System.err.print(e.getLocalizedMessage());
      System.exit(-1);
    }
  }

  /**
   * Reads the reducer output of a finished run, derives the summary figures, and both logs them
   * and appends them to a local result file.
   *
   * @param fs filesystem holding the job output
   * @param testType {@code TEST_TYPE_WRITE} selects the write output directory; anything else
   *     the read output directory
   * @param execTime wall-clock time of the run in milliseconds
   * @param resFileName local file the summary is appended to
   * @throws IOException if the job output cannot be read or the result file cannot be written
   */
  private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName)
      throws IOException {
    Path reduceFile;
    if (testType == TEST_TYPE_WRITE) {
      reduceFile = new Path(WRITE_DIR, "part-00000");
    } else {
      reduceFile = new Path(READ_DIR, "part-00000");
    }

    long tasks = 0;
    long size = 0;
    long time = 0;
    float rate = 0;
    float sqrate = 0;

    DataInputStream in = new DataInputStream(fs.open(reduceFile));
    BufferedReader lines = new BufferedReader(new InputStreamReader(in));
    try {
      String line;
      while ((line = lines.readLine()) != null) {
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
        if (!tokens.hasMoreTokens()) {
          continue; // blank line: nothing to parse
        }
        String attr = tokens.nextToken();
        if (attr.endsWith(":tasks")) {
          tasks = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":size")) {
          size = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":time")) {
          time = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":rate")) {
          rate = Float.parseFloat(tokens.nextToken());
        } else if (attr.endsWith(":sqrate")) {
          sqrate = Float.parseFloat(tokens.nextToken());
        }
      }
    } finally {
      // Closing the reader also closes the wrapped filesystem stream.
      lines.close();
    }

    // The mappers emit both rate values multiplied by 1000; undo that here.
    double med = rate / 1000 / tasks;
    double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
    String resultLines[] = {
      "----- DFSCIOTest ----- : "
          + ((testType == TEST_TYPE_WRITE)
              ? "write"
              : (testType == TEST_TYPE_READ) ? "read" : "unknown"),
      "           Date & time: " + new Date(System.currentTimeMillis()),
      "       Number of files: " + tasks,
      "Total MBytes processed: " + size / MEGA,
      "     Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
      "Average IO rate mb/sec: " + med,
      " Std IO rate deviation: " + stdDev,
      "    Test exec time sec: " + (float) execTime / 1000,
      ""
    };

    PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true));
    try {
      for (int i = 0; i < resultLines.length; i++) {
        LOG.info(resultLines[i]);
        res.println(resultLines[i]);
      }
    } finally {
      res.close();
    }
  }

  /** Deletes the benchmark root directory and the directory holding the native binaries. */
  private static void cleanup(FileSystem fs) throws Exception {
    LOG.info("Cleaning up test files");
    final Path benchmarkRoot = new Path(TEST_ROOT_DIR);
    fs.delete(benchmarkRoot, true);
    fs.delete(HDFS_TEST_DIR, true);
  }
}
public class TestFileSystem extends TestCase {
  // Reuses the logger of FileSystem instead of declaring one for this test.
  private static final Log LOG = FileSystem.LOG;

  // Configuration shared by the driver methods and the mapper classes in this file.
  private static Configuration conf = new Configuration();
  // Size of the buffers the mappers use to generate and verify file contents.
  private static int BUFFER_SIZE = conf.getInt("io.file.buffer.size", 4096);

  // Number of bytes in one megabyte.
  private static final long MEGA = 1024 * 1024;
  // Number of random positions the seek mapper checks in each file.
  private static final int SEEKS_PER_FILE = 4;

  // Root of the working directories used by the test.
  private static String ROOT = System.getProperty("test.build.data", "fs_test");
  // Holds the control file that is the input of every job.
  private static Path CONTROL_DIR = new Path(ROOT, "fs_control");
  // Job output directory of the write job.
  private static Path WRITE_DIR = new Path(ROOT, "fs_write");
  // Job output directory shared by the read and seek jobs.
  private static Path READ_DIR = new Path(ROOT, "fs_read");
  // Holds the data files that are written, read and seeked.
  private static Path DATA_DIR = new Path(ROOT, "fs_data");

  /** Runs the full write/read/seek cycle on 10 MB of data in about 100 files, random seed. */
  public void testFs() throws Exception {
    final long totalBytes = 10 * MEGA;
    final int fileCount = 100;
    final long pickRandomSeed = 0; // zero makes testFs choose a random seed
    testFs(totalBytes, fileCount, pickRandomSeed);
  }

  /**
   * Creates a control file, runs the write, read and seek jobs, and removes all working
   * directories afterwards.
   *
   * @param megaBytes total amount of data to generate; passed through to the control file
   *     creation, which treats it as a byte count
   * @param numFiles target number of files
   * @param seed seed for the file names and sizes; zero selects a random seed
   * @throws Exception if any step fails
   */
  public static void testFs(long megaBytes, int numFiles, long seed) throws Exception {
    final FileSystem fs = FileSystem.get(conf);

    final long effectiveSeed = (seed != 0) ? seed : new Random().nextLong();
    LOG.info("seed = " + effectiveSeed);

    createControlFile(fs, megaBytes, numFiles, effectiveSeed);
    writeTest(fs, false);
    readTest(fs, false);
    seekTest(fs, false);

    for (Path workDir : new Path[] {CONTROL_DIR, DATA_DIR, WRITE_DIR, READ_DIR}) {
      fs.delete(workDir, true);
    }
  }

  /**
   * Checks how {@code CommandFormat} separates options from positional arguments for a few
   * shell commands.
   *
   * @throws Exception if parsing fails unexpectedly
   */
  public static void testCommandFormat() throws Exception {
    // This should go to TestFsShell.java when it is added.
    CommandFormat cf;
    cf = new CommandFormat("copyToLocal", 2, 2, "crc", "ignoreCrc");
    // assertEquals takes the expected value first, so failure messages read correctly
    assertEquals("-", cf.parse(new String[] {"-get", "file", "-"}, 1).get(1));
    try {
      cf.parse(new String[] {"-get", "file", "-ignoreCrc", "/foo"}, 1);
      fail("Expected parsing to fail as it should stop at first non-option");
    } catch (Exception e) {
      // Expected
    }
    cf = new CommandFormat("tail", 1, 1, "f");
    assertEquals("fileName", cf.parse(new String[] {"-tail", "fileName"}, 1).get(0));
    assertEquals("fileName", cf.parse(new String[] {"-tail", "-f", "fileName"}, 1).get(0));
    cf = new CommandFormat("setrep", 2, 2, "R", "w");
    assertEquals("/foo/bar", cf.parse(new String[] {"-setrep", "-R", "2", "/foo/bar"}, 1).get(1));
    cf = new CommandFormat("put", 2, 10000);
    assertEquals("dest", cf.parse(new String[] {"-put", "-", "dest"}, 1).get(1));
  }

  /**
   * Writes the control file: a sequence of (file name, file size) records whose sizes add up to
   * at least the requested total.
   *
   * <p>Names are random longs rendered as text; sizes are random and below
   * {@code 2 * megaBytes / numFiles + 1}.
   *
   * @param fs filesystem on which the control file is created
   * @param megaBytes total number of bytes to distribute over the files (despite its name, the
   *     value is compared against a sum of byte sizes)
   * @param numFiles target number of files; must be positive
   * @param seed seed of the generator producing names and sizes
   * @throws IllegalArgumentException if {@code numFiles} is not positive
   * @throws Exception if the control file cannot be written
   */
  public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed)
      throws Exception {
    if (numFiles <= 0) {
      throw new IllegalArgumentException("numFiles must be positive: " + numFiles);
    }

    LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

    Path controlFile = new Path(CONTROL_DIR, "files");
    fs.delete(controlFile, true);
    Random random = new Random(seed);

    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE);

    long totalSize = 0;
    // A bound of 1 would make every size zero and the loop below endless, so use at least 2.
    long maxSize = Math.max(2L, ((megaBytes / numFiles) * 2) + 1);
    try {
      while (totalSize < megaBytes) {
        Text name = new Text(Long.toString(random.nextLong()));

        // Reduce first, then take the absolute value: negating Long.MIN_VALUE overflows and
        // stays negative, whereas the remainder always has a representable magnitude.
        long size = Math.abs(random.nextLong() % maxSize);

        // LOG.info(" adding: name="+name+" size="+size);

        writer.append(name, new LongWritable(size));

        totalSize += size;
      }
    } finally {
      writer.close();
    }
    LOG.info("created control file for: " + totalSize + " bytes");
  }

  /**
   * Mapper that creates one data file per control record.
   *
   * <p>The record key is the file name and also the seed of the pseudo-random content; the
   * record value is the file size in bytes. The number of bytes written is emitted under the
   * key {@code "bytes"}.
   */
  public static class WriteMapper extends Configured
      implements Mapper<Text, LongWritable, Text, LongWritable> {

    private Random random = new Random();
    private byte[] buffer = new byte[BUFFER_SIZE];
    private FileSystem fs;
    private boolean fastCheck;

    // a random suffix per task
    private String suffix = "-" + random.nextLong();

    {
      try {
        fs = FileSystem.get(conf);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public WriteMapper() {
      super(null);
    }

    public WriteMapper(Configuration conf) {
      super(conf);
    }

    public void configure(JobConf job) {
      setConf(job);
      fastCheck = job.getBoolean("fs.test.fastCheck", false);
    }

    /**
     * Writes the file described by the record and reports the number of bytes written.
     *
     * @throws IOException if writing fails or the finished file cannot be renamed into place
     */
    public void map(
        Text key,
        LongWritable value,
        OutputCollector<Text, LongWritable> collector,
        Reporter reporter)
        throws IOException {

      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      random.setSeed(seed);
      reporter.setStatus("creating " + name);

      // write to temp file initially to permit parallel execution
      Path tempFile = new Path(DATA_DIR, name + suffix);
      OutputStream out = fs.create(tempFile);

      long written = 0;
      try {
        while (written < size) {
          if (fastCheck) {
            // fast mode: one random byte value repeated over the whole buffer
            Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
          } else {
            random.nextBytes(buffer);
          }
          long remains = size - written;
          int length = (remains <= buffer.length) ? (int) remains : buffer.length;
          out.write(buffer, 0, length);
          written += length;
          reporter.setStatus("writing " + name + "@" + written + "/" + size);
        }
      } finally {
        out.close();
      }
      // rename to final location; a failed rename is reported here instead of showing up later
      // as a missing file
      Path finalFile = new Path(DATA_DIR, name);
      if (!fs.rename(tempFile, finalFile)) {
        throw new IOException("Failed to rename " + tempFile + " to " + finalFile);
      }

      collector.collect(new Text("bytes"), new LongWritable(written));

      reporter.setStatus("wrote " + name);
    }

    public void close() {}
  }

  /**
   * Removes earlier data and write results, then runs the write job over the control file.
   *
   * @param fs filesystem holding the working directories
   * @param fastCheck passed to the mappers as {@code fs.test.fastCheck}
   * @throws Exception if the job fails
   */
  public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
    for (Path stale : new Path[] {DATA_DIR, WRITE_DIR}) {
      fs.delete(stale, true);
    }

    final JobConf writeJob = new JobConf(conf, TestFileSystem.class);
    writeJob.setBoolean("fs.test.fastCheck", fastCheck);

    // input: the control file
    writeJob.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(writeJob, CONTROL_DIR);

    // processing: write mapper feeding a single summing reducer
    writeJob.setMapperClass(WriteMapper.class);
    writeJob.setReducerClass(LongSumReducer.class);
    writeJob.setNumReduceTasks(1);

    // output
    writeJob.setOutputKeyClass(Text.class);
    writeJob.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(writeJob, WRITE_DIR);

    JobClient.runJob(writeJob);
  }

  /**
   * Mapper that reads each data file back and verifies its content.
   *
   * <p>The expected content is regenerated from the file name, which is the seed of the
   * pseudo-random generator. The number of bytes read is emitted under the key {@code "bytes"}.
   */
  public static class ReadMapper extends Configured
      implements Mapper<Text, LongWritable, Text, LongWritable> {

    private Random random = new Random();
    private byte[] buffer = new byte[BUFFER_SIZE];
    private byte[] check = new byte[BUFFER_SIZE];
    private FileSystem fs;
    private boolean fastCheck;

    {
      try {
        fs = FileSystem.get(conf);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public ReadMapper() {
      super(null);
    }

    public ReadMapper(Configuration conf) {
      super(conf);
    }

    public void configure(JobConf job) {
      setConf(job);
      fastCheck = job.getBoolean("fs.test.fastCheck", false);
    }

    /** Reads the file described by the record buffer by buffer, comparing each buffer. */
    public void map(
        Text key,
        LongWritable value,
        OutputCollector<Text, LongWritable> collector,
        Reporter reporter)
        throws IOException {

      final String fileName = key.toString();
      final long expectedBytes = value.get();

      // the file name is the seed its content was generated from
      random.setSeed(Long.parseLong(fileName));
      reporter.setStatus("opening " + fileName);

      DataInputStream in = new DataInputStream(fs.open(new Path(DATA_DIR, fileName)));

      long totalRead = 0;
      try {
        while (totalRead < expectedBytes) {
          final int chunk = (int) Math.min(expectedBytes - totalRead, (long) buffer.length);
          in.readFully(buffer, 0, chunk);
          totalRead += chunk;

          // regenerate the expected content of this buffer
          if (!fastCheck) {
            random.nextBytes(check);
          } else {
            Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE));
          }

          if (chunk < buffer.length) {
            // last, partial buffer: blank the tails so only the bytes read are compared
            Arrays.fill(buffer, chunk, buffer.length, (byte) 0);
            Arrays.fill(check, chunk, check.length, (byte) 0);
          }
          assertTrue(Arrays.equals(buffer, check));

          reporter.setStatus("reading " + fileName + "@" + totalRead + "/" + expectedBytes);
        }
      } finally {
        in.close();
      }

      collector.collect(new Text("bytes"), new LongWritable(totalRead));

      reporter.setStatus("read " + fileName);
    }

    public void close() {}
  }

  /**
   * Removes earlier read results, then runs the read job over the control file.
   *
   * @param fs filesystem holding the working directories
   * @param fastCheck passed to the mappers as {@code fs.test.fastCheck}
   * @throws Exception if the job fails
   */
  public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
    fs.delete(READ_DIR, true);

    final JobConf readJob = new JobConf(conf, TestFileSystem.class);
    readJob.setBoolean("fs.test.fastCheck", fastCheck);

    // input: the control file
    readJob.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(readJob, CONTROL_DIR);

    // processing: read mapper feeding a single summing reducer
    readJob.setMapperClass(ReadMapper.class);
    readJob.setReducerClass(LongSumReducer.class);
    readJob.setNumReduceTasks(1);

    // output
    readJob.setOutputKeyClass(Text.class);
    readJob.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(readJob, READ_DIR);

    JobClient.runJob(readJob);
  }

  /**
   * Mapper that seeks to random positions in each data file and verifies the byte found there.
   *
   * <p>The expected byte is obtained by replaying the pseudo-random generator, seeded with the
   * file name, up to the buffer containing the position. Nothing is emitted to the collector.
   */
  public static class SeekMapper<K> extends Configured
      implements Mapper<Text, LongWritable, K, LongWritable> {

    private Random random = new Random();
    private byte[] check = new byte[BUFFER_SIZE];
    private FileSystem fs;
    private boolean fastCheck;

    {
      try {
        fs = FileSystem.get(conf);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public SeekMapper() {
      super(null);
    }

    public SeekMapper(Configuration conf) {
      super(conf);
    }

    public void configure(JobConf job) {
      setConf(job);
      fastCheck = job.getBoolean("fs.test.fastCheck", false);
    }

    /** Checks {@code SEEKS_PER_FILE} random positions of the file described by the record. */
    public void map(
        Text key, LongWritable value, OutputCollector<K, LongWritable> collector, Reporter reporter)
        throws IOException {
      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      if (size == 0) return;

      reporter.setStatus("opening " + name);

      FSDataInputStream in = fs.open(new Path(DATA_DIR, name));

      try {
        for (int i = 0; i < SEEKS_PER_FILE; i++) {
          // generate a random position; reduce before taking the absolute value because
          // Math.abs(Long.MIN_VALUE) is negative
          long position = Math.abs(random.nextLong() % size);

          // seek file to that position
          reporter.setStatus("seeking " + name);
          in.seek(position);
          byte b = in.readByte();

          // check that byte matches
          byte checkByte = 0;
          // advance random state to that position; the counter is a long so it cannot
          // overflow for positions beyond Integer.MAX_VALUE
          random.setSeed(seed);
          for (long p = 0; p <= position; p += check.length) {
            reporter.setStatus("generating data for " + name);
            if (fastCheck) {
              checkByte = (byte) random.nextInt(Byte.MAX_VALUE);
            } else {
              random.nextBytes(check);
              checkByte = check[(int) (position % check.length)];
            }
          }
          // expected value first, so a failure message reads correctly
          assertEquals(checkByte, b);
        }
      } finally {
        in.close();
      }
    }

    public void close() {}
  }

  /**
   * Removes earlier read results, then runs the seek job over the control file.
   *
   * @param fs filesystem holding the working directories
   * @param fastCheck passed to the mappers as {@code fs.test.fastCheck}
   * @throws Exception if the job fails
   */
  public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {
    fs.delete(READ_DIR, true);

    final JobConf seekJob = new JobConf(conf, TestFileSystem.class);
    seekJob.setBoolean("fs.test.fastCheck", fastCheck);

    // input: the control file
    seekJob.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(seekJob, CONTROL_DIR);

    // processing: seek mapper feeding a single summing reducer
    seekJob.setMapperClass(SeekMapper.class);
    seekJob.setReducerClass(LongSumReducer.class);
    seekJob.setNumReduceTasks(1);

    // output: shares the result directory of the read job
    seekJob.setOutputKeyClass(Text.class);
    seekJob.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(seekJob, READ_DIR);

    JobClient.runJob(seekJob);
  }

  /**
   * Command-line entry point: runs the write, read and seek tests unless they are disabled by
   * the corresponding option.
   *
   * @param args command-line options; see the usage string
   * @throws Exception if a test fails
   */
  public static void main(String[] args) throws Exception {
    int megaBytes = 10;
    int files = 100;
    boolean noRead = false;
    boolean noWrite = false;
    boolean noSeek = false;
    boolean fastCheck = false;
    long seed = new Random().nextLong();

    String usage =
        "Usage: TestFileSystem -files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
      String arg = args[i];
      if (arg.equals("-files") || arg.equals("-megaBytes")) {
        // these options need a value; report a missing one instead of running off the array
        if (i + 1 >= args.length) {
          System.err.println("Missing value for option " + arg);
          System.err.println(usage);
          System.exit(-1);
        }
        int value = Integer.parseInt(args[++i]);
        if (arg.equals("-files")) {
          files = value;
        } else {
          megaBytes = value;
        }
      } else if (arg.equals("-noread")) {
        noRead = true;
      } else if (arg.equals("-nowrite")) {
        noWrite = true;
      } else if (arg.equals("-noseek")) {
        noSeek = true;
      } else if (arg.equals("-fastcheck")) {
        fastCheck = true;
      }
    }

    LOG.info("seed = " + seed);
    LOG.info("files = " + files);
    LOG.info("megaBytes = " + megaBytes);

    FileSystem fs = FileSystem.get(conf);

    if (!noWrite) {
      createControlFile(fs, megaBytes * MEGA, files, seed);
      writeTest(fs, fastCheck);
    }
    if (!noRead) {
      readTest(fs, fastCheck);
    }
    if (!noSeek) {
      seekTest(fs, fastCheck);
    }
  }

  /**
   * Checks the filesystem cache: different users must get different {@code FileSystem}
   * instances for the same configuration, and the cache checks in {@code runTestCache} must
   * pass both on the default name-node RPC port (when it can be bound) and on port 0.
   *
   * @throws Exception if a check fails
   */
  public void testFsCache() throws Exception {
    {
      String[] users = new String[] {"foo", "bar"};
      final Configuration conf = new Configuration();
      FileSystem[] fs = new FileSystem[users.length];

      for (int i = 0; i < users.length; i++) {
        UserGroupInformation ugi = UserGroupInformation.createRemoteUser(users[i]);
        fs[i] =
            ugi.doAs(
                new PrivilegedExceptionAction<FileSystem>() {
                  public FileSystem run() throws IOException {
                    return FileSystem.get(conf);
                  }
                });
        // every user must get an instance distinct from those of all earlier users
        for (int j = 0; j < i; j++) {
          assertNotSame(fs[j], fs[i]);
        }
      }
      FileSystem.closeAll();
    }

    {
      try {
        runTestCache(HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT);
      } catch (java.net.BindException be) {
        // the default port may be in use on this machine; that is not a test failure
        LOG.warn(
            "Cannot test HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT (="
                + HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT
                + ")",
            be);
      }

      runTestCache(0);
    }
  }

  /**
   * Starts a two-datanode mini cluster with the NameNode on {@code port} and checks that repeated
   * {@code FileSystem.get} calls for the cluster URI return one and the same instance, even with
   * a fresh {@link Configuration} each time. When {@code port} is the default NameNode RPC port,
   * the same check is repeated for a URI that spells the default port out explicitly.
   *
   * @param port NameNode port handed to the cluster builder
   * @throws Exception if the cluster cannot be started or a check fails
   */
  static void runTestCache(int port) throws Exception {
    final int defaultPort = HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT;
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).nameNodePort(port).numDataNodes(2).build();
      URI clusterUri = cluster.getFileSystem().getUri();
      LOG.info("uri=" + clusterUri);

      FileSystem viaClusterUri = FileSystem.get(clusterUri, new Configuration());
      checkPath(cluster, viaClusterUri);
      assertAlwaysCached(viaClusterUri, clusterUri);

      if (port == defaultPort) {
        // test explicit default port
        URI explicitPortUri =
            new URI(
                clusterUri.getScheme(),
                clusterUri.getUserInfo(),
                clusterUri.getHost(),
                defaultPort,
                clusterUri.getPath(),
                clusterUri.getQuery(),
                clusterUri.getFragment());
        LOG.info("uri2=" + explicitPortUri);
        FileSystem viaExplicitPort = FileSystem.get(explicitPortUri, conf);
        checkPath(cluster, viaExplicitPort);
        assertAlwaysCached(viaExplicitPort, explicitPortUri);
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /** Asserts that 100 lookups of {@code uri}, each with a new Configuration, yield {@code expected}. */
  private static void assertAlwaysCached(FileSystem expected, URI uri) throws IOException {
    for (int attempt = 0; attempt < 100; attempt++) {
      assertTrue(expected == FileSystem.get(uri, new Configuration()));
    }
  }

  /**
   * Calls {@code fileSys.checkPath} with an {@code hdfs://} path built from the cluster NameNode's
   * address, with the host name converted to upper case.
   *
   * @param cluster cluster whose NameNode address is used
   * @param fileSys file system whose {@code checkPath} is invoked
   * @throws IOException declared for callers; propagated from the calls made here
   */
  static void checkPath(MiniDFSCluster cluster, FileSystem fileSys) throws IOException {
    InetSocketAddress nnAddress = cluster.getNameNode().getNameNodeAddress();
    // Test upper/lower case
    String upperHost = StringUtils.toUpperCase(nnAddress.getHostName());
    int nnPort = nnAddress.getPort();
    Path upperCasePath = new Path("hdfs://" + upperHost + ":" + nnPort);
    fileSys.checkPath(upperCasePath);
  }

  /**
   * Obtains the file system for {@code file:///} with a fresh configuration and then calls
   * {@code FileSystem.closeAll()}; the test passes if neither call throws.
   */
  public void testFsClose() throws Exception {
    Configuration localConf = new Configuration();
    Path localRoot = new Path("file:///");
    localRoot.getFileSystem(localConf);
    FileSystem.closeAll();
  }

  /**
   * Verifies which cached file systems get closed by each flavour of close-all.
   *
   * <p>Two {@link TestShutdownFileSystem} instances are created for the {@code test} scheme: one
   * with a default configuration and one with {@code fs.automatic.close} set to false. After
   * {@code FileSystem.CACHE.closeAll(true)} only the first is expected to have reported a close;
   * after the subsequent {@code FileSystem.closeAll()} only the second is.
   */
  public void testFsShutdownHook() throws Exception {
    final String implKey = "fs.test.impl";

    Configuration autoCloseConf = new Configuration();
    autoCloseConf.setClass(implKey, TestShutdownFileSystem.class, FileSystem.class);

    Configuration manualCloseConf = new Configuration();
    manualCloseConf.setClass(implKey, TestShutdownFileSystem.class, FileSystem.class);
    manualCloseConf.setBoolean("fs.automatic.close", false);

    FileSystem rawAuto = new Path("test://a/").getFileSystem(autoCloseConf);
    TestShutdownFileSystem autoClosed = (TestShutdownFileSystem) rawAuto;
    FileSystem rawManual = new Path("test://b/").getFileSystem(manualCloseConf);
    TestShutdownFileSystem manuallyClosed = (TestShutdownFileSystem) rawManual;

    // Both instances report their close() calls into the same shared set.
    final Set<FileSystem> closeLog = Collections.synchronizedSet(new HashSet<FileSystem>());
    autoClosed.setClosedSet(closeLog);
    manuallyClosed.setClosedSet(closeLog);

    // Different URIs should result in different FS instances
    assertNotSame(autoClosed, manuallyClosed);

    // Phase 1: closeAll(true) on the cache.
    FileSystem.CACHE.closeAll(true);
    assertEquals(1, closeLog.size());
    assertTrue(closeLog.contains(autoClosed));

    closeLog.clear();

    // Phase 2: unconditional closeAll picks up the remaining instance.
    FileSystem.closeAll();
    assertEquals(1, closeLog.size());
    assertTrue(closeLog.contains(manuallyClosed));
  }

  /**
   * Checks that {@code FileSystem.Cache.Key} instances built from URIs that differ only in the
   * case of scheme and host compare equal, both directly and when looked up through a list, a
   * hash set and a hash map.
   */
  public void testCacheKeysAreCaseInsensitive() throws Exception {
    Configuration conf = new Configuration();

    // Two keys from the identical lower-case URI must be equal.
    URI lowerUriA = new URI("hdfs://localhost:12345/");
    FileSystem.Cache.Key lowerKeyA = new FileSystem.Cache.Key(lowerUriA, conf);
    URI lowerUriB = new URI("hdfs://localhost:12345/");
    FileSystem.Cache.Key lowerKeyB = new FileSystem.Cache.Key(lowerUriB, conf);
    assertEquals(lowerKeyA, lowerKeyB);

    // A key from the mixed-case spelling must equal the lower-case one.
    URI mixedCaseUri = new URI("HDFS://Localhost:12345/");
    FileSystem.Cache.Key mixedKey = new FileSystem.Cache.Key(mixedCaseUri, conf);
    assertEquals(lowerKeyB, mixedKey);

    // Lookups in the standard collections must agree with equals().
    List<FileSystem.Cache.Key> keyList = new ArrayList<FileSystem.Cache.Key>();
    keyList.add(mixedKey);
    assertTrue(keyList.contains(mixedKey));
    assertTrue(keyList.contains(lowerKeyB));

    Set<FileSystem.Cache.Key> keySet = new HashSet<FileSystem.Cache.Key>();
    keySet.add(mixedKey);
    assertTrue(keySet.contains(mixedKey));
    assertTrue(keySet.contains(lowerKeyB));

    Map<FileSystem.Cache.Key, String> keyMap = new HashMap<FileSystem.Cache.Key, String>();
    keyMap.put(mixedKey, "");
    assertTrue(keyMap.containsKey(mixedKey));
    assertTrue(keyMap.containsKey(lowerKeyB));
  }

  /**
   * Checks the instance-sharing contract of the two FileSystem factory methods: {@code
   * FileSystem.get} must return the same object on repeated calls with the class-level {@code
   * conf}, while {@code FileSystem.newInstance} must return distinct, non-equal objects.
   *
   * <p>The instances created with {@code newInstance} are closed before returning, including when
   * the assertion fails or the second creation throws.
   *
   * @param megaBytes not used by this check
   * @param numFiles not used by this check
   * @param seed not used by this check
   * @throws Exception if a file system cannot be obtained or closed
   */
  public static void testFsUniqueness(long megaBytes, int numFiles, long seed) throws Exception {

    // multiple invocations of FileSystem.get return the same object.
    FileSystem fs1 = FileSystem.get(conf);
    FileSystem fs2 = FileSystem.get(conf);
    assertTrue(fs1 == fs2);

    // multiple invocations of FileSystem.newInstance return different objects
    FileSystem fresh1 = null;
    FileSystem fresh2 = null;
    try {
      fresh1 = FileSystem.newInstance(conf);
      fresh2 = FileSystem.newInstance(conf);
      assertTrue(fresh1 != fresh2 && !fresh1.equals(fresh2));
    } finally {
      // Nested finally so that a failure closing the first does not skip the second.
      try {
        if (fresh1 != null) {
          fresh1.close();
        }
      } finally {
        if (fresh2 != null) {
          fresh2.close();
        }
      }
    }
  }

  /**
   * Local file system used by the shutdown tests: when a set has been supplied through {@link
   * #setClosedSet(Set)}, each call to {@link #close()} adds this instance to that set before
   * delegating to the superclass.
   */
  public static class TestShutdownFileSystem extends RawLocalFileSystem {
    /** Set that receives this instance on close; stays null until a set is supplied. */
    private Set<FileSystem> closedSet;

    /** Installs the set into which this instance records itself when closed. */
    public void setClosedSet(Set<FileSystem> closedSet) {
      this.closedSet = closedSet;
    }

    /** Records this instance in the configured set, if any, then closes the file system. */
    public void close() throws IOException {
      final Set<FileSystem> sink = this.closedSet;
      if (sink != null) {
        sink.add(this);
      }
      super.close();
    }
  }
}