Example #1
  public void inject(Path crawlDb, Path urlDir) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: starting at " + sdf.format(start));
      LOG.info("Injector: crawlDb: " + crawlDb);
      LOG.info("Injector: urlDir: " + urlDir);
    }

    Path tempDir =
        new Path(
            getConf().get("mapred.temp.dir", ".")
                + "/inject-temp-"
                + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    // map text input file to a <url,CrawlDatum> file
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Converting injected urls to crawl db entries.");
    }
    JobConf sortJob = new NutchJob(getConf());
    sortJob.setJobName("inject " + urlDir);
    FileInputFormat.addInputPath(sortJob, urlDir);
    sortJob.setMapperClass(InjectMapper.class);

    FileOutputFormat.setOutputPath(sortJob, tempDir);
    sortJob.setOutputFormat(SequenceFileOutputFormat.class);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(CrawlDatum.class);
    sortJob.setLong("injector.current.time", System.currentTimeMillis());
    RunningJob mapJob = JobClient.runJob(sortJob);

    long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
    long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
    LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
    LOG.info(
        "Injector: total number of urls injected after normalization and filtering: "
            + urlsInjected);

    // merge with existing crawl db
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Merging injected urls into crawl db.");
    }
    JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
    FileInputFormat.addInputPath(mergeJob, tempDir);
    mergeJob.setReducerClass(InjectReducer.class);
    JobClient.runJob(mergeJob);
    CrawlDb.install(mergeJob, crawlDb);

    // clean up
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info(
        "Injector: finished at "
            + sdf.format(end)
            + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
  }
  @Override
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(
        conf,
        Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));

    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);

    return 0;
  }
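One common way to launch a Tool driver like the run() method above is through ToolRunner, mirroring the main methods shown in the later examples; a minimal sketch, assuming a hypothetical UFORecordCounter class that hosts this run() method:

  // Minimal launcher sketch; UFORecordCounter is an assumed class name, not part of the original snippet.
  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new UFORecordCounter(), args);
    System.exit(exitCode);
  }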
    public void configure(JobConf job) {
      this.jobconf = job;
      String cassConfig;

      // Get the cached files
      try {
        localFiles = DistributedCache.getLocalCacheFiles(job);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      cassConfig = localFiles[0].getParent().toString();

      System.setProperty("storage-config", cassConfig);

      try {
        StorageService.instance.initClient();
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
      try {
        Thread.sleep(10 * 1000);
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
Example #4
 public void configure(JobConf job) {
   this.jobConf = job;
   urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
   interval = jobConf.getInt("db.fetch.interval.default", 2592000); // default 2,592,000 s = 30 days
   filters = new URLFilters(jobConf);
   scfilters = new ScoringFilters(jobConf);
   scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
   curTime = job.getLong("injector.current.time", System.currentTimeMillis());
 }
 public void bumpProgress() {
   numWritten++;
   if (numWritten % 25000 == 0) {
     long now = System.currentTimeMillis();
     long delta = now - lastCheckpoint;
     lastCheckpoint = now;
     LOG.info("Wrote last 25000 records in " + delta + " ms");
     localManager.progress();
   }
 }
Example #6
  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }
Example #7
  public static boolean stopIteration(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path preFile = new Path("preX/Result");
    Path curFile = new Path("curX/part-00000");

    if (!(fs.exists(preFile) && fs.exists(curFile))) {
      System.exit(1);
    }

    boolean stop = true;
    String line1, line2;
    FSDataInputStream in1 = fs.open(preFile);
    FSDataInputStream in2 = fs.open(curFile);
    InputStreamReader isr1 = new InputStreamReader(in1);
    InputStreamReader isr2 = new InputStreamReader(in2);
    BufferedReader br1 = new BufferedReader(isr1);
    BufferedReader br2 = new BufferedReader(isr2);

    while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) {
      String[] str1 = line1.split("\\s+");
      String[] str2 = line2.split("\\s+");
      double preElem = Double.parseDouble(str1[1]);
      double curElem = Double.parseDouble(str2[1]);
      if (Math.abs(preElem - curElem) > eps) {
        stop = false;
        break;
      }
    }

    if (!stop) {
      fs.delete(preFile, true);
      if (!fs.rename(curFile, preFile)) {
        System.exit(1);
      }
    }
    return stop;
  }
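A minimal driver-loop sketch of how a convergence check like stopIteration() is typically used; runIterationJob() below is a placeholder for whatever submits the MapReduce job that writes curX/part-00000, and is not part of the original snippet:

  // Hypothetical iteration driver (illustrative only): rerun the job until two
  // successive results differ by less than eps everywhere; stopIteration() also
  // rotates curX into preX when another iteration is needed.
  public static void iterateUntilConverged(Configuration conf) throws Exception {
    do {
      runIterationJob(conf); // placeholder for the actual job submission
    } while (!stopIteration(conf));
  }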
  public static void main(String[] args) throws Exception {
    int megaBytes = 10;
    int files = 100;
    boolean noRead = false;
    boolean noWrite = false;
    boolean noSeek = false;
    boolean fastCheck = false;
    long seed = new Random().nextLong();

    String usage =
        "Usage: TestFileSystem -files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
      if (args[i].equals("-files")) {
        files = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-megaBytes")) {
        megaBytes = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-noread")) {
        noRead = true;
      } else if (args[i].equals("-nowrite")) {
        noWrite = true;
      } else if (args[i].equals("-noseek")) {
        noSeek = true;
      } else if (args[i].equals("-fastcheck")) {
        fastCheck = true;
      }
    }

    LOG.info("seed = " + seed);
    LOG.info("files = " + files);
    LOG.info("megaBytes = " + megaBytes);

    FileSystem fs = FileSystem.get(conf);

    if (!noWrite) {
      createControlFile(fs, megaBytes * MEGA, files, seed);
      writeTest(fs, fastCheck);
    }
    if (!noRead) {
      readTest(fs, fastCheck);
    }
    if (!noSeek) {
      seekTest(fs, fastCheck);
    }
  }
Example #9
  private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName)
      throws IOException {
    Path reduceFile;
    if (testType == TEST_TYPE_WRITE) reduceFile = new Path(WRITE_DIR, "part-00000");
    else reduceFile = new Path(READ_DIR, "part-00000");
    DataInputStream in;
    in = new DataInputStream(fs.open(reduceFile));

    BufferedReader lines;
    lines = new BufferedReader(new InputStreamReader(in));
    long tasks = 0;
    long size = 0;
    long time = 0;
    float rate = 0;
    float sqrate = 0;
    String line;
    while ((line = lines.readLine()) != null) {
      StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
      String attr = tokens.nextToken();
      if (attr.endsWith(":tasks")) tasks = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":size")) size = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":time")) time = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":rate")) rate = Float.parseFloat(tokens.nextToken());
      else if (attr.endsWith(":sqrate")) sqrate = Float.parseFloat(tokens.nextToken());
    }

    double med = rate / 1000 / tasks;
    double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
    String resultLines[] = {
      "----- DFSCIOTest ----- : "
          + ((testType == TEST_TYPE_WRITE)
              ? "write"
              : (testType == TEST_TYPE_READ) ? "read" : "unknown"),
      "           Date & time: " + new Date(System.currentTimeMillis()),
      "       Number of files: " + tasks,
      "Total MBytes processed: " + size / MEGA,
      "     Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
      "Average IO rate mb/sec: " + med,
      " Std IO rate deviation: " + stdDev,
      "    Test exec time sec: " + (float) execTime / 1000,
      ""
    };

    PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true));
    for (int i = 0; i < resultLines.length; i++) {
      LOG.info(resultLines[i]);
      res.println(resultLines[i]);
    }
  }
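For reference, a small standalone sketch of the statistics arithmetic above, assuming (as the IOStatMapper.collectStats() method in the full DFSCIOTest listing further down shows) that the reducer accumulates rate as 1000 times the sum of the per-task IO rates and sqrate as 1000 times the sum of their squares; the helper name is illustrative only:

  // Illustrative only: 'med' above is the mean per-task IO rate in MB/s and
  // 'stdDev' the population standard deviation, recovered from the accumulated sums.
  static double[] meanAndStdDev(float rate, float sqrate, long tasks) {
    double mean = rate / 1000.0 / tasks;                               // E[r]
    double variance = Math.abs(sqrate / 1000.0 / tasks - mean * mean); // E[r^2] - E[r]^2
    return new double[] {mean, Math.sqrt(variance)};
  }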
Example #10
  public int run(String[] args) throws Exception {
    if (args.length < 4) {
      System.out.println("ERROR: Please Enter args : input output type(text|seq) splitChar(9=\t)");
      return JobClient.SUCCESS;
    }
    String input = args[0];
    String output = args[1];
    String type = args[2];
    String splitChar = args[3];

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.split", splitChar);

    config.setJobName("File Filter -" + System.currentTimeMillis());
    config.setNumReduceTasks(10);
    config.setReducerClass(IdentityReducer.class);
    config.setMapperClass(FileTestMapper.class);
    if ("text".equals(type)) {
      config.setInputFormat(TextInputFormat.class);
      TextInputFormat.addInputPath(config, new Path(input));
    } else {
      config.setInputFormat(SequenceFileInputFormat.class);
      SequenceFileInputFormat.addInputPath(config, new Path(input));
    }
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
  public int run(String[] args) throws Exception {
    if (args.length < 1) {
      args = new String[] {DateStringUtils.now()};
      System.out.println(
          "ERROR: Please Enter Date , eg. 20101010 ! now use default => " + DateStringUtils.now());
    }

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.args", Utils.asString(args));

    config.setJobName(getClass() + "-" + System.currentTimeMillis());
    config.setNumReduceTasks(100);
    config.setMapperClass(getClass());
    config.setReducerClass(getClass());
    config.setInputFormat(getInputFormat());
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    // add input paths
    for (String path : getInputPath(args)) {
      if (TextInputFormat.class.equals(getInputFormat())) {
        TextInputFormat.addInputPath(config, new Path(path));
      } else if (SequenceFileInputFormat.class.equals(getInputFormat())) {
        SequenceFileInputFormat.addInputPath(config, new Path(path));
      }
    }

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(getOutputPath(args));
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
Example #12
      public void configure(JobConf job) {
        // 'key' == sortInput for sort-input; key == sortOutput for sort-output
        key = deduceInputFile(job);

        if (key == sortOutput) {
          partitioner = new HashPartitioner<WritableComparable, Writable>();

          // Figure the 'current' partition and no. of reduces of the 'sort'
          try {
            URI inputURI = new URI(job.get("map.input.file"));
            String inputFile = inputURI.getPath();
            partition =
                Integer.valueOf(inputFile.substring(inputFile.lastIndexOf("part") + 5)).intValue();
            noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
          } catch (Exception e) {
            System.err.println("Caught: " + e);
            System.exit(-1);
          }
        }
      }
Example #13
  public static void main(String[] args) throws IOException {
    if (args.length != 2) {
      System.err.println("Usage: OldMaxTemperature <input path> <output path>");
      System.exit(-1);
    }

    JobConf conf = new JobConf(OldMaxTemperature.class);
    conf.setJobName("Max temperature");

    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(OldMaxTemperatureMapper.class);
    conf.setReducerClass(OldMaxTemperatureReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    JobClient.runJob(conf);
  }
  public static Message createMessage(
      String keyspace, byte[] key, String columnFamily, List<ColumnFamily> columnFamilies) {
    ColumnFamily baseColumnFamily;
    DataOutputBuffer bufOut = new DataOutputBuffer();
    RowMutation rm;
    Message message;
    Column column;

    /* Get the first column family from list, this is just to get past validation */
    baseColumnFamily =
        new ColumnFamily(
            ColumnFamilyType.Standard,
            DatabaseDescriptor.getComparator(keyspace, columnFamily),
            DatabaseDescriptor.getSubComparator(keyspace, columnFamily),
            CFMetaData.getId(keyspace, columnFamily));

    for (ColumnFamily cf : columnFamilies) {
      bufOut.reset();
      ColumnFamily.serializer().serializeWithIndexes(cf, bufOut);
      byte[] data = new byte[bufOut.getLength()];
      System.arraycopy(bufOut.getData(), 0, data, 0, bufOut.getLength());

      column = new Column(FBUtilities.toByteBuffer(cf.id()), ByteBuffer.wrap(data), 0);
      baseColumnFamily.addColumn(column);
    }
    rm = new RowMutation(keyspace, ByteBuffer.wrap(key));
    rm.add(baseColumnFamily);

    try {
      /* Make message */
      message = rm.makeRowMutationMessage(StorageService.Verb.BINARY, MessagingService.version_);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    return message;
  }
  public void testFsCache() throws Exception {
    {
      long now = System.currentTimeMillis();
      String[] users = new String[] {"foo", "bar"};
      final Configuration conf = new Configuration();
      FileSystem[] fs = new FileSystem[users.length];

      for (int i = 0; i < users.length; i++) {
        UserGroupInformation ugi = UserGroupInformation.createRemoteUser(users[i]);
        fs[i] =
            ugi.doAs(
                new PrivilegedExceptionAction<FileSystem>() {
                  public FileSystem run() throws IOException {
                    return FileSystem.get(conf);
                  }
                });
        for (int j = 0; j < i; j++) {
          assertFalse(fs[j] == fs[i]);
        }
      }
      FileSystem.closeAll();
    }

    {
      try {
        runTestCache(HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT);
      } catch (java.net.BindException be) {
        LOG.warn(
            "Cannot test HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT (="
                + HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT
                + ")",
            be);
      }

      runTestCache(0);
    }
  }
Example #16
/**
 * Distributed i/o benchmark.
 *
 * <p>This test writes into or reads from a specified number of files. File size is specified as a
 * parameter to the test. Each file is accessed in a separate map task.
 *
 * <p>The reducer collects the following statistics:
 *
 * <ul>
 *   <li>number of tasks completed
 *   <li>number of bytes written/read
 *   <li>execution time
 *   <li>io rate
 *   <li>io rate squared
 * </ul>
 *
 * Finally, the following information is appended to a local file
 *
 * <ul>
 *   <li>read or write test
 *   <li>date and time the test finished
 *   <li>number of files
 *   <li>total number of bytes processed
 *   <li>throughput in mb/sec (total number of bytes / sum of processing times)
 *   <li>average i/o rate in mb/sec per file
 *   <li>standard i/o rate deviation
 * </ul>
 */
@Ignore
public class DFSCIOTest extends TestCase {
  // Constants
  private static final Log LOG = LogFactory.getLog(DFSCIOTest.class);
  private static final int TEST_TYPE_READ = 0;
  private static final int TEST_TYPE_WRITE = 1;
  private static final int TEST_TYPE_CLEANUP = 2;
  private static final int DEFAULT_BUFFER_SIZE = 1000000;
  private static final String BASE_FILE_NAME = "test_io_";
  private static final String DEFAULT_RES_FILE_NAME = "DFSCIOTest_results.log";

  private static Configuration fsConfig = new Configuration();
  private static final long MEGA = 0x100000;
  private static String TEST_ROOT_DIR =
      System.getProperty("test.build.data", "/benchmarks/DFSCIOTest");
  private static Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control");
  private static Path WRITE_DIR = new Path(TEST_ROOT_DIR, "io_write");
  private static Path READ_DIR = new Path(TEST_ROOT_DIR, "io_read");
  private static Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data");

  private static Path HDFS_TEST_DIR = new Path("/tmp/DFSCIOTest");
  private static String HDFS_LIB_VERSION = System.getProperty("libhdfs.version", "1");
  private static String CHMOD = new String("chmod");
  private static Path HDFS_SHLIB = new Path(HDFS_TEST_DIR + "/libhdfs.so." + HDFS_LIB_VERSION);
  private static Path HDFS_READ = new Path(HDFS_TEST_DIR + "/hdfs_read");
  private static Path HDFS_WRITE = new Path(HDFS_TEST_DIR + "/hdfs_write");

  /**
   * Run the test with default parameters.
   *
   * @throws Exception
   */
  public void testIOs() throws Exception {
    testIOs(10, 10);
  }

  /**
   * Run the test with the specified parameters.
   *
   * @param fileSize file size
   * @param nrFiles number of files
   * @throws IOException
   */
  public static void testIOs(int fileSize, int nrFiles) throws IOException {

    FileSystem fs = FileSystem.get(fsConfig);

    createControlFile(fs, fileSize, nrFiles);
    writeTest(fs);
    readTest(fs);
  }

  private static void createControlFile(
      FileSystem fs,
      int fileSize, // in MB
      int nrFiles)
      throws IOException {
    LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

    fs.delete(CONTROL_DIR, true);

    for (int i = 0; i < nrFiles; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer =
            SequenceFile.createWriter(
                fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(fileSize));
      } catch (Exception e) {
        throw new IOException(e.getLocalizedMessage());
      } finally {
        if (writer != null) writer.close();
        writer = null;
      }
    }
    LOG.info("created control files for: " + nrFiles + " files");
  }

  private static String getFileName(int fIdx) {
    return BASE_FILE_NAME + Integer.toString(fIdx);
  }

  /**
   * Write/Read mapper base class.
   *
   * <p>Collects the following statistics per task:
   *
   * <ul>
   *   <li>number of tasks completed
   *   <li>number of bytes written/read
   *   <li>execution time
   *   <li>i/o rate
   *   <li>i/o rate squared
   * </ul>
   */
  private abstract static class IOStatMapper extends IOMapperBase<Long> {
    IOStatMapper() {}

    void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
        throws IOException {
      long totalSize = objSize.longValue();
      float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA);
      LOG.info("Number of bytes processed = " + totalSize);
      LOG.info("Exec time = " + execTime);
      LOG.info("IO rate = " + ioRateMbSec);

      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"), new Text(String.valueOf(1)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
          new Text(String.valueOf(totalSize)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
          new Text(String.valueOf(execTime)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
          new Text(String.valueOf(ioRateMbSec * 1000)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
          new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
    }
  }

  /** Write mapper class. */
  public static class WriteMapper extends IOStatMapper {

    public WriteMapper() {
      super();
      for (int i = 0; i < bufferSize; i++) buffer[i] = (byte) ('0' + i % 50);
    }

    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      // create file
      totalSize *= MEGA;

      // create instance of local filesystem
      FileSystem localFS = FileSystem.getLocal(fsConfig);

      try {
        // native runtime
        Runtime runTime = Runtime.getRuntime();

        // copy the dso and executable from dfs and chmod them
        synchronized (this) {
          localFS.delete(HDFS_TEST_DIR, true);
          if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
            throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
          }
        }

        synchronized (this) {
          if (!localFS.exists(HDFS_SHLIB)) {
            FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig);

            String chmodCmd = new String(CHMOD + " a+x " + HDFS_SHLIB);
            Process process = runTime.exec(chmodCmd);
            int exitStatus = process.waitFor();
            if (exitStatus != 0) {
              throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
            }
          }
        }

        synchronized (this) {
          if (!localFS.exists(HDFS_WRITE)) {
            FileUtil.copy(fs, HDFS_WRITE, localFS, HDFS_WRITE, false, fsConfig);

            String chmodCmd = new String(CHMOD + " a+x " + HDFS_WRITE);
            Process process = runTime.exec(chmodCmd);
            int exitStatus = process.waitFor();
            if (exitStatus != 0) {
              throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
            }
          }
        }

        // exec the C program
        Path outFile = new Path(DATA_DIR, name);
        String writeCmd =
            new String(HDFS_WRITE + " " + outFile + " " + totalSize + " " + bufferSize);
        Process process = runTime.exec(writeCmd, null, new File(HDFS_TEST_DIR.toString()));
        int exitStatus = process.waitFor();
        if (exitStatus != 0) {
          throw new IOException(writeCmd + ": Failed with exitStatus: " + exitStatus);
        }
      } catch (InterruptedException interruptedException) {
        reporter.setStatus(interruptedException.toString());
      } finally {
        localFS.close();
      }
      return new Long(totalSize);
    }
  }

  private static void writeTest(FileSystem fs) throws IOException {

    fs.delete(DATA_DIR, true);
    fs.delete(WRITE_DIR, true);

    runIOTest(WriteMapper.class, WRITE_DIR);
  }

  private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
      throws IOException {
    JobConf job = new JobConf(fsConfig, DFSCIOTest.class);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }

  /** Read mapper class. */
  public static class ReadMapper extends IOStatMapper {

    public ReadMapper() {
      super();
    }

    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      totalSize *= MEGA;

      // create instance of local filesystem
      FileSystem localFS = FileSystem.getLocal(fsConfig);

      try {
        // native runtime
        Runtime runTime = Runtime.getRuntime();

        // copy the dso and executable from dfs
        synchronized (this) {
          localFS.delete(HDFS_TEST_DIR, true);
          if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
            throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
          }
        }

        synchronized (this) {
          if (!localFS.exists(HDFS_SHLIB)) {
            if (!FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig)) {
              throw new IOException("Failed to copy " + HDFS_SHLIB + " to local filesystem");
            }

            String chmodCmd = new String(CHMOD + " a+x " + HDFS_SHLIB);
            Process process = runTime.exec(chmodCmd);
            int exitStatus = process.waitFor();
            if (exitStatus != 0) {
              throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
            }
          }
        }

        synchronized (this) {
          if (!localFS.exists(HDFS_READ)) {
            if (!FileUtil.copy(fs, HDFS_READ, localFS, HDFS_READ, false, fsConfig)) {
              throw new IOException("Failed to copy " + HDFS_READ + " to local filesystem");
            }

            String chmodCmd = new String(CHMOD + " a+x " + HDFS_READ);
            Process process = runTime.exec(chmodCmd);
            int exitStatus = process.waitFor();

            if (exitStatus != 0) {
              throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
            }
          }
        }

        // exec the C program
        Path inFile = new Path(DATA_DIR, name);
        String readCmd = new String(HDFS_READ + " " + inFile + " " + totalSize + " " + bufferSize);
        Process process = runTime.exec(readCmd, null, new File(HDFS_TEST_DIR.toString()));
        int exitStatus = process.waitFor();

        if (exitStatus != 0) {
          throw new IOException(HDFS_READ + ": Failed with exitStatus: " + exitStatus);
        }
      } catch (InterruptedException interruptedException) {
        reporter.setStatus(interruptedException.toString());
      } finally {
        localFS.close();
      }
      return new Long(totalSize);
    }
  }

  private static void readTest(FileSystem fs) throws IOException {
    fs.delete(READ_DIR, true);
    runIOTest(ReadMapper.class, READ_DIR);
  }

  private static void sequentialTest(FileSystem fs, int testType, int fileSize, int nrFiles)
      throws Exception {
    IOStatMapper ioer = null;
    if (testType == TEST_TYPE_READ) ioer = new ReadMapper();
    else if (testType == TEST_TYPE_WRITE) ioer = new WriteMapper();
    else return;
    for (int i = 0; i < nrFiles; i++)
      ioer.doIO(Reporter.NULL, BASE_FILE_NAME + Integer.toString(i), MEGA * fileSize);
  }

  public static void main(String[] args) {
    int testType = TEST_TYPE_READ;
    int bufferSize = DEFAULT_BUFFER_SIZE;
    int fileSize = 1;
    int nrFiles = 1;
    String resFileName = DEFAULT_RES_FILE_NAME;
    boolean isSequential = false;

    String version = "DFSCIOTest.0.0.1";
    String usage =
        "Usage: DFSCIOTest -read | -write | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] ";

    System.out.println(version);
    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
      if (args[i].startsWith("-r")) {
        testType = TEST_TYPE_READ;
      } else if (args[i].startsWith("-w")) {
        testType = TEST_TYPE_WRITE;
      } else if (args[i].startsWith("-clean")) {
        testType = TEST_TYPE_CLEANUP;
      } else if (args[i].startsWith("-seq")) {
        isSequential = true;
      } else if (args[i].equals("-nrFiles")) {
        nrFiles = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-fileSize")) {
        fileSize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-bufferSize")) {
        bufferSize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-resFile")) {
        resFileName = args[++i];
      }
    }

    LOG.info("nrFiles = " + nrFiles);
    LOG.info("fileSize (MB) = " + fileSize);
    LOG.info("bufferSize = " + bufferSize);

    try {
      fsConfig.setInt("test.io.file.buffer.size", bufferSize);
      FileSystem fs = FileSystem.get(fsConfig);

      if (testType != TEST_TYPE_CLEANUP) {
        fs.delete(HDFS_TEST_DIR, true);
        if (!fs.mkdirs(HDFS_TEST_DIR)) {
          throw new IOException("Mkdirs failed to create " + HDFS_TEST_DIR.toString());
        }

        // Copy the executables over to the remote filesystem
        String hadoopHome = System.getenv("HADOOP_PREFIX");
        fs.copyFromLocalFile(
            new Path(hadoopHome + "/libhdfs/libhdfs.so." + HDFS_LIB_VERSION), HDFS_SHLIB);
        fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_read"), HDFS_READ);
        fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_write"), HDFS_WRITE);
      }

      if (isSequential) {
        long tStart = System.currentTimeMillis();
        sequentialTest(fs, testType, fileSize, nrFiles);
        long execTime = System.currentTimeMillis() - tStart;
        String resultLine = "Seq Test exec time sec: " + (float) execTime / 1000;
        LOG.info(resultLine);
        return;
      }
      if (testType == TEST_TYPE_CLEANUP) {
        cleanup(fs);
        return;
      }
      createControlFile(fs, fileSize, nrFiles);
      long tStart = System.currentTimeMillis();
      if (testType == TEST_TYPE_WRITE) writeTest(fs);
      if (testType == TEST_TYPE_READ) readTest(fs);
      long execTime = System.currentTimeMillis() - tStart;

      analyzeResult(fs, testType, execTime, resFileName);
    } catch (Exception e) {
      System.err.print(e.getLocalizedMessage());
      System.exit(-1);
    }
  }

  private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName)
      throws IOException {
    Path reduceFile;
    if (testType == TEST_TYPE_WRITE) reduceFile = new Path(WRITE_DIR, "part-00000");
    else reduceFile = new Path(READ_DIR, "part-00000");
    DataInputStream in;
    in = new DataInputStream(fs.open(reduceFile));

    BufferedReader lines;
    lines = new BufferedReader(new InputStreamReader(in));
    long tasks = 0;
    long size = 0;
    long time = 0;
    float rate = 0;
    float sqrate = 0;
    String line;
    while ((line = lines.readLine()) != null) {
      StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
      String attr = tokens.nextToken();
      if (attr.endsWith(":tasks")) tasks = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":size")) size = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":time")) time = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":rate")) rate = Float.parseFloat(tokens.nextToken());
      else if (attr.endsWith(":sqrate")) sqrate = Float.parseFloat(tokens.nextToken());
    }

    double med = rate / 1000 / tasks;
    double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
    String resultLines[] = {
      "----- DFSCIOTest ----- : "
          + ((testType == TEST_TYPE_WRITE)
              ? "write"
              : (testType == TEST_TYPE_READ) ? "read" : "unknown"),
      "           Date & time: " + new Date(System.currentTimeMillis()),
      "       Number of files: " + tasks,
      "Total MBytes processed: " + size / MEGA,
      "     Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
      "Average IO rate mb/sec: " + med,
      " Std IO rate deviation: " + stdDev,
      "    Test exec time sec: " + (float) execTime / 1000,
      ""
    };

    PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true));
    for (int i = 0; i < resultLines.length; i++) {
      LOG.info(resultLines[i]);
      res.println(resultLines[i]);
    }
  }

  private static void cleanup(FileSystem fs) throws Exception {
    LOG.info("Cleaning up test files");
    fs.delete(new Path(TEST_ROOT_DIR), true);
    fs.delete(HDFS_TEST_DIR, true);
  }
}
Example #17
 private static byte[] pair(BytesWritable a, BytesWritable b) {
   byte[] pairData = new byte[a.getLength() + b.getLength()];
   System.arraycopy(a.getBytes(), 0, pairData, 0, a.getLength());
   System.arraycopy(b.getBytes(), 0, pairData, a.getLength(), b.getLength());
   return pairData;
 }
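A minimal usage sketch for the helper above; the values and the assertion are illustrative and not part of the original snippet:

  // Concatenates the valid bytes of two BytesWritable values into one array.
  BytesWritable first = new BytesWritable("foo".getBytes());
  BytesWritable second = new BytesWritable("bar".getBytes());
  byte[] joined = pair(first, second); // 6 bytes: "foobar"
  assert joined.length == first.getLength() + second.getLength();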
Example #18
  public static void main(String[] args) {
    int testType = TEST_TYPE_READ;
    int bufferSize = DEFAULT_BUFFER_SIZE;
    int fileSize = 1;
    int nrFiles = 1;
    String resFileName = DEFAULT_RES_FILE_NAME;
    boolean isSequential = false;

    String version = "DFSCIOTest.0.0.1";
    String usage =
        "Usage: DFSCIOTest -read | -write | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] ";

    System.out.println(version);
    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
      if (args[i].startsWith("-r")) {
        testType = TEST_TYPE_READ;
      } else if (args[i].startsWith("-w")) {
        testType = TEST_TYPE_WRITE;
      } else if (args[i].startsWith("-clean")) {
        testType = TEST_TYPE_CLEANUP;
      } else if (args[i].startsWith("-seq")) {
        isSequential = true;
      } else if (args[i].equals("-nrFiles")) {
        nrFiles = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-fileSize")) {
        fileSize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-bufferSize")) {
        bufferSize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-resFile")) {
        resFileName = args[++i];
      }
    }

    LOG.info("nrFiles = " + nrFiles);
    LOG.info("fileSize (MB) = " + fileSize);
    LOG.info("bufferSize = " + bufferSize);

    try {
      fsConfig.setInt("test.io.file.buffer.size", bufferSize);
      FileSystem fs = FileSystem.get(fsConfig);

      if (testType != TEST_TYPE_CLEANUP) {
        fs.delete(HDFS_TEST_DIR, true);
        if (!fs.mkdirs(HDFS_TEST_DIR)) {
          throw new IOException("Mkdirs failed to create " + HDFS_TEST_DIR.toString());
        }

        // Copy the executables over to the remote filesystem
        String hadoopHome = System.getenv("HADOOP_PREFIX");
        fs.copyFromLocalFile(
            new Path(hadoopHome + "/libhdfs/libhdfs.so." + HDFS_LIB_VERSION), HDFS_SHLIB);
        fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_read"), HDFS_READ);
        fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_write"), HDFS_WRITE);
      }

      if (isSequential) {
        long tStart = System.currentTimeMillis();
        sequentialTest(fs, testType, fileSize, nrFiles);
        long execTime = System.currentTimeMillis() - tStart;
        String resultLine = "Seq Test exec time sec: " + (float) execTime / 1000;
        LOG.info(resultLine);
        return;
      }
      if (testType == TEST_TYPE_CLEANUP) {
        cleanup(fs);
        return;
      }
      createControlFile(fs, fileSize, nrFiles);
      long tStart = System.currentTimeMillis();
      if (testType == TEST_TYPE_WRITE) writeTest(fs);
      if (testType == TEST_TYPE_READ) readTest(fs);
      long execTime = System.currentTimeMillis() - tStart;

      analyzeResult(fs, testType, execTime, resFileName);
    } catch (Exception e) {
      System.err.print(e.getLocalizedMessage());
      System.exit(-1);
    }
  }
public class TestFileSystem extends TestCase {
  private static final Log LOG = FileSystem.LOG;

  private static Configuration conf = new Configuration();
  private static int BUFFER_SIZE = conf.getInt("io.file.buffer.size", 4096);

  private static final long MEGA = 1024 * 1024;
  private static final int SEEKS_PER_FILE = 4;

  private static String ROOT = System.getProperty("test.build.data", "fs_test");
  private static Path CONTROL_DIR = new Path(ROOT, "fs_control");
  private static Path WRITE_DIR = new Path(ROOT, "fs_write");
  private static Path READ_DIR = new Path(ROOT, "fs_read");
  private static Path DATA_DIR = new Path(ROOT, "fs_data");

  public void testFs() throws Exception {
    testFs(10 * MEGA, 100, 0);
  }

  public static void testFs(long megaBytes, int numFiles, long seed) throws Exception {

    FileSystem fs = FileSystem.get(conf);

    if (seed == 0) seed = new Random().nextLong();

    LOG.info("seed = " + seed);

    createControlFile(fs, megaBytes, numFiles, seed);
    writeTest(fs, false);
    readTest(fs, false);
    seekTest(fs, false);
    fs.delete(CONTROL_DIR, true);
    fs.delete(DATA_DIR, true);
    fs.delete(WRITE_DIR, true);
    fs.delete(READ_DIR, true);
  }

  public static void testCommandFormat() throws Exception {
    // This should go to TestFsShell.java when it is added.
    CommandFormat cf;
    cf = new CommandFormat("copyToLocal", 2, 2, "crc", "ignoreCrc");
    assertEquals(cf.parse(new String[] {"-get", "file", "-"}, 1).get(1), "-");
    try {
      cf.parse(new String[] {"-get", "file", "-ignoreCrc", "/foo"}, 1);
      fail("Expected parsing to fail as it should stop at first non-option");
    } catch (Exception e) {
      // Expected
    }
    cf = new CommandFormat("tail", 1, 1, "f");
    assertEquals(cf.parse(new String[] {"-tail", "fileName"}, 1).get(0), "fileName");
    assertEquals(cf.parse(new String[] {"-tail", "-f", "fileName"}, 1).get(0), "fileName");
    cf = new CommandFormat("setrep", 2, 2, "R", "w");
    assertEquals(cf.parse(new String[] {"-setrep", "-R", "2", "/foo/bar"}, 1).get(1), "/foo/bar");
    cf = new CommandFormat("put", 2, 10000);
    assertEquals(cf.parse(new String[] {"-put", "-", "dest"}, 1).get(1), "dest");
  }

  public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed)
      throws Exception {

    LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

    Path controlFile = new Path(CONTROL_DIR, "files");
    fs.delete(controlFile, true);
    Random random = new Random(seed);

    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE);

    long totalSize = 0;
    long maxSize = ((megaBytes / numFiles) * 2) + 1;
    try {
      while (totalSize < megaBytes) {
        Text name = new Text(Long.toString(random.nextLong()));

        long size = random.nextLong();
        if (size < 0) size = -size;
        size = size % maxSize;

        // LOG.info(" adding: name="+name+" size="+size);

        writer.append(name, new LongWritable(size));

        totalSize += size;
      }
    } finally {
      writer.close();
    }
    LOG.info("created control file for: " + totalSize + " bytes");
  }

  public static class WriteMapper extends Configured
      implements Mapper<Text, LongWritable, Text, LongWritable> {

    private Random random = new Random();
    private byte[] buffer = new byte[BUFFER_SIZE];
    private FileSystem fs;
    private boolean fastCheck;

    // a random suffix per task
    private String suffix = "-" + random.nextLong();

    {
      try {
        fs = FileSystem.get(conf);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public WriteMapper() {
      super(null);
    }

    public WriteMapper(Configuration conf) {
      super(conf);
    }

    public void configure(JobConf job) {
      setConf(job);
      fastCheck = job.getBoolean("fs.test.fastCheck", false);
    }

    public void map(
        Text key,
        LongWritable value,
        OutputCollector<Text, LongWritable> collector,
        Reporter reporter)
        throws IOException {

      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      random.setSeed(seed);
      reporter.setStatus("creating " + name);

      // write to temp file initially to permit parallel execution
      Path tempFile = new Path(DATA_DIR, name + suffix);
      OutputStream out = fs.create(tempFile);

      long written = 0;
      try {
        while (written < size) {
          if (fastCheck) {
            Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
          } else {
            random.nextBytes(buffer);
          }
          long remains = size - written;
          int length = (remains <= buffer.length) ? (int) remains : buffer.length;
          out.write(buffer, 0, length);
          written += length;
          reporter.setStatus("writing " + name + "@" + written + "/" + size);
        }
      } finally {
        out.close();
      }
      // rename to final location
      fs.rename(tempFile, new Path(DATA_DIR, name));

      collector.collect(new Text("bytes"), new LongWritable(written));

      reporter.setStatus("wrote " + name);
    }

    public void close() {}
  }

  public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {

    fs.delete(DATA_DIR, true);
    fs.delete(WRITE_DIR, true);

    JobConf job = new JobConf(conf, TestFileSystem.class);
    job.setBoolean("fs.test.fastCheck", fastCheck);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(WriteMapper.class);
    job.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(job, WRITE_DIR);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }

  public static class ReadMapper extends Configured
      implements Mapper<Text, LongWritable, Text, LongWritable> {

    private Random random = new Random();
    private byte[] buffer = new byte[BUFFER_SIZE];
    private byte[] check = new byte[BUFFER_SIZE];
    private FileSystem fs;
    private boolean fastCheck;

    {
      try {
        fs = FileSystem.get(conf);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public ReadMapper() {
      super(null);
    }

    public ReadMapper(Configuration conf) {
      super(conf);
    }

    public void configure(JobConf job) {
      setConf(job);
      fastCheck = job.getBoolean("fs.test.fastCheck", false);
    }

    public void map(
        Text key,
        LongWritable value,
        OutputCollector<Text, LongWritable> collector,
        Reporter reporter)
        throws IOException {

      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      random.setSeed(seed);
      reporter.setStatus("opening " + name);

      DataInputStream in = new DataInputStream(fs.open(new Path(DATA_DIR, name)));

      long read = 0;
      try {
        while (read < size) {
          long remains = size - read;
          int n = (remains <= buffer.length) ? (int) remains : buffer.length;
          in.readFully(buffer, 0, n);
          read += n;
          if (fastCheck) {
            Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE));
          } else {
            random.nextBytes(check);
          }
          if (n != buffer.length) {
            Arrays.fill(buffer, n, buffer.length, (byte) 0);
            Arrays.fill(check, n, check.length, (byte) 0);
          }
          assertTrue(Arrays.equals(buffer, check));

          reporter.setStatus("reading " + name + "@" + read + "/" + size);
        }
      } finally {
        in.close();
      }

      collector.collect(new Text("bytes"), new LongWritable(read));

      reporter.setStatus("read " + name);
    }

    public void close() {}
  }

  public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {

    fs.delete(READ_DIR, true);

    JobConf job = new JobConf(conf, TestFileSystem.class);
    job.setBoolean("fs.test.fastCheck", fastCheck);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(ReadMapper.class);
    job.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(job, READ_DIR);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }

  public static class SeekMapper<K> extends Configured
      implements Mapper<Text, LongWritable, K, LongWritable> {

    private Random random = new Random();
    private byte[] check = new byte[BUFFER_SIZE];
    private FileSystem fs;
    private boolean fastCheck;

    {
      try {
        fs = FileSystem.get(conf);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public SeekMapper() {
      super(null);
    }

    public SeekMapper(Configuration conf) {
      super(conf);
    }

    public void configure(JobConf job) {
      setConf(job);
      fastCheck = job.getBoolean("fs.test.fastCheck", false);
    }

    public void map(
        Text key, LongWritable value, OutputCollector<K, LongWritable> collector, Reporter reporter)
        throws IOException {
      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      if (size == 0) return;

      reporter.setStatus("opening " + name);

      FSDataInputStream in = fs.open(new Path(DATA_DIR, name));

      try {
        for (int i = 0; i < SEEKS_PER_FILE; i++) {
          // generate a random position
          long position = Math.abs(random.nextLong()) % size;

          // seek file to that position
          reporter.setStatus("seeking " + name);
          in.seek(position);
          byte b = in.readByte();

          // check that byte matches
          byte checkByte = 0;
          // advance random state to that position
          random.setSeed(seed);
          for (int p = 0; p <= position; p += check.length) {
            reporter.setStatus("generating data for " + name);
            if (fastCheck) {
              checkByte = (byte) random.nextInt(Byte.MAX_VALUE);
            } else {
              random.nextBytes(check);
              checkByte = check[(int) (position % check.length)];
            }
          }
          assertEquals(b, checkByte);
        }
      } finally {
        in.close();
      }
    }

    public void close() {}
  }

  public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {

    fs.delete(READ_DIR, true);

    JobConf job = new JobConf(conf, TestFileSystem.class);
    job.setBoolean("fs.test.fastCheck", fastCheck);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(SeekMapper.class);
    job.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(job, READ_DIR);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }

  public static void main(String[] args) throws Exception {
    int megaBytes = 10;
    int files = 100;
    boolean noRead = false;
    boolean noWrite = false;
    boolean noSeek = false;
    boolean fastCheck = false;
    long seed = new Random().nextLong();

    String usage =
        "Usage: TestFileSystem -files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
      if (args[i].equals("-files")) {
        files = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-megaBytes")) {
        megaBytes = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-noread")) {
        noRead = true;
      } else if (args[i].equals("-nowrite")) {
        noWrite = true;
      } else if (args[i].equals("-noseek")) {
        noSeek = true;
      } else if (args[i].equals("-fastcheck")) {
        fastCheck = true;
      }
    }

    LOG.info("seed = " + seed);
    LOG.info("files = " + files);
    LOG.info("megaBytes = " + megaBytes);

    FileSystem fs = FileSystem.get(conf);

    if (!noWrite) {
      createControlFile(fs, megaBytes * MEGA, files, seed);
      writeTest(fs, fastCheck);
    }
    if (!noRead) {
      readTest(fs, fastCheck);
    }
    if (!noSeek) {
      seekTest(fs, fastCheck);
    }
  }

  public void testFsCache() throws Exception {
    {
      long now = System.currentTimeMillis();
      String[] users = new String[] {"foo", "bar"};
      final Configuration conf = new Configuration();
      FileSystem[] fs = new FileSystem[users.length];

      for (int i = 0; i < users.length; i++) {
        UserGroupInformation ugi = UserGroupInformation.createRemoteUser(users[i]);
        fs[i] =
            ugi.doAs(
                new PrivilegedExceptionAction<FileSystem>() {
                  public FileSystem run() throws IOException {
                    return FileSystem.get(conf);
                  }
                });
        for (int j = 0; j < i; j++) {
          assertFalse(fs[j] == fs[i]);
        }
      }
      FileSystem.closeAll();
    }

    {
      try {
        runTestCache(HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT);
      } catch (java.net.BindException be) {
        LOG.warn(
            "Cannot test HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT (="
                + HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT
                + ")",
            be);
      }

      runTestCache(0);
    }
  }

  static void runTestCache(int port) throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).nameNodePort(port).numDataNodes(2).build();
      URI uri = cluster.getFileSystem().getUri();
      LOG.info("uri=" + uri);

      {
        FileSystem fs = FileSystem.get(uri, new Configuration());
        checkPath(cluster, fs);
        for (int i = 0; i < 100; i++) {
          assertTrue(fs == FileSystem.get(uri, new Configuration()));
        }
      }

      if (port == HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT) {
        // test explicit default port
        URI uri2 =
            new URI(
                uri.getScheme(),
                uri.getUserInfo(),
                uri.getHost(),
                HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT,
                uri.getPath(),
                uri.getQuery(),
                uri.getFragment());
        LOG.info("uri2=" + uri2);
        FileSystem fs = FileSystem.get(uri2, conf);
        checkPath(cluster, fs);
        for (int i = 0; i < 100; i++) {
          assertTrue(fs == FileSystem.get(uri2, new Configuration()));
        }
      }
    } finally {
      if (cluster != null) cluster.shutdown();
    }
  }

  static void checkPath(MiniDFSCluster cluster, FileSystem fileSys) throws IOException {
    InetSocketAddress add = cluster.getNameNode().getNameNodeAddress();
    // Test upper/lower case
    fileSys.checkPath(
        new Path("hdfs://" + StringUtils.toUpperCase(add.getHostName()) + ":" + add.getPort()));
  }

  public void testFsClose() throws Exception {
    {
      Configuration conf = new Configuration();
      new Path("file:///").getFileSystem(conf);
      FileSystem.closeAll();
    }
  }

  public void testFsShutdownHook() throws Exception {
    final Set<FileSystem> closed = Collections.synchronizedSet(new HashSet<FileSystem>());
    Configuration conf = new Configuration();
    Configuration confNoAuto = new Configuration();

    conf.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class);
    confNoAuto.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class);
    confNoAuto.setBoolean("fs.automatic.close", false);

    TestShutdownFileSystem fsWithAuto =
        (TestShutdownFileSystem) (new Path("test://a/").getFileSystem(conf));
    TestShutdownFileSystem fsWithoutAuto =
        (TestShutdownFileSystem) (new Path("test://b/").getFileSystem(confNoAuto));

    fsWithAuto.setClosedSet(closed);
    fsWithoutAuto.setClosedSet(closed);

    // Different URIs should result in different FS instances
    assertNotSame(fsWithAuto, fsWithoutAuto);

    FileSystem.CACHE.closeAll(true);
    assertEquals(1, closed.size());
    assertTrue(closed.contains(fsWithAuto));

    closed.clear();

    FileSystem.closeAll();
    assertEquals(1, closed.size());
    assertTrue(closed.contains(fsWithoutAuto));
  }

  public void testCacheKeysAreCaseInsensitive() throws Exception {
    Configuration conf = new Configuration();

    // check basic equality
    FileSystem.Cache.Key lowercaseCachekey1 =
        new FileSystem.Cache.Key(new URI("hdfs://localhost:12345/"), conf);
    FileSystem.Cache.Key lowercaseCachekey2 =
        new FileSystem.Cache.Key(new URI("hdfs://localhost:12345/"), conf);
    assertEquals(lowercaseCachekey1, lowercaseCachekey2);

    // check insensitive equality
    FileSystem.Cache.Key uppercaseCachekey =
        new FileSystem.Cache.Key(new URI("HDFS://Localhost:12345/"), conf);
    assertEquals(lowercaseCachekey2, uppercaseCachekey);

    // check behaviour with collections
    List<FileSystem.Cache.Key> list = new ArrayList<FileSystem.Cache.Key>();
    list.add(uppercaseCachekey);
    assertTrue(list.contains(uppercaseCachekey));
    assertTrue(list.contains(lowercaseCachekey2));

    Set<FileSystem.Cache.Key> set = new HashSet<FileSystem.Cache.Key>();
    set.add(uppercaseCachekey);
    assertTrue(set.contains(uppercaseCachekey));
    assertTrue(set.contains(lowercaseCachekey2));

    Map<FileSystem.Cache.Key, String> map = new HashMap<FileSystem.Cache.Key, String>();
    map.put(uppercaseCachekey, "");
    assertTrue(map.containsKey(uppercaseCachekey));
    assertTrue(map.containsKey(lowercaseCachekey2));
  }

  public static void testFsUniqueness(long megaBytes, int numFiles, long seed) throws Exception {

    // multiple invocations of FileSystem.get return the same object.
    FileSystem fs1 = FileSystem.get(conf);
    FileSystem fs2 = FileSystem.get(conf);
    assertTrue(fs1 == fs2);

    // multiple invocations of FileSystem.newInstance return different objects
    fs1 = FileSystem.newInstance(conf);
    fs2 = FileSystem.newInstance(conf);
    assertTrue(fs1 != fs2 && !fs1.equals(fs2));
    fs1.close();
    fs2.close();
  }

  public static class TestShutdownFileSystem extends RawLocalFileSystem {
    private Set<FileSystem> closedSet;

    public void setClosedSet(Set<FileSystem> closedSet) {
      this.closedSet = closedSet;
    }

    public void close() throws IOException {
      if (closedSet != null) {
        closedSet.add(this);
      }
      super.close();
    }
  }
}
Example #20
 public static void main(String[] args) throws Exception {
   int res = ToolRunner.run(new Configuration(), new FileTest(), args);
   System.exit(res);
 }
Example #21
 public static void main(String[] args) throws Exception {
   int res = ToolRunner.run(NutchConfiguration.create(), new Injector(), args);
   System.exit(res);
 }
Example #22
 public static void main(String[] args) throws Exception {
   JobConf job = new JobConf(DistCp.class);
   DistCp distcp = new DistCp(job);
   int res = ToolRunner.run(distcp, args);
   System.exit(res);
 }
 public static void main(String[] args) throws Exception {
   int exitCode = ToolRunner.run(new MaxTemperatureDriver(), args);
   System.exit(exitCode);
 }
 public static void main(String[] args) throws Exception {
   int exitCode = ToolRunner.run(new SortByTemperatureUsingTotalOrderPartitioner(), args);
   System.exit(exitCode);
 }
Example #25
 static void printUsage() {
   System.err.println(
       "sortvalidate [-m <maps>] [-r <reduces>] [-deep] "
           + "-sortInput <sort-input-dir> -sortOutput <sort-output-dir>");
   System.exit(1);
 }
 public static void main(String[] args) throws Exception {
   int res = ToolRunner.run(new Configuration(), new UserViewMuliHostStepThreeGroup(), args);
   System.exit(res);
 }
Example #27
 public static void main(String[] args) throws Exception {
   int res = ToolRunner.run(new Configuration(), new XiangLi1_exercise3(), args);
   System.exit(res);
 }
 public static void main(String[] args) throws Exception {
   int exitCode = ToolRunner.run(new PartitionByStationUsingMultipleOutputs(), args);
   System.exit(exitCode);
 }
  public class ElephantRecordWriter
      implements RecordWriter<IntWritable, ElephantRecordWritable>, Closeable {

    FileSystem fileSystem;
    Args args;
    Map<Integer, Persistence> lps = new HashMap<Integer, Persistence>();
    Progressable progressable;
    LocalElephantManager localManager;

    int numWritten = 0;
    long lastCheckpoint = System.currentTimeMillis();

    public ElephantRecordWriter(Configuration conf, Args args, Progressable progressable)
        throws IOException {
      fileSystem = Utils.getFS(args.outputDirHdfs, conf);
      this.args = args;

      this.progressable = progressable;
      localManager =
          new LocalElephantManager(fileSystem, args.spec, LocalElephantManager.getTmpDirs(conf));
    }

    private Persistence retrieveShard(int shardIdx) throws IOException {
      Persistence lp = null;

      if (lps.containsKey(shardIdx)) {
        lp = lps.get(shardIdx);
      } else {
        String localShard = localManager.downloadRemoteShard("" + shardIdx, null);

        Coordinator fact = args.spec.getCoordinator();
        lp = fact.openPersistenceForAppend(localShard, args.spec.getPersistenceOptions());

        lps.put(shardIdx, lp);
        progress();
      }
      return lp;
    }

    public void write(IntWritable shard, ElephantRecordWritable carrier) throws IOException {
      Persistence lp = retrieveShard(shard.get());

      NewKeyValDocument doc = new NewKeyValDocument(carrier.key, carrier.value);

      lp.index(doc);

      bumpProgress();
    }

    public void bumpProgress() {
      numWritten++;
      if (numWritten % 25000 == 0) {
        long now = System.currentTimeMillis();
        long delta = now - lastCheckpoint;
        lastCheckpoint = now;
        LOG.info("Wrote last 25000 records in " + delta + " ms");
        localManager.progress();
      }
    }

    public void close() throws IOException {
      close(null);
    }

    public void close(Reporter reporter) throws IOException {
      for (Integer shard : lps.keySet()) {
        String lpDir = localManager.localTmpDir("" + shard);
        LOG.info("Closing LP for shard " + shard + " at " + lpDir);
        lps.get(shard).close();
        LOG.info("Closed LP for shard " + shard + " at " + lpDir);
        progress();
        String remoteDir = args.outputDirHdfs + "/" + shard;

        // Do all this stuff to ensure that S3 actually does delete
        int deleteAttempt = 4;
        while (fileSystem.exists(new Path(remoteDir)) && deleteAttempt > 0) {
          LOG.info("Deleting existing shard " + shard + " at " + remoteDir);
          fileSystem.delete(new Path(remoteDir), true);
          --deleteAttempt;
        }
        if (fileSystem.exists(new Path(remoteDir)) && deleteAttempt == 0) {
          throw new IOException(
              "Failed to delete shard "
                  + shard
                  + " at "
                  + remoteDir
                  + " after "
                  + deleteAttempt
                  + " attempts!");
        } else {
          LOG.info("Deleted existing shard " + shard + " at " + remoteDir);
        }
        LOG.info("Copying " + lpDir + " to " + remoteDir);
        fileSystem.copyFromLocalFile(new Path(lpDir), new Path(remoteDir));
        LOG.info("Copied " + lpDir + " to " + remoteDir);
        progress();
      }
      localManager.cleanup();
    }

    private void progress() {
      if (progressable != null) progressable.progress();
    }
  }
 static void printUsage() {
   System.out.println("kmeans [-m <maps>] [-r <reduces>] <input> <output>");
   System.exit(1);
 }