コード例 #1
0
  /**
   * Configures and runs the word-count job.
   *
   * <p>After generic Hadoop options have been consumed, exactly two arguments must remain: the
   * input path and the output path. Any existing output path is deleted recursively before the
   * job is submitted. The process exits with 0 when the job succeeds and 1 when it fails or the
   * arguments are wrong.
   *
   * @param args command-line arguments (generic options followed by {@code <input> <output>})
   * @throws Exception if argument parsing, file-system access or job execution fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (remainArgs.length != 2) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Path inputPath = new Path(remainArgs[0]);
    Path outputPath = new Path(remainArgs[1]);

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(4);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Resolve the file system from the output path itself: the path may carry a scheme/authority
    // that differs from the default file system returned by FileSystem.get(conf).
    outputPath.getFileSystem(conf).delete(outputPath, true);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
コード例 #2
0
 /**
  * Driver entry point: checks that exactly three arguments were supplied, then runs
  * {@code Mat_vect_mul} through {@code ToolRunner} and exits with the code it returns.
  */
 public static void main(String args[]) throws Exception {
   boolean wrongArgCount = args.length != 3;
   if (wrongArgCount) {
     String usage =
         "provide sufficient arguments <input matrix file path>  <input vector file path> <output file path>";
     System.out.println(usage);
     System.exit(-1);
   }
   System.exit(ToolRunner.run(new Configuration(), new Mat_vect_mul(), args));
 }
コード例 #3
0
ファイル: move.java プロジェクト: EdvardPedersen/GeStore
  /**
   * Populates {@code curConf} with the effective settings for this invocation, read from
   * {@code argConf}.
   *
   * <p>Logs a fatal message and terminates the JVM with status 1 when a required setting is
   * absent: {@code file}, {@code hdfs_base_path}, {@code hdfs_temp_path},
   * {@code local_temp_path}, or one of the three HBase table names. Optional settings fall back
   * to the defaults given below.
   *
   * @param curConf table that receives the resolved settings; modified in place
   * @param argConf configuration holding the command-line and site parameters
   * @return always {@code true}
   * @throws NumberFormatException if {@code timestamp_stop} is not a valid long
   */
  private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {

    if (argConf.get("file") == null) {
      logger.fatal("Missing file parameter");
      System.exit(1);
    }

    if (argConf.get("hdfs_base_path") == null) {
      logger.fatal("Missing HDFS base path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("hdfs_temp_path") == null) {
      logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
      System.exit(1);
    }

    if (argConf.get("local_temp_path") == null) {
      logger.fatal("Missing local temp path, check gestore-conf.xml");
      System.exit(1);
    }

    // Hashtable rejects null values, so a missing table name would otherwise surface further
    // down as a bare NullPointerException with no hint about which setting is absent.
    String[] tableKeys = {"hbase_file_table", "hbase_run_table", "hbase_db_update_table"};
    for (String tableKey : tableKeys) {
      if (argConf.get(tableKey) == null) {
        logger.fatal("Missing " + tableKey + " setting");
        System.exit(1);
      }
    }

    // Input parameters
    curConf.put("run_id", argConf.get("run", ""));
    curConf.put("task_id", argConf.get("task", ""));
    curConf.put("file_id", argConf.get("file"));
    curConf.put("local_path", argConf.get("path", ""));
    curConf.put("type", argConf.get("type", "l2r"));
    curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
    curConf.put(
        "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
    curConf.put("delimiter", argConf.get("regex", "ID=.*"));
    curConf.put("taxon", argConf.get("taxon", "all"));
    curConf.put("intermediate", argConf.get("full_run", "false"));
    curConf.put("quick_add", argConf.get("quick_add", "false"));
    curConf.put("format", argConf.get("format", "unknown"));
    curConf.put("split", argConf.get("split", "1"));
    curConf.put("copy", argConf.get("copy", "true"));

    // Constants
    curConf.put("base_path", argConf.get("hdfs_base_path"));
    curConf.put("temp_path", argConf.get("hdfs_temp_path"));
    curConf.put("local_temp_path", argConf.get("local_temp_path"));
    curConf.put("db_name_files", argConf.get("hbase_file_table"));
    curConf.put("db_name_runs", argConf.get("hbase_run_table"));
    curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

    // Timestamps
    // Parsing here keeps the fail-fast behaviour for a malformed timestamp_stop value.
    Long.parseLong(curConf.get("timestamp_stop"));
    curConf.put("timestamp_real", Long.toString(System.currentTimeMillis()));

    return true;
  }
コード例 #4
0
  /**
   * Configures and runs the "Wordcount" job.
   *
   * <p>Expects two arguments: the input path and the output path. The process exits with -1 when
   * the argument count is wrong, 0 when the job succeeds and 1 when the job fails.
   *
   * @param args {@code <input path> <output path>}
   * @throws Exception if job setup or execution fails
   */
  public static void main(String args[]) throws Exception {
    if (args.length != 2) {
      System.out.println("provide sufficient arguments");
      System.exit(-1);
    }

    Configuration c = new Configuration();
    Job job = Job.getInstance(c, "Wordcount");
    job.setJarByClass(Wordcount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Propagate the job outcome to the caller; previously a failed job still exited with 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
コード例 #5
0
ファイル: Sort.java プロジェクト: robbyzhang/hadoop
  /**
   * Runs the "Sort" job against fixed HDFS input and output directories on localhost and exits
   * with 0 on success or 1 on failure. No combiner is configured.
   */
  public static void main(String[] args) throws Exception {
    Job sortJob = new Job();
    sortJob.setJobName("Sort");
    sortJob.setJarByClass(Sort.class);

    // Processing classes and output types.
    sortJob.setMapperClass(Map.class);
    sortJob.setReducerClass(Reduce.class);
    sortJob.setNumReduceTasks(2);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(IntWritable.class);

    // Fixed locations of the data to read and write.
    Path source = new Path("hdfs://localhost:9000/input/");
    Path target = new Path("hdfs://localhost:9000/output/");
    FileInputFormat.addInputPath(sortJob, source);
    FileOutputFormat.setOutputPath(sortJob, target);

    boolean succeeded = sortJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }
コード例 #6
0
ファイル: AuthorCounter.java プロジェクト: akhfa/hadoop-test
  /**
   * Runs the counting job over the input path given in {@code args[0]} and writes its
   * sequence-file output to the fixed temporary directory {@code /user/akhfa/temp}.
   *
   * <p>The outcome is reported through the return value instead of terminating the JVM, so the
   * caller decides what to do with it.
   *
   * @param args command-line arguments; {@code args[0]} is the input path
   * @return 0 if the job succeeded, 1 if it failed, 2 if the input path argument is missing
   * @throws Exception if job setup or execution fails
   */
  public int run(String[] args) throws Exception {
    if (args.length < 1) {
      System.err.println("Usage: AuthorCounter <input path>");
      return 2;
    }

    Path tempDir = new Path("/user/akhfa/temp");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(AuthorCounter.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempDir);

    // Previously System.exit(...) was called here, which made the return statement dead code.
    return job.waitForCompletion(true) ? 0 : 1;
  }
コード例 #7
0
  /**
   * Reads the number of maps and the number of samples per map from the arguments, runs the
   * map/reduce estimation and prints the estimated value of Pi on standard output.
   *
   * @param args expected to be {@code <nMaps> <nSamples>}
   * @return 2 when the argument count is wrong (after printing usage); otherwise 0
   */
  public int run(String[] args) throws Exception {
    boolean badArgCount = args.length != 2;
    if (badArgCount) {
      String usage = "Usage: " + getClass().getName() + " <nMaps> <nSamples>";
      System.err.println(usage);
      ToolRunner.printGenericCommandUsage(System.err);
      return 2;
    }

    final int mapCount = Integer.parseInt(args[0]);
    final long samplesPerMap = Long.parseLong(args[1]);

    // Time stamp plus a random number gives the name of the job's temporary directory.
    long timestamp = System.currentTimeMillis();
    int nonce = new Random().nextInt(Integer.MAX_VALUE);
    final Path workDir = new Path(TMP_DIR_PREFIX + "_" + timestamp + "_" + nonce);

    System.out.println("Number of Maps  = " + mapCount);
    System.out.println("Samples per Map = " + samplesPerMap);

    Object estimate = estimatePi(mapCount, samplesPerMap, workDir, getConf());
    System.out.println("Estimated value of Pi is " + estimate);
    return 0;
  }
コード例 #8
0
    // Video splits are converted to the target format here.
    //
    // The input value is parsed as "<path> #!# <path> ... #!# *<format>": the text before
    // " #!# *" is a " #!# "-separated list of HDFS paths and the text after it is the target
    // format/extension. Each listed file is moved to /home on the local disk, converted with
    // mencoder, and the converted file is moved back to the same HDFS directory. The emitted
    // record has the first input path as key and the " #!# "-joined converted paths as value.
    //
    // NOTE(review): an IOException terminates the whole JVM via System.exit(-1) instead of
    // failing the task through the declared exception.
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      try {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);

        String st = value.toString();
        st = st.trim();
        System.out.println("job2:mapInp:-" + st);
        // fmt[0] = list of split paths, fmt[1] = target format.
        // NOTE(review): fmt[1] throws ArrayIndexOutOfBoundsException if the marker is absent.
        String[] fmt = st.split(" #!# \\*");
        String[] lst = fmt[0].split(" #!# ");

        // out: converted HDFS paths joined by " #!# "; dlt: local files to remove afterwards.
        String out = "", dlt = "";
        // flag == 1 only for the first path, so that no separator is put before it.
        int flag = 1;
        for (String st1 : lst) {

          // Greedy match: group(1) is everything before the last '/', group(2) the file name.
          Pattern x = Pattern.compile("(.*)/(.*)");
          Matcher xm = x.matcher(st1);
          String prefixPath = "", fnm = "", inpExt = "";
          while (xm.find()) {
            prefixPath = xm.group(1);
            fnm = xm.group(2);
          }
          // NOTE(review): only the text up to the first '.' is kept as the name and the second
          // dot-separated piece as the extension; a name without a '.' makes tmpArr[1] throw.
          String[] tmpArr = fnm.split("\\.");
          fnm = tmpArr[0];
          inpExt = tmpArr[1];
          // First argument true = delete the HDFS source after copying (a move).
          hdfs.copyToLocalFile(true, new Path(st1), new Path("/home/" + fnm + "." + inpExt));
          String fname = "/home/" + fnm;
          if (flag == 1) {
            flag = 0;
            out += prefixPath + "/" + fnm + "." + fmt[1];
          } else {
            out += " #!# " + prefixPath + "/" + fnm + "." + fmt[1];
          }

          if (fmt[1].equals("mpg") || fmt[1].equals("mpeg") || fmt[1].equals("mp4")) {

            // NOTE(review): the command is a single concatenated string, so paths containing
            // whitespace are split into several arguments by Runtime.exec.
            Process p =
                Runtime.getRuntime()
                    .exec(
                        "mencoder -of mpeg -ovc lavc -lavcopts vcodec=mpeg1video -oac copy "
                            + "/home/"
                            + fnm
                            + "."
                            + inpExt
                            + " -o "
                            + fname
                            + "."
                            + fmt[1]);

            // Drain stdout until end of stream; the output itself is discarded.
            // NOTE(review): stderr is never read and the exit status is never checked.
            String ls_str = "";
            DataInputStream ls_in = new DataInputStream(p.getInputStream());
            while ((ls_str = ls_in.readLine()) != null) {}

            p.destroy();
            dlt += " /home/" + fnm + "." + inpExt;
          } else if (fmt[1].equals("avi")) {

            Process p =
                Runtime.getRuntime()
                    .exec(
                        "mencoder -ovc lavc -oac mp3lame -o "
                            + fname
                            + "."
                            + fmt[1]
                            + " "
                            + "/home/"
                            + fnm
                            + "."
                            + inpExt);

            // Drain stdout until end of stream, as in the branch above.
            String ls_str = "";
            DataInputStream ls_in = new DataInputStream(p.getInputStream());
            while ((ls_str = ls_in.readLine()) != null) {}

            p.destroy();
            dlt += " /home/" + fnm + "." + inpExt;
          } else {
            // TBD
            // NOTE(review): nothing is converted in this branch, yet the upload below still
            // runs for a converted file that no command in this method produced.
            System.out.println("Unsupported target format!!!!!");
          }
          // Move the converted file back to HDFS (delete local source = true, overwrite = true).
          hdfs.copyFromLocalFile(
              true,
              true,
              new Path(fname + "." + fmt[1]),
              new Path(prefixPath + "/" + fnm + "." + fmt[1]));
        }

        // Remove the local copies of the input splits.
        // NOTE(review): when dlt is empty this runs a bare "rm" with no operands.
        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt}; // delete the files after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();

        System.out.println("Job2 mapOut:" + out);
        context.write(new Text(lst[0]), new Text(out));
        System.out.println(out);
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }
コード例 #9
0
    // Merge the converted files here.
    //
    // Only the first value for the key is used; it is parsed as a " #!# "-separated list of HDFS
    // paths. Each listed file is moved to /home under the name <base>_<n>.<ext>, the pieces are
    // concatenated with mencoder into /home/<base>.<ext>, and that file is moved to the HDFS
    // directory of the first path. The emitted record has an empty key and the merged file's
    // HDFS path as value.
    //
    // NOTE(review): an IOException terminates the whole JVM via System.exit(-1).
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      System.out.println("I'm in Job2 reduce");

      Configuration config = new Configuration();
      FileSystem hdfs = FileSystem.get(config);
      try {
        String out = "";
        // Take the first value only; any further values are ignored.
        for (Text t : values) {
          out = t.toString();
          out = out.trim();
          System.out.println("job2:redInp:-" + out);
          break;
        }
        String[] outl = out.split(" #!# ");

        // Split the first path into directory, file name (up to the last '.') and extension.
        Pattern x = Pattern.compile("(.*)/(.*)\\.(.*)");
        Matcher xm = x.matcher(outl[0]);
        String prefixPath = "", fnm = "", ext = "";
        while (xm.find()) {
          prefixPath = xm.group(1);
          fnm = xm.group(2);
          ext = xm.group(3);
        }
        // The merged file is named after the part of the file name before the first '_'.
        String foutname = fnm.split("_")[0];
        foutname += "." + ext;
        String query = "mencoder -oac copy -ovc copy";
        int cnt = 0;
        // Pieces are numbered by their position in the list, not by the number embedded in
        // their HDFS name. NOTE(review): confirm the list order is the intended merge order.
        for (String st : outl) {
          cnt++;
          // First argument true = delete the HDFS source after copying (a move).
          hdfs.copyToLocalFile(
              true,
              new Path(st),
              new Path("/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext));
          query += " " + "/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext;
        }
        query += " -o " + "/home/" + foutname;
        // NOTE(review): the command is one concatenated string, so paths containing whitespace
        // are split into several arguments; stderr and the exit status are not inspected.
        Process p2 =
            Runtime.getRuntime().exec(query); // query for merging the video files is executed here
        // Drain stdout until end of stream; the output itself is discarded.
        String ls_str = "";
        DataInputStream ls_in = new DataInputStream(p2.getInputStream());
        while ((ls_str = ls_in.readLine()) != null) {}
        p2.destroy();
        // Move the merged file to HDFS (delete local source = true, overwrite = true).
        hdfs.copyFromLocalFile(
            true, true, new Path("/home/" + foutname), new Path(prefixPath + "/" + foutname));
        // Rebuild the list of local piece names so they can be removed.
        cnt = 0;
        String dlt1 = "";
        for (String st3 : outl) {
          cnt++;
          dlt1 += " " + "/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext;
        }
        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt1}; // delete the files after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();
        context.write(new Text(""), new Text(prefixPath + "/" + foutname));
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }
コード例 #10
0
 /** Runs {@code KnownKeysMRv2} through {@code ToolRunner} and exits with the code it returns. */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new Configuration(), new KnownKeysMRv2(), args));
 }
コード例 #11
0
 /** Runs {@code ElimiateRepeat} through {@code ToolRunner} and exits with the code it returns. */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new ElimiateRepeat(), args));
 }
コード例 #12
0
 /** Runs {@code HadoopNBFilter} through {@code ToolRunner} and exits with the code it returns. */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new HadoopNBFilter(), args));
 }
コード例 #13
0
ファイル: move.java プロジェクト: EdvardPedersen/GeStore
  /**
   * Moves one file between the local file system and HDFS and records the transfer in the HBase
   * run and file tables.
   *
   * <p>Settings are resolved by {@code setup}. A local-to-remote transfer with a known format is
   * delegated to {@code adddb.main}; otherwise the file is (if necessary) generated, registered,
   * and copied in the direction returned by {@code checkArgs}.
   *
   * <p>NOTE(review): the lock taken with {@code lock(...)} is released on the explicit return
   * paths only; nothing releases it if an exception is thrown in between.
   *
   * @param args not read directly; settings come from {@code getConf()}
   * @return 0 on success or when there is no newer version to fetch; 1 when the remote file is
   *     missing and cannot be generated
   */
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    // NOTE(review): currentTime is not used anywhere below.
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    // Any value containing "true" (case-insensitive) enables these flags.
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    // A task id, when present, is appended to the temp path.
    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    // The lock name is the file id, extended with the run id and task id when they are set.
    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    // Local-to-remote with a known format: hand the work to adddb under the lock and exit.
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      // NOTE(review): -Drun_id was already added unconditionally on the line above, so a
      // non-empty run id is passed twice.
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      // NOTE(review): terminates the JVM from inside run() instead of returning 0.
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    // NOTE(review): localFS is not used anywhere below.
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    // For a non-local source fetched to local disk with an explicit stop timestamp (anything
    // other than the Integer.MAX_VALUE default), replace the stop timestamp with the value
    // returned by latestVersion.
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    // Look up column "d":"<file_id>_db_timestamp" in the run table row for this run id.
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      // Only continue from the previous entry when it is older than 36000 time units.
      // NOTE(review): presumably milliseconds (i.e. 36 s), since timestamp_real is a
      // millisecond clock value; confirm the intended threshold.
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        // Start just after the previously stored timestamp unless a full run was requested.
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    // NOTE(review): both values are parsed as Integer; a timestamp_stop beyond the int range
    // (it may have been replaced by a long above) would throw NumberFormatException here.
    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      // A "fullfile" source is always treated as not found.
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      // NOTE(review): filenames_put is not used anywhere below.
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      // No row for this file id: nothing to fetch from the remote side.
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      // Every HDFS file whose path starts with the full path is a candidate for copying.
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      // NOTE(review): the lock is released here, before the copy loop, and unlock(lockName) is
      // called a second time at the end of the method; confirm that a double unlock is harmless.
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        // Local target is local_path + file_id, plus ".<suffix>" when getSuffix returns one.
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          // Skip the copy when checksumLocalTest accepts the local file for this checksum.
          // NOTE(review): getFileChecksum may return null on some file systems, which would
          // throw a NullPointerException here; confirm for the file systems in use.
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          // Copying disabled: only print the local/remote path pair.
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
コード例 #14
0
ファイル: move.java プロジェクト: EdvardPedersen/GeStore
 /** Runs {@code move} through {@code ToolRunner} and exits with the code it returns. */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new Configuration(), new move(), args));
 }
コード例 #15
0
/**
 * Tests MessagePackBase64LineInputFormat by writing Base64-encoded MessagePack records to a local
 * file and reading them back through the input format.
 *
 * @see TestLineInputFormat.java
 */
public class TestMessagePackBase64LineInputFormat extends TestCase {
  /** Simple payload type that is serialized with MessagePack. */
  public static class MyClass {
    public String s;
    public int v;
  }

  /** Writable wrapper around {@link MyClass}. */
  public static class MyClassWritable extends MessagePackWritable<MyClass> {
    protected MyClass getObjectInstance() {
      return new MyClass();
    }
  }

  /** Input format under test, bound to {@link MyClass} and {@link MyClassWritable}. */
  public static class MyClassMessagePackBase64LineInputFormat
      extends MessagePackBase64LineInputFormat<MyClass, MyClassWritable> {
    protected MyClassWritable getWritableInstance() {
      return new MyClassWritable();
    }
  }

  // --------------

  private static int MAX_LENGTH = 200;
  private final Base64 base64_ = new Base64();
  private static Configuration defaultConf = new Configuration();
  private static FileSystem localFs = null;

  private static Path workDir =
      new Path(new Path(System.getProperty("test.build.data", "."), "data"), "TestLineInputFormat");

  /**
   * Writes files with an increasing, randomly stepped number of records and checks after each
   * write that the input format reads the records back in order.
   */
  public void testFormat() throws Exception {
    localFs = FileSystem.getLocal(defaultConf);
    localFs.delete(workDir, true);

    Job job = new Job(new Configuration(defaultConf));
    Path file = new Path(workDir, "test.txt");

    int seed = new Random().nextInt();
    Random random = new Random(seed);

    // for a variety of lengths
    int length = 0;
    while (length < MAX_LENGTH) {
      writeRecords(file, length);
      checkFormat(job);
      length += random.nextInt(MAX_LENGTH / 10) + 1;
    }
  }

  /**
   * Creates {@code file} with {@code recordCount} encoded records, verifying for every record
   * that an encode/decode round trip through Base64 and MessagePack preserves its fields.
   */
  private void writeRecords(Path file, int recordCount) throws Exception {
    Writer out = new OutputStreamWriter(localFs.create(file));
    try {
      MyClass original = new MyClass();
      int index = 0;
      while (index < recordCount) {
        original.s = Integer.toString(index);
        original.v = index;
        byte[] packed = MessagePack.pack(original);
        byte[] encoded = base64_.encodeBase64(packed);
        byte[] decoded = base64_.decode(encoded);
        MyClass restored = MessagePack.unpack(decoded, original.getClass());
        assertEquals(original.s, restored.s);
        assertEquals(original.v, restored.v);

        out.write(base64_.encodeToString(packed));
        index++;
      }
    } finally {
      out.close();
    }
  }

  /**
   * Reads every split of the work directory through the input format and asserts that the
   * records of each split carry consecutive values starting at 0.
   */
  void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat();
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    for (InputSplit split : splits) {
      RecordReader<LongWritable, MyClassWritable> reader =
          format.createRecordReader(split, attemptContext);
      reader.initialize(split, attemptContext);

      int expected = 0;
      try {
        while (reader.nextKeyValue()) {
          // The key is fetched but not checked; only the value is verified.
          reader.getCurrentKey();
          MyClass record = reader.getCurrentValue().get();
          assertEquals(record.v, expected);
          assertEquals(record.s, Integer.toString(expected));
          expected++;
        }
      } finally {
        reader.close();
      }
    }
  }
}
コード例 #16
0
 /**
  * Program entry point and driver for the MapReduce job: runs {@code IndexConstructorDriver}
  * through {@code ToolRunner} and exits with the code it returns.
  */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new Configuration(), new IndexConstructorDriver(), args));
 }
コード例 #17
0
    // The input video files are split into chunks of 64MB here.
    //
    // The input line is "<hdfs path> <second field>". The file is copied to /home on the local
    // disk, its duration is read from the stderr output of "ffmpeg -i", its size from "du -s",
    // and mencoder is run repeatedly to cut it into pieces of "sps" seconds each. Every piece is
    // moved to the HDFS directory of the input as <name>_<n>.<ext>. The emitted record has the
    // local path without extension as key and, as value, the piece paths each followed by
    // " #!# ", then "*" and the second input field.
    //
    // NOTE(review): an IOException terminates the whole JVM via System.exit(-1).
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      System.out.println("job1:mapInp:-" + line);
      // NOTE(review): info[1] throws ArrayIndexOutOfBoundsException for a line without a space.
      String[] info = line.split(" ");
      info[0] = info[0].trim();
      info[1] = info[1].trim();
      String lstfnames = "", fname = "";
      try {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        String prefixPath = "", fnm = "";
        // Greedy match: group(1) is everything before the last '/', group(2) the file name.
        Pattern x = Pattern.compile("(.*)/(.*)");
        Matcher xm = x.matcher(info[0]);
        while (xm.find()) {
          prefixPath = xm.group(1);
          fnm = xm.group(2);
        }
        String dst = "/home/" + fnm; // dst is path of the file on local system.
        hdfs.copyToLocalFile(new Path(info[0]), new Path(dst));

        // "ffmpeg -i <file>" with no output file; the Duration line is searched for on stderr.
        Process p = Runtime.getRuntime().exec("ffmpeg -i " + dst);
        String s;

        BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
        Pattern D = Pattern.compile("Duration:[ ]*([0-9]+):([0-9]+):([0-9]+)");
        long time = 0; // "time" is the duration of the input video file
        long sps = 0; // "sps" is the number of seconds(duration) of each video split
        while ((s = stdError.readLine()) != null) {
          Matcher md = D.matcher(s);
          while (md.find()) {
            // hours, minutes and seconds converted to whole seconds
            time =
                Long.parseLong(md.group(1)) * 3600
                    + Long.parseLong(md.group(2)) * 60
                    + Long.parseLong(md.group(3));
          }
        }
        Process p1 = Runtime.getRuntime().exec("du -s " + dst);
        BufferedReader stdInput1 = new BufferedReader(new InputStreamReader(p1.getInputStream()));
        String s1 = "", size = ""; // "size" is the size of input video file
        while ((s1 = stdInput1.readLine()) != null) {
          // The first tab-separated field of the du output is taken as the size.
          String s11[] = s1.split("\t");
          size = s11[0];
        }
        // Seconds of video per chunk = 64*1024 size units * duration / total size.
        // NOTE(review): assumes du reports 1 KiB units (so 64*1024 units = 64 MB); confirm.
        // NOTE(review): an empty size (no du output) throws NumberFormatException and a size
        // of 0 throws ArithmeticException.
        sps = (64 * 1024) * time / (Long.parseLong(size)); // chunk size is 64MB
        // Format sps as HH:MM:SS with each field zero-padded to at least two digits.
        String hr, min, sc;
        hr = Long.toString((sps / 3600));
        min = Long.toString((sps % 3600) / 60);
        sc = Long.toString(sps % 60);
        if (hr.length() < 2) hr = "0" + hr;
        if (min.length() < 2) min = "0" + min;
        if (sc.length() < 2) sc = "0" + sc;
        String splt = hr + ":" + min + ":" + sc;

        String query =
            "mencoder -oac copy -ovc copy -ss "; // building query to split the input video file
        String app = "", inpExt = "";
        // Split the local path at its last '.' into base path and extension.
        Pattern xx = Pattern.compile("(.*)\\.(.*)");
        Matcher xxm = xx.matcher(dst);
        while (xxm.find()) {
          fname = xxm.group(1);
          inpExt = xxm.group(2);
        }
        // NOTE(review): tmpArr and hdfsFname are not used anywhere below.
        String[] tmpArr = fname.split("/");
        String hdfsFname = "";
        long stSrt = 0;
        int cnt = 0;

        // stSrt is the start offset (seconds) of the next piece.
        // NOTE(review): if sps is 0, stSrt never advances and this loop does not terminate.
        // NOTE(review): when stSrt equals time exactly, one more piece of length 00:00:00 is
        // requested before the loop ends.
        while (true) {
          if (stSrt > time) break;
          if (stSrt + sps > time) {
            // Last piece: shorten the length to the remaining duration.
            long t = time - stSrt;
            hr = Long.toString((t / 3600));
            min = Long.toString((t % 3600) / 60);
            sc = Long.toString(t % 60);
            if (hr.length() < 2) hr = "0" + hr;
            if (min.length() < 2) min = "0" + min;
            if (sc.length() < 2) sc = "0" + sc;
            splt = hr + ":" + min + ":" + sc;
          }
          cnt++;
          // Format the start offset as HH:MM:SS.
          hr = Long.toString((stSrt / 3600));
          min = Long.toString((stSrt % 3600) / 60);
          sc = Long.toString(stSrt % 60);
          if (hr.length() < 2) hr = "0" + hr;
          if (min.length() < 2) min = "0" + min;
          if (sc.length() < 2) sc = "0" + sc;
          // Remainder of the command: "<start> -endPos <length> <input> -o <base>_<n>.<ext>".
          app =
              hr
                  + ":"
                  + min
                  + ":"
                  + sc
                  + " -endPos "
                  + splt
                  + " "
                  + dst
                  + " -o "
                  + fname
                  + "_"
                  + Integer.toString(cnt)
                  + "."
                  + inpExt;

          // NOTE(review): the command is one concatenated string, so paths containing
          // whitespace are split into several arguments; stderr and exit status are not read.
          Process p2 = Runtime.getRuntime().exec(query + app);
          // Drain stdout until end of stream; the output itself is discarded.
          String ls_str = "";
          DataInputStream ls_in = new DataInputStream(p2.getInputStream());
          while ((ls_str = ls_in.readLine()) != null) {}
          p2.destroy();
          // HDFS piece name uses the file name up to its first '.'.
          String[] tmpArr1 = fnm.split("\\.");
          // Move the piece to HDFS (delete local source = true, overwrite = true).
          hdfs.copyFromLocalFile(
              true,
              true,
              new Path(fname + "_" + Integer.toString(cnt) + "." + inpExt),
              new Path(prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt));
          lstfnames +=
              prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt + " #!# ";
          stSrt += sps;
        }
        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm " + dst}; // delete the file after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();
        // Terminate the list with "*" followed by the second input field.
        lstfnames += "*" + info[1];

        context.write(
            new Text(fname),
            new Text(
                lstfnames)); // "fname" contains name of the input video file with
                             // extension(eg.".avi") removed #### "lstfnames" is a string, contains
                             // all the names of video splits(concatenated)
        System.out.println("lstfnames : " + lstfnames);
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }
コード例 #18
0
  /**
   * Run a map/reduce job for estimating Pi.
   *
   * <p>Creates {@code tmpDir} with one single-record input file per map task, runs the job, reads
   * the reducer's {@code reduce-out} file and computes {@code 4 * numInside / (numMaps *
   * numPoints)}. {@code tmpDir} is always deleted before returning, also on failure.
   *
   * @param numMaps number of map tasks (one input file is written for each)
   * @param numPoints number of sample points per map task
   * @param tmpDir working directory for the job; must not exist yet
   * @param conf configuration used for the job and for file-system access
   * @return the estimated value of Pi
   * @throws IOException if {@code tmpDir} already exists, the input directory cannot be created,
   *     the job fails, or the job output contains no record
   */
  public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
      throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
      throw new IOException(
          "Tmp directory "
              + fs.makeQualified(tmpDir)
              + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
      throw new IOException("Cannot create input directory " + inDir);
    }

    try {
      // generate an input file for each map task
      for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        // Each map receives (offset, size): the index of its first point and its point count.
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer =
            SequenceFile.createWriter(
                fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
        try {
          writer.append(offset, size);
        } finally {
          writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
      }

      // start a map/reduce job
      System.out.println("Starting Job");
      final long startTime = System.currentTimeMillis();
      final boolean succeeded = job.waitForCompletion(true);
      final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
      System.out.println("Job Finished in " + duration + " seconds");
      // Report a failed job directly rather than through a confusing error while reading output.
      if (!succeeded) {
        throw new IOException("Job " + job.getJobName() + " failed");
      }

      // read outputs
      Path inFile = new Path(outDir, "reduce-out");
      LongWritable numInside = new LongWritable();
      LongWritable numOutside = new LongWritable();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
      try {
        // next() returns false when the file holds no record; the counters would then stay 0
        // and a meaningless estimate of 0 would be returned.
        if (!reader.next(numInside, numOutside)) {
          throw new IOException("No result record found in " + inFile);
        }
      } finally {
        reader.close();
      }

      // compute estimated value
      final BigDecimal numTotal =
          BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
      return BigDecimal.valueOf(4)
          .setScale(20)
          .multiply(BigDecimal.valueOf(numInside.get()))
          .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
      fs.delete(tmpDir, true);
    }
  }
コード例 #19
0
 /**
  * Stand-alone command entry point: runs {@code QuasiMonteCarlo} through {@code ToolRunner}
  * (with no explicit configuration) and exits with the code it returns.
  */
 public static void main(String[] argv) throws Exception {
   int exitCode = ToolRunner.run(null, new QuasiMonteCarlo(), argv);
   System.exit(exitCode);
 }