Пример #1
0
  /**
   * Example of using multiple collections as the input to a Hadoop job, configured directly from
   * Java code.
   *
   * <p>Two inputs are registered on a {@code MultiCollectionSplitBuilder}, both pointing at the
   * same collection URI: the first without a query, the second with an {@code _id > date} query.
   * The builder's JSON form is stored in the configuration under
   * {@code MultiMongoCollectionSplitter.MULTI_COLLECTION_CONF_KEY}, after which
   * {@code TreasuryYieldXMLConfig} is run through {@code ToolRunner} and the JVM exits with the
   * returned status.
   *
   * @param pArgs command-line arguments forwarded unchanged to {@code ToolRunner.run}
   * @throws Exception if anything thrown while configuring or running the job propagates
   */
  public static void main(final String[] pArgs) throws Exception {
    // Constructed but not referenced again in this method.
    TreasuryYieldMulti multiJob = new TreasuryYieldMulti();

    // Typed null placeholders keep overload resolution identical to explicit casts.
    final MongoURI noAuthUri = null;
    final DBObject noFields = null;
    final DBObject noSort = null;
    final DBObject noQuery = null;
    final boolean noTimeout = true;
    final boolean useRangeQuery = false;

    final String inputUri = "mongodb://localhost:27017/mongo_hadoop.yield_historical.in";
    final BasicDBObject afterCutoff =
        new BasicDBObject("_id", new BasicDBObject("$gt", new Date(883440000000L)));

    MultiCollectionSplitBuilder splitBuilder = new MultiCollectionSplitBuilder();
    splitBuilder
        .add(new MongoURI(inputUri), noAuthUri, noTimeout, noFields, noSort, noQuery, useRangeQuery)
        .add(
            new MongoURI(inputUri),
            noAuthUri,
            noTimeout,
            noFields,
            noSort,
            afterCutoff,
            useRangeQuery);

    Configuration jobConf = new Configuration();
    jobConf.set(MultiMongoCollectionSplitter.MULTI_COLLECTION_CONF_KEY, splitBuilder.toJSON());
    // NOTE(review): setSplitterClass is invoked on the Configuration itself and also receives it
    // as an argument; this looks like it was meant to be a static utility call. Confirm against
    // the project's configuration helper before relying on it.
    jobConf.setSplitterClass(jobConf, MultiMongoCollectionSplitter.class);

    final int exitStatus = ToolRunner.run(jobConf, new TreasuryYieldXMLConfig(), pArgs);
    System.exit(exitStatus);
  }
 /**
  * Runs an {@code NGramDriver} for this object's candidate name through {@code ToolRunner},
  * passing the enclosing {@code Main} instance's arguments, and prints the resulting exit code
  * to standard output.
  *
  * <p>Any exception raised along the way is caught and its stack trace printed; nothing is
  * rethrown.
  */
 @Override
 public void run() {
   try {
     final NGramDriver driver = new NGramDriver(this.candidateName);
     final int status = ToolRunner.run(driver, Main.this.args);
     final String report =
         "Job for " + this.candidateName + " has completed with exit code: " + status;
     System.out.println(report);
   } catch (Exception failure) {
     failure.printStackTrace();
   }
 }
Пример #3
0
  /**
   * Prepares the inputs for, and then repeatedly runs, the {@code Jacobi} tool.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Reads the file named by {@code args[0]} and counts the lines whose first two
   *       whitespace-separated tokens parse to the same integer. The count is stored in the
   *       configuration under the key {@code "DIMENTION"}.
   *   <li>Writes {@code preX/Result} with one line {@code "<j> 0"} for every {@code j} from 0 up to
   *       (but excluding) that count.
   *   <li>Adds {@code args[0]} to the distributed cache.
   *   <li>Runs {@code Jacobi} through {@code ToolRunner} at least once, repeating until
   *       {@code stopIteration(conf)} returns true or the iteration counter reaches
   *       {@code max_iter}.
   * </ol>
   *
   * <p>Both streams are closed in {@code finally} blocks so they are released even when reading,
   * parsing or writing fails. Lines with fewer than two tokens (for example blank lines) are
   * skipped instead of aborting the scan.
   *
   * @param args command-line arguments; {@code args[0]} is the matrix file, and the whole array is
   *     forwarded to each {@code Jacobi} run
   * @throws Exception if file access, number parsing or a job run fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Count the entries whose row index equals their column index.
    int dimention = 0;
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(args[0]))));
    try {
      String line;
      while ((line = br.readLine()) != null) {
        StringTokenizer tokenizer = new StringTokenizer(line);
        if (tokenizer.countTokens() < 2) {
          // Blank or truncated line: nothing to compare.
          continue;
        }
        String iRow = tokenizer.nextToken();
        String iCol = tokenizer.nextToken();
        if (Integer.parseInt(iRow) == Integer.parseInt(iCol)) {
          dimention++;
        }
      }
    } finally {
      br.close();
    }

    // The key spelling is kept as-is because it is read back at runtime elsewhere.
    conf.setInt("DIMENTION", dimention);

    // Seed the initial vector file: one "<index> 0" line per counted entry.
    BufferedWriter iniX =
        new BufferedWriter(new OutputStreamWriter(fs.create(new Path("preX/Result"))));
    try {
      for (int j = 0; j < dimention; j++) {
        iniX.write(j + " 0");
        iniX.newLine();
      }
    } finally {
      iniX.close();
    }

    URI matVec = new URI(args[0]);
    DistributedCache.addCacheFile(matVec, conf);

    // NOTE(review): because the counter is post-incremented in the condition, the job can run up
    // to max_iter + 1 times; confirm that is intended. The status returned by each run is not
    // inspected.
    int iteration = 0;
    do {
      ToolRunner.run(conf, new Jacobi(), args);
    } while (iteration++ < max_iter && (!stopIteration(conf)));
  }
Пример #4
0
 /**
  * Runs {@code XiangLi1_exercise3} through {@code ToolRunner} with a fresh {@code Configuration}
  * and exits the JVM with the status it returns.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new Configuration(), new XiangLi1_exercise3(), args));
 }
  /**
   * Entry point for the two-phase batch generation run.
   *
   * <p>Phase 1 runs {@code BatchGenerationEngine} once with the arguments left over after generic
   * option parsing, minus any {@code -setup <path>} pair. The setup file is added to the
   * distributed cache and {@code minibatch.job.setup} is set to true.
   *
   * <p>The files found in the phase 1 output directory ({@code tool_args[1]}) are then moved into
   * {@code batch<N>} sub-directories, where N is the file index divided by the size read from the
   * setup file.
   *
   * <p>Finally a chain of {@code count * size} jobs is built and run in order. A job that returns
   * 1 is re-run up to two more times with a fresh random second argument. After a job finishes,
   * its second argument is copied into the first argument of the next job. The chain stops at the
   * first job that still returns 1 after the retries.
   *
   * <p>The process exits with status 0 when every job in the chain completed and 1 when one of
   * them failed.
   *
   * @param args command-line arguments; must contain {@code -setup <path>} and at least two other
   *     arguments, the second of which names the phase 1 output directory
   * @throws IllegalArgumentException if {@code -setup} has no value, no setup file was given, or
   *     fewer than two tool arguments remain
   * @throws Exception if file system access or a job run fails
   */
  public static void main(String[] args) throws Exception {
    System.out.println("Beginning job...");
    // Start phase 1
    Configuration conf = new Configuration();

    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "local");

    String[] inputArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Path xmlPath = null;
    List<String> other_args = new ArrayList<String>();
    // Bound the loop by inputArgs, the array actually indexed: the remaining arguments can be
    // fewer than the raw args.
    for (int i = 0; i < inputArgs.length; ++i) {
      if ("-setup".equals(inputArgs[i])) {
        if (i + 1 >= inputArgs.length) {
          throw new IllegalArgumentException("-setup requires a path argument");
        }
        xmlPath = new Path(inputArgs[++i]);
        DistributedCache.addCacheFile(xmlPath.toUri(), conf);
        conf.setBoolean("minibatch.job.setup", true);
      } else {
        other_args.add(inputArgs[i]);
      }
    }

    String[] tool_args = other_args.toArray(new String[0]);
    // The status of the phase 1 run is not inspected.
    ToolRunner.run(conf, new BatchGenerationEngine(), tool_args);
    // End phase 1

    // Everything below needs the setup file and the output directory; fail with a clear message
    // instead of a NullPointerException or an index error further down.
    if (xmlPath == null) {
      throw new IllegalArgumentException("Missing required option: -setup <path>");
    }
    if (tool_args.length < 2) {
      throw new IllegalArgumentException(
          "Expected at least two tool arguments; the second is the phase 1 output directory");
    }

    // get example count and size from xml file
    // count = count_size[0];
    // size = count_size[1];
    int[] count_size = parseJobSetup(xmlPath);

    // distribute those output from phase 1 into different directories
    String outputPhase1 = tool_args[1];
    FileSystem fs = FileSystem.get(new Configuration());
    Path outputPhase1Path = new Path(outputPhase1);
    fs.setWorkingDirectory(outputPhase1Path);
    FileStatus[] outputP1AllFiles = fs.listStatus(outputPhase1Path);
    for (int i = 0; i < outputP1AllFiles.length; i++) {
      int batchNum = i / count_size[1];
      Path batchPath = new Path(outputPhase1 + "/batch" + batchNum);

      // if batch# directory not exists, mkdir
      if (!fs.exists(batchPath)) {
        FileSystem.mkdirs(fs, batchPath, new FsPermission("777"));
      }
      // move file into the batch# directory
      fs.rename(
          outputP1AllFiles[i].getPath(),
          new Path(
              outputPhase1 + "/batch" + batchNum + "/" + outputP1AllFiles[i].getPath().getName()));
    }

    // Generate dictionary of jobs
    int numberOfJobs = count_size[0] * count_size[1];
    JobConfig[] dictionary = new JobConfig[numberOfJobs];

    // Add job 0 to dictionary
    Configuration conf0 = new Configuration();
    DistributedCache.addCacheFile(xmlPath.toUri(), conf0);
    JobConfig job0 = new JobConfig(conf0, "input go here", java.util.UUID.randomUUID().toString());
    dictionary[0] = job0;

    // Add the rest of jobs into dictionary; each one is built from the previous job's args[1].
    for (int i = 1; i < dictionary.length; i++) {
      Configuration newConf = new Configuration();
      DistributedCache.addCacheFile(xmlPath.toUri(), newConf);
      JobConfig newJob =
          new JobConfig(newConf, dictionary[i - 1].args[1], java.util.UUID.randomUUID().toString());
      dictionary[i] = newJob;
    }

    // running the jobs
    logger.info("Start " + dictionary.length + " jobs!");
    final String[] retryLabels = {"once", "twice"};
    boolean allSucceeded = true;
    for (int i = 0; i < dictionary.length; i++) {
      int runResult =
          ToolRunner.run(dictionary[i].conf, new BatchGenerationEngine(), dictionary[i].args);
      // A result of 1 triggers a re-run with a fresh random second argument, at most twice.
      for (int attempt = 0; runResult == 1 && attempt < retryLabels.length; attempt++) {
        logger.info("Job " + i + "-th Re-run " + retryLabels[attempt] + "!");
        dictionary[i].args[1] = java.util.UUID.randomUUID().toString();
        runResult =
            ToolRunner.run(dictionary[i].conf, new BatchGenerationEngine(), dictionary[i].args);
      }
      if (runResult == 1) {
        logger.info("Job " + i + "-th Failed!");
        allSucceeded = false;
        break;
      }
      // Hand this job's (possibly regenerated) second argument to the next job, if there is one.
      if (i + 1 < dictionary.length) {
        dictionary[i + 1].args[0] = dictionary[i].args[1];
      }
    }

    System.exit(allSucceeded ? 0 : 1);
  }
Пример #6
0
 /**
  * Runs {@code KnownKeysMRv2} through {@code ToolRunner} with a fresh {@code Configuration} and
  * exits the JVM with the returned status.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   final Configuration configuration = new Configuration();
   final KnownKeysMRv2 tool = new KnownKeysMRv2();
   final int exitStatus = ToolRunner.run(configuration, tool, args);
   System.exit(exitStatus);
 }
Пример #7
0
 /**
  * Runs {@code Sqrt2} through {@code ToolRunner} with a fresh {@code Configuration} and exits the
  * JVM with the status the run returns.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   int result = ToolRunner.run(new Configuration(), new Sqrt2(), args);
   // Propagate the status: returning normally from main would report success to the caller
   // regardless of what the run returned.
   System.exit(result);
 }
Пример #8
0
 /**
  * Runs {@code HadoopNBFilter} through {@code ToolRunner} and exits the JVM with the returned
  * status.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new HadoopNBFilter(), args));
 }
Пример #9
0
 /**
  * Runs the {@code move} tool through {@code ToolRunner} with a fresh {@code Configuration} and
  * exits the JVM with the returned status.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   final Configuration configuration = new Configuration();
   final int exitStatus = ToolRunner.run(configuration, new move(), args);
   System.exit(exitStatus);
 }
Пример #10
0
 /**
  * Runs {@code Injector} through {@code ToolRunner} using the configuration produced by
  * {@code NutchConfiguration.create()} and exits the JVM with the returned status.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(NutchConfiguration.create(), new Injector(), args));
 }
 /**
  * Runs {@code ElimiateRepeat} through {@code ToolRunner} and exits the JVM with the returned
  * status.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   final ElimiateRepeat tool = new ElimiateRepeat();
   final int exitStatus = ToolRunner.run(tool, args);
   System.exit(exitStatus);
 }
 /**
  * Runs {@code MapTestID} through {@code ToolRunner} with a fresh {@code Configuration} and exits
  * the JVM with the returned status.
  *
  * @param args command-line arguments forwarded to the tool
  * @throws Exception if the tool run throws
  */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new Configuration(), new MapTestID(), args));
 }