/**
 * Runs this tool: parses the command line, configures the co-occurrence (stripes) MapReduce job,
 * and blocks until the job has finished.
 *
 * @param argv raw command-line arguments, bound onto a fresh {@code Args} instance
 * @return {@code -1} if the arguments cannot be parsed, {@code 0} if the job completed
 *     successfully, {@code 1} if the job failed
 * @throws Exception if configuring, cleaning up for, or submitting the job fails
 */
  public int run(String[] argv) throws Exception {
    Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
      parser.parseArgument(argv);
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      parser.printUsage(System.err);
      return -1;
    }

    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);

    Job job = Job.getInstance(getConf());
    job.setJobName(CooccurrenceStripes.class.getSimpleName());
    job.setJarByClass(CooccurrenceStripes.class);

    // Ship the side file to every task; the "#mergedLineCounts2" fragment is the link name.
    job.addCacheFile(new URI("/tmp/mergedLineCounts2#mergedLineCounts2"));

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapStIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapStFW.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    // Settings go on the job's own configuration so they apply to this job only.
    Configuration jobConf = job.getConfiguration();
    jobConf.setInt("mapred.max.split.size", 1024 * 1024 * 64); // 64 MiB
    // NOTE(review): the JVM heap (-Xmx) equals the requested container size for both
    // map and reduce tasks - confirm this leaves enough headroom on the target cluster.
    jobConf.set("mapreduce.map.memory.mb", "3072");
    jobConf.set("mapreduce.map.java.opts", "-Xmx3072m");
    jobConf.set("mapreduce.reduce.memory.mb", "3072");
    jobConf.set("mapreduce.reduce.java.opts", "-Xmx3072m");

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println(
        "Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Propagate job failure to the caller instead of always reporting success.
    return succeeded ? 0 : 1;
  }
Пример #2
0
  /**
   * Binds the command-line arguments onto {@code options} and derives the dependent settings.
   *
   * <p>Exits the JVM with status 1 when the arguments cannot be parsed, and with status 0 after
   * printing the version or a usage text when one of those was requested.
   *
   * @param options the option bean populated by the parser and adjusted afterwards
   * @param args the raw command-line arguments
   */
  private static void parseOptions(Options options, String[] args) {
    CmdLineParser cli =
        new CmdLineParser(options, ParserProperties.defaults().withUsageWidth(128));
    try {
      cli.parseArgument(args);
    } catch (CmdLineException parseFailure) {
      System.err.println(parseFailure.getMessage());
      System.err.println(getUsageString(cli, false));
      System.exit(1);
    }

    // Informational flags: print the requested text and terminate.
    if (options.showVersion) {
      System.out.println(getVersionString());
      System.exit(0);
    } else if (options.showHelp) {
      System.out.println(getUsageString(cli, false));
      System.exit(0);
    } else if (options.showExtendedHelp) {
      System.out.println(getUsageString(cli, true));
      System.exit(0);
    }

    // Disable interpreter when bytecode is requested
    boolean bytecodeRequested = options.debug || options.fullDebug || options.debugInfo;
    if (bytecodeRequested) {
      options.noInterpreter = true;
    }

    // The file argument, if any, is executed as the last script.
    if (options.fileName != null) {
      boolean readFromStdin = options.fileName.toString().equals("-");
      if (!readFromStdin) {
        options.evalScripts.add(new EvalPath(options.fileName));
      } else if (System.console() != null) {
        // "-" with a console attached: go interactive instead of reading System.in.
        options.interactive = true;
      } else {
        // "-" without a console: System.in is not interactive, read it as a script.
        options.evalScripts.add(new EvalString(read(System.in)));
      }
    }

    if (options.evalScripts.isEmpty()) {
      // Default to interactive mode when no files or expressions were set
      options.interactive = true;

      // Warn if --module is used without input files.
      if (options.module) {
        System.err.println(formatMessage("module_no_files"));
      }
    }

    if (options.ecmascript7) {
      System.err.println(formatMessage("deprecated.es7"));
    }
  }
Пример #3
0
  /**
   * Command-line entry point: binds the arguments onto a new {@code Griffin} instance, then
   * either prints the help message followed by the usage text, or executes the selected command.
   * A {@code CmdLineException} is logged at SEVERE level rather than rethrown.
   *
   * @param args the command line arguments
   * @throws java.io.IOException declared by this entry point; presumably propagated from command
   *     execution (confirm against the command implementations)
   * @throws java.lang.InterruptedException declared by this entry point; presumably propagated
   *     from command execution (confirm against the command implementations)
   */
  public static void main(String[] args) throws IOException, InterruptedException {
    try {
      Griffin app = new Griffin();
      ParserProperties layout = ParserProperties.defaults().withUsageWidth(120);
      CmdLineParser cli = new CmdLineParser(app, layout);
      cli.parseArgument(args);

      // Help is shown for --help, --version, and when no arguments were given at all.
      boolean helpRequested = app.help || app.version || args.length == 0;
      if (!helpRequested) {
        app.commands.execute();
        return;
      }

      app.printHelpMessage();
      cli.printUsage(System.out);
    } catch (CmdLineException ex) {
      Logger.getLogger(Griffin.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
Пример #4
0
  /**
   * Command-line entry point for indexing: parses the arguments into an {@code IndexArgs}, runs a
   * {@code MultithreadedIndexer} with them, and logs the number of indexed documents together
   * with the elapsed time.
   *
   * <p>When the arguments cannot be parsed, the error, the usage text and an example invocation
   * are written to standard error and the method returns without indexing.
   *
   * @param args the raw command-line arguments
   * @throws Exception if constructing or running the indexer fails
   */
  public static void main(String[] args) throws Exception {
    IndexArgs indexArgs = new IndexArgs();
    ParserProperties layout = ParserProperties.defaults().withUsageWidth(90);
    CmdLineParser cli = new CmdLineParser(indexArgs, layout);

    try {
      cli.parseArgument(args);
    } catch (CmdLineException badArgs) {
      System.err.println(badArgs.getMessage());
      cli.printUsage(System.err);
      String exampleInvocation =
          IndexCollection.class.getSimpleName() + cli.printExample(OptionHandlerFilter.REQUIRED);
      System.err.println("Example: " + exampleInvocation);
      return;
    }

    boolean docVectorsWithoutPositions = indexArgs.docvectors && !indexArgs.positions;
    if (docVectorsWithoutPositions) {
      LOG.warn(
          "to store docVectors you must store positions too. "
              + "With this configuration, both positions and docVectors will not be stored!");
    }

    // The clock starts before the indexer is constructed, so setup time is included.
    final long startNanos = System.nanoTime();
    MultithreadedIndexer indexer = new MultithreadedIndexer(indexArgs);

    LOG.info("Index path: " + indexArgs.index);
    LOG.info("Threads: " + indexArgs.threads);
    LOG.info("Keep Stopwords: " + indexArgs.keepstop);
    LOG.info("Positions: " + indexArgs.positions);
    LOG.info("Store docVectors: " + indexArgs.docvectors);
    LOG.info("Optimize (merge segments): " + indexArgs.optimize);

    LOG.info("Starting indexer...");

    int numIndexed = indexer.run();
    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    String elapsedText = DurationFormatUtils.formatDuration(elapsedMillis, "HH:mm:ss");
    LOG.info("Total " + numIndexed + " documents indexed in " + elapsedText);
  }