Code example #1
File: Docnos2Titles.java Project: AdeDZY/Ivory
  /** Runs this tool. */
  @SuppressWarnings("deprecation")
  public int run(String[] args) throws Exception {
    JobConf job = new JobConf(getConf(), Docnos2Titles.class);

    // Read commandline arguments
    CommandLine cmdline = parseArgs(args);
    if (cmdline == null) {
      printUsage();
      return -1;
    }
    String eCollectionPath = cmdline.getOptionValue(ECOLLECTION_OPTION);
    String fCollectionPath = cmdline.getOptionValue(FCOLLECTION_OPTION);
    String pwsimOutputPath = cmdline.getOptionValue(PWSIM_OPTION);
    String titlePairsPath = cmdline.getOptionValue(OUTPUT_PATH_OPTION);
    String eLang = cmdline.getOptionValue(ELANG_OPTION);
    String fLang = cmdline.getOptionValue(FLANG_OPTION);
    String samplesFile = cmdline.getOptionValue(SAMPLEDOCNOS_OPTION);
    job.setJobName("Docnos2Titles_" + fLang + "-" + eLang);

    FileInputFormat.addInputPaths(job, eCollectionPath);
    FileInputFormat.addInputPaths(job, fCollectionPath);
    FileOutputFormat.setOutputPath(job, new Path(titlePairsPath));
    DistributedCache.addCacheFile(new URI(pwsimOutputPath), job);
    DistributedCache.addCacheFile(new URI(samplesFile), job);
    job.set("eLang", eLang);
    job.set("fLang", fLang);
    job.set("PwsimPairs", pwsimOutputPath);
    job.set("Ivory.SampleFile", samplesFile);

    job.setInt("mapred.task.timeout", 60000000);
    job.set("mapreduce.map.memory.mb", "3000");
    job.set("mapreduce.map.java.opts", "-Xmx3000m");
    job.setBoolean("mapred.map.tasks.speculative.execution", false);
    job.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    job.setNumMapTasks(100);
    job.setNumReduceTasks(1);
    job.setInt("mapred.min.split.size", 2000000000);
    job.setFloat("mapred.reduce.slowstart.completed.maps", 0.9f);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(PairOfInts.class);
    job.setMapOutputValueClass(PairOfIntString.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    sLogger.info("Running job " + job.getJobName() + "...");
    sLogger.info("E-collection path: " + eCollectionPath);
    sLogger.info("F-collection path: " + fCollectionPath);
    sLogger.info("Pwsim output path: " + pwsimOutputPath);
    sLogger.info("Output path: " + titlePairsPath);
    sLogger.info("Sample file?: " + ((samplesFile != null) ? samplesFile : "none"));

    long startTime = System.currentTimeMillis();
    JobClient.runJob(job);
    System.out.println(
        "Job finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
  }
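
Since run(String[]) and getConf() suggest the Hadoop Tool interface, a driver entry point for this class would typically look like the sketch below. The main method is an assumption for illustration and does not appear in the source.

  // Hypothetical driver, assuming Docnos2Titles implements org.apache.hadoop.util.Tool.
  // Requires org.apache.hadoop.conf.Configuration and org.apache.hadoop.util.ToolRunner.
  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new Docnos2Titles(), args);
    System.exit(exitCode);
  }
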
Code example #2
  @Override
  public int run(String[] args) throws IOException {
    OptionParser p = new OptionParser();
    OptionSpec<String> maxwiOpt =
        p.accepts(maxwiOptName, "location of maxWi map file (HDFS) REQUIRED")
            .withRequiredArg()
            .ofType(String.class);
    OptionSpec<Float> thresholdOpt =
        p.accepts(thresholdOptName, "similarity threshold")
            .withRequiredArg()
            .ofType(Float.class)
            .defaultsTo(DEFAULT_THRESHOLD);
    OptionSpec<Integer> stripesOpt =
        p.accepts(stripesOptName, "number of stripes to divide the similarity matrix")
            .withRequiredArg()
            .ofType(Integer.class)
            .defaultsTo(1);
    OptionSpec<Integer> spreadOpt =
        p.accepts(spreadOptName, "number of reducers per stripe")
            .withRequiredArg()
            .ofType(Integer.class)
            .defaultsTo(DEFAULT_SPREAD);
    OptionSpec<Integer> factorOpt =
        p.accepts(factorOptName, "number of mappers per reducer")
            .withRequiredArg()
            .ofType(Integer.class)
            .defaultsTo(DEFAULT_FACTOR);
    OptionSpec<Integer> maxVectorIDOpt =
        p.accepts(maxVectorIDOptName, "maximum vector ID").withRequiredArg().ofType(Integer.class);
    p.acceptsAll(Arrays.asList("h", "?"), "show help");

    OptionSet options = parseOptions(p, args);

    // to distinguish indexes built in successive runs
    DateFormat df = new SimpleDateFormat("yyyyMMdd-HHmmss");
    Date date = new Date();

    float threshold = options.valueOf(thresholdOpt); // threshold
    if (threshold < 0 || threshold >= 1) {
      System.err.println(thresholdOptName + " should be between 0 and 1");
      System.exit(1);
    }

    int numStripes = options.valueOf(stripesOpt); // number of stripes
    if (numStripes < 1) {
      System.err.println(stripesOptName + " should be > 0");
      System.exit(1);
    }

    // MapReduce parameters
    int spread = options.valueOf(spreadOpt); // how many reducers per stripe
    if (spread < 1) {
      System.err.println(spreadOptName + " should be > 0");
      System.exit(1);
    }

    int factor = options.valueOf(factorOpt); // how many mappers per reducer
    if (factor < 1) {
      System.err.println(factorOptName + " should be > 0");
      System.exit(1);
    }

    int maxKey = 0;
    if (options.has(maxVectorIDOpt)) {
      maxKey = options.valueOf(maxVectorIDOpt); // maximum value of the vector ID
      if (maxKey < 1) {
        System.err.println(maxVectorIDOptName + " should be > 0");
        System.exit(1);
      }
    }

    int numReducers = GenericKey.StripePartitioner.numReducers(numStripes, spread);
    int numMappers = numReducers * factor;
    int numBuckets = numMappers;

    // pick the file with max weights from command line
    String maxWiDir = options.valueOf(maxwiOpt);
    List<String> nonOptArgs = options.nonOptionArguments();

    LOG.info("Threshold set to " + threshold);
    LOG.info(
        String.format(
            "Buckets: %1$-10s Factor: %2$-10s Stripes: %3$-10s Spread: %4$-10s Reducers: %5$-10s",
            numBuckets, factor, numStripes, spread, numReducers));

    // start building the jobs
    JobConf conf1 = new JobConf(getConf(), Similarity.class);
    conf1.setFloat(PARAM_APS_THRESHOLD, threshold);
    conf1.setInt(PARAM_APS_STRIPES, numStripes);
    DistributedCache.addCacheFile(URI.create(maxWiDir), conf1);

    Path inputPath = new Path(nonOptArgs.get(0));
    Path indexPath =
        new Path(
            nonOptArgs.get(0) + "-index-" + threshold + "-s" + numStripes + "_" + df.format(date));
    // glob over the part-* index files only, skipping the nested pruned output directory
    Path indexOnlyPath = new Path(indexPath, "part*");
    Path outputPath = new Path(nonOptArgs.get(1) + "-" + threshold + "-s" + numStripes);
    FileInputFormat.setInputPaths(conf1, inputPath);
    FileOutputFormat.setOutputPath(conf1, indexPath);

    conf1.setInputFormat(SequenceFileInputFormat.class);
    conf1.setOutputFormat(SequenceFileOutputFormat.class);
    conf1.setMapOutputKeyClass(LongWritable.class);
    conf1.setMapOutputValueClass(IndexItem.class);
    conf1.setOutputKeyClass(LongWritable.class);
    conf1.setOutputValueClass(IndexItemArrayWritable.class);
    conf1.setMapperClass(IndexerMapper.class);
    conf1.setReducerClass(IndexerReducer.class);

    // assuming input is sorted according to the key (vectorID) so that the
    // part files are locally sorted
    MultipleOutputs.addNamedOutput(
        conf1,
        PRUNED,
        SequenceFileOutputFormat.class,
        IntWritable.class,
        VectorComponentArrayWritable.class);

    // Name the job after the index path, minus the 16-character timestamp suffix added above
    conf1.set(
        "mapred.job.name",
        "APS-" + indexPath.getName().substring(0, indexPath.getName().length() - 16));
    conf1.setNumTasksToExecutePerJvm(-1); // JVM reuse
    conf1.setSpeculativeExecution(false);
    conf1.setCompressMapOutput(true);
    // hash the posting lists in different buckets to distribute the load
    conf1.setNumReduceTasks(numBuckets);

    RunningJob job1 = JobClient.runJob(conf1);

    // part 2
    JobConf conf2 = new JobConf(getConf(), Similarity.class);

    if (numStripes > 0) FileUtils.mergeRestFile(conf2, indexPath, PRUNED, INDEX_INTERVAL);

    MultipleInputs.addInputPath(
        conf2, indexOnlyPath, SequenceFileInputFormat.class, SimilarityMapperIndex.class);
    MultipleInputs.addInputPath(
        conf2, inputPath, SequenceFileInputFormat.class, SimilarityMapperInput.class);
    FileOutputFormat.setOutputPath(conf2, outputPath);
    conf2.setCombinerClass(SimilarityCombiner.class);
    conf2.setReducerClass(SimilarityReducer.class);
    conf2.setPartitionerClass(GenericKey.StripePartitioner.class);
    conf2.setOutputKeyComparatorClass(GenericKey.Comparator.class);
    conf2.setOutputValueGroupingComparator(GenericKey.PrimaryComparator.class);
    conf2.setMapOutputKeyClass(GenericKey.class);
    conf2.setMapOutputValueClass(GenericValue.class);
    conf2.setOutputKeyClass(VectorPair.class);
    conf2.setOutputValueClass(NullWritable.class);

    Counter numDocs =
        job1.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS");
    maxKey = maxKey > 0 ? maxKey : (int) numDocs.getValue();
    LOG.info("Setting max key value in input to " + maxKey);
    conf2.setInt(PARAM_APS_MAXKEY, maxKey);

    conf2.setInt(PARAM_APS_STRIPES, numStripes);
    conf2.setFloat(PARAM_APS_THRESHOLD, threshold);
    conf2.setInt(PARAM_APS_REDUCER_PER_STRIPE, spread);
    conf2.set("mapred.job.name", "APS-" + outputPath.getName());

    conf2.setNumTasksToExecutePerJvm(-1); // JVM reuse
    conf2.setSpeculativeExecution(false);
    conf2.setCompressMapOutput(true);
    conf2.setNumReduceTasks(numReducers);

    JobClient.runJob(conf2);

    return 0;
  }
Code example #3
  /** Delays starting reducers until the given fraction (0.0-1.0) of map tasks has completed. */
  public JobBuilder delayReducersUntil(float pctMappersComplete) throws IOException {
    _jobConf.setFloat("mapred.reduce.slowstart.completed.maps", pctMappersComplete);
    return this;
  }
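
A minimal usage sketch of this builder method is shown below. The JobBuilder constructor, the build() call, and the MyJob class are assumptions for illustration; only delayReducersUntil appears in the snippet above.

  // Hypothetical chained usage: hold reducers back until 80% of map tasks have finished.
  JobConf base = new JobConf(MyJob.class);   // MyJob is a placeholder job class
  JobConf conf = new JobBuilder(base)        // constructor assumed for illustration
      .delayReducersUntil(0.80f)             // sets mapred.reduce.slowstart.completed.maps
      .build();                              // build() assumed for illustration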