Java JobConf.setReduceSpeculativeExecution示例

编程语言: Java

命名空间/包名称: org.apache.hadoop.mapred

类/类型: JobConf

方法/功能: setReduceSpeculativeExecution

hotexamples.com的示例: 5

Java JobConf.setReduceSpeculativeExecution - 已找到5个示例。这些是从开源项目中提取的最受好评的org.apache.hadoop.mapred.JobConf.setReduceSpeculativeExecution现实Java示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

setMapOutputValueClass(30)

setOutputValueClass(30)

setJobName(30)

setMapperClass(30)

setInputFormat(30)

set(30)

setNumMapTasks(30)

setNumReduceTasks(30)

setOutputFormat(30)

setMapOutputKeyClass(30)

setOutputKeyClass(30)

getInt(30)

setReducerClass(30)

get(30)

setCombinerClass(27)

setInt(25)

setBoolean(23)

getBoolean(18)

setJarByClass(16)

getLong(14)

setLong(12)

setPartitionerClass(12)

setMapSpeculativeExecution(10)

getFloat(8)

setClass(7)

setJar(6)

setOutputKeyComparatorClass(6)

setReduceSpeculativeExecution(5)

getCredentials(5)

setOutputValueGroupingComparator(5)

getNumMapTasks(5)

setNumTasksToExecutePerJvm(4)

getJobName(4)

setMapRunnerClass(4)

addResource(4)

getNumReduceTasks(4)

setMaxMapAttempts(4)

setCompressMapOutput(4)

getInputFormat(4)

setSpeculativeExecution(4)

setStrings(3)

setClassLoader(3)

setOutputPath(3)

getMapOutputValueClass(3)

getMapOutputKeyClass(3)

setJobPriority(3)

setFloat(3)

setQueueName(2)

setMaxReduceAttempts(2)

addInputPath(2)

示例#1

显示文件

文件： DBOutputFormat.java 项目： Jude7/bc-hadoop2.0

  private static DBConfiguration setOutput(JobConf job, String tableName) {
    job.setOutputFormat(DBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);

    dbConf.setOutputTableName(tableName);
    return dbConf;
  }

示例#2

显示文件

文件： Crush.java 项目： jmeagher/filecrush

  boolean createJobConfAndParseArgs(String... args) throws ParseException, IOException {

    job = new JobConf(getConf(), Crush.class);

    /*
     * Turn off speculative execution because that's just wasting network io.
     */
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    /*
     * Turn off pre-emption because we don't want to kill a task after two hours of network io.
     */
    job.set("mapred.fairscheduler.preemption", "false");

    tmpDir = new Path("tmp/crush-" + UUID.randomUUID());
    outDir = new Path(tmpDir, "out");

    double threshold = 0.75;

    List<String> regexes = asList(".+");
    List<String> replacements =
        asList("crushed_file-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    List<String> inFormats = asList(SequenceFileInputFormat.class.getName());
    List<String> outFormats = asList(SequenceFileOutputFormat.class.getName());

    String crushTimestamp;

    Options options = buildOptions();
    CommandLine cli = new GnuParser().parse(options, args);

    if (cli.hasOption("?")) {
      BufferedReader reader =
          new BufferedReader(
              new InputStreamReader(getClass().getClassLoader().getResourceAsStream("help.txt")));

      try {
        String line;

        while (null != (line = reader.readLine())) {
          System.out.println(line);
        }
      } finally {
        reader.close();
      }

      return false;
    }

    if (cli.hasOption("verbose")) {
      console = Verbosity.VERBOSE;
    } else if (cli.hasOption("info")) {
      console = Verbosity.INFO;
    } else {
      console = Verbosity.NONE;
    }

    if (cli.hasOption("ignore-regex")) {
      ignoredFiles = Pattern.compile(cli.getOptionValue("ignore-regex")).matcher("");
    }

    excludeSingleFileDirs = !cli.hasOption("include-single-file-dirs");

    String[] nonOptions = cli.getArgs();

    if (2 == nonOptions.length) {
      /*
       * Stand alone mode accepts two arguments.
       */
      mode = Mode.STAND_ALONE;

      srcDir = new Path(nonOptions[0]);

      dest = new Path(nonOptions[1]);

      if (cli.hasOption("input-format")) {
        inFormats = asList(cli.getOptionValue("input-format"));
      }

      if (cli.hasOption("output-format")) {
        outFormats = asList(cli.getOptionValue("output-format"));
      }

      replacements = asList(dest.getName());

      crushTimestamp = Long.toString(currentTimeMillis());

    } else {
      /*
       * The previous version expected three or four arguments. The third one specified the number of tasks to use, which is an
       * integral number, just like the third argument in the new version, which is a timestamp. We tell the two apart by looking
       * at the value of the argument. A timestamp is going to be a huge, 14-digit number while the number of tasks should be much
       * smaller.
       */

      if ((args.length == 4 || args.length == 3)
          && args.length == nonOptions.length
          && args[2].length() != 14) {

        int maxTasks = Integer.parseInt(args[2]);

        if (maxTasks <= 0 || maxTasks > 4000) {
          throw new IllegalArgumentException("Tasks must be in the range [1, 4000]: " + maxTasks);
        }

        job.setInt("mapred.reduce.tasks", maxTasks);

        maxFileBlocks = Integer.MAX_VALUE;

        crushTimestamp = Long.toString(currentTimeMillis());

        srcDir = new Path(args[0]);
        dest = new Path(args[1]);

        mode = Mode.CLONE;

        if (args.length == 4) {
          if (args[3].equals("TEXT")) {
            /*
             * These are the defaults except with text input and output formats.
             */
            inFormats = asList(TextInputFormat.class.getName());
            outFormats = asList(TextOutputFormat.class.getName());

          } else if (!args[3].equals("SEQUENCE")) {
            throw new IllegalArgumentException("Type must be either TEXT or SEQUENCE: " + args[3]);
          }
        }
      } else {
        /*
         * V2 style arguments.
         */
        if (cli.hasOption("threshold")) {
          threshold = Double.parseDouble(cli.getOptionValue("threshold"));

          if (0 >= threshold
              || 1 < threshold
              || Double.isInfinite(threshold)
              || Double.isNaN(threshold)) {
            throw new IllegalArgumentException(
                "Block size threshold must be in (0, 1]: " + threshold);
          }
        }

        if (cli.hasOption("max-file-blocks")) {
          int maxFileBlocksOption = Integer.parseInt(cli.getOptionValue("max-file-blocks"));

          if (0 > maxFileBlocksOption) {
            throw new IllegalArgumentException(
                "Maximum file size in blocks must be positive: " + maxFileBlocksOption);
          }

          maxFileBlocks = maxFileBlocksOption;
        } else {
          maxFileBlocks = 8;
        }

        if (cli.hasOption("regex")) {
          regexes = asList(cli.getOptionValues("regex"));
        }

        if (cli.hasOption("replacement")) {
          replacements = asList(cli.getOptionValues("replacement"));
        }

        if (cli.hasOption("input-format")) {
          inFormats = asList(cli.getOptionValues("input-format"));
        }

        if (cli.hasOption("output-format")) {
          outFormats = asList(cli.getOptionValues("output-format"));
        }

        if (3 != nonOptions.length) {
          throw new IllegalArgumentException(
              "Could not find source directory, out directory, and job timestamp");
        }

        srcDir = new Path(nonOptions[0]);
        dest = new Path(nonOptions[1]);

        crushTimestamp = nonOptions[2];

        if (cli.hasOption("clone")) {
          mode = Mode.CLONE;
        } else {
          mode = Mode.MAP_REDUCE;
        }

        if (!crushTimestamp.matches("\\d{14}")) {
          throw new IllegalArgumentException(
              "Crush timestamp must be 14 digits yyyymmddhhMMss: " + crushTimestamp);
        }
      }

      dfsBlockSize = Long.parseLong(job.get("dfs.blocksize"));
      maxEligibleSize = (long) (dfsBlockSize * threshold);
    }

    /*
     * Add the crush specs and compression options to the configuration.
     */
    job.set("crush.timestamp", crushTimestamp);

    if (ignoredFiles != null) {
      job.set("crush.ignore-regex", ignoredFiles.pattern().pattern());
    }

    if (regexes.size() != replacements.size()
        || replacements.size() != inFormats.size()
        || inFormats.size() != outFormats.size()) {
      throw new IllegalArgumentException(
          "Must be an equal number of regex, replacement, in-format, and out-format options");
    }

    job.setInt("crush.num.specs", regexes.size());

    matchers = new ArrayList<Matcher>(regexes.size());

    for (int i = 0; i < regexes.size(); i++) {
      job.set(format("crush.%d.regex", i), regexes.get(i));

      matchers.add(Pattern.compile(regexes.get(i)).matcher("dummy"));

      job.set(format("crush.%d.regex.replacement", i), replacements.get(i));

      String inFmt = inFormats.get(i);

      if ("sequence".equals(inFmt)) {
        inFmt = SequenceFileInputFormat.class.getName();
      } else if ("text".equals(inFmt)) {
        inFmt = TextInputFormat.class.getName();
      } else {
        try {
          if (!FileInputFormat.class.isAssignableFrom(Class.forName(inFmt))) {
            throw new IllegalArgumentException("Not a FileInputFormat:" + inFmt);
          }
        } catch (ClassNotFoundException e) {
          throw new IllegalArgumentException("Not a FileInputFormat:" + inFmt);
        }
      }

      job.set(format("crush.%d.input.format", i), inFmt);

      String outFmt = outFormats.get(i);

      if ("sequence".equals(outFmt)) {
        outFmt = SequenceFileOutputFormat.class.getName();
      } else if ("text".equals(outFmt)) {
        outFmt = TextOutputFormat.class.getName();
      } else {
        try {
          if (!FileOutputFormat.class.isAssignableFrom(Class.forName(outFmt))) {
            throw new IllegalArgumentException("Not a FileOutputFormat:" + outFmt);
          }
        } catch (ClassNotFoundException e) {
          throw new IllegalArgumentException("Not a FileOutputFormat:" + outFmt);
        }
      }

      job.set(format("crush.%d.output.format", i), outFmt);
    }

    String codec = cli.getOptionValue("compress");

    if (null == codec) {
      codec = DefaultCodec.class.getName();
    } else if ("none".equals(codec)) {
      codec = null;
    } else if ("gzip".equals(codec)) {
      codec = GzipCodec.class.getName();
    } else {
      try {
        if (!CompressionCodec.class.isAssignableFrom(Class.forName(codec))) {
          throw new IllegalArgumentException("Not a CompressionCodec: " + codec);
        }
      } catch (ClassNotFoundException e) {
        throw new IllegalArgumentException("Not a CompressionCodec: " + codec);
      }
    }

    if (null == codec) {
      job.setBoolean("mapred.output.compress", false);
    } else {
      job.setBoolean("mapred.output.compress", true);
      job.set("mapred.output.compression.type", "BLOCK");
      job.set("mapred.output.compression.codec", codec);

      try {
        CompressionCodec instance = (CompressionCodec) Class.forName(codec).newInstance();
        codecExtension = instance.getDefaultExtension();
      } catch (Exception e) {
        throw new AssertionError();
      }
    }

    return true;
  }

示例#3

显示文件

文件： JobBuilder.java 项目： Prasadidasi/commoncrawl-crawler

 public JobBuilder speculativeReducerExecution() throws IOException {
   _jobConf.setReduceSpeculativeExecution(true);
   return this;
 }

示例#4

显示文件

文件： JobBuilder.java 项目： Prasadidasi/commoncrawl-crawler

 public JobBuilder speculativeReducerExecution(boolean enable) throws IOException {
   _jobConf.setReduceSpeculativeExecution(enable);
   return this;
 }

示例#5

显示文件

文件： SolrIndexer.java 项目： hugHou/weblion

  public void indexSolr(
      String solrUrl,
      Path crawlDb,
      Path linkDb,
      List<Path> segments,
      boolean noCommit,
      boolean deleteGone,
      String solrParams,
      boolean filter,
      boolean normalize)
      throws IOException {

    filter = false;

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("SolrIndexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("index-solr " + solrUrl);

    LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
    LOG.info("SolrIndexer: URL filtering: " + filter);
    LOG.info("SolrIndexer: URL normalizing: " + normalize);

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    job.set(SolrConstants.SERVER_URL, solrUrl);
    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
    if (solrParams != null) {
      job.set(SolrConstants.PARAMS, solrParams);
    }
    NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);

    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());

    FileOutputFormat.setOutputPath(job, tmp);
    try {
      JobClient.runJob(job);
      // do the commits once and for all the reducers in one go
      SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);

      if (!noCommit) {
        solr.commit();
      }
      long end = System.currentTimeMillis();
      LOG.info(
          "SolrIndexer: finished at "
              + sdf.format(end)
              + ", elapsed: "
              + TimingUtil.elapsedTime(start, end));
    } catch (Exception e) {
      LOG.error(e.toString());
    } finally {
      FileSystem.get(job).delete(tmp, true);
    }
  }