private static DBConfiguration setOutput(JobConf job, String tableName) {
  job.setOutputFormat(DBOutputFormat.class);

  // Reduces write straight to the database, so duplicate (speculative) attempts
  // would produce duplicate rows; speculation must be off.
  job.setReduceSpeculativeExecution(false);

  DBConfiguration dbConf = new DBConfiguration(job);
  dbConf.setOutputTableName(tableName);

  return dbConf;
}
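A minimal usage sketch for the helper above. This is a hedged example, not taken from the original source: the "events" table and the column names are illustrative placeholders, and it assumes DBConfiguration's setOutputFieldNames, which stock Hadoop's DBOutputFormat.setOutput(JobConf, String, String...) uses for the same purpose.

// Hedged sketch: "events", "id", and "value" are placeholders, not names from the source.
private static void configureDbOutput(JobConf job) {
  DBConfiguration dbConf = setOutput(job, "events");  // disables speculative reduces, sets the table
  dbConf.setOutputFieldNames("id", "value");          // columns the reducer's records map to
}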
boolean createJobConfAndParseArgs(String... args) throws ParseException, IOException {

  job = new JobConf(getConf(), Crush.class);

  /*
   * Turn off speculative execution because it just wastes network I/O.
   */
  job.setMapSpeculativeExecution(false);
  job.setReduceSpeculativeExecution(false);

  /*
   * Turn off preemption because we don't want to kill a task after two hours of network I/O.
   */
  job.set("mapred.fairscheduler.preemption", "false");

  tmpDir = new Path("tmp/crush-" + UUID.randomUUID());
  outDir = new Path(tmpDir, "out");

  double threshold = 0.75;

  List<String> regexes = asList(".+");
  List<String> replacements = asList("crushed_file-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
  List<String> inFormats = asList(SequenceFileInputFormat.class.getName());
  List<String> outFormats = asList(SequenceFileOutputFormat.class.getName());

  String crushTimestamp;

  Options options = buildOptions();
  CommandLine cli = new GnuParser().parse(options, args);

  if (cli.hasOption("?")) {
    BufferedReader reader = new BufferedReader(new InputStreamReader(getClass().getClassLoader()
        .getResourceAsStream("help.txt")));

    try {
      String line;

      while (null != (line = reader.readLine())) {
        System.out.println(line);
      }
    } finally {
      reader.close();
    }

    return false;
  }

  if (cli.hasOption("verbose")) {
    console = Verbosity.VERBOSE;
  } else if (cli.hasOption("info")) {
    console = Verbosity.INFO;
  } else {
    console = Verbosity.NONE;
  }

  if (cli.hasOption("ignore-regex")) {
    ignoredFiles = Pattern.compile(cli.getOptionValue("ignore-regex")).matcher("");
  }

  excludeSingleFileDirs = !cli.hasOption("include-single-file-dirs");

  String[] nonOptions = cli.getArgs();

  if (2 == nonOptions.length) {
    /*
     * Stand-alone mode accepts two arguments.
     */
    mode = Mode.STAND_ALONE;

    srcDir = new Path(nonOptions[0]);
    dest = new Path(nonOptions[1]);

    if (cli.hasOption("input-format")) {
      inFormats = asList(cli.getOptionValue("input-format"));
    }

    if (cli.hasOption("output-format")) {
      outFormats = asList(cli.getOptionValue("output-format"));
    }

    replacements = asList(dest.getName());

    crushTimestamp = Long.toString(currentTimeMillis());

  } else {
    /*
     * The previous version expected three or four arguments. The third one specified the number of tasks to use, which is
     * an integral number, just like the third argument in the new version, which is a timestamp. We tell the two apart by
     * looking at the value of the argument. A timestamp is going to be a huge, 14-digit number while the number of tasks
     * should be much smaller.
     */
    if ((args.length == 4 || args.length == 3) && args.length == nonOptions.length && args[2].length() != 14) {
      /*
       * Legacy-style arguments: src, dest, number of reduce tasks, and an optional TEXT/SEQUENCE type.
       */
      int maxTasks = Integer.parseInt(args[2]);

      if (maxTasks <= 0 || maxTasks > 4000) {
        throw new IllegalArgumentException("Tasks must be in the range [1, 4000]: " + maxTasks);
      }

      job.setInt("mapred.reduce.tasks", maxTasks);

      maxFileBlocks = Integer.MAX_VALUE;

      crushTimestamp = Long.toString(currentTimeMillis());

      srcDir = new Path(args[0]);
      dest = new Path(args[1]);

      mode = Mode.CLONE;

      if (args.length == 4) {
        if (args[3].equals("TEXT")) {
          /*
           * These are the defaults except with text input and output formats.
           */
          inFormats = asList(TextInputFormat.class.getName());
          outFormats = asList(TextOutputFormat.class.getName());

        } else if (!args[3].equals("SEQUENCE")) {
          throw new IllegalArgumentException("Type must be either TEXT or SEQUENCE: " + args[3]);
        }
      }
    } else {
      /*
       * V2-style arguments.
       */
      if (cli.hasOption("threshold")) {
        threshold = Double.parseDouble(cli.getOptionValue("threshold"));

        if (0 >= threshold || 1 < threshold || Double.isInfinite(threshold) || Double.isNaN(threshold)) {
          throw new IllegalArgumentException("Block size threshold must be in (0, 1]: " + threshold);
        }
      }

      if (cli.hasOption("max-file-blocks")) {
        int maxFileBlocksOption = Integer.parseInt(cli.getOptionValue("max-file-blocks"));

        if (0 > maxFileBlocksOption) {
          throw new IllegalArgumentException("Maximum file size in blocks must be positive: " + maxFileBlocksOption);
        }

        maxFileBlocks = maxFileBlocksOption;
      } else {
        maxFileBlocks = 8;
      }

      if (cli.hasOption("regex")) {
        regexes = asList(cli.getOptionValues("regex"));
      }

      if (cli.hasOption("replacement")) {
        replacements = asList(cli.getOptionValues("replacement"));
      }

      if (cli.hasOption("input-format")) {
        inFormats = asList(cli.getOptionValues("input-format"));
      }

      if (cli.hasOption("output-format")) {
        outFormats = asList(cli.getOptionValues("output-format"));
      }

      if (3 != nonOptions.length) {
        throw new IllegalArgumentException("Could not find source directory, out directory, and job timestamp");
      }

      srcDir = new Path(nonOptions[0]);
      dest = new Path(nonOptions[1]);

      crushTimestamp = nonOptions[2];

      if (cli.hasOption("clone")) {
        mode = Mode.CLONE;
      } else {
        mode = Mode.MAP_REDUCE;
      }

      if (!crushTimestamp.matches("\\d{14}")) {
        throw new IllegalArgumentException("Crush timestamp must be 14 digits yyyymmddhhMMss: " + crushTimestamp);
      }
    }

    dfsBlockSize = Long.parseLong(job.get("dfs.blocksize"));
    maxEligibleSize = (long) (dfsBlockSize * threshold);
  }

  /*
   * Add the crush specs and compression options to the configuration.
   */
  job.set("crush.timestamp", crushTimestamp);

  if (ignoredFiles != null) {
    job.set("crush.ignore-regex", ignoredFiles.pattern().pattern());
  }

  if (regexes.size() != replacements.size() || replacements.size() != inFormats.size()
      || inFormats.size() != outFormats.size()) {
    throw new IllegalArgumentException("Must be an equal number of regex, replacement, in-format, and out-format options");
  }

  job.setInt("crush.num.specs", regexes.size());

  matchers = new ArrayList<Matcher>(regexes.size());

  for (int i = 0; i < regexes.size(); i++) {
    job.set(format("crush.%d.regex", i), regexes.get(i));

    matchers.add(Pattern.compile(regexes.get(i)).matcher("dummy"));

    job.set(format("crush.%d.regex.replacement", i), replacements.get(i));

    String inFmt = inFormats.get(i);

    if ("sequence".equals(inFmt)) {
      inFmt = SequenceFileInputFormat.class.getName();
    } else if ("text".equals(inFmt)) {
      inFmt = TextInputFormat.class.getName();
    } else {
      try {
        if (!FileInputFormat.class.isAssignableFrom(Class.forName(inFmt))) {
          throw new IllegalArgumentException("Not a FileInputFormat: " + inFmt);
        }
      } catch (ClassNotFoundException e) {
        throw new IllegalArgumentException("Not a FileInputFormat: " + inFmt);
      }
    }

    job.set(format("crush.%d.input.format", i), inFmt);

    String outFmt = outFormats.get(i);

    if ("sequence".equals(outFmt)) {
      outFmt = SequenceFileOutputFormat.class.getName();
    } else if ("text".equals(outFmt)) {
      outFmt = TextOutputFormat.class.getName();
    } else {
      try {
        if (!FileOutputFormat.class.isAssignableFrom(Class.forName(outFmt))) {
          throw new IllegalArgumentException("Not a FileOutputFormat: " + outFmt);
        }
      } catch (ClassNotFoundException e) {
        throw new IllegalArgumentException("Not a FileOutputFormat: " + outFmt);
      }
    }

    job.set(format("crush.%d.output.format", i), outFmt);
  }

  String codec = cli.getOptionValue("compress");

  if (null == codec) {
    codec = DefaultCodec.class.getName();
  } else if ("none".equals(codec)) {
    codec = null;
  } else if ("gzip".equals(codec)) {
    codec = GzipCodec.class.getName();
  } else {
    try {
      if (!CompressionCodec.class.isAssignableFrom(Class.forName(codec))) {
        throw new IllegalArgumentException("Not a CompressionCodec: " + codec);
      }
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Not a CompressionCodec: " + codec);
    }
  }

  if (null == codec) {
    job.setBoolean("mapred.output.compress", false);
  } else {
    job.setBoolean("mapred.output.compress", true);
    job.set("mapred.output.compression.type", "BLOCK");
    job.set("mapred.output.compression.codec", codec);

    try {
      CompressionCodec instance = (CompressionCodec) Class.forName(codec).newInstance();
      codecExtension = instance.getDefaultExtension();
    } catch (Exception e) {
      throw new AssertionError();
    }
  }

  return true;
}
public JobBuilder speculativeReducerExecution() throws IOException {
  _jobConf.setReduceSpeculativeExecution(true);
  return this;
}

public JobBuilder speculativeReducerExecution(boolean enable) throws IOException {
  _jobConf.setReduceSpeculativeExecution(enable);
  return this;
}
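A hedged usage sketch for the fluent builder above. Only the two overloads shown are taken from the source; how a JobBuilder instance is obtained is an assumption, so the sketch takes one as a parameter rather than inventing a constructor.

// Hedged sketch: "builder" stands for a JobBuilder obtained from the enclosing API.
JobBuilder configureSpeculation(JobBuilder builder, boolean reducersAreIdempotent) throws IOException {
  // Speculative reduces are only safe when a duplicate attempt causes no side effects.
  return builder.speculativeReducerExecution(reducersAreIdempotent);
}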
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb, List<Path> segments,
    boolean noCommit, boolean deleteGone, String solrParams, boolean filter, boolean normalize)
    throws IOException {

  // Note: URL filtering is hard-disabled here; the filter argument is ignored.
  filter = false;

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrIndexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("index-solr " + solrUrl);

  LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
  LOG.info("SolrIndexer: URL filtering: " + filter);
  LOG.info("SolrIndexer: URL normalizing: " + normalize);

  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

  if (solrParams != null) {
    job.set(SolrConstants.PARAMS, solrParams);
  }

  NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);

  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);

  try {
    JobClient.runJob(job);

    // Do the commit once, for all the reducers, in one go.
    SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);

    if (!noCommit) {
      solr.commit();
    }

    long end = System.currentTimeMillis();
    LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
        + TimingUtil.elapsedTime(start, end));
  } catch (Exception e) {
    LOG.error(e.toString());
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
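A hedged call-site sketch for the method above. The Solr URL and HDFS paths are placeholders, and it assumes the enclosing SolrIndexer can be constructed and given a configuration as Nutch tools typically are.

// Hedged sketch; URL, paths, and the segment name are illustrative placeholders.
SolrIndexer indexer = new SolrIndexer();
indexer.setConf(NutchConfiguration.create());

List<Path> segments = Arrays.asList(new Path("crawl/segments/20100221175612"));

// Flags mirror the parameter order: noCommit, deleteGone, solrParams, filter, normalize.
// The filter argument is moot, since the method overrides it to false internally.
indexer.indexSolr("http://localhost:8983/solr", new Path("crawl/crawldb"),
    new Path("crawl/linkdb"), segments, false, true, null, false, true);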