Example #1
    @Override
    public void map(
        WritableComparable key,
        CompactorInputSplit split,
        OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
        Reporter reporter)
        throws IOException {
      // This will only get called once, since CompactRecordReader only returns one record,
      // the input split.
      // Based on the split we're passed we go instantiate the real reader and then iterate on it
      // until it finishes.
      @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
      AcidInputFormat<WritableComparable, V> aif =
          instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
      ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

      boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
      AcidInputFormat.RawReader<V> reader =
          aif.getRawReader(
              jobConf,
              isMajor,
              split.getBucket(),
              txnList,
              split.getBaseDir(),
              split.getDeltaDirs());
      RecordIdentifier identifier = reader.createKey();
      V value = reader.createValue();
      getWriter(reporter, reader.getObjectInspector(), split.getBucket());
      while (reader.next(identifier, value)) {
        if (isMajor && reader.isDelete(value)) continue;
        writer.write(value);
        reporter.progress();
      }
    }
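The map() above only reads configuration; what follows is a minimal driver-side sketch of how those keys might be populated before job submission, assuming the same string constants are accessible and using ORC's ACID formats as an illustrative choice.
    // Hypothetical driver-side setup; constant names mirror the keys read in map() above.
    JobConf job = new JobConf();
    job.set(INPUT_FORMAT_CLASS_NAME, OrcInputFormat.class.getName());    // assumed ACID input format
    job.set(OUTPUT_FORMAT_CLASS_NAME, OrcOutputFormat.class.getName());  // assumed ACID output format (consumed by getWriter)
    job.setBoolean(IS_MAJOR, true);                                       // true = major compaction
    job.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList().writeToString()); // snapshot of valid transactions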
Example #2
 /**
  * Mapper configuration. Extracts source and destination file system, as well as top-level paths
  * on source and destination directories. Gets the named file systems, to be used later in map.
  */
 public void configure(JobConf job) {
   destPath = new Path(job.get(DST_DIR_LABEL, "/"));
   try {
     destFileSys = destPath.getFileSystem(job);
   } catch (IOException ex) {
     throw new RuntimeException("Unable to get the named file system.", ex);
   }
   sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
   buffer = new byte[sizeBuf];
   ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
   preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
   if (preserve_status) {
     preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
   }
   update = job.getBoolean(Options.UPDATE.propertyname, false);
   overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
   this.job = job;
 }
    /**
     * {@inheritDoc}
     *
     * @see org.apache.hadoop.mapred.MapReduceBase#configure(org.apache.hadoop.mapred.JobConf)
     */
    @Override
    public void configure(JobConf job) {
      m_caseSensitive = job.getBoolean(JOB_PARAMETER_CASE_SENSITIVE, true);
      m_inputFile = job.get(MAP_PARAMETER_INPUT_FILE);

      if (job.getBoolean(JOB_PARAMETER_SKIP_PATTERNS, false)) {
        Path[] patternsFiles = new Path[0];
        try {
          patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
          System.err.println(
              "Caught exception while getting cached files: "
                  + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
          parseSkipFile(patternsFile);
        }
      }
    }
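A hedged driver-side counterpart for the configure() above, assuming JOB_PARAMETER_CASE_SENSITIVE and JOB_PARAMETER_SKIP_PATTERNS are plain property keys; the patterns path is illustrative.
    JobConf job = new JobConf();
    job.setBoolean(JOB_PARAMETER_CASE_SENSITIVE, false);   // fold case before matching
    job.setBoolean(JOB_PARAMETER_SKIP_PATTERNS, true);     // enables the DistributedCache branch above
    // Ship a patterns file so parseSkipFile() has something to read (hypothetical path).
    DistributedCache.addCacheFile(new Path("/user/joe/patterns.txt").toUri(), job);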
 public void checkOutputSpecs(FileSystem fs, JobConf conf) throws IOException {
   Args args = (Args) Utils.getObject(conf, ARGS_CONF);
   fs = Utils.getFS(args.outputDirHdfs, conf);
   if (conf.getBoolean("mapred.reduce.tasks.speculative.execution", true)) {
     // Speculative reduce tasks would write the output multiple times.
     throw new InvalidJobConfException("Speculative execution should be false");
   }
   if (fs.exists(new Path(args.outputDirHdfs))) {
     throw new InvalidJobConfException("Output dir already exists " + args.outputDirHdfs);
   }
 }
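Since checkOutputSpecs() rejects jobs with speculative reduces enabled, the submitting code presumably disables them first; a one-line sketch using the same property key:
   JobConf conf = new JobConf();
   conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); // single writer per reduce output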
  /**
   * Test if {@link CompressionEmulationUtil#configureCompressionEmulation(
   * org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.JobConf)} can extract compression
   * related configuration parameters.
   */
  @Test
  public void testExtractCompressionConfigs() {
    JobConf source = new JobConf();
    JobConf target = new JobConf();

    // set the default values
    source.setBoolean(FileOutputFormat.COMPRESS, false);
    source.set(FileOutputFormat.COMPRESS_CODEC, "MyDefaultCodec");
    source.set(FileOutputFormat.COMPRESS_TYPE, "MyDefaultType");
    source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyDefaultCodec2");

    CompressionEmulationUtil.configureCompressionEmulation(source, target);

    // check default values
    assertFalse(target.getBoolean(FileOutputFormat.COMPRESS, true));
    assertEquals("MyDefaultCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
    assertEquals("MyDefaultType", target.get(FileOutputFormat.COMPRESS_TYPE));
    assertFalse(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true));
    assertEquals("MyDefaultCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
    assertFalse(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));

    // set new values
    source.setBoolean(FileOutputFormat.COMPRESS, true);
    source.set(FileOutputFormat.COMPRESS_CODEC, "MyCodec");
    source.set(FileOutputFormat.COMPRESS_TYPE, "MyType");
    source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyCodec2");
    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(source, "file.gz");

    target = new JobConf(); // reset
    CompressionEmulationUtil.configureCompressionEmulation(source, target);

    // check new values
    assertTrue(target.getBoolean(FileOutputFormat.COMPRESS, false));
    assertEquals("MyCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
    assertEquals("MyType", target.get(FileOutputFormat.COMPRESS_TYPE));
    assertTrue(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false));
    assertEquals("MyCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
    assertTrue(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));
  }
Example #6
  @VisibleForTesting
  Fetcher(
      JobConf job,
      TaskAttemptID reduceId,
      ShuffleSchedulerImpl<K, V> scheduler,
      MergeManager<K, V> merger,
      Reporter reporter,
      ShuffleClientMetrics metrics,
      ExceptionReporter exceptionReporter,
      SecretKey shuffleKey,
      int id) {
    this.jobConf = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = id;
    this.reduce = reduceId.getTaskID().getId();
    this.shuffleSecretKey = shuffleKey;
    ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs =
        reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs =
        reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    this.connectionTimeout =
        job.getInt(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT, DEFAULT_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(MRJobConfig.SHUFFLE_READ_TIMEOUT, DEFAULT_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
      sslShuffle =
          job.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT);
      if (sslShuffle && sslFactory == null) {
        sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
        try {
          sslFactory.init();
        } catch (Exception ex) {
          sslFactory.destroy();
          throw new RuntimeException(ex);
        }
      }
    }
  }
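The Fetcher reads its timeouts and SSL flag straight from the JobConf; below is a hedged sketch of those knobs with illustrative values, using the same keys.
    JobConf job = new JobConf();
    job.setInt(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT, 180000);   // ms to wait when connecting to a shuffle host
    job.setInt(MRJobConfig.SHUFFLE_READ_TIMEOUT, 180000);      // ms to wait on a shuffle read
    job.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, false);   // plain-HTTP shuffle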
  public static void main(int step, Path inputDir, JobConf job) throws IOException {
    FileSystem hdfs = inputDir.getFileSystem(job);
    if (!hdfs.exists(Collector.partitionSizesPath)) {
      System.out.println("Partition sizes file does not exists!");
      return;
    }
    debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE);
    MapFile.Reader partitionSizeReader =
        new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(), new JobConf());
    Text partitionK = new Text();
    LongWritable partSizeV = new LongWritable();

    try {
      while (partitionSizeReader.next(partitionK, partSizeV)) {
        partitionsNames.add(partitionK.toString()); // useless?
        partitionsSizes.put(partitionK.toString(), partSizeV.get());
      }
    } catch (Exception e) {
      // Swallow read errors; the entries collected so far are still printed and used below.
    }
    for (int i = 0; i < partitionsNames.size(); i++) {
      System.out.println(
          "Partition "
              + partitionsNames.get(i)
              + " has "
              + partitionsSizes.get(partitionsNames.get(i))
              + " vectors.");
    }

    if (partitionsNames.size() <= 1) return;
    stage0();
    printUndirectedNeighbors("Stage0");
    printPartitionsStat("Stage0");

    printCircularPartitionsWeight("\nCircular");
    calcCWStandardDeviation();

    stage1();
    printDirectedNeighbors("Stage1");
    System.out.println("Stage 1 final weights: ");
    printPartitionsWeights("Stage1");
    if ((step == 2) || (step == 12)) {
      stage2();
      printDirectedNeighbors("Stage2");
      System.out.println("Stage 2 final weights: ");
      printPartitionsWeights("Stage2");
    }
    // stage3(job, hdfs);
    writeComparisonList(job, hdfs);
    // printComparisonList(job, hdfs);// remove
  }
Example #8
    private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket)
        throws IOException {
      if (writer == null) {
        AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
        options
            .inspector(inspector)
            .writingBase(jobConf.getBoolean(IS_MAJOR, false))
            .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
            .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
            .reporter(reporter)
            .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
            .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
            .bucket(bucket)
            .statementId(-1); // setting statementId == -1 makes compacted delta files use
        // delta_xxxx_yyyy format

        // Instantiate the underlying output format
        @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
        AcidOutputFormat<WritableComparable, V> aof =
            instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

        writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
      }
    }
Example #9
  @Override
  public void configure(JobConf job) {
    this.job = job;

    mapper = new ObjectMapper();
    messagePack = new MessagePack();

    // Use 'yyyy' (calendar year) rather than 'YYYY' (week-based year), which misbehaves near year boundaries.
    timeFmt = DateTimeFormat.forPattern("yyyyMMddHHmmssSSS");
    oidSerial = new AtomicInteger(0);
    serial = new AtomicInteger(0);

    storeAttirbute = job.getBoolean(CIngest.CONF_INGEST_STORE_ATTR, false);

    Iterator<String> tokens =
        Splitter.on("///").split(job.get(CIngest.CONF_CASSANDRA_CONNECT_TOKEN)).iterator();

    String clusterName = tokens.next();
    String seeds = tokens.next();
    String keyspaceName = tokens.next();
    String columnFamilyName = tokens.next();

    context =
        new AstyanaxContext.Builder()
            .forCluster(clusterName)
            .forKeyspace(keyspaceName)
            .withAstyanaxConfiguration(
                new AstyanaxConfigurationImpl().setDiscoveryType(NodeDiscoveryType.TOKEN_AWARE))
            .withConnectionPoolConfiguration(
                new ConnectionPoolConfigurationImpl("cp")
                    .setPort(9160)
                    .setMaxConnsPerHost(2)
                    .setSeeds(seeds))
            .withConnectionPoolMonitor(new CountingConnectionPoolMonitor())
            .buildKeyspace(ThriftFamilyFactory.getInstance());

    context.start();
    ks = context.getEntity();

    cf =
        new ColumnFamily<String, String>(
            columnFamilyName, StringSerializer.get(), StringSerializer.get());
  }
    public void configure(JobConf job) {
      sLogger.setLevel(Level.OFF);
      src = Language.languageForISO639_1(job.get(SRC_LANG));
      tgt = Language.languageForISO639_1(job.get(TGT_LANG));
      sLogger.debug("Source language: " + src.code());
      sLogger.debug("Target language: " + tgt.code());

      boolean useVocabServer = false;
      if (!useVocabServer) {
        if (vocE == null) vocE = new VocabularyWritable();
        if (vocF == null) vocF = new VocabularyWritable();
      } else {
        try {
          vocE =
              new VocabServerClient(
                  job.get("ha.vocabserver.host"),
                  Integer.parseInt(job.get("ha.vocabserver.port1")));
          vocF =
              new VocabServerClient(
                  job.get("ha.vocabserver.host"),
                  Integer.parseInt(job.get("ha.vocabserver.port2")));
        } catch (IOException e) {
          e.printStackTrace();
          throw new RuntimeException(e);
        }
      }
      lp = LanguagePair.languageForISO639_1Pair(src.code() + "-" + tgt.code());

      if (job.getBoolean("ha.trunc.use", true)) {
        sawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, src, job);
        tawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, tgt, job);
      } else {
        sawp = AlignmentWordPreprocessor.CreatePreprocessor(null, null, job);
        tawp = AlignmentWordPreprocessor.CreatePreprocessor(null, null, job);
      }
      job_ = job;
    }
Example #11
 public void configure(JobConf job) {
   interval = job.getInt("db.fetch.interval.default", 2592000);
   scoreInjected = job.getFloat("db.score.injected", 1.0f);
   overwrite = job.getBoolean("db.injector.overwrite", false);
   update = job.getBoolean("db.injector.update", false);
 }
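A hedged sketch of the corresponding driver-side settings, reusing the property keys this configure() reads; the values shown simply echo or flip the defaults above.
   JobConf job = new JobConf();
   job.setInt("db.fetch.interval.default", 2592000);   // 30 days, same as the default above
   job.setFloat("db.score.injected", 1.0f);
   job.setBoolean("db.injector.overwrite", false);
   job.setBoolean("db.injector.update", true);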
Example #12
 /**
  * Return whether native hadoop libraries, if present, can be used for this job.
  *
  * @param jobConf job configuration
  * @return <code>true</code> if native hadoop libraries, if present, can be used for this job;
  *     <code>false</code> otherwise.
  */
 public boolean getLoadNativeLibraries(JobConf jobConf) {
   return jobConf.getBoolean("hadoop.native.lib", true);
 }
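For symmetry, a hedged sketch of a companion setter using the same key (assuming the surrounding class does not already provide one):
 public void setLoadNativeLibraries(JobConf jobConf, boolean loadNativeLibraries) {
   jobConf.setBoolean("hadoop.native.lib", loadNativeLibraries);
 }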
  @Override
  public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {

    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
      TableMapReduceUtil.initCredentials(jobConf);
    }

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching =
        jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

    if (hbaseColumnsMapping == null) {
      throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
    }

    ColumnMappings columnMappings = null;
    try {
      columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
      throw new IOException(e);
    }

    int iKey = columnMappings.getKeyIndex();
    ColumnMapping keyMapping = columnMappings.getKeyMapping();

    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately.  Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    Scan scan =
        createFilterScan(
            jobConf,
            iKey,
            HiveHBaseInputFormatUtil.getStorageFormatOfKey(
                keyMapping.mappingSpec,
                jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    // REVIEW:  are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (ColumnMapping colMap : columnMappings) {
      if (colMap.hbaseRowKey) {
        continue;
      }

      if (colMap.qualifierName == null) {
        scan.addFamily(colMap.familyNameBytes);
        addedFamilies.add(colMap.familyName);
      } else {
        if (!addedFamilies.contains(colMap.familyName)) {
          // add the column only if the family has not already been added
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
        }
      }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];

    for (int i = 0; i < splits.size(); i++) {
      results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }

    return results;
  }
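getSplits() above depends on a handful of HBaseSerDe properties; here is a hedged sketch of the configuration it expects, with an illustrative table name and column mapping.
    JobConf jobConf = new JobConf();
    jobConf.set(HBaseSerDe.HBASE_TABLE_NAME, "my_hbase_table");            // hypothetical table
    jobConf.set(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cf:col1,cf:col2"); // row key plus two qualifiers
    jobConf.setBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);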
Example #14
  public MergeManagerImpl(
      TaskAttemptID reduceId,
      JobConf jobConf,
      FileSystem localFS,
      LocalDirAllocator localDirAllocator,
      Reporter reporter,
      CompressionCodec codec,
      Class<? extends Reducer> combinerClass,
      CombineOutputCollector<K, V> combineCollector,
      Counters.Counter spilledRecordsCounter,
      Counters.Counter reduceCombineInputCounter,
      Counters.Counter mergedMapOutputsCounter,
      ExceptionReporter exceptionReporter,
      Progress mergePhase,
      MapOutputFile mapOutputFile) {
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;

    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.setConf(jobConf);

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    final float maxInMemCopyUse = jobConf.getFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.90f);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
      throw new IllegalArgumentException(
          "Invalid value for " + MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit =
        (long)
            (jobConf.getLong(
                    MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES,
                    Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE))
                * maxInMemCopyUse);

    this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100);

    final float singleShuffleMemoryLimitPercent =
        jobConf.getFloat(
            MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
      throw new IllegalArgumentException(
          "Invalid value for "
              + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT
              + ": "
              + singleShuffleMemoryLimitPercent);
    }

    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold =
        jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor);
    this.mergeThreshold =
        (long) (this.memoryLimit * jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.90f));
    LOG.info(
        "MergerManager: memoryLimit="
            + memoryLimit
            + ", "
            + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit
            + ", "
            + "mergeThreshold="
            + mergeThreshold
            + ", "
            + "ioSortFactor="
            + ioSortFactor
            + ", "
            + "memToMemMergeOutputsThreshold="
            + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
      throw new RuntimeException(
          "Invlaid configuration: "
              + "maxSingleShuffleLimit should be less than mergeThreshold"
              + "maxSingleShuffleLimit: "
              + this.maxSingleShuffleLimit
              + "mergeThreshold: "
              + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
    if (allowMemToMemMerge) {
      this.memToMemMerger =
          new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
      this.memToMemMerger.start();
    } else {
      this.memToMemMerger = null;
    }

    this.inMemoryMerger = createInMemoryMerger();
    this.inMemoryMerger.start();

    this.onDiskMerger = new OnDiskMerger(this);
    this.onDiskMerger.start();

    this.mergePhase = mergePhase;
  }
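The constructor above is driven entirely by shuffle configuration; below is a hedged sketch of those knobs set on a JobConf with the same MRJobConfig keys (values are illustrative, not recommendations).
    JobConf conf = new JobConf();
    conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.70f);  // share of heap for in-memory map outputs
    conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.25f);  // cap on a single in-memory map output
    conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.66f);         // usage level that triggers an in-memory merge
    conf.setInt(MRJobConfig.IO_SORT_FACTOR, 100);                    // streams merged at once on disk
    conf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);     // optional memory-to-memory merger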
Example #15
 /** {@inheritDoc} */
 public void configure(JobConf job) {
   this.jobconf = job;
   ignoreFailures = jobconf.getBoolean(IGNORE_FAILURES_OPTION_LABEL, true);
   st = new Statistics();
 }
Example #16
  public void configure(JobConf job) {
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());

    try {
      l4j.info(
          "conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
      l4j.info(
          "thread classpath = "
              + Arrays.asList(
                  ((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
    } catch (Exception e) {
      l4j.info("cannot get classpath: " + e.getMessage());
    }
    try {
      jc = job;
      int estCountBucketSize = jc.getInt("hive.exec.estdistinct.bucketsize.log", 15);
      int estCountBufferSize = jc.getInt("hive.exec.estdistinct.buffsize.log", 8);
      GenericUDAFCardinalityEstimation.initParams(estCountBucketSize, estCountBufferSize);

      boolean isChangSizeZero2Null =
          jc.getBoolean(
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.varname,
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.defaultBoolVal);
      boolean isChangeNull2Null =
          jc.getBoolean(
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.varname,
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.defaultBoolVal);
      GenericUDTFExplode.isChangSizeZero2Null = isChangSizeZero2Null;
      GenericUDTFExplode.isChangNull2Null = isChangeNull2Null;
      GenericUDTFPosExplode.isChangSizeZero2Null = isChangSizeZero2Null;
      GenericUDTFPosExplode.isChangNull2Null = isChangeNull2Null;

      mapredWork mrwork = Utilities.getMapRedWork(job);
      mo = new MapOperator();
      mo.setConf(mrwork);
      mo.setChildren(job);
      l4j.info(mo.dump(0));
      mo.initialize(jc, null);

      localWork = mrwork.getMapLocalWork();
      if (localWork == null) {
        return;
      }
      fetchOperators = new HashMap<String, FetchOperator>();
      for (Map.Entry<String, fetchWork> entry : localWork.getAliasToFetchWork().entrySet()) {
        fetchOperators.put(entry.getKey(), new FetchOperator(entry.getValue(), job));
        l4j.info("fetchoperator for " + entry.getKey() + " created");
      }
      for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
        Operator<? extends Serializable> forwardOp = localWork.getAliasToWork().get(entry.getKey());
        forwardOp.initialize(
            jc, new ObjectInspector[] {entry.getValue().getOutputObjectInspector()});
        l4j.info("fetchoperator for " + entry.getKey() + " initialized");
      }
    } catch (Throwable e) {
      abort = true;
      if (e instanceof OutOfMemoryError) {
        throw (OutOfMemoryError) e;
      } else {
        throw new RuntimeException("Map operator initialization failed", e);
      }
    }
  }
  @Override
  public RecordWriter getHiveRecordWriter(
      JobConf jc,
      Path finalOutPath,
      Class<? extends Writable> valueClass,
      boolean isCompressed,
      Properties tbl,
      Progressable progress)
      throws IOException {

    boolean usenewformat = jc.getBoolean("fdf.newformat", false);
    IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    IFieldMap map = new IFieldMap();
    ArrayList<TypeInfo> types;
    if (columnTypeProperty == null) {
      types = new ArrayList<TypeInfo>();
      map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
    } else types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    String compress = tbl.getProperty(ConstVar.Compress);
    if (compress != null && compress.equalsIgnoreCase("true")) head.setCompress((byte) 1);
    int i = 0;
    for (TypeInfo type : types) {
      byte fdftype = 0;
      String name = type.getTypeName();
      if (name.equals(Constants.TINYINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Byte;
      else if (name.equals(Constants.SMALLINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Short;
      else if (name.equals(Constants.INT_TYPE_NAME)) fdftype = ConstVar.FieldType_Int;
      else if (name.equals(Constants.BIGINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Long;
      else if (name.equals(Constants.FLOAT_TYPE_NAME)) fdftype = ConstVar.FieldType_Float;
      else if (name.equals(Constants.DOUBLE_TYPE_NAME)) fdftype = ConstVar.FieldType_Double;
      else if (name.equals(Constants.STRING_TYPE_NAME)) fdftype = ConstVar.FieldType_String;

      map.addFieldType(new IRecord.IFType(fdftype, i++));
    }
    head.setFieldMap(map);

    ArrayList<ArrayList<Integer>> columnprojects = null;
    String projectionString = jc.get(ConstVar.Projection);
    if (projectionString != null) {
      columnprojects = new ArrayList<ArrayList<Integer>>();
      String[] projectionList = projectionString.split(ConstVar.RecordSplit);
      for (String str : projectionList) {
        ArrayList<Integer> cp = new ArrayList<Integer>();
        String[] item = str.split(ConstVar.FieldSplit);
        for (String s : item) {
          cp.add(Integer.valueOf(s));
        }
        columnprojects.add(cp);
      }
    }

    if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
      final Configuration conf = new Configuration(jc);
      final IFormatDataFile ifdf = new IFormatDataFile(conf);
      ifdf.create(finalOutPath.toString(), head);
      return new RecordWriter() {

        @Override
        public void write(Writable w) throws IOException {}

        @Override
        public void close(boolean abort) throws IOException {
          ifdf.close();
        }
      };
    }

    final IColumnDataFile icdf = new IColumnDataFile(jc);
    icdf.create(finalOutPath.toString(), head, columnprojects);

    LOG.info(finalOutPath.toString());
    LOG.info("output file compress?\t" + compress);
    LOG.info("head:\t" + head.toStr());

    return new RecordWriter() {

      @Override
      public void write(Writable w) throws IOException {
        icdf.addRecord((IRecord) w);
      }

      @Override
      public void close(boolean abort) throws IOException {
        icdf.close();
      }
    };
  }
 public void configure(JobConf job) {
   setConf(job);
   fastCheck = job.getBoolean("fs.test.fastCheck", false);
 }