Example #1
    @Override
    public void map(
        WritableComparable key,
        CompactorInputSplit split,
        OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
        Reporter reporter)
        throws IOException {
      // This will only get called once, since CompactRecordReader only returns one record,
      // the input split.
      // Based on the split we're passed we go instantiate the real reader and then iterate on it
      // until it finishes.
      @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
      AcidInputFormat<WritableComparable, V> aif =
          instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
      ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

      boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
      AcidInputFormat.RawReader<V> reader =
          aif.getRawReader(
              jobConf,
              isMajor,
              split.getBucket(),
              txnList,
              split.getBaseDir(),
              split.getDeltaDirs());
      RecordIdentifier identifier = reader.createKey();
      V value = reader.createValue();
      getWriter(reporter, reader.getObjectInspector(), split.getBucket());
      while (reader.next(identifier, value)) {
        if (isMajor && reader.isDelete(value)) continue;
        writer.write(value);
        reporter.progress();
      }
    }
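The map() above only reads configuration; what follows is a minimal driver-side sketch of how those keys might be populated before job submission, assuming the same string constants are accessible and using ORC's ACID formats as an illustrative choice.
    // Hypothetical driver-side setup; constant names mirror the keys read in map() above.
    JobConf job = new JobConf();
    job.set(INPUT_FORMAT_CLASS_NAME, OrcInputFormat.class.getName());    // assumed ACID input format
    job.set(OUTPUT_FORMAT_CLASS_NAME, OrcOutputFormat.class.getName());  // assumed ACID output format (consumed by getWriter)
    job.setBoolean(IS_MAJOR, true);                                       // true = major compaction
    job.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList().writeToString()); // snapshot of valid transactions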
Example #2
 /**
  * Mapper configuration. Extracts source and destination file system, as well as top-level paths
  * on source and destination directories. Gets the named file systems, to be used later in map.
  */
 public void configure(JobConf job) {
   destPath = new Path(job.get(DST_DIR_LABEL, "/"));
   try {
     destFileSys = destPath.getFileSystem(job);
   } catch (IOException ex) {
     throw new RuntimeException("Unable to get the named file system.", ex);
   }
   sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
   buffer = new byte[sizeBuf];
   ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
   preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
   if (preserve_status) {
     preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
   }
   update = job.getBoolean(Options.UPDATE.propertyname, false);
   overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
   this.job = job;
 }
    /**
     * {@inheritDoc}
     *
     * @see org.apache.hadoop.mapred.MapReduceBase#configure(org.apache.hadoop.mapred.JobConf)
     */
    @Override
    public void configure(JobConf job) {
      m_caseSensitive = job.getBoolean(JOB_PARAMETER_CASE_SENSITIVE, true);
      m_inputFile = job.get(MAP_PARAMETER_INPUT_FILE);

      if (job.getBoolean(JOB_PARAMETER_SKIP_PATTERNS, false)) {
        Path[] patternsFiles = new Path[0];
        try {
          patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
          System.err.println(
              "Caught exception while getting cached files: "
                  + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
          parseSkipFile(patternsFile);
        }
      }
    }
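A hedged driver-side counterpart for the configure() above, assuming JOB_PARAMETER_CASE_SENSITIVE and JOB_PARAMETER_SKIP_PATTERNS are plain property keys; the patterns path is illustrative.
    JobConf job = new JobConf();
    job.setBoolean(JOB_PARAMETER_CASE_SENSITIVE, false);   // fold case before matching
    job.setBoolean(JOB_PARAMETER_SKIP_PATTERNS, true);     // enables the DistributedCache branch above
    // Ship a patterns file so parseSkipFile() has something to read (hypothetical path).
    DistributedCache.addCacheFile(new Path("/user/joe/patterns.txt").toUri(), job);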
 public void checkOutputSpecs(FileSystem fs, JobConf conf) throws IOException {
   Args args = (Args) Utils.getObject(conf, ARGS_CONF);
   fs = Utils.getFS(args.outputDirHdfs, conf);
   if (conf.getBoolean("mapred.reduce.tasks.speculative.execution", true)) {
     // Speculative reduce tasks would write the output multiple times.
     throw new InvalidJobConfException("Speculative execution should be false");
   }
   if (fs.exists(new Path(args.outputDirHdfs))) {
     throw new InvalidJobConfException("Output dir already exists " + args.outputDirHdfs);
   }
 }
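Since checkOutputSpecs() rejects jobs with speculative reduces enabled, the submitting code presumably disables them first; a one-line sketch using the same property key:
   JobConf conf = new JobConf();
   conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); // single writer per reduce output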
  /**
   * Test if {@link CompressionEmulationUtil#configureCompressionEmulation(
   * org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.JobConf)} can extract compression
   * related configuration parameters.
   */
  @Test
  public void testExtractCompressionConfigs() {
    JobConf source = new JobConf();
    JobConf target = new JobConf();

    // set the default values
    source.setBoolean(FileOutputFormat.COMPRESS, false);
    source.set(FileOutputFormat.COMPRESS_CODEC, "MyDefaultCodec");
    source.set(FileOutputFormat.COMPRESS_TYPE, "MyDefaultType");
    source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyDefaultCodec2");

    CompressionEmulationUtil.configureCompressionEmulation(source, target);

    // check default values
    assertFalse(target.getBoolean(FileOutputFormat.COMPRESS, true));
    assertEquals("MyDefaultCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
    assertEquals("MyDefaultType", target.get(FileOutputFormat.COMPRESS_TYPE));
    assertFalse(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true));
    assertEquals("MyDefaultCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
    assertFalse(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));

    // set new values
    source.setBoolean(FileOutputFormat.COMPRESS, true);
    source.set(FileOutputFormat.COMPRESS_CODEC, "MyCodec");
    source.set(FileOutputFormat.COMPRESS_TYPE, "MyType");
    source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyCodec2");
    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(source, "file.gz");

    target = new JobConf(); // reset
    CompressionEmulationUtil.configureCompressionEmulation(source, target);

    // check new values
    assertTrue(target.getBoolean(FileOutputFormat.COMPRESS, false));
    assertEquals("MyCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
    assertEquals("MyType", target.get(FileOutputFormat.COMPRESS_TYPE));
    assertTrue(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false));
    assertEquals("MyCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
    assertTrue(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));
  }
Example #6
  @VisibleForTesting
  Fetcher(
      JobConf job,
      TaskAttemptID reduceId,
      ShuffleSchedulerImpl<K, V> scheduler,
      MergeManager<K, V> merger,
      Reporter reporter,
      ShuffleClientMetrics metrics,
      ExceptionReporter exceptionReporter,
      SecretKey shuffleKey,
      int id) {
    this.jobConf = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = id;
    this.reduce = reduceId.getTaskID().getId();
    this.shuffleSecretKey = shuffleKey;
    ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs =
        reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs =
        reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    this.connectionTimeout =
        job.getInt(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT, DEFAULT_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(MRJobConfig.SHUFFLE_READ_TIMEOUT, DEFAULT_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
      sslShuffle =
          job.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT);
      if (sslShuffle && sslFactory == null) {
        sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
        try {
          sslFactory.init();
        } catch (Exception ex) {
          sslFactory.destroy();
          throw new RuntimeException(ex);
        }
      }
    }
  }
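The Fetcher reads its timeouts and SSL flag straight from the JobConf; below is a hedged sketch of those knobs with illustrative values, using the same keys.
    JobConf job = new JobConf();
    job.setInt(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT, 180000);   // ms to wait when connecting to a shuffle host
    job.setInt(MRJobConfig.SHUFFLE_READ_TIMEOUT, 180000);      // ms to wait on a shuffle read
    job.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, false);   // plain-HTTP shuffle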
  public static void main(int step, Path inputDir, JobConf job) throws IOException {
    FileSystem hdfs = inputDir.getFileSystem(job);
    if (!hdfs.exists(Collector.partitionSizesPath)) {
      System.out.println("Partition sizes file does not exists!");
      return;
    }
    debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE);
    MapFile.Reader partitionSizeReader =
        new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(), new JobConf());
    Text partitionK = new Text();
    LongWritable partSizeV = new LongWritable();

    try {
      while (partitionSizeReader.next(partitionK, partSizeV)) {
        partitionsNames.add(partitionK.toString()); // useless?
        partitionsSizes.put(partitionK.toString(), partSizeV.get());
      }
    } catch (Exception e) {
      // Swallow read errors; the entries collected so far are still printed and used below.
    }
    for (int i = 0; i < partitionsNames.size(); i++) {
      System.out.println(
          "Partition "
              + partitionsNames.get(i)
              + " has "
              + partitionsSizes.get(partitionsNames.get(i))
              + " vectors.");
    }

    if (partitionsNames.size() <= 1) return;
    stage0();
    printUndirectedNeighbors("Stage0");
    printPartitionsStat("Stage0");

    printCircularPartitionsWeight("\nCircular");
    calcCWStandardDeviation();

    stage1();
    printDirectedNeighbors("Stage1");
    System.out.println("Stage 1 final weights: ");
    printPartitionsWeights("Stage1");
    if ((step == 2) || (step == 12)) {
      stage2();
      printDirectedNeighbors("Stage2");
      System.out.println("Stage 2 final weights: ");
      printPartitionsWeights("Stage2");
    }
    // stage3(job, hdfs);
    writeComparisonList(job, hdfs);
    // printComparisonList(job, hdfs);// remove
  }
Example #8
    private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket)
        throws IOException {
      if (writer == null) {
        AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
        options
            .inspector(inspector)
            .writingBase(jobConf.getBoolean(IS_MAJOR, false))
            .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
            .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
            .reporter(reporter)
            .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
            .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
            .bucket(bucket)
            .statementId(-1); // setting statementId == -1 makes compacted delta files use
        // delta_xxxx_yyyy format

        // Instantiate the underlying output format
        @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
        AcidOutputFormat<WritableComparable, V> aof =
            instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

        writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
      }
    }
Example #9
  @Override
  public void configure(JobConf job) {
    this.job = job;

    mapper = new ObjectMapper();
    messagePack = new MessagePack();

    // Use 'yyyy' (calendar year) rather than 'YYYY' (week-based year), which misbehaves near year boundaries.
    timeFmt = DateTimeFormat.forPattern("yyyyMMddHHmmssSSS");
    oidSerial = new AtomicInteger(0);
    serial = new AtomicInteger(0);

    storeAttirbute = job.getBoolean(CIngest.CONF_INGEST_STORE_ATTR, false);

    Iterator<String> tokens =
        Splitter.on("///").split(job.get(CIngest.CONF_CASSANDRA_CONNECT_TOKEN)).iterator();

    String clusterName = tokens.next();
    String seeds = tokens.next();
    String keyspaceName = tokens.next();
    String columnFamilyName = tokens.next();

    context =
        new AstyanaxContext.Builder()
            .forCluster(clusterName)
            .forKeyspace(keyspaceName)
            .withAstyanaxConfiguration(
                new AstyanaxConfigurationImpl().setDiscoveryType(NodeDiscoveryType.TOKEN_AWARE))
            .withConnectionPoolConfiguration(
                new ConnectionPoolConfigurationImpl("cp")
                    .setPort(9160)
                    .setMaxConnsPerHost(2)
                    .setSeeds(seeds))
            .withConnectionPoolMonitor(new CountingConnectionPoolMonitor())
            .buildKeyspace(ThriftFamilyFactory.getInstance());

    context.start();
    ks = context.getEntity();

    cf =
        new ColumnFamily<String, String>(
            columnFamilyName, StringSerializer.get(), StringSerializer.get());
  }
    public void configure(JobConf job) {
      sLogger.setLevel(Level.OFF);
      src = Language.languageForISO639_1(job.get(SRC_LANG));
      tgt = Language.languageForISO639_1(job.get(TGT_LANG));
      sLogger.debug("Source language: " + src.code());
      sLogger.debug("Target language: " + tgt.code());

      boolean useVocabServer = false;
      if (!useVocabServer) {
        if (vocE == null) vocE = new VocabularyWritable();
        if (vocF == null) vocF = new VocabularyWritable();
      } else {
        try {
          vocE =
              new VocabServerClient(
                  job.get("ha.vocabserver.host"),
                  Integer.parseInt(job.get("ha.vocabserver.port1")));
          vocF =
              new VocabServerClient(
                  job.get("ha.vocabserver.host"),
                  Integer.parseInt(job.get("ha.vocabserver.port2")));
        } catch (IOException e) {
          e.printStackTrace();
          throw new RuntimeException(e);
        }
      }
      lp = LanguagePair.languageForISO639_1Pair(src.code() + "-" + tgt.code());

      if (job.getBoolean("ha.trunc.use", true)) {
        sawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, src, job);
        tawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, tgt, job);
      } else {
        sawp = AlignmentWordPreprocessor.CreatePreprocessor(null, null, job);
        tawp = AlignmentWordPreprocessor.CreatePreprocessor(null, null, job);
      }
      job_ = job;
    }
Example #11
 public void configure(JobConf job) {
   interval = job.getInt("db.fetch.interval.default", 2592000);
   scoreInjected = job.getFloat("db.score.injected", 1.0f);
   overwrite = job.getBoolean("db.injector.overwrite", false);
   update = job.getBoolean("db.injector.update", false);
 }
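A hedged sketch of the corresponding driver-side settings, reusing the property keys this configure() reads; the values shown simply echo or flip the defaults above.
   JobConf job = new JobConf();
   job.setInt("db.fetch.interval.default", 2592000);   // 30 days, same as the default above
   job.setFloat("db.score.injected", 1.0f);
   job.setBoolean("db.injector.overwrite", false);
   job.setBoolean("db.injector.update", true);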
Example #12
 /**
  * Return whether native hadoop libraries, if present, can be used for this job.
  *
  * @param jobConf job configuration
  * @return <code>true</code> if native hadoop libraries, if present, can be used for this job;
  *     <code>false</code> otherwise.
  */
 public boolean getLoadNativeLibraries(JobConf jobConf) {
   return jobConf.getBoolean("hadoop.native.lib", true);
 }
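For symmetry, a hedged sketch of a companion setter using the same key (assuming the surrounding class does not already provide one):
 public void setLoadNativeLibraries(JobConf jobConf, boolean loadNativeLibraries) {
   jobConf.setBoolean("hadoop.native.lib", loadNativeLibraries);
 }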
  @Override
  public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {

    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
      TableMapReduceUtil.initCredentials(jobConf);
    }

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching =
        jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

    if (hbaseColumnsMapping == null) {
      throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
    }

    ColumnMappings columnMappings = null;
    try {
      columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
      throw new IOException(e);
    }

    int iKey = columnMappings.getKeyIndex();
    ColumnMapping keyMapping = columnMappings.getKeyMapping();

    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately.  Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    Scan scan =
        createFilterScan(
            jobConf,
            iKey,
            HiveHBaseInputFormatUtil.getStorageFormatOfKey(
                keyMapping.mappingSpec,
                jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    // REVIEW:  are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (ColumnMapping colMap : columnMappings) {
      if (colMap.hbaseRowKey) {
        continue;
      }

      if (colMap.qualifierName == null) {
        scan.addFamily(colMap.familyNameBytes);
        addedFamilies.add(colMap.familyName);
      } else {
        if (!addedFamilies.contains(colMap.familyName)) {
          // add the column only if the family has not already been added
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
        }
      }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];

    for (int i = 0; i < splits.size(); i++) {
      results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }

    return results;
  }
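getSplits() above depends on a handful of HBaseSerDe properties; here is a hedged sketch of the configuration it expects, with an illustrative table name and column mapping.
    JobConf jobConf = new JobConf();
    jobConf.set(HBaseSerDe.HBASE_TABLE_NAME, "my_hbase_table");            // hypothetical table
    jobConf.set(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cf:col1,cf:col2"); // row key plus two qualifiers
    jobConf.setBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);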
Example #14
  public MergeManagerImpl(
      TaskAttemptID reduceId,
      JobConf jobConf,
      FileSystem localFS,
      LocalDirAllocator localDirAllocator,
      Reporter reporter,
      CompressionCodec codec,
      Class<? extends Reducer> combinerClass,
      CombineOutputCollector<K, V> combineCollector,
      Counters.Counter spilledRecordsCounter,
      Counters.Counter reduceCombineInputCounter,
      Counters.Counter mergedMapOutputsCounter,
      ExceptionReporter exceptionReporter,
      Progress mergePhase,
      MapOutputFile mapOutputFile) {
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;

    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.setConf(jobConf);

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    final float maxInMemCopyUse = jobConf.getFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.90f);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
      throw new IllegalArgumentException(
          "Invalid value for " + MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit =
        (long)
            (jobConf.getLong(
                    MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES,
                    Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE))
                * maxInMemCopyUse);

    this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100);

    final float singleShuffleMemoryLimitPercent =
        jobConf.getFloat(
            MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
      throw new IllegalArgumentException(
          "Invalid value for "
              + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT
              + ": "
              + singleShuffleMemoryLimitPercent);
    }

    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold =
        jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor);
    this.mergeThreshold =
        (long) (this.memoryLimit * jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.90f));
    LOG.info(
        "MergerManager: memoryLimit="
            + memoryLimit
            + ", "
            + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit
            + ", "
            + "mergeThreshold="
            + mergeThreshold
            + ", "
            + "ioSortFactor="
            + ioSortFactor
            + ", "
            + "memToMemMergeOutputsThreshold="
            + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
      throw new RuntimeException(
          "Invlaid configuration: "
              + "maxSingleShuffleLimit should be less than mergeThreshold"
              + "maxSingleShuffleLimit: "
              + this.maxSingleShuffleLimit
              + "mergeThreshold: "
              + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
    if (allowMemToMemMerge) {
      this.memToMemMerger =
          new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
      this.memToMemMerger.start();
    } else {
      this.memToMemMerger = null;
    }

    this.inMemoryMerger = createInMemoryMerger();
    this.inMemoryMerger.start();

    this.onDiskMerger = new OnDiskMerger(this);
    this.onDiskMerger.start();

    this.mergePhase = mergePhase;
  }
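The constructor above is driven entirely by shuffle configuration; below is a hedged sketch of those knobs set on a JobConf with the same MRJobConfig keys (values are illustrative, not recommendations).
    JobConf conf = new JobConf();
    conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.70f);  // share of heap for in-memory map outputs
    conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.25f);  // cap on a single in-memory map output
    conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.66f);         // usage level that triggers an in-memory merge
    conf.setInt(MRJobConfig.IO_SORT_FACTOR, 100);                    // streams merged at once on disk
    conf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);     // optional memory-to-memory merger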
Example #15
 /** {@inheritDoc} */
 public void configure(JobConf job) {
   this.jobconf = job;
   ignoreFailures = jobconf.getBoolean(IGNORE_FAILURES_OPTION_LABEL, true);
   st = new Statistics();
 }
Example #16
  public void configure(JobConf job) {
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());

    try {
      l4j.info(
          "conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
      l4j.info(
          "thread classpath = "
              + Arrays.asList(
                  ((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
    } catch (Exception e) {
      l4j.info("cannot get classpath: " + e.getMessage());
    }
    try {
      jc = job;
      int estCountBucketSize = jc.getInt("hive.exec.estdistinct.bucketsize.log", 15);
      int estCountBufferSize = jc.getInt("hive.exec.estdistinct.buffsize.log", 8);
      GenericUDAFCardinalityEstimation.initParams(estCountBucketSize, estCountBufferSize);

      boolean isChangSizeZero2Null =
          jc.getBoolean(
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.varname,
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.defaultBoolVal);
      boolean isChangeNull2Null =
          jc.getBoolean(
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.varname,
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.defaultBoolVal);
      GenericUDTFExplode.isChangSizeZero2Null = isChangSizeZero2Null;
      GenericUDTFExplode.isChangNull2Null = isChangeNull2Null;
      GenericUDTFPosExplode.isChangSizeZero2Null = isChangSizeZero2Null;
      GenericUDTFPosExplode.isChangNull2Null = isChangeNull2Null;

      mapredWork mrwork = Utilities.getMapRedWork(job);
      mo = new MapOperator();
      mo.setConf(mrwork);
      mo.setChildren(job);
      l4j.info(mo.dump(0));
      mo.initialize(jc, null);

      localWork = mrwork.getMapLocalWork();
      if (localWork == null) {
        return;
      }
      fetchOperators = new HashMap<String, FetchOperator>();
      for (Map.Entry<String, fetchWork> entry : localWork.getAliasToFetchWork().entrySet()) {
        fetchOperators.put(entry.getKey(), new FetchOperator(entry.getValue(), job));
        l4j.info("fetchoperator for " + entry.getKey() + " created");
      }
      for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
        Operator<? extends Serializable> forwardOp = localWork.getAliasToWork().get(entry.getKey());
        forwardOp.initialize(
            jc, new ObjectInspector[] {entry.getValue().getOutputObjectInspector()});
        l4j.info("fetchoperator for " + entry.getKey() + " initialized");
      }
    } catch (Throwable e) {
      abort = true;
      if (e instanceof OutOfMemoryError) {
        throw (OutOfMemoryError) e;
      } else {
        throw new RuntimeException("Map operator initialization failed", e);
      }
    }
  }
  @Override
  public RecordWriter getHiveRecordWriter(
      JobConf jc,
      Path finalOutPath,
      Class<? extends Writable> valueClass,
      boolean isCompressed,
      Properties tbl,
      Progressable progress)
      throws IOException {

    boolean usenewformat = jc.getBoolean("fdf.newformat", false);
    IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    IFieldMap map = new IFieldMap();
    ArrayList<TypeInfo> types;
    if (columnTypeProperty == null) {
      types = new ArrayList<TypeInfo>();
      map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
    } else types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    String compress = tbl.getProperty(ConstVar.Compress);
    if (compress != null && compress.equalsIgnoreCase("true")) head.setCompress((byte) 1);
    int i = 0;
    for (TypeInfo type : types) {
      byte fdftype = 0;
      String name = type.getTypeName();
      if (name.equals(Constants.TINYINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Byte;
      else if (name.equals(Constants.SMALLINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Short;
      else if (name.equals(Constants.INT_TYPE_NAME)) fdftype = ConstVar.FieldType_Int;
      else if (name.equals(Constants.BIGINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Long;
      else if (name.equals(Constants.FLOAT_TYPE_NAME)) fdftype = ConstVar.FieldType_Float;
      else if (name.equals(Constants.DOUBLE_TYPE_NAME)) fdftype = ConstVar.FieldType_Double;
      else if (name.equals(Constants.STRING_TYPE_NAME)) fdftype = ConstVar.FieldType_String;

      map.addFieldType(new IRecord.IFType(fdftype, i++));
    }
    head.setFieldMap(map);

    ArrayList<ArrayList<Integer>> columnprojects = null;
    String projectionString = jc.get(ConstVar.Projection);
    if (projectionString != null) {
      columnprojects = new ArrayList<ArrayList<Integer>>();
      String[] projectionList = projectionString.split(ConstVar.RecordSplit);
      for (String str : projectionList) {
        ArrayList<Integer> cp = new ArrayList<Integer>();
        String[] item = str.split(ConstVar.FieldSplit);
        for (String s : item) {
          cp.add(Integer.valueOf(s));
        }
        columnprojects.add(cp);
      }
    }

    if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
      final Configuration conf = new Configuration(jc);
      final IFormatDataFile ifdf = new IFormatDataFile(conf);
      ifdf.create(finalOutPath.toString(), head);
      return new RecordWriter() {

        @Override
        public void write(Writable w) throws IOException {}

        @Override
        public void close(boolean abort) throws IOException {
          ifdf.close();
        }
      };
    }

    final IColumnDataFile icdf = new IColumnDataFile(jc);
    icdf.create(finalOutPath.toString(), head, columnprojects);

    LOG.info(finalOutPath.toString());
    LOG.info("output file compress?\t" + compress);
    LOG.info("head:\t" + head.toStr());

    return new RecordWriter() {

      @Override
      public void write(Writable w) throws IOException {
        icdf.addRecord((IRecord) w);
      }

      @Override
      public void close(boolean abort) throws IOException {
        icdf.close();
      }
    };
  }
 public void configure(JobConf job) {
   setConf(job);
   fastCheck = job.getBoolean("fs.test.fastCheck", false);
 }