@Override
public void map(WritableComparable key, CompactorInputSplit split,
                OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
                Reporter reporter) throws IOException {
  // This will only get called once, since CompactRecordReader only returns one record,
  // the input split.
  // Based on the split we're passed we go instantiate the real reader and then iterate on it
  // until it finishes.
  @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
  AcidInputFormat<WritableComparable, V> aif =
      instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
  ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

  boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
  AcidInputFormat.RawReader<V> reader =
      aif.getRawReader(jobConf, isMajor, split.getBucket(), txnList, split.getBaseDir(),
          split.getDeltaDirs());
  RecordIdentifier identifier = reader.createKey();
  V value = reader.createValue();
  getWriter(reporter, reader.getObjectInspector(), split.getBucket());
  while (reader.next(identifier, value)) {
    if (isMajor && reader.isDelete(value)) continue;
    writer.write(value);
    reporter.progress();
  }
}
/**
 * Mapper configuration. Extracts source and destination file system, as well as top-level paths
 * on source and destination directories. Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job) {
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  this.job = job;
}
/**
 * {@inheritDoc}
 *
 * @see org.apache.hadoop.mapred.MapReduceBase#configure(org.apache.hadoop.mapred.JobConf)
 */
@Override
public void configure(JobConf job) {
  m_caseSensitive = job.getBoolean(JOB_PARAMETER_CASE_SENSITIVE, true);
  m_inputFile = job.get(MAP_PARAMETER_INPUT_FILE);

  if (job.getBoolean(JOB_PARAMETER_SKIP_PATTERNS, false)) {
    Path[] patternsFiles = new Path[0];
    try {
      patternsFiles = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException ioe) {
      System.err.println("Caught exception while getting cached files: "
          + StringUtils.stringifyException(ioe));
    }
    for (Path patternsFile : patternsFiles) {
      parseSkipFile(patternsFile);
    }
  }
}
public void checkOutputSpecs(FileSystem fs, JobConf conf) throws IOException {
  Args args = (Args) Utils.getObject(conf, ARGS_CONF);
  fs = Utils.getFS(args.outputDirHdfs, conf);
  if (conf.getBoolean("mapred.reduce.tasks.speculative.execution", true)) {
    // Because we don't want to write a bunch of extra times.
    throw new InvalidJobConfException("Speculative execution should be false");
  }
  if (fs.exists(new Path(args.outputDirHdfs))) {
    throw new InvalidJobConfException("Output dir already exists " + args.outputDirHdfs);
  }
}
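// Hedged illustration (not part of the original source): a job configuration that satisfies the
// speculative-execution check in checkOutputSpecs above. The helper method name is hypothetical;
// the property key and required value come directly from that check.
static void disableReduceSpeculation(JobConf conf) {
  conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); // required by the check above
}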
/**
 * Test if {@link CompressionEmulationUtil#configureCompressionEmulation(
 * org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.JobConf)} can extract compression
 * related configuration parameters.
 */
@Test
public void testExtractCompressionConfigs() {
  JobConf source = new JobConf();
  JobConf target = new JobConf();

  // set the default values
  source.setBoolean(FileOutputFormat.COMPRESS, false);
  source.set(FileOutputFormat.COMPRESS_CODEC, "MyDefaultCodec");
  source.set(FileOutputFormat.COMPRESS_TYPE, "MyDefaultType");
  source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
  source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyDefaultCodec2");

  CompressionEmulationUtil.configureCompressionEmulation(source, target);

  // check default values
  assertFalse(target.getBoolean(FileOutputFormat.COMPRESS, true));
  assertEquals("MyDefaultCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
  assertEquals("MyDefaultType", target.get(FileOutputFormat.COMPRESS_TYPE));
  assertFalse(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true));
  assertEquals("MyDefaultCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
  assertFalse(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));

  // set new values
  source.setBoolean(FileOutputFormat.COMPRESS, true);
  source.set(FileOutputFormat.COMPRESS_CODEC, "MyCodec");
  source.set(FileOutputFormat.COMPRESS_TYPE, "MyType");
  source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyCodec2");
  org.apache.hadoop.mapred.FileInputFormat.setInputPaths(source, "file.gz");

  target = new JobConf(); // reset
  CompressionEmulationUtil.configureCompressionEmulation(source, target);

  // check new values
  assertTrue(target.getBoolean(FileOutputFormat.COMPRESS, false));
  assertEquals("MyCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
  assertEquals("MyType", target.get(FileOutputFormat.COMPRESS_TYPE));
  assertTrue(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false));
  assertEquals("MyCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
  assertTrue(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));
}
@VisibleForTesting
Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
        MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
        ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id) {
  this.jobConf = job;
  this.reporter = reporter;
  this.scheduler = scheduler;
  this.merger = merger;
  this.metrics = metrics;
  this.exceptionReporter = exceptionReporter;
  this.id = id;
  this.reduce = reduceId.getTaskID().getId();
  this.shuffleSecretKey = shuffleKey;
  ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
  wrongLengthErrs =
      reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
  badIdErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
  wrongMapErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
  connectionErrs =
      reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
  wrongReduceErrs =
      reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

  this.connectionTimeout =
      job.getInt(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT, DEFAULT_STALLED_COPY_TIMEOUT);
  this.readTimeout = job.getInt(MRJobConfig.SHUFFLE_READ_TIMEOUT, DEFAULT_READ_TIMEOUT);

  setName("fetcher#" + id);
  setDaemon(true);

  synchronized (Fetcher.class) {
    sslShuffle =
        job.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT);
    if (sslShuffle && sslFactory == null) {
      sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
      try {
        sslFactory.init();
      } catch (Exception ex) {
        sslFactory.destroy();
        throw new RuntimeException(ex);
      }
    }
  }
}
public static void main(int step, Path inputDir, JobConf job) throws IOException {
  FileSystem hdfs = inputDir.getFileSystem(job);
  if (!hdfs.exists(Collector.partitionSizesPath)) {
    System.out.println("Partition sizes file does not exist!");
    return;
  }
  debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE);

  MapFile.Reader partitionSizeReader =
      new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(), new JobConf());
  Text partitionK = new Text();
  LongWritable partSizeV = new LongWritable();

  try {
    while (partitionSizeReader.next(partitionK, partSizeV)) {
      partitionsNames.add(partitionK.toString()); // useless?
      partitionsSizes.put(partitionK.toString(), partSizeV.get());
    }
  } catch (Exception e) {
    // ignore read failures and continue with whatever was loaded
  }

  for (int i = 0; i < partitionsNames.size(); i++) {
    System.out.println("Partition " + partitionsNames.get(i) + " has "
        + partitionsSizes.get(partitionsNames.get(i)) + " vectors.");
  }

  if (partitionsNames.size() <= 1) {
    return;
  }

  stage0();
  printUndirectedNeighbors("Stage0");
  printPartitionsStat("Stage0");
  printCircularPartitionsWeight("\nCircular");
  calcCWStandardDeviation();

  stage1();
  printDirectedNeighbors("Stage1");
  System.out.println("Stage 1 final weights: ");
  printPartitionsWeights("Stage1");

  if ((step == 2) || (step == 12)) {
    stage2();
    printDirectedNeighbors("Stage2");
    System.out.println("Stage 2 final weights: ");
    printPartitionsWeights("Stage2");
  }
  // stage3(job, hdfs);

  writeComparisonList(job, hdfs);
  // printComparisonList(job, hdfs); // remove
}
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  if (writer == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options
        .inspector(inspector)
        .writingBase(jobConf.getBoolean(IS_MAJOR, false))
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        .statementId(-1); // setting statementId == -1 makes compacted delta files use
                          // delta_xxxx_yyyy format

    // Instantiate the underlying output format
    @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
@Override
public void configure(JobConf job) {
  this.job = job;
  mapper = new ObjectMapper();
  messagePack = new MessagePack();
  timeFmt = DateTimeFormat.forPattern("YYYYMMddHHmmssSSS");
  oidSerial = new AtomicInteger(0);
  serial = new AtomicInteger(0);

  storeAttirbute = job.getBoolean(CIngest.CONF_INGEST_STORE_ATTR, false);

  Iterator<String> tokens =
      Splitter.on("///").split(job.get(CIngest.CONF_CASSANDRA_CONNECT_TOKEN)).iterator();
  String clusterName = tokens.next();
  String seeds = tokens.next();
  String keyspaceName = tokens.next();
  String columeFamilyName = tokens.next();

  context = new AstyanaxContext.Builder()
      .forCluster(clusterName)
      .forKeyspace(keyspaceName)
      .withAstyanaxConfiguration(
          new AstyanaxConfigurationImpl().setDiscoveryType(NodeDiscoveryType.TOKEN_AWARE))
      .withConnectionPoolConfiguration(
          new ConnectionPoolConfigurationImpl("cp")
              .setPort(9160)
              .setMaxConnsPerHost(2)
              .setSeeds(seeds))
      .withConnectionPoolMonitor(new CountingConnectionPoolMonitor())
      .buildKeyspace(ThriftFamilyFactory.getInstance());

  context.start();
  ks = context.getEntity();
  cf = new ColumnFamily<String, String>(
      columeFamilyName, StringSerializer.get(), StringSerializer.get());
}
public void configure(JobConf job) {
  sLogger.setLevel(Level.OFF);

  src = Language.languageForISO639_1(job.get(SRC_LANG));
  tgt = Language.languageForISO639_1(job.get(TGT_LANG));
  sLogger.debug("Source language: " + src.code());
  sLogger.debug("Target language: " + tgt.code());

  boolean useVocabServer = false;
  if (!useVocabServer) {
    if (vocE == null) vocE = new VocabularyWritable();
    if (vocF == null) vocF = new VocabularyWritable();
  } else {
    try {
      vocE = new VocabServerClient(job.get("ha.vocabserver.host"),
          Integer.parseInt(job.get("ha.vocabserver.port1")));
      vocF = new VocabServerClient(job.get("ha.vocabserver.host"),
          Integer.parseInt(job.get("ha.vocabserver.port2")));
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
  }

  lp = LanguagePair.languageForISO639_1Pair(src.code() + "-" + tgt.code());
  if (job.getBoolean("ha.trunc.use", true)) {
    sawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, src, job);
    tawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, tgt, job);
  } else {
    sawp = AlignmentWordPreprocessor.CreatePreprocessor(null, null, job);
    tawp = AlignmentWordPreprocessor.CreatePreprocessor(null, null, job);
  }
  job_ = job;
}
public void configure(JobConf job) {
  interval = job.getInt("db.fetch.interval.default", 2592000);
  scoreInjected = job.getFloat("db.score.injected", 1.0f);
  overwrite = job.getBoolean("db.injector.overwrite", false);
  update = job.getBoolean("db.injector.update", false);
}
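// Hypothetical driver-side counterpart (illustration only, not from the original source): shows how
// the keys read in the configure(JobConf) method above could be populated before job submission.
// The helper method name is an assumption; the key names and the interval default come from that
// method.
static JobConf buildInjectorConf() {
  JobConf conf = new JobConf();
  conf.setInt("db.fetch.interval.default", 2592000);  // re-fetch interval, in seconds
  conf.setFloat("db.score.injected", 1.0f);           // score assigned to newly injected URLs
  conf.setBoolean("db.injector.overwrite", false);    // keep existing CrawlDb entries
  conf.setBoolean("db.injector.update", true);        // merge injected metadata into existing entries
  return conf;
}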
/**
 * Return whether native hadoop libraries, if present, can be used for this job.
 *
 * @param jobConf job configuration
 * @return <code>true</code> if native hadoop libraries, if present, can be used for this job;
 *     <code>false</code> otherwise.
 */
public boolean getLoadNativeLibraries(JobConf jobConf) {
  return jobConf.getBoolean("hadoop.native.lib", true);
}
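// Minimal usage sketch (an added illustration, not from the original source): "hadoop.native.lib"
// is a plain boolean job property, so a job can opt out of native libraries before submission.
// The helper method name below is hypothetical.
static void disableNativeLibraries(JobConf conf) {
  conf.setBoolean("hadoop.native.lib", false); // overrides the default of true read by the accessor above
}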
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  // obtain delegation tokens for the job
  if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
    TableMapReduceUtil.initCredentials(jobConf);
  }

  String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
  setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
  String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
  boolean doColumnRegexMatching =
      jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

  if (hbaseColumnsMapping == null) {
    throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
  }

  ColumnMappings columnMappings = null;
  try {
    columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
  } catch (SerDeException e) {
    throw new IOException(e);
  }

  int iKey = columnMappings.getKeyIndex();
  ColumnMapping keyMapping = columnMappings.getKeyMapping();

  // Take filter pushdown into account while calculating splits; this
  // allows us to prune off regions immediately. Note that although
  // the Javadoc for the superclass getSplits says that it returns one
  // split per region, the implementation actually takes the scan
  // definition into account and excludes regions which don't satisfy
  // the start/stop row conditions (HBASE-1829).
  Scan scan = createFilterScan(jobConf, iKey,
      HiveHBaseInputFormatUtil.getStorageFormatOfKey(keyMapping.mappingSpec,
          jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

  // The list of families that have been added to the scan
  List<String> addedFamilies = new ArrayList<String>();

  // REVIEW: are we supposed to be applying the getReadColumnIDs
  // same as in getRecordReader?
  for (ColumnMapping colMap : columnMappings) {
    if (colMap.hbaseRowKey) {
      continue;
    }

    if (colMap.qualifierName == null) {
      scan.addFamily(colMap.familyNameBytes);
      addedFamilies.add(colMap.familyName);
    } else {
      if (!addedFamilies.contains(colMap.familyName)) {
        // add the column only if the family has not already been added
        scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
      }
    }
  }
  setScan(scan);

  Job job = new Job(jobConf);
  JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
  Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

  List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
  InputSplit[] results = new InputSplit[splits.size()];

  for (int i = 0; i < splits.size(); i++) {
    results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
  }

  return results;
}
public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
    LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec,
    Class<? extends Reducer> combinerClass, CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter, Counters.Counter reduceCombineInputCounter,
    Counters.Counter mergedMapOutputsCounter, ExceptionReporter exceptionReporter,
    Progress mergePhase, MapOutputFile mapOutputFile) {
  this.reduceId = reduceId;
  this.jobConf = jobConf;
  this.localDirAllocator = localDirAllocator;
  this.exceptionReporter = exceptionReporter;
  this.reporter = reporter;
  this.codec = codec;
  this.combinerClass = combinerClass;
  this.combineCollector = combineCollector;
  this.reduceCombineInputCounter = reduceCombineInputCounter;
  this.spilledRecordsCounter = spilledRecordsCounter;
  this.mergedMapOutputsCounter = mergedMapOutputsCounter;
  this.mapOutputFile = mapOutputFile;
  this.mapOutputFile.setConf(jobConf);
  this.localFS = localFS;
  this.rfs = ((LocalFileSystem) localFS).getRaw();

  final float maxInMemCopyUse =
      jobConf.getFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.90f);
  if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
    throw new IllegalArgumentException("Invalid value for "
        + MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
  }

  // Allow unit tests to fix Runtime memory
  this.memoryLimit = (long) (jobConf.getLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES,
      Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse);

  this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100);

  final float singleShuffleMemoryLimitPercent = jobConf.getFloat(
      MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT);
  if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
    throw new IllegalArgumentException("Invalid value for "
        + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent);
  }

  usedMemory = 0L;
  commitMemory = 0L;
  this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
  this.memToMemMergeOutputsThreshold =
      jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor);
  this.mergeThreshold =
      (long) (this.memoryLimit * jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.90f));
  LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", "
      + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", "
      + "mergeThreshold=" + mergeThreshold + ", "
      + "ioSortFactor=" + ioSortFactor + ", "
      + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

  if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
    throw new RuntimeException("Invalid configuration: "
        + "maxSingleShuffleLimit should be less than mergeThreshold; "
        + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
        + ", mergeThreshold: " + this.mergeThreshold);
  }

  boolean allowMemToMemMerge = jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
  if (allowMemToMemMerge) {
    this.memToMemMerger =
        new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
    this.memToMemMerger.start();
  } else {
    this.memToMemMerger = null;
  }

  this.inMemoryMerger = createInMemoryMerger();
  this.inMemoryMerger.start();

  this.onDiskMerger = new OnDiskMerger(this);
  this.onDiskMerger.start();

  this.mergePhase = mergePhase;
}
/** {@inheritDoc} */
public void configure(JobConf job) {
  this.jobconf = job;
  ignoreFailures = jobconf.getBoolean(IGNORE_FAILURES_OPTION_LABEL, true);
  st = new Statistics();
}
public void configure(JobConf job) {
  memoryMXBean = ManagementFactory.getMemoryMXBean();
  l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());

  try {
    l4j.info("conf classpath = "
        + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
    l4j.info("thread classpath = "
        + Arrays.asList(
            ((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
  } catch (Exception e) {
    l4j.info("cannot get classpath: " + e.getMessage());
  }

  try {
    jc = job;

    int estCountBucketSize = jc.getInt("hive.exec.estdistinct.bucketsize.log", 15);
    int estCountBufferSize = jc.getInt("hive.exec.estdistinct.buffsize.log", 8);
    GenericUDAFCardinalityEstimation.initParams(estCountBucketSize, estCountBufferSize);

    boolean isChangSizeZero2Null = jc.getBoolean(
        HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.varname,
        HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.defaultBoolVal);
    boolean isChangeNull2Null = jc.getBoolean(
        HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.varname,
        HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.defaultBoolVal);
    GenericUDTFExplode.isChangSizeZero2Null = isChangSizeZero2Null;
    GenericUDTFExplode.isChangNull2Null = isChangeNull2Null;
    GenericUDTFPosExplode.isChangSizeZero2Null = isChangSizeZero2Null;
    GenericUDTFPosExplode.isChangNull2Null = isChangeNull2Null;

    mapredWork mrwork = Utilities.getMapRedWork(job);
    mo = new MapOperator();
    mo.setConf(mrwork);
    mo.setChildren(job);
    l4j.info(mo.dump(0));
    mo.initialize(jc, null);

    localWork = mrwork.getMapLocalWork();
    if (localWork == null) {
      return;
    }

    fetchOperators = new HashMap<String, FetchOperator>();
    for (Map.Entry<String, fetchWork> entry : localWork.getAliasToFetchWork().entrySet()) {
      fetchOperators.put(entry.getKey(), new FetchOperator(entry.getValue(), job));
      l4j.info("fetchoperator for " + entry.getKey() + " created");
    }
    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
      Operator<? extends Serializable> forwardOp =
          localWork.getAliasToWork().get(entry.getKey());
      forwardOp.initialize(jc,
          new ObjectInspector[] {entry.getValue().getOutputObjectInspector()});
      l4j.info("fetchoperator for " + entry.getKey() + " initialized");
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Map operator initialization failed", e);
    }
  }
}
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tbl,
    Progressable progress) throws IOException {
  boolean usenewformat = jc.getBoolean("fdf.newformat", false);
  IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
  String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
  IFieldMap map = new IFieldMap();
  ArrayList<TypeInfo> types;
  if (columnTypeProperty == null) {
    types = new ArrayList<TypeInfo>();
    map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
  } else {
    types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  String compress = tbl.getProperty(ConstVar.Compress);
  if (compress != null && compress.equalsIgnoreCase("true")) {
    head.setCompress((byte) 1);
  }

  int i = 0;
  for (TypeInfo type : types) {
    byte fdftype = 0;
    String name = type.getTypeName();
    if (name.equals(Constants.TINYINT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Byte;
    } else if (name.equals(Constants.SMALLINT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Short;
    } else if (name.equals(Constants.INT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Int;
    } else if (name.equals(Constants.BIGINT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Long;
    } else if (name.equals(Constants.FLOAT_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Float;
    } else if (name.equals(Constants.DOUBLE_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_Double;
    } else if (name.equals(Constants.STRING_TYPE_NAME)) {
      fdftype = ConstVar.FieldType_String;
    }
    map.addFieldType(new IRecord.IFType(fdftype, i++));
  }
  head.setFieldMap(map);

  ArrayList<ArrayList<Integer>> columnprojects = null;
  String projectionString = jc.get(ConstVar.Projection);
  if (projectionString != null) {
    columnprojects = new ArrayList<ArrayList<Integer>>();
    String[] projectionList = projectionString.split(ConstVar.RecordSplit);
    for (String str : projectionList) {
      ArrayList<Integer> cp = new ArrayList<Integer>();
      String[] item = str.split(ConstVar.FieldSplit);
      for (String s : item) {
        cp.add(Integer.valueOf(s));
      }
      columnprojects.add(cp);
    }
  }

  if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
    final Configuration conf = new Configuration(jc);
    final IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.create(finalOutPath.toString(), head);
    return new RecordWriter() {
      @Override
      public void write(Writable w) throws IOException {
      }

      @Override
      public void close(boolean abort) throws IOException {
        ifdf.close();
      }
    };
  }

  final IColumnDataFile icdf = new IColumnDataFile(jc);
  icdf.create(finalOutPath.toString(), head, columnprojects);

  LOG.info(finalOutPath.toString());
  LOG.info("output file compress?\t" + compress);
  LOG.info("head:\t" + head.toStr());

  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      icdf.addRecord((IRecord) w);
    }

    @Override
    public void close(boolean abort) throws IOException {
      icdf.close();
    }
  };
}
public void configure(JobConf job) {
  setConf(job);
  fastCheck = job.getBoolean("fs.test.fastCheck", false);
}