Exemplos de JobConf.setCompressMapOutput em Java

Linguagem de programação: Java

Espaço para nome / nome do pacote: org.apache.hadoop.mapred

Classe / Tipo: JobConf

Método / Função: setCompressMapOutput

Exemplos em hotexamples.com: 4

JobConf.setCompressMapOutput em Java - 4 exemplos encontrados. Estes são os exemplos reais mais bem avaliados de org.apache.hadoop.mapred.JobConf.setCompressMapOutput em Java, extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

setMapOutputValueClass(30)

setOutputValueClass(30)

setJobName(30)

setMapperClass(30)

setInputFormat(30)

set(30)

setNumMapTasks(30)

setNumReduceTasks(30)

setOutputFormat(30)

setMapOutputKeyClass(30)

setOutputKeyClass(30)

getInt(30)

setReducerClass(30)

get(30)

setCombinerClass(27)

setInt(25)

setBoolean(23)

getBoolean(18)

setJarByClass(16)

getLong(14)

setLong(12)

setPartitionerClass(12)

setMapSpeculativeExecution(10)

getFloat(8)

setClass(7)

setJar(6)

setOutputKeyComparatorClass(6)

setReduceSpeculativeExecution(5)

getCredentials(5)

setOutputValueGroupingComparator(5)

getNumMapTasks(5)

setNumTasksToExecutePerJvm(4)

getJobName(4)

setMapRunnerClass(4)

addResource(4)

getNumReduceTasks(4)

setMaxMapAttempts(4)

setCompressMapOutput(4)

getInputFormat(4)

setSpeculativeExecution(4)

setStrings(3)

setClassLoader(3)

setOutputPath(3)

getMapOutputValueClass(3)

getMapOutputKeyClass(3)

setJobPriority(3)

setFloat(3)

setQueueName(2)

setMaxReduceAttempts(2)

addInputPath(2)

Métodos Frequentes

setMapOutputValueClass (30)

setOutputValueClass (30)

setJobName (30)

setMapperClass (30)

setInputFormat (30)

set (30)

setNumMapTasks (30)

setNumReduceTasks (30)

setOutputFormat (30)

setMapOutputKeyClass (30)

Métodos Frequentes

setOutputKeyClass (30)

getInt (30)

setReducerClass (30)

get (30)

setCombinerClass (27)

setInt (25)

setBoolean (23)

getBoolean (18)

setJarByClass (16)

getLong (14)

setLong (12)

setPartitionerClass (12)

setMapSpeculativeExecution (10)

getFloat (8)

setClass (7)

setJar (6)

setOutputKeyComparatorClass (6)

setReduceSpeculativeExecution (5)

getCredentials (5)

setOutputValueGroupingComparator (5)

Métodos Frequentes

setLong (12)

setPartitionerClass (12)

setMapSpeculativeExecution (10)

getFloat (8)

setClass (7)

setJar (6)

setOutputKeyComparatorClass (6)

setReduceSpeculativeExecution (5)

getCredentials (5)

setOutputValueGroupingComparator (5)

getNumMapTasks (5)

setNumTasksToExecutePerJvm (4)

getJobName (4)

setMapRunnerClass (4)

addResource (4)

getNumReduceTasks (4)

setMaxMapAttempts (4)

setCompressMapOutput (4)

getInputFormat (4)

setSpeculativeExecution (4)

setStrings (3)

setClassLoader (3)

setOutputPath (3)

getMapOutputValueClass (3)

getMapOutputKeyClass (3)

setJobPriority (3)

setFloat (3)

setQueueName (2)

setMaxReduceAttempts (2)

addInputPath (2)

Relacionados em outras linguagens

hasSecondarySidebar (PHP)

CloakerIpModel (PHP)

LogOnReplyBeginMessage (C#)

ShortyTheRobot (C#)

DataNode (C++)

mexCallMATLAB (C++)

PolyLine (Go)

Agglomerate (Go)

untabifyBed (Python)

warning (Python)

Métodos Frequentes

getNumMapTasks (5)

setNumTasksToExecutePerJvm (4)

getJobName (4)

setMapRunnerClass (4)

addResource (4)

getNumReduceTasks (4)

setMaxMapAttempts (4)

setCompressMapOutput (4)

getInputFormat (4)

setSpeculativeExecution (4)

setStrings (3)

setClassLoader (3)

setOutputPath (3)

getMapOutputValueClass (3)

getMapOutputKeyClass (3)

setJobPriority (3)

setFloat (3)

setQueueName (2)

setMaxReduceAttempts (2)

addInputPath (2)

getJar (2)

getStrings (2)

getClass (2)

getUseNewMapper (2)

getClassLoader (2)

getOutputKeyComparator (2)

setProfileParams (1)

getMapRunnerClass (1)

getKeepFailedTaskFiles (1)

setProfileEnabled (1)

setLoopReduceCacheSwitch (1)

setProfileTaskRange (1)

getCombinerKeyGroupingComparator (1)

getClassByName (1)

setStepConf (1)

setUser (1)

setWorkingDirectory (1)

getOutputFormat (1)

getQueueName (1)

getOutputPath (1)

Relacionados

BaseStepDialog

DatabaseInteractor

NavAlg

Util

GuiComponentSprite

YesWorkflowDB

ICFBamTableObj

LongTermCredentialSession

ModelDescriptionConstants.SUCCESS

Reporter

Exemplo n.º 1

0

Exibir arquivo

Arquivo: AFormatterWG.java Projeto: ezubaric/Cloud9

public int run(String[] args) throws Exception { if (args.length != 5) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); String stoplistPath = args[4]; sLogger.info("Tool: AFormatter"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(AFormatterWG.class); conf.setJobName("Authority Formatter -- Web Graph"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); // conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(HITSNode.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setCompressMapOutput(true); conf.setSpeculativeExecution(false); // InputSampler.Sampler<IntWritable, Text> sampler = new // InputSampler.RandomSampler<IntWritable, Text>(0.1, 10, 10); // InputSampler.writePartitionFile(conf, sampler); // conf.setPartitionerClass(TotalOrderPartitioner.class); conf.setMapperClass(AFormatMapperIMC.class); conf.setCombinerClass(AFormatReducer.class); conf.setReducerClass(AFormatReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); Path stopList = new Path(stoplistPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); sLogger.info("Starting job"); DistributedCache.addCacheFile(stopList.toUri(), conf); JobClient.runJob(conf); sLogger.info( "Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }

Exemplo n.º 2

0

Exibir arquivo

Arquivo: ConvolutionJob.java Projeto: holdonbear/NeuroHadoop

@Override public int run(String[] args) throws Exception { System.out.println("\n\nConvolutionJob\n"); JobConf conf = new JobConf(getConf(), ConvolutionJob.class); conf.setJobName("ConvolutionJob"); this.cacheKernel(conf); this.CreateRats(conf); conf.setMapperClass(ConvolutionMapper.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println( "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } conf.setNumReduceTasks(0); conf.setInputFormat(NonSplittableTextInputFormat.class); conf.setOutputFormat(MultiFileOutput.class); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); conf.setCompressMapOutput(true); conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec"); conf.set("mapred.output.compression.type", "BLOCK"); FileInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); // FileOutputFormat.setCompressOutput(conf, true); JobClient.runJob(conf); return 0; }

Exemplo n.º 3

0

Exibir arquivo

Arquivo: Similarity.java Projeto: gdfm/similarity-self-join

@Override public int run(String[] args) throws IOException { OptionParser p = new OptionParser(); OptionSpec<String> maxwiOpt = p.accepts(maxwiOptName, "location of maxWi map file (HDFS) REQUIRED") .withRequiredArg() .ofType(String.class); OptionSpec<Float> thresholdOpt = p.accepts(thresholdOptName, "similarity threshold") .withRequiredArg() .ofType(Float.class) .defaultsTo(DEFAULT_THRESHOLD); OptionSpec<Integer> stripesOpt = p.accepts(stripesOptName, "number of stripes to divide the similarity matrix") .withRequiredArg() .ofType(Integer.class) .defaultsTo(1); OptionSpec<Integer> spreadOpt = p.accepts(spreadOptName, "number of reducers per stripe") .withRequiredArg() .ofType(Integer.class) .defaultsTo(DEFAULT_SPREAD); OptionSpec<Integer> factorOpt = p.accepts(factorOptName, "number of mappers per reducer") .withRequiredArg() .ofType(Integer.class) .defaultsTo(DEFAULT_FACTOR); OptionSpec<Integer> maxVectorIDOpt = p.accepts(maxVectorIDOptName, "maximum vector ID").withRequiredArg().ofType(Integer.class); p.acceptsAll(Arrays.asList("h", "?"), "show help"); OptionSet options = parseOptions(p, args); // to distinguish indexes built in successive runs DateFormat df = new SimpleDateFormat("yyyyMMdd-HHmmss"); Date date = new Date(); float threshold = options.valueOf(thresholdOpt); // threshold if (threshold < 0 || threshold >= 1) { System.err.println(thresholdOptName + " should be between 0 and 1"); System.exit(1); } int numStripes = options.valueOf(stripesOpt); // number of stripes if (numStripes < 1) { System.err.println(stripesOptName + " should be > 0"); System.exit(1); } // MapReduce parameters int spread = options.valueOf(spreadOpt); // how many reducers per stripe if (spread < 1) { System.err.println(spreadOptName + " should be > 0"); System.exit(1); } int factor = options.valueOf(factorOpt); // how many mappers per reducer if (factor < 1) { System.err.println(factorOptName + " should be > 0"); System.exit(1); } int maxKey = 0; if (options.has(maxVectorIDOpt)) { 
maxKey = options.valueOf(maxVectorIDOpt); // maximum value of the vector ID if (maxKey < 1) { System.err.println(maxVectorIDOptName + " should be > 0"); System.exit(1); } } int numReducers = GenericKey.StripePartitioner.numReducers(numStripes, spread); int numMappers = numReducers * factor; int numBuckets = numMappers; // pick the file with max weights from command line String maxWiDir = options.valueOf(maxwiOpt); List<String> nonOptArgs = options.nonOptionArguments(); LOG.info("Threshold set to " + threshold); LOG.info( String.format( "Buckets: %1$-10s Factor: %2$-10s Stripes: %3$-10s Spread: %4$-10s Reducers: %5$-10s", numBuckets, factor, numStripes, spread, numReducers)); // start building the jobs JobConf conf1 = new JobConf(getConf(), Similarity.class); conf1.setFloat(PARAM_APS_THRESHOLD, threshold); conf1.setInt(PARAM_APS_STRIPES, numStripes); DistributedCache.addCacheFile(URI.create(maxWiDir), conf1); Path inputPath = new Path(nonOptArgs.get(0)); Path indexPath = new Path( nonOptArgs.get(0) + "-index-" + threshold + "-s" + numStripes + "_" + df.format(date)); // index filtering pruned nested directory Path indexOnlyPath = new Path(indexPath, "part*"); Path outputPath = new Path(nonOptArgs.get(1) + "-" + threshold + "-s" + numStripes); FileInputFormat.setInputPaths(conf1, inputPath); FileOutputFormat.setOutputPath(conf1, indexPath); conf1.setInputFormat(SequenceFileInputFormat.class); conf1.setOutputFormat(SequenceFileOutputFormat.class); conf1.setMapOutputKeyClass(LongWritable.class); conf1.setMapOutputValueClass(IndexItem.class); conf1.setOutputKeyClass(LongWritable.class); conf1.setOutputValueClass(IndexItemArrayWritable.class); conf1.setMapperClass(IndexerMapper.class); conf1.setReducerClass(IndexerReducer.class); // assuming input is sorted according to the key (vectorID) so that the // part files are locally sorted MultipleOutputs.addNamedOutput( conf1, PRUNED, SequenceFileOutputFormat.class, IntWritable.class, VectorComponentArrayWritable.class); // 
remove the stuff we added from the job name conf1.set( "mapred.job.name", "APS-" + indexPath.getName().substring(0, indexPath.getName().length() - 16)); conf1.setNumTasksToExecutePerJvm(-1); // JVM reuse conf1.setSpeculativeExecution(false); conf1.setCompressMapOutput(true); // hash the posting lists in different buckets to distribute the load conf1.setNumReduceTasks(numBuckets); RunningJob job1 = JobClient.runJob(conf1); // part 2 JobConf conf2 = new JobConf(getConf(), Similarity.class); if (numStripes > 0) FileUtils.mergeRestFile(conf2, indexPath, PRUNED, INDEX_INTERVAL); MultipleInputs.addInputPath( conf2, indexOnlyPath, SequenceFileInputFormat.class, SimilarityMapperIndex.class); MultipleInputs.addInputPath( conf2, inputPath, SequenceFileInputFormat.class, SimilarityMapperInput.class); FileOutputFormat.setOutputPath(conf2, outputPath); conf2.setCombinerClass(SimilarityCombiner.class); conf2.setReducerClass(SimilarityReducer.class); conf2.setPartitionerClass(GenericKey.StripePartitioner.class); conf2.setOutputKeyComparatorClass(GenericKey.Comparator.class); conf2.setOutputValueGroupingComparator(GenericKey.PrimaryComparator.class); conf2.setMapOutputKeyClass(GenericKey.class); conf2.setMapOutputValueClass(GenericValue.class); conf2.setOutputKeyClass(VectorPair.class); conf2.setOutputValueClass(NullWritable.class); Counter numDocs = job1.getCounters() .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS"); maxKey = maxKey > 0 ? 
maxKey : (int) numDocs.getValue(); LOG.info("Setting max key value in input to " + maxKey); conf2.setInt(PARAM_APS_MAXKEY, maxKey); conf2.setInt(PARAM_APS_STRIPES, numStripes); conf2.setFloat(PARAM_APS_THRESHOLD, threshold); conf2.setInt(PARAM_APS_REDUCER_PER_STRIPE, spread); conf2.set("mapred.job.name", "APS-" + outputPath.getName()); conf2.setNumTasksToExecutePerJvm(-1); // JVM reuse conf2.setSpeculativeExecution(false); conf2.setCompressMapOutput(true); conf2.setNumReduceTasks(numReducers); JobClient.runJob(conf2); return 0; }

Exemplo n.º 4

0

Exibir arquivo

Arquivo: JobBuilder.java Projeto: Prasadidasi/commoncrawl-crawler

/**
 * Forwards the map-output compression flag to the wrapped job configuration.
 *
 * @param compress value passed straight through to {@code setCompressMapOutput}
 * @return this builder, so that calls can be chained
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public JobBuilder compressMapOutput(boolean compress) throws IOException {
  this._jobConf.setCompressMapOutput(compress);
  return this;
}