public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  conf.set("I", args[3]);  // number of rows (= columns)
  conf.set("IB", args[4]); // row-block size of the matrix

  Job job = new Job(conf, "CalculateCC");
  job.setJarByClass(CorrelationCoefficient.class);
  job.setReducerClass(Reduce.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  // Read a different input file with each mapper.
  MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapAll.class);
  MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapDiag.class);
  FileOutputFormat.setOutputPath(job, new Path(args[2]));

  boolean success = job.waitForCompletion(true);
  System.out.println(success);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJarByClass(TestMultipleDriver.class);
  job.setJobName("Word Count");
  job.setMapperClass(TestKeyValueMapper.class);
  job.setReducerClass(TestMultipleReducer.class);

  // Define the InputFormat (and mapper) for each input path.
  MultipleInputs.addInputPath(job, new Path("pruebas/score.txt"),
      KeyValueTextInputFormat.class, TestKeyValueMapper.class);
  MultipleInputs.addInputPath(job, new Path("pruebas/poemasequencefile"),
      SequenceFileInputFormat.class, TestSeqFileMapper.class);
  // Not needed: MultipleInputs already registers the input paths above.
  // FileInputFormat.setInputPaths(job, new Path("pruebas/score.txt"));
  FileOutputFormat.setOutputPath(job, new Path("outkeyvalue2"));

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  boolean success = job.waitForCompletion(true);
  System.exit(success ? 0 : 1);
}
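/*
 * A minimal sketch, NOT the original TestKeyValueMapper/TestSeqFileMapper sources, of what the
 * two mappers wired up above could look like. The point it illustrates: with MultipleInputs each
 * mapper reads its own input format (KeyValueTextInputFormat vs. SequenceFileInputFormat), but
 * all mappers must emit the same map output types -- Text/Text in this driver -- so that a single
 * reducer can consume them. The SequenceFile key/value types below are assumptions.
 */
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

class TestKeyValueMapper extends Mapper<Text, Text, Text, Text> {
  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    // KeyValueTextInputFormat has already split each line into key and value on the tab character.
    context.write(key, value);
  }
}

class TestSeqFileMapper extends Mapper<LongWritable, Text, Text, Text> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Re-key each SequenceFile record by its first token (an illustrative choice only).
    String[] tokens = value.toString().split("\\s+", 2);
    context.write(new Text(tokens[0]), new Text(tokens.length > 1 ? tokens[1] : ""));
  }
}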
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "Repartition Join (projection by text)");
  job.setJarByClass(ReplicatedJoinBasic.class);

  // Input parameters
  Path donationsPath = new Path(args[0]);
  Path projectsPath = new Path(args[1]);
  Path outputPath = new Path(args[2]);

  // Mappers configuration
  MultipleInputs.addInputPath(job, donationsPath, SequenceFileInputFormat.class, DonationsMapper.class);
  MultipleInputs.addInputPath(job, projectsPath, SequenceFileInputFormat.class, ProjectsMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  // Reducer configuration
  job.setNumReduceTasks(3);
  job.setReducerClass(JoinReducer.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
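/*
 * Hedged sketch of the tagging pattern a repartition (reduce-side) join mapper typically follows:
 * emit the join key as the map output key and prefix the value with a side tag so JoinReducer can
 * tell donations from projects. This is NOT the original DonationsMapper; the pipe-delimited Text
 * records and the field positions are assumptions made only for illustration.
 */
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

class DonationsMapper extends Mapper<Text, Text, Text, Text> {
  private final Text outKey = new Text();
  private final Text outValue = new Text();

  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] fields = value.toString().split("\\|");
    outKey.set(fields[0]);           // assumed position of the project id (the join key)
    outValue.set("D|" + fields[1]);  // "D" tags records coming from the donations side
    context.write(outKey, outValue);
  }
}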
public static void main(String[] args)
    throws URISyntaxException, IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  conf.set("HADOOP_USER_NAME", "hadoop");

  // Delete the output directory if it already exists.
  FileSystem fs = FileSystem.get(new URI(OUTPATH), conf);
  if (fs.exists(new Path(OUTPATH))) {
    fs.delete(new Path(OUTPATH), true);
  }

  Job job = new Job(conf, LinkTest.class.getName());
  job.getConfiguration().set("joinType", "allOuter");

  // One mapper per input: user info records and user log records.
  MultipleInputs.addInputPath(job, new Path(INPUTPATH1), TextInputFormat.class, UserInfoMapper.class);
  MultipleInputs.addInputPath(job, new Path(INPUTPATH2), TextInputFormat.class, UserLogMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(UserLog.class);

  job.setReducerClass(UserReduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, new Path(OUTPATH));
  job.setOutputFormatClass(TextOutputFormat.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  if (args.length != 3) {
    System.err.println("Usage: query1 <HDFS input file1> <HDFS input file2> <HDFS output file>");
    System.exit(2);
  }

  int flag = JOptionPane.showConfirmDialog(null, "Do you want to enter a rectangle?");
  if (flag == JOptionPane.YES_OPTION) {
    x1 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'X1':"));
    y1 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'Y1':"));
    x2 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'X2':"));
    y2 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'Y2':"));
    if (x1 > x2) { // make sure x1 < x2
      int temp = x1;
      x1 = x2;
      x2 = temp;
    }
    if (y1 > y2) { // make sure y1 < y2
      int temp = y1;
      y1 = y2;
      y2 = temp;
    }
  } else {
    x1 = x2 = y1 = y2 = -1;
  }

  // Pass the rectangle bounds to the mappers through the job configuration.
  conf.setInt("mx1", x1);
  conf.setInt("mx2", x2);
  conf.setInt("my1", y1);
  conf.setInt("my2", y2);

  Job job = new Job(conf, "spatialjoin");
  job.setJarByClass(SpatialJoin.class);
  job.setMapperClass(TokenizerMapper.class);
  // job.setCombinerClass(FloatSumReducer.class);
  job.setReducerClass(TransTotalReducer.class);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);

  MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TokenizerMapper.class);
  MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, TokenizerMapper.class);
  FileOutputFormat.setOutputPath(job, new Path(args[2]));

  // Run the job once and reuse the result; calling waitForCompletion twice on the same
  // Job would fail because the job has already been submitted.
  boolean success = job.waitForCompletion(true);
  if (success) {
    System.out.println("=============Here is Your Input Rectangle=========================");
    System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
  }
  System.exit(success ? 0 : 1);
}
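/*
 * A minimal sketch, NOT the original TokenizerMapper, showing how the rectangle bounds the driver
 * stores with conf.setInt("mx1", ...) can be read back on the map side via
 * context.getConfiguration(). The comma-separated "id,x,y" record layout is an assumption.
 */
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

class TokenizerMapper extends Mapper<LongWritable, Text, Text, Text> {
  private int x1, y1, x2, y2;

  @Override
  protected void setup(Context context) {
    // Rectangle bounds set by the driver; -1 means "no rectangle entered".
    x1 = context.getConfiguration().getInt("mx1", -1);
    y1 = context.getConfiguration().getInt("my1", -1);
    x2 = context.getConfiguration().getInt("mx2", -1);
    y2 = context.getConfiguration().getInt("my2", -1);
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] parts = value.toString().split(",");
    int x = Integer.parseInt(parts[1].trim());
    int y = Integer.parseInt(parts[2].trim());
    // Keep only points inside the rectangle, or everything when no rectangle was given.
    if (x1 < 0 || (x >= x1 && x <= x2 && y >= y1 && y <= y2)) {
      context.write(new Text(parts[0]), value);
    }
  }
}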
@Override
protected int setJobInputData(Configuration config, Job job) throws InferenciaException {
  try {
    // Retrieve the files to process and add them as input data.
    final FileSystem fs = FileSystem.get(new URI(InferenciaCte.hdfsUri), config);
    // Fetch the files matching the source path (data/*.bz2).
    FileStatus[] glob = fs.globStatus(new Path(getRutaFicheros()));
    // If there is data...
    if (null != glob) {
      if (glob.length > 0) {
        for (FileStatus fileStatus : glob) {
          Path pFich = fileStatus.getPath();
          MultipleInputs.addInputPath(job, pFich, SequenceFileInputFormat.class, LoadMap.class);
        }
      } else {
        return noDataFound();
      }
    }
  } catch (IOException e) {
    throw new InferenciaException(e, e.getMessage());
  } catch (URISyntaxException e) {
    throw new InferenciaException(e, e.getMessage());
  }
  return InferenciaCte.SUCCESS;
}
private static void ResultJob() throws IOException, InterruptedException, ClassNotFoundException {
  /*depth = 9;
  wasError = true;*/
  conf = new Configuration();
  conf.setLong("my.vertex.num", num);
  if (isErrorOccurred) {
    conf.setBoolean("my.error.was", true);
  }
  fs = FileSystem.get(conf);
  job = Job.getInstance(conf, "Levelized Nested Dissection Result");
  job.setJarByClass(LevNestDissectJob.class);
  job.setReducerClass(LNDResultReducer.class);

  /*out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
  out_start = out.suffix("/" + outPath_start);*/
  if (wasError) {
    in = out.suffix("/" + outPath_count);
    MultipleInputs.addInputPath(job, in, SequenceFileInputFormat.class, StartVertexMapper.class);
    in_start = out_start;
    MultipleInputs.addInputPath(job, in_start, SequenceFileInputFormat.class, LNDResultMapper.class);
  }
  in_vertex = out.suffix("/" + outPath_vertex);
  MultipleInputs.addInputPath(job, in_vertex, SequenceFileInputFormat.class, LNDResultMapper.class);

  out = new Path(outPath == null ? (FILES_OUT + "result") : (outPath + "/" + "depth_" + "result"));
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  FileOutputFormat.setOutputPath(job, out);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
  // Sources:
  // Multiple inputs: http://www.lichun.cc/blog/2012/05/hadoop-multipleinputs-usage/
  // Custom classes:
  // http://www.cs.bgu.ac.il/~dsp112/Forum?action=show-thread&id=5a15ede6df2520f2b68db15f2ce752fa
  Path firstPath = new Path(args[0]);
  Path secondPath = new Path(args[1]);
  Path outputPath = new Path(args[2]);

  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJarByClass(lin1_exercise1.class);
  job.setJobName("avg books");
  // conf.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  job.setCombinerClass(MyCombiner.class);

  // Output types of the mappers
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Pair.class);
  // Output types of the reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  // Use MultipleInputs to give each input path its own record format and mapper.
  MultipleInputs.addInputPath(job, firstPath, TextInputFormat.class, MyMapper1.class);
  MultipleInputs.addInputPath(job, secondPath, TextInputFormat.class, MyMapper2.class);
  // FileInputFormat.setInputPaths(conf, new Path(args[0]));
  // conf.setInputFormat(TextInputFormat.class);
  // conf.setOutputFormat(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  // conf.setNumReduceTasks(5);
  // conf.setPartitionerClass(MusicPartitioner.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
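/*
 * Hedged sketch of a (sum, count) Pair writable that makes the average combinable: the mappers
 * emit partial pairs, MyCombiner adds pairs together per key, and MyReducer divides sum by count
 * at the end to produce the DoubleWritable result. This is an assumed shape, not the original
 * Pair class from the exercise.
 */
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

class Pair implements Writable {
  private double sum;
  private long count;

  public Pair() {}

  public Pair(double sum, long count) {
    this.sum = sum;
    this.count = count;
  }

  public double getSum() { return sum; }
  public long getCount() { return count; }

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeDouble(sum);
    out.writeLong(count);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    sum = in.readDouble();
    count = in.readLong();
  }
}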
public int run(String[] args) throws Exception {
  Job job = new Job(getConf(), "ResysTest MergeSimJob");
  job.setJarByClass(MergeSimJob.class);
  Configuration conf = job.getConfiguration();
  conf.set(
      "io.compression.codecs",
      "org.apache.hadoop.io.compress.DefaultCodec,"
          + "org.apache.hadoop.io.compress.GzipCodec,"
          + "org.apache.hadoop.io.compress.BZip2Codec,"
          + "org.apache.hadoop.io.compress.SnappyCodec,"
          + "com.hadoop.compression.lzo.LzopCodec,"
          + "com.hadoop.compression.lzo.LzoCodec");

  // Compress the final output (LZOP by default) unless explicitly disabled.
  String ifOutputCompress = conf.get("if_output_compress");
  if (!"false".equals(ifOutputCompress)) {
    conf.set("mapred.output.compress", "true");
    String codec = conf.get("codec");
    if (codec != null) {
      conf.set("mapred.output.compression.codec", codec);
    } else {
      conf.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");
    }
  }

  // Compress intermediate map output unless explicitly disabled.
  String ifMapCompress = conf.get("if_map_compress");
  if (!"false".equals(ifMapCompress)) {
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.map.output.compression.codec", "com.hadoop.compression.lzo.LzoCodec");
  }
  conf.set("mapred.task.timeout", "0");

  String inputPath1 = conf.get("input1");
  String inputPath2 = conf.get("input2");
  String outputPath = conf.get("output");
  int numReduceTasks = Integer.parseInt(conf.get("num_reduce_tasks"));

  // Indexed LZO input is splittable, so use the LZO-aware input format when available.
  String ifIndexed = conf.get("if_indexed");
  if (!"false".equals(ifIndexed)) {
    MultipleInputs.addInputPath(job, new Path(inputPath1), LzoTextInputFormat.class, MergeSimMapper4ab.class);
    MultipleInputs.addInputPath(job, new Path(inputPath2), LzoTextInputFormat.class, MergeSimMapper4ba.class);
  } else {
    MultipleInputs.addInputPath(job, new Path(inputPath1), TextInputFormat.class, MergeSimMapper4ab.class);
    MultipleInputs.addInputPath(job, new Path(inputPath2), TextInputFormat.class, MergeSimMapper4ba.class);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setNumReduceTasks(numReduceTasks);
  job.setReducerClass(MergeSimReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  int result = job.waitForCompletion(true) ? 0 : 1;

  // Optionally build LZO indexes over the compressed output so later jobs can split it.
  if (!"false".equals(ifOutputCompress)) {
    String ifIndex = conf.get("if_index");
    if (!"false".equals(ifIndex)) {
      System.gc();
      DistributedLzoIndexer lzoIndexer = new DistributedLzoIndexer();
      Configuration indexConf = new Configuration();
      indexConf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
      lzoIndexer.setConf(indexConf);
      lzoIndexer.run(new String[] {outputPath});
    }
  }
  System.exit(result);
  return 0;
}
public int run(String[] args) throws Exception {
  // Read the job configuration from XML.
  Configuration conf = new Configuration();
  conf.addResource("configuration.xml");
  conf.set("mapred.job.queue.name", "amap");
  conf.set("mapred.queue.name", "amap");
  conf.set("mapred.job.priority", JobPriority.VERY_HIGH.toString());
  conf.set("mapred.textoutputformat.separatorText", ",");

  // Parse the date argument and use it to build the job name and input/output paths.
  Date date = new Date();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  String dateString = "";
  if (0 != otherArgs.length && 1 != otherArgs.length) {
    System.exit(-1);
  } else if (0 == otherArgs.length) {
    // No argument: use the current date.
    conf.set("InputDate", dateString);
  } else if (1 == otherArgs.length) {
    dateString = otherArgs[0];
    conf.set("InputDate", dateString);
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMdd");
    try {
      date = simpleDateFormat.parse(dateString);
    } catch (ParseException e) {
      try {
        simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        date = simpleDateFormat.parse(dateString);
      } catch (ParseException pe) {
        pe.printStackTrace();
        System.out.println("ERROR: parameter 'date' cannot be parsed. Please check it.");
        System.exit(-2);
      }
    }
  }

  Job job = new Job(conf, "stat_allnavi_job" + ConstantsParseDate.outputDate(date));
  job.setJarByClass(StatAllNaviJob.class);
  job.setReducerClass(StatAllNaviReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(4);

  // Resolve the four input path templates for the given date; each one gets its own mapper.
  String aosbusrouPathString = conf.get("mapred.job.loadaosbusroujob.input.datapath.template", "");
  if ("".equalsIgnoreCase(aosbusrouPathString)) {
    System.exit(-3);
  }
  aosbusrouPathString = ConstantsParseDate.parseDay(aosbusrouPathString, date);

  String aoswalkrouPathString = conf.get("mapred.job.loadaoswalkroujob.input.datapath.template", "");
  if ("".equalsIgnoreCase(aoswalkrouPathString)) {
    System.exit(-3);
  }
  aoswalkrouPathString = ConstantsParseDate.parseDay(aoswalkrouPathString, date);

  String wsnaviautoPathString = conf.get("mapred.job.loadwsnaviautojob.input.datapath.template", "");
  if ("".equalsIgnoreCase(wsnaviautoPathString)) {
    System.exit(-3);
  }
  wsnaviautoPathString = ConstantsParseDate.parseDay(wsnaviautoPathString, date);

  String wsnavibusPathString = conf.get("mapred.job.loadwsnavibusjob.input.datapath.template", "");
  if ("".equalsIgnoreCase(wsnavibusPathString)) {
    System.exit(-3);
  }
  wsnavibusPathString = ConstantsParseDate.parseDay(wsnavibusPathString, date);

  MultipleInputs.addInputPath(job, new Path(aosbusrouPathString), TextInputFormat.class, StatBusRouteMapper.class);
  MultipleInputs.addInputPath(job, new Path(aoswalkrouPathString), TextInputFormat.class, StatWalkRouteMapper.class);
  MultipleInputs.addInputPath(job, new Path(wsnaviautoPathString), TextInputFormat.class, StatNaviAutoMapper.class);
  MultipleInputs.addInputPath(job, new Path(wsnavibusPathString), TextInputFormat.class, StatNaviBusMapper.class);

  // Resolve the output path, clear any previous run, and register a named text output.
  String outputPathString = conf.get("mapred.job.statallnavi.output.datapath.template", "");
  if ("".equalsIgnoreCase(outputPathString)) {
    System.out.println("ERROR: job output path template is empty. Please check the configuration.");
    System.exit(-3);
  }
  outputPathString = ConstantsParseDate.parseDay(outputPathString, date);
  Path outputPath = new Path(outputPathString);
  FileSystem fileSystem = FileSystem.get(URI.create(outputPathString), job.getConfiguration());
  fileSystem.delete(outputPath, true);
  FileOutputFormat.setOutputPath(job, outputPath);

  MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class, NullWritable.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
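/*
 * Hedged sketch showing how the "text" named output registered above is typically written from
 * inside the reducer via MultipleOutputs. Only the MultipleOutputs plumbing is shown; the counting
 * logic is an illustrative assumption, and the real StatAllNaviReducer may differ.
 */
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

class StatAllNaviReducer extends Reducer<Text, Text, Text, NullWritable> {
  private MultipleOutputs<Text, NullWritable> mos;

  @Override
  protected void setup(Context context) {
    mos = new MultipleOutputs<Text, NullWritable>(context);
  }

  @Override
  protected void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    long count = 0;
    for (Text ignored : values) {
      count++;
    }
    // Route the result to the named output registered in the driver.
    mos.write("text", new Text(key.toString() + "," + count), NullWritable.get());
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    mos.close();
  }
}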
private static void DissectionJob() throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.set("my.out.path.vertex", outPath_vertex);
  conf.set("my.out.path.count", outPath_count);
  conf.setLong("my.vertex.num", num);

  job = Job.getInstance(conf, "Levelized Nested Dissection " + depth);
  job.setJarByClass(LevNestDissectJob.class);
  job.setReducerClass(LevNestDissectReducer.class);

  if (wasStart) {
    in_start = out_start;
    MultipleInputs.addInputPath(job, in_start, SequenceFileInputFormat.class, LevNestDissectMapper.class);
  }
  in_vertex = out.suffix("/" + outPath_vertex);
  MultipleInputs.addInputPath(job, in_vertex, SequenceFileInputFormat.class, LevNestDissectMapper.class);

  out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  FileOutputFormat.setOutputPath(job, out);
  MultipleOutputs.addNamedOutput(job, "vertex", SequenceFileOutputFormat.class, LongWritable.class, VertexWritable.class);
  MultipleOutputs.addNamedOutput(job, "count", SequenceFileOutputFormat.class, LongWritable.class, LongWritable.class);
  job.setMapOutputValueClass(VertexWritable.class);

  job.waitForCompletion(true);

  updated = job.getCounters().findCounter(LevNestDissectReducer.UpdatedCounter.UPDATED).getValue();
  // Workaround for an error that is not yet understood.
  if (notNumbered > 0 && updated == 0) {
    notNumbered_tmp = job.getCounters()
        .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
        .getValue();
    if (notNumbered_tmp > 0) {
      notNumbered = notNumbered_tmp;
      wasError = false;
      nextDepth();
    } else {
      wasError = true;
      isErrorOccurred = true;
      depth--;
      out = new Path(
          outPath == null ? (FILES_OUT + (depth - 1)) : (outPath + "/" + "depth_" + (depth - 1)));
      /*depth -= 2;
      out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));*/
    }
  } else {
    wasError = false;
    notNumbered = job.getCounters()
        .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
        .getValue();
    if (notNumbered > 0) {
      nextDepth();
    }
  }
}