/**
 * This is the main driver for recursively copying directories across file systems. It takes at
 * least two command-line parameters: a source URL and a destination URL. It then essentially does
 * an "ls -lR" on the source URL and writes the output in a round-robin manner to all the map input
 * files. The mapper actually copies the files allotted to it. The reduce is empty.
 */
public int run(String[] args) {
  try {
    copy(conf, Arguments.valueOf(args, conf));
    return 0;
  } catch (IllegalArgumentException e) {
    System.err.println(StringUtils.stringifyException(e) + "\n" + usage);
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  } catch (DuplicationException e) {
    System.err.println(StringUtils.stringifyException(e));
    return DuplicationException.ERROR_CODE;
  } catch (RemoteException e) {
    // Unwrap the remote exception so the underlying cause is reported.
    final IOException unwrapped =
        e.unwrapRemoteException(
            FileNotFoundException.class,
            AccessControlException.class,
            QuotaExceededException.class);
    System.err.println(StringUtils.stringifyException(unwrapped));
    return -3;
  } catch (Exception e) {
    System.err.println(
        "With failures, global counters are inaccurate; " + "consider running with -i");
    System.err.println("Copy failed: " + StringUtils.stringifyException(e));
    return -999;
  }
}
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf(
        "Usage: %s [generic options] <input dir> <output dir>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  Job job = new Job(getConf());
  job.setJarByClass(RowFilter.class);
  job.setJobName(this.getClass().getName());
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(RowMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
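/*
 * The driver above omits its entry point. A minimal launcher sketch, assuming
 * RowFilter extends Configured and implements Tool (neither is shown in the
 * snippet): ToolRunner strips the generic options and hands the remaining
 * arguments to run().
 */
public static void main(String[] args) throws Exception {
  System.exit(ToolRunner.run(new RowFilter(), args));
}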
@Override
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    System.out.println("Usage: hitsperurl.DriverHitsPerUrl <inDir> <outDir>");
    ToolRunner.printGenericCommandUsage(System.out);
    System.out.println();
    return -1;
  }
  System.out.println(Arrays.toString(args));
  Job job = new Job(getConf(), "number of hits per url");
  job.setJarByClass(DriverHitsPerUrl.class);
  job.setMapperClass(ParserMapper.class);
  // Uncomment this to set the combiner:
  // job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(AggregateURLReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  // Propagate the job result instead of always returning success.
  return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: WhiteHouseVisitorDriver <input path> <output path>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  // Use the Tool's configuration so generic options (-D, -files, ...) take effect,
  // rather than creating a fresh Configuration here.
  Job job = new Job(getConf());
  job.setJarByClass(WhiteHouseVisitorDriver.class);
  // input/output paths
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  // map/combine/reduce class definition
  job.setMapperClass(WhiteHouseVisitorMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  // key/value type definitions
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf(
        "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("Max temperature");
  FileInputFormat.addInputPath(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(MaxTemperatureMapper.class);
  conf.setCombinerClass(MaxTemperatureReducer.class);
  conf.setReducerClass(MaxTemperatureReducer.class);
  // vv MaxTemperatureDriverV6
  conf.setProfileEnabled(true);
  conf.setProfileParams(
      "-agentlib:hprof=cpu=samples,heap=sites,depth=6," + "force=n,thread=y,verbose=n,file=%s");
  conf.setProfileTaskRange(true, "0-2");
  // ^^ MaxTemperatureDriverV6
  JobClient.runJob(conf);
  return 0;
}
/** Print help. */
private void printHelp() {
  String summary =
      "Usage: bin/hdfs oev [OPTIONS] -i INPUT_FILE -o OUTPUT_FILE\n"
          + "Offline edits viewer\n"
          + "Parse a Hadoop edits log file INPUT_FILE and save results\n"
          + "in OUTPUT_FILE.\n"
          + "Required command line arguments:\n"
          + "-i,--inputFile <arg>   edits file to process, xml (case\n"
          + "                       insensitive) extension means XML format,\n"
          + "                       any other filename means binary format\n"
          + "-o,--outputFile <arg>  Name of output file. If the specified\n"
          + "                       file exists, it will be overwritten,\n"
          + "                       format of the file is determined\n"
          + "                       by -p option\n"
          + "\n"
          + "Optional command line arguments:\n"
          + "-p,--processor <arg>   Select which type of processor to apply\n"
          + "                       against image file, currently supported\n"
          + "                       processors are: binary (native binary format\n"
          + "                       that Hadoop uses), xml (default, XML\n"
          + "                       format), stats (prints statistics about\n"
          + "                       edits file)\n"
          + "-h,--help              Display usage information and exit\n"
          + "-v,--verbose           More verbose output, prints the input and\n"
          + "                       output filenames, for processors that write\n"
          + "                       to a file, also output to screen. On large\n"
          + "                       image files this will dramatically increase\n"
          + "                       processing time (default is false).\n";
  System.out.println(summary);
  System.out.println();
  ToolRunner.printGenericCommandUsage(System.out);
}
protected void printUsage(PrintStream out) {
  ToolRunner.printGenericCommandUsage(out);
  out.println("Usage: gridmix [-generate <MiB>] [-users URI] <iopath> <trace>");
  out.println(" e.g. gridmix -generate 100m foo -");
  out.println("Configuration parameters:");
  out.printf(" %-42s : Output directory\n", GRIDMIX_OUT_DIR);
  out.printf(" %-42s : Submitting threads\n", GRIDMIX_SUB_THR);
  out.printf(" %-42s : Queued job desc\n", GRIDMIX_QUE_DEP);
  out.printf(" %-42s : Key fraction of rec\n", AvgRecordFactory.GRIDMIX_KEY_FRC);
  out.printf(" %-42s : User resolution class\n", GRIDMIX_USR_RSV);
  out.printf(
      " %-42s : Enable/disable using queues in trace\n", GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE);
  out.printf(" %-42s : Default queue\n", GridmixJob.GRIDMIX_DEFAULT_QUEUE);
  StringBuilder sb = new StringBuilder();
  String sep = "";
  for (GridmixJobSubmissionPolicy policy : GridmixJobSubmissionPolicy.values()) {
    sb.append(sep);
    sb.append(policy.name());
    sep = "|";
  }
  // Use the same column width as the entries above so the output stays aligned.
  out.printf(
      " %-42s : Job submission policy (%s)\n",
      GridmixJobSubmissionPolicy.JOB_SUBMISSION_POLICY, sb.toString());
}
/**
 * This is the main routine for launching a distributed random write job. It runs 10 maps/node and
 * each node writes 1 gig of data to a DFS file. The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  // Returns the default value if the property is not set.
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap =
      job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite =
      job.getLong(
          "test.randomwrite.total_bytes",
          numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }
  // Advisory only: the framework may override the requested number of map tasks.
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  // reducer NONE
  job.setNumReduceTasks(0);
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println(
      "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return 0;
}
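/*
 * Hypothetical launcher sketch showing how the tunables read above can be
 * overridden programmatically before submission. The property names are the
 * ones run() reads; the values here are only examples.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt("test.randomwriter.maps_per_host", 5);
  conf.setLong("test.randomwrite.bytes_per_map", 256L * 1024 * 1024); // 256 MB per map
  System.exit(ToolRunner.run(conf, new RandomWriter(), args));
}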
/** Print usage messages */
public static int printUsage(String[] args, String usage) {
  err.println("args = " + Arrays.asList(args));
  err.println();
  err.println("Usage: java " + usage);
  err.println();
  ToolRunner.printGenericCommandUsage(err);
  return -1;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf(
        "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  // get configuration
  Configuration conf = getConf();
  CommonUtility.printConfiguration(conf);
  // create a JobConf
  JobConf jobconf = new JobConf(conf);
  // set name
  jobconf.setJobName("CommonTestJob");
  // set input/output path
  Path in = new Path(args[0]);
  Path out = new Path(args[1]);
  FileInputFormat.setInputPaths(jobconf, in);
  FileOutputFormat.setOutputPath(jobconf, out);
  // set input/output format
  jobconf.setInputFormat(SequenceFileInputFormat.class);
  jobconf.setOutputFormat(SequenceFileOutputFormat.class);
  // set output key/value
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(RRIntervalWritable.class);
  // set mapper/reducer class
  jobconf.setMapperClass(CommonTestMapper.class);
  jobconf.setReducerClass(IdentityReducer.class);
  // MultipleInputs.addInputPath(jobconf, new Path("hdfs://localhost/work/lab/ecg/rrSeqMulti"),
  //     SequenceFileInputFormat.class, CommonTestMapper.class);
  //
  // MultipleInputs.addInputPath(jobconf, new Path("hdfs://localhost/work/lab/ecg/rrSeqSingle"),
  //     SequenceFileInputFormat.class, IdentityMapper.class);
  jobconf.setNumReduceTasks(0);
  JobClient.runJob(jobconf);
  // --- end ---
  CommonUtility.printConfiguration(jobconf);
  return 0;
}
protected void printUsage(PrintStream out) {
  ToolRunner.printGenericCommandUsage(out);
  out.println(
      "Usage: gridmix [-generate <MiB>] [-users URI] [-Dname=value ...] <iopath> <trace>");
  out.println(" e.g. gridmix -generate 100m foo -");
  out.println("Options:");
  out.println(
      " -generate <MiB> : Generate input data of size MiB under "
          + "<iopath>/input/ and generate\n\t\t distributed cache data under "
          + "<iopath>/distributedCache/.");
  out.println(" -users <usersResourceURI> : URI that contains the users list.");
  out.println("Configuration parameters:");
  out.println(" General parameters:");
  out.printf(" %-48s : Output directory\n", GRIDMIX_OUT_DIR);
  out.printf(" %-48s : Submitting threads\n", GRIDMIX_SUB_THR);
  out.printf(" %-48s : Queued job desc\n", GRIDMIX_QUE_DEP);
  out.printf(" %-48s : User resolution class\n", GRIDMIX_USR_RSV);
  out.printf(" %-48s : Job types (%s)\n", JobCreator.GRIDMIX_JOB_TYPE, getJobTypes());
  out.println(" Parameters related to job submission:");
  out.printf(" %-48s : Default queue\n", GridmixJob.GRIDMIX_DEFAULT_QUEUE);
  out.printf(
      " %-48s : Enable/disable using queues in trace\n", GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE);
  out.printf(
      " %-48s : Job submission policy (%s)\n",
      GridmixJobSubmissionPolicy.JOB_SUBMISSION_POLICY, getSubmissionPolicies());
  out.println(" Parameters specific for LOADJOB:");
  out.printf(" %-48s : Key fraction of rec\n", AvgRecordFactory.GRIDMIX_KEY_FRC);
  out.println(" Parameters specific for SLEEPJOB:");
  out.printf(" %-48s : Whether to ignore reduce tasks\n", SleepJob.SLEEPJOB_MAPTASK_ONLY);
  out.printf(
      " %-48s : Number of fake locations for map tasks\n", JobCreator.SLEEPJOB_RANDOM_LOCATIONS);
  out.printf(
      " %-48s : Maximum map task runtime in milli-sec\n", SleepJob.GRIDMIX_SLEEP_MAX_MAP_TIME);
  out.printf(
      " %-48s : Maximum reduce task runtime in milli-sec (merge+reduce)\n",
      SleepJob.GRIDMIX_SLEEP_MAX_REDUCE_TIME);
  out.println(" Parameters specific for STRESS submission throttling policy:");
  out.printf(
      " %-48s : jobs vs task-tracker ratio\n", StressJobFactory.CONF_MAX_JOB_TRACKER_RATIO);
  out.printf(
      " %-48s : maps vs map-slot ratio\n", StressJobFactory.CONF_OVERLOAD_MAPTASK_MAPSLOT_RATIO);
  out.printf(
      " %-48s : reduces vs reduce-slot ratio\n",
      StressJobFactory.CONF_OVERLOAD_REDUCETASK_REDUCESLOT_RATIO);
  out.printf(
      " %-48s : map-slot share per job\n", StressJobFactory.CONF_MAX_MAPSLOT_SHARE_PER_JOB);
  out.printf(
      " %-48s : reduce-slot share per job\n", StressJobFactory.CONF_MAX_REDUCESLOT_SHARE_PER_JOB);
}
static int printUsage() {
  System.out.println(
      "join [-r <reduces>] "
          + "[-inFormat <input format class>] "
          + "[-outFormat <output format class>] "
          + "[-outKey <output key class>] "
          + "[-outValue <output value class>] "
          + "[-joinOp <inner|outer|override>] "
          + "[input]* <input> <output>");
  ToolRunner.printGenericCommandUsage(System.out);
  return 2;
}
private void displayUsage(String cmd) {
  String prefix = "Usage: JobQueueClient ";
  if ("-queueinfo".equals(cmd)) {
    System.err.println(prefix + "[" + cmd + " <job-queue-name> [-showJobs]]");
  } else {
    System.err.printf(prefix + "<command> <args>\n");
    System.err.printf("\t[-list]\n");
    System.err.printf("\t[-info <job-queue-name> [-showJobs]]\n");
    System.err.printf("\t[-showacls] \n\n");
    // Print the generic usage to the same stream as the messages above.
    ToolRunner.printGenericCommandUsage(System.err);
  }
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf("Usage: %s <input path> <output path>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  // Use the Tool's configuration so generic options take effect.
  Job job = new Job(getConf());
  job.setJarByClass(PopulationsDriver.class);
  // TODO add configuration for map and reduce
  // Return 0 on success and 1 on failure, matching the usual driver convention.
  return job.waitForCompletion(true) ? 0 : 1;
}
private static int printUsage() {
  System.out.println(
      "SAMMEPL's parameters are:\n"
          + " -train|-eval\n"
          + " -d|-data <data path>\n"
          + " -m|-model <model path>\n"
          + " -n|-num <num instances per map>\n"
          + " -i|-iteration <num iterations>\n"
          + " [-md|-metadata <metadata path>]\n"
          + " [-o|-output <msg output folder>]\n\n");
  ToolRunner.printGenericCommandUsage(System.out);
  return -1;
}
public static void main(String[] args) throws Exception {
  // The -files option is also used by GenericOptionsParser.
  // Make sure that it is not the first argument for fsck.
  int res = -1;
  if ((args.length == 0) || ("-files".equals(args[0]))) {
    printUsage(System.err);
    ToolRunner.printGenericCommandUsage(System.err);
  } else if (DFSUtil.parseHelpArgument(args, USAGE, System.out, true)) {
    res = 0;
  } else {
    res = ToolRunner.run(new DFSck(new HdfsConfiguration()), args);
  }
  System.exit(res);
}
/* (non-Javadoc)
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
  // arguments
  if (args.length < 2) {
    System.err.printf(
        "Usage: %s [generic options] <wikidump path> <output path> <start job(optional)>\n",
        getClass().getSimpleName());
    System.err.println("<start job>: parse, degree, graph, rank, list");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  // Configuration
  Configuration conf = getConf();
  conf.set("wikidump_path", args[0]);
  conf.set("output_path", args[1]);
  // Job selection
  JobName startJob = JobName.PARSE;
  if (args.length >= 3) {
    try {
      startJob = JobName.valueOf(args[2].toUpperCase());
    } catch (IllegalArgumentException e) {
      // Unknown job name: fall back to the default, PARSE.
    }
  }
  int K = 20;
  // The cases below intentionally fall through: starting at an earlier stage
  // runs every later stage of the pipeline as well.
  switch (startJob) {
    case PARSE:
      JobParse.run(conf);
    case DEGREE:
      JobOutDegree.run(conf);
      JobInDegree.run(conf);
    case GRAPH:
      JobGraph.run(conf);
    case RANK:
      JobRank.run(conf, K);
    case RESULT:
      JobResult.run(conf, K);
      break;
    default:
      break;
  }
  return 0;
}
/** Launches all the tasks in order. */
@Override
public int run(String[] args) throws Exception {
  if (args.length < 10) {
    System.err.println(
        "Usage: edu.iu.kmeans.KMeansMapCollective <num Of DataPoints> <num of Centroids> "
            + "<vector size> <number of map tasks> <partition per worker> <number of iteration> "
            + "<iteration per job> <start Job ID> <work dir> <local points dir>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  int numOfDataPoints = Integer.parseInt(args[0]);
  int numCentroids = Integer.parseInt(args[1]);
  int vectorSize = Integer.parseInt(args[2]);
  int numMapTasks = Integer.parseInt(args[3]);
  int partitionPerWorker = Integer.parseInt(args[4]);
  int numIteration = Integer.parseInt(args[5]);
  int iterationPerJob = Integer.parseInt(args[6]);
  int startJobID = Integer.parseInt(args[7]);
  String workDir = args[8];
  String localPointFilesDir = args[9];
  boolean regenerateData = true;
  if (args.length == 11) {
    regenerateData = Boolean.parseBoolean(args[10]);
  }
  System.out.println("Number of Map Tasks = " + numMapTasks);
  int numPointFiles = numMapTasks * partitionPerWorker;
  // Bail out if there are fewer points than point files, or fewer centroids than map tasks.
  if (numOfDataPoints / numPointFiles == 0 || numCentroids / numMapTasks == 0) {
    return -1;
  }
  if (numIteration == 0) {
    numIteration = 1;
  }
  if (iterationPerJob == 0) {
    iterationPerJob = 1;
  }
  launch(
      numOfDataPoints,
      numCentroids,
      vectorSize,
      numPointFiles,
      numMapTasks,
      numIteration,
      iterationPerJob,
      startJobID,
      workDir,
      localPointFilesDir,
      regenerateData);
  return 0;
}
static int printUsage() {
  System.out.println(
      "sampler -r <reduces>\n"
          + " [-inFormat <input format class>]\n"
          + " [-keyClass <map input & output key class>]\n"
          + " [-splitRandom <double pcnt> <numSamples> <maxsplits> | "
          + "// Sample from random splits at random (general)\n"
          + " -splitSample <numSamples> <maxsplits> | "
          + " // Sample from first records in splits (random data)\n"
          + " -splitInterval <double pcnt> <maxsplits>]"
          + " // Sample from splits at intervals (sorted data)");
  System.out.println("Default sampler: -splitRandom 0.1 10000 10");
  ToolRunner.printGenericCommandUsage(System.out);
  return -1;
}
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.err.println("Usage: MigrationTool <S3 file system URI>");
    System.err.println("\t<S3 file system URI>\tfilesystem to migrate");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  URI uri = URI.create(args[0]);
  initialize(uri);
  FileSystemStore newStore = new Jets3tFileSystemStore();
  newStore.initialize(uri, getConf());
  // "%2F" is the URL-encoded form of "/"; its presence indicates the old unversioned layout.
  if (get("%2F") != null) {
    System.err.println("Current version number is [unversioned].");
    System.err.println("Target version number is " + newStore.getVersion() + ".");
    Store oldStore = new UnversionedStore();
    migrate(oldStore, newStore);
    return 0;
  } else {
    S3Object root = get("/");
    if (root != null) {
      String version = (String) root.getMetadata("fs-version");
      if (version == null) {
        System.err.println("Can't detect version - exiting.");
      } else {
        String newVersion = newStore.getVersion();
        System.err.println("Current version number is " + version + ".");
        System.err.println("Target version number is " + newVersion + ".");
        if (version.equals(newStore.getVersion())) {
          System.err.println("No migration required.");
          return 0;
        }
        // use version number to create Store
        // Store oldStore = ...
        // migrate(oldStore, newStore);
        System.err.println("Not currently implemented.");
        return 0;
      }
    }
    System.err.println("Can't detect version - exiting.");
    return 0;
  }
}
/**
 * Parses arguments and then runs a map/reduce job. Prints output to standard out.
 *
 * @return non-zero if there is an error; 0 otherwise.
 */
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: " + getClass().getName() + " <nMaps> <nSamples>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  final int nMaps = Integer.parseInt(args[0]);
  final long nSamples = Long.parseLong(args[1]);
  System.out.println("Number of Maps = " + nMaps);
  System.out.println("Samples per Map = " + nSamples);
  final JobConf jobConf = new JobConf(getConf(), getClass());
  System.out.println("Estimated value of Pi is " + estimate(nMaps, nSamples, jobConf));
  return 0;
}
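/*
 * The estimate() helper is not shown above. As a single-process sketch of the
 * Monte Carlo computation the maps presumably distribute (hypothetical helper,
 * not the actual implementation): sample points in the unit square and count
 * those inside the quarter circle, so that pi ~= 4 * inside / total.
 */
static double estimateLocally(long nSamples, java.util.Random rnd) {
  long inside = 0;
  for (long i = 0; i < nSamples; i++) {
    double x = rnd.nextDouble();
    double y = rnd.nextDouble();
    if (x * x + y * y <= 1.0) {
      inside++;
    }
  }
  return 4.0 * inside / nSamples;
}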
/**
 * Displays format of commands.
 *
 * @param cmd The command that is being executed.
 */
private static void printUsage(String cmd) {
  if ("-report".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-report]");
  } else if ("-safemode".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-safemode enter | leave | get | wait]");
  } else if ("-saveNamespace".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-saveNamespace]");
  } else if ("-refreshNodes".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-refreshNodes]");
  } else if ("-finalizeUpgrade".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-finalizeUpgrade]");
  } else if ("-upgradeProgress".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-upgradeProgress status | details | force]");
  } else if ("-metasave".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-metasave filename]");
  } else if (SetQuotaCommand.matches(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [" + SetQuotaCommand.USAGE + "]");
  } else if (ClearQuotaCommand.matches(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [" + ClearQuotaCommand.USAGE + "]");
  } else if (SetSpaceQuotaCommand.matches(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [" + SetSpaceQuotaCommand.USAGE + "]");
  } else if (ClearSpaceQuotaCommand.matches(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [" + ClearSpaceQuotaCommand.USAGE + "]");
  } else if ("-refreshServiceAcl".equals(cmd)) {
    System.err.println("Usage: java DFSAdmin" + " [-refreshServiceAcl]");
  } else {
    System.err.println("Usage: java DFSAdmin");
    System.err.println("           [-report]");
    System.err.println("           [-safemode enter | leave | get | wait]");
    System.err.println("           [-saveNamespace]");
    System.err.println("           [-refreshNodes]");
    System.err.println("           [-finalizeUpgrade]");
    System.err.println("           [-upgradeProgress status | details | force]");
    System.err.println("           [-metasave filename]");
    System.err.println("           [-refreshServiceAcl]");
    System.err.println("           [" + SetQuotaCommand.USAGE + "]");
    System.err.println("           [" + ClearQuotaCommand.USAGE + "]");
    System.err.println("           [" + SetSpaceQuotaCommand.USAGE + "]");
    System.err.println("           [" + ClearSpaceQuotaCommand.USAGE + "]");
    System.err.println("           [-help [cmd]]");
    System.err.println();
    ToolRunner.printGenericCommandUsage(System.err);
  }
}
/**
 * Parses arguments and then runs a map/reduce job. Prints output to standard out.
 *
 * @return non-zero if there is an error; 0 otherwise.
 */
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: " + getClass().getName() + " <nMaps> <nSamples>");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  final int nMaps = Integer.parseInt(args[0]);
  final long nSamples = Long.parseLong(args[1]);
  long now = System.currentTimeMillis();
  int rand = new Random().nextInt(Integer.MAX_VALUE);
  // Use a unique temporary directory per run so concurrent runs do not collide.
  final Path tmpDir = new Path(TMP_DIR_PREFIX + "_" + now + "_" + rand);
  System.out.println("Number of Maps = " + nMaps);
  System.out.println("Samples per Map = " + nSamples);
  System.out.println(
      "Estimated value of Pi is " + estimatePi(nMaps, nSamples, tmpDir, getConf()));
  return 0;
}
@SuppressWarnings("deprecation") @Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf( "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; } // create JobConf JobConf jobConf = new JobConf(getConf(), this.getClass()); CommonUtility.printConfiguration(jobConf); // set path for input and output FileInputFormat.addInputPath(jobConf, new Path(args[0])); FileOutputFormat.setOutputPath(jobConf, new Path(args[1])); // set format for input and output jobConf.setInputFormat(WholeFileInputFormat.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); // set class of output key and value jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(BytesWritable.class); // set special attributes jobConf.setNumReduceTasks(2); // set mapper and reducer jobConf.setMapperClass(SmallFiles2SequenceFileMapper.class); jobConf.setReducerClass(IdentityReducer.class); jobConf.setNumReduceTasks(0); // run the job JobClient.runJob(jobConf); return 0; }
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  GenericOptionsParser parser = new GenericOptionsParser(conf, args);
  String[] otherArgs = parser.getRemainingArgs();
  if (otherArgs.length != 3) {
    System.err.println("Usage: DistributedGrep <regex> <in> <out>");
    ToolRunner.printGenericCommandUsage(System.err);
    System.exit(2);
  }
  Job job = new Job(conf, "Distributed Grep");
  job.setJarByClass(DistributedGrep.class);
  job.setMapperClass(GrepMapper.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
  FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
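/*
 * Note: when this Tool is launched through ToolRunner, the generic options
 * have already been parsed into getConf(), so the inner GenericOptionsParser
 * above is redundant. A sketch of an equivalent run() relying on that,
 * assuming the same GrepMapper and REGEX_KEY as in the snippet above:
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: DistributedGrep <regex> <in> <out>");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  Job job = new Job(getConf(), "Distributed Grep");
  job.setJarByClass(DistributedGrep.class);
  job.setMapperClass(GrepMapper.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.getConfiguration().set(REGEX_KEY, args[0]);
  FileInputFormat.addInputPath(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  return job.waitForCompletion(true) ? 0 : 1;
}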
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf(
        "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  Job job = new Job(getConf(), "Max temperature");
  job.setJarByClass(getClass());
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(MaxTemperatureMapper.class);
  job.setCombinerClass(MaxTemperatureReducer.class);
  job.setReducerClass(MaxTemperatureReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
/** Runs this tool. */
@SuppressWarnings({"static-access"})
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(
      OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
  options.addOption(
      OptionBuilder.withArgName("path")
          .hasArg()
          .withDescription("output path")
          .create(COLLECTION));
  CommandLine cmdline = null;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    System.exit(-1);
  }
  if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
    System.out.println("args: " + Arrays.toString(args));
    HelpFormatter formatter = new HelpFormatter();
    formatter.setWidth(120);
    formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    System.exit(-1);
  }
  String indexPath = cmdline.getOptionValue(INDEX);
  String collectionPath = cmdline.getOptionValue(COLLECTION);
  if (collectionPath.endsWith(".gz")) {
    System.out.println("gzipped collection is not seekable: use compressed version!");
    System.exit(-1);
  }
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);
  MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);
  FSDataInputStream collection = fs.open(new Path(collectionPath));
  BufferedReader d = new BufferedReader(new InputStreamReader(collection));
  Text key = new Text();
  ArrayListWritable<PairOfInts> postings;
  BytesWritable bytesValue = new BytesWritable();
  System.out.println("Looking up postings for the term \"starcross'd\"");
  key.set("starcross'd");
  reader.get(key, bytesValue);
  postings = deserializePosting(bytesValue);
  for (PairOfInts pair : postings) {
    System.out.println(pair);
    // Seek to the byte offset stored in the posting and print that line.
    collection.seek(pair.getLeftElement());
    System.out.println(d.readLine());
  }
  bytesValue = new BytesWritable();
  key.set("gold");
  reader.get(key, bytesValue);
  postings = deserializePosting(bytesValue);
  System.out.println(
      "Complete postings list for 'gold': (" + postings.size() + ", " + postings + ")");
  Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
  for (PairOfInts pair : postings) {
    goldHist.increment(pair.getRightElement());
  }
  System.out.println("histogram of tf values for gold");
  for (PairOfInts pair : goldHist) {
    System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
  }
  bytesValue = new BytesWritable();
  key.set("silver");
  reader.get(key, bytesValue);
  postings = deserializePosting(bytesValue);
  System.out.println(
      "Complete postings list for 'silver': (" + postings.size() + ", " + postings + ")");
  Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
  for (PairOfInts pair : postings) {
    silverHist.increment(pair.getRightElement());
  }
  System.out.println("histogram of tf values for silver");
  for (PairOfInts pair : silverHist) {
    System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
  }
  bytesValue = new BytesWritable();
  key.set("bronze");
  Writable w = reader.get(key, bytesValue);
  if (w == null) {
    System.out.println("the term bronze does not appear in the collection");
  }
  collection.close();
  reader.close();
  return 0;
}
@SuppressWarnings("static-access") private int parseArgs(String[] args) { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Tfidf vectors") .create(Fields.INPUT.get())); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Vectors' length") .create(Fields.BASIS.get())); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Near documents") .create(Fields.OUTPUT.get())); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Document dates") .create(Fields.DOC_DATES.get())); options.addOption( OptionBuilder.withDescription("Ignore docs without NN").create(Fields.IGNORE.get())); options.addOption( OptionBuilder.withDescription("Output buckets").create(Fields.OUTPUT_BUCKETS.get())); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Number of bands") .create(Fields.BANDS.get())); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Number of rows") .create(Fields.ROWS.get())); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(Fields.INPUT.get()) || !cmdline.hasOption(Fields.OUTPUT.get()) || !cmdline.hasOption(Fields.BASIS.get()) || (!cmdline.hasOption(Fields.DOC_DATES.get()) && !cmdline.hasOption(Fields.OUTPUT_BUCKETS.get()))) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } inputPath = cmdline.getOptionValue(Fields.INPUT.get()); outputPath = cmdline.getOptionValue(Fields.OUTPUT.get()); basisPath = cmdline.getOptionValue(Fields.BASIS.get()); datesPath = cmdline.getOptionValue(Fields.DOC_DATES.get()); ignoreDocs = false; if (cmdline.hasOption(Fields.IGNORE.get())) { ignoreDocs = true; } outputBuckets = false; if (cmdline.hasOption(Fields.OUTPUT_BUCKETS.get())) { outputBuckets = true; } bands = -1; if (cmdline.hasOption(Fields.BANDS.get())) { bands = Integer.parseInt(cmdline.getOptionValue(Fields.BANDS.get())); } rows = -1; if (cmdline.hasOption(Fields.ROWS.get())) { rows = Integer.parseInt(cmdline.getOptionValue(Fields.ROWS.get())); } logger.info("Tool name: " + this.getClass().getName()); logger.info(" - input: " + inputPath); logger.info(" - basis: " + basisPath); logger.info(" - output: " + outputPath); logger.info(" - dates: " + datesPath); logger.info(" - ignore: " + ignoreDocs); logger.info(" - outputBuckets: " + outputBuckets); logger.info(" - bands: " + bands); logger.info(" - rows: " + rows); return 0; }
@SuppressWarnings("static-access") private int parseArgs(String[] args) { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Tfidf vectors") .create(Fields.INPUT.get())); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("Selected articles") .create(Fields.OUTPUT.get())); options.addOption( OptionBuilder.withArgName("integer") .hasArg() .withDescription("Partition count") .create(Fields.PARTITION_COUNT.get())); options.addOption( OptionBuilder.withArgName("integer") .hasArg() .withDescription("Select count") .create(Fields.SELECT_COUNT.get())); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { logger.fatal("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(Fields.INPUT.get()) || !cmdline.hasOption(Fields.OUTPUT.get())) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } inputPath = cmdline.getOptionValue(Fields.INPUT.get()); outputPath = cmdline.getOptionValue(Fields.OUTPUT.get()); partitionCount = Defaults.PARTITION_COUNT.get(); if (cmdline.hasOption(Fields.PARTITION_COUNT.get())) { partitionCount = Integer.parseInt(cmdline.getOptionValue(Fields.PARTITION_COUNT.get())); if (partitionCount <= 0) { System.err.println("Error: \"" + partitionCount + "\" has to be positive!"); return -1; } } selectCount = Integer.parseInt(cmdline.getOptionValue(Fields.SELECT_COUNT.get())); logger.info("Tool name: " + this.getClass().getName()); logger.info(" - input: " + inputPath); logger.info(" - output: " + outputPath); logger.info(" - partitions: " + partitionCount); logger.info(" - select: " + selectCount); return 0; }
private static int printUsage() {
  System.out.println("usage: [inputDir] [outputDir] [numNodes]");
  ToolRunner.printGenericCommandUsage(System.out);
  return -1;
}