public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (remainArgs.length != 2) {
    System.err.println("Usage: wordcount <input> <output>");
    System.exit(1);
  }
  Job job = new Job(conf, "wordcount");
  job.setJarByClass(WordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setNumReduceTasks(4);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileSystem.get(conf).delete(new Path(remainArgs[1]), true);
  FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: test.icde12.HadoopJoin <in> <out>");
    System.exit(2);
  }
  Job job = new Job(conf, "hadoop join");
  job.setJarByClass(HadoopJoin.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setPartitionerClass(ICDEPartitioner.class);
  // WritableComparator.define(Text.class, new ICDEComparator());
  job.setSortComparatorClass(ICDEComparator.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(8);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length < 4) {
    writeUsage();
    return 1;
  }
  Path secretsPath = new Path(args[0]);
  Path saltFilePath = new Path(args[1]);
  Path inputPath = new Path(args[2]);
  Path outputPath = new Path(args[3]);
  // Make sure the salt file exists
  generateSaltIfNeeded(saltFilePath, secretsPath);
  // Configure the job
  Job job = configureJob(secretsPath, saltFilePath, inputPath, outputPath);
  // Run it
  long startTime = System.currentTimeMillis();
  job.submit();
  if (job.waitForCompletion(true)) {
    System.out.printf(
        "Done obfuscating - took %d seconds.\n", (System.currentTimeMillis() - startTime) / 1000);
  } else {
    System.err.printf("Job finished with errors: %s\n", job.getStatus().getFailureInfo());
    return 2;
  }
  return 0;
}
public void inject(Path crawlDb, Path urlDir) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: starting at " + sdf.format(start));
    LOG.info("Injector: crawlDb: " + crawlDb);
    LOG.info("Injector: urlDir: " + urlDir);
  }
  Path tempDir =
      new Path(
          getConf().get("mapred.temp.dir", ".")
              + "/inject-temp-"
              + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  // map text input file to a <url,CrawlDatum> file
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Converting injected urls to crawl db entries.");
  }
  JobConf sortJob = new NutchJob(getConf());
  sortJob.setJobName("inject " + urlDir);
  FileInputFormat.addInputPath(sortJob, urlDir);
  sortJob.setMapperClass(InjectMapper.class);
  FileOutputFormat.setOutputPath(sortJob, tempDir);
  sortJob.setOutputFormat(SequenceFileOutputFormat.class);
  sortJob.setOutputKeyClass(Text.class);
  sortJob.setOutputValueClass(CrawlDatum.class);
  sortJob.setLong("injector.current.time", System.currentTimeMillis());
  RunningJob mapJob = JobClient.runJob(sortJob);
  long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
  long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
  LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
  LOG.info(
      "Injector: total number of urls injected after normalization and filtering: "
          + urlsInjected);
  // merge with existing crawl db
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Merging injected urls into crawl db.");
  }
  JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
  FileInputFormat.addInputPath(mergeJob, tempDir);
  mergeJob.setReducerClass(InjectReducer.class);
  JobClient.runJob(mergeJob);
  CrawlDb.install(mergeJob, crawlDb);
  // clean up
  FileSystem fs = FileSystem.get(getConf());
  fs.delete(tempDir, true);
  long end = System.currentTimeMillis();
  LOG.info(
      "Injector: finished at "
          + sdf.format(end)
          + ", elapsed: "
          + TimingUtil.elapsedTime(start, end));
}
/** Sets up configuration based on params */
private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {
  if (argConf.get("file") == null) {
    logger.fatal("Missing file parameter");
    System.exit(1);
  }
  if (argConf.get("hdfs_base_path") == null) {
    logger.fatal("Missing HDFS base path, check gestore-conf.xml");
    System.exit(1);
  }
  if (argConf.get("hdfs_temp_path") == null) {
    logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
    System.exit(1);
  }
  if (argConf.get("local_temp_path") == null) {
    logger.fatal("Missing local temp path, check gestore-conf.xml");
    System.exit(1);
  }
  // Input parameters
  curConf.put("run_id", argConf.get("run", ""));
  curConf.put("task_id", argConf.get("task", ""));
  curConf.put("file_id", argConf.get("file"));
  curConf.put("local_path", argConf.get("path", ""));
  curConf.put("type", argConf.get("type", "l2r"));
  curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
  curConf.put(
      "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
  curConf.put("delimiter", argConf.get("regex", "ID=.*"));
  curConf.put("taxon", argConf.get("taxon", "all"));
  curConf.put("intermediate", argConf.get("full_run", "false"));
  curConf.put("quick_add", argConf.get("quick_add", "false"));
  Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
  curConf.put("format", argConf.get("format", "unknown"));
  curConf.put("split", argConf.get("split", "1"));
  curConf.put("copy", argConf.get("copy", "true"));
  // Constants
  curConf.put("base_path", argConf.get("hdfs_base_path"));
  curConf.put("temp_path", argConf.get("hdfs_temp_path"));
  curConf.put("local_temp_path", argConf.get("local_temp_path"));
  curConf.put("db_name_files", argConf.get("hbase_file_table"));
  curConf.put("db_name_runs", argConf.get("hbase_run_table"));
  curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));
  // Timestamps
  Date currentTime = new Date();
  Date endDate = new Date(new Long(curConf.get("timestamp_stop")));
  curConf.put("timestamp_real", Long.toString(currentTime.getTime()));
  return true;
}
public void setLocalElasticSearchInstallation(JobConf conf) {
  String esConfigPath = conf.get(ES_CONFIG_OPT, ES_CONFIG);
  String esPluginsPath = conf.get(ES_PLUGINS_OPT, ES_PLUGINS);
  System.setProperty(ES_CONFIG_OPT, esConfigPath);
  System.setProperty(ES_PLUGINS_OPT, esPluginsPath);
  LOG.info(
      "Using Elasticsearch configuration file at "
          + esConfigPath
          + " and plugin directory "
          + esPluginsPath);
}
@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("UFO count");
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: avro UFO counter <in> <out>");
    System.exit(2);
  }
  FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
  Path outputPath = new Path(otherArgs[1]);
  FileOutputFormat.setOutputPath(conf, outputPath);
  // Clear any previous output; use the explicit recursive form of delete().
  outputPath.getFileSystem(conf).delete(outputPath, true);
  Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
  AvroJob.setInputSchema(conf, input_schema);
  AvroJob.setMapOutputSchema(
      conf,
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
  AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
  AvroJob.setMapperClass(conf, AvroRecordMapper.class);
  AvroJob.setReducerClass(conf, AvroRecordReducer.class);
  conf.setInputFormat(AvroInputFormat.class);
  JobClient.runJob(conf);
  return 0;
}
public static void main(final String[] pArgs) throws Exception {
  TreasuryYieldMulti tym = new TreasuryYieldMulti();
  // Here is an example of how to use multiple collections as the input to
  // a hadoop job, from within Java code directly.
  MultiCollectionSplitBuilder mcsb = new MultiCollectionSplitBuilder();
  mcsb.add(
          new MongoURI("mongodb://localhost:27017/mongo_hadoop.yield_historical.in"),
          (MongoURI) null, // authuri
          true, // notimeout
          (DBObject) null, // fields
          (DBObject) null, // sort
          (DBObject) null, // query
          false)
      .add(
          new MongoURI("mongodb://localhost:27017/mongo_hadoop.yield_historical.in"),
          (MongoURI) null, // authuri
          true, // notimeout
          (DBObject) null, // fields
          (DBObject) null, // sort
          new BasicDBObject("_id", new BasicDBObject("$gt", new Date(883440000000L))),
          false); // range query
  Configuration conf = new Configuration();
  conf.set(MultiMongoCollectionSplitter.MULTI_COLLECTION_CONF_KEY, mcsb.toJSON());
  // Configuration has no setSplitterClass() method; register the splitter through the
  // mongo-hadoop helper instead.
  MongoConfigUtil.setSplitterClass(conf, MultiMongoCollectionSplitter.class);
  System.exit(ToolRunner.run(conf, new TreasuryYieldXMLConfig(), pArgs));
}
public void configure(JobConf job) {
  this.jobConf = job;
  urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
  interval = jobConf.getInt("db.fetch.interval.default", 2592000);
  filters = new URLFilters(jobConf);
  scfilters = new ScoringFilters(jobConf);
  scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
  curTime = job.getLong("injector.current.time", System.currentTimeMillis());
}
public static void main(String[] args) {
  int retval = 0;
  try {
    retval = ToolRunner.run(new Configuration(), new HashtagCountTool(), args);
  } catch (Exception e) {
    System.out.println(e.getMessage());
    retval = 2;
  }
  System.exit(retval);
}
/**
 * Add a file path to the current set of classpath entries. It adds the file to cache as well.
 * Intended to be used by user code.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code file} should be interpreted.
 */
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
    throws IOException {
  String filepath = file.toUri().getPath();
  String classpath = conf.get("mapred.job.classpath.files");
  conf.set(
      "mapred.job.classpath.files",
      classpath == null
          ? filepath
          : classpath + System.getProperty("path.separator") + filepath);
  URI uri = fs.makeQualified(file).toUri();
  addCacheFile(uri, conf);
}
/**
 * Get the archive entries in classpath as an array of Path
 *
 * @param conf Configuration that contains the classpath setting
 */
public static Path[] getArchiveClassPaths(Configuration conf) {
  String classpath = conf.get("mapred.job.classpath.archives");
  if (classpath == null) return null;
  ArrayList list =
      Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
  Path[] paths = new Path[list.size()];
  for (int i = 0; i < list.size(); i++) {
    paths[i] = new Path((String) list.get(i));
  }
  return paths;
}

/**
 * Add an archive path to the current set of classpath entries. It adds the archive to cache as
 * well. Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  String archivepath = archive.toUri().getPath();
  String classpath = conf.get("mapred.job.classpath.archives");
  conf.set(
      "mapred.job.classpath.archives",
      classpath == null
          ? archivepath
          : classpath + System.getProperty("path.separator") + archivepath);
  URI uri = fs.makeQualified(archive).toUri();
  addCacheArchive(uri, conf);
}
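// Usage sketch for the classpath helpers above ("intended to be used by user code").
// This is illustrative only: the driver method name, the FileSystem handle, and the
// "/libs/..." paths are assumptions, not part of the original code.
public static void addJobDependencies(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  // Put a jar and an archive on the task classpath (both are also added to the cache).
  addFileToClassPath(new Path("/libs/deps.jar"), conf, fs);
  addArchiveToClassPath(new Path("/libs/resources.zip"), conf, fs);
  // Read back the archive classpath entries that were registered.
  Path[] archives = getArchiveClassPaths(conf);
  if (archives != null) {
    for (Path p : archives) {
      System.out.println("Archive on classpath: " + p);
    }
  }
}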
public static boolean stopIteration(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path preFile = new Path("preX/Result");
  Path curFile = new Path("curX/part-00000");
  if (!(fs.exists(preFile) && fs.exists(curFile))) {
    System.exit(1);
  }
  boolean stop = true;
  String line1, line2;
  FSDataInputStream in1 = fs.open(preFile);
  FSDataInputStream in2 = fs.open(curFile);
  InputStreamReader isr1 = new InputStreamReader(in1);
  InputStreamReader isr2 = new InputStreamReader(in2);
  BufferedReader br1 = new BufferedReader(isr1);
  BufferedReader br2 = new BufferedReader(isr2);
  while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) {
    String[] str1 = line1.split("\\s+");
    String[] str2 = line2.split("\\s+");
    double preElem = Double.parseDouble(str1[1]);
    double curElem = Double.parseDouble(str2[1]);
    if (Math.abs(preElem - curElem) > eps) {
      stop = false;
      break;
    }
  }
  if (stop == false) {
    fs.delete(preFile, true);
    if (fs.rename(curFile, preFile) == false) {
      System.exit(1);
    }
  }
  return stop;
}
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
    throws IOException {
  String classpath = conf.get("mapred.job.classpath.archives");
  // the scheme/authority use ':' as separator. put the unqualified path in classpath
  String archivePath = archive.toUri().getPath();
  conf.set(
      "mapred.job.classpath.archives",
      classpath == null
          ? archivePath
          : classpath + System.getProperty("path.separator") + archivePath);
  return archive.makeQualified(archive.getFileSystem(conf)).toUri();
}
public static void main(String[] args) throws Exception {
  Job job = new Job();
  job.setJarByClass(Sort.class);
  job.setJobName("Sort");
  FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input/"));
  FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output/"));
  job.setMapperClass(Map.class);
  // job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  log.info("Beginning job...");
  Configuration conf = new Configuration();
  String[] inputArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  List<String> other_args = new ArrayList<String>();
  // Iterate over the parsed arguments; inputArgs may be shorter than args once
  // GenericOptionsParser has consumed the generic options.
  for (int i = 0; i < inputArgs.length; ++i) {
    if ("-setup".equals(inputArgs[i])) {
      DistributedCache.addCacheFile(new Path(inputArgs[++i]).toUri(), conf);
      conf.setBoolean("minibatch.job.setup", true);
    } else {
      other_args.add(inputArgs[i]);
    }
  }
  String[] tool_args = other_args.toArray(new String[0]);
  int result = ToolRunner.run(conf, new BinarySigmoidRBM(), tool_args);
  System.exit(result);
}
private String runResolveCommand(List<String> args) {
  int loopCount = 0;
  if (args.size() == 0) {
    return null;
  }
  StringBuffer allOutput = new StringBuffer();
  int numProcessed = 0;
  if (maxArgs < MIN_ALLOWABLE_ARGS) {
    LOG.warn(
        "Invalid value "
            + Integer.toString(maxArgs)
            + " for "
            + SCRIPT_ARG_COUNT_KEY
            + "; must be >= "
            + Integer.toString(MIN_ALLOWABLE_ARGS));
    return null;
  }
  while (numProcessed != args.size()) {
    int start = maxArgs * loopCount;
    List<String> cmdList = new ArrayList<String>();
    cmdList.add(scriptName);
    for (numProcessed = start;
        numProcessed < (start + maxArgs) && numProcessed < args.size();
        numProcessed++) {
      cmdList.add(args.get(numProcessed));
    }
    File dir = null;
    String userDir;
    if ((userDir = System.getProperty("user.dir")) != null) {
      dir = new File(userDir);
    }
    ShellCommandExecutor s = new ShellCommandExecutor(cmdList.toArray(new String[0]), dir);
    try {
      s.execute();
      allOutput.append(s.getOutput() + " ");
    } catch (Exception e) {
      LOG.warn(StringUtils.stringifyException(e));
      return null;
    }
    loopCount++;
  }
  return allOutput.toString();
}
public int run(String[] args) throws Exception {
  Path tempDir = new Path("/user/akhfa/temp");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(AuthorCounter.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, tempDir);
  // Return the job status to the caller (ToolRunner) instead of exiting the JVM
  // from inside run(), which made the trailing return unreachable in practice.
  return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
  int res = ToolRunner.run(new Configuration(), new XiangLi1_exercise3(), args);
  System.exit(res);
}
public static void main(String[] args) throws Exception {
  System.out.println("Beginning job...");
  // Start phase 1
  Configuration conf = new Configuration();
  conf.set("mapred.job.tracker", "local");
  conf.set("fs.default.name", "local");
  String[] inputArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  Path xmlPath = null;
  List<String> other_args = new ArrayList<String>();
  // Iterate over the parsed arguments, not the raw args array.
  for (int i = 0; i < inputArgs.length; ++i) {
    if ("-setup".equals(inputArgs[i])) {
      xmlPath = new Path(inputArgs[++i]);
      DistributedCache.addCacheFile(xmlPath.toUri(), conf);
      conf.setBoolean("minibatch.job.setup", true);
    } else {
      other_args.add(inputArgs[i]);
    }
  }
  String[] tool_args = other_args.toArray(new String[0]);
  int result = ToolRunner.run(conf, new BatchGenerationEngine(), tool_args);
  // End phase 1
  // get example count and size from xml file
  // count = count_size[0];
  // size = count_size[1];
  int[] count_size = parseJobSetup(xmlPath);
  // distribute those output from phase 1 into different directories
  String outputPhase1 = tool_args[1];
  FileSystem fs = FileSystem.get(new Configuration());
  Path outputPhase1Path = new Path(outputPhase1);
  fs.setWorkingDirectory(outputPhase1Path);
  FileStatus[] outputP1AllFiles = fs.listStatus(outputPhase1Path);
  for (int i = 0; i < outputP1AllFiles.length; i++) {
    int batchNum = i / count_size[1];
    Path batchPath = new Path(outputPhase1 + "/batch" + batchNum);
    // if batch# directory not exists, mkdir
    if (!fs.exists(batchPath)) FileSystem.mkdirs(fs, batchPath, new FsPermission("777"));
    // move file into the batch# directory
    fs.rename(
        outputP1AllFiles[i].getPath(),
        new Path(
            outputPhase1 + "/batch" + batchNum + "/" + outputP1AllFiles[i].getPath().getName()));
  }
  //
  // Generate dictionary of jobs
  int numberOfJobs = count_size[0] * count_size[1];
  JobConfig[] dictionary = new JobConfig[numberOfJobs];
  // Add job 0 to dictionary
  Configuration conf0 = new Configuration();
  DistributedCache.addCacheFile(xmlPath.toUri(), conf0);
  JobConfig job0 = new JobConfig(conf0, "input go here", java.util.UUID.randomUUID().toString());
  dictionary[0] = job0;
  // Add the rest of jobs into dictionary
  for (int i = 1; i < dictionary.length; i++) {
    Configuration newConf = new Configuration();
    DistributedCache.addCacheFile(xmlPath.toUri(), newConf);
    JobConfig newJob =
        new JobConfig(newConf, dictionary[i - 1].args[1], java.util.UUID.randomUUID().toString());
    dictionary[i] = newJob;
  }
  // running the jobs
  logger.info("Start " + dictionary.length + " jobs!");
  for (int i = 0; i < dictionary.length; i++) {
    int runResult =
        ToolRunner.run(dictionary[i].conf, new BatchGenerationEngine(), dictionary[i].args);
    if (runResult == 1) {
      logger.info("Job " + i + "-th Re-run once!");
      dictionary[i].args[1] = java.util.UUID.randomUUID().toString();
      runResult =
          ToolRunner.run(dictionary[i].conf, new BatchGenerationEngine(), dictionary[i].args);
    }
    if (runResult == 1) {
      logger.info("Job " + i + "-th Re-run twice!");
      dictionary[i].args[1] = java.util.UUID.randomUUID().toString();
      runResult =
          ToolRunner.run(dictionary[i].conf, new BatchGenerationEngine(), dictionary[i].args);
    }
    if (runResult == 1) {
      logger.info("Job " + i + "-th Failed!");
      break;
    } else {
      // Chain the next job's input to this job's output. The original guard checked
      // i - 1, which runs past the end of the array on the last iteration.
      if (i + 1 < dictionary.length) dictionary[i + 1].args[0] = dictionary[i].args[1];
    }
  }
  System.exit(1);
}
public static void main(String[] args) throws Exception {
  // HBaseConfiguration's constructor is deprecated; create() is the supported factory.
  int exitCode = ToolRunner.run(HBaseConfiguration.create(), new HBaseStationImporter(), args);
  System.exit(exitCode);
}
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 
36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }
public static void main(String[] args) throws Exception {
  int rc = ToolRunner.run(new Configuration(), new KnownKeysMRv2(), args);
  System.exit(rc);
}
@Override public final void run() { try { // before preparing the job localize // all the archives TaskAttemptID taskid = t.getTaskID(); LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir"); File jobCacheDir = null; if (conf.getJar() != null) { jobCacheDir = new File(new Path(conf.getJar()).getParent().toString()); } File workDir = new File( lDirAlloc .getLocalPathToRead( TaskTracker.getJobCacheSubdir() + Path.SEPARATOR + t.getJobID() + Path.SEPARATOR + t.getTaskID() + Path.SEPARATOR + MRConstants.WORKDIR, conf) .toString()); URI[] archives = DistributedCache.getCacheArchives(conf); URI[] files = DistributedCache.getCacheFiles(conf); FileStatus fileStatus; FileSystem fileSystem; Path localPath; String baseDir; if ((archives != null) || (files != null)) { if (archives != null) { String[] archivesTimestamps = DistributedCache.getArchiveTimestamps(conf); Path[] p = new Path[archives.length]; for (int i = 0; i < archives.length; i++) { fileSystem = FileSystem.get(archives[i], conf); fileStatus = fileSystem.getFileStatus(new Path(archives[i].getPath())); String cacheId = DistributedCache.makeRelative(archives[i], conf); String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId; if (lDirAlloc.ifExists(cachePath, conf)) { localPath = lDirAlloc.getLocalPathToRead(cachePath, conf); } else { localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf); } baseDir = localPath.toString().replace(cacheId, ""); p[i] = DistributedCache.getLocalCache( archives[i], conf, new Path(baseDir), fileStatus, true, Long.parseLong(archivesTimestamps[i]), new Path(workDir.getAbsolutePath()), false); } DistributedCache.setLocalArchives(conf, stringifyPathArray(p)); } if ((files != null)) { String[] fileTimestamps = DistributedCache.getFileTimestamps(conf); Path[] p = new Path[files.length]; for (int i = 0; i < files.length; i++) { fileSystem = FileSystem.get(files[i], conf); fileStatus = fileSystem.getFileStatus(new Path(files[i].getPath())); String cacheId = DistributedCache.makeRelative(files[i], conf); String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId; if (lDirAlloc.ifExists(cachePath, conf)) { localPath = lDirAlloc.getLocalPathToRead(cachePath, conf); } else { localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf); } baseDir = localPath.toString().replace(cacheId, ""); p[i] = DistributedCache.getLocalCache( files[i], conf, new Path(baseDir), fileStatus, false, Long.parseLong(fileTimestamps[i]), new Path(workDir.getAbsolutePath()), false); } DistributedCache.setLocalFiles(conf, stringifyPathArray(p)); } Path localTaskFile = new Path(t.getJobFile()); FileSystem localFs = FileSystem.getLocal(conf); localFs.delete(localTaskFile, true); OutputStream out = localFs.create(localTaskFile); try { conf.writeXml(out); } finally { out.close(); } } if (!prepare()) { return; } String sep = System.getProperty("path.separator"); StringBuffer classPath = new StringBuffer(); // start with same classpath as parent process classPath.append(System.getProperty("java.class.path")); classPath.append(sep); if (!workDir.mkdirs()) { if (!workDir.isDirectory()) { LOG.fatal("Mkdirs failed to create " + workDir.toString()); } } String jar = conf.getJar(); if (jar != null) { // if jar exists, it into workDir File[] libs = new File(jobCacheDir, "lib").listFiles(); if (libs != null) { for (int i = 0; i < libs.length; i++) { classPath.append(sep); // add libs from jar to classpath classPath.append(libs[i]); } } classPath.append(sep); 
classPath.append(new File(jobCacheDir, "classes")); classPath.append(sep); classPath.append(jobCacheDir); } // include the user specified classpath // archive paths Path[] archiveClasspaths = DistributedCache.getArchiveClassPaths(conf); if (archiveClasspaths != null && archives != null) { Path[] localArchives = DistributedCache.getLocalCacheArchives(conf); if (localArchives != null) { for (int i = 0; i < archives.length; i++) { for (int j = 0; j < archiveClasspaths.length; j++) { if (archives[i].getPath().equals(archiveClasspaths[j].toString())) { classPath.append(sep); classPath.append(localArchives[i].toString()); } } } } } // file paths Path[] fileClasspaths = DistributedCache.getFileClassPaths(conf); if (fileClasspaths != null && files != null) { Path[] localFiles = DistributedCache.getLocalCacheFiles(conf); if (localFiles != null) { for (int i = 0; i < files.length; i++) { for (int j = 0; j < fileClasspaths.length; j++) { if (files[i].getPath().equals(fileClasspaths[j].toString())) { classPath.append(sep); classPath.append(localFiles[i].toString()); } } } } } classPath.append(sep); classPath.append(workDir); // Build exec child jmv args. Vector<String> vargs = new Vector<String>(8); File jvm = // use same jvm as parent new File(new File(System.getProperty("java.home"), "bin"), "java"); vargs.add(jvm.toString()); // Add child (task) java-vm options. // // The following symbols if present in mapred.child.java.opts value are // replaced: // + @taskid@ is interpolated with value of TaskID. // Other occurrences of @ will not be altered. // // Example with multiple arguments and substitutions, showing // jvm GC logging, and start of a passwordless JVM JMX agent so can // connect with jconsole and the likes to watch child memory, threads // and get thread dumps. // // <property> // <name>mapred.child.java.opts</name> // <value>-verbose:gc -Xloggc:/tmp/@[email protected] \ // -Dcom.sun.management.jmxremote.authenticate=false \ // -Dcom.sun.management.jmxremote.ssl=false \ // </value> // </property> // String javaOpts = conf.get("mapred.child.java.opts", "-Xmx200m"); javaOpts = javaOpts.replace("@taskid@", taskid.toString()); String[] javaOptsSplit = javaOpts.split(" "); // Add java.library.path; necessary for loading native libraries. // // 1. To support native-hadoop library i.e. libhadoop.so, we add the // parent processes' java.library.path to the child. // 2. We also add the 'cwd' of the task to it's java.library.path to help // users distribute native libraries via the DistributedCache. // 3. The user can also specify extra paths to be added to the // java.library.path via mapred.child.java.opts. // String libraryPath = System.getProperty("java.library.path"); if (libraryPath == null) { libraryPath = workDir.getAbsolutePath(); } else { libraryPath += sep + workDir; } boolean hasUserLDPath = false; for (int i = 0; i < javaOptsSplit.length; i++) { if (javaOptsSplit[i].startsWith("-Djava.library.path=")) { javaOptsSplit[i] += sep + libraryPath; hasUserLDPath = true; break; } } if (!hasUserLDPath) { vargs.add("-Djava.library.path=" + libraryPath); } for (int i = 0; i < javaOptsSplit.length; i++) { vargs.add(javaOptsSplit[i]); } // add java.io.tmpdir given by mapred.child.tmp String tmp = conf.get("mapred.child.tmp", "./tmp"); Path tmpDir = new Path(tmp); // if temp directory path is not absolute // prepend it with workDir. 
if (!tmpDir.isAbsolute()) { tmpDir = new Path(workDir.toString(), tmp); } FileSystem localFs = FileSystem.getLocal(conf); if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) { throw new IOException("Mkdirs failed to create " + tmpDir.toString()); } vargs.add("-Djava.io.tmpdir=" + tmpDir.toString()); // Add classpath. vargs.add("-classpath"); vargs.add(classPath.toString()); // Setup the log4j prop long logSize = TaskLog.getTaskLogLength(conf); vargs.add( "-Dhadoop.log.dir=" + new File(System.getProperty("hadoop.log.dir")).getAbsolutePath()); vargs.add("-Dhadoop.root.logger=INFO,TLA"); vargs.add("-Dhadoop.tasklog.taskid=" + taskid); vargs.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize); if (conf.getProfileEnabled()) { if (conf.getProfileTaskRange(t.isMapTask()).isIncluded(t.getPartition())) { File prof = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.PROFILE); vargs.add(String.format(conf.getProfileParams(), prof.toString())); } } // Add main class and its arguments vargs.add(Child.class.getName()); // main of Child // pass umbilical address InetSocketAddress address = tracker.getTaskTrackerReportAddress(); vargs.add(address.getAddress().getHostAddress()); vargs.add(Integer.toString(address.getPort())); vargs.add(taskid.toString()); // pass task identifier String pidFile = null; if (tracker.isTaskMemoryManagerEnabled()) { pidFile = lDirAlloc .getLocalPathForWrite( (TaskTracker.getPidFilesSubdir() + Path.SEPARATOR + taskid), this.conf) .toString(); } // set memory limit using ulimit if feasible and necessary ... String[] ulimitCmd = Shell.getUlimitMemoryCommand(conf); List<String> setup = null; if (ulimitCmd != null) { setup = new ArrayList<String>(); for (String arg : ulimitCmd) { setup.add(arg); } } // Set up the redirection of the task's stdout and stderr streams File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT); File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR); stdout.getParentFile().mkdirs(); tracker.getTaskTrackerInstrumentation().reportTaskLaunch(taskid, stdout, stderr); Map<String, String> env = new HashMap<String, String>(); StringBuffer ldLibraryPath = new StringBuffer(); ldLibraryPath.append(workDir.toString()); String oldLdLibraryPath = null; oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH"); if (oldLdLibraryPath != null) { ldLibraryPath.append(sep); ldLibraryPath.append(oldLdLibraryPath); } env.put("LD_LIBRARY_PATH", ldLibraryPath.toString()); jvmManager.launchJvm( this, jvmManager.constructJvmEnv( setup, vargs, stdout, stderr, logSize, workDir, env, pidFile, conf)); synchronized (lock) { while (!done) { lock.wait(); } } tracker.getTaskTrackerInstrumentation().reportTaskEnd(t.getTaskID()); if (exitCodeSet) { if (!killed && exitCode != 0) { if (exitCode == 65) { tracker.getTaskTrackerInstrumentation().taskFailedPing(t.getTaskID()); } throw new IOException("Task process exit with nonzero status of " + exitCode + "."); } } } catch (FSError e) { LOG.fatal("FSError", e); try { tracker.fsError(t.getTaskID(), e.getMessage()); } catch (IOException ie) { LOG.fatal(t.getTaskID() + " reporting FSError", ie); } } catch (Throwable throwable) { LOG.warn(t.getTaskID() + " Child Error", throwable); ByteArrayOutputStream baos = new ByteArrayOutputStream(); throwable.printStackTrace(new PrintStream(baos)); try { tracker.reportDiagnosticInfo(t.getTaskID(), baos.toString()); } catch (IOException e) { LOG.warn(t.getTaskID() + " Reporting Diagnostics", e); } } finally { try { URI[] archives = DistributedCache.getCacheArchives(conf); 
URI[] files = DistributedCache.getCacheFiles(conf); if (archives != null) { for (int i = 0; i < archives.length; i++) { DistributedCache.releaseCache(archives[i], conf); } } if (files != null) { for (int i = 0; i < files.length; i++) { DistributedCache.releaseCache(files[i], conf); } } } catch (IOException ie) { LOG.warn("Error releasing caches : Cache files might not have been cleaned up"); } tracker.reportTaskFinished(t.getTaskID(), false); if (t.isMapTask()) { tracker.addFreeMapSlot(); } else { tracker.addFreeReduceSlot(); } } }
public static void main(String[] args) throws Exception {
  int result = ToolRunner.run(new HadoopNBFilter(), args);
  System.exit(result);
}

public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new SortByTemperatureUsingTotalOrderPartitioner(), args);
  System.exit(exitCode);
}

public static void main(String[] args) throws Exception {
  int result = ToolRunner.run(new Configuration(), new move(), args);
  System.exit(result);
}

public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new MaxTemperatureDriver(), args);
  System.exit(exitCode);
}
public static void main(String[] args) throws Exception { Config.hadoop_mode = false; if (args.length == 2 && args[0].equals("args")) // needed for mrql.flink script args = args[1].substring(1).split("!"); for (String arg : args) { Config.hadoop_mode |= arg.equals("-local") || arg.equals("-dist"); Config.bsp_mode |= arg.equals("-bsp"); Config.spark_mode |= arg.equals("-spark"); Config.flink_mode |= arg.equals("-flink"); } ; Config.map_reduce_mode = !Config.bsp_mode && !Config.spark_mode && !Config.flink_mode; initialize_evaluator(); if (Config.hadoop_mode) { conf = Evaluator.evaluator.new_configuration(); GenericOptionsParser gop = new GenericOptionsParser(conf, args); conf = gop.getConfiguration(); args = gop.getRemainingArgs(); } ; Config.parse_args(args, conf); Config.hadoop_mode = Config.local_mode || Config.distributed_mode; if (!Config.info) { for (Enumeration en = LogManager.getCurrentLoggers(); en.hasMoreElements(); ) ((Logger) en.nextElement()).setLevel(Level.WARN); LogManager.getRootLogger().setLevel(Level.WARN); } ; Evaluator.evaluator.init(conf); new TopLevel(); System.out.print("Apache MRQL version " + version + " ("); if (Config.compile_functional_arguments) System.out.print("compiled "); else System.out.print("interpreted "); if (Config.hadoop_mode) { if (Config.local_mode) System.out.print("local "); else if (Config.distributed_mode) System.out.print("distributed "); if (Config.spark_mode) System.out.println("Spark mode using " + Config.nodes + " tasks)"); else if (Config.flink_mode) System.out.println("Flink mode using " + Config.nodes + " tasks)"); else if (Config.bsp_mode) System.out.println("Hama BSP mode over " + Config.nodes + " BSP tasks)"); else if (Config.nodes > 0) System.out.println("Hadoop MapReduce mode with " + Config.nodes + " reducers)"); else if (!Config.local_mode) System.out.println("Hadoop MapReduce mode with 1 reducer, use -nodes to change it)"); else System.out.println("Hadoop MapReduce mode)"); } else if (Config.bsp_mode) System.out.println("in-memory BSP mode)"); else System.out.println("in-memory Java mode)"); if (Config.interactive) { System.out.println("Type quit to exit"); ConsoleReader reader = new ConsoleReader(); reader.setBellEnabled(false); History history = new History(new File(System.getProperty("user.home") + "/.mrqlhistory")); reader.setHistory(history); reader.setUseHistory(false); try { loop: while (true) { String line = ""; String s = ""; try { if (Config.hadoop_mode && Config.bsp_mode) Config.write(Plan.conf); do { s = reader.readLine("> "); if (s != null && (s.equals("quit") || s.equals("exit"))) break loop; if (s != null) line += " " + s; } while (s == null || s.indexOf(";") <= 0); line = line.substring(1); history.addToHistory(line); parser = new MRQLParser(new MRQLLex(new StringReader(line))); MRQLLex.reset(); parser.parse(); } catch (EOFException x) { break; } catch (Exception x) { if (x.getMessage() != null) System.out.println(x); } catch (Error x) { System.out.println(x); } } } finally { if (Config.hadoop_mode) { Plan.clean(); Evaluator.evaluator.shutdown(Plan.conf); } ; if (Config.compile_functional_arguments) Compiler.clean(); } } else try { if (Config.hadoop_mode && Config.bsp_mode) Config.write(Plan.conf); try { parser = new MRQLParser(new MRQLLex(new FileInputStream(query_file))); } catch (Exception e) { // when the query file is in HDFS Path path = new Path(query_file); FileSystem fs = path.getFileSystem(conf); parser = new MRQLParser(new MRQLLex(fs.open(path))); } ; parser.parse(); } finally { if (Config.hadoop_mode) { 
Plan.clean(); Evaluator.evaluator.shutdown(Plan.conf); } ; if (Config.compile_functional_arguments) Compiler.clean(); } }