public void configure(JobConf conf) { /* * It reads all the configurations and distributed cache from outside. */ // Read number of nodes in input layer and output layer from configuration inputNumdims = conf.get("numdims"); inputNumhid = conf.get("numhid"); // Read the weights from distributed cache Path[] pathwaysFiles = new Path[0]; try { pathwaysFiles = DistributedCache.getLocalCacheFiles(conf); for (Path path : pathwaysFiles) { /* * this loop reads all the distributed cache files * In fact, the driver program ensures that there is only one distributed cache file */ BufferedReader fis = new BufferedReader(new FileReader(path.toString())); weightline = fis.readLine(); } } catch (Exception e) { e.printStackTrace(); } }
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf( "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; } JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("Max temperature"); FileInputFormat.addInputPath(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MaxTemperatureMapper.class); conf.setCombinerClass(MaxTemperatureReducer.class); conf.setReducerClass(MaxTemperatureReducer.class); // vv MaxTemperatureDriverV6 conf.setProfileEnabled(true); conf.setProfileParams( "-agentlib:hprof=cpu=samples,heap=sites,depth=6," + "force=n,thread=y,verbose=n,file=%s"); conf.setProfileTaskRange(true, "0-2"); // ^^ MaxTemperatureDriverV6 JobClient.runJob(conf); return 0; }
public void configure(JobConf conf) { numberOfCenters = Integer.valueOf(conf.get("numberOfCenters")); centersDirectory = conf.get("centersReadDirectory"); try { Configuration c = new Configuration(); FileSystem fs = FileSystem.get(c); for (int index = 0; index < numberOfCenters; ++index) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c); LongWritable key = new LongWritable(); Point value = new Point(); reader.next(key, value); Point center = (Point) value; centers.add(center); reader.close(); } } catch (IOException e) { // do nothing // I hope this doesn't happen System.out.println("well, damn."); e.printStackTrace(); } }
public void configure(JobConf job) { this.jobConf = job; urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT); interval = jobConf.getInt("db.fetch.interval.default", 2592000); filters = new URLFilters(jobConf); scfilters = new ScoringFilters(jobConf); scoreInjected = jobConf.getFloat("db.score.injected", 1.0f); curTime = job.getLong("injector.current.time", System.currentTimeMillis()); }
public void inject(Path crawlDb, Path urlDir) throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); long start = System.currentTimeMillis(); if (LOG.isInfoEnabled()) { LOG.info("Injector: starting at " + sdf.format(start)); LOG.info("Injector: crawlDb: " + crawlDb); LOG.info("Injector: urlDir: " + urlDir); } Path tempDir = new Path( getConf().get("mapred.temp.dir", ".") + "/inject-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); // map text input file to a <url,CrawlDatum> file if (LOG.isInfoEnabled()) { LOG.info("Injector: Converting injected urls to crawl db entries."); } JobConf sortJob = new NutchJob(getConf()); sortJob.setJobName("inject " + urlDir); FileInputFormat.addInputPath(sortJob, urlDir); sortJob.setMapperClass(InjectMapper.class); FileOutputFormat.setOutputPath(sortJob, tempDir); sortJob.setOutputFormat(SequenceFileOutputFormat.class); sortJob.setOutputKeyClass(Text.class); sortJob.setOutputValueClass(CrawlDatum.class); sortJob.setLong("injector.current.time", System.currentTimeMillis()); RunningJob mapJob = JobClient.runJob(sortJob); long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue(); long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue(); LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered); LOG.info( "Injector: total number of urls injected after normalization and filtering: " + urlsInjected); // merge with existing crawl db if (LOG.isInfoEnabled()) { LOG.info("Injector: Merging injected urls into crawl db."); } JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb); FileInputFormat.addInputPath(mergeJob, tempDir); mergeJob.setReducerClass(InjectReducer.class); JobClient.runJob(mergeJob); CrawlDb.install(mergeJob, crawlDb); // clean up FileSystem fs = FileSystem.get(getConf()); fs.delete(tempDir, true); long end = System.currentTimeMillis(); LOG.info( "Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end)); }
/** Start the JobTracker process, listen on the indicated port */ JobTracker(Configuration conf) throws IOException { // // Grab some static constants // maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2); RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000); RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000); TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f); PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f); MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks; // This is a directory of temporary submission files. We delete it // on startup, and can delete any files that we're done with this.conf = conf; JobConf jobConf = new JobConf(conf); this.systemDir = jobConf.getSystemDir(); this.fs = FileSystem.get(conf); FileUtil.fullyDelete(fs, systemDir); fs.mkdirs(systemDir); // Same with 'localDir' except it's always on the local disk. jobConf.deleteLocalFiles(SUBDIR); // Set ports, start RPC servers, etc. InetSocketAddress addr = getAddress(conf); this.localMachine = addr.getHostName(); this.port = addr.getPort(); this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf); this.interTrackerServer.start(); Properties p = System.getProperties(); for (Iterator it = p.keySet().iterator(); it.hasNext(); ) { String key = (String) it.next(); String val = (String) p.getProperty(key); LOG.info("Property '" + key + "' is " + val); } this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030); this.infoServer = new JobTrackerInfoServer(this, infoPort); this.infoServer.start(); this.startTime = System.currentTimeMillis(); new Thread(this.expireTrackers).start(); new Thread(this.retireJobs).start(); new Thread(this.initJobs).start(); }
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), XiangLi1_exercise3.class); conf.setJobName("xiangli1_exercise3"); conf.setNumReduceTasks(0); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); // conf.setCombinerClass(Reduce.class); // conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(Add1.class); conf.setJobName("sumar1"); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); JobConf job = new JobConf(conf); job.setJarByClass(Jacobi.class); fs.delete(new Path("curX"), true); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(DoubleWritable.class); FileInputFormat.setInputPaths(job, new Path("preX")); FileOutputFormat.setOutputPath(job, new Path("curX")); JobClient.runJob(job); return 1; }
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), Sqrt2.class); conf.setJobName("sqrt2"); conf.setOutputKeyClass(DoubleWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(Map.class); /*conf.setCombinerClass(Reduce.class);*/ conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
public void configure(JobConf conf) { try { Path vInput; FileSystem fs; URI[] fvector; nsize = conf.getInt("DIMENTION", 0); sumVec = new double[nsize]; resVec = new double[nsize]; diaVec = new double[nsize]; Arrays.fill(sumVec, 0); Arrays.fill(resVec, 0); Arrays.fill(diaVec, 0); fvector = DistributedCache.getCacheFiles(conf); vInput = new Path(fvector[0].getPath()); fs = FileSystem.get(URI.create("hdfs://node17.cs.rochester.edu:9000"), conf); FSDataInputStream fdis = fs.open(vInput); String line; while ((line = fdis.readLine()) != null) { StringTokenizer tokenizer = new StringTokenizer(line); int rowIdx = Integer.parseInt(tokenizer.nextToken()); int colIdx = Integer.parseInt(tokenizer.nextToken()); double matVar = Double.parseDouble(tokenizer.nextToken()); if (rowIdx == colIdx) { diaVec[rowIdx] = matVar; } else if (colIdx == nsize) { resVec[rowIdx] = matVar; } else { sumVec[rowIdx] += matVar; } } } catch (IOException e) { e.printStackTrace(); } }
public void configure(JobConf job) { interval = job.getInt("db.fetch.interval.default", 2592000); scoreInjected = job.getFloat("db.score.injected", 1.0f); overwrite = job.getBoolean("db.injector.overwrite", false); update = job.getBoolean("db.injector.update", false); }