@Override
public int run(String[] args) throws Exception {
  JobConf conf = JobBuilder.parseInputAndOutput(this, getConf(), args);
  if (conf == null) {
    return -1;
  }

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setCompressOutput(conf, true);
  SequenceFileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

  conf.setPartitionerClass(TotalOrderPartitioner.class);

  InputSampler.Sampler<IntWritable, Text> sampler =
      new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);

  Path input = FileInputFormat.getInputPaths(conf)[0];
  input = input.makeQualified(input.getFileSystem(conf));

  Path partitionFile = new Path(input, "_partitions");
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  InputSampler.writePartitionFile(conf, sampler);

  // Add to DistributedCache
  URI partitionUri = new URI(partitionFile.toString() + "#_partitions");
  DistributedCache.addCacheFile(partitionUri, conf);
  DistributedCache.createSymlink(conf);

  JobClient.runJob(conf);
  return 0;
}
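The run() method above is a Tool implementation, so it is normally launched through ToolRunner. A minimal sketch of such an entry point follows; the class name SortByTotalOrderDriver is an assumption standing in for whatever class declares run().

// Minimal launcher sketch; SortByTotalOrderDriver is a hypothetical name for the driver class above.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new SortByTotalOrderDriver(), args);
  System.exit(exitCode);
}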
/**
 * Run a distributed job and verify that TokenCache is available.
 *
 * @throws IOException
 */
@Test
public void testTokenCache() throws IOException {
  System.out.println("running dist job");

  // make sure JT starts
  jConf = mrCluster.createJobConf();

  // provide namenode names for the job to get the delegation tokens for
  String nnUri = dfsCluster.getURI(0).toString();
  jConf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri);
  // job tracker principal id
  jConf.set(JTConfig.JT_USER_NAME, "jt_id/foo@BAR");

  // using arguments to pass the file name
  String[] args = {
    "-tokenCacheFile", tokenFileName.toString(), "-m", "1", "-r", "1", "-mt", "1", "-rt", "1"
  };

  int res = -1;
  try {
    res = ToolRunner.run(jConf, new MySleepJob(), args);
  } catch (Exception e) {
    System.out.println("Job failed with " + e.getLocalizedMessage());
    e.printStackTrace(System.out);
    fail("Job failed");
  }
  assertEquals("dist job res is not 0", 0, res);
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);

  initInputFormat(jobConf);

  org.apache.hadoop.mapred.InputSplit[] splits =
      realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());

  if (splits == null) {
    return null;
  }

  List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);

  for (org.apache.hadoop.mapred.InputSplit split : splits) {
    if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
      org.apache.hadoop.mapred.FileSplit mapredFileSplit =
          ((org.apache.hadoop.mapred.FileSplit) split);
      resultSplits.add(
          new FileSplit(
              mapredFileSplit.getPath(),
              mapredFileSplit.getStart(),
              mapredFileSplit.getLength(),
              mapredFileSplit.getLocations()));
    } else {
      resultSplits.add(new InputSplitWrapper(split));
    }
  }

  return resultSplits;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf(
        "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("Max temperature");

  FileInputFormat.addInputPath(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(MaxTemperatureMapper.class);
  conf.setCombinerClass(MaxTemperatureReducer.class);
  conf.setReducerClass(MaxTemperatureReducer.class);

  // vv MaxTemperatureDriverV6
  conf.setProfileEnabled(true);
  conf.setProfileParams(
      "-agentlib:hprof=cpu=samples,heap=sites,depth=6," + "force=n,thread=y,verbose=n,file=%s");
  conf.setProfileTaskRange(true, "0-2");
  // ^^ MaxTemperatureDriverV6

  JobClient.runJob(conf);
  return 0;
}
/**
 * Adds the given jars and resources to the job classpath by passing them to the
 * GenericOptionsParser as '-libjars' and '-files' arguments, merging them with any values
 * already set on the job.
 *
 * @param job JobConf to which the classpath is to be added
 * @param jars comma-separated jars to be added to the classpath
 * @param resources comma-separated files to be added to the classpath
 * @throws IOException
 */
public static void addClassPath(final JobConf job, String jars, String resources)
    throws IOException {
  LOGGER.debug(
      "Libraries being added to the classpath: "
          + job.get(JOB_CONF_JARS)
          + ", Resources: "
          + job.get(JOB_CONF_RESOURCES));

  // take the libjars and files values passed from the console when the job was executed
  StringBuilder oldJars = new StringBuilder().append(job.get(JOB_CONF_JARS));
  StringBuilder oldFiles = new StringBuilder().append(job.get(JOB_CONF_RESOURCES));
  String jarsTmp = jars;
  String resourcesTmp = resources;
  if (!oldJars.toString().equals(NULL) && oldJars.length() > 0) {
    oldJars.append(SEPARATOR_COMMA);
    oldJars.append(jarsTmp);
    jarsTmp = oldJars.toString();
  }
  if (resourcesTmp != null && resourcesTmp.length() > 0) {
    if (!oldFiles.toString().equals(NULL) && oldFiles.length() > 0) {
      oldFiles.append(SEPARATOR_COMMA);
      oldFiles.append(resourcesTmp);
      resourcesTmp = oldFiles.toString();
    }
    new GenericOptionsParser(
        job,
        new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp, GENERIC_PARSER_FILES, resourcesTmp});
  } else {
    new GenericOptionsParser(job, new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp});
  }
}
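A minimal usage sketch of addClassPath(); the enclosing utility class name (JobClassPathUtil) and the jar and properties paths are assumptions used only for illustration.

// Hypothetical caller: merges two extra jars and one resource file into the job's
// -libjars / -files settings before submission.
JobConf job = new JobConf(new Configuration());
JobClassPathUtil.addClassPath(
    job,
    "/apps/lib/commons-lang3.jar,/apps/lib/guava.jar", // comma-separated jars (illustrative)
    "/apps/conf/lookup.properties");                   // comma-separated resources (illustrative)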
/**
 * Driver to copy srcPath to destPath depending on the required protocol.
 *
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args) throws IOException {
  LOG.info("srcPaths=" + args.srcs);
  LOG.info("destPath=" + args.dst);
  checkSrcPath(conf, args.srcs);

  JobConf job = createJobConf(conf);
  if (args.preservedAttributes != null) {
    job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
  }
  if (args.mapredSslConf != null) {
    job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
  }

  // Initialize the mapper
  try {
    setup(conf, job, args);
    JobClient.runJob(job);
    finalize(conf, job, args.dst, args.preservedAttributes);
  } finally {
    // delete tmp
    fullyDelete(job.get(TMP_DIR_LABEL), job);
    // delete jobDirectory
    fullyDelete(job.get(JOB_DIR_LABEL), job);
  }
}
/**
 * Generate the requested number of file splits, with the filename set to the filename of the
 * output file.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Set the number of input splits.
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  // getInt/getLong return the default value if the property is not set.
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap =
      job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    throw new IOException("Cannot have test.randomwrite.bytes_per_map set to 0");
  }
  long totalBytesToWrite =
      job.getLong(
          "test.randomwrite.total_bytes",
          numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
  }
  System.out.println("numMaps = " + numMaps);
  InputSplit[] result = new InputSplit[numMaps];
  Path outDir = FileOutputFormat.getOutputPath(job);
  for (int i = 0; i < result.length; ++i) {
    result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, (String[]) null);
  }
  return result;
}
public JobBuilder reducer(Class<? extends Reducer> reducer, boolean hasCombiner)
    throws IOException {
  if (reducer != IdentityReducer.class) {
    _jobConf.setReducerClass(reducer);
  }
  if (hasCombiner) {
    _jobConf.setCombinerClass(reducer);
  }
  _jobConf.setJarByClass(reducer);
  return this;
}
public JobBuilder compressor(CompressionType type, Class<? extends CompressionCodec> codec)
    throws IOException {
  _jobConf.setBoolean("mapred.output.compress", true);
  _jobConf.set("mapred.output.compression.type", type.toString());
  _jobConf.setClass("mapred.output.compression.codec", codec, CompressionCodec.class);
  return this;
}
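Because both builder methods above return this, they can be chained. The sketch below assumes JobBuilder exposes a constructor wrapping the JobConf it configures (an assumption) and uses a placeholder reducer class.

// Hypothetical chained usage of the two JobBuilder methods shown above.
JobConf jobConf = new JobConf(WordCountReducer.class);  // WordCountReducer is a placeholder
new JobBuilder(jobConf)                                 // assumed constructor wrapping _jobConf
    .reducer(WordCountReducer.class, true)              // reuse the reducer as a combiner
    .compressor(CompressionType.BLOCK, GzipCodec.class);  // block-compressed gzip output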
public void configure(JobConf conf) {
  /*
   * Reads the configuration values and the distributed cache files provided from outside.
   */
  // Read the number of nodes in the input layer and output layer from the configuration
  inputNumdims = conf.get("numdims");
  inputNumhid = conf.get("numhid");
  // Read the weights from the distributed cache
  Path[] pathwaysFiles = new Path[0];
  try {
    pathwaysFiles = DistributedCache.getLocalCacheFiles(conf);
    for (Path path : pathwaysFiles) {
      /*
       * This loop reads all the distributed cache files.
       * In fact, the driver program ensures that there is only one distributed cache file.
       */
      BufferedReader fis = new BufferedReader(new FileReader(path.toString()));
      weightline = fis.readLine();
      fis.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
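For context, a sketch of the driver-side setup this configure() expects: the layer sizes placed in the JobConf and the single weights file shipped through the DistributedCache. The property values, file path, and driver class name are illustrative assumptions.

// Hypothetical driver-side counterpart of the configure() above.
JobConf conf = new JobConf(new Configuration(), RbmDriver.class);  // RbmDriver is a placeholder
conf.set("numdims", "784");  // input layer size (example value)
conf.set("numhid", "500");   // hidden layer size (example value)
// ship the single weights file so getLocalCacheFiles() can find it in the tasks
DistributedCache.addCacheFile(new URI("/rbm/weights.txt"), conf);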
@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("UFO count");

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: avro UFO counter <in> <out>");
    System.exit(2);
  }

  FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
  Path outputPath = new Path(otherArgs[1]);
  FileOutputFormat.setOutputPath(conf, outputPath);
  outputPath.getFileSystem(conf).delete(outputPath, true);

  Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
  AvroJob.setInputSchema(conf, input_schema);
  AvroJob.setMapOutputSchema(
      conf,
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
  AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);

  AvroJob.setMapperClass(conf, AvroRecordMapper.class);
  AvroJob.setReducerClass(conf, AvroRecordReducer.class);
  conf.setInputFormat(AvroInputFormat.class);

  JobClient.runJob(conf);
  return 0;
}
private RunningJob submitAction(Context context, Namespace ns) throws Exception {
  Hive2ActionExecutor ae = new Hive2ActionExecutor();

  WorkflowAction action = context.getAction();

  ae.prepareActionDir(getFileSystem(), context);
  ae.submitLauncher(getFileSystem(), context, action);

  String jobId = action.getExternalId();
  String jobTracker = action.getTrackerUri();
  String consoleUrl = action.getConsoleUrl();
  assertNotNull(jobId);
  assertNotNull(jobTracker);
  assertNotNull(consoleUrl);

  Element e = XmlUtils.parseXml(action.getConf());
  XConfiguration conf =
      new XConfiguration(
          new StringReader(XmlUtils.prettyPrint(e.getChild("configuration", ns)).toString()));
  conf.set("mapred.job.tracker", e.getChildTextTrim("job-tracker", ns));
  conf.set("fs.default.name", e.getChildTextTrim("name-node", ns));
  conf.set("user.name", context.getProtoActionConf().get("user.name"));
  conf.set("group.name", getTestGroup());

  JobConf jobConf = Services.get().get(HadoopAccessorService.class).createJobConf(jobTracker);
  XConfiguration.copy(conf, jobConf);
  String user = jobConf.get("user.name");
  JobClient jobClient =
      Services.get().get(HadoopAccessorService.class).createJobClient(user, jobConf);
  final RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
  assertNotNull(runningJob);
  return runningJob;
}
@Override
@SuppressWarnings("unchecked")
public void configure(JobConf conf) {
  super.configure(conf);
  keySerializerDefinition = getStoreDef().getKeySerializer();
  valueSerializerDefinition = getStoreDef().getValueSerializer();

  try {
    SerializerFactory factory = new DefaultSerializerFactory();

    if (conf.get("serializer.factory") != null) {
      factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
    }

    keySerializer = (Serializer<Object>) factory.getSerializer(keySerializerDefinition);
    valueSerializer = (Serializer<Object>) factory.getSerializer(valueSerializerDefinition);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
  valueCompressor =
      new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());
  routingStrategy =
      new ConsistentRoutingStrategy(
          getCluster().getNodes(), getStoreDef().getReplicationFactor());
}
private static void setJobName(JobConf jobConf, List<DataSegment> segments) {
  if (segments.size() == 1) {
    final DataSegment segment = segments.get(0);
    jobConf.setJobName(
        String.format(
            "druid-convert-%s-%s-%s",
            segment.getDataSource(), segment.getInterval(), segment.getVersion()));
  } else {
    final Set<String> dataSources =
        Sets.newHashSet(
            Iterables.transform(
                segments,
                new Function<DataSegment, String>() {
                  @Override
                  public String apply(DataSegment input) {
                    return input.getDataSource();
                  }
                }));
    final Set<String> versions =
        Sets.newHashSet(
            Iterables.transform(
                segments,
                new Function<DataSegment, String>() {
                  @Override
                  public String apply(DataSegment input) {
                    return input.getVersion();
                  }
                }));
    jobConf.setJobName(
        String.format(
            "druid-convert-%s-%s",
            Arrays.toString(dataSources.toArray()), Arrays.toString(versions.toArray())));
  }
}
public List<JavaScriptSource> setUpSource(JobConf job) {
  List<JavaScriptSource> js = new ArrayList<JavaScriptSource>();
  js.add(
      new JavaScriptSource(
          "emit.js", "function emit(k,v){$mapper.emit(k,v,$output_collector)}"));
  js.add(new JavaScriptSource("map.js", job.get("map.js")));
  js.add(new JavaScriptSource("reduce.js", job.get("reduce.js")));
  js.add(new JavaScriptSource("functions.js", job.get("functions.js")));
  js.add(new JavaScriptSource("filter.js", job.get("filter.js")));
  js.add(new JavaScriptSource("query_id", job.get("query_id")));
  try {
    Path[] files = DistributedCache.getLocalCacheFiles(job);
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        Path path = files[i];
        if (path.getName().endsWith(".js")) {
          String source = Files.toString(new File(path.toString()), Charset.forName("UTF-8"));
          js.add(new JavaScriptSource(path.getName(), source));
        }
      }
    }
  } catch (IOException ioe) {
    throw new RuntimeException("Couldn't read from DistributedCache", ioe);
  }
  return js;
}
private void init() {
  String nodes = conf.get(AnalysisProcessorConfiguration.nodes);
  this.nodelist = String2List(nodes, SEPERATOR_COMMA);

  String status = conf.get("status");
  this.statuslist = String2List(status, SEPERATOR_COMMA);

  parsePhase();
}
public void run() throws Exception {
  long startTime = System.currentTimeMillis();
  JobConf conf = new JobConf(ItemCFJob.class);
  conf.setJobName("ItemCF" + System.currentTimeMillis());
  conf.setNumMapTasks(10);
  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");

  StringBuilder sb = new StringBuilder();
  sb.append("--input ").append(input);
  sb.append(" --output ").append(output);
  if (flag) {
    sb.append(" --booleanData true");
  } else {
    sb.append(" --booleanData false");
  }
  sb.append(" --similarityClassname " + Constants.mahout_similarityclassname);
  sb.append(" --tempDir ").append(tmp);
  String[] args = sb.toString().split(" ");

  RecommenderJob job = new RecommenderJob();
  job.setConf(conf);
  job.run(args);

  long endTime = System.currentTimeMillis();
  logger.info(
      "recommendation job ["
          + conf.getJobName()
          + "] finished. It took "
          + (endTime - startTime) / 1000
          + "s.");
}
@Test
public void testConfigureAccumuloInputFormatWithIterators() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  ColumnMapper columnMapper =
      new ColumnMapper(
          conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
          conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
          columnNames,
          columnTypes);
  Set<Pair<Text, Text>> cfCqPairs =
      inputformat.getPairCollection(columnMapper.getColumnMappings());
  List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
  Set<Range> ranges = Collections.singleton(new Range());
  String instanceName = "realInstance";
  String zookeepers = "host1:2181,host2:2181,host3:2181";

  IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
  iterators.add(cfg);

  cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
  iterators.add(cfg);

  ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
  HiveAccumuloTableInputFormat mockInputFormat =
      Mockito.mock(HiveAccumuloTableInputFormat.class);

  // Stub out the ZKI mock
  Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
  Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);

  // Call out to the real configure method
  Mockito.doCallRealMethod()
      .when(mockInputFormat)
      .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod()
      .when(mockInputFormat)
      .getPairCollection(columnMapper.getColumnMappings());

  mockInputFormat.configure(
      conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
  Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat)
      .setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  JobConf conf = new JobConf();
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(5);

  FileSystem fs = FileSystem.get(conf);
  Path dir = new Path(args[0]);
  FileStatus[] stats = fs.listStatus(dir);
  numFiles = stats.length;

  Job job = new Job(conf);
  job.setJarByClass(FileCombiner.class);
  job.setJobName("File Combiner");

  job.setMapperClass(FileCombinerMapper.class);
  job.setReducerClass(FileCombinerReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.waitForCompletion(true);
}
public void configure(JobConf job) {
  sLogger.setLevel(Level.INFO);

  srcLang = job.get("fLang");
  mJob = job;
  pwsimMapping = new HMapIV<ArrayListOfIntsWritable>();

  valOut = new PairOfIntString();
  keyOut = new PairOfInts();

  // read doc ids of sample into vectors
  String samplesFile = job.get("Ivory.SampleFile");
  if (samplesFile != null) {
    try {
      samplesMap = readSamplesFromCache(getFilename(samplesFile), job);
    } catch (NumberFormatException e) {
      e.printStackTrace();
      throw new RuntimeException("Incorrect format in " + samplesFile);
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException("I/O error in " + samplesFile);
    } catch (Exception e) {
      e.printStackTrace();
      throw new RuntimeException("Error reading sample file: " + samplesFile);
    }
  }
}
@Override
public void map(
    WritableComparable key,
    CompactorInputSplit split,
    OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
    Reporter reporter)
    throws IOException {
  // This will only get called once, since CompactRecordReader only returns one record,
  // the input split.
  // Based on the split we're passed we go instantiate the real reader and then iterate on it
  // until it finishes.
  @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
  AcidInputFormat<WritableComparable, V> aif =
      instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
  ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

  boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
  AcidInputFormat.RawReader<V> reader =
      aif.getRawReader(
          jobConf,
          isMajor,
          split.getBucket(),
          txnList,
          split.getBaseDir(),
          split.getDeltaDirs());
  RecordIdentifier identifier = reader.createKey();
  V value = reader.createValue();
  getWriter(reporter, reader.getObjectInspector(), split.getBucket());
  while (reader.next(identifier, value)) {
    if (isMajor && reader.isDelete(value)) {
      continue;
    }
    writer.write(value);
    reporter.progress();
  }
}
@SuppressWarnings({"deprecation", "unchecked"}) public void testMergeShouldReturnProperProgress(List<Segment<Text, Text>> segments) throws IOException { Path tmpDir = new Path("localpath"); Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass(); Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass(); RawComparator<Text> comparator = jobConf.getOutputKeyComparator(); Counter readsCounter = new Counter(); Counter writesCounter = new Counter(); Progress mergePhase = new Progress(); RawKeyValueIterator mergeQueue = Merger.merge( conf, fs, keyClass, valueClass, segments, 2, tmpDir, comparator, getReporter(), readsCounter, writesCounter, mergePhase); Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), 0.0f); }
@Override
public void configure(JobConf conf) {
  this.threshold = conf.getFloat(PARAM_APS_THRESHOLD, DEFAULT_THRESHOLD);
  int reducerID = conf.getInt("mapred.task.partition", -1);
  int max = conf.getInt(PARAM_APS_MAXKEY, 0);
  int nstripes = conf.getInt(PARAM_APS_STRIPES, 1);
  int spread = conf.getInt(PARAM_APS_REDUCER_PER_STRIPE, 1);
  if (reducerID < 0 || max == 0) {
    LOG.error("Could not find stripe ID, reverting to whole rest file loading");
    LOG.debug("reducer = " + reducerID + "\t max = " + max + "\t nstripes = " + nstripes);
    // open the pruned part file in the DistributedCache
    haspruned = FileUtils.readRestFile(conf, pruned);
  } else {
    int stripe = GenericKey.StripePartitioner.findStripe(reducerID, spread);
    int from = GenericKey.StripePartitioner.minKeyInStripe(stripe, nstripes, max);
    int to = from + GenericKey.StripePartitioner.numKeysInStripe(stripe, nstripes, max);
    // read from 'from' included, to 'to' excluded
    LOG.info(
        "Reducer "
            + reducerID
            + " loading stripe "
            + stripe
            + " of "
            + nstripes
            + " ("
            + from
            + ","
            + (to - 1)
            + ")");
    haspruned = FileUtils.readRestFile(conf, pruned, from, to);
  }
  if (!haspruned) {
    LOG.warn("No pruned file provided in DistributedCache");
  } else {
    LOG.info("Read " + pruned.size() + " entries from pruned file");
  }
}
public void configure(JobConf conf) {
  numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
  centersDirectory = conf.get("centersReadDirectory");

  try {
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);

    for (int index = 0; index < numberOfCenters; ++index) {
      SequenceFile.Reader reader =
          new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);

      LongWritable key = new LongWritable();
      Point value = new Point();

      reader.next(key, value);
      Point center = (Point) value;

      centers.add(center);

      reader.close();
    }
  } catch (IOException e) {
    // Reading the centers is expected to succeed; log the failure rather than swallow it
    System.out.println("Failed to read cluster centers from " + centersDirectory);
    e.printStackTrace();
  }
}
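A sketch of how the per-index center files read above could be produced, assuming the same Point writable and one (LongWritable, Point) record per file; the initialCenters list and the paths are illustrative assumptions.

// Hypothetical writer-side counterpart: one SequenceFile per center under <centersDirectory>/centers/.
Configuration c = new Configuration();
FileSystem fs = FileSystem.get(c);
for (int index = 0; index < initialCenters.size(); ++index) {  // initialCenters: List<Point>, assumed
  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          fs, c, new Path(centersDirectory + "/centers/" + index), LongWritable.class, Point.class);
  writer.append(new LongWritable(index), initialCenters.get(index));
  writer.close();
}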
public RunningJob run(String inputPath, String outputPath) throws Exception {
  sLogger.info("Tool name: BuildGraph");
  sLogger.info(" - input: " + inputPath);
  sLogger.info(" - output: " + outputPath);

  JobConf conf = new JobConf(BuildGraph.class);
  conf.setJobName("BuildGraph " + inputPath + " " + ContrailConfig.K);

  ContrailConfig.initializeConfiguration(conf);

  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(BuildGraphMapper.class);
  conf.setReducerClass(BuildGraphReducer.class);

  // delete the output directory if it exists already
  FileSystem.get(conf).delete(new Path(outputPath), true);

  return JobClient.runJob(conf);
}
@Override
public void close() {
  System.err.println(
      "Target: " + vocE.size() + " types. Writing to " + job_.get("root", null) + "/vocab.E");
  System.err.println(
      "Source: " + vocF.size() + " types. Writing to " + job_.get("root", null) + "/vocab.F");

  // write out vocabulary to file
  try {
    FileSystem fs = FileSystem.get(job_);
    DataOutputStream dos =
        new DataOutputStream(
            new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.E"))));
    ((VocabularyWritable) vocE).write(dos);
    dos.close();
    DataOutputStream dos2 =
        new DataOutputStream(
            new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.F"))));
    ((VocabularyWritable) vocF).write(dos2);
    dos2.close();
  } catch (IOException e) {
    throw new RuntimeException("Vocab couldn't be written to disk.\n" + e.toString());
  }
}
/**
 * {@inheritDoc}
 *
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
  JobConf configuration = new JobConf(getConf(), WordCountExtended.class);
  configuration.setJobName(JOB_NAME);

  configuration.setOutputKeyClass(Text.class);
  configuration.setOutputValueClass(IntWritable.class);

  configuration.setMapperClass(Map.class);
  configuration.setCombinerClass(Reduce.class);
  configuration.setReducerClass(Reduce.class);

  configuration.setInputFormat(TextInputFormat.class);
  configuration.setOutputFormat(TextOutputFormat.class);

  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    if (JOB_SKIP_ARGUMENT.equals(args[i])) {
      DistributedCache.addCacheFile(new Path(args[++i]).toUri(), configuration);
      configuration.setBoolean(JOB_PARAMETER_SKIP_PATTERNS, true);
    } else {
      otherArgs.add(args[i]);
    }
  }

  FileInputFormat.setInputPaths(configuration, new Path(otherArgs.get(0)));
  FileOutputFormat.setOutputPath(configuration, new Path(otherArgs.get(1)));

  JobClient.runJob(configuration);
  return 0;
}
private void setPageRankLinksOptions(JobConf job) throws URISyntaxException {
  job.setLong("pages", options.getNumPages());
  job.setLong("slotpages", options.getNumSlotPages());
  job.set("delimiter", cdelim);
  Utils.shareLinkZipfCore(options, job);
}
public RunningJob run(String inputPath, String outputPath) throws Exception {
  sLogger.info("Tool name: Compressible");
  sLogger.info(" - input: " + inputPath);
  sLogger.info(" - output: " + outputPath);

  // JobConf conf = new JobConf(Stats.class);
  JobConf conf = new JobConf(Compressible.class);
  conf.setJobName("Compressible " + inputPath);

  BrushConfig.initializeConfiguration(conf);

  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(CompressibleMapper.class);
  conf.setReducerClass(CompressibleReducer.class);

  // delete the output directory if it exists already
  FileSystem.get(conf).delete(new Path(outputPath), true);

  return JobClient.runJob(conf);
}
/**
 * Configure the job.
 *
 * @param conf Job to configure
 * @param rules classification rules to evaluate
 * @param target label value to evaluate the rules for
 * @param inpath input path (the dataset)
 * @param outpath output <code>Path</code>
 * @param split DatasetSplit used to separate training and testing input
 */
private static void configureJob(
    JobConf conf,
    List<? extends Rule> rules,
    int target,
    Path inpath,
    Path outpath,
    DatasetSplit split) {
  split.storeJobParameters(conf);

  FileInputFormat.setInputPaths(conf, inpath);
  FileOutputFormat.setOutputPath(conf, outpath);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(CDFitness.class);

  conf.setMapperClass(CDMapper.class);
  conf.setCombinerClass(CDReducer.class);
  conf.setReducerClass(CDReducer.class);

  conf.setInputFormat(DatasetTextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  // store the parameters
  conf.set(CDMapper.CLASSDISCOVERY_RULES, StringUtils.toString(rules));
  conf.set(CDMapper.CLASSDISCOVERY_DATASET, StringUtils.toString(DataSet.getDataSet()));
  conf.setInt(CDMapper.CLASSDISCOVERY_TARGET_LABEL, target);
}