private void configureMapperTypesIfPossible(Job j, Class<? extends Mapper> mapper) {
  // Walk up the hierarchy to find the generic Mapper superclass
  Class<?> targetClass = mapper;
  Type targetType = mapper;
  do {
    targetType = targetClass.getGenericSuperclass();
    targetClass = targetClass.getSuperclass();
  } while (targetClass != null && targetClass != Object.class && !Mapper.class.equals(targetClass));

  if (targetType instanceof ParameterizedType) {
    Type[] params = ((ParameterizedType) targetType).getActualTypeArguments();
    if (params.length == 4) {
      // set the map output key class (if possible)
      if (params[2] instanceof Class) {
        Class<?> clz = (Class<?>) params[2];
        if (!clz.isInterface()) {
          j.setMapOutputKeyClass(clz);
        }
      }
      // set the map output value class (if possible)
      if (params[3] instanceof Class) {
        Class<?> clz = (Class<?>) params[3];
        if (!clz.isInterface()) {
          j.setMapOutputValueClass(clz);
        }
      }
    }
  }
}
@Test
public void testOnlyOneKindOfFilterSupported() throws Exception {
  IntColumn foo = intColumn("foo");
  FilterPredicate p = or(eq(foo, 10), eq(foo, 11));

  Job job = new Job();
  Configuration conf = job.getConfiguration();

  ParquetInputFormat.setUnboundRecordFilter(job, DummyUnboundRecordFilter.class);
  try {
    ParquetInputFormat.setFilterPredicate(conf, p);
    fail("this should throw");
  } catch (IllegalArgumentException e) {
    assertEquals(
        "You cannot provide a FilterPredicate after providing an UnboundRecordFilter",
        e.getMessage());
  }

  job = new Job();
  conf = job.getConfiguration();

  ParquetInputFormat.setFilterPredicate(conf, p);
  try {
    ParquetInputFormat.setUnboundRecordFilter(job, DummyUnboundRecordFilter.class);
    fail("this should throw");
  } catch (IllegalArgumentException e) {
    assertEquals(
        "You cannot provide an UnboundRecordFilter after providing a FilterPredicate",
        e.getMessage());
  }
}
public static void main(String[] args) throws Exception {
  sourcePhoto = "/home/hduser/workspace/images/source.jpg";
  sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto);

  final Configuration conf = new Configuration();
  MongoConfigUtil.setInputURI(conf, "mongodb://localhost/photo.fingerprint");
  MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/photo.handsomeOut");
  System.out.println("Conf: " + conf);

  final Job job = new Job(conf, "similar photo");
  job.setJarByClass(MdbSimilarPhoto.class);

  // Mapper, Combiner and Reducer class definitions
  job.setMapperClass(PhotoMapper.class);
  job.setCombinerClass(SimilarityReducer.class);
  job.setReducerClass(SimilarityReducer.class);

  // output key/value type definitions
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  // InputFormat and OutputFormat type definitions
  job.setInputFormatClass(MongoInputFormat.class);
  job.setOutputFormatClass(MongoOutputFormat.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();

  Job job = new Job(conf, conf.get("job_name"));
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  Path output = new Path(conf.get("output_dir"));
  FileOutputFormat.setOutputPath(job, output);
  output.getFileSystem(conf).delete(output, true);

  job.setJarByClass(BrowerLogFormatMR.class);
  job.setMapperClass(BrowerLogFormatMapper.class);
  job.setReducerClass(BrowerLogFormatReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);

  int code = job.waitForCompletion(true) ? 0 : 1;
  return code;
}
public Path write(Message... messages) throws Exception {
  synchronized (WriteUsingMR.class) {
    outputPath = TestUtils.someTemporaryFilePath();
    Path inputPath = TestUtils.someTemporaryFilePath();
    FileSystem fileSystem = inputPath.getFileSystem(conf);
    fileSystem.create(inputPath);
    inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

    final Job job = new Job(conf, "write");

    // input not really used
    TextInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(WritingMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(ProtoParquetOutputFormat.class);
    ProtoParquetOutputFormat.setOutputPath(job, outputPath);
    ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

    waitForJob(job);

    inputMessages = null;
    return outputPath;
  }
}
public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException {
  Job job = new Job();
  // Setting Random class to test getSequenceFileOutput{Key,Value}Class
  job.setOutputKeyClass(FloatWritable.class);
  job.setOutputValueClass(BooleanWritable.class);

  assertEquals(
      "SequenceFileOutputKeyClass should default to outputKeyClass",
      FloatWritable.class,
      SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
  assertEquals(
      "SequenceFileOutputValueClass should default to outputValueClass",
      BooleanWritable.class,
      SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));

  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

  assertEquals(
      "SequenceFileOutputKeyClass not updated",
      IntWritable.class,
      SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
  assertEquals(
      "SequenceFileOutputValueClass not updated",
      DoubleWritable.class,
      SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
}
public static void dijkstra(String input, String output) throws Exception {
  String temp = output;

  // Run the HITS algorithm (job 2) for k = 32 iterations
  for (int i = 0; i < 32; i++) {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hubsandspokes");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeMapper.class);
    job.setReducerClass(HubSpokeReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NodeWritable.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // Swap the input and output paths for the next iteration
    input = output;
    output = temp + Integer.toString(i);

    // Wait for the job to complete
    boolean b = job.waitForCompletion(true);
    if (!b) {
      System.exit(2);
    }
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(RECOMMENDATIONS_PER_USER, "k", "recommendations per user.");

  if (parseArguments(args) == null) {
    return -1;
  }

  Job job =
      prepareJob(
          getInputPath(),
          getOutputPath(),
          SequenceFileInputFormat.class,
          TopKRecommendationsMapper.class,
          IntWritable.class,
          Text.class,
          SequenceFileOutputFormat.class);
  Configuration conf = job.getConfiguration();
  conf.setInt(RECOMMENDATIONS_PER_USER, Integer.parseInt(getOption(RECOMMENDATIONS_PER_USER)));

  job.waitForCompletion(true);
  return 0;
}
private void runTestOnTable(Table table)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    LOG.info("Before map/reduce startup");
    job = new Job(table.getConfiguration(), "process column contents");
    job.setNumReduceTasks(1);
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    TableMapReduceUtil.initTableMapperJob(
        table.getName(),
        scan,
        MultithreadedTableMapper.class,
        ImmutableBytesWritable.class,
        Put.class,
        job);
    MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
    MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
    TableMapReduceUtil.initTableReducerJob(
        table.getName().getNameAsString(), IdentityTableReducer.class, job);
    FileOutputFormat.setOutputPath(job, new Path("test"));
    LOG.info("Started " + table.getName());
    assertTrue(job.waitForCompletion(true));
    LOG.info("After map/reduce completion");

    // verify map-reduce results
    verify(table.getName());
  } finally {
    table.close();
    if (job != null) {
      FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
/** Update {@link #lastStatus} so that it can be viewed from outside. */
private void updateStatus() {
  int highPriorityFiles = 0;
  int lowPriorityFiles = 0;
  List<JobStatus> jobs = new ArrayList<JobStatus>();
  List<String> highPriorityFileNames = new ArrayList<String>();
  for (Map.Entry<String, CorruptFileInfo> e : fileIndex.entrySet()) {
    String fileName = e.getKey();
    CorruptFileInfo fileInfo = e.getValue();
    if (fileInfo.getHighestPriority() > 0) {
      highPriorityFileNames.add(fileName);
      highPriorityFiles += 1;
    } else {
      lowPriorityFiles += 1;
    }
  }
  for (Job job : jobIndex.keySet()) {
    String url = job.getTrackingURL();
    String name = job.getJobName();
    JobID jobId = job.getID();
    jobs.add(new BlockFixer.JobStatus(jobId, name, url));
  }
  lastStatus =
      new BlockFixer.Status(highPriorityFiles, lowPriorityFiles, jobs, highPriorityFileNames);
  RaidNodeMetrics.getInstance().corruptFilesHighPri.set(highPriorityFiles);
  RaidNodeMetrics.getInstance().corruptFilesLowPri.set(lowPriorityFiles);
  LOG.info("Update status done." + lastStatus.toString());
}
/** Handle a successful job. */
private void succeedJob(Job job, long filesSucceeded, long filesFailed) throws IOException {
  String jobName = job.getJobName();
  LOG.info("Job " + job.getID() + "(" + jobName + ") finished (succeeded)");

  if (filesFailed == 0) {
    // no files have failed
    for (CorruptFileInfo fileInfo : jobIndex.get(job)) {
      boolean failed = false;
      fileInfo.finishJob(jobName, failed);
    }
  } else {
    // we have to look at the output to check which files have failed
    Set<String> failedFiles = getFailedFiles(job);
    for (CorruptFileInfo fileInfo : jobIndex.get(job)) {
      if (failedFiles.contains(fileInfo.getFile().toString())) {
        boolean failed = true;
        fileInfo.finishJob(jobName, failed);
      } else {
        // call succeed for files that have succeeded or for which no action
        // was taken
        boolean failed = false;
        fileInfo.finishJob(jobName, failed);
      }
    }
  }

  // report succeeded files to metrics
  incrFilesFixed(filesSucceeded);
  incrFileFixFailures(filesFailed);
  numJobsRunning--;
}
/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p><b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the
 * configuration as a means to pass the token to MapReduce tasks. This information is BASE64
 * encoded to provide a charset safe conversion to a string, but this conversion is not intended
 * to be secure. {@link PasswordToken} is one example that is insecure in this way; however,
 * {@link DelegationToken}s, acquired using {@link
 * SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job the Hadoop job instance to be configured
 * @param principal a valid Accumulo user name (user must have Table.CREATE permission)
 * @param token the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
    throws AccumuloSecurityException {
  if (token instanceof KerberosToken) {
    log.info("Received KerberosToken, attempting to fetch DelegationToken");
    try {
      Instance instance = getInstance(job);
      Connector conn = instance.getConnector(principal, token);
      token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
    } catch (Exception e) {
      log.warn(
          "Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo",
          e);
    }
  }
  // DelegationTokens can be passed securely from user to task without serializing insecurely in
  // the configuration
  if (token instanceof DelegationTokenImpl) {
    DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

    // Convert it into a Hadoop Token
    AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
    Token<AuthenticationTokenIdentifier> hadoopToken =
        new Token<>(
            identifier.getBytes(),
            delegationToken.getPassword(),
            identifier.getKind(),
            delegationToken.getServiceName());

    // Add the Hadoop Token to the Job so it gets serialized and passed along.
    job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
  }

  InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
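// Usage sketch for the method above (an illustration, not part of the original source). The
// enclosing class is assumed to be Accumulo's AccumuloInputFormat, and the principal/password
// values are hypothetical. A PasswordToken is serialized (BASE64) into the job configuration,
// which the warning above flags as insecure; a KerberosToken would instead be exchanged for a
// DelegationToken by the code above.
public static void connectorInfoUsageSketch() throws Exception {
  Job job = Job.getInstance(new Configuration(), "accumulo-scan");
  AccumuloInputFormat.setConnectorInfo(job, "mapreduce_user", new PasswordToken("hunter2"));
}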
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "simple feature writer");

  job.setJarByClass(FeatureWriterJob.class);
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(GeoMesaInputFormat.class);
  job.setOutputFormatClass(GeoMesaOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(ScalaSimpleFeature.class);
  job.setNumReduceTasks(0);

  Map<String, String> params = new HashMap<String, String>();
  params.put("instanceId", "myinstance");
  params.put("zookeepers", "zoo1,zoo2,zoo3");
  params.put("user", "myuser");
  params.put("password", "mypassword");
  params.put("tableName", "mycatalog");

  Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));

  GeoMesaInputFormat.configure(job, params, query);

  Map<String, String> outParams = new HashMap<String, String>();
  outParams.put("instanceId", "myinstance");
  outParams.put("zookeepers", "zoo1,zoo2,zoo3");
  outParams.put("user", "myuser");
  outParams.put("password", "mypassword");
  outParams.put("tableName", "mycatalog_2");

  GeoMesaOutputFormat.configureDataStore(job, outParams);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: WhiteHouseVisitorDriver <input path> <output path>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJarByClass(WhiteHouseVisitorDriver.class);

  // input/output paths
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  // map/combine/reduce class definitions
  job.setMapperClass(WhiteHouseVisitorMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);

  // key/value type definitions
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
private void configureMapTasks(String arg, Job job) throws IOException {
  job.setMapperClass(IdentityMapper.class);
  job.setMapOutputKeyClass(keyClass);
  job.setMapOutputValueClass(valueClass);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, arg);
}
private static void StartingJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.setLong("my.vertex.num", num);

  job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
  job.setJarByClass(LevNestDissectJob.class);
  job.setMapperClass(StartVertexMapper.class);
  job.setReducerClass(StartVertexReducer.class);

  in = out.suffix("/" + outPath_count);
  FileInputFormat.addInputPath(job, in);

  out_start = out.suffix("/" + outPath_start);
  if (fs.exists(out_start)) {
    fs.delete(out_start, true);
  }
  FileOutputFormat.setOutputPath(job, out_start);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VertexWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.waitForCompletion(true);

  depth = depth == 0 ? depth + 1 : depth;
  wasStart = true;
}
private void runIncrementalPELoad(
    Configuration conf, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Path outDir)
    throws IOException, UnsupportedEncodingException, InterruptedException,
        ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration()
      .setStrings(
          "io.serializations",
          conf.get("io.serializations"),
          MutationSerialization.class.getName(),
          ResultSerialization.class.getName(),
          KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

  assertTrue(job.waitForCompletion(true));
}
@Before
public void setup() throws IOException {
  job = Job.getInstance();
  conf = job.getConfiguration();
  attemptId = new TaskAttemptID();
  Path inputPath = new Path(TABLE_PATH_STR);
  inputSplit = new FileSplit(inputPath, 0, 1, null);
  Descriptor desc = new Descriptor(new File(TABLE_PATH_STR), "keyspace", "columnFamily", 1, false);

  doReturn(desc).when(ssTableColumnRecordReader).getDescriptor();
  doReturn(desc).when(ssTableRowRecordReader).getDescriptor();

  doNothing()
      .when(ssTableColumnRecordReader)
      .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));
  doNothing()
      .when(ssTableRowRecordReader)
      .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));

  doReturn(ssTableReader)
      .when(ssTableColumnRecordReader)
      .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));
  doReturn(ssTableReader)
      .when(ssTableRowRecordReader)
      .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));

  when(ssTableReader.getDirectScanner(null)).thenReturn(tableScanner);
}
@Test
public void readOutsidePig()
    throws ClassCastException, ParseException, ClassNotFoundException, InstantiationException,
        IllegalAccessException, IOException, InterruptedException {
  // simulate Pig front-end runtime
  final SequenceFileLoader<IntWritable, Text> storage =
      new SequenceFileLoader<IntWritable, Text>(
          "-c " + IntWritableConverter.class.getName(), "-c " + TextConverter.class.getName());
  Job job = new Job();
  storage.setUDFContextSignature("12345");
  storage.setLocation(tempFilename, job);

  // simulate Pig back-end runtime
  RecordReader<DataInputBuffer, DataInputBuffer> reader = new RawSequenceFileRecordReader();
  FileSplit fileSplit =
      new FileSplit(
          new Path(tempFilename), 0, new File(tempFilename).length(), new String[] {"localhost"});
  TaskAttemptContext context =
      new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());
  reader.initialize(fileSplit, context);

  InputSplit[] wrappedSplits = new InputSplit[] {fileSplit};
  int inputIndex = 0;
  List<OperatorKey> targetOps = Arrays.asList(new OperatorKey("54321", 0));
  int splitIndex = 0;
  PigSplit split = new PigSplit(wrappedSplits, inputIndex, targetOps, splitIndex);
  split.setConf(job.getConfiguration());
  storage.prepareToRead(reader, split);

  // read tuples and validate
  validate(new LoadFuncTupleIterator(storage));
}
/**
 * Generate random data, compress it, index and md5 hash the data. Then read it all back and md5
 * that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
    throws IOException, NoSuchAlgorithmException, InterruptedException {
  Configuration conf = new Configuration();
  conf.setLong("fs.local.block.size", charsToOutput / 2);
  // reducing block size to force a split of the tiny file
  conf.set("io.compression.codecs", LzopCodec.class.getName());

  Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

  FileSystem.getLocal(conf).close(); // remove cached filesystem (if any)
  FileSystem localFs = FileSystem.getLocal(conf);
  localFs.delete(outputDir_, true);
  localFs.mkdirs(outputDir_);

  Job job = new Job(conf);
  TextOutputFormat.setCompressOutput(job, true);
  TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
  TextOutputFormat.setOutputPath(job, outputDir_);

  TaskAttemptContext attemptContext =
      new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

  // create some input data
  byte[] expectedMd5 = createTestInput(outputDir_, localFs, attemptContext, charsToOutput);

  if (testWithIndex) {
    Path lzoFile = new Path(outputDir_, lzoFileName_);
    LzoIndex.createIndex(localFs, lzoFile);
  }

  LzoTextInputFormat inputFormat = new LzoTextInputFormat();
  TextInputFormat.setInputPaths(job, outputDir_);

  List<InputSplit> is = inputFormat.getSplits(job);
  // verify we have the right number of lzo chunks
  if (testWithIndex && OUTPUT_BIG == charsToOutput) {
    assertEquals(3, is.size());
  } else {
    assertEquals(1, is.size());
  }

  // let's read it all and calculate the md5 hash
  for (InputSplit inputSplit : is) {
    RecordReader<LongWritable, Text> rr =
        inputFormat.createRecordReader(inputSplit, attemptContext);
    rr.initialize(inputSplit, attemptContext);

    while (rr.nextKeyValue()) {
      Text value = rr.getCurrentValue();
      md5_.update(value.getBytes(), 0, value.getLength());
    }

    rr.close();
  }

  localFs.close();
  assertTrue(Arrays.equals(expectedMd5, md5_.digest()));
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job = new Job(conf, "ESIndexCreator");
  job.setJarByClass(ESIndexCreator.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setMapperClass(ESIndexCreator.MyMapper.class);
  job.setNumReduceTasks(0); // Skip the Reduce task

  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Program arguments:
  // 0: input file path
  // 1: output file path
  // 2: Elasticsearch server's host name
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.getConfiguration().set("host", args[2]);

  job.waitForCompletion(true);
}
/**
 * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a
 * sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes
 * to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = sampler.getSample(inf, job);
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
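// Sketch of how a sort job might consume the partition file written above (illustrative only;
// the job name, reducer count, /tmp path, and sampler settings are hypothetical). It assumes the
// standard Hadoop TotalOrderPartitioner and InputSampler classes that the method above mirrors.
public static void totalOrderSortSketch(Configuration conf) throws Exception {
  Job sortJob = Job.getInstance(conf, "total-order-sort");
  sortJob.setNumReduceTasks(4);
  sortJob.setPartitionerClass(TotalOrderPartitioner.class);
  // Tell the partitioner (and writePartitionFile) where the split points should live.
  TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), new Path("/tmp/_partitions"));
  // Sample ~10% of keys, up to 10000 samples from at most 10 splits, then write the split points.
  InputSampler.writePartitionFile(
      sortJob, new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10));
}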
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.out.println("usage: CountRows <table name>");
    return 1;
  }

  Configuration conf = getConf();
  try {
    String tableName = args[0];
    LOG.info("Before map/reduce startup");
    Job job = new Job(conf, "query: count rows");
    job.setJarByClass(this.getClass());
    job.getConfiguration().set(TABLE_NAME, args[0]);

    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(
        tableName, scan, CountRowMapper.class, ImmutableBytesWritable.class, Put.class, job);
    // TableMapReduceUtil.initTableReducerJob(tableName,
    //     IdentityTableReducer.class, job);
    job.setNumReduceTasks(0);

    LOG.info("Started " + tableName);
    job.waitForCompletion(true);
    LOG.info("After map/reduce completion");
  } catch (Exception e) {
    e.printStackTrace();
    return 1;
  }
  return 0;
}
/**
 * Driver for InputSampler from the command line. Configures a Job instance and calls {@link
 * #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  ArrayList<String> otherArgs = new ArrayList<String>();
  Sampler<K, V> sampler = null;
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-r".equals(args[i])) {
        job.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else if ("-inFormat".equals(args[i])) {
        job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
      } else if ("-keyClass".equals(args[i])) {
        job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
      } else if ("-splitSample".equals(args[i])) {
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) {
          maxSplits = Integer.MAX_VALUE;
        }
        sampler = new SplitSampler<K, V>(numSamples, maxSplits);
      } else if ("-splitRandom".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) {
          maxSplits = Integer.MAX_VALUE;
        }
        sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
      } else if ("-splitInterval".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) {
          maxSplits = Integer.MAX_VALUE;
        }
        sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }
  if (job.getNumReduceTasks() <= 1) {
    System.err.println("Sampler requires more than one reducer");
    return printUsage();
  }
  if (otherArgs.size() < 2) {
    System.out.println("ERROR: Wrong number of parameters: ");
    return printUsage();
  }
  if (null == sampler) {
    sampler = new RandomSampler<K, V>(0.1, 10000, 10);
  }

  Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
  TotalOrderPartitioner.setPartitionFile(getConf(), outf);
  for (String s : otherArgs) {
    FileInputFormat.addInputPath(job, new Path(s));
  }
  InputSampler.<K, V>writePartitionFile(job, sampler);

  return 0;
}
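// Example argument vector accepted by run() above (illustrative; the paths are hypothetical and
// the flag values arbitrary, but they satisfy the driver's checks): four reducers, a 10% random
// sample capped at 10000 keys from at most 10 splits, one input directory, and the partition
// file path as the last argument.
static final String[] EXAMPLE_SAMPLER_ARGS = {
  "-r", "4",
  "-splitRandom", "0.1", "10000", "10",
  "/data/input", "/tmp/_partitions"
};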
/**
 * Performs an HDF to text operation as a MapReduce job and returns the total number of points
 * generated.
 *
 * @param inPath the input HDF file or directory
 * @param outPath the path of the generated text output
 * @param datasetName the name of the dataset to extract
 * @param skipFillValue whether to skip fill values
 * @param params additional operation parameters
 * @return the total number of points generated
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(
    Path inPath, Path outPath, String datasetName, boolean skipFillValue, OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "HDFToText");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(HDFToText.class);
  job.setJobName("HDFToText");

  // Set Map function details
  job.setMapperClass(HDFToTextMap.class);
  job.setNumReduceTasks(0);

  // Set input information
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  if (conf.get("shape") == null) {
    conf.setClass("shape", NASAPoint.class, Shape.class);
  }
  conf.set("dataset", datasetName);
  conf.setBoolean("skipfillvalue", skipFillValue);

  // Set output information
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outPath);

  // Run the job
  boolean verbose = conf.getBoolean("verbose", false);
  job.waitForCompletion(verbose);
  Counters counters = job.getCounters();
  Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
  final long resultCount = outputRecordCounter.getValue();
  return resultCount;
}
/** Run a job. */
static void runJob(String name, Job job, Machine machine, String startmessage, Util.Timer timer) {
  JOB_SEMAPHORE.acquireUninterruptibly();
  Long starttime = null;
  try {
    try {
      starttime = timer.tick("starting " + name + " ...\n " + startmessage);

      // initialize and submit a job
      machine.init(job);
      job.submit();

      // Separate jobs
      final long sleeptime = 1000L * job.getConfiguration().getInt(JOB_SEPARATION_PROPERTY, 10);
      if (sleeptime > 0) {
        Util.out.println(name + "> sleep(" + Util.millis2String(sleeptime) + ")");
        Thread.sleep(sleeptime);
      }
    } finally {
      JOB_SEMAPHORE.release();
    }

    if (!job.waitForCompletion(false)) {
      throw new RuntimeException(name + " failed.");
    }
  } catch (Exception e) {
    throw e instanceof RuntimeException ? (RuntimeException) e : new RuntimeException(e);
  } finally {
    if (starttime != null) {
      timer.tick(name + "> timetaken=" + Util.millis2String(timer.tick() - starttime));
    }
  }
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (args.length < 1) {
    System.out.println("USAGE: RFDSReasoner [pool path] [options]");
    return;
  }

  Job job = new Job(conf, "reasoner");
  job.setJarByClass(TCMReasoner.class);
  System.out.println(args[0]);

  job.setMapperClass(TCMMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Triple.class);

  job.setReducerClass(TCMReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Triple.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  job.waitForCompletion(true);

  Counter derivedTriples =
      job.getCounters()
          .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
  System.out.println(derivedTriples.getValue());
  return;
}
public static void main(String[] args) throws Exception {
  String dir1 = "/user/miyuru/wcout";
  String dir2 = "/user/miyuru/notinverts";

  // We first delete the temporary directories if they exist on the HDFS
  FileSystem fs1 = FileSystem.get(new JobConf());
  if (fs1.exists(new Path(dir2))) {
    fs1.delete(new Path(dir2), true);
  }

  JobConf conf = new JobConf();
  conf.setNumMapTasks(96);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setMapperClass(TokenizerMapper.class);
  conf.setReducerClass(IntSumReducer.class);
  conf.setCombinerClass(IntSumReducer.class);
  conf.setInputFormat(NLinesInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(dir1));
  FileOutputFormat.setOutputPath(conf, new Path(dir2));

  Job job = new Job(conf, "NotInFinder");
  job.setJarByClass(WordCount.class);
  // job.setMapperClass(TokenizerMapper.class);
  // job.setCombinerClass(IntSumReducer.class);
  // job.setReducerClass(IntSumReducer.class);
  // job.setOutputKeyClass(LongWritable.class);
  // job.setOutputValueClass(LongWritable.class);

  job.setSortComparatorClass(SortComparator.class);
  job.waitForCompletion(true);
}
@Override
public int run(String[] strings) throws Exception {
  Configuration configuration = getConf();
  configuration.setLong("mapred.min.split.size", 512 * 1024 * 1024L);

  Job numJob = new Job(configuration, "calculate film program seed num job ");
  Path[] paths = getPaths(strings[0].split(","));
  HadoopUtils.deleteIfExist(strings[1]);
  MapReduceUtils.initMapperJob(
      NumCountMapper.class, Text.class, Text.class, this.getClass(), numJob, paths);
  // TableMapReduceUtil.initTableReducerJob(strings[1], NumCountReducer.class, numJob);
  MapReduceUtils.initReducerJob(new Path(strings[1]), NumCountReducer.class, numJob);
  numJob.waitForCompletion(true);

  Job programeSets = new Job(configuration, "calculate program set num job");
  HadoopUtils.deleteIfExist(strings[2]);
  MapReduceUtils.initMapperJob(
      NumProgramSetsMapper.class,
      Text.class,
      Text.class,
      this.getClass(),
      programeSets,
      new Path(strings[1]));
  programeSets.setCombinerClass(NumProgramSetCombiner.class);
  MapReduceUtils.initReducerJob(new Path(strings[2]), NumProgramSetsReducer.class, programeSets);

  return programeSets.waitForCompletion(true) ? 0 : 1;
  // return 0;
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  // First argument is the table name.
  String tableName = args[0];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(CachingRowCounter.class);

  // Columns are space delimited
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 1;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }

  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());
  if (sb.length() > 0) {
    for (String columnName : sb.toString().split(" ")) {
      String[] fields = columnName.split(":");
      if (fields.length == 1) {
        scan.addFamily(Bytes.toBytes(fields[0]));
      } else {
        scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
      }
    }
  }
  scan.setCaching(100);

  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}