private static void setRecordCount(State state, Job job) {
  Counters counters = null;
  try {
    counters = job.getCounters();
  } catch (IOException e) {
    LOG.info("Failed to get job counters. Record count will not be set.", e);
    return;
  }

  Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
  if (recordCounter != null && recordCounter.getValue() != 0) {
    state.setProp(SlaEventKeys.RECORD_COUNT_KEY, Long.toString(recordCounter.getValue()));
    return;
  }

  recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
  if (recordCounter != null && recordCounter.getValue() != 0) {
    state.setProp(SlaEventKeys.RECORD_COUNT_KEY, Long.toString(recordCounter.getValue()));
    return;
  }

  LOG.info("No non-zero record count found in either the mapper or the reducer counters.");
}
private void validateFileCounters(
    org.apache.hadoop.mapreduce.Counters counter,
    long fileBytesRead,
    long fileBytesWritten,
    long mapOutputBytes,
    long mapOutputMaterializedBytes) {
  assertTrue(
      counter
              .findCounter(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.Counter.BYTES_READ)
              .getValue()
          != 0);
  assertEquals(
      fileBytesRead,
      counter
          .findCounter(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.Counter.BYTES_READ)
          .getValue());
  assertTrue(
      counter
              .findCounter(
                  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.Counter.BYTES_WRITTEN)
              .getValue()
          != 0);
  if (mapOutputBytes >= 0) {
    assertTrue(counter.findCounter(MAP_OUTPUT_BYTES).getValue() != 0);
  }
  if (mapOutputMaterializedBytes >= 0) {
    assertTrue(counter.findCounter(MAP_OUTPUT_MATERIALIZED_BYTES).getValue() != 0);
  }
}
/**
 * Performs an HDF-to-text operation as a MapReduce job and returns the total number of points
 * generated.
 *
 * @param inPath the input path of the HDF data
 * @param outPath the output path for the generated text
 * @param datasetName the name of the dataset to extract
 * @param skipFillValue whether fill values should be skipped
 * @param params additional operation parameters
 * @return the total number of points written by the job
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(
    Path inPath, Path outPath, String datasetName, boolean skipFillValue, OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "HDFToText");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(HDFToText.class);
  job.setJobName("HDFToText");

  // Set map function details
  job.setMapperClass(HDFToTextMap.class);
  job.setNumReduceTasks(0);

  // Set input information
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  if (conf.get("shape") == null) {
    conf.setClass("shape", NASAPoint.class, Shape.class);
  }
  conf.set("dataset", datasetName);
  conf.setBoolean("skipfillvalue", skipFillValue);

  // Set output information
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outPath);

  // Run the job and read the number of map output records from the counters
  boolean verbose = conf.getBoolean("verbose", false);
  job.waitForCompletion(verbose);
  Counters counters = job.getCounters();
  Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
  final long resultCount = outputRecordCounter.getValue();
  return resultCount;
}
@Test
public void testJSONBulkImporter() throws Exception {
  // Prepare input file:
  File inputFile = File.createTempFile("TestJSONImportInput", ".txt", getLocalTempDir());
  TestingResources.writeTextFile(
      inputFile, TestingResources.get(BulkImporterTestUtils.JSON_IMPORT_DATA));

  Configuration conf = getConf();
  conf.set(
      DescribedInputTextBulkImporter.CONF_FILE,
      BulkImporterTestUtils.localResource(BulkImporterTestUtils.FOO_IMPORT_DESCRIPTOR));

  // Run the bulk-import:
  final KijiMapReduceJob job =
      KijiBulkImportJobBuilder.create()
          .withConf(conf)
          .withBulkImporter(JSONBulkImporter.class)
          .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString())))
          .withOutput(new DirectKijiTableMapReduceJobOutput(mTable.getURI()))
          .build();
  assertTrue(job.run());

  final Counters counters = job.getHadoopJob().getCounters();
  assertEquals(
      3, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_PROCESSED).getValue());
  assertEquals(
      1, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE).getValue());
  assertEquals(
      0, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_REJECTED).getValue());

  // Validate output:
  final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("info"));
  BulkImporterTestUtils.validateImportedRows(scanner, false);
  scanner.close();
}
private void validateCounters(
    org.apache.hadoop.mapreduce.Counters counter,
    long spillRecCnt,
    long mapInputRecords,
    long mapOutputRecords) {
  // Check that the number of spilled records matches the expected value
  assertEquals(spillRecCnt, counter.findCounter(SPILLED_RECORDS).getValue());
  assertEquals(mapInputRecords, counter.findCounter(MAP_INPUT_RECORDS).getValue());
  assertEquals(mapOutputRecords, counter.findCounter(MAP_OUTPUT_RECORDS).getValue());
}
@Test(expected = IOException.class)
public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
  Mapper.Context context = mock(Mapper.Context.class);
  Counters counters = new Counters();
  Counter counter = counters.findCounter(MergeRecordCounter.BAD_RECORD);
  when(context.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(counter);
  MergeKeyMapper mapper = new MergeKeyMapper();
  Text val = new Text("valueOfKey");
  mapper.map(null, val, context);
}
private Counters getCounters() {
  Counters c = new Counters();
  Map<String, Map<String, Long>> values = counters.value();
  for (Map.Entry<String, Map<String, Long>> e : values.entrySet()) {
    CounterGroup cg = c.getGroup(e.getKey());
    for (Map.Entry<String, Long> f : e.getValue().entrySet()) {
      cg.findCounter(f.getKey()).setValue(f.getValue());
    }
  }
  return c;
}
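The getCounters() method above rebuilds a new-API Counters object from a nested map of group name to counter name/value pairs. For context, here is a minimal sketch of the inverse direction, flattening a Counters instance back into that same map shape; the CounterMaps class and toMap method are hypothetical helpers for illustration, not part of the original code.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;

public final class CounterMaps {

  private CounterMaps() {}

  /** Flattens a Counters instance into group name -> (counter name -> value). */
  public static Map<String, Map<String, Long>> toMap(Counters counters) {
    Map<String, Map<String, Long>> values = new HashMap<String, Map<String, Long>>();
    for (CounterGroup group : counters) {
      Map<String, Long> groupValues = new HashMap<String, Long>();
      for (Counter counter : group) {
        groupValues.put(counter.getName(), counter.getValue());
      }
      values.put(group.getName(), groupValues);
    }
    return values;
  }
}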
private void validateCounters(org.apache.hadoop.mapreduce.Counters counters) {
  Iterator<org.apache.hadoop.mapreduce.CounterGroup> it = counters.iterator();
  while (it.hasNext()) {
    org.apache.hadoop.mapreduce.CounterGroup group = it.next();
    LOG.info("Group " + group.getDisplayName());
    Iterator<org.apache.hadoop.mapreduce.Counter> itc = group.iterator();
    while (itc.hasNext()) {
      LOG.info("Counter is " + itc.next().getDisplayName());
    }
  }
  Assert.assertEquals(1, counters.countCounters());
}
private void setSummarySlotSeconds(JobSummary summary, Counters allCounters) {
  Counter slotMillisMapCounter = allCounters.findCounter(JobCounter.SLOTS_MILLIS_MAPS);
  if (slotMillisMapCounter != null) {
    summary.setMapSlotSeconds(slotMillisMapCounter.getValue() / 1000);
  }

  Counter slotMillisReduceCounter = allCounters.findCounter(JobCounter.SLOTS_MILLIS_REDUCES);
  if (slotMillisReduceCounter != null) {
    summary.setReduceSlotSeconds(slotMillisReduceCounter.getValue() / 1000);
  }
}
static Counters fromAvro(JhCounters counters) {
  Counters result = new Counters();
  for (JhCounterGroup g : counters.groups) {
    CounterGroup group =
        result.addGroup(
            StringInterner.weakIntern(g.name.toString()),
            StringInterner.weakIntern(g.displayName.toString()));
    for (JhCounter c : g.counts) {
      group.addCounter(
          StringInterner.weakIntern(c.name.toString()),
          StringInterner.weakIntern(c.displayName.toString()),
          c.value);
    }
  }
  return result;
}
@Test
public void parsesMalformedTemperature() throws IOException, InterruptedException {
  Text value =
      new Text(
          "0335999999433181957042302005+37950+139117SAO +0004"
              // Year ^^^^
              + "RJSN V02011359003150070356999999433201957010100005+353");
              // Temperature ^^^^^
  Counters counters = new Counters();
  new MapDriver<LongWritable, Text, Text, IntWritable>()
      .withMapper(new MaxTemperatureMapper())
      .withInput(new LongWritable(0), value)
      .withOutput(new Text("1957"), new IntWritable(19570))
      .withCounters(counters)
      .runTest();
  Counter c = counters.findCounter(MaxTemperatureMapper.Temperature.OVER_100);
  assertThat(c.getValue(), is(1L));
}
@Test(dataProvider = "simpleTest")
public void countersTest(String inString, String[] splits, int lineNum, int wordCount)
    throws IOException {
  mapDriver.withInput(new LongWritable(), new Text(inString));
  for (String split : splits) {
    mapDriver.withOutput(new Text(split), new IntWritable(1));
  }
  mapDriver.runTest();

  Counters counters = mapDriver.getCounters();
  assertEquals(
      counters.findCounter(WordCountWithTestsMapper.Status.LINES_NUM).getValue(),
      lineNum,
      "Wrong lines num!");
  assertEquals(
      counters.findCounter(WordCountWithTestsMapper.Status.WORD_COUNT).getValue(),
      wordCount,
      "Wrong word count!");
}
protected Set<Integer> getNeeds(String type, Counters counts) {
  CounterGroup group = counts.getGroup(type);
  HashSet<Integer> result = new HashSet<Integer>(group.size());
  for (Counter counter : group) {
    String name = counter.getName();
    assert name.startsWith("t");
    result.add(Integer.parseInt(name.substring(1)));
  }
  return result;
}
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
  Mapper.Context context = mock(Mapper.Context.class);
  Counters counters = new Counters();
  Counter counter = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW);
  when(context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(counter);

  MergeKeyMapper mapper = new MergeKeyMapper();
  Text key = new Text("abc123");
  Text val = new Text("valueOfKey");
  mapper.isOld = false;
  mapper.map(key, val, context);

  HihoValue hihoValue = new HihoValue();
  hihoValue.setVal(val);
  hihoValue.setIsOld(false);
  HihoTuple hihoTuple = new HihoTuple();
  hihoTuple.setKey(key);
  verify(context).write(hihoTuple, hihoValue);
  assertEquals(1, context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue());
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "Join");
  job.setJarByClass(Join.class);
  job.setMapperClass(MapClass.class);
  job.setPartitionerClass(PartitionerClass.class);
  job.setReducerClass(ReduceClass.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // NOTE: we use two different input paths here.
  FileInputFormat.addInputPath(job, new Path(args[Utils.argInIndex]));
  FileInputFormat.addInputPath(job, new Path(args[Utils.argInIndex + 1]));
  FileOutputFormat.setOutputPath(job, new Path(args[Utils.argInIndex + 2]));

  boolean result = job.waitForCompletion(true);

  // Write totalRecords and totalBytes from the task counters to a file.
  if (result) {
    Counters counters = job.getCounters();
    long totalRecords =
        counters
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS")
            .getValue();
    long totalBytes =
        counters
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_BYTES")
            .getValue();
    Utils.uploadCountersToS3(totalRecords, totalBytes, "Join");
  }

  System.exit(result ? 0 : 1);
}
private void validateCounters(
    Counters counters,
    long mapInputRecords,
    long mapOutputRecords,
    long reduceInputGroups,
    long reduceOutputRecords) {
  assertEquals(
      "MapInputRecords",
      mapInputRecords,
      counters.findCounter("MyCounterGroup", "MAP_INPUT_RECORDS").getValue());
  assertEquals(
      "MapOutputRecords",
      mapOutputRecords,
      counters.findCounter("MyCounterGroup", "MAP_OUTPUT_RECORDS").getValue());
  assertEquals(
      "ReduceInputGroups",
      reduceInputGroups,
      counters.findCounter("MyCounterGroup", "REDUCE_INPUT_GROUPS").getValue());
  assertEquals(
      "ReduceOutputRecords",
      reduceOutputRecords,
      counters.findCounter("MyCounterGroup", "REDUCE_OUTPUT_RECORDS").getValue());
}
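The group and counter names checked in validateCounters() above have to be incremented on the task side. Below is a minimal sketch of a mapper that does so through the task context; the class name, field, and output types are assumptions for illustration, not taken from the original test.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/** Hypothetical mapper that increments the "MyCounterGroup" counters validated above. */
public class MyCounterGroupMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

  private static final String GROUP = "MyCounterGroup";

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Count every record seen by the mapper in the custom counter group.
    context.getCounter(GROUP, "MAP_INPUT_RECORDS").increment(1);

    context.write(value, new LongWritable(1));
    context.getCounter(GROUP, "MAP_OUTPUT_RECORDS").increment(1);
  }
}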
public boolean verify(long expectedReferenced) throws Exception {
  if (job == null) {
    throw new IllegalStateException("You should call run() first");
  }
  Counters counters = job.getCounters();
  Counter referenced = counters.findCounter(Counts.REFERENCED);
  Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
  Counter undefined = counters.findCounter(Counts.UNDEFINED);

  boolean success = true;
  if (expectedReferenced != referenced.getValue()) {
    LOG.error(
        "Expected referenced count does not match actual referenced count. "
            + "expected referenced="
            + expectedReferenced
            + ", actual="
            + referenced.getValue());
    success = false;
  }
  if (unreferenced.getValue() > 0) {
    LOG.error(
        "Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue());
    success = false;
  }
  if (undefined.getValue() > 0) {
    LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
    success = false;
  }
  return success;
}
/**
 * Saves the centroids between K-means iterations.
 *
 * @param counters the counters containing the centroids
 */
public static void setCentroids(Counters counters) throws Exception {
  Configuration conf = setupConf();
  FileSystem fs = FileSystem.get(conf);
  Path path = new Path(BASE_OUTPUT);
  Path tempPath = new Path(BASE_OUTPUT + TEMP_FILE);

  if (!fs.exists(path)) {
    fs.mkdirs(path);
  }

  // If a centroid file already exists, move it aside so its contents can be appended below.
  path = new Path(BASE_OUTPUT + CENTROID_FILE);
  if (fs.exists(path)) {
    fs.rename(path, tempPath);
  }

  // Write the current iteration and the low/medium/high centroids with their counts.
  FSDataOutputStream out = fs.create(path);
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.ITERATION).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Common.Centroids.LOW.toString());
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.LOW).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.COUNT_LOW).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Common.Centroids.MEDIUM.toString());
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.MEDIUM).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.COUNT_MEDIUM).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Common.Centroids.HIGH.toString());
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.HIGH).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.COUNT_HIGH).getValue()));
  out.writeChar('\n');

  // Append the previous centroid file, if any, then remove it.
  if (fs.exists(tempPath)) {
    FSDataInputStream in = fs.open(tempPath);
    int i = 0;
    while ((i = in.read()) != -1) {
      out.write(i);
    }
    in.close();
    fs.delete(tempPath, false);
  }
  out.close();
}
public static Counter getCounter(String group, String name) {
  return COUNTERS.findCounter(group, name);
}
public static void main(String args[]) throws IOException {
  String inputFile = "/corpus/pagerank";
  long start = System.currentTimeMillis();
  long ncPageCount = 0;
  long pageCount = 0;
  int iterationCount = 0;
  try {
    do {
      log.info("Iteration " + iterationCount + " started");
      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "PageRank-Job-" + iterationCount);
      job.setJarByClass(PageRankJob.class);
      job.setInputFormatClass(TextInputFormat.class);
      FileInputFormat.addInputPath(job, new Path(inputFile));
      job.setMapperClass(PageRankMapper.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(PRWritable.class);
      job.setReducerClass(PageRankReducer.class);
      job.setOutputFormatClass(TextOutputFormat.class);
      String outputFile = "/output/" + System.currentTimeMillis();
      FileOutputFormat.setOutputPath(job, new Path(outputFile));
      if (!job.waitForCompletion(true)) {
        System.err.println("PageRank Job Failed");
      }

      inputFile = outputFile + "/part-r-00000";
      Counters counters = job.getCounters();
      long danglingPagePr = counters.findCounter(PRCounters.DANGLING_PAGE_PR).getValue();
      pageCount = counters.findCounter(PRCounters.PAGES_COUNT).getValue();

      // The second-stage job starts here.
      conf = new Configuration();
      conf.set("DANGLING_PAGE_PR", String.valueOf(danglingPagePr));
      conf.set("PAGES_COUNT", String.valueOf(pageCount));
      Job prCompleteJob = Job.getInstance(conf, "PRComplete-Job-" + iterationCount);
      prCompleteJob.setJarByClass(PageRankJob.class);
      prCompleteJob.setInputFormatClass(TextInputFormat.class);
      FileInputFormat.addInputPath(prCompleteJob, new Path(inputFile));
      prCompleteJob.setMapperClass(PRCompleteMapper.class);
      prCompleteJob.setMapOutputKeyClass(NullWritable.class);
      prCompleteJob.setMapOutputValueClass(Text.class);
      prCompleteJob.setNumReduceTasks(0);
      outputFile = "/output/" + System.currentTimeMillis();
      FileOutputFormat.setOutputPath(prCompleteJob, new Path(outputFile));
      if (!prCompleteJob.waitForCompletion(true)) {
        System.err.println("PRComplete Job Failed");
      }

      inputFile = outputFile + "/part-m-00000";
      counters = prCompleteJob.getCounters();
      ncPageCount = counters.findCounter(PRCounters.NON_CONVERGING_PAGES).getValue();
      iterationCount++;
    } while (ncPageCount > (long) (pageCount * THRESHOLD));
    log.info("Computation finished after " + iterationCount + " iterations");
    log.info("Total time: " + (System.currentTimeMillis() - start) + "ms");
  } catch (IOException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  }
}
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption("inputDelimiter", "id", "Input column delimiter", Delimiter.COMMA.getDelimiter());
  addOption("outputDelimiter", "od", "Output column delimiter", Delimiter.COMMA.getDelimiter());
  addOption(
      "sequenceIndex", "si", "Index of the column to insert the sequence into (0-based, default 0)", "0");
  addOption("startNumber", "sn", "Start value of the sequence number (default 0)", "0");
  addOption("columnSize", "cs", "Number of columns", true);
  addOption(
      "generateType", "gt", "Type of sequence to generate (SEQUENCE, TIMESTAMP)", GenerateType.SEQUENCE.getType());
  addOption("dateFormat", "df", "Date pattern (SimpleDateFormat)", /*FIXME*/ "yyyyMMdd");

  Map<String, String> parsedArgs = parseArguments(args);
  if (parsedArgs == null) {
    return JOB_FAIL;
  }

  ////////////////////////////////////////
  // Line Count Hadoop Job
  ///////////////////////////////////////

  // Get the temporary directory; the default is defined in flamingo-mapreduce-site.xml.
  Path temporaryPath = getTimestampTempPath();
  logger.info("Temporary Path : {}", temporaryPath.toString());

  Job lineCountJob =
      prepareJob(
          getInputPath(),
          temporaryPath,
          TextInputFormat.class,
          LineCountMapper.class,
          NullWritable.class,
          Text.class,
          TextOutputFormat.class);
  boolean step1 = lineCountJob.waitForCompletion(true);
  if (!step1) {
    return JOB_FAIL;
  }

  ////////////////////////////////////////////////
  // Calculating a start number per input split
  ////////////////////////////////////////////////

  // The counters contain, per mapper, the offset of each input split and its total row count.
  Counters counters = lineCountJob.getCounters();
  CounterGroup group = counters.getGroup(LineCountMapper.class.getName());

  // Sort by offset within the file.
  TreeMap<Long, Long> counterMap = new TreeMap<Long, Long>();
  for (Counter counter : group) {
    try {
      counterMap.put(Long.parseLong(counter.getName()), counter.getValue());
    } catch (NumberFormatException ex) {
      // Ignore counters whose names are not numeric offsets.
    }
  }

  ////////////////////////////////////////
  // Generate Sequence Hadoop Job
  ///////////////////////////////////////

  Job generateSequenceJob =
      prepareJob(
          getInputPath(),
          getOutputPath(),
          TextInputFormat.class,
          GenerateSequenceMapper.class,
          NullWritable.class,
          Text.class,
          TextOutputFormat.class);
  generateSequenceJob.getConfiguration().set("inputDelimiter", parsedArgs.get("--inputDelimiter"));
  generateSequenceJob.getConfiguration().set("outputDelimiter", parsedArgs.get("--outputDelimiter"));
  generateSequenceJob.getConfiguration().set("sequenceIndex", parsedArgs.get("--sequenceIndex"));
  generateSequenceJob.getConfiguration().set("generateType", parsedArgs.get("--generateType"));
  generateSequenceJob.getConfiguration().set("columnSize", parsedArgs.get("--columnSize"));

  if (GenerateType.valueOf(parsedArgs.get("--generateType")).equals(GenerateType.SEQUENCE)) { // SEQUENCE
    generateSequenceJob.getConfiguration().set("startNumber", parsedArgs.get("--startNumber"));
    int index = generateSequenceJob.getConfiguration().getInt("startNumber", 0);
    for (long position : counterMap.keySet()) {
      generateSequenceJob.getConfiguration().set(String.valueOf(position), String.valueOf(index));
      index += counterMap.get(position);
    }
  } else { // TIMESTAMP
    generateSequenceJob.getConfiguration().set("dateFormat", parsedArgs.get("--dateFormat"));
  }

  boolean step2 = generateSequenceJob.waitForCompletion(true);
  if (!step2) {
    return JOB_FAIL;
  }

  try {
    // Delete the temporary path.
    FileSystem.get(generateSequenceJob.getConfiguration()).delete(temporaryPath, true);
    logger.info("Now removed {}", temporaryPath.toString());
  } catch (Exception ex) {
    // No exception handling is needed here.
  }
  return JOB_SUCCESS;
}
@Test
public void testRedirect() throws Exception {
  Configuration conf = new YarnConfiguration();
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
  conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
  conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);

  // Start the RM.
  RMService rmService = new RMService("test");
  rmService.init(conf);
  rmService.start();

  // Start the AM.
  AMService amService = new AMService();
  amService.init(conf);
  amService.start(conf);

  // Start the HS.
  HistoryService historyService = new HistoryService();
  historyService.init(conf);
  historyService.start(conf);

  LOG.info("services started");
  Cluster cluster = new Cluster(conf);
  org.apache.hadoop.mapreduce.JobID jobID = new org.apache.hadoop.mapred.JobID("201103121733", 1);
  org.apache.hadoop.mapreduce.Counters counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(amContact);

  LOG.info(
      "Sleeping for 5 seconds before stop for"
          + " the client socket to not get EOF immediately..");
  Thread.sleep(5000);

  // Bring down the AM service.
  amService.stop();

  LOG.info("Sleeping for 5 seconds after stop for" + " the server to exit cleanly..");
  Thread.sleep(5000);

  amRestarting = true;

  // Same client; results are returned from the fake (not started) job.
  counters = cluster.getJob(jobID).getCounters();
  Assert.assertEquals(0, counters.countCounters());
  Job job = cluster.getJob(jobID);
  org.apache.hadoop.mapreduce.TaskID taskId =
      new org.apache.hadoop.mapreduce.TaskID(jobID, TaskType.MAP, 0);
  TaskAttemptID tId = new TaskAttemptID(taskId, 0);

  // Invoke all methods to check that no exception is thrown.
  job.killJob();
  job.killTask(tId);
  job.failTask(tId);
  job.getTaskCompletionEvents(0, 100);
  job.getStatus();
  job.getTaskDiagnostics(tId);
  job.getTaskReports(TaskType.MAP);
  job.getTrackingURL();

  amRestarting = false;
  amService = new AMService();
  amService.init(conf);
  amService.start(conf);
  amContact = false; // reset
  counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(amContact);

  // Stop the AM. It is not even restarting, so it should be treated as completed.
  amService.stop();

  // Same client.
  counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(hsContact);

  rmService.stop();
  historyService.stop();
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path inputPath = new Path(args[0]);
  Path outputDir = new Path(args[1]);

  // Create the configuration
  Configuration conf = new Configuration(true);

  // Create the job
  @SuppressWarnings("deprecation")
  Job job = new Job(conf, "CountryIncomeConf");
  job.setJarByClass(CountryIncomeConf.class);

  // Decompress the .gz input file, e.g. foo.csv.gz to foo.csv
  String uri = args[0];
  FileSystem fs = FileSystem.get(URI.create(uri), conf);
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(inputPath);
  if (codec == null) {
    System.err.println("No codec found for " + uri);
    System.exit(1);
  }
  String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
  InputStream in = null;
  OutputStream out = null;
  try {
    in = codec.createInputStream(fs.open(inputPath));
    out = fs.create(new Path(outputUri));
    IOUtils.copyBytes(in, out, conf);
  } finally {
    IOUtils.closeStream(in);
    IOUtils.closeStream(out);
  }

  // Set up the MapReduce classes
  job.setMapperClass(CountryIncomeMapper.class);
  job.setReducerClass(CountryIncomeReducer.class);
  job.setNumReduceTasks(1);

  // Specify key / value types
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  // Input
  // FileInputFormat.addInputPath(job, inputPath);
  FileInputFormat.addInputPaths(job, outputUri);
  job.setInputFormatClass(TextInputFormat.class);

  // Output
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Delete the output directory if it already exists
  FileSystem hdfs = FileSystem.get(conf);
  if (hdfs.exists(outputDir)) {
    hdfs.delete(outputDir, true);
  }

  // Execute the job
  int code = job.waitForCompletion(true) ? 0 : 1;

  // Find and display the custom counters
  Counters counters = job.getCounters();
  System.out.printf(
      "Missing Fields: %d, Error Count: %d\n",
      counters.findCounter(COUNTERS.MISSING_FIELDS_RECORD_COUNT).getValue(),
      counters.findCounter(COUNTERS.NULL_OR_EMPTY).getValue());
  System.exit(code);
}
public static Counter getCounter(Enum<?> e) {
  return COUNTERS.findCounter(e);
}
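The two static getCounter(...) accessors above (by group/name and by enum) both read from a shared COUNTERS field that is not shown in the snippets. Below is a minimal sketch of how such a holder could be wired together; the CounterHolder class, the in-memory COUNTERS field, and the example counter names are assumptions for illustration, not taken from the original code.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;

/** Hypothetical holder backing the static getCounter(...) helpers shown above. */
public final class CounterHolder {

  // Shared in-memory Counters instance assumed by the two accessors.
  private static final Counters COUNTERS = new Counters();

  private CounterHolder() {}

  public static Counter getCounter(String group, String name) {
    return COUNTERS.findCounter(group, name);
  }

  public static Counter getCounter(Enum<?> e) {
    return COUNTERS.findCounter(e);
  }

  public static void main(String[] args) {
    // Example usage: increment a counter and read it back.
    getCounter("parser", "BAD_RECORDS").increment(1);
    System.out.println(getCounter("parser", "BAD_RECORDS").getValue());
  }
}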