@Test public void mrRun() throws Exception { FileSystem fs = dfsCluster.getFileSystem(); Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input")); fs.delete(inDir, true); String DATADIR = "/user/testing/testMapperReducer/data"; Path dataDir = fs.makeQualified(new Path(DATADIR)); fs.delete(dataDir, true); Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output")); fs.delete(outDir, true); assertTrue(fs.mkdirs(inDir)); Path INPATH = new Path(inDir, "input.txt"); OutputStream os = fs.create(INPATH); Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8); wr.write(DATADIR + "/" + inputAvroFile); wr.close(); assertTrue(fs.mkdirs(dataDir)); fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir); JobConf jobConf = getJobConf(); jobConf.set("jobclient.output.filter", "ALL"); if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger // and set breakpoints jobConf.set("mapred.job.tracker", "local"); } jobConf.setMaxMapAttempts(1); jobConf.setMaxReduceAttempts(1); jobConf.setJar(SEARCH_ARCHIVES_JAR); int shards = 2; int maxReducers = Integer.MAX_VALUE; if (ENABLE_LOCAL_JOB_RUNNER) { // local job runner has a couple of limitations: only one reducer is supported and the // DistributedCache doesn't work. // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/ maxReducers = 1; shards = 1; } String[] args = new String[] { "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf", "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose", numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), numRuns % 3 == 0 ? "--reducers=" + shards : (numRuns % 3 == 1 ? 
"--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) }; if (numRuns % 3 == 2) { args = concat(args, new String[] {"--fanout=2"}); } if (numRuns == 0) { // force (slow) MapReduce based randomization to get coverage for that as well args = concat( new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"}, args); } MapReduceIndexerTool tool = createTool(); int res = ToolRunner.run(jobConf, tool, args); assertEquals(0, res); Job job = tool.job; assertTrue(job.isComplete()); assertTrue(job.isSuccessful()); if (numRuns % 3 != 2) { // Only run this check if mtree merge is disabled. // With mtree merge enabled the BatchWriter counters aren't available anymore because // variable "job" now refers to the merge job rather than the indexing job assertEquals( "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN, count, job.getCounters() .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()) .getValue()); } // Check the output is as expected outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR); Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir)); System.out.println("outputfiles:" + Arrays.toString(outputFiles)); UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards); // run again with --dryrun mode: tool = createTool(); args = concat(args, new String[] {"--dry-run"}); res = ToolRunner.run(jobConf, tool, args); assertEquals(0, res); numRuns++; }
/**
 * Sets the maximum number of attempts for each reduce task on the wrapped job
 * configuration.
 *
 * @param maxAttempts maximum reduce attempts to allow
 * @return this builder, for fluent chaining
 * @throws IOException declared for interface compatibility with sibling builder methods
 */
public JobBuilder maxReduceAttempts(int maxAttempts) throws IOException {
  this._jobConf.setMaxReduceAttempts(maxAttempts);
  return this;
}