예제 #1
0
  @Test
  public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8);
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    jobConf.set("jobclient.output.filter", "ALL");
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger
      // and set breakpoints
      jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
      // local job runner has a couple of limitations: only one reducer is supported and the
      // DistributedCache doesn't work.
      // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
      maxReducers = 1;
      shards = 1;
    }

    String[] args =
        new String[] {
          "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf",
          "--morphline-id=morphline1",
          "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
          "--output-dir=" + outDir.toString(),
          "--shards=" + shards,
          "--verbose",
          numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
          numRuns % 3 == 0
              ? "--reducers=" + shards
              : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
        };
    if (numRuns % 3 == 2) {
      args = concat(args, new String[] {"--fanout=2"});
    }
    if (numRuns == 0) {
      // force (slow) MapReduce based randomization to get coverage for that as well
      args =
          concat(
              new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"},
              args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
      // Only run this check if mtree merge is disabled.
      // With mtree merge enabled the BatchWriter counters aren't available anymore because
      // variable "job" now refers to the merge job rather than the indexing job
      assertEquals(
          "Invalid counter "
              + SolrRecordWriter.class.getName()
              + "."
              + SolrCounters.DOCUMENTS_WRITTEN,
          count,
          job.getCounters()
              .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
              .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again with --dryrun mode:
    tool = createTool();
    args = concat(args, new String[] {"--dry-run"});
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
  }
 public JobBuilder maxReduceAttempts(int maxAttempts) throws IOException {
   _jobConf.setMaxReduceAttempts(maxAttempts);
   return this;
 }