Пример #1
0
  /**
   * Run a compactor job.
   *
   * <p>Configures a map-only job (zero reducers) over the base directory or original files (major
   * compaction only) plus every current delta directory, records the minimum and maximum
   * transaction ids covered by those deltas in the job configuration, runs the job and finally
   * asks {@code su} to gather statistics. If no delta directories are found, an error is logged
   * and the method returns without running a job.
   *
   * @param conf Hive configuration file
   * @param jobName name to run this job with
   * @param t metastore table
   * @param sd metastore storage descriptor
   * @param txns list of valid transactions
   * @param isMajor is this a major compaction?
   * @param su stats updater whose {@code gatherStats()} is called once the job has completed
   * @throws java.io.IOException if the job fails
   */
  void run(
      HiveConf conf,
      String jobName,
      Table t,
      StorageDescriptor sd,
      ValidTxnList txns,
      boolean isMajor,
      Worker.StatsUpdater su)
      throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName(jobName);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setJarByClass(CompactorMR.class);
    LOG.debug("User jar set to " + job.getJar());
    job.setMapperClass(CompactorMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormat(CompactorInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setOutputCommitter(CompactorOutputCommitter.class);

    // Only override the queue when one has actually been configured.
    String queueName = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
    if (queueName != null && queueName.length() > 0) {
      job.setQueueName(queueName);
    }

    job.set(FINAL_LOCATION, sd.getLocation());
    // A random suffix keeps the temporary directory unique per invocation.
    job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
    job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
    job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
    job.setBoolean(IS_MAJOR, isMajor);
    job.setBoolean(IS_COMPRESSED, sd.isCompressed());
    job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
    job.setInt(NUM_BUCKETS, sd.getNumBuckets());
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    setColumnTypes(job, sd.getCols());

    // Figure out and encode what files we need to read.  We do this here (rather than in
    // getSplits below) because as part of this we discover our minimum and maximum transactions,
    // and discovering that in getSplits is too late as we then have no way to pass it to our
    // mapper.

    AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, false);
    StringableList dirsToSearch = new StringableList();
    Path baseDir = null;
    if (isMajor) {
      // There may not be a base dir if the partition was empty before inserts or if this
      // partition is just now being converted to ACID.
      baseDir = dir.getBaseDirectory();
      if (baseDir != null) {
        // add our base to the list of directories to search for files in.
        LOG.debug("Adding base directory " + baseDir + " to dirs to search");
        dirsToSearch.add(baseDir);
      } else {
        List<HdfsFileStatusWithId> originalFiles = dir.getOriginalFiles();
        if (originalFiles != null && !originalFiles.isEmpty()) {
          // There are original format files
          for (HdfsFileStatusWithId stat : originalFiles) {
            Path path = stat.getFileStatus().getPath();
            dirsToSearch.add(path);
            LOG.debug("Adding original file " + path + " to dirs to search");
          }
          // Set base to the location so that the input format reads the original files.
          baseDir = new Path(sd.getLocation());
        }
      }
    }

    List<AcidUtils.ParsedDelta> parsedDeltas = dir.getCurrentDirectories();

    if (parsedDeltas == null || parsedDeltas.isEmpty()) {
      // Seriously, no deltas?  Can't compact that.
      LOG.error("No delta files found to compact in " + sd.getLocation());
      return;
    }

    // Collect the delta directories and the transaction range they span.
    StringableList deltaDirs = new StringableList();
    long minTxn = Long.MAX_VALUE;
    long maxTxn = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta delta : parsedDeltas) {
      LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
      dirsToSearch.add(delta.getPath());
      deltaDirs.add(delta.getPath());
      minTxn = Math.min(minTxn, delta.getMinTransaction());
      maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
    }

    if (baseDir != null) {
      job.set(BASE_DIR, baseDir.toString());
    }
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    job.setLong(MAX_TXN, maxTxn);
    LOG.debug("Setting minimum transaction to " + minTxn);
    LOG.debug("Setting maximum transaction to " + maxTxn);

    // NOTE(review): per the Hadoop API docs JobClient.runJob submits the job and polls until it
    // completes, so the message below is presumably logged only after the job has finished and
    // the waitForCompletion() call returns immediately - confirm before relying on the timing.
    RunningJob rj = JobClient.runJob(job);
    LOG.info(
        "Submitted "
            + (isMajor ? CompactionType.MAJOR : CompactionType.MINOR)
            + " compaction job '"
            + jobName
            + "' with jobID="
            + rj.getID()
            + " to "
            + job.getQueueName()
            + " queue.  "
            + "(current delta dirs count="
            + parsedDeltas.size()
            + ", obsolete delta dirs count="
            // Log the number of obsolete entries, as the label promises, not the whole list.
            + dir.getObsolete().size()
            + ")");
    rj.waitForCompletion();
    su.gatherStats();
  }
Пример #2
0
  /**
   * Test if {@link CurrentJHParser} can read events from current JH files.
   *
   * <p>Starts a single-node {@link MiniMRCluster}, runs a job with one map and one reduce task,
   * waits for its job history file to appear, parses that file event by event and compares the
   * sequence of event types against a fixed expected list. The parsed job is additionally checked
   * through {@code validateParsedJob}.
   *
   * @throws Exception if the cluster, the job or the parsing fails, or if the thread is
   *     interrupted while waiting for the history file
   */
  @Test
  public void testCurrentJHParser() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(lfs);

    final Path tempDir = new Path(rootTempDir, "TestCurrentJHParser");
    lfs.delete(tempDir, true);

    String queueName = "testQueue";
    // Run a MR job
    // create a MR cluster
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
    conf.set("mapred.queue.names", queueName);
    MiniMRCluster mrCluster = new MiniMRCluster(1, "file:///", 1, null, null, new JobConf(conf));

    // run a job
    Path inDir = new Path(tempDir, "input");
    Path outDir = new Path(tempDir, "output");
    JobHistoryParser parser = null;
    RewindableInputStream ris = null;
    ArrayList<String> seenEvents = new ArrayList<String>(10);

    try {
      JobConf jobConf = mrCluster.createJobConf();
      jobConf.setQueueName(queueName);
      // construct a job with 1 map and 1 reduce task.
      RunningJob rJob = UtilsForTests.runJob(jobConf, inDir, outDir, 1, 1);
      rJob.waitForCompletion();
      assertTrue("Job failed", rJob.isSuccessful());

      JobID id = rJob.getID();

      // get the jobhistory filepath
      Path inputPath =
          new Path(JobHistory.getHistoryFilePath(org.apache.hadoop.mapred.JobID.downgrade(id)));
      // wait for 10 secs (100 polls x 100 ms) for the jobhistory file to move into the done folder
      for (int i = 0; i < 100; ++i) {
        if (lfs.exists(inputPath)) {
          break;
        }
        // sleep(), not wait(): Object.wait() without owning the monitor throws
        // IllegalMonitorStateException instead of pausing.
        TimeUnit.MILLISECONDS.sleep(100);
      }

      assertTrue("Missing job history file", lfs.exists(inputPath));

      InputDemuxer inputDemuxer = new DefaultInputDemuxer();
      inputDemuxer.bindTo(inputPath, conf);

      Pair<String, InputStream> filePair = inputDemuxer.getNext();

      assertNotNull(filePair);

      ris = new RewindableInputStream(filePair.second());

      // Test if the JobHistoryParserFactory can detect the parser correctly
      parser = JobHistoryParserFactory.getParser(ris);

      // Get ParsedJob
      String jobId = TraceBuilder.extractJobID(filePair.first());
      JobBuilder builder = new JobBuilder(jobId);

      // Record every event type in order and feed each event to the job builder.
      HistoryEvent e;
      while ((e = parser.nextEvent()) != null) {
        String eventString = e.getEventType().toString();
        System.out.println(eventString);
        seenEvents.add(eventString);
        builder.process(e);
      }

      ParsedJob parsedJob = builder.build();
      // validate the obtainXXX api of ParsedJob, ParsedTask and
      // ParsedTaskAttempt.
      validateParsedJob(parsedJob, 1, 1, queueName);
    } finally {
      // Nested so that a failure in one cleanup step does not skip the remaining ones.
      try {
        // stop the MR cluster
        mrCluster.shutdown();
      } finally {
        try {
          if (ris != null) {
            ris.close();
          }
        } finally {
          try {
            if (parser != null) {
              parser.close();
            }
          } finally {
            // cleanup the filesystem
            lfs.delete(tempDir, true);
          }
        }
      }
    }

    // Check against the gold standard
    System.out.println("testCurrentJHParser validating using gold std ");
    String[] goldLines =
        new String[] {
          "JOB_SUBMITTED",
          "JOB_PRIORITY_CHANGED",
          "JOB_STATUS_CHANGED",
          "JOB_INITED",
          "JOB_INFO_CHANGED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "MAP_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "JOB_STATUS_CHANGED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "MAP_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "REDUCE_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "MAP_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "JOB_STATUS_CHANGED",
          "JOB_FINISHED"
        };

    // Check the output with gold std
    assertEquals("Size mismatch", goldLines.length, seenEvents.size());

    int index = 0;
    for (String goldLine : goldLines) {
      assertEquals("Content mismatch", goldLine, seenEvents.get(index++));
    }
  }