@Test
 public void writeToString() {
   ValidTxnList txns = new ValidCompactorTxnList(new long[] {9, 7, 10}, 9, 37);
   Assert.assertEquals("37:9:7:9:10", txns.writeToString());
   txns = new ValidCompactorTxnList();
   Assert.assertEquals(Long.toString(Long.MAX_VALUE) + ":-1:", txns.writeToString());
   txns = new ValidCompactorTxnList(new long[0], -1, 23);
   Assert.assertEquals("23:-1:", txns.writeToString());
 }
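 These assertions pin down the serialized form: high watermark, then the minimum open transaction, then the sorted exception list, separated by colons. Below is a minimal sketch that reproduces the first assertion outside of JUnit, using only the constructor and accessors exercised in this section (the package names are assumed):

// Sketch only: builds the same list as the test above and prints its string form.
// The package names for ValidTxnList/ValidCompactorTxnList are assumed here.
import org.apache.hadoop.hive.common.ValidCompactorTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;

public class ValidCompactorTxnListDemo {
  public static void main(String[] args) {
    // exceptions {9, 7, 10}, minimum open txn 9, high watermark 37
    ValidTxnList txns = new ValidCompactorTxnList(new long[] {9, 7, 10}, 9, 37);
    System.out.println(txns.writeToString());    // "37:9:7:9:10"
    System.out.println(txns.getHighWatermark()); // 37
  }
}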
  @Override
  public boolean next(RecordIdentifier recordIdentifier, OrcStruct prev) throws IOException {
    boolean keysSame = true;
    while (keysSame && primary != null) {

      // The primary's nextRecord is the next value to return
      OrcStruct current = primary.nextRecord;
      recordIdentifier.set(primary.key);

      // Advance the primary reader to the next record
      primary.next(extraValue);

      // Save the current record as the new extraValue for next time so that
      // we minimize allocations
      extraValue = current;

      // now that the primary reader has advanced, we need to see if we
      // continue to read it or move to the secondary.
      if (primary.nextRecord == null || primary.key.compareTo(secondaryKey) > 0) {

        // if the primary isn't done, push it back into the readers
        if (primary.nextRecord != null) {
          readers.put(primary.key, primary);
        }

        // update primary and secondaryKey
        Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
        if (entry != null) {
          primary = entry.getValue();
          if (readers.isEmpty()) {
            secondaryKey = null;
          } else {
            secondaryKey = readers.firstKey();
          }
        } else {
          primary = null;
        }
      }

      // if this transaction isn't ok, skip over it
      if (!validTxnList.isTxnValid(((ReaderKey) recordIdentifier).getCurrentTransactionId())) {
        continue;
      }

      // if we are collapsing, figure out if this is a new row
      if (collapse) {
        keysSame = prevKey.compareRow(recordIdentifier) == 0;
        if (!keysSame) {
          prevKey.set(recordIdentifier);
        }
      } else {
        keysSame = false;
      }

      // set the output record by fiddling with the pointers so that we can
      // avoid a copy.
      prev.linkFields(current);
    }
    return !keysSame;
  }
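  The next() method above is a two-level merge: the smallest reader is held out of the TreeMap as primary, and it is only pushed back (and the map re-polled) once it runs out or its key moves past secondaryKey, the next-smallest key still in the map. The following self-contained sketch mirrors that control flow over plain iterators of longs; it is an illustrative analogue with hypothetical names, not the Hive class itself.

import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/** Simplified analogue of the primary/secondary merge used in next() above. */
class MergeSketch {

  /** Key = (value, readerId) so that equal values from different readers never collide. */
  static class Key implements Comparable<Key> {
    final long value;
    final int readerId;
    Key(long value, int readerId) { this.value = value; this.readerId = readerId; }
    @Override public int compareTo(Key o) {
      int c = Long.compare(value, o.value);
      return c != 0 ? c : Integer.compare(readerId, o.readerId);
    }
  }

  static List<Long> merge(List<Iterator<Long>> inputs) {
    TreeMap<Key, Iterator<Long>> readers = new TreeMap<Key, Iterator<Long>>();
    for (int i = 0; i < inputs.size(); i++) {
      Iterator<Long> it = inputs.get(i);
      if (it.hasNext()) readers.put(new Key(it.next(), i), it);
    }
    List<Long> out = new ArrayList<Long>();
    Map.Entry<Key, Iterator<Long>> primary = readers.pollFirstEntry();
    Key secondaryKey = readers.isEmpty() ? null : readers.firstKey();
    while (primary != null) {
      Key key = primary.getKey();
      out.add(key.value);                               // emit the primary's current record
      Iterator<Long> it = primary.getValue();
      Key next = it.hasNext() ? new Key(it.next(), key.readerId) : null;
      // If the primary ran out, or its next key passed the secondary, re-balance.
      if (next == null || (secondaryKey != null && next.compareTo(secondaryKey) > 0)) {
        if (next != null) readers.put(next, it);        // push the primary back
        primary = readers.pollFirstEntry();             // promote the new smallest reader
        secondaryKey = readers.isEmpty() ? null : readers.firstKey();
      } else {
        primary = new AbstractMap.SimpleEntry<Key, Iterator<Long>>(next, it);
      }
    }
    return out;
  }
}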
Example #3
  // todo: this doesn't check if compaction is already running (even though Initiator does, but we
  // don't go through Initiator for user-initiated compactions)
  @Override
  public void run() {
    do {
      boolean launchedJob = false;
      // Make sure nothing escapes this run method and kills the metastore at large,
      // so wrap it in a big catch Throwable statement.
      try {
        final CompactionInfo ci = txnHandler.findNextToCompact(name);

        if (ci == null && !stop.get()) {
          try {
            Thread.sleep(SLEEP_TIME);
            continue;
          } catch (InterruptedException e) {
            LOG.warn("Worker thread sleep interrupted " + e.getMessage());
            continue;
          }
        }

        // Find the table we will be working with.
        Table t1 = null;
        try {
          t1 = resolveTable(ci);
          if (t1 == null) {
            LOG.info(
                "Unable to find table "
                    + ci.getFullTableName()
                    + ", assuming it was dropped and moving on.");
            txnHandler.markCleaned(ci);
            continue;
          }
        } catch (MetaException e) {
          txnHandler.markCleaned(ci);
          continue;
        }
        // This chicanery is to get around the fact that the table needs to be final in order to
        // go into the doAs below.
        final Table t = t1;

        // Find the partition we will be working with, if there is one.
        Partition p = null;
        try {
          p = resolvePartition(ci);
          if (p == null && ci.partName != null) {
            LOG.info(
                "Unable to find partition "
                    + ci.getFullPartitionName()
                    + ", assuming it was dropped and moving on.");
            txnHandler.markCleaned(ci);
            continue;
          }
        } catch (Exception e) {
          txnHandler.markCleaned(ci);
          continue;
        }

        // Find the appropriate storage descriptor
        final StorageDescriptor sd = resolveStorageDescriptor(t, p);

        // Check that the table or partition isn't sorted, as we don't yet support that.
        if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
          LOG.error("Attempt to compact sorted table, which is not yet supported!");
          txnHandler.markCleaned(ci);
          continue;
        }

        final boolean isMajor = ci.isMajorCompaction();
        final ValidTxnList txns =
            CompactionTxnHandler.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
        LOG.debug("ValidCompactTxnList: " + txns.writeToString());
        txnHandler.setCompactionHighestTxnId(ci, txns.getHighWatermark());
        final StringBuilder jobName = new StringBuilder(name);
        jobName.append("-compactor-");
        jobName.append(ci.getFullPartitionName());

        // Determine who to run as
        String runAs;
        if (ci.runAs == null) {
          runAs = findUserToRunAs(sd.getLocation(), t);
          txnHandler.setRunAs(ci.id, runAs);
        } else {
          runAs = ci.runAs;
        }

        LOG.info("Starting " + ci.type.toString() + " compaction for " + ci.getFullPartitionName());

        final StatsUpdater su =
            StatsUpdater.init(
                ci,
                txnHandler.findColumnsWithStats(ci),
                conf,
                runJobAsSelf(runAs) ? runAs : t.getOwner());
        final CompactorMR mr = new CompactorMR();
        launchedJob = true;
        try {
          if (runJobAsSelf(runAs)) {
            mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
          } else {
            UserGroupInformation ugi =
                UserGroupInformation.createProxyUser(
                    t.getOwner(), UserGroupInformation.getLoginUser());
            ugi.doAs(
                new PrivilegedExceptionAction<Object>() {
                  @Override
                  public Object run() throws Exception {
                    mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
                    return null;
                  }
                });
          }
          txnHandler.markCompacted(ci);
        } catch (Exception e) {
          LOG.error(
              "Caught exception while trying to compact "
                  + ci
                  + ".  Marking clean to avoid repeated failures, "
                  + StringUtils.stringifyException(e));
          txnHandler.markFailed(ci);
        }
      } catch (Throwable t) {
        LOG.error(
            "Caught an exception in the main loop of compactor worker "
                + name
                + ", "
                + StringUtils.stringifyException(t));
      }

      // If we didn't try to launch a job it either means there was no work to do or we got
      // here as the result of a communication failure with the DB.  Either way we want to wait
      // a bit before we restart the loop.
      if (!launchedJob && !stop.get()) {
        try {
          Thread.sleep(SLEEP_TIME);
        } catch (InterruptedException e) {
          // Ignore and fall through; the loop condition below handles shutdown.
        }
      }
    } while (!stop.get());
  }
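  Stripping away the metastore calls, the outer loop above is a poll/work/sleep pattern: ask for the next compaction, back off when there is nothing to do or the attempt failed, and exit only when the stop flag is set, with a catch-all so no single failure kills the worker thread. Below is a minimal, self-contained sketch of that control flow; the Supplier stands in for txnHandler.findNextToCompact and is hypothetical, and the sleep value is assumed.

import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;

/** Stripped-down sketch of the poll/work/sleep loop in Worker.run() above. */
class WorkerLoopSketch implements Runnable {
  private static final long SLEEP_TIME = 5000L;           // back-off in ms (value assumed)
  private final AtomicBoolean stop = new AtomicBoolean(false);
  private final Supplier<Runnable> findNextToCompact;      // hypothetical stand-in for txnHandler

  WorkerLoopSketch(Supplier<Runnable> findNextToCompact) {
    this.findNextToCompact = findNextToCompact;
  }

  @Override
  public void run() {
    do {
      boolean launchedJob = false;
      // Make sure nothing escapes this run method and kills the worker thread.
      try {
        Runnable work = findNextToCompact.get();
        if (work != null) {
          launchedJob = true;
          work.run();
        }
      } catch (Throwable t) {
        // log and keep looping; a single failure must not stop the worker
      }
      // No work or a failure: wait a bit before polling again.
      if (!launchedJob && !stop.get()) {
        try {
          Thread.sleep(SLEEP_TIME);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
      }
    } while (!stop.get());
  }

  void shutdown() {
    stop.set(true);
  }
}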
Example #4
  /**
   * Run a compactor job.
   *
   * @param conf Hive configuration
   * @param jobName name to run this job with
   * @param t metastore table
   * @param sd metastore storage descriptor
   * @param txns list of valid transactions
   * @param isMajor is this a major compaction?
   * @param su stats updater to invoke once the compaction job completes
   * @throws java.io.IOException if the job fails
   */
  void run(
      HiveConf conf,
      String jobName,
      Table t,
      StorageDescriptor sd,
      ValidTxnList txns,
      boolean isMajor,
      Worker.StatsUpdater su)
      throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName(jobName);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setJarByClass(CompactorMR.class);
    LOG.debug("User jar set to " + job.getJar());
    job.setMapperClass(CompactorMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormat(CompactorInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setOutputCommitter(CompactorOutputCommitter.class);

    String queueName = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
    if (queueName != null && queueName.length() > 0) {
      job.setQueueName(queueName);
    }

    job.set(FINAL_LOCATION, sd.getLocation());
    job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
    job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
    job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
    job.setBoolean(IS_MAJOR, isMajor);
    job.setBoolean(IS_COMPRESSED, sd.isCompressed());
    job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
    job.setInt(NUM_BUCKETS, sd.getNumBuckets());
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    setColumnTypes(job, sd.getCols());

    // Figure out and encode what files we need to read.  We do this here (rather than in
    // getSplits below) because as part of this we discover our minimum and maximum transactions,
    // and discovering that in getSplits is too late as we then have no way to pass it to our
    // mapper.

    AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, false);
    StringableList dirsToSearch = new StringableList();
    Path baseDir = null;
    if (isMajor) {
      // There may not be a base dir if the partition was empty before inserts or if this
      // partition is just now being converted to ACID.
      baseDir = dir.getBaseDirectory();
      if (baseDir == null) {
        List<HdfsFileStatusWithId> originalFiles = dir.getOriginalFiles();
        if (originalFiles != null && !originalFiles.isEmpty()) {
          // There are original format files
          for (HdfsFileStatusWithId stat : originalFiles) {
            Path path = stat.getFileStatus().getPath();
            dirsToSearch.add(path);
            LOG.debug("Adding original file " + path + " to dirs to search");
          }
          // Set base to the location so that the input format reads the original files.
          baseDir = new Path(sd.getLocation());
        }
      } else {
        // add our base to the list of directories to search for files in.
        LOG.debug("Adding base directory " + baseDir + " to dirs to search");
        dirsToSearch.add(baseDir);
      }
    }

    List<AcidUtils.ParsedDelta> parsedDeltas = dir.getCurrentDirectories();

    if (parsedDeltas == null || parsedDeltas.size() == 0) {
      // Seriously, no deltas?  Can't compact that.
      LOG.error("No delta files found to compact in " + sd.getLocation());
      return;
    }

    StringableList deltaDirs = new StringableList();
    long minTxn = Long.MAX_VALUE;
    long maxTxn = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta delta : parsedDeltas) {
      LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
      dirsToSearch.add(delta.getPath());
      deltaDirs.add(delta.getPath());
      minTxn = Math.min(minTxn, delta.getMinTransaction());
      maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
    }

    if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    job.setLong(MAX_TXN, maxTxn);
    LOG.debug("Setting minimum transaction to " + minTxn);
    LOG.debug("Setting maximume transaction to " + maxTxn);

    RunningJob rj = JobClient.runJob(job);
    LOG.info(
        "Submitted "
            + (isMajor ? CompactionType.MAJOR : CompactionType.MINOR)
            + " compaction job '"
            + jobName
            + "' with jobID="
            + rj.getID()
            + " to "
            + job.getQueueName()
            + " queue.  "
            + "(current delta dirs count="
            + dir.getCurrentDirectories().size()
            + ", obsolete delta dirs count="
            + dir.getObsolete());
    rj.waitForCompletion();
    su.gatherStats();
  }
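  Everything the map side needs is passed through the JobConf keys set above. Below is a minimal sketch of reading those values back with the plain JobConf getters; the literal key strings are placeholders, since the real constants (IS_MAJOR, MIN_TXN, MAX_TXN, BASE_DIR, DELTA_DIRS) are defined elsewhere in CompactorMR, and the real CompactorMap additionally parses the directory lists back through StringableList.

import org.apache.hadoop.mapred.JobConf;

/** Sketch only: shows that the settings written in run() can be read back from the JobConf. */
class CompactorConfigSketch {
  // Placeholder key names; the actual values are the constants used in CompactorMR.
  private static final String IS_MAJOR = "compactor.is.major";
  private static final String MIN_TXN = "compactor.txn.min";
  private static final String MAX_TXN = "compactor.txn.max";
  private static final String BASE_DIR = "compactor.base.dir";
  private static final String DELTA_DIRS = "compactor.delta.dirs";

  static void readBack(JobConf job) {
    boolean isMajor = job.getBoolean(IS_MAJOR, false);
    long minTxn = job.getLong(MIN_TXN, Long.MAX_VALUE);
    long maxTxn = job.getLong(MAX_TXN, Long.MIN_VALUE);
    String baseDir = job.get(BASE_DIR);        // may be null for a minor compaction
    String deltaDirs = job.get(DELTA_DIRS);    // serialized list of delta paths
    System.out.println("major=" + isMajor + ", txns=[" + minTxn + "," + maxTxn
        + "], base=" + baseDir + ", deltas=" + deltaDirs);
  }
}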
 @Test
 public void exceptionsInMidst() {
   ValidTxnList txns = new ValidCompactorTxnList(new long[] {8}, 8, 15);
   ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
   Assert.assertEquals(ValidTxnList.RangeResponse.NONE, rsp);
 }
 @Test
 public void maxTxnLowNoExceptions() {
   ValidTxnList txns = new ValidCompactorTxnList(new long[0], -1, 15);
   ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
   Assert.assertEquals(ValidTxnList.RangeResponse.ALL, rsp);
 }
 @Test
 public void minTxnHighNoExceptions() {
   ValidTxnList txns = new ValidCompactorTxnList(new long[0], -1, 5);
   ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
   Assert.assertEquals(ValidTxnList.RangeResponse.NONE, rsp);
 }
 @Test
 public void maxTxnLow() {
   ValidTxnList txns = new ValidCompactorTxnList(new long[] {13, 14}, 13, 15);
   ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
   Assert.assertEquals(ValidTxnList.RangeResponse.ALL, rsp);
 }
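 These tests exercise isTxnRangeValid for a compactor transaction list built from (exceptions, minimum open txn, high watermark). A small sketch of a range check built on the same call follows; the package names and the fullyValid helper are assumptions for illustration, not Hive code.

import org.apache.hadoop.hive.common.ValidCompactorTxnList;  // package assumed
import org.apache.hadoop.hive.common.ValidTxnList;

class RangeCheckSketch {
  /** True only when every transaction in [minTxn, maxTxn] is reported valid. */
  static boolean fullyValid(ValidTxnList txns, long minTxn, long maxTxn) {
    return txns.isTxnRangeValid(minTxn, maxTxn) == ValidTxnList.RangeResponse.ALL;
  }

  public static void main(String[] args) {
    // Same configuration as maxTxnLow(): exceptions {13, 14}, min open 13, high watermark 15.
    ValidTxnList txns = new ValidCompactorTxnList(new long[] {13, 14}, 13, 15);
    System.out.println(fullyValid(txns, 7, 9));  // true, matching the ALL assertion above
  }
}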